diff --git a/src/main/java/htsjdk/variant/variantcontext/StructuralVariantType.java b/src/main/java/htsjdk/variant/variantcontext/StructuralVariantType.java new file mode 100644 index 000000000..36b517a2a --- /dev/null +++ b/src/main/java/htsjdk/variant/variantcontext/StructuralVariantType.java @@ -0,0 +1,47 @@ +/* + * The MIT License + * + * Copyright (c) 2016 Pierre Lindenbaum @yokofakun Institut du Thorax - Nantes - France + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +package htsjdk.variant.variantcontext; + +/** + * Type of Structural Variant as defined in the VCF spec 4.2 + * + */ +public enum StructuralVariantType { + /** Deletion relative to the reference */ + DEL, + /** Insertion of novel sequence relative to the reference */ + INS, + /** Region of elevated copy number relative to the reference */ + DUP, + /** Inversion of reference sequence */ + INV, + /** Copy number variable region */ + CNV, + /** breakend structural variation. VCF Specification : An arbitrary rearrangement + * event can be summarized as a set of novel adjacencies. + * Each adjacency ties together two breakends. + */ + BND +} diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantContext.java b/src/main/java/htsjdk/variant/variantcontext/VariantContext.java index f64b0ff8e..0baf0bd70 100644 --- a/src/main/java/htsjdk/variant/variantcontext/VariantContext.java +++ b/src/main/java/htsjdk/variant/variantcontext/VariantContext.java @@ -1732,4 +1732,13 @@ public int getAlleleIndex(final Allele allele) { if ( index == -1 ) throw new IllegalArgumentException("Allele " + targetAllele + " not in this VariantContex " + this); return GenotypeLikelihoods.getPLIndecesOfAlleles(0, index); } + + /** + * Search for the INFO=SVTYPE and return the type of Structural Variant + * @return the StructuralVariantType of null if there is no property SVTYPE + * */ + public StructuralVariantType getStructuralVariantType() { + final String svType = this.getAttributeAsString(VCFConstants.SVTYPE, null); + return svType == null ? null : StructuralVariantType.valueOf(svType); + } } diff --git a/src/main/java/htsjdk/variant/vcf/VCFConstants.java b/src/main/java/htsjdk/variant/vcf/VCFConstants.java index b05856d5e..6a52d1df0 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFConstants.java +++ b/src/main/java/htsjdk/variant/vcf/VCFConstants.java @@ -63,7 +63,10 @@ public static final String SOMATIC_KEY = "SOMATIC"; public static final String VALIDATED_KEY = "VALIDATED"; public static final String THOUSAND_GENOMES_KEY = "1000G"; - + + // reserved INFO for structural variants + /** INFO Type of structural variant */ + public static final String SVTYPE = "SVTYPE"; // separators public static final String FORMAT_FIELD_SEPARATOR = ":"; diff --git a/src/test/java/htsjdk/variant/variantcontext/VariantContextUnitTest.java b/src/test/java/htsjdk/variant/variantcontext/VariantContextUnitTest.java index be55f8f58..34854eaea 100644 --- a/src/test/java/htsjdk/variant/variantcontext/VariantContextUnitTest.java +++ b/src/test/java/htsjdk/variant/variantcontext/VariantContextUnitTest.java @@ -26,6 +26,9 @@ package htsjdk.variant.variantcontext; +import htsjdk.samtools.util.CloseableIterator; +import htsjdk.samtools.util.CloserUtil; + // the imports for unit testing. import htsjdk.samtools.util.TestUtil; @@ -37,6 +40,8 @@ import htsjdk.tribble.TribbleException; import htsjdk.variant.VariantBaseTest; import htsjdk.variant.vcf.VCFConstants; +import htsjdk.variant.vcf.VCFFileReader; + import org.testng.Assert; import org.testng.annotations.BeforeMethod; import org.testng.annotations.BeforeSuite; @@ -1447,4 +1452,30 @@ public void testExtraStrictValidationFailure(final VariantContext vc, final Alle // extraStrictValidation throws exceptions if it fails, so no Asserts here... vc.extraStrictValidation(reportedReference, observedReference, rsIDs); } + + + @DataProvider(name = "structuralVariationsTestData") + public Object[][] getStructuralVariationsTestData() { + return new Object[][] { + {new File("src/test/resources/htsjdk/variant/structuralvariants.vcf")} + }; + } + + @Test(dataProvider = "structuralVariationsTestData") + public void testExtractStructuralVariationsData(final File vcfFile) { + VCFFileReader reader = null; + CloseableIterator iter = null; + try { + reader = new VCFFileReader(vcfFile , false ); + iter = reader.iterator(); + while(iter.hasNext()) { + final VariantContext ctx = iter.next(); + final StructuralVariantType st = ctx.getStructuralVariantType(); + Assert.assertNotNull(st); + } + } finally { + CloserUtil.close(iter); + CloserUtil.close(reader); + } + } } diff --git a/src/test/resources/htsjdk/variant/structuralvariants.vcf b/src/test/resources/htsjdk/variant/structuralvariants.vcf new file mode 100644 index 000000000..5ffad2f94 --- /dev/null +++ b/src/test/resources/htsjdk/variant/structuralvariants.vcf @@ -0,0 +1,22 @@ +##fileformat=VCFv4.2 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##ALT= +##ALT= +##ALT= +##ALT= +##FORMAT= +##FORMAT= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 +1 20 . N 1 . IMPRECISE;SVTYPE=DUP;END=4641652;END_CHR=1;STRAND_1=-;STRAND_2=+;SVLEN=4641632;METHOD=LUMPY;DP=90 GT:AO 1/1:90 +1 33 . N 1 . IMPRECISE;SVTYPE=DUP;END=2640388;END_CHR=1;STRAND_1=-;STRAND_2=+;SVLEN=2640355;METHOD=LUMPY;DP=3 GT:AO 1/1:3 +1 70 . N 1 . IMPRECISE;SVTYPE=DEL;END=4641583;END_CHR=1;STRAND_1=+;STRAND_2=-;SVLEN=-4641513;METHOD=LUMPY;DP=1 GT:AO 1/1:1 +1 101 . N 1 . IMPRECISE;SVTYPE=INV;END=1988714;END_CHR=1;STRAND_1=-;STRAND_2=-;SVLEN=1988613;METHOD=LUMPY;DP=2 GT:AO 1/1:2