diff --git a/src/main/java/picard/vcf/LiftoverVcf.java b/src/main/java/picard/vcf/LiftoverVcf.java index 4be6ab754..4c9bb03c6 100644 --- a/src/main/java/picard/vcf/LiftoverVcf.java +++ b/src/main/java/picard/vcf/LiftoverVcf.java @@ -4,26 +4,14 @@ import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.liftover.LiftOver; +import htsjdk.samtools.reference.ReferenceSequence; import htsjdk.samtools.reference.ReferenceSequenceFileWalker; -import htsjdk.samtools.util.CloserUtil; -import htsjdk.samtools.util.CollectionUtil; -import htsjdk.samtools.util.IOUtil; -import htsjdk.samtools.util.Interval; -import htsjdk.samtools.util.Log; -import htsjdk.samtools.util.ProgressLogger; -import htsjdk.samtools.util.SequenceUtil; -import htsjdk.samtools.util.SortingCollection; -import htsjdk.samtools.util.StringUtil; +import htsjdk.samtools.util.*; import htsjdk.variant.variantcontext.*; import htsjdk.variant.variantcontext.writer.Options; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder; -import htsjdk.variant.vcf.VCFFileReader; -import htsjdk.variant.vcf.VCFFilterHeaderLine; -import htsjdk.variant.vcf.VCFHeader; -import htsjdk.variant.vcf.VCFHeaderLineType; -import htsjdk.variant.vcf.VCFInfoHeaderLine; -import htsjdk.variant.vcf.VCFRecordCodec; +import htsjdk.variant.vcf.*; import picard.cmdline.CommandLineProgram; import picard.cmdline.CommandLineProgramProperties; import picard.cmdline.Option; @@ -33,10 +21,8 @@ import java.io.File; import java.text.DecimalFormat; import java.text.NumberFormat; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; +import java.util.stream.Collectors; /** * Tool for lifting over a VCF to another genome build and producing a properly header'd, @@ -69,20 +55,20 @@ "" + "For additional information, please see: 
http://genome.ucsc.edu/cgi-bin/hgLiftOver" + "
"; - @Option(shortName=StandardOptionDefinitions.INPUT_SHORT_NAME, doc="The input VCF/BCF file to be lifted over.") + @Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input VCF/BCF file to be lifted over.") public File INPUT; - @Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc="The output location to write the lifted over VCF/BCF to.") + @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The output location to write the lifted over VCF/BCF to.") public File OUTPUT; - @Option(shortName="C", doc="The liftover chain file. See https://genome.ucsc.edu/goldenPath/help/chain.html for a description" + + @Option(shortName = "C", doc = "The liftover chain file. See https://genome.ucsc.edu/goldenPath/help/chain.html for a description" + " of chain files. See http://hgdownload.soe.ucsc.edu/downloads.html#terms for where to download chain files.") public File CHAIN; - @Option(doc="File to which to write rejected records.") + @Option(doc = "File to which to write rejected records.") public File REJECT; - @Option(shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME, common=false, + @Option(shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME, common = false, doc = "The reference sequence (fasta) for the TARGET genome build. The fasta file must have an " + "accompanying sequence dictionary (.dict file).") public File REFERENCE_SEQUENCE = Defaults.REFERENCE_FASTA; @@ -104,42 +90,59 @@ // When a contig used in the chain is not in the reference, exit with this value instead of 0. protected static int EXIT_CODE_WHEN_CONTIG_NOT_IN_REFERENCE = 1; - /** Filter name to use when a target cannot be lifted over. */ + /** + * Filter name to use when a target cannot be lifted over. + */ public static final String FILTER_CANNOT_LIFTOVER_INDEL = "ReverseComplementedIndel"; - /** Filter name to use when a target cannot be lifted over. */ + /** + * Filter name to use when a target cannot be lifted over. 
+ */ public static final String FILTER_NO_TARGET = "NoTarget"; - /** Filter name to use when a target is lifted over, but the reference allele doens't match the new reference. */ + /** + * Filter name to use when a target is lifted over, but the reference allele doesn't match the new reference. + */ public static final String FILTER_MISMATCHING_REF_ALLELE = "MismatchedRefAllele"; - /** Filters to be added to the REJECT file. */ + /** + * Filters to be added to the REJECT file. + */ private static final List FILTERS = CollectionUtil.makeList( new VCFFilterHeaderLine(FILTER_CANNOT_LIFTOVER_INDEL, "Indel falls into a reverse complemented region in the target genome."), new VCFFilterHeaderLine(FILTER_NO_TARGET, "Variant could not be lifted between genome builds."), new VCFFilterHeaderLine(FILTER_MISMATCHING_REF_ALLELE, "Reference allele does not match reference genome sequence after liftover.") ); - /** Attribute used to store the name of the source contig/chromosome prior to liftover. */ + /** + * Attribute used to store the name of the source contig/chromosome prior to liftover. + */ public static final String ORIGINAL_CONTIG = "OriginalContig"; - /** Attribute used to store the position of the variant on the source contig prior to liftover. */ + /** + * Attribute used to store the position of the variant on the source contig prior to liftover. + */ public static final String ORIGINAL_START = "OriginalStart"; - /** Metadata to be added to the Passing file. */ + /** + * Attribute used to store the position of the failed variant on the target contig prior to finding out that alleles do not match. + */ + public static final String ATTEMPTED_LOCUS = "AttemptedLocus"; + + /** + * Metadata to be added to the Passing file. 
+ */ private static final List ATTRS = CollectionUtil.makeList( new VCFInfoHeaderLine(ORIGINAL_CONTIG, 1, VCFHeaderLineType.String, "The name of the source contig/chromosome prior to liftover."), new VCFInfoHeaderLine(ORIGINAL_START, 1, VCFHeaderLineType.String, "The position of the variant on the source contig prior to liftover.") ); + private VariantContextWriter rejects; private final Log log = Log.getInstance(LiftoverVcf.class); + private SortingCollection sorter; - // Stock main method - public static void main(final String[] args) { - new LiftoverVcf().instanceMainWithExit(args); - } - - @Override protected int doWork() { + @Override + protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); IOUtil.assertFileIsReadable(CHAIN); @@ -154,13 +157,12 @@ public static void main(final String[] args) { log.info("Loading up the target reference genome."); final ReferenceSequenceFileWalker walker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE); - final Map refSeqs = new HashMap(); - for (final SAMSequenceRecord rec: walker.getSequenceDictionary().getSequences()) { - refSeqs.put(rec.getSequenceName(), walker.get(rec.getSequenceIndex()).getBases()); + final Map refSeqs = new HashMap<>(); + for (final SAMSequenceRecord rec : walker.getSequenceDictionary().getSequences()) { + refSeqs.put(rec.getSequenceName(), walker.get(rec.getSequenceIndex())); } CloserUtil.close(walker); - //////////////////////////////////////////////////////////////////////// // Setup the outputs //////////////////////////////////////////////////////////////////////// @@ -170,21 +172,22 @@ public static void main(final String[] args) { if (WRITE_ORIGINAL_POSITION) { for (final VCFInfoHeaderLine line : ATTRS) outHeader.addMetaDataLine(line); } - final VariantContextWriter out = new VariantContextWriterBuilder().setOption(Options.INDEX_ON_THE_FLY) + final VariantContextWriter out = new VariantContextWriterBuilder() + 
.setOption(Options.INDEX_ON_THE_FLY) .modifyOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER, ALLOW_MISSING_FIELDS_IN_HEADER) .setOutputFile(OUTPUT).setReferenceDictionary(walker.getSequenceDictionary()).build(); out.writeHeader(outHeader); - final VariantContextWriter rejects = new VariantContextWriterBuilder().setOutputFile(REJECT).unsetOption(Options.INDEX_ON_THE_FLY) + rejects = new VariantContextWriterBuilder().setOutputFile(REJECT) + .unsetOption(Options.INDEX_ON_THE_FLY) .modifyOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER, ALLOW_MISSING_FIELDS_IN_HEADER) .build(); final VCFHeader rejectHeader = new VCFHeader(in.getFileHeader()); for (final VCFFilterHeaderLine line : FILTERS) rejectHeader.addMetaDataLine(line); - if (WRITE_ORIGINAL_POSITION) { - for (final VCFInfoHeaderLine line : ATTRS) rejectHeader.addMetaDataLine(line); - } - rejects.writeHeader(rejectHeader); + rejectHeader.addMetaDataLine(new VCFInfoHeaderLine(ATTEMPTED_LOCUS,1, VCFHeaderLineType.String, "The locus of the variant in the TARGET prior to failing due to mismatching alleles.")); + + rejects.writeHeader(rejectHeader); //////////////////////////////////////////////////////////////////////// // Read the input VCF, lift the records over and write to the sorting @@ -193,7 +196,7 @@ public static void main(final String[] args) { long failedLiftover = 0, failedAlleleCheck = 0, total = 0; log.info("Lifting variants over and sorting."); - final SortingCollection sorter = SortingCollection.newInstance(VariantContext.class, + sorter = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(outHeader, ALLOW_MISSING_FIELDS_IN_HEADER || VALIDATION_STRINGENCY != ValidationStringency.STRICT), outHeader.getVCFRecordComparator(), MAX_RECORDS_IN_RAM, @@ -201,15 +204,16 @@ public static void main(final String[] args) { ProgressLogger progress = new ProgressLogger(log, 1000000, "read"); // a mapping from original allele to reverse complemented allele - final Map reverseComplementAlleleMap = new 
HashMap(10); + final Map reverseComplementAlleleMap = new HashMap<>(10); for (final VariantContext ctx : in) { ++total; final Interval source = new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), false, ctx.getContig() + ":" + ctx.getStart() + "-" + ctx.getEnd()); final Interval target = liftOver.liftOver(source, LIFTOVER_MIN_MATCH); + final ReferenceSequence refSeq; - // if the target is null OR (the target is reverse complemented AND the variant is an indel or mixed), then we cannot lift it over - if (target == null || (target.isNegativeStrand() && (ctx.isMixed() || ctx.isIndel()))) { + // if the target is null OR (the target is reverse complemented AND the variant is a non-biallelic indel or mixed), then we cannot lift it over + if (target == null || (target.isNegativeStrand() && (ctx.isMixed() || (ctx.isIndel() && !ctx.isBiallelic())))) { final String reason = (target == null) ? FILTER_NO_TARGET : FILTER_CANNOT_LIFTOVER_INDEL; rejects.add(new VariantContextBuilder(ctx).filter(reason).make()); failedLiftover++; @@ -217,72 +221,33 @@ public static void main(final String[] args) { rejects.add(new VariantContextBuilder(ctx).filter(FILTER_NO_TARGET).make()); failedLiftover++; - String missingContigMessage = "Encountered a contig, " + target.getContig() + " that is not part of the target reference."; - if(WARN_ON_MISSING_CONTIG) { + final String missingContigMessage = "Encountered a contig, " + target.getContig() + " that is not part of the target reference."; + if (WARN_ON_MISSING_CONTIG) { log.warn(missingContigMessage); } else { log.error(missingContigMessage); return EXIT_CODE_WHEN_CONTIG_NOT_IN_REFERENCE; } - } else { - // Fix the alleles if we went from positive to negative strand - reverseComplementAlleleMap.clear(); - final List alleles = new ArrayList(); + } else if (target.isNegativeStrand() && ctx.isIndel() && ctx.isBiallelic()) { + refSeq = refSeqs.get(target.getContig()); + //flipping indels: - for (final Allele oldAllele : ctx.getAlleles()) { - 
if (target.isPositiveStrand() || oldAllele.isSymbolic()) { - alleles.add(oldAllele); - } - else { - final Allele fixedAllele = Allele.create(SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference()); - alleles.add(fixedAllele); - reverseComplementAlleleMap.put(oldAllele, fixedAllele); - } - } - - // Build the new variant context - final VariantContextBuilder builder = new VariantContextBuilder( - ctx.getSource(), - target.getContig(), - target.getStart(), - target.getEnd(), - alleles); - - builder.id(ctx.getID()); - builder.attributes(ctx.getAttributes()); - - if (WRITE_ORIGINAL_POSITION) { - builder.attribute(ORIGINAL_CONTIG, source.getContig()); - builder.attribute(ORIGINAL_START, source.getStart()); - } - builder.genotypes(fixGenotypes(ctx.getGenotypes(), reverseComplementAlleleMap)); - builder.filters(ctx.getFilters()); - builder.log10PError(ctx.getLog10PError()); - - // Check that the reference allele still agrees with the reference sequence - boolean mismatchesReference = false; - for (final Allele allele : builder.getAlleles()) { - if (allele.isReference()) { - final byte[] ref = refSeqs.get(target.getContig()); - final String refString = StringUtil.bytesToString(ref, target.getStart()-1, target.length()); - - if (!refString.equalsIgnoreCase(allele.getBaseString())) { - mismatchesReference = true; - } - - break; + final VariantContext flippedIndel = flipIndel(ctx, liftOver, refSeq); + if (flippedIndel == null) { + throw new IllegalArgumentException("Unexpectedly found null VC. 
This should have not happened."); + } else { + if (!tryToAddVariant(flippedIndel, refSeq, reverseComplementAlleleMap, ctx)){ + failedAlleleCheck++; } } + } else { + refSeq = refSeqs.get(target.getContig()); + final VariantContext liftedVariant = liftSnp(ctx, target, refSeq); - if (mismatchesReference) { - rejects.add(new VariantContextBuilder(ctx).filter(FILTER_MISMATCHING_REF_ALLELE).make()); + if (!tryToAddVariant(liftedVariant, refSeq, reverseComplementAlleleMap, ctx)) { failedAlleleCheck++; } - else { - sorter.add(builder.make()); - } } - progress.record(ctx.getContig(), ctx.getStart()); } @@ -313,21 +278,261 @@ public static void main(final String[] args) { return 0; } - protected static GenotypesContext fixGenotypes(final GenotypesContext originals, final Map reverseComplementAlleleMap) { + /** + * utility function to attempt to add a variant. Checks that the reference allele still matches the reference (which may have changed) + * + * @param vc new {@link VariantContext} + * @param refSeq {@link ReferenceSequence} of new reference + * @param alleleMap a {@link Map} mapping the old alleles to the new alleles (for fixing the genotypes) + * @param source the original {@link VariantContext} to use for putting the original location information into vc + * @return true if successful, false if failed due to mismatching reference allele. 
+ */ + private boolean tryToAddVariant(final VariantContext vc, final ReferenceSequence refSeq, final Map alleleMap, final VariantContext source) { + + final VariantContextBuilder builder = new VariantContextBuilder(vc); + + + builder.genotypes(fixGenotypes(source.getGenotypes(), alleleMap)); + builder.filters(source.getFilters()); + builder.log10PError(source.getLog10PError()); + builder.attributes(source.getAttributes()); + builder.id(source.getID()); + + if (WRITE_ORIGINAL_POSITION) { + builder.attribute(ORIGINAL_CONTIG, source.getContig()); + builder.attribute(ORIGINAL_START, source.getStart()); + } + + // Check that the reference allele still agrees with the reference sequence + boolean mismatchesReference = false; + for (final Allele allele : builder.getAlleles()) { + if (allele.isReference()) { + final byte[] ref = refSeq.getBases(); + final String refString = StringUtil.bytesToString(ref, vc.getStart() - 1, vc.getEnd()-vc.getStart()+1); + + if (!refString.equalsIgnoreCase(allele.getBaseString())) { + mismatchesReference = true; + } + break; + } + } + + if (mismatchesReference) { + rejects.add(new VariantContextBuilder(source) + .filter(FILTER_MISMATCHING_REF_ALLELE) + .attribute(ATTEMPTED_LOCUS, String.format("%s:%d-%d",vc.getContig(),vc.getStart(),vc.getEnd())) + .make()); + return false; + } else { + sorter.add(builder.make()); + return true; + } + } + + + protected static VariantContext liftSnp(final VariantContext source, final Interval target , final ReferenceSequence referenceSequence){ + // Fix the alleles if we went from positive to negative strand + final Map reverseComplementAlleleMap = new HashMap<>(2); + + final List alleles = new ArrayList<>(); + + for (final Allele oldAllele : source.getAlleles()) { + if (target.isPositiveStrand() || oldAllele.isSymbolic()) { + alleles.add(oldAllele); + } else { + final Allele fixedAllele = Allele.create(SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference()); + 
alleles.add(fixedAllele); + reverseComplementAlleleMap.put(oldAllele, fixedAllele); + } + } + + // Build the new variant context + final VariantContextBuilder builder = new VariantContextBuilder( + source.getSource(), + target.getContig(), + target.getStart(), + target.getEnd(), + alleles); + + builder.id(source.getID()); + + return builder.make(); + } + + /** + * @param source original variant context + * @param liftOver the LiftOver object to use for flipping + * @param referenceSequence the reference sequence of the target + * @return a flipped variant-context. + */ + protected static VariantContext flipIndel(final VariantContext source, final LiftOver liftOver, final ReferenceSequence referenceSequence) { + if (!source.isBiallelic()) return null; //only supporting biallelic indels, for now. + + final Interval originalLocus = new Interval(source.getContig(), source.getStart(), source.getEnd()); + final Locatable target = liftOver.liftOver(originalLocus); + + if (target == null) return null; + + // a boolean to protect against trying to access the -1 position in the reference array + final boolean addToStart = target.getStart() > 1; + + final Map reverseComplementAlleleMap = new HashMap<>(2); + + reverseComplementAlleleMap.clear(); + final List alleles = new ArrayList<>(); + + for (final Allele oldAllele : source.getAlleles()) { + // target.getStart is 1-based, reference bases are 0-based + final StringBuilder alleleBuilder = new StringBuilder(target.getEnd() - target.getStart() + 1); + + if (addToStart) alleleBuilder.append((char) referenceSequence.getBases()[target.getStart() - 2]); + alleleBuilder.append(SequenceUtil.reverseComplement(oldAllele.getBaseString().substring(1, oldAllele.length()))); + if (!addToStart) alleleBuilder.append((char) referenceSequence.getBases()[target.getEnd() - 1]); + + final Allele fixedAllele = Allele.create(alleleBuilder.toString(), oldAllele.isReference()); + alleles.add(fixedAllele); + reverseComplementAlleleMap.put(oldAllele, 
fixedAllele); + } + + final VariantContextBuilder builder = new VariantContextBuilder(source.getSource(), + target.getContig(), + target.getStart() - (addToStart ? 1 : 0), + target.getEnd() - (addToStart ? 1 : 0), + alleles); + + builder.id(source.getID()); + builder.attributes(source.getAttributes()); + + builder.genotypes(fixGenotypes(source.getGenotypes(), reverseComplementAlleleMap)); + builder.filters(source.getFilters()); + builder.log10PError(source.getLog10PError()); + + return leftAlignVariant(builder.make(), referenceSequence); + } + + protected static GenotypesContext fixGenotypes(final GenotypesContext originals, final Map alleleMap) { // optimization: if nothing needs to be fixed then don't bother - if ( reverseComplementAlleleMap.isEmpty() ) { + if (alleleMap.isEmpty()) { return originals; } final GenotypesContext fixedGenotypes = GenotypesContext.create(originals.size()); - for ( final Genotype genotype : originals ) { - final List fixedAlleles = new ArrayList(); - for ( final Allele allele : genotype.getAlleles() ) { - final Allele fixedAllele = reverseComplementAlleleMap.containsKey(allele) ? reverseComplementAlleleMap.get(allele) : allele; + for (final Genotype genotype : originals) { + final List fixedAlleles = new ArrayList<>(); + for (final Allele allele : genotype.getAlleles()) { + final Allele fixedAllele = alleleMap.getOrDefault(allele, allele); fixedAlleles.add(fixedAllele); } fixedGenotypes.add(new GenotypeBuilder(genotype).alleles(fixedAlleles).make()); } return fixedGenotypes; } -} + + /** + * Normalizes and left aligns a {@link VariantContext}. + * + * Based on Adrian Tan, Gonçalo R. Abecasis and Hyun Min Kang. (2015) + * Unified Representation of Genetic Variants. Bioinformatics. 
+ * + * @param vc the {@link VariantContext} to be normalized + * @param referenceSequence the {@link ReferenceSequence} of the same contig as vc + * @return a new {@link VariantContext} which represents the same variation as vc but has + * been normalized and left-aligned + */ + protected static VariantContext leftAlignVariant(final VariantContext vc, final ReferenceSequence referenceSequence) { + + boolean changesInAlleles = true; + + int start = vc.getStart(); + int end = vc.getEnd(); + + if (!vc.getContig().equals(referenceSequence.getName())) { + throw new IllegalArgumentException("vc contig doesn't match that of supplied reference: " + vc.getContig() + " != " + referenceSequence.getName()); + } + + final Map alleleBasesMap = new HashMap<>(); + vc.getAlleles().forEach(a -> alleleBasesMap.put(a, a.getBases())); + + // 1. while changes in alleles do + while (changesInAlleles) { + + changesInAlleles = false; + // 2. if alleles end with the same nucleotide then + if (alleleBasesMap.values().stream() + .collect(Collectors.groupingBy(a -> a[a.length - 1], Collectors.toSet())) + .size() == 1 && end > 1) { + // 3. truncate rightmost nucleotide of each allele + for (final Allele allele : alleleBasesMap.keySet()) { + alleleBasesMap.put(allele, truncateBase(alleleBasesMap.get(allele), true)); + } + changesInAlleles = true; + end--; + // 4. end if + } + + // 5. if there exists an empty allele then + if (alleleBasesMap.values().stream() + .map(a -> a.length) + .anyMatch(l -> l == 0)) { + // 6. extend alleles 1 nucleotide to the left + for (final Allele allele : alleleBasesMap.keySet()) { + // the first -1 for zero-base (getBases) versus 1-based (variant position) + // another -1 to get the base prior to the location of the start of the allele + final byte extraBase = (start > 1) ? 
+ referenceSequence.getBases()[start - 2] : + referenceSequence.getBases()[end]; + + alleleBasesMap.put(allele, extendOneBase(alleleBasesMap.get(allele), extraBase)); + } + changesInAlleles = true; + start--; + + // 7. end if + } + } + + // 8. while leftmost nucleotide of each allele are the same and all alleles have length 2 or more do + while (alleleBasesMap.values().stream() + .allMatch(a -> a.length >= 2) && + + alleleBasesMap.values().stream() + .collect(Collectors.groupingBy(a -> a[0], Collectors.toSet())) + .size() == 1 + ) { + + //9. truncate the leftmost base of the alleles + for (final Allele allele : alleleBasesMap.keySet()) { + alleleBasesMap.put(allele, truncateBase(alleleBasesMap.get(allele), false)); + } + start++; + } + + final VariantContextBuilder builder = new VariantContextBuilder(vc); + builder.start(start); + builder.stop(end); + + final Map fixedAlleleMap = alleleBasesMap.entrySet().stream() + .collect(Collectors.toMap(Map.Entry::getKey, me -> Allele.create(me.getValue(), me.getKey().isReference()))); + builder.alleles(fixedAlleleMap.values()); + builder.genotypes(fixGenotypes(vc.getGenotypes(), fixedAlleleMap)); + + return builder.make(); + } + + private static byte[] truncateBase(final byte[] allele, final boolean truncateRightmost) { + return Arrays.copyOfRange(allele, truncateRightmost ? 0 : 1, truncateRightmost ? 
+ allele.length - 1 : + allele.length); + } + + //creates a new byte array with the base added at the beginning + private static byte[] extendOneBase(final byte[] bases, final byte base) { + + final byte[] newBases = new byte[bases.length + 1]; + + System.arraycopy(bases, 0, newBases, 1, bases.length); + newBases[0] = base; + + return newBases; + } +} \ No newline at end of file diff --git a/src/test/java/picard/vcf/LiftoverVcfTest.java b/src/test/java/picard/vcf/LiftoverVcfTest.java index 758967e4d..acabe826a 100644 --- a/src/test/java/picard/vcf/LiftoverVcfTest.java +++ b/src/test/java/picard/vcf/LiftoverVcfTest.java @@ -1,5 +1,8 @@ package picard.vcf; +import htsjdk.samtools.liftover.LiftOver; +import htsjdk.samtools.reference.ReferenceSequence; +import htsjdk.samtools.util.CollectionUtil; import htsjdk.samtools.util.IOUtil; import htsjdk.variant.variantcontext.*; import htsjdk.variant.vcf.VCFFileReader; @@ -12,9 +15,11 @@ import java.io.File; import java.util.*; +import static picard.vcf.LiftoverVcf.leftAlignVariant; + /** * Test class for LiftoverVcf. - * + *

* Created by ebanks on 8/11/15. */ public class LiftoverVcfTest extends CommandLineProgramTest { @@ -34,11 +39,21 @@ public void teardown() { IOUtil.deleteDirectoryTree(OUTPUT_DATA_PATH); } - @Test - public void testDoNotFixReverseComplementedIndels() { + @DataProvider(name = "liftoverReverseStrand") + public Object[][] liftoverReverseStrand() { + return new Object[][]{ + {"testLiftoverBiallelicIndels.vcf", 3, 0}, + {"testLiftoverMultiallelicIndels.vcf", 0, 2}, + {"testLiftoverFailingVariants.vcf", 0, 3}, + }; + } + + + @Test(dataProvider = "liftoverReverseStrand" ) + public void testReverseComplementedIndels(final String filename, final int expectedPassing, final int expectedFailing) { final File liftOutputFile = new File(OUTPUT_DATA_PATH, "lift-delete-me.vcf"); final File rejectOutputFile = new File(OUTPUT_DATA_PATH, "reject-delete-me.vcf"); - final File input = new File(TEST_DATA_PATH, "testLiftover.vcf"); + final File input = new File(TEST_DATA_PATH, filename); liftOutputFile.deleteOnExit(); rejectOutputFile.deleteOnExit(); @@ -54,45 +69,10 @@ public void testDoNotFixReverseComplementedIndels() { Assert.assertEquals(runPicardCommandLine(args), 0); final VCFFileReader liftReader = new VCFFileReader(liftOutputFile, false); - for (final VariantContext inputContext : liftReader) { - Assert.fail("there should be no passing indels in the liftover"); - } - final VCFFileReader rejectReader = new VCFFileReader(rejectOutputFile, false); - int counter = 0; - for (final VariantContext inputContext : rejectReader) { - counter++; - } - Assert.assertEquals(counter, 2, "the wrong number of rejected indels failed the liftover"); - } + Assert.assertEquals(liftReader.iterator().stream().count(), expectedPassing, "The wrong number of variants was lifted over"); - @Test - public void testFixReverseComplementedGenotypes() { - - final Allele refA = Allele.create("A", true); - final Allele altC = Allele.create("C", false); - final GenotypesContext originalGenotypes = 
GenotypesContext.create(3); - originalGenotypes.add(new GenotypeBuilder("homref").alleles(Arrays.asList(refA, refA)).make()); - originalGenotypes.add(new GenotypeBuilder("het").alleles(Arrays.asList(refA, altC)).make()); - originalGenotypes.add(new GenotypeBuilder("homvar").alleles(Arrays.asList(altC, altC)).make()); - - final Allele refT = Allele.create("T", true); - final Allele altG = Allele.create("G", false); - final GenotypesContext expectedGenotypes = GenotypesContext.create(3); - expectedGenotypes.add(new GenotypeBuilder("homref").alleles(Arrays.asList(refT, refT)).make()); - expectedGenotypes.add(new GenotypeBuilder("het").alleles(Arrays.asList(refT, altG)).make()); - expectedGenotypes.add(new GenotypeBuilder("homvar").alleles(Arrays.asList(altG, altG)).make()); - - final Map reverseComplementAlleleMap = new HashMap(2); - reverseComplementAlleleMap.put(refA, refT); - reverseComplementAlleleMap.put(altC, altG); - final GenotypesContext actualGenotypes = LiftoverVcf.fixGenotypes(originalGenotypes, reverseComplementAlleleMap); - - for ( final String sample : Arrays.asList("homref", "het", "homvar") ) { - final List expected = expectedGenotypes.get(sample).getAlleles(); - final List actual = actualGenotypes.get(sample).getAlleles(); - Assert.assertEquals(expected.get(0), actual.get(0)); - Assert.assertEquals(expected.get(1), actual.get(1)); - } + final VCFFileReader rejectReader = new VCFFileReader(rejectOutputFile, false); + Assert.assertEquals(rejectReader.iterator().stream().count(), expectedFailing, "The wrong number of variants was lifted over"); } @DataProvider(name = "dataTestMissingContigInReference") @@ -104,7 +84,7 @@ public void testFixReverseComplementedGenotypes() { } @Test(dataProvider = "dataTestMissingContigInReference") - public void testMissingContigInReference(boolean warnOnMissingContext, int expectedReturnCode) { + public void testMissingContigInReference(final boolean warnOnMissingContext, final int expectedReturnCode) { final File 
liftOutputFile = new File(OUTPUT_DATA_PATH, "lift-delete-me.vcf"); final File rejectOutputFile = new File(OUTPUT_DATA_PATH, "reject-delete-me.vcf"); final File input = new File(TEST_DATA_PATH, "testLiftoverUsingMissingContig.vcf"); @@ -134,10 +114,10 @@ public void testMissingContigInReference(boolean warnOnMissingContext, int expec } @Test(dataProvider = "dataTestWriteOriginalPosition") - public void testWriteOriginalPosition(boolean shouldWriteOriginalPosition) { + public void testWriteOriginalPosition(final boolean shouldWriteOriginalPosition) { final File liftOutputFile = new File(OUTPUT_DATA_PATH, "lift-delete-me.vcf"); final File rejectOutputFile = new File(OUTPUT_DATA_PATH, "reject-delete-me.vcf"); - final File input = new File(TEST_DATA_PATH, "testLiftover.vcf"); + final File input = new File(TEST_DATA_PATH, "testLiftoverBiallelicIndels.vcf"); liftOutputFile.deleteOnExit(); rejectOutputFile.deleteOnExit(); @@ -154,17 +134,398 @@ public void testWriteOriginalPosition(boolean shouldWriteOriginalPosition) { runPicardCommandLine(args); - try (VCFFileReader liftReader = new VCFFileReader(liftOutputFile, false)) { - for (VariantContext vc : liftReader) { + try (final VCFFileReader liftReader = new VCFFileReader(liftOutputFile, false)) { + for (final VariantContext vc : liftReader) { if (shouldWriteOriginalPosition) { Assert.assertNotNull(vc.getAttribute(LiftoverVcf.ORIGINAL_CONTIG)); Assert.assertNotNull(vc.getAttribute(LiftoverVcf.ORIGINAL_START)); - } - else { + } else { Assert.assertFalse(vc.hasAttribute(LiftoverVcf.ORIGINAL_CONTIG)); Assert.assertFalse(vc.hasAttribute(LiftoverVcf.ORIGINAL_START)); } } } } + + @DataProvider(name = "indelFlipData") + public Iterator indelFlipData() { + + final ReferenceSequence reference = new ReferenceSequence("chr1", 0, + "CAAAAAAAAAACGTACGTACTCTCTCTCTACGT".getBytes()); + // 123456789 123456789 123456789 123 + + final Allele RefCAA = Allele.create("CAA", true); + final Allele RefGTT = Allele.create("GTT", true); + final 
Allele RefACGT = Allele.create("ACGT", true); + final Allele RefAACG = Allele.create("AACG", true); + final Allele RefTTT = Allele.create("TTT", true); + final Allele RefCG = Allele.create("CG", true); + final Allele RefT = Allele.create("T", true); + final Allele RefA = Allele.create("A", true); + final Allele RefC = Allele.create("C", true); + final Allele RefG = Allele.create("G", true); + + final Allele A = Allele.create("A", false); + final Allele T = Allele.create("T", false); + final Allele C = Allele.create("C", false); + final Allele CG = Allele.create("CG", false); + final Allele GA = Allele.create("GA", false); + final Allele TC = Allele.create("TC", false); + final Allele CAA = Allele.create("CAA", false); + final Allele ACT = Allele.create("ACT", false); + final Allele TTT = Allele.create("TTT", false); + final Allele ATT = Allele.create("ATT", false); + final Allele GTT = Allele.create("GTT", false); + final Allele AAC= Allele.create("AAC", false); + final Allele TAG = Allele.create("TAG", false); + final Allele ACGT = Allele.create("ACGT", false); + final Allele AACG = Allele.create("AACG", false); + + final List tests = new ArrayList<>(); + + final int CHAIN_SIZE = 540; // the length of the single chain in CHAIN_FILE + + final VariantContextBuilder builder = new VariantContextBuilder().source("test1").chr("chr1"); + final VariantContextBuilder result_builder = new VariantContextBuilder().source("test1").chr("chr1"); + + // simple deletion + // TTT*/T -> AAA*/A turns into left-aligned CAA*/C at position 1 + int start = CHAIN_SIZE - 3 ; + int stop = start + 2; + builder.start(start).stop(stop).alleles(CollectionUtil.makeList(RefTTT, T)); + result_builder.start(1).stop(3).alleles(CollectionUtil.makeList(RefCAA, C)); + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + + //simple insertion + // T*/TTT -> A*/AAA -> turns into left-aligned C*/CAA at position 1 + stop = start; + 
builder.source("test2").alleles(CollectionUtil.makeList(RefT, TTT)).stop(stop); + result_builder.alleles(CollectionUtil.makeList(RefC, CAA)).start(1).stop(1); + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + + // non-simple deletion + // ACGT(T)*/A(T) -> AACG(T)*/A(T) + // position of 13 ^ in result + start = CHAIN_SIZE - 13; + stop = start + 3; + + builder.source("test3").start(start).stop(stop).alleles(CollectionUtil.makeList(RefACGT, A)); + result_builder.start(CHAIN_SIZE - stop).stop(CHAIN_SIZE - start).alleles(CollectionUtil.makeList(RefAACG, A)); + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + + // "CAAAAAAAAAACG---CGTACTCTCTCTCTACGT" -- Allele A + // "CAAAAAAAAAACGacgCGTACTCTCTCTCTACGT" -- Allele B + + // is equivalent to + + // "CAAAAAAAAA---ACGCGTACTCTCTCTCTACGT" -- Allele A + // "CAAAAAAAAAACGacgCGTACTCTCTCTCTACGT" -- Allele B + + // non-simple insertion + // A(C)*/ACGT(C) -> G(T)*/GACG(T) -> gets moves 3 bases back due to left-aligning... 
+ // position of 13 ^ in result + // ...and becomes A->AACG at position 10 + + start = CHAIN_SIZE - 13; + stop = start; + builder.source("test4").stop(stop).start(start).alleles(CollectionUtil.makeList(RefA, ACGT)); + result_builder.start(10).stop(10).alleles(CollectionUtil.makeList(RefA, AACG)); + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + + // just outside of chain & contig, testing that we do not read into negative indices + // or reference + start = stop = CHAIN_SIZE; + builder.source("test5").stop(stop).start(start).alleles(CollectionUtil.makeList(RefG, GTT)); + result_builder.start(1).stop(1).alleles(CollectionUtil.makeList(RefC, AAC)); + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + + // outside of chain + start = stop = CHAIN_SIZE + 1; + builder.source("test6").stop(stop).start(start).alleles(CollectionUtil.makeList(RefA, ACGT)); + tests.add(new Object[]{builder.make(), reference, null}); + + // MNP + // GTT*(T)/ACGT(T) -> AAA(C)*/AACG(T) -> which is then normalized to A*/CG at position 11 + // pos 11 ^ in the result + start = CHAIN_SIZE - 11; + stop = start + 2; + builder.source("test7").stop(stop).start(start).alleles(CollectionUtil.makeList(RefGTT, ACGT)); + result_builder.start(11).stop(11).alleles(CollectionUtil.makeList(RefA, CG)); + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + + // MNP + // ACGT*(T)/ATT*(T) -> AACG(T)*/AAA(T) -> by normalization CG(T)*/A(T) + // pos 13 ^ in the result + start = CHAIN_SIZE - 13; + stop = start + 3; + builder.source("test8").stop(stop).start(start).alleles(CollectionUtil.makeList(RefACGT, ATT)); + result_builder.start(CHAIN_SIZE - stop + 2).stop(CHAIN_SIZE - stop + 3).alleles(CollectionUtil.makeList(RefCG, A)); + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + + // needs left-aligning + // T->TAG --> T(A)/TCT(A) -> by normalization A/ACT @ 19 + // position 29 ^ + start = CHAIN_SIZE - 29; + stop 
= start; + builder.source("test9").stop(stop).start(start).alleles(CollectionUtil.makeList(RefT, TAG)); + result_builder.start(19).stop(19).alleles(CollectionUtil.makeList(RefA, ACT)); + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + + // insertion at end of section + // a test that converts the initial C to a AC which requires + // code in LiftOver::extendOneBase(., false) + // + // G*(.)/GA(.) -> .(C)/.T(C) but that's not legal, so + // -> C/TC + start = CHAIN_SIZE; + stop = CHAIN_SIZE; + + builder.start(start).stop(stop).alleles(CollectionUtil.makeList(RefG, GA)); + result_builder.start(1).stop(1).alleles(CollectionUtil.makeList(RefC, TC)); + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + + // insertion at end of section + // a test that converts the initial C to a AC which requires + // code in LiftOver::extendOneBase(., false) + // + // G*(.)/GG(.) -> .(C)/.C(C) but that's not legal, so + // -> C/CC + start = CHAIN_SIZE; + stop = CHAIN_SIZE; + + builder.start(start).stop(stop).alleles(CollectionUtil.makeList(RefG, Allele.create("GG"))); + result_builder.start(1).stop(1).alleles(CollectionUtil.makeList(RefC, Allele.create("CC"))); + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + + // insertion at end of section + // a test that converts the initial C to a AC which requires + // code in LiftOver::extendOneBase(., false) + // + // improperly aligned + // TTTT(G)->TTTTG(G) -> C/CC + start = CHAIN_SIZE - 4; + stop = CHAIN_SIZE - 1; + + builder.start(start).stop(stop).alleles(CollectionUtil.makeList(Allele.create("TTTT",true), Allele.create("TTTTG"))); + result_builder.start(1).stop(1).alleles(CollectionUtil.makeList(RefC, Allele.create("CC"))); + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + + + return tests.iterator(); + } + + @Test(dataProvider = "indelFlipData") + public void testFlipIndel(final VariantContext source, final ReferenceSequence 
reference, final VariantContext result) { + + final LiftOver liftOver = new LiftOver(CHAIN_FILE); + + final VariantContext flipped = LiftoverVcf.flipIndel(source, liftOver, reference); + + assertVcAreEqual(flipped, result); + } + + @DataProvider(name = "leftAlignAllelesData") + public Iterator leftAlignAllelesData() { + + final ReferenceSequence reference = new ReferenceSequence("chr1", 0, + "CAAAAAAAAAACGTACGTACTCTCTCTCTACGT".getBytes()); + // 123456789 123456789 123456789 123 + + final Allele RefG = Allele.create("G", true); + final Allele A = Allele.create("A", false); + + final Allele RefA = Allele.create("A", true); + final Allele RefC = Allele.create("C", true); + final Allele RefAA = Allele.create("AA", true); + final Allele RefCA = Allele.create("CA", true); + final Allele RefCT = Allele.create("CT", true); + final Allele RefTC = Allele.create("TC", true); + final Allele RefACT = Allele.create("ACT", true); + final Allele RefCTCT = Allele.create("CTCT", true); + final Allele RefTCTC = Allele.create("TCTC", true); + + final Allele AA = Allele.create("AA", false); + final Allele C = Allele.create("C", false); + final Allele CA = Allele.create("CA", false); + final Allele ACT = Allele.create("ACT", false); + final Allele CTCT = Allele.create("CTCT", false); + final Allele TCTC = Allele.create("TCTC", false); + final Allele CT = Allele.create("CT", false); + final Allele TC = Allele.create("TC", false); + + final Allele T = Allele.create("T", false); + + final List tests = new ArrayList<>(); + + final VariantContextBuilder builder = new VariantContextBuilder().source("test1").chr("chr1"); + final VariantContextBuilder result_builder = new VariantContextBuilder().source("test1").chr("chr1"); + + // left aligning at the edge of the reference + // simple SNP + // CAAA*/CCAAA -> C->CC + int start = 1; + int stop = 4; + builder.start(start).stop(stop).alleles(CollectionUtil.makeList(Allele.create("CAAA",true), Allele.create("CCAAA") )); + + 
result_builder.start(1).stop(1).alleles(CollectionUtil.makeList(RefC, Allele.create("CC"))); + + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + + + // simple SNP + // G*/A -> G/A + start = 13; + stop = start; + builder.start(start).stop(stop).alleles(CollectionUtil.makeList(RefG, A)); + result_builder.start(stop).stop(start).alleles(CollectionUtil.makeList(RefG, A)); + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + + + for (start = 1; start <= reference.getBases().length; start++) { + builder.start(start).stop(start); + builder.alleles(CollectionUtil.makeList( + Allele.create(reference.getBaseString().substring(start - 1, start), true), + reference.getBaseString().charAt(start - 1) == 'A' ? T : A)); + + tests.add(new Object[]{builder.make(), reference, builder.make()}); + } + + // AA/A in initial polyA repeat -> CA/C at the beginning + result_builder.start(1).stop(2).alleles(CollectionUtil.makeList(RefCA, C)); + for (start = 2; start <= 11; start++) { + builder.start(start).stop(start + 1); + builder.alleles(CollectionUtil.makeList( + RefAA, A)); + + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + } + + // A/AA in initial polyA repeat -> C/CA at the beginning + result_builder.start(1).stop(1).alleles(CollectionUtil.makeList(RefC, CA)); + for (start = 2; start <= 11; start++) { + builder.start(start).stop(start); + builder.alleles(CollectionUtil.makeList( + RefA, AA)); + + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + } + + + + //CT/CTCT -> A/ACT in CT repeat region + result_builder.start(19).stop(19).alleles(CollectionUtil.makeList(RefA, ACT)); + for (start = 20; start <= 27; start += 2) { + builder.start(start).stop(start + 1); + builder.alleles(CollectionUtil.makeList( + RefCT, CTCT)); + + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + } + + //TC/TCTC -> A/ACT in CT repeat region + for (start = 21; start <= 29; 
start += 2) { + builder.start(start).stop(start + 1); + builder.alleles(CollectionUtil.makeList( + RefTC, TCTC)); + + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + } + + //CTCT/CT -> ACT/A in CT repeat region + result_builder.start(19).stop(21).alleles(CollectionUtil.makeList(RefACT, A)); + for (start = 20; start <= 27; start += 2) { + builder.start(start).stop(start + 3); + builder.alleles(CollectionUtil.makeList( + RefCTCT, CT)); + + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + } + + //TCTC/TC-> ACT/A in CT repeat region + for (start = 21; start <= 29; start += 2) { + builder.start(start).stop(start + 3); + builder.alleles(CollectionUtil.makeList( + RefTCTC, TC)); + + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + } + + // for ease of reading, here's the reference sequence + // "CAAAAAAAAAACGTACGTACTCTCTCTCTACGT" + // 123456789 123456789 123456789 123 + + result_builder.alleles("AACGT", "A").start(10).stop(14); + for (start = 10; start < 17; start++) { + for (stop = start + 4; stop < 20; stop++) { + builder.alleles( + // -1 here due to reference string being 0-based. 
+ reference.getBaseString().substring(start - 1, stop + 1 - 1), + reference.getBaseString().substring(start - 1, stop - 3 - 1)).start(start).stop(stop); + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + } + } + + // test vc with genotypes: + + result_builder.start(10).stop(14).alleles("AACGT", "A", "AACG", "AACGTACGT"); + builder.start(9).stop(18).alleles("AAACGTACGT", "AAACGT", "AAACGACGT", "AAACGTACGTACGT"); + final Collection genotypes = new ArrayList<>(); + final Collection results_genotypes = new ArrayList<>(); + final GenotypeBuilder gBuilder = new GenotypeBuilder(); + + for (int sample = 1; sample < 4; sample++) { + gBuilder.name("sample" + sample) + .alleles(CollectionUtil.makeList(builder.getAlleles().get(0), builder.getAlleles().get(sample))); + genotypes.add(gBuilder.make()); + gBuilder.alleles(CollectionUtil.makeList(result_builder.getAlleles().get(0), result_builder.getAlleles().get(sample))); + results_genotypes.add(gBuilder.make()); + } + + gBuilder.name("sample_non_ref_het") + .alleles(CollectionUtil.makeList(builder.getAlleles().get(1), builder.getAlleles().get(2))); + genotypes.add(gBuilder.make()); + gBuilder.alleles(CollectionUtil.makeList(result_builder.getAlleles().get(1), result_builder.getAlleles().get(2))); + results_genotypes.add(gBuilder.make()); + + builder.genotypes(genotypes); + result_builder.genotypes(results_genotypes); + + tests.add(new Object[]{builder.make(), reference, result_builder.make()}); + + return tests.iterator(); + } + + @Test(dataProvider = "leftAlignAllelesData") + public void testLeftAlignVariants(final VariantContext source, final ReferenceSequence reference, final VariantContext result) { + + assertVcAreEqual(leftAlignVariant(source, reference), result); + } + + private void assertVcAreEqual(final VariantContext actual, final VariantContext expected) { + + if (expected == null) { + Assert.assertNull(actual); + return; + } + + Assert.assertNotNull(actual, "null status"); + 
Assert.assertEquals(actual.getContig(), expected.getContig(),"Different contigs: "); + Assert.assertEquals(actual.getStart(), expected.getStart(), "Different starts: "); + Assert.assertEquals(actual.getEnd(), expected.getEnd(), "Different ends: "); + + Assert.assertTrue(actual.hasSameAllelesAs(expected), "Alleles differ between " + actual + " and " + expected + ": "); + assertGenotypeContextsAreEquals(actual.getGenotypes(), expected.getGenotypes()); + } + + private void assertGenotypeContextsAreEquals(final GenotypesContext actual, final GenotypesContext expected) { + if (expected == null) { + Assert.assertNull(actual); + return; + } + Assert.assertEquals(actual.getSampleNamesOrderedByName(), expected.getSampleNamesOrderedByName(), "Sample names differ"); + + for (final String name : expected.getSampleNamesOrderedByName()) { + Assert.assertEquals(actual.get(name).getAlleles(), expected.get(name).getAlleles(),"Alleles differ for sample "+ name); + } + } } diff --git a/testdata/picard/vcf/testLiftover.vcf b/testdata/picard/vcf/testLiftoverBiallelicIndels.vcf similarity index 63% rename from testdata/picard/vcf/testLiftover.vcf rename to testdata/picard/vcf/testLiftoverBiallelicIndels.vcf index e6161ed82..cf058794b 100644 --- a/testdata/picard/vcf/testLiftover.vcf +++ b/testdata/picard/vcf/testLiftoverBiallelicIndels.vcf @@ -1,4 +1,5 @@ ##fileformat=VCFv4.1 #CHROM POS ID REF ALT QUAL FILTER INFO chr1 1 . C CCCCT 15676.17 PASS . -chr1 61 . CA C 724.43 PASS . +chr1 61 . GT G 724.43 PASS . +chr1 72 . T A 100 PASS . \ No newline at end of file diff --git a/testdata/picard/vcf/testLiftoverFailingVariants.vcf b/testdata/picard/vcf/testLiftoverFailingVariants.vcf new file mode 100644 index 000000000..d128062a2 --- /dev/null +++ b/testdata/picard/vcf/testLiftoverFailingVariants.vcf @@ -0,0 +1,5 @@ +##fileformat=VCFv4.1 +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 1 . C T 100 PASS . +chr1 61 . A T 100 PASS . +chr1 71 . CA C 100 PASS . 
diff --git a/testdata/picard/vcf/testLiftoverMultiallelicIndels.vcf b/testdata/picard/vcf/testLiftoverMultiallelicIndels.vcf new file mode 100644 index 000000000..b37615299 --- /dev/null +++ b/testdata/picard/vcf/testLiftoverMultiallelicIndels.vcf @@ -0,0 +1,4 @@ +##fileformat=VCFv4.1 +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 1 . C CCCCT,CCCt 15676.17 PASS . +chr1 61 . CA C,CAA 724.43 PASS .