";
- @Option(shortName=StandardOptionDefinitions.INPUT_SHORT_NAME, doc="The input VCF/BCF file to be lifted over.")
+ @Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input VCF/BCF file to be lifted over.")
public File INPUT;
- @Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc="The output location to write the lifted over VCF/BCF to.")
+ @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The output location to write the lifted over VCF/BCF to.")
public File OUTPUT;
- @Option(shortName="C", doc="The liftover chain file. See https://genome.ucsc.edu/goldenPath/help/chain.html for a description" +
+ @Option(shortName = "C", doc = "The liftover chain file. See https://genome.ucsc.edu/goldenPath/help/chain.html for a description" +
" of chain files. See http://hgdownload.soe.ucsc.edu/downloads.html#terms for where to download chain files.")
public File CHAIN;
- @Option(doc="File to which to write rejected records.")
+ @Option(doc = "File to which to write rejected records.")
public File REJECT;
- @Option(shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME, common=false,
+ @Option(shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME, common = false,
doc = "The reference sequence (fasta) for the TARGET genome build. The fasta file must have an " +
"accompanying sequence dictionary (.dict file).")
public File REFERENCE_SEQUENCE = Defaults.REFERENCE_FASTA;
@@ -104,42 +90,59 @@
// When a contig used in the chain is not in the reference, exit with this value instead of 0.
protected static int EXIT_CODE_WHEN_CONTIG_NOT_IN_REFERENCE = 1;
- /** Filter name to use when a target cannot be lifted over. */
+ /**
+ * Filter name to use when a target cannot be lifted over.
+ */
public static final String FILTER_CANNOT_LIFTOVER_INDEL = "ReverseComplementedIndel";
- /** Filter name to use when a target cannot be lifted over. */
+ /**
+ * Filter name to use when a target cannot be lifted over.
+ */
public static final String FILTER_NO_TARGET = "NoTarget";
- /** Filter name to use when a target is lifted over, but the reference allele doens't match the new reference. */
+ /**
+ * Filter name to use when a target is lifted over, but the reference allele doesn't match the new reference.
+ */
public static final String FILTER_MISMATCHING_REF_ALLELE = "MismatchedRefAllele";
- /** Filters to be added to the REJECT file. */
+ /**
+ * Filters to be added to the REJECT file.
+ */
private static final List FILTERS = CollectionUtil.makeList(
new VCFFilterHeaderLine(FILTER_CANNOT_LIFTOVER_INDEL, "Indel falls into a reverse complemented region in the target genome."),
new VCFFilterHeaderLine(FILTER_NO_TARGET, "Variant could not be lifted between genome builds."),
new VCFFilterHeaderLine(FILTER_MISMATCHING_REF_ALLELE, "Reference allele does not match reference genome sequence after liftover.")
);
- /** Attribute used to store the name of the source contig/chromosome prior to liftover. */
+ /**
+ * Attribute used to store the name of the source contig/chromosome prior to liftover.
+ */
public static final String ORIGINAL_CONTIG = "OriginalContig";
- /** Attribute used to store the position of the variant on the source contig prior to liftover. */
+ /**
+ * Attribute used to store the position of the variant on the source contig prior to liftover.
+ */
public static final String ORIGINAL_START = "OriginalStart";
- /** Metadata to be added to the Passing file. */
+ /**
+ * Attribute used to store the position of the failed variant on the target contig prior to finding out that alleles do not match.
+ */
+ public static final String ATTEMPTED_LOCUS = "AttemptedLocus";
+
+ /**
+ * Metadata to be added to the Passing file.
+ */
private static final List ATTRS = CollectionUtil.makeList(
new VCFInfoHeaderLine(ORIGINAL_CONTIG, 1, VCFHeaderLineType.String, "The name of the source contig/chromosome prior to liftover."),
new VCFInfoHeaderLine(ORIGINAL_START, 1, VCFHeaderLineType.String, "The position of the variant on the source contig prior to liftover.")
);
+ private VariantContextWriter rejects;
private final Log log = Log.getInstance(LiftoverVcf.class);
+ private SortingCollection sorter;
- // Stock main method
- public static void main(final String[] args) {
- new LiftoverVcf().instanceMainWithExit(args);
- }
-
- @Override protected int doWork() {
+ @Override
+ protected int doWork() {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
IOUtil.assertFileIsReadable(CHAIN);
@@ -154,13 +157,12 @@ public static void main(final String[] args) {
log.info("Loading up the target reference genome.");
final ReferenceSequenceFileWalker walker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
- final Map refSeqs = new HashMap();
- for (final SAMSequenceRecord rec: walker.getSequenceDictionary().getSequences()) {
- refSeqs.put(rec.getSequenceName(), walker.get(rec.getSequenceIndex()).getBases());
+ final Map refSeqs = new HashMap<>();
+ for (final SAMSequenceRecord rec : walker.getSequenceDictionary().getSequences()) {
+ refSeqs.put(rec.getSequenceName(), walker.get(rec.getSequenceIndex()));
}
CloserUtil.close(walker);
-
////////////////////////////////////////////////////////////////////////
// Setup the outputs
////////////////////////////////////////////////////////////////////////
@@ -170,21 +172,22 @@ public static void main(final String[] args) {
if (WRITE_ORIGINAL_POSITION) {
for (final VCFInfoHeaderLine line : ATTRS) outHeader.addMetaDataLine(line);
}
- final VariantContextWriter out = new VariantContextWriterBuilder().setOption(Options.INDEX_ON_THE_FLY)
+ final VariantContextWriter out = new VariantContextWriterBuilder()
+ .setOption(Options.INDEX_ON_THE_FLY)
.modifyOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER, ALLOW_MISSING_FIELDS_IN_HEADER)
.setOutputFile(OUTPUT).setReferenceDictionary(walker.getSequenceDictionary()).build();
out.writeHeader(outHeader);
- final VariantContextWriter rejects = new VariantContextWriterBuilder().setOutputFile(REJECT).unsetOption(Options.INDEX_ON_THE_FLY)
+ rejects = new VariantContextWriterBuilder().setOutputFile(REJECT)
+ .unsetOption(Options.INDEX_ON_THE_FLY)
.modifyOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER, ALLOW_MISSING_FIELDS_IN_HEADER)
.build();
final VCFHeader rejectHeader = new VCFHeader(in.getFileHeader());
for (final VCFFilterHeaderLine line : FILTERS) rejectHeader.addMetaDataLine(line);
- if (WRITE_ORIGINAL_POSITION) {
- for (final VCFInfoHeaderLine line : ATTRS) rejectHeader.addMetaDataLine(line);
- }
- rejects.writeHeader(rejectHeader);
+ rejectHeader.addMetaDataLine(new VCFInfoHeaderLine(ATTEMPTED_LOCUS,1, VCFHeaderLineType.String, "The locus of the variant in the TARGET prior to failing due to mismatching alleles."));
+
+ rejects.writeHeader(rejectHeader);
////////////////////////////////////////////////////////////////////////
// Read the input VCF, lift the records over and write to the sorting
@@ -193,7 +196,7 @@ public static void main(final String[] args) {
long failedLiftover = 0, failedAlleleCheck = 0, total = 0;
log.info("Lifting variants over and sorting.");
- final SortingCollection sorter = SortingCollection.newInstance(VariantContext.class,
+ sorter = SortingCollection.newInstance(VariantContext.class,
new VCFRecordCodec(outHeader, ALLOW_MISSING_FIELDS_IN_HEADER || VALIDATION_STRINGENCY != ValidationStringency.STRICT),
outHeader.getVCFRecordComparator(),
MAX_RECORDS_IN_RAM,
@@ -201,15 +204,16 @@ public static void main(final String[] args) {
ProgressLogger progress = new ProgressLogger(log, 1000000, "read");
// a mapping from original allele to reverse complemented allele
- final Map reverseComplementAlleleMap = new HashMap(10);
+ final Map reverseComplementAlleleMap = new HashMap<>(10);
for (final VariantContext ctx : in) {
++total;
final Interval source = new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), false, ctx.getContig() + ":" + ctx.getStart() + "-" + ctx.getEnd());
final Interval target = liftOver.liftOver(source, LIFTOVER_MIN_MATCH);
+ final ReferenceSequence refSeq;
- // if the target is null OR (the target is reverse complemented AND the variant is an indel or mixed), then we cannot lift it over
- if (target == null || (target.isNegativeStrand() && (ctx.isMixed() || ctx.isIndel()))) {
+ // if the target is null OR (the target is reverse complemented AND the variant is a non-biallelic indel or mixed), then we cannot lift it over
+ if (target == null || (target.isNegativeStrand() && (ctx.isMixed() || (ctx.isIndel() && !ctx.isBiallelic())))) {
final String reason = (target == null) ? FILTER_NO_TARGET : FILTER_CANNOT_LIFTOVER_INDEL;
rejects.add(new VariantContextBuilder(ctx).filter(reason).make());
failedLiftover++;
@@ -217,72 +221,33 @@ public static void main(final String[] args) {
rejects.add(new VariantContextBuilder(ctx).filter(FILTER_NO_TARGET).make());
failedLiftover++;
- String missingContigMessage = "Encountered a contig, " + target.getContig() + " that is not part of the target reference.";
- if(WARN_ON_MISSING_CONTIG) {
+ final String missingContigMessage = "Encountered a contig, " + target.getContig() + " that is not part of the target reference.";
+ if (WARN_ON_MISSING_CONTIG) {
log.warn(missingContigMessage);
} else {
log.error(missingContigMessage);
return EXIT_CODE_WHEN_CONTIG_NOT_IN_REFERENCE;
}
- } else {
- // Fix the alleles if we went from positive to negative strand
- reverseComplementAlleleMap.clear();
- final List alleles = new ArrayList();
+ } else if (target.isNegativeStrand() && ctx.isIndel() && ctx.isBiallelic()) {
+ refSeq = refSeqs.get(target.getContig());
+ //flipping indels:
- for (final Allele oldAllele : ctx.getAlleles()) {
- if (target.isPositiveStrand() || oldAllele.isSymbolic()) {
- alleles.add(oldAllele);
- }
- else {
- final Allele fixedAllele = Allele.create(SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference());
- alleles.add(fixedAllele);
- reverseComplementAlleleMap.put(oldAllele, fixedAllele);
- }
- }
-
- // Build the new variant context
- final VariantContextBuilder builder = new VariantContextBuilder(
- ctx.getSource(),
- target.getContig(),
- target.getStart(),
- target.getEnd(),
- alleles);
-
- builder.id(ctx.getID());
- builder.attributes(ctx.getAttributes());
-
- if (WRITE_ORIGINAL_POSITION) {
- builder.attribute(ORIGINAL_CONTIG, source.getContig());
- builder.attribute(ORIGINAL_START, source.getStart());
- }
- builder.genotypes(fixGenotypes(ctx.getGenotypes(), reverseComplementAlleleMap));
- builder.filters(ctx.getFilters());
- builder.log10PError(ctx.getLog10PError());
-
- // Check that the reference allele still agrees with the reference sequence
- boolean mismatchesReference = false;
- for (final Allele allele : builder.getAlleles()) {
- if (allele.isReference()) {
- final byte[] ref = refSeqs.get(target.getContig());
- final String refString = StringUtil.bytesToString(ref, target.getStart()-1, target.length());
-
- if (!refString.equalsIgnoreCase(allele.getBaseString())) {
- mismatchesReference = true;
- }
-
- break;
+ final VariantContext flippedIndel = flipIndel(ctx, liftOver, refSeq);
+ if (flippedIndel == null) {
+ throw new IllegalArgumentException("Unexpectedly found null VC. This should have not happened.");
+ } else {
+ if (!tryToAddVariant(flippedIndel, refSeq, reverseComplementAlleleMap, ctx)){
+ failedAlleleCheck++;
}
}
+ } else {
+ refSeq = refSeqs.get(target.getContig());
+ final VariantContext liftedVariant = liftSnp(ctx, target, refSeq);
- if (mismatchesReference) {
- rejects.add(new VariantContextBuilder(ctx).filter(FILTER_MISMATCHING_REF_ALLELE).make());
+ if (!tryToAddVariant(liftedVariant, refSeq, reverseComplementAlleleMap, ctx)) {
failedAlleleCheck++;
}
- else {
- sorter.add(builder.make());
- }
}
-
progress.record(ctx.getContig(), ctx.getStart());
}
@@ -313,21 +278,261 @@ public static void main(final String[] args) {
return 0;
}
- protected static GenotypesContext fixGenotypes(final GenotypesContext originals, final Map reverseComplementAlleleMap) {
+ /**
+ * utility function to attempt to add a variant. Checks that the reference allele still matches the reference (which may have changed)
+ *
+ * @param vc new {@link VariantContext}
+ * @param refSeq {@link ReferenceSequence} of new reference
+ * @param alleleMap a {@link Map} mapping the old alleles to the new alleles (for fixing the genotypes)
+ * @param source the original {@link VariantContext} to use for putting the original location information into vc
+ * @return true if successful, false if failed due to mismatching reference allele.
+ */
+ private boolean tryToAddVariant(final VariantContext vc, final ReferenceSequence refSeq, final Map alleleMap, final VariantContext source) {
+
+ final VariantContextBuilder builder = new VariantContextBuilder(vc);
+
+
+ builder.genotypes(fixGenotypes(source.getGenotypes(), alleleMap));
+ builder.filters(source.getFilters());
+ builder.log10PError(source.getLog10PError());
+ builder.attributes(source.getAttributes());
+ builder.id(source.getID());
+
+ if (WRITE_ORIGINAL_POSITION) {
+ builder.attribute(ORIGINAL_CONTIG, source.getContig());
+ builder.attribute(ORIGINAL_START, source.getStart());
+ }
+
+ // Check that the reference allele still agrees with the reference sequence
+ boolean mismatchesReference = false;
+ for (final Allele allele : builder.getAlleles()) {
+ if (allele.isReference()) {
+ final byte[] ref = refSeq.getBases();
+ final String refString = StringUtil.bytesToString(ref, vc.getStart() - 1, vc.getEnd()-vc.getStart()+1);
+
+ if (!refString.equalsIgnoreCase(allele.getBaseString())) {
+ mismatchesReference = true;
+ }
+ break;
+ }
+ }
+
+ if (mismatchesReference) {
+ rejects.add(new VariantContextBuilder(source)
+ .filter(FILTER_MISMATCHING_REF_ALLELE)
+ .attribute(ATTEMPTED_LOCUS, String.format("%s:%d-%d",vc.getContig(),vc.getStart(),vc.getEnd()))
+ .make());
+ return false;
+ } else {
+ sorter.add(builder.make());
+ return true;
+ }
+ }
+
+
+ protected static VariantContext liftSnp(final VariantContext source, final Interval target , final ReferenceSequence referenceSequence){
+ // Fix the alleles if we went from positive to negative strand
+ final Map reverseComplementAlleleMap = new HashMap<>(2);
+
+ final List alleles = new ArrayList<>();
+
+ for (final Allele oldAllele : source.getAlleles()) {
+ if (target.isPositiveStrand() || oldAllele.isSymbolic()) {
+ alleles.add(oldAllele);
+ } else {
+ final Allele fixedAllele = Allele.create(SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference());
+ alleles.add(fixedAllele);
+ reverseComplementAlleleMap.put(oldAllele, fixedAllele);
+ }
+ }
+
+ // Build the new variant context
+ final VariantContextBuilder builder = new VariantContextBuilder(
+ source.getSource(),
+ target.getContig(),
+ target.getStart(),
+ target.getEnd(),
+ alleles);
+
+ builder.id(source.getID());
+
+ return builder.make();
+ }
+
+ /**
+ * @param source original variant context
+ * @param liftOver the LiftOver object to use for flipping
+ * @param referenceSequence the reference sequence of the target
+ * @return a flipped variant-context.
+ */
+ protected static VariantContext flipIndel(final VariantContext source, final LiftOver liftOver, final ReferenceSequence referenceSequence) {
+ if (!source.isBiallelic()) return null; //only supporting biallelic indels, for now.
+
+ final Interval originalLocus = new Interval(source.getContig(), source.getStart(), source.getEnd());
+ final Locatable target = liftOver.liftOver(originalLocus);
+
+ if (target == null) return null;
+
+ // a boolean to protect against trying to access the -1 position in the reference array
+ final boolean addToStart = target.getStart() > 1;
+
+ final Map reverseComplementAlleleMap = new HashMap<>(2);
+
+ reverseComplementAlleleMap.clear();
+ final List alleles = new ArrayList<>();
+
+ for (final Allele oldAllele : source.getAlleles()) {
+ // target.getStart is 1-based, reference bases are 0-based
+ final StringBuilder alleleBuilder = new StringBuilder(target.getEnd() - target.getStart() + 1);
+
+ if (addToStart) alleleBuilder.append((char) referenceSequence.getBases()[target.getStart() - 2]);
+ alleleBuilder.append(SequenceUtil.reverseComplement(oldAllele.getBaseString().substring(1, oldAllele.length())));
+ if (!addToStart) alleleBuilder.append((char) referenceSequence.getBases()[target.getEnd() - 1]);
+
+ final Allele fixedAllele = Allele.create(alleleBuilder.toString(), oldAllele.isReference());
+ alleles.add(fixedAllele);
+ reverseComplementAlleleMap.put(oldAllele, fixedAllele);
+ }
+
+ final VariantContextBuilder builder = new VariantContextBuilder(source.getSource(),
+ target.getContig(),
+ target.getStart() - (addToStart ? 1 : 0),
+ target.getEnd() - (addToStart ? 1 : 0),
+ alleles);
+
+ builder.id(source.getID());
+ builder.attributes(source.getAttributes());
+
+ builder.genotypes(fixGenotypes(source.getGenotypes(), reverseComplementAlleleMap));
+ builder.filters(source.getFilters());
+ builder.log10PError(source.getLog10PError());
+
+ return leftAlignVariant(builder.make(), referenceSequence);
+ }
+
+ protected static GenotypesContext fixGenotypes(final GenotypesContext originals, final Map alleleMap) {
// optimization: if nothing needs to be fixed then don't bother
- if ( reverseComplementAlleleMap.isEmpty() ) {
+ if (alleleMap.isEmpty()) {
return originals;
}
final GenotypesContext fixedGenotypes = GenotypesContext.create(originals.size());
- for ( final Genotype genotype : originals ) {
- final List fixedAlleles = new ArrayList();
- for ( final Allele allele : genotype.getAlleles() ) {
- final Allele fixedAllele = reverseComplementAlleleMap.containsKey(allele) ? reverseComplementAlleleMap.get(allele) : allele;
+ for (final Genotype genotype : originals) {
+ final List fixedAlleles = new ArrayList<>();
+ for (final Allele allele : genotype.getAlleles()) {
+ final Allele fixedAllele = alleleMap.getOrDefault(allele, allele);
fixedAlleles.add(fixedAllele);
}
fixedGenotypes.add(new GenotypeBuilder(genotype).alleles(fixedAlleles).make());
}
return fixedGenotypes;
}
-}
+
+ /**
+ * Normalizes and left aligns a {@link VariantContext}.
+ *
+ * Based on Adrian Tan, Gonçalo R. Abecasis and Hyun Min Kang. (2015)
+ * Unified Representation of Genetic Variants. Bioinformatics.
+ *
+ * @param vc the {@link VariantContext} to be normalized
+ * @param referenceSequence the {@link ReferenceSequence} of the same contig as vc
+ * @return a new {@link VariantContext} which represents the same variation as vc but has
+ * been normalized and left-aligned
+ */
+ protected static VariantContext leftAlignVariant(final VariantContext vc, final ReferenceSequence referenceSequence) {
+
+ boolean changesInAlleles = true;
+
+ int start = vc.getStart();
+ int end = vc.getEnd();
+
+ if (!vc.getContig().equals(referenceSequence.getName())) {
+ throw new IllegalArgumentException("vc contig doesn't match that of supplied reference: " + vc.getContig() + " != " + referenceSequence.getName());
+ }
+
+ final Map alleleBasesMap = new HashMap<>();
+ vc.getAlleles().forEach(a -> alleleBasesMap.put(a, a.getBases()));
+
+ // 1. while changes in alleles do
+ while (changesInAlleles) {
+
+ changesInAlleles = false;
+ // 2. if alleles end with the same nucleotide then
+ if (alleleBasesMap.values().stream()
+ .collect(Collectors.groupingBy(a -> a[a.length - 1], Collectors.toSet()))
+ .size() == 1 && end > 1) {
+ // 3. truncate rightmost nucleotide of each allele
+ for (final Allele allele : alleleBasesMap.keySet()) {
+ alleleBasesMap.put(allele, truncateBase(alleleBasesMap.get(allele), true));
+ }
+ changesInAlleles = true;
+ end--;
+ // 4. end if
+ }
+
+ // 5. if there exists an empty allele then
+ if (alleleBasesMap.values().stream()
+ .map(a -> a.length)
+ .anyMatch(l -> l == 0)) {
+ // 6. extend alleles 1 nucleotide to the left
+ for (final Allele allele : alleleBasesMap.keySet()) {
+ // the first -1 for zero-base (getBases) versus 1-based (variant position)
+ // another -1 to get the base prior to the location of the start of the allele
+ final byte extraBase = (start > 1) ?
+ referenceSequence.getBases()[start - 2] :
+ referenceSequence.getBases()[end];
+
+ alleleBasesMap.put(allele, extendOneBase(alleleBasesMap.get(allele), extraBase));
+ }
+ changesInAlleles = true;
+ start--;
+
+ // 7. end if
+ }
+ }
+
+ // 8. while leftmost nucleotide of each allele are the same and all alleles have length 2 or more do
+ while (alleleBasesMap.values().stream()
+ .allMatch(a -> a.length >= 2) &&
+
+ alleleBasesMap.values().stream()
+ .collect(Collectors.groupingBy(a -> a[0], Collectors.toSet()))
+ .size() == 1
+ ) {
+
+ //9. truncate the leftmost base of the alleles
+ for (final Allele allele : alleleBasesMap.keySet()) {
+ alleleBasesMap.put(allele, truncateBase(alleleBasesMap.get(allele), false));
+ }
+ start++;
+ }
+
+ final VariantContextBuilder builder = new VariantContextBuilder(vc);
+ builder.start(start);
+ builder.stop(end);
+
+ final Map fixedAlleleMap = alleleBasesMap.entrySet().stream()
+ .collect(Collectors.toMap(Map.Entry::getKey, me -> Allele.create(me.getValue(), me.getKey().isReference())));
+ builder.alleles(fixedAlleleMap.values());
+ builder.genotypes(fixGenotypes(vc.getGenotypes(), fixedAlleleMap));
+
+ return builder.make();
+ }
+
+ private static byte[] truncateBase(final byte[] allele, final boolean truncateRightmost) {
+ return Arrays.copyOfRange(allele, truncateRightmost ? 0 : 1, truncateRightmost ?
+ allele.length - 1 :
+ allele.length);
+ }
+
+ // Creates a new byte array with the base added at the beginning.
+ private static byte[] extendOneBase(final byte[] bases, final byte base) {
+
+ final byte[] newBases = new byte[bases.length + 1];
+
+ System.arraycopy(bases, 0, newBases, 1, bases.length);
+ newBases[0] = base;
+
+ return newBases;
+ }
+}
\ No newline at end of file
diff --git a/src/test/java/picard/vcf/LiftoverVcfTest.java b/src/test/java/picard/vcf/LiftoverVcfTest.java
index 758967e4d..acabe826a 100644
--- a/src/test/java/picard/vcf/LiftoverVcfTest.java
+++ b/src/test/java/picard/vcf/LiftoverVcfTest.java
@@ -1,5 +1,8 @@
package picard.vcf;
+import htsjdk.samtools.liftover.LiftOver;
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFFileReader;
@@ -12,9 +15,11 @@
import java.io.File;
import java.util.*;
+import static picard.vcf.LiftoverVcf.leftAlignVariant;
+
/**
* Test class for LiftoverVcf.
- *
+ *
* Created by ebanks on 8/11/15.
*/
public class LiftoverVcfTest extends CommandLineProgramTest {
@@ -34,11 +39,21 @@ public void teardown() {
IOUtil.deleteDirectoryTree(OUTPUT_DATA_PATH);
}
- @Test
- public void testDoNotFixReverseComplementedIndels() {
+ @DataProvider(name = "liftoverReverseStrand")
+ public Object[][] liftoverReverseStrand() {
+ return new Object[][]{
+ {"testLiftoverBiallelicIndels.vcf", 3, 0},
+ {"testLiftoverMultiallelicIndels.vcf", 0, 2},
+ {"testLiftoverFailingVariants.vcf", 0, 3},
+ };
+ }
+
+
+ @Test(dataProvider = "liftoverReverseStrand" )
+ public void testReverseComplementedIndels(final String filename, final int expectedPassing, final int expectedFailing) {
final File liftOutputFile = new File(OUTPUT_DATA_PATH, "lift-delete-me.vcf");
final File rejectOutputFile = new File(OUTPUT_DATA_PATH, "reject-delete-me.vcf");
- final File input = new File(TEST_DATA_PATH, "testLiftover.vcf");
+ final File input = new File(TEST_DATA_PATH, filename);
liftOutputFile.deleteOnExit();
rejectOutputFile.deleteOnExit();
@@ -54,45 +69,10 @@ public void testDoNotFixReverseComplementedIndels() {
Assert.assertEquals(runPicardCommandLine(args), 0);
final VCFFileReader liftReader = new VCFFileReader(liftOutputFile, false);
- for (final VariantContext inputContext : liftReader) {
- Assert.fail("there should be no passing indels in the liftover");
- }
- final VCFFileReader rejectReader = new VCFFileReader(rejectOutputFile, false);
- int counter = 0;
- for (final VariantContext inputContext : rejectReader) {
- counter++;
- }
- Assert.assertEquals(counter, 2, "the wrong number of rejected indels failed the liftover");
- }
+ Assert.assertEquals(liftReader.iterator().stream().count(), expectedPassing, "The wrong number of variants was lifted over");
- @Test
- public void testFixReverseComplementedGenotypes() {
-
- final Allele refA = Allele.create("A", true);
- final Allele altC = Allele.create("C", false);
- final GenotypesContext originalGenotypes = GenotypesContext.create(3);
- originalGenotypes.add(new GenotypeBuilder("homref").alleles(Arrays.asList(refA, refA)).make());
- originalGenotypes.add(new GenotypeBuilder("het").alleles(Arrays.asList(refA, altC)).make());
- originalGenotypes.add(new GenotypeBuilder("homvar").alleles(Arrays.asList(altC, altC)).make());
-
- final Allele refT = Allele.create("T", true);
- final Allele altG = Allele.create("G", false);
- final GenotypesContext expectedGenotypes = GenotypesContext.create(3);
- expectedGenotypes.add(new GenotypeBuilder("homref").alleles(Arrays.asList(refT, refT)).make());
- expectedGenotypes.add(new GenotypeBuilder("het").alleles(Arrays.asList(refT, altG)).make());
- expectedGenotypes.add(new GenotypeBuilder("homvar").alleles(Arrays.asList(altG, altG)).make());
-
- final Map reverseComplementAlleleMap = new HashMap(2);
- reverseComplementAlleleMap.put(refA, refT);
- reverseComplementAlleleMap.put(altC, altG);
- final GenotypesContext actualGenotypes = LiftoverVcf.fixGenotypes(originalGenotypes, reverseComplementAlleleMap);
-
- for ( final String sample : Arrays.asList("homref", "het", "homvar") ) {
- final List expected = expectedGenotypes.get(sample).getAlleles();
- final List actual = actualGenotypes.get(sample).getAlleles();
- Assert.assertEquals(expected.get(0), actual.get(0));
- Assert.assertEquals(expected.get(1), actual.get(1));
- }
+ final VCFFileReader rejectReader = new VCFFileReader(rejectOutputFile, false);
+ Assert.assertEquals(rejectReader.iterator().stream().count(), expectedFailing, "The wrong number of variants was lifted over");
}
@DataProvider(name = "dataTestMissingContigInReference")
@@ -104,7 +84,7 @@ public void testFixReverseComplementedGenotypes() {
}
@Test(dataProvider = "dataTestMissingContigInReference")
- public void testMissingContigInReference(boolean warnOnMissingContext, int expectedReturnCode) {
+ public void testMissingContigInReference(final boolean warnOnMissingContext, final int expectedReturnCode) {
final File liftOutputFile = new File(OUTPUT_DATA_PATH, "lift-delete-me.vcf");
final File rejectOutputFile = new File(OUTPUT_DATA_PATH, "reject-delete-me.vcf");
final File input = new File(TEST_DATA_PATH, "testLiftoverUsingMissingContig.vcf");
@@ -134,10 +114,10 @@ public void testMissingContigInReference(boolean warnOnMissingContext, int expec
}
@Test(dataProvider = "dataTestWriteOriginalPosition")
- public void testWriteOriginalPosition(boolean shouldWriteOriginalPosition) {
+ public void testWriteOriginalPosition(final boolean shouldWriteOriginalPosition) {
final File liftOutputFile = new File(OUTPUT_DATA_PATH, "lift-delete-me.vcf");
final File rejectOutputFile = new File(OUTPUT_DATA_PATH, "reject-delete-me.vcf");
- final File input = new File(TEST_DATA_PATH, "testLiftover.vcf");
+ final File input = new File(TEST_DATA_PATH, "testLiftoverBiallelicIndels.vcf");
liftOutputFile.deleteOnExit();
rejectOutputFile.deleteOnExit();
@@ -154,17 +134,398 @@ public void testWriteOriginalPosition(boolean shouldWriteOriginalPosition) {
runPicardCommandLine(args);
- try (VCFFileReader liftReader = new VCFFileReader(liftOutputFile, false)) {
- for (VariantContext vc : liftReader) {
+ try (final VCFFileReader liftReader = new VCFFileReader(liftOutputFile, false)) {
+ for (final VariantContext vc : liftReader) {
if (shouldWriteOriginalPosition) {
Assert.assertNotNull(vc.getAttribute(LiftoverVcf.ORIGINAL_CONTIG));
Assert.assertNotNull(vc.getAttribute(LiftoverVcf.ORIGINAL_START));
- }
- else {
+ } else {
Assert.assertFalse(vc.hasAttribute(LiftoverVcf.ORIGINAL_CONTIG));
Assert.assertFalse(vc.hasAttribute(LiftoverVcf.ORIGINAL_START));
}
}
}
}
+
+ @DataProvider(name = "indelFlipData")
+ public Iterator