diff --git a/src/main/java/picard/vcf/GenotypeConcordance.java b/src/main/java/picard/vcf/GenotypeConcordance.java index 250e072cf..cce3ced93 100644 --- a/src/main/java/picard/vcf/GenotypeConcordance.java +++ b/src/main/java/picard/vcf/GenotypeConcordance.java @@ -416,6 +416,11 @@ private void writeVcfTuple(final VcfTuple tuple, final VariantContextWriter writ callContext = tuple.rightVariantContext.get(); } + //Don't write symbolic alleles to output VCF + if (truthContext != null && truthContext.isSymbolic() || callContext != null && callContext.isSymbolic()) { + return; + } + // Get the alleles for each genotype. No alleles will be extracted for a genotype if the genotype is // mixed, filtered, or missing. final Alleles alleles = normalizeAlleles(truthContext, TRUTH_SAMPLE, callContext, CALL_SAMPLE); @@ -432,14 +437,10 @@ private void writeVcfTuple(final VcfTuple tuple, final VariantContextWriter writ final List truthAlleles = alleles.truthAlleles(); final List callAlleles = alleles.callAlleles(); - // Get the alleles present at this site for both samples to use for the output variant context. + // Get the alleles present at this site for both samples to use for the output variant context, but remove no calls. final Set siteAlleles = new HashSet<>(); - if (truthContext != null) { - siteAlleles.addAll(truthContext.getAlleles()); - } - if (callContext != null) { - siteAlleles.addAll(callContext.getAlleles()); - } + siteAlleles.addAll(allAlleles); + siteAlleles.remove(Allele.NO_CALL); // Initialize the variant context builder final VariantContext initialContext = (callContext == null) ? truthContext : callContext; @@ -739,8 +740,8 @@ final protected static Alleles normalizeAlleles(final VariantContext truthContex // Truth reference is shorter than call reference final String suffix = getStringSuffix(callRef, truthRef, "Ref alleles mismatch between: " + truthContext + " and " + callContext); final int insertIdx = truthRef.length(); - truthAllele1 = spliceOrAppendString(truthAllele1, suffix, insertIdx); - truthAllele2 = spliceOrAppendString(truthAllele2, suffix, insertIdx); + truthAllele1 = truthAllele1.equals(Allele.NO_CALL_STRING) ? truthAllele1 : spliceOrAppendString(truthAllele1, suffix, insertIdx); + truthAllele2 = truthAllele2.equals(Allele.NO_CALL_STRING) ? truthAllele2 : spliceOrAppendString(truthAllele2, suffix, insertIdx); truthRef = truthRef + suffix; } @@ -748,8 +749,8 @@ else if (truthRef.length() > callRef.length()) { // call reference is shorter than truth: final String suffix = getStringSuffix(truthRef, callRef, "Ref alleles mismatch between: " + truthContext + " and " + callContext); final int insertIdx = callRef.length(); - callAllele1 = spliceOrAppendString(callAllele1, suffix, insertIdx); - callAllele2 = spliceOrAppendString(callAllele2, suffix, insertIdx); + callAllele1 = callAllele1.equals(Allele.NO_CALL_STRING) ? callAllele1 : spliceOrAppendString(callAllele1, suffix, insertIdx); + callAllele2 = callAllele2.equals(Allele.NO_CALL_STRING) ? callAllele2 : spliceOrAppendString(callAllele2, suffix, insertIdx); callRef = callRef + suffix; } else { diff --git a/src/test/java/picard/vcf/GenotypeConcordanceTest.java b/src/test/java/picard/vcf/GenotypeConcordanceTest.java index 381377910..155bf126f 100644 --- a/src/test/java/picard/vcf/GenotypeConcordanceTest.java +++ b/src/test/java/picard/vcf/GenotypeConcordanceTest.java @@ -25,6 +25,7 @@ package picard.vcf; import htsjdk.samtools.metrics.MetricsFile; +import htsjdk.samtools.metrics.StringHeader; import htsjdk.samtools.util.BufferedLineReader; import htsjdk.samtools.util.IOUtil; import htsjdk.variant.variantcontext.Allele; @@ -100,6 +101,8 @@ private static final String NORMALIZE_ALLELES_TRUTH = "normalize_alleles_truth.vcf"; private static final String NORMALIZE_ALLELES_CALL = "normalize_alleles_call.vcf"; + private static final String NORMALIZE_NO_CALLS_TRUTH = "normalize_no_calls_truth.vcf"; + private static final String NORMALIZE_NO_CALLS_CALL = "normalize_no_calls_call.vcf"; @AfterClass public void tearDown() { @@ -572,4 +575,36 @@ public void testNoCallVariants() { Assert.assertEquals(genotypeConcordance.instanceMain(new String[0]), 0); } + + @Test + public void testNormalizeAllelesForWritingVCF() throws FileNotFoundException { + final File truthVcfPath = new File(TEST_DATA_PATH.getAbsolutePath(), NORMALIZE_NO_CALLS_TRUTH); + final File callVcfPath = new File(TEST_DATA_PATH.getAbsolutePath(), NORMALIZE_NO_CALLS_CALL); + final File outputBaseFileName = new File(OUTPUT_DATA_PATH, "MultipleRefAlleles"); + final File outputContingencyMetrics = new File(outputBaseFileName.getAbsolutePath() + GenotypeConcordance.CONTINGENCY_METRICS_FILE_EXTENSION); + outputContingencyMetrics.deleteOnExit(); + + final GenotypeConcordance genotypeConcordance = new GenotypeConcordance(); + genotypeConcordance.TRUTH_VCF = truthVcfPath; + genotypeConcordance.TRUTH_SAMPLE = "truth"; + genotypeConcordance.CALL_VCF = callVcfPath; + genotypeConcordance.CALL_SAMPLE = "truth"; + genotypeConcordance.OUTPUT = new File(OUTPUT_DATA_PATH, "MultipleRefAlleles"); + genotypeConcordance.OUTPUT_VCF = true; + + Assert.assertEquals(genotypeConcordance.instanceMain(new String[0]), 0); + + final MetricsFile> output = new MetricsFile>(); + output.read(new FileReader(outputContingencyMetrics)); + + for (final GenotypeConcordanceContingencyMetrics metrics : output.getMetrics()) { + if(metrics.VARIANT_TYPE == VariantContext.Type.INDEL){ + Assert.assertEquals(metrics.TP_COUNT, 3); + Assert.assertEquals(metrics.TN_COUNT, 3); + Assert.assertEquals(metrics.FP_COUNT, 0); + Assert.assertEquals(metrics.FN_COUNT, 0); + Assert.assertEquals(metrics.EMPTY_COUNT, 2); + } + } + } } diff --git a/testdata/picard/vcf/normalize_no_calls_call.vcf b/testdata/picard/vcf/normalize_no_calls_call.vcf new file mode 100644 index 000000000..6ccb81021 --- /dev/null +++ b/testdata/picard/vcf/normalize_no_calls_call.vcf @@ -0,0 +1,43 @@ +##fileformat=VCFv4.1 +##FILTER= +##FORMAT= +##FORMAT= +##INFO= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20130719 +##phasing=none +##reference=file:///seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta +##source=SelectVariants +##variants_justified=left +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT truth +1 1 . TCAAGGG TGGG 22132 PASS set=Intersection GT:GQ 0/1:99 +1 10 . TCAACAA TCAA 22132 PASS set=Intersection GT:GQ 0/1:99 +1 100 . TCAACAA TCAACAAGG 10461 PASS set=Intersection GT:GQ 0/1:99 +1 1000 . TCAACAA T 10461 PASS set=Intersection GT:GQ ./. +1 1100 . TCAACAA T 10461 PASS set=Intersection GT:GQ 0/1:99 +1 1200 . TCAACAA T 10461 PASS set=Intersection GT:GQ ./. +1 1300 . TCAACAA 10461 PASS set=Intersection GT:GQ 0/1:99 diff --git a/testdata/picard/vcf/normalize_no_calls_truth.vcf b/testdata/picard/vcf/normalize_no_calls_truth.vcf new file mode 100644 index 000000000..e4e9c0670 --- /dev/null +++ b/testdata/picard/vcf/normalize_no_calls_truth.vcf @@ -0,0 +1,42 @@ +##fileformat=VCFv4.1 +##FILTER= +##FORMAT= +##INFO= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20130719 +##phasing=none +##reference=file:///seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta +##source=SelectVariants +##variants_justified=left +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT truth +1 1 . TCAA T 22132 PASS set=Intersection GT:GQ 0/1:99 +1 10 . TCAA T 22132 PASS set=Intersection GT:GQ 0/1:99 +1 100 . TCAA TCAAGG 10461 PASS set=Intersection GT:GQ 0/1:99 +1 1000 . TCAA T 10461 PASS set=Intersection GT:GQ ./. +1 1100 . TCAA T 10461 PASS set=Intersection GT:GQ ./. +1 1200 . TCAA T 10461 PASS set=Intersection GT:GQ 0/1:99 +1 1300 . TCAA 10461 PASS set=Intersection GT:GQ 0/1:99