From bfeb7f5da7c739108e5728cd5bfc7aea0a5ee18a Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 5 May 2017 09:07:30 -0400 Subject: [PATCH] Added a new tool to merge a collection of variant calling metrics --- .../vcf/AccumulateVariantCallingMetrics.java | 134 ++++++++++++++++ .../picard/vcf/CollectVariantCallingMetrics.java | 75 ++++++--- .../vcf/AccumulateVariantCallingMetricsTest.java | 170 +++++++++++++++++++++ ...eTest.emptyShard.variant_calling_detail_metrics | 9 ++ ...Test.emptyShard.variant_calling_summary_metrics | 9 ++ ...mergeTest.shard1.variant_calling_detail_metrics | 9 ++ ...ergeTest.shard1.variant_calling_summary_metrics | 9 ++ ...mergeTest.shard2.variant_calling_detail_metrics | 9 ++ ...ergeTest.shard2.variant_calling_summary_metrics | 9 ++ 9 files changed, 414 insertions(+), 19 deletions(-) create mode 100644 src/main/java/picard/vcf/AccumulateVariantCallingMetrics.java create mode 100644 src/test/java/picard/vcf/AccumulateVariantCallingMetricsTest.java create mode 100755 testdata/picard/vcf/mergeTest.emptyShard.variant_calling_detail_metrics create mode 100755 testdata/picard/vcf/mergeTest.emptyShard.variant_calling_summary_metrics create mode 100755 testdata/picard/vcf/mergeTest.shard1.variant_calling_detail_metrics create mode 100755 testdata/picard/vcf/mergeTest.shard1.variant_calling_summary_metrics create mode 100755 testdata/picard/vcf/mergeTest.shard2.variant_calling_detail_metrics create mode 100755 testdata/picard/vcf/mergeTest.shard2.variant_calling_summary_metrics diff --git a/src/main/java/picard/vcf/AccumulateVariantCallingMetrics.java b/src/main/java/picard/vcf/AccumulateVariantCallingMetrics.java new file mode 100644 index 000000000..a0d22420b --- /dev/null +++ b/src/main/java/picard/vcf/AccumulateVariantCallingMetrics.java @@ -0,0 +1,134 @@ +/* + * The MIT License + * + * Copyright (c) 2017 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated 
documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package picard.vcf; + +import htsjdk.samtools.metrics.MetricsFile; +import htsjdk.samtools.util.*; +import picard.PicardException; +import picard.cmdline.CommandLineProgram; +import picard.cmdline.CommandLineProgramProperties; +import picard.cmdline.Option; +import picard.cmdline.StandardOptionDefinitions; +import picard.cmdline.programgroups.Metrics; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.*; + +/** + * Combines multiple Variant Calling Metrics files into a single file. + * @author Eric Banks + */ +@CommandLineProgramProperties( + usage = "Combines multiple Variant Calling Metrics files into a single file. This tool is used in cases where the metrics are calculated" + + " separately for different (genomic) shards of the same callset and we want to combine them into a single result over the entire callset." 
+ + " The shards are expected to contain the same samples (although it will not fail if they do not) and to not have been run over overlapping genomic positions.", + usageShort = "Combines multiple Variant Calling Metrics files into a single file", + programGroup = Metrics.class +) +public class AccumulateVariantCallingMetrics extends CommandLineProgram { + + @Option(shortName= StandardOptionDefinitions.INPUT_SHORT_NAME, doc="Paths (except for the file extensions) of Variant Calling Metrics files to read and merge.", minElements=1) + public List INPUT; + + @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Path (except for the file extension) of output metrics files to write.") + public File OUTPUT; + + @Override + protected int doWork() { + + final String outputPrefix = OUTPUT.getAbsolutePath() + "."; + final File detailOutputFile = new File(outputPrefix + CollectVariantCallingMetrics.VariantCallingDetailMetrics.getFileExtension()); + final File summaryOutputFile = new File(outputPrefix + CollectVariantCallingMetrics.VariantCallingSummaryMetrics.getFileExtension()); + IOUtil.assertFileIsWritable(detailOutputFile); + IOUtil.assertFileIsWritable(summaryOutputFile); + + // set up the collectors + final Map> sampleDetailsMap = new HashMap<>(); + final Collection summaries = new ArrayList<>(); + + for (final File file : INPUT) { + final String inputPrefix = file.getAbsolutePath() + "."; + + try { + // read in the detailed metrics file + final File detail = new File(inputPrefix + CollectVariantCallingMetrics.VariantCallingDetailMetrics.getFileExtension()); + IOUtil.assertFileIsReadable(detail); + MetricsFile detailedMetricsFile = getMetricsFile(); + detailedMetricsFile.read(new FileReader(detail)); + + // for each sample in the detailed metrics... 
+ long totalHetDepth = 0L; + for (final CollectVariantCallingMetrics.VariantCallingDetailMetrics detailedMetrics : detailedMetricsFile.getMetrics()) { + // re-calculate internal fields from derived fields + detailedMetrics.calculateFromDerivedFields(); + totalHetDepth += detailedMetrics.TOTAL_HET_DEPTH; + + // add it to the list of metrics for that sample so that we can merge them later + sampleDetailsMap.computeIfAbsent(detailedMetrics.SAMPLE_ALIAS, f -> new ArrayList<>()).add(detailedMetrics); + } + + // next, read in the summary metrics + final File summary = new File(inputPrefix + CollectVariantCallingMetrics.VariantCallingSummaryMetrics.getFileExtension()); + IOUtil.assertFileIsReadable(summary); + MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, ?> summaryMetricsFile = getMetricsFile(); + summaryMetricsFile.read(new FileReader(summary)); + if (summaryMetricsFile.getMetrics().size() != 1) { + throw new PicardException(String.format("Expected 1 row in the summary metrics file but saw %d", summaryMetricsFile.getMetrics().size())); + } + + // re-calculate internal fields from derived fields and add it to the list of summary metrics + final CollectVariantCallingMetrics.VariantCallingSummaryMetrics summaryMetrics = summaryMetricsFile.getMetrics().get(0); + summaryMetrics.calculateFromDerivedFields(totalHetDepth); + summaries.add(summaryMetrics); + } catch (IOException e) { + throw new PicardException(String.format("Cannot read from metrics files with prefix %s", inputPrefix), e); + } + } + + // now merge all of the accumulated metrics + final Collection<CollectVariantCallingMetrics.VariantCallingDetailMetrics> collapsedDetails = new ArrayList<>(); + sampleDetailsMap.values().forEach(sampleDetails -> { + final CollectVariantCallingMetrics.VariantCallingDetailMetrics collapsed = new CollectVariantCallingMetrics.VariantCallingDetailMetrics(); + CollectVariantCallingMetrics.VariantCallingDetailMetrics.foldInto(collapsed, sampleDetails); + collapsed.calculateDerivedFields(); + collapsedDetails.add(collapsed); + }); + final
CollectVariantCallingMetrics.VariantCallingSummaryMetrics collapsedSummary = new CollectVariantCallingMetrics.VariantCallingSummaryMetrics(); + CollectVariantCallingMetrics.VariantCallingSummaryMetrics.foldInto(collapsedSummary, summaries); + collapsedSummary.calculateDerivedFields(); + + // prepare and write the finalized merged metrics + final MetricsFile detail = getMetricsFile(); + final MetricsFile summary = getMetricsFile(); + summary.addMetric(collapsedSummary); + collapsedDetails.forEach(detail::addMetric); + + detail.write(detailOutputFile); + summary.write(summaryOutputFile); + + return 0; + } +} diff --git a/src/main/java/picard/vcf/CollectVariantCallingMetrics.java b/src/main/java/picard/vcf/CollectVariantCallingMetrics.java index 6d40ede38..e52d76f4b 100644 --- a/src/main/java/picard/vcf/CollectVariantCallingMetrics.java +++ b/src/main/java/picard/vcf/CollectVariantCallingMetrics.java @@ -136,75 +136,75 @@ protected int doWork() { /** A collection of metrics relating to snps and indels within a variant-calling file (VCF). */ public static class VariantCallingSummaryMetrics extends MergeableMetricBase { - /** The number of high confidence SNPs calls (i.e. non-reference genotypes) that were examined */ + /** The number of passing bi-allelic SNPs calls (i.e. 
non-reference genotypes) that were examined */ @MergeByAdding public long TOTAL_SNPS; - /** The number of high confidence SNPs found in dbSNP */ + /** The number of passing bi-allelic SNPs found in dbSNP */ @MergeByAdding public long NUM_IN_DB_SNP; - /** The number of high confidence SNPS called that were not found in dbSNP */ + /** The number of passing bi-allelic SNPS called that were not found in dbSNP */ @MergeByAdding public long NOVEL_SNPS; - /** The number of SNPs that are also filtered */ + /** The number of SNPs that are filtered */ @MergeByAdding public long FILTERED_SNPS; - /** The fraction of high confidence SNPs in dbSNP */ + /** The fraction of passing bi-allelic SNPs in dbSNP */ @NoMergingIsDerived public float PCT_DBSNP; - /** The Transition/Transversion ratio of the SNP calls made at dbSNP sites */ + /** The Transition/Transversion ratio of the passing bi-allelic SNP calls made at dbSNP sites */ @NoMergingIsDerived public double DBSNP_TITV; - /** The Transition/Transversion ratio of the SNP calls made at non-dbSNP sites */ + /** The Transition/Transversion ratio of the passing bi-allelic SNP calls made at non-dbSNP sites */ @NoMergingIsDerived public double NOVEL_TITV; - /** The number of high confidence Indel calls that were examined */ + /** The number of passing indel calls that were examined */ @MergeByAdding public long TOTAL_INDELS; - /** The number of high confidence Indels called that were not found in dbSNP */ + /** The number of passing indels called that were not found in dbSNP */ @MergeByAdding public long NOVEL_INDELS; - /** The number of indels that are also filtered */ + /** The number of indels that are filtered */ @MergeByAdding public long FILTERED_INDELS; - /** The fraction of high confidence Indels in dbSNP */ + /** The fraction of passing indels in dbSNP */ @NoMergingIsDerived public float PCT_DBSNP_INDELS; - /** The number of high confidence Indels found in dbSNP */ + /** The number of passing indels found in dbSNP */ 
@MergeByAdding public long NUM_IN_DB_SNP_INDELS; - /** The Insertion/Deletion ratio of the Indel calls made at dbSNP sites */ + /** The Insertion/Deletion ratio of the indel calls made at dbSNP sites */ @NoMergingIsDerived public double DBSNP_INS_DEL_RATIO; - /** The Insertion/Deletion ratio of the Indel calls made at non-dbSNP sites */ + /** The Insertion/Deletion ratio of the indel calls made at non-dbSNP sites */ @NoMergingIsDerived public double NOVEL_INS_DEL_RATIO; - /** The number of high confidence multiallelic SNP calls that were examined */ + /** The number of passing multi-allelic SNP calls that were examined */ @MergeByAdding public double TOTAL_MULTIALLELIC_SNPS; - /** The number of high confidence multiallelic SNPs found in dbSNP */ + /** The number of passing multi-allelic SNPs found in dbSNP */ @MergeByAdding public double NUM_IN_DB_SNP_MULTIALLELIC; - /** The number of high confidence complex Indel calls that were examined */ + /** The number of passing complex indel calls that were examined */ @MergeByAdding public double TOTAL_COMPLEX_INDELS; - /** The number of high confidence complex Indels found in dbSNP */ + /** The number of passing complex indels found in dbSNP */ @MergeByAdding public double NUM_IN_DB_SNP_COMPLEX_INDELS; @@ -249,11 +249,35 @@ public void calculateDerivedFields() { this.NOVEL_INS_DEL_RATIO = this.novelInsertions / (double) this.novelDeletions; } + public void calculateFromDerivedFields(final long totalHetDepth) { + dbSnpTransversions = invertFromRatio(NUM_IN_DB_SNP, DBSNP_TITV); + dbSnpTransitions = NUM_IN_DB_SNP - dbSnpTransversions; + novelTransversions = invertFromRatio(NOVEL_SNPS, NOVEL_TITV); + novelTransitions = NOVEL_SNPS - novelTransversions; + dbSnpDeletions = invertFromRatio(NUM_IN_DB_SNP_INDELS, DBSNP_INS_DEL_RATIO); + dbSnpInsertions = NUM_IN_DB_SNP_INDELS - dbSnpDeletions; + novelDeletions = invertFromRatio(NOVEL_INDELS, NOVEL_INS_DEL_RATIO); + novelInsertions = NOVEL_INDELS - novelDeletions; + refAlleleObs = 
Double.isNaN(SNP_REFERENCE_BIAS) ? 0L : Math.round(totalHetDepth * SNP_REFERENCE_BIAS); + altAlleleObs = totalHetDepth - refAlleleObs; + } + public static void foldInto(final T target, final Collection metrics) { metrics.forEach(target::merge); } } + /** + * Given the ratio (X/Y) and the sum (X+Y), returns Y. + * + * @param sum X+Y + * @param ratio X/Y + * @return Y as a long + */ + private static long invertFromRatio(final long sum, final Double ratio) { + return ratio.isNaN() ? 0L : Math.round(sum / (ratio + 1.0)); + } + /** A collection of metrics relating to snps and indels within a variant-calling file (VCF) for a given sample. */ public static class VariantCallingDetailMetrics extends CollectVariantCallingMetrics.VariantCallingSummaryMetrics { /** The name of the sample being assayed */ @@ -279,6 +303,12 @@ public void calculateDerivedFields() { public long TOTAL_GQ0_VARIANTS; /** + * total number of reads (from AD field) for passing bi-allelic SNP hets for this sample + */ + @NoMergingIsDerived + public long TOTAL_HET_DEPTH; + + /** * Hidden fields not propagated to the metrics file. */ @MergeByAdding @@ -293,8 +323,15 @@ public void calculateDerivedFields() { super.calculateDerivedFields(); // Divide by zero should be OK -- NaN should get propagated to metrics file. 
HET_HOMVAR_RATIO = numHets / (double) numHomVar; - PCT_GQ0_VARIANTS = TOTAL_GQ0_VARIANTS / (double) (numHets + numHomVar); + TOTAL_HET_DEPTH = refAlleleObs + altAlleleObs; + } + + public void calculateFromDerivedFields() { + numHomVar = invertFromRatio(TOTAL_SNPS, HET_HOMVAR_RATIO); + numHets = TOTAL_SNPS - numHomVar; + + calculateFromDerivedFields(TOTAL_HET_DEPTH); } } } diff --git a/src/test/java/picard/vcf/AccumulateVariantCallingMetricsTest.java b/src/test/java/picard/vcf/AccumulateVariantCallingMetricsTest.java new file mode 100644 index 000000000..ebbb11c4c --- /dev/null +++ b/src/test/java/picard/vcf/AccumulateVariantCallingMetricsTest.java @@ -0,0 +1,170 @@ +package picard.vcf; + +/* + * The MIT License + * + * Copyright (c) 2017 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +import htsjdk.samtools.metrics.MetricsFile; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +/** + * Test for AccumulateVariantCallingMetrics + * + * @author Eric Banks + */ +public class AccumulateVariantCallingMetricsTest { + private static final File TEST_DATA_DIR = new File("testdata/picard/vcf"); + + @DataProvider(name = "shardDataProvider") + public Object[][] shardDataProvider() { + final File filePrefix1 = new File(TEST_DATA_DIR, "mergeTest.shard1"); + final File filePrefix2 = new File(TEST_DATA_DIR, "mergeTest.shard2"); + final File filePrefix3 = new File(TEST_DATA_DIR, "mergeTest.emptyShard"); + + return new Object[][] { + {Arrays.asList(filePrefix1, filePrefix2)}, + {Arrays.asList(filePrefix1, filePrefix2, filePrefix3)}, + }; + } + + + @Test(dataProvider = "shardDataProvider") + public void testMerge(final List<File> inputs) throws IOException { + final File mergedFilePrefix = new File(TEST_DATA_DIR, "mergeTest"); + final File mergedSummaryFile = new File(mergedFilePrefix.getAbsolutePath() + ".variant_calling_summary_metrics"); + final File mergedDetailFile = new File(mergedFilePrefix.getAbsolutePath() + ".variant_calling_detail_metrics"); + mergedSummaryFile.deleteOnExit(); + mergedDetailFile.deleteOnExit(); + + final AccumulateVariantCallingMetrics program = new AccumulateVariantCallingMetrics(); + program.INPUT = inputs; + program.OUTPUT = mergedFilePrefix; + + Assert.assertEquals(program.doWork(), 0); + + final MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, Comparable<?>> detail = new MetricsFile<>(); + detail.read(new FileReader(mergedDetailFile)); + + final MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, Comparable<?>> summary = new MetricsFile<>(); + summary.read(new FileReader(mergedSummaryFile)); + + checkResults(detail, summary); + } + + private void checkResults(final MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, Comparable<?>> detail, + final MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, Comparable<?>> summary) { + + int parsedDetail = 0; +
for (final CollectVariantCallingMetrics.VariantCallingDetailMetrics metrics : detail.getMetrics()) { + if (metrics.SAMPLE_ALIAS.equals("FOO1")) { + Assert.assertEquals(metrics.HET_HOMVAR_RATIO, 2.0); + Assert.assertEquals(metrics.TOTAL_HET_DEPTH, 30); + + Assert.assertEquals(metrics.TOTAL_SNPS, 15); + Assert.assertEquals(metrics.NUM_IN_DB_SNP, 10); + Assert.assertEquals(metrics.NOVEL_SNPS, 5); + Assert.assertEquals(metrics.FILTERED_SNPS, 7); + + Assert.assertEquals(metrics.PCT_DBSNP, 0.666667, 0.01); + Assert.assertEquals(metrics.DBSNP_TITV, 2.333333, 0.01); + Assert.assertEquals(metrics.NOVEL_TITV, 1.5, 0.01); + + Assert.assertEquals(metrics.TOTAL_INDELS, 9); + Assert.assertEquals(metrics.NOVEL_INDELS, 3); + Assert.assertEquals(metrics.FILTERED_INDELS, 12); + Assert.assertEquals(metrics.NUM_IN_DB_SNP_INDELS, 6); + + Assert.assertEquals(metrics.PCT_DBSNP_INDELS, 0.666667, 0.01); + Assert.assertEquals(metrics.DBSNP_INS_DEL_RATIO, 1.0, 0.01); + Assert.assertEquals(metrics.NOVEL_INS_DEL_RATIO, 0.0, 0.01); + + Assert.assertEquals(metrics.SNP_REFERENCE_BIAS, 0.466667, 0.01); + Assert.assertEquals(metrics.NUM_SINGLETONS, 10); + } else if (metrics.SAMPLE_ALIAS.equals("FOO2")) { + Assert.assertEquals(metrics.HET_HOMVAR_RATIO, 1.571429); + Assert.assertEquals(metrics.TOTAL_HET_DEPTH, 33); + + Assert.assertEquals(metrics.TOTAL_SNPS, 18); + Assert.assertEquals(metrics.NUM_IN_DB_SNP, 13); + Assert.assertEquals(metrics.NOVEL_SNPS, 5); + Assert.assertEquals(metrics.FILTERED_SNPS, 5); + + Assert.assertEquals(metrics.PCT_DBSNP, 0.722222, 0.01); + Assert.assertEquals(metrics.DBSNP_TITV, 2.25, 0.01); + Assert.assertEquals(metrics.NOVEL_TITV, 0.666667, 0.01); + + Assert.assertEquals(metrics.TOTAL_INDELS, 6); + Assert.assertEquals(metrics.NOVEL_INDELS, 3); + Assert.assertEquals(metrics.FILTERED_INDELS, 6); + Assert.assertEquals(metrics.NUM_IN_DB_SNP_INDELS, 3); + + Assert.assertEquals(metrics.PCT_DBSNP_INDELS, 0.5, 0.01); + Assert.assertEquals(metrics.DBSNP_INS_DEL_RATIO, 0.5, 0.01); 
+ Assert.assertEquals(metrics.NOVEL_INS_DEL_RATIO, 0.5, 0.01); + + Assert.assertEquals(metrics.SNP_REFERENCE_BIAS, 0.696969, 0.01); + Assert.assertEquals(metrics.NUM_SINGLETONS, 9); + } else { + Assert.assertTrue(false, "Unexpected sample name in detailed metrics: " + metrics.SAMPLE_ALIAS); + } + parsedDetail++; + } + Assert.assertEquals(parsedDetail, 2, "Did not parse enough detail metrics."); + + boolean parsedSummary = false; + for (final CollectVariantCallingMetrics.VariantCallingSummaryMetrics metrics : summary.getMetrics()) { + Assert.assertEquals(metrics.TOTAL_SNPS, 33); + Assert.assertEquals(metrics.NOVEL_SNPS, 10); + Assert.assertEquals(metrics.NUM_IN_DB_SNP, 23); + Assert.assertEquals(metrics.FILTERED_SNPS, 12); + + Assert.assertEquals(metrics.PCT_DBSNP, 0.696969, 0.01); + Assert.assertEquals(metrics.DBSNP_TITV, 2.285714, 0.01); + Assert.assertEquals(metrics.NOVEL_TITV, 1.0, 0.01); + + Assert.assertEquals(metrics.TOTAL_INDELS, 15); + Assert.assertEquals(metrics.NOVEL_INDELS, 6); + Assert.assertEquals(metrics.NUM_IN_DB_SNP_INDELS, 9); + Assert.assertEquals(metrics.FILTERED_INDELS, 18); + + Assert.assertEquals(metrics.PCT_DBSNP_INDELS, 0.6, 0.01); + Assert.assertEquals(metrics.DBSNP_INS_DEL_RATIO, 0.8, 0.01); + Assert.assertEquals(metrics.NOVEL_INS_DEL_RATIO, 0.2, 0.01); + + Assert.assertEquals(metrics.SNP_REFERENCE_BIAS, 0.587302, 0.01); + Assert.assertEquals(metrics.NUM_SINGLETONS, 19); + + parsedSummary = true; + } + + Assert.assertTrue(parsedSummary, "Did not parse summary metrics."); + } +} diff --git a/testdata/picard/vcf/mergeTest.emptyShard.variant_calling_detail_metrics b/testdata/picard/vcf/mergeTest.emptyShard.variant_calling_detail_metrics new file mode 100755 index 000000000..a3d674cb7 --- /dev/null +++ b/testdata/picard/vcf/mergeTest.emptyShard.variant_calling_detail_metrics @@ -0,0 +1,9 @@ +## htsjdk.samtools.metrics.StringHeader +# picard.vcf.CollectVariantCallingMetrics FOO +## htsjdk.samtools.metrics.StringHeader +# Started on: Fri Sep 16 
21:26:45 UTC 2016 + +## METRICS CLASS picard.vcf.CollectVariantCallingMetrics$VariantCallingDetailMetrics +SAMPLE_ALIAS HET_HOMVAR_RATIO TOTAL_HET_DEPTH TOTAL_SNPS NUM_IN_DB_SNP NOVEL_SNPS FILTERED_SNPS PCT_DBSNP DBSNP_TITV NOVEL_TITV TOTAL_INDELS NOVEL_INDELS FILTERED_INDELS PCT_DBSNP_INDELS NUM_IN_DB_SNP_INDELS DBSNP_INS_DEL_RATIO NOVEL_INS_DEL_RATIO TOTAL_MULTIALLELIC_SNPS NUM_IN_DB_SNP_MULTIALLELIC TOTAL_COMPLEX_INDELS NUM_IN_DB_SNP_COMPLEX_INDELS SNP_REFERENCE_BIAS NUM_SINGLETONS +FOO1 NaN 0 0 0 0 0 NaN NaN NaN 0 0 0 NaN 0 NaN NaN 0 0 0 0 NaN 0 +FOO2 NaN 0 0 0 0 0 NaN NaN NaN 0 0 0 NaN 0 NaN NaN 0 0 0 0 NaN 0 diff --git a/testdata/picard/vcf/mergeTest.emptyShard.variant_calling_summary_metrics b/testdata/picard/vcf/mergeTest.emptyShard.variant_calling_summary_metrics new file mode 100755 index 000000000..72501d725 --- /dev/null +++ b/testdata/picard/vcf/mergeTest.emptyShard.variant_calling_summary_metrics @@ -0,0 +1,9 @@ +## htsjdk.samtools.metrics.StringHeader +# picard.vcf.CollectVariantCallingMetrics FOO +## htsjdk.samtools.metrics.StringHeader +# Started on: Fri Sep 16 21:26:45 UTC 2016 + +## METRICS CLASS picard.vcf.CollectVariantCallingMetrics$VariantCallingSummaryMetrics +TOTAL_SNPS NUM_IN_DB_SNP NOVEL_SNPS FILTERED_SNPS PCT_DBSNP DBSNP_TITV NOVEL_TITV TOTAL_INDELS NOVEL_INDELS FILTERED_INDELS PCT_DBSNP_INDELS NUM_IN_DB_SNP_INDELS DBSNP_INS_DEL_RATIO NOVEL_INS_DEL_RATIO TOTAL_MULTIALLELIC_SNPS NUM_IN_DB_SNP_MULTIALLELIC TOTAL_COMPLEX_INDELS NUM_IN_DB_SNP_COMPLEX_INDELS SNP_REFERENCE_BIAS NUM_SINGLETONS +0 0 0 0 NaN NaN NaN 0 0 0 NaN 0 NaN NaN 0 0 0 0 NaN 0 + diff --git a/testdata/picard/vcf/mergeTest.shard1.variant_calling_detail_metrics b/testdata/picard/vcf/mergeTest.shard1.variant_calling_detail_metrics new file mode 100755 index 000000000..de392bde7 --- /dev/null +++ b/testdata/picard/vcf/mergeTest.shard1.variant_calling_detail_metrics @@ -0,0 +1,9 @@ +## htsjdk.samtools.metrics.StringHeader +# picard.vcf.CollectVariantCallingMetrics FOO +## 
htsjdk.samtools.metrics.StringHeader +# Started on: Fri Sep 16 21:26:45 UTC 2016 + +## METRICS CLASS picard.vcf.CollectVariantCallingMetrics$VariantCallingDetailMetrics +SAMPLE_ALIAS HET_HOMVAR_RATIO TOTAL_HET_DEPTH TOTAL_SNPS NUM_IN_DB_SNP NOVEL_SNPS FILTERED_SNPS PCT_DBSNP DBSNP_TITV NOVEL_TITV TOTAL_INDELS NOVEL_INDELS FILTERED_INDELS PCT_DBSNP_INDELS NUM_IN_DB_SNP_INDELS DBSNP_INS_DEL_RATIO NOVEL_INS_DEL_RATIO TOTAL_MULTIALLELIC_SNPS NUM_IN_DB_SNP_MULTIALLELIC TOTAL_COMPLEX_INDELS NUM_IN_DB_SNP_COMPLEX_INDELS SNP_REFERENCE_BIAS NUM_SINGLETONS +FOO1 1.5 10 5 3 2 2 0.6 2.0 1.0 3 1 4 0.666667 2 1.0 0.0 1 1 1 0 0.5 4 +FOO2 2.0 12 6 4 2 1 0.666667 3.0 0.0 2 1 2 0.5 1 0.0 0.0 0 0 0 0 0.75 3 diff --git a/testdata/picard/vcf/mergeTest.shard1.variant_calling_summary_metrics b/testdata/picard/vcf/mergeTest.shard1.variant_calling_summary_metrics new file mode 100755 index 000000000..7ea3222c4 --- /dev/null +++ b/testdata/picard/vcf/mergeTest.shard1.variant_calling_summary_metrics @@ -0,0 +1,9 @@ +## htsjdk.samtools.metrics.StringHeader +# picard.vcf.CollectVariantCallingMetrics FOO +## htsjdk.samtools.metrics.StringHeader +# Started on: Fri Sep 16 21:26:45 UTC 2016 + +## METRICS CLASS picard.vcf.CollectVariantCallingMetrics$VariantCallingSummaryMetrics +TOTAL_SNPS NUM_IN_DB_SNP NOVEL_SNPS FILTERED_SNPS PCT_DBSNP DBSNP_TITV NOVEL_TITV TOTAL_INDELS NOVEL_INDELS FILTERED_INDELS PCT_DBSNP_INDELS NUM_IN_DB_SNP_INDELS DBSNP_INS_DEL_RATIO NOVEL_INS_DEL_RATIO TOTAL_MULTIALLELIC_SNPS NUM_IN_DB_SNP_MULTIALLELIC TOTAL_COMPLEX_INDELS NUM_IN_DB_SNP_COMPLEX_INDELS SNP_REFERENCE_BIAS NUM_SINGLETONS +11 7 4 3 0.636363 2.5 0.333333 5 2 6 0.6 3 0.5 0.0 1 1 1 0 0.636363 7 + diff --git a/testdata/picard/vcf/mergeTest.shard2.variant_calling_detail_metrics b/testdata/picard/vcf/mergeTest.shard2.variant_calling_detail_metrics new file mode 100755 index 000000000..493d98fc2 --- /dev/null +++ b/testdata/picard/vcf/mergeTest.shard2.variant_calling_detail_metrics @@ -0,0 +1,9 @@ +## 
htsjdk.samtools.metrics.StringHeader +# picard.vcf.CollectVariantCallingMetrics FOO +## htsjdk.samtools.metrics.StringHeader +# Started on: Fri Sep 16 21:26:45 UTC 2016 + +## METRICS CLASS picard.vcf.CollectVariantCallingMetrics$VariantCallingDetailMetrics +SAMPLE_ALIAS HET_HOMVAR_RATIO TOTAL_HET_DEPTH TOTAL_SNPS NUM_IN_DB_SNP NOVEL_SNPS FILTERED_SNPS PCT_DBSNP DBSNP_TITV NOVEL_TITV TOTAL_INDELS NOVEL_INDELS FILTERED_INDELS PCT_DBSNP_INDELS NUM_IN_DB_SNP_INDELS DBSNP_INS_DEL_RATIO NOVEL_INS_DEL_RATIO TOTAL_MULTIALLELIC_SNPS NUM_IN_DB_SNP_MULTIALLELIC TOTAL_COMPLEX_INDELS NUM_IN_DB_SNP_COMPLEX_INDELS SNP_REFERENCE_BIAS NUM_SINGLETONS +FOO1 2.333333 20 10 7 3 5 0.7 2.5 2.0 6 2 8 0.666667 4 1.0 0.0 2 1 2 1 0.45 6 +FOO2 1.4 21 12 9 3 4 0.75 2.0 2.0 4 2 4 0.5 2 1.0 1.0 0 0 0 0 0.666667 6 diff --git a/testdata/picard/vcf/mergeTest.shard2.variant_calling_summary_metrics b/testdata/picard/vcf/mergeTest.shard2.variant_calling_summary_metrics new file mode 100755 index 000000000..11052ffa4 --- /dev/null +++ b/testdata/picard/vcf/mergeTest.shard2.variant_calling_summary_metrics @@ -0,0 +1,9 @@ +## htsjdk.samtools.metrics.StringHeader +# picard.vcf.CollectVariantCallingMetrics FOO +## htsjdk.samtools.metrics.StringHeader +# Started on: Fri Sep 16 21:26:45 UTC 2016 + +## METRICS CLASS picard.vcf.CollectVariantCallingMetrics$VariantCallingSummaryMetrics +TOTAL_SNPS NUM_IN_DB_SNP NOVEL_SNPS FILTERED_SNPS PCT_DBSNP DBSNP_TITV NOVEL_TITV TOTAL_INDELS NOVEL_INDELS FILTERED_INDELS PCT_DBSNP_INDELS NUM_IN_DB_SNP_INDELS DBSNP_INS_DEL_RATIO NOVEL_INS_DEL_RATIO TOTAL_MULTIALLELIC_SNPS NUM_IN_DB_SNP_MULTIALLELIC TOTAL_COMPLEX_INDELS NUM_IN_DB_SNP_COMPLEX_INDELS SNP_REFERENCE_BIAS NUM_SINGLETONS +22 16 6 9 0.727272 2.2 2.0 10 4 12 0.6 6 1.0 0.333333 2 1 2 1 0.560976 12 +