diff --git a/src/main/java/picard/analysis/AbstractWgsMetricsCollector.java b/src/main/java/picard/analysis/AbstractWgsMetricsCollector.java index f84f300a5..51364cc38 100644 --- a/src/main/java/picard/analysis/AbstractWgsMetricsCollector.java +++ b/src/main/java/picard/analysis/AbstractWgsMetricsCollector.java @@ -184,8 +184,7 @@ protected void addBaseQHistogram(final MetricsFile ALLELE_FRACTION = new ArrayList<>(Arrays.asList(0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.5)); + @Option(doc = "If true, fast algorithm is used.") public boolean USE_FAST_ALGORITHM = false; @@ -431,6 +435,9 @@ protected int doWork() { if (INTERVALS != null) { IOUtil.assertFileIsReadable(INTERVALS); } + if (THEORETICAL_SENSITIVITY_OUTPUT != null) { + IOUtil.assertFileIsWritable(THEORETICAL_SENSITIVITY_OUTPUT); + } // it doesn't make sense for the locus accumulation cap to be lower than the coverage cap if (LOCUS_ACCUMULATION_CAP < COVERAGE_CAP) { @@ -467,6 +474,10 @@ protected int doWork() { processor.addToMetricsFile(out, INCLUDE_BQ_HISTOGRAM, dupeFilter, mapqFilter, pairFilter); out.write(OUTPUT); + if (THEORETICAL_SENSITIVITY_OUTPUT != null) { + TheoreticalSensitivity.writeOutput(THEORETICAL_SENSITIVITY_OUTPUT, getMetricsFile(), SAMPLE_SIZE, collector.getUnfilteredDepthHistogram(),collector.getUnfilteredBaseQHistogram(), ALLELE_FRACTION); + } + return 0; } diff --git a/src/main/java/picard/analysis/TheoreticalSensitivity.java b/src/main/java/picard/analysis/TheoreticalSensitivity.java index 9b93ab4ff..aca64c6cf 100644 --- a/src/main/java/picard/analysis/TheoreticalSensitivity.java +++ b/src/main/java/picard/analysis/TheoreticalSensitivity.java @@ -24,15 +24,14 @@ package picard.analysis; +import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.util.Histogram; import htsjdk.samtools.util.Log; import picard.PicardException; import picard.util.MathUtil; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Random; +import 
java.io.File; +import java.util.*; /** * Created by David Benjamin on 5/13/15. @@ -41,14 +40,15 @@ private static final Log log = Log.getInstance(TheoreticalSensitivity.class); private static final int SAMPLING_MAX = 600; //prevent 'infinite' loops - private static final int MAX_CONSIDERED_DEPTH = 1000; //no point in looking any deeper than this, otherwise GC overhead is too high. + private static final int MAX_CONSIDERED_DEPTH = 10000; //no point in looking any deeper than this, otherwise GC overhead is too high. + private static final int randomSeed = 51; /** - * @param depthDistribution the probability of depth n is depthDistribution[n] for n = 0, 1. . . N - 1 + * @param depthDistribution the probability of depth n is depthDistribution[n] for n = 0, 1. . . N - 1 * @param qualityDistribution the probability of quality q is qualityDistribution[q] for q = 0, 1. . . Q - * @param sampleSize sample size is the number of random sums of quality scores for each m - * @param logOddsThreshold is the log_10 of the likelihood ratio required to call a SNP, - * for example 5 if the variant likelihood must be 10^5 times greater + * @param sampleSize sample size is the number of random sums of quality scores for each m + * @param logOddsThreshold is the log_10 of the likelihood ratio required to call a SNP, + * for example 5 if the variant likelihood must be 10^5 times greater */ public static double hetSNPSensitivity(final double[] depthDistribution, final double[] qualityDistribution, final int sampleSize, final double logOddsThreshold) { @@ -56,12 +56,12 @@ public static double hetSNPSensitivity(final double[] depthDistribution, final d } /** - * @param depthDistribution the probability of depth n is depthDistribution[n] for n = 0, 1. . . N - 1 + * @param depthDistribution the probability of depth n is depthDistribution[n] for n = 0, 1. . . N - 1 * @param qualityDistribution the probability of quality q is qualityDistribution[q] for q = 0, 1. . . 
Q - * @param sampleSize sample size is the number of random sums of quality scores for each m - * @param logOddsThreshold is the log_10 of the likelihood ratio required to call a SNP, - * for example 5 if the variant likelihood must be 10^5 times greater. - * @param withLogging true to output log messages, false otherwise. + * @param sampleSize sample size is the number of random sums of quality scores for each m + * @param logOddsThreshold is the log_10 of the likelihood ratio required to call a SNP, + * for example 5 if the variant likelihood must be 10^5 times greater. + * @param withLogging true to output log messages, false otherwise. */ public static double hetSNPSensitivity(final double[] depthDistribution, final double[] qualityDistribution, final int sampleSize, final double logOddsThreshold, final boolean withLogging) { @@ -143,7 +143,7 @@ We use an O(1) stochastic acceptance algorithm -- see Physica A, Volume 391, Pag private Random rng; RouletteWheel(final double[] weights) { - rng = new Random(51); + rng = new Random(randomSeed); N = weights.length; probabilities = new ArrayList<>(); @@ -204,4 +204,174 @@ public int draw() { } return normalizedHistogram; } -} + + /** + * Determines if a variant would be called under the particular conditions of a given total depth, alt depth, + * average base qualities, allele fraction of variant and log odds threshold necessary to exceed to call variant. + * @param totalDepth Depth at the site to be called, both alt and ref. + * @param altDepth Number of alt bases at this site. 
+ * @param averageQuality Average Phred-scaled quality of bases + * @param alleleFraction Allele fraction we are attempting to detect + * @param logOddsThreshold Log odds threshold necessary to exceed for variant to be called + * @return + */ + public static boolean isCalled(int totalDepth, int altDepth, double averageQuality, double alleleFraction, double logOddsThreshold) { + double threshold; + double sumOfQualities = altDepth * averageQuality; + threshold = 10.0 * (altDepth * Math.log10(1.0 / alleleFraction) + (totalDepth - altDepth) * Math.log10(1.0 / (1.0 - alleleFraction)) + logOddsThreshold); + + return sumOfQualities > threshold; + } + + /** + * Draw from a binomial distribution. + * @param trials Number of trials to perform + * @param p Probability of individual success + * @param uniformRNG Random number generator to use for making draw + * @return Number of total successes + */ + public static int binomialDraw(final int trials, final double p, final Random uniformRNG) { + if (p > 1.0 || p < 0) { + throw new PicardException("Probabilities should be between 0 and 1, found value " + p + "."); + } + + int successes = 0; + for (int i = 0; i < trials; i++) { + if (uniformRNG.nextDouble() < p) { + successes++; + } + } + return successes; + } + + /** + * Calculates the theoretical sensitivity with a given Phred-scaled quality score distribution at a constant + * depth. 
+ * @param depth Depth to compute sensitivity at + * @param qualityDistribution Phred-scaled quality score distribution + * @param logOddsThreshold Log odds threshold necessary to exceed for variant to be called + * @param sampleSize sampleSize is the total number of simulations to run + * @param alleleFraction the allele fraction to evaluate sensitivity at + * @param randomSeed random number seed to use for random number generator + * @return + */ + public static double sensitivityAtConstantDepth(final int depth, final double[] qualityDistribution, final double logOddsThreshold, final int sampleSize, final double alleleFraction, final int randomSeed) { + final RouletteWheel qualityRW = new RouletteWheel(trimDistribution(qualityDistribution)); + final Random uniformRNG = new Random(randomSeed); + + int altDepth = 0; + int calledVariants = 0; + for (int k = 0; k < sampleSize; k++) { + altDepth = binomialDraw(depth, alleleFraction, uniformRNG); + + int sumOfQualities = 0; + for (int i = 0; i < altDepth; i++) { + sumOfQualities += qualityRW.draw(); + } + if (isCalled(depth, altDepth, (double) sumOfQualities / (double) altDepth, alleleFraction, logOddsThreshold)) { + calledVariants++; + } + } + return (double) calledVariants / sampleSize; + } + + /** + * Calculates the theoretical sensitivity with a given Phred-scaled quality score distribution at a constant + * depth. 
+ * @param depth Depth to compute sensitivity at + * @param qualityDistribution Phred-scaled quality score distribution + * @param logOddsThreshold Log odds threshold necessary to exceed for variant to be called + * @param sampleSize the total number of simulations to run + * @param alleleFraction the allele fraction to evaluate sensitivity at + * @return + */ + public static double sensitivityAtConstantDepth(final int depth, final double[] qualityDistribution, final double logOddsThreshold, final int sampleSize, final double alleleFraction) { + return sensitivityAtConstantDepth(depth, qualityDistribution, logOddsThreshold, sampleSize, alleleFraction, randomSeed); + } + + /** + * Calculates the theoretical sensitivity with a given Phred-scaled quality score distribution and depth + * distribution. + * @param depthDistribution Depth distribution to compute theoretical sensitivity over + * @param qualityDistribution Phred-scaled quality score distribution + * @param sampleSize the total number of simulations to run + * @param logOddsThreshold Log odds threshold necessary to exceed for variant to be called + * @param alleleFraction the allele fraction to evaluate sensitivity at + * @return + */ + public static double theoreticalSensitivity(final double[] depthDistribution, final double[] qualityDistribution, + final int sampleSize, final double logOddsThreshold, final double alleleFraction) { + if (alleleFraction > 1.0 || alleleFraction < 0.0) { + throw new PicardException("Allele fractions must be between 0 and 1."); + } + double sensitivity = 0.0; + for (int k = 0; k < depthDistribution.length; k++) { + if (k % 100 == 0) { + log.info("Calculating sensitivity for allele fraction " + alleleFraction + " at depth " + k + " of " + depthDistribution.length); + } + sensitivity += sensitivityAtConstantDepth(k, qualityDistribution, logOddsThreshold, sampleSize, alleleFraction) * depthDistribution[k]; + } + return sensitivity; + } + + /** + * Removes trailing zeros in a 
distribution. The purpose of this function is to prevent other + * functions from evaluating in regions where the distribution has zero probability. + * @param distribution Distribution of base qualities + * @return Distribution of base qualities removing any trailing zeros + */ + public static double[] trimDistribution(final double[] distribution) { + int endOfDistribution = 0; + + // Locate the index of the distribution where all the values remaining at + // larger indices are zero. + for (endOfDistribution = distribution.length-1;endOfDistribution >= 0;endOfDistribution--) { + if (distribution[endOfDistribution] != 0) { + break; + } + } + + // Remove trailing zeros. + final double[] trimmedDistribution = new double[endOfDistribution+1]; + for (int i = 0;i <= endOfDistribution;i++) { + trimmedDistribution[i] = distribution[i]; + } + + return trimmedDistribution; + } + + /** + * This is a utility function + * @param theoreticalSensitivityOutput File to save the results of theoretical sensitivity to. + * @param tsOut MetricsFile object to save results of theoretical sensitivity to. + * @param sampleSize Number of samples to take for each depth. + * @param depthHistogram Histogram of depth distribution for sample. + * @param baseQHistogram Histogram of Phred-scaled quality scores. + * @param alleleFractions Allele fractions + */ + public static void writeOutput(final File theoreticalSensitivityOutput, final MetricsFile tsOut, final int sampleSize, + final Histogram depthHistogram, final Histogram baseQHistogram, final List alleleFractions) { + if (theoreticalSensitivityOutput != null) { + final double logOddsThreshold = 6.2; // This threshold is used because it is the value used for MuTect2. 
+ final double[] depthDoubleArray = TheoreticalSensitivity.normalizeHistogram(depthHistogram); + final double[] baseQDoubleArray = TheoreticalSensitivity.normalizeHistogram(baseQHistogram); + + final TheoreticalSensitivityMetrics theoreticalSensitivityMetrics = new TheoreticalSensitivityMetrics(); + + // For each allele fraction in alleleFractions calculate theoretical sensitivity and add the results + // to the histogram sensitivityHistogram. + final Histogram sensitivityHistogram = new Histogram<>(); + sensitivityHistogram.setBinLabel("allele_fraction"); + sensitivityHistogram.setValueLabel("theoretical_sensitivity"); + for (Double alleleFraction : alleleFractions) { + sensitivityHistogram.increment(alleleFraction, TheoreticalSensitivity + .theoreticalSensitivity(depthDoubleArray, baseQDoubleArray, sampleSize, logOddsThreshold, alleleFraction)); + } + + // Write out results to file. + tsOut.addMetric(theoreticalSensitivityMetrics); + tsOut.addHistogram(sensitivityHistogram); + tsOut.write(theoreticalSensitivityOutput); + } + } +} \ No newline at end of file diff --git a/src/main/java/picard/analysis/TheoreticalSensitivityMetrics.java b/src/main/java/picard/analysis/TheoreticalSensitivityMetrics.java new file mode 100644 index 000000000..1b6cc1003 --- /dev/null +++ b/src/main/java/picard/analysis/TheoreticalSensitivityMetrics.java @@ -0,0 +1,11 @@ +package picard.analysis; + +import htsjdk.samtools.metrics.MetricBase; + +/** + * Created by fleharty on 6/30/17. + */ +public class TheoreticalSensitivityMetrics extends MetricBase { + /** This is a dummy value that allows for this to be read properly from a file. 
*/ + public int DUMMY_FIELD = 0; +} \ No newline at end of file diff --git a/src/main/java/picard/analysis/directed/CollectTargetedMetrics.java b/src/main/java/picard/analysis/directed/CollectTargetedMetrics.java index 2ec89d370..43b58489c 100644 --- a/src/main/java/picard/analysis/directed/CollectTargetedMetrics.java +++ b/src/main/java/picard/analysis/directed/CollectTargetedMetrics.java @@ -15,14 +15,15 @@ import htsjdk.samtools.util.ProgressLogger; import htsjdk.samtools.util.SequenceUtil; import picard.analysis.MetricAccumulationLevel; +import picard.analysis.TheoreticalSensitivity; +import picard.analysis.TheoreticalSensitivityMetrics; import picard.cmdline.CommandLineProgram; import picard.cmdline.Option; import picard.cmdline.StandardOptionDefinitions; import picard.metrics.MultilevelMetrics; import java.io.File; -import java.util.List; -import java.util.Set; +import java.util.*; import static picard.cmdline.StandardOptionDefinitions.MINIMUM_MAPPING_QUALITY_SHORT_NAME; @@ -98,6 +99,11 @@ protected abstract COLLECTOR makeCollector(final Set ac @Option(doc="Sample Size used for Theoretical Het Sensitivity sampling. Default is 10000.", optional = true) public int SAMPLE_SIZE=10000; + @Option(doc="Output for Theoretical Sensitivity metrics where the allele fractions are provided by the ALLELE_FRACTION argument. Default is null.", optional = true) + public File THEORETICAL_SENSITIVITY_OUTPUT; + + @Option(doc="Allele fraction to run theoretical sensitivity on.", optional = true) + public List ALLELE_FRACTION = new ArrayList<>(Arrays.asList(0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.5)); /** * Asserts that files are readable and writable and then fires off an * HsMetricsCalculator instance to do the real work. 
@@ -156,6 +162,11 @@ protected int doWork() { metrics.write(OUTPUT); + if (THEORETICAL_SENSITIVITY_OUTPUT != null) { + final MetricsFile theoreticalSensitivityMetrics = getMetricsFile(); + TheoreticalSensitivity.writeOutput(THEORETICAL_SENSITIVITY_OUTPUT, theoreticalSensitivityMetrics, SAMPLE_SIZE, collector.getDepthHistogram(), collector.getBaseQualityHistogram(), ALLELE_FRACTION); + } + CloserUtil.close(reader); return 0; } diff --git a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java index 1094cefdb..dd522a24e 100644 --- a/src/main/java/picard/analysis/directed/TargetMetricsCollector.java +++ b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java @@ -124,6 +124,8 @@ private static final double LOG_ODDS_THRESHOLD = 3.0; + private final File theoreticalSensitivityOutput = null; + private final int minimumMappingQuality; private final int minimumBaseQuality; private final boolean clipOverlappingReads; @@ -921,6 +923,14 @@ public String toString() { return "TargetedMetricCollector(interval=" + interval + ", depths = [" + StringUtil.intValuesToString(this.depths) + "])"; } } + + public Histogram getBaseQualityHistogram() { + return unfilteredBaseQHistogram; + } + + public Histogram getDepthHistogram() { + return unfilteredDepthHistogram; + } } /** diff --git a/src/test/java/picard/analysis/TheoreticalSensitivityTest.java b/src/test/java/picard/analysis/TheoreticalSensitivityTest.java index c36974088..8ce0a217e 100644 --- a/src/test/java/picard/analysis/TheoreticalSensitivityTest.java +++ b/src/test/java/picard/analysis/TheoreticalSensitivityTest.java @@ -207,7 +207,7 @@ public void testHetSensDistributions() throws Exception { } @Test(dataProvider = "hetSensDataProvider") - public void testHetSensTargeted(final double expected, final File metricsFile) throws Exception{ + public void testHetSensTargeted(final double expected, final File metricsFile) throws Exception { 
final double tolerance = 0.000_000_01; final MetricsFile Metrics = new MetricsFile(); @@ -216,8 +216,8 @@ public void testHetSensTargeted(final double expected, final File metricsFile) t final Histogram depthHistogram = histograms.get(0); final Histogram qualityHistogram = histograms.get(1); - final double [] depthDistribution = TheoreticalSensitivity.normalizeHistogram(depthHistogram); - final double [] qualityDistribution = TheoreticalSensitivity.normalizeHistogram(qualityHistogram); + final double[] depthDistribution = TheoreticalSensitivity.normalizeHistogram(depthHistogram); + final double[] qualityDistribution = TheoreticalSensitivity.normalizeHistogram(qualityHistogram); final int sampleSize = 1_000; final double logOddsThreshold = 3.0; @@ -225,4 +225,109 @@ public void testHetSensTargeted(final double expected, final File metricsFile) t final double result = TheoreticalSensitivity.hetSNPSensitivity(depthDistribution, qualityDistribution, sampleSize, logOddsThreshold); Assert.assertEquals(result, expected, tolerance); } + + @DataProvider(name = "TheoreticalSensitivityConstantDepthDataProvider") + public Object[][] fractionalAlleleSensDataProvider() { + final File wgsMetricsFile = new File(TEST_DIR, "test_Solexa-332667.wgs_metrics"); + final File targetedMetricsFile = new File(TEST_DIR, "test_25103070136.targeted_pcr_metrics"); + + // These magic numbers come from a separate implementation of the code in R. + return new Object[][]{ + // Expected sensitivity, metrics file, allele fraction, constant depth, sample size. 
+ {1.00, wgsMetricsFile, .5, 30, 10000}, + {1.00, wgsMetricsFile, .5, 30, 10000}, + {0.78, targetedMetricsFile, .1, 30, 10000}, + {0.26, targetedMetricsFile, 0.1, 10, 10000} + }; + } + + @Test(dataProvider = "TheoreticalSensitivityConstantDepthDataProvider") + public void testSensitivityAtConstantDepth(final double expected, final File metricsFile, final double alleleFraction, final int depth, final int sampleSize) throws Exception { + // This tests Theoretical Sensitivity assuming a uniform depth with a distribution of base quality scores. + // Because this only tests sensitivity at a constant depth, we use this for testing the code at high depths. + final double tolerance = 0.01; + final MetricsFile Metrics = new MetricsFile(); + Metrics.read(new FileReader(metricsFile)); + final List histograms = Metrics.getAllHistograms(); + final Histogram qualityHistogram = histograms.get(1); + + final double[] qualityDistribution = TheoreticalSensitivity.normalizeHistogram(qualityHistogram); + + // We ensure that even using different random seeds we converge to roughly the same value. + for (int i = 0;i < 3;i++) { + double result = TheoreticalSensitivity.sensitivityAtConstantDepth(depth, qualityDistribution, 3, sampleSize, alleleFraction, i); + Assert.assertEquals(result, expected, tolerance); + } + } + + @DataProvider(name = "TheoreticalSensitivityDataProvider") + public Object[][] arbFracSensDataProvider() { + final File wgsMetricsFile = new File(TEST_DIR, "test_Solexa-332667.wgs_metrics"); + + // This test acts primarily as an integration test. The sample size of 100 + // is not quite large enough to converge properly, but is used for the purpose of + // keeping the compute time of the tests short. 
+ return new Object[][] { + {0.90, wgsMetricsFile, .5, 100}, + {0.77, wgsMetricsFile, .3, 100}, + {0.29, wgsMetricsFile, .1, 100}, + {0.08, wgsMetricsFile, .05, 100}, + }; + } + + @Test(dataProvider = "TheoreticalSensitivityDataProvider") + public void testSensitivity(final double expected, final File metricsFile, final double alleleFraction, final int sampleSize) throws Exception { + // This tests Theoretical Sensitivity using distributions on both base quality scores + // and the depth histogram. + + // We use a pretty forgiving tolerance here because for these tests + // we are not using large enough sample sizes to converge. + final double tolerance = 0.02; + + final MetricsFile Metrics = new MetricsFile(); + Metrics.read(new FileReader(metricsFile)); + final List histograms = Metrics.getAllHistograms(); + final Histogram depthHistogram = histograms.get(0); + final Histogram qualityHistogram = histograms.get(1); + + final double[] qualityDistribution = TheoreticalSensitivity.normalizeHistogram(qualityHistogram); + final double[] depthDistribution = TheoreticalSensitivity.normalizeHistogram(depthHistogram); + + final double result = TheoreticalSensitivity.theoreticalSensitivity(depthDistribution, qualityDistribution, sampleSize, 3, alleleFraction); + + Assert.assertEquals(result, expected, tolerance); + } + + @DataProvider(name = "equivalanceHetVsArbitrary") + public Object[][] equivalenceHetVsFull() { + final File wgsMetricsFile = new File(TEST_DIR, "test_Solexa-332667.wgs_metrics"); + final File targetedMetricsFile = new File(TEST_DIR, "test_25103070136.targeted_pcr_metrics"); + + return new Object[][] { + // The sample sizes chosen here for these tests are smaller than what would normally be used + // in order to keep the test time low. It should be noted that for larger sample sizes + // the values converge. 
+ {wgsMetricsFile, 0.01, 300}, + {targetedMetricsFile, 0.01, 50} + }; + } + + @Test(dataProvider = "equivalanceHetVsArbitrary") + public void testHetVsArbitrary(final File metricsFile, final double tolerance, final int sampleSize) throws Exception { + // This test compares Theoretical Sensitivity for arbitrary allele fractions with the theoretical het sensitivity + // model. Since allele fraction of 0.5 is equivalent to a het, these should provide the same answer. + final MetricsFile Metrics = new MetricsFile(); + Metrics.read(new FileReader(metricsFile)); + final List histograms = Metrics.getAllHistograms(); + final Histogram depthHistogram = histograms.get(0); + final Histogram qualityHistogram = histograms.get(1); + + final double[] qualityDistribution = TheoreticalSensitivity.normalizeHistogram(qualityHistogram); + final double[] depthDistribution = TheoreticalSensitivity.normalizeHistogram(depthHistogram); + + final double resultFromTS = TheoreticalSensitivity.theoreticalSensitivity(depthDistribution, qualityDistribution, sampleSize, 3, 0.5); + final double resultFromTHS = TheoreticalSensitivity.hetSNPSensitivity(depthDistribution, qualityDistribution, sampleSize, 3); + + Assert.assertEquals(resultFromTHS, resultFromTS, tolerance); + } } diff --git a/src/test/java/picard/analysis/directed/CollectTargetedMetricsTest.java b/src/test/java/picard/analysis/directed/CollectTargetedMetricsTest.java index 7cd9b99e0..d4bcf4a1d 100644 --- a/src/test/java/picard/analysis/directed/CollectTargetedMetricsTest.java +++ b/src/test/java/picard/analysis/directed/CollectTargetedMetricsTest.java @@ -15,6 +15,7 @@ import org.testng.annotations.BeforeTest; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import picard.analysis.TheoreticalSensitivityMetrics; import picard.cmdline.CommandLineProgramTest; import picard.sam.SortSam; import picard.util.TestNGUtil; @@ -22,6 +23,9 @@ import java.io.File; import java.io.FileReader; import 
java.io.IOException; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; import java.util.Random; public class CollectTargetedMetricsTest extends CommandLineProgramTest { @@ -31,8 +35,10 @@ private File tempSamFile; private File tempSamFileIndex; private File outfile; + private File tsOutfile; // Theoretical sensitivity file private File perTargetOutfile; private final static int LENGTH = 99; + private final static int RANDOM_SEED = 51; final String referenceFile = "testdata/picard/quality/chrM.reference.fasta"; final String emptyIntervals = "testdata/picard/quality/chrM.empty.interval_list"; @@ -82,6 +88,7 @@ void setupBuilder() throws IOException { //Add to setBuilder final SAMRecordSetBuilder setBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate); + setBuilder.setRandomSeed(RANDOM_SEED); setBuilder.setReadGroup(readGroupRecord); setBuilder.setUseNmFlag(true); setBuilder.setHeader(header); @@ -121,8 +128,10 @@ void setupBuilder() throws IOException { //create output files for tests outfile = File.createTempFile("test", ".TargetedMetrics_Coverage"); perTargetOutfile = File.createTempFile("perTarget", ".perTargetCoverage"); + tsOutfile = File.createTempFile("test", ".TheoreticalSensitivityMetrics"); outfile.deleteOnExit(); perTargetOutfile.deleteOnExit(); + tsOutfile.deleteOnExit(); } @DataProvider(name = "targetedIntervalDataProvider") @@ -160,6 +169,61 @@ public void runCollectTargetedMetricsTest(final File input, final File outfile, } } + @DataProvider(name = "theoreticalSensitivityDataProvider") + public Object[][] theoreticalSensitivityDataProvider() { + + return new Object[][] { + // This test is primarily used as an integration test since theoretical sensitivity doesn't converge + // well with a sample size of 10. The sample size is set so low as to prevent the tests from taking + // too long to run. 
+ {tempSamFile, outfile, tsOutfile, perTargetOutfile, referenceFile, singleIntervals, 10, + Arrays.asList(0.01, 0.05, 0.10, 0.30, 0.50), // Allele fraction + Arrays.asList(0.01, 0.59, 0.87, 0.99, 0.99), // Expected sensitivity + 0.12, 0.94 + } + }; + } + + @Test(dataProvider = "theoreticalSensitivityDataProvider") + public void runCollectTargetedMetricsTheoreticalSensitivityTest(final File input, final File outfile, final File tsOutfile, final File perTargetOutfile, final String referenceFile, + final String targetIntervals, final int sampleSize, final List alleleFractions, final List expectedSensitivities, + final double additionalAlleleFraction, final double additionalExpectedSensitivity) throws IOException { + + final String[] args = new String[] { + "TARGET_INTERVALS=" + targetIntervals, + "INPUT=" + input.getAbsolutePath(), + "OUTPUT=" + outfile.getAbsolutePath(), + "REFERENCE_SEQUENCE=" + referenceFile, + "PER_TARGET_COVERAGE=" + perTargetOutfile.getAbsolutePath(), + "LEVEL=ALL_READS", + "AMPLICON_INTERVALS=" + targetIntervals, + "ALLELE_FRACTION=" + additionalAlleleFraction, + "THEORETICAL_SENSITIVITY_OUTPUT=" + tsOutfile.getAbsolutePath(), + "SAMPLE_SIZE=" + sampleSize + }; + + Assert.assertEquals(runPicardCommandLine(args), 0); + + final MetricsFile output = new MetricsFile<>(); + output.read(new FileReader(tsOutfile.getAbsolutePath())); + + final Histogram h = output.getHistogram(); + + // Ensure that the sensitivity calculated for the option ALLELE_FRACTION is correct + Assert.assertEquals(h.get(additionalAlleleFraction).getValue(), additionalExpectedSensitivity, 0.01); + + // Ensure that the sensitivities calculated for the default allele fractions are correct. 
+ Iterator alleleFraction = alleleFractions.iterator(); + Iterator expectedSensitivity = expectedSensitivities.iterator(); + Assert.assertEquals(alleleFractions.size(), expectedSensitivities.size(), "List of allele fractions to test is not the same size as the list of expected sensitivities."); + while (alleleFraction.hasNext() && expectedSensitivity.hasNext()) { + final double af = h.get(alleleFraction.next()).getValue(); + final double es = expectedSensitivity.next(); + + Assert.assertEquals(af, es, 0.01); + } + } + @Test() public void testRawBqDistributionWithSoftClips() throws IOException {