diff --git a/src/main/java/picard/analysis/AbstractWgsMetricsCollector.java b/src/main/java/picard/analysis/AbstractWgsMetricsCollector.java
new file mode 100644
index 000000000..f84f300a5
--- /dev/null
+++ b/src/main/java/picard/analysis/AbstractWgsMetricsCollector.java
@@ -0,0 +1,219 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.analysis;
+
+import htsjdk.samtools.metrics.MetricsFile;
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.util.AbstractLocusInfo;
+import htsjdk.samtools.util.AbstractRecordAndOffset;
+import htsjdk.samtools.util.Histogram;
+import htsjdk.samtools.util.IntervalList;
+import htsjdk.samtools.util.SequenceUtil;
+import picard.filter.CountingFilter;
+import picard.filter.CountingPairedFilter;
+
+/**
+ * Class for collecting data on reference coverage, base qualities and excluded bases from one AbstractLocusInfo object for
+ * CollectWgsMetrics.
+ *
+ * The shared code for forming result for CollectWgsMetrics is abstracted into this class.
+ * Classes that extend this collector implement their logic in addInfo() method.
+ * @author Mariia_Zueva@epam.com, EPAM Systems, Inc.
+ */
+
+public abstract class AbstractWgsMetricsCollector {
+
+ /**
+ * The source CollectWgsMetrics object
+ */
+ final CollectWgsMetrics collectWgsMetrics;
+ /** Count of sites with a given depth of coverage. Includes all but quality 2 bases.
+ * We draw depths from this histogram when we calculate the theoretical het sensitivity.
+ */
+ protected final long[] unfilteredDepthHistogramArray;
+ /** Count of bases observed with a given base quality. Includes all but quality 2 bases.
+ * We draw bases from this histogram when we calculate the theoretical het sensitivity.
+ */
+ protected final long[] unfilteredBaseQHistogramArray;
+ /**
+ * Count of sites with a given depth of coverage.
+ * Excludes bases with quality below MINIMUM_BASE_QUALITY (default 20).
+ */
+ protected final long[] highQualityDepthHistogramArray;
+ /**
+ * Number of aligned bases that were filtered out because they were of low base quality (default is < 20).
+ */
+ long basesExcludedByBaseq = 0;
+ /**
+ * Number of aligned bases that were filtered out because they were the second observation from an insert with overlapping reads.
+ */
+ long basesExcludedByOverlap = 0;
+ /**
+ * Number of aligned bases that were filtered out because they would have raised coverage above the capped value (default cap = 250x).
+ */
+ long basesExcludedByCapping = 0;
+ /**
+ * Positions with coverage exceeding this value are treated as if they had coverage at this value
+ */
+ protected final int coverageCap;
+
+ protected final IntervalList intervals;
+ /**
+     * This value indicates that processing will stop after the amount of genomic bases specified in STOP_AFTER.
+ */
+ private final boolean usingStopAfter;
+ /**
+ * The number of processed genomic bases
+ */
+ protected long counter = 0;
+
+ /**
+ * Creates a collector and initializes the inner data structures
+ *
+ * @param collectWgsMetrics CollectWgsMetrics, that creates this collector
+ * @param coverageCap coverage cap
+ */
+ AbstractWgsMetricsCollector(CollectWgsMetrics collectWgsMetrics, final int coverageCap, final IntervalList intervals) {
+ if (coverageCap <= 0) {
+ throw new IllegalArgumentException("Coverage cap must be positive.");
+ }
+ this.collectWgsMetrics = collectWgsMetrics;
+ unfilteredDepthHistogramArray = new long[coverageCap + 1];
+ highQualityDepthHistogramArray = new long[coverageCap + 1];
+ unfilteredBaseQHistogramArray = new long[Byte.MAX_VALUE];
+ this.coverageCap = coverageCap;
+ this.intervals = intervals;
+ this.usingStopAfter = collectWgsMetrics.STOP_AFTER > 0;
+ }
+
+ /**
+ * Accumulates the data from AbstractLocusInfo in inner structures
+ * @param info {@link htsjdk.samtools.util.AbstractLocusInfo} with aligned to reference position reads
+ * @param ref {@link htsjdk.samtools.reference.ReferenceSequence}
+     * @param referenceBaseN true if the current value of the reference base represents a no call
+ */
+ public abstract void addInfo(final AbstractLocusInfo info, final ReferenceSequence ref, boolean referenceBaseN);
+
+ /**
+ * Adds collected metrics and depth histogram to file
+ * @param file MetricsFile for result of collector's work
+ * @param dupeFilter counting filter for duplicate reads
+ * @param mapqFilter counting filter for mapping quality
+ * @param pairFilter counting filter for reads without a mapped mate pair
+ */
+ public void addToMetricsFile(final MetricsFile file,
+ final boolean includeBQHistogram,
+ final CountingFilter dupeFilter,
+ final CountingFilter mapqFilter,
+ final CountingPairedFilter pairFilter) {
+ final CollectWgsMetrics.WgsMetrics
+ metrics = getMetrics(dupeFilter, mapqFilter, pairFilter);
+
+ // add them to the file
+ file.addMetric(metrics);
+ file.addHistogram(getHighQualityDepthHistogram());
+ if (includeBQHistogram) addBaseQHistogram(file);
+ }
+
+ protected void addBaseQHistogram(final MetricsFile file) {
+ file.addHistogram(getUnfilteredBaseQHistogram());
+ }
+
+ protected Histogram getHighQualityDepthHistogram() {
+ return getHistogram(highQualityDepthHistogramArray, "coverage", "high_quality_coverage_count");
+ }
+
+ protected Histogram getUnfilteredDepthHistogram() {
+ return getHistogram(unfilteredDepthHistogramArray, "coverage", "unfiltered_coverage_count");
+ }
+
+ protected Histogram getUnfilteredBaseQHistogram() {
+ return getHistogram(unfilteredBaseQHistogramArray, "baseq", "unfiltered_baseq_count");
+ }
+
+ protected Histogram getHistogram(final long[] array, final String binLabel, final String valueLabel) {
+ final Histogram histogram = new Histogram<>(binLabel, valueLabel);
+ for (int i = 0; i < array.length; ++i) {
+ histogram.increment(i, array[i]);
+ }
+ return histogram;
+ }
+
+ /**
+ * Creates CollectWgsMetrics.WgsMetrics - the object holding the result of CollectWgsMetrics
+ *
+ * @param dupeFilter counting filter for duplicate reads
+ * @param mapqFilter counting filter for mapping quality
+ * @param pairFilter counting filter for reads without a mapped mate pair
+ * @return CollectWgsMetrics.WgsMetrics with set fields
+ */
+ protected CollectWgsMetrics.WgsMetrics getMetrics(final CountingFilter dupeFilter,
+ final CountingFilter mapqFilter,
+ final CountingPairedFilter pairFilter) {
+ return collectWgsMetrics.generateWgsMetrics(
+ this.intervals,
+ getHighQualityDepthHistogram(),
+ getUnfilteredDepthHistogram(),
+ collectWgsMetrics.getBasesExcludedBy(mapqFilter),
+ collectWgsMetrics.getBasesExcludedBy(dupeFilter),
+ collectWgsMetrics.getBasesExcludedBy(pairFilter),
+ basesExcludedByBaseq,
+ basesExcludedByOverlap,
+ basesExcludedByCapping,
+ coverageCap,
+ getUnfilteredBaseQHistogram(),
+ collectWgsMetrics.SAMPLE_SIZE
+ );
+ }
+
+ /**
+     * @return true, if the number of processed loci exceeded the threshold, otherwise false
+ */
+ boolean isTimeToStop(final long processedLoci) {
+ return usingStopAfter && processedLoci > collectWgsMetrics.STOP_AFTER - 1;
+ }
+
+ /**
+     * Sets the counter to the current number of processed loci. The counter must be updated
+     * from outside, since no call reference positions are skipped outside of the collector.
+ *
+ * @param counter number of processed loci
+ */
+ public void setCounter(long counter) {
+ this.counter = counter;
+ }
+
+ /**
+ * Checks if reference base at given position is unknown.
+ *
+ * @param position to check the base
+ * @param ref reference sequence
+ * @return true if reference base at position represents a no call, otherwise false
+ */
+ boolean isReferenceBaseN(final int position, final ReferenceSequence ref) {
+ final byte base = ref.getBases()[position - 1];
+ return SequenceUtil.isNoCall(base);
+ }
+}
diff --git a/src/main/java/picard/analysis/CollectWgsMetrics.java b/src/main/java/picard/analysis/CollectWgsMetrics.java
index ab7df819f..645adb3d7 100644
--- a/src/main/java/picard/analysis/CollectWgsMetrics.java
+++ b/src/main/java/picard/analysis/CollectWgsMetrics.java
@@ -47,16 +47,16 @@
import java.io.File;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
-import java.util.stream.LongStream;
import static picard.cmdline.StandardOptionDefinitions.MINIMUM_MAPPING_QUALITY_SHORT_NAME;
/**
* Computes a number of metrics that are useful for evaluating coverage and performance of whole genome sequencing experiments.
- *
+ * Two algorithms are available for these metrics: default and fast. The fast algorithm is enabled by the USE_FAST_ALGORITHM option.
+ * The fast algorithm works better for regions of BAM file with coverage at least 10 reads per locus,
+ * for lower coverage the algorithms perform the same.
* @author tfennell
*/
@CommandLineProgramProperties(
@@ -118,6 +118,12 @@
@Option(doc="Sample Size used for Theoretical Het Sensitivity sampling. Default is 10000.", optional = true)
public int SAMPLE_SIZE=10000;
+ @Option(doc = "If true, fast algorithm is used.")
+ public boolean USE_FAST_ALGORITHM = false;
+
+ @Option(doc = "Average read length in the file. Default is 150.", optional = true)
+ public int READ_LENGTH = 150;
+
@Option(doc = "An interval list file that contains the positions to restrict the assessment. Please note that " +
"all bases of reads that overlap these intervals will be considered, even if some of those bases extend beyond the boundaries of " +
"the interval. The ideal use case for this argument is to use it to restrict the calculation to a subset of (whole) contigs. To " +
@@ -437,7 +443,7 @@ protected int doWork() {
final ProgressLogger progress = new ProgressLogger(log, 10000000, "Processed", "loci");
final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
final SamReader in = getSamReader();
- final SamLocusIterator iterator = getLocusIterator(in);
+ final AbstractLocusIterator iterator = getLocusIterator(in);
final List filters = new ArrayList<>();
final CountingFilter mapqFilter = new CountingMapQFilter(MINIMUM_MAPPING_QUALITY);
@@ -451,49 +457,26 @@ protected int doWork() {
filters.add(pairFilter);
}
iterator.setSamFilters(filters);
- iterator.setEmitUncoveredLoci(true);
iterator.setMappingQualityScoreCutoff(0); // Handled separately because we want to count bases
- iterator.setQualityScoreCutoff(0); // Handled separately because we want to count bases
iterator.setIncludeNonPfReads(false);
- iterator.setMaxReadsToAccumulatePerLocus(LOCUS_ACCUMULATION_CAP);
-
- final WgsMetricsCollector collector = getCollector(COVERAGE_CAP, getIntervalsToExamine());
-
- final boolean usingStopAfter = STOP_AFTER > 0;
- final long stopAfter = STOP_AFTER - 1;
- long counter = 0;
- // Loop through all the loci
- while (iterator.hasNext()) {
- final SamLocusIterator.LocusInfo info = iterator.next();
- final ReferenceSequence ref = refWalker.get(info.getSequenceIndex());
-
- // Check that the reference is not N
- final byte base = ref.getBases()[info.getPosition() - 1];
- if (SequenceUtil.isNoCall(base)) continue;
-
- // add to the collector
- collector.addInfo(info);
-
- // Record progress and perhaps stop
- progress.record(info.getSequenceName(), info.getPosition());
- if (usingStopAfter && ++counter > stopAfter) break;
- }
-
- // check that we added the same number of bases to the raw coverage histogram and the base quality histograms
- final long sumBaseQ= Arrays.stream(collector.unfilteredBaseQHistogramArray).sum();
- final long sumDepthHisto = LongStream.rangeClosed(0, collector.coverageCap).map(i -> (i * collector.unfilteredDepthHistogramArray[(int) i])).sum();
- if (sumBaseQ != sumDepthHisto) {
- log.error("Coverage and baseQ distributions contain different amount of bases!");
- }
+ final AbstractWgsMetricsCollector collector = getCollector(COVERAGE_CAP, getIntervalsToExamine());
+ final WgsMetricsProcessor processor = getWgsMetricsProcessor(progress, refWalker, iterator, collector);
+ processor.processFile();
final MetricsFile out = getMetricsFile();
- collector.addToMetricsFile(out, INCLUDE_BQ_HISTOGRAM, dupeFilter, mapqFilter, pairFilter);
+ processor.addToMetricsFile(out, INCLUDE_BQ_HISTOGRAM, dupeFilter, mapqFilter, pairFilter);
out.write(OUTPUT);
return 0;
}
+ private WgsMetricsProcessorImpl getWgsMetricsProcessor(
+ ProgressLogger progress, ReferenceSequenceFileWalker refWalker,
+ AbstractLocusIterator> iterator, AbstractWgsMetricsCollector collector) {
+ return new WgsMetricsProcessorImpl<>(iterator, refWalker, collector, progress);
+ }
+
/** Gets the intervals over which we will calculate metrics. */
protected IntervalList getIntervalsToExamine() {
final IntervalList intervals;
@@ -547,7 +530,7 @@ protected WgsMetrics generateWgsMetrics(final IntervalList intervals,
);
}
- private WgsMetrics generateWgsMetrics(final IntervalList intervals,
+ WgsMetrics generateWgsMetrics(final IntervalList intervals,
final Histogram highQualityDepthHistogram,
final Histogram unfilteredDepthHistogram,
final long basesExcludedByMapq,
@@ -596,48 +579,49 @@ protected long getBasesExcludedBy(final CountingFilter filter) {
return filter.getFilteredBases();
}
- protected SamLocusIterator getLocusIterator(final SamReader in) {
- return (INTERVALS != null) ? new SamLocusIterator(in, IntervalList.fromFile(INTERVALS)) : new SamLocusIterator(in);
+ /**
+     * Creates {@link htsjdk.samtools.util.AbstractLocusIterator} implementation according to the {@link #USE_FAST_ALGORITHM} value.
+ *
+ * @param in inner {@link htsjdk.samtools.SamReader}
+     * @return if {@link #USE_FAST_ALGORITHM} is enabled, returns {@link htsjdk.samtools.util.EdgeReadIterator} implementation,
+ * otherwise default algorithm is used and {@link htsjdk.samtools.util.SamLocusIterator} is returned.
+ */
+ protected AbstractLocusIterator getLocusIterator(final SamReader in) {
+ if (USE_FAST_ALGORITHM) {
+ return (INTERVALS != null) ? new EdgeReadIterator(in, IntervalList.fromFile(INTERVALS)) : new EdgeReadIterator(in);
+ }
+ SamLocusIterator iterator = (INTERVALS != null) ? new SamLocusIterator(in, IntervalList.fromFile(INTERVALS)) : new SamLocusIterator(in);
+ iterator.setMaxReadsToAccumulatePerLocus(LOCUS_ACCUMULATION_CAP);
+ iterator.setEmitUncoveredLoci(true);
+ iterator.setQualityScoreCutoff(0);
+ return iterator;
}
/**
+     * Creates {@link picard.analysis.AbstractWgsMetricsCollector} implementation according to the {@link #USE_FAST_ALGORITHM} value.
+ *
* @param coverageCap the maximum depth/coverage to consider.
* @param intervals the intervals over which metrics are collected.
- * @return
+     * @return if {@link #USE_FAST_ALGORITHM} is enabled, returns {@link picard.analysis.FastWgsMetricsCollector} implementation,
+ * otherwise default algorithm is used and {@link picard.analysis.CollectWgsMetrics.WgsMetricsCollector} is returned.
*/
- protected WgsMetricsCollector getCollector(final int coverageCap, final IntervalList intervals) {
- return new WgsMetricsCollector(coverageCap, intervals);
+ protected AbstractWgsMetricsCollector getCollector(final int coverageCap, final IntervalList intervals) {
+ return USE_FAST_ALGORITHM ? new FastWgsMetricsCollector(this, coverageCap, intervals) :
+ new WgsMetricsCollector(this, coverageCap, intervals);
}
- protected class WgsMetricsCollector {
+ protected static class WgsMetricsCollector extends AbstractWgsMetricsCollector {
- // Count of sites with a given depth of coverage. Includes all but quality 2 bases.
- // We draw depths from this histogram when we calculate the theoretical het sensitivity.
- protected final long[] unfilteredDepthHistogramArray;
-
- // Count of bases observed with a given base quality. Includes all but quality 2 bases.
- // We draw bases from this histogram when we calculate the theoretical het sensitivity.
- protected final long[] unfilteredBaseQHistogramArray;
-
- // Count of sites with a given depth of coverage. Excludes bases with quality below MINIMUM_BASE_QUALITY (default 20).
- protected final long[] highQualityDepthHistogramArray;
-
- private long basesExcludedByBaseq = 0;
- private long basesExcludedByOverlap = 0;
- private long basesExcludedByCapping = 0;
- protected final IntervalList intervals;
- protected final int coverageCap;
-
- public WgsMetricsCollector(final int coverageCap, final IntervalList intervals) {
- unfilteredDepthHistogramArray = new long[coverageCap + 1];
- highQualityDepthHistogramArray = new long[coverageCap + 1];
- unfilteredBaseQHistogramArray = new long[Byte.MAX_VALUE];
- this.coverageCap = coverageCap;
- this.intervals = intervals;
+ public WgsMetricsCollector(final CollectWgsMetrics metrics, final int coverageCap, final IntervalList intervals) {
+ super(metrics, coverageCap, intervals);
}
- public void addInfo(final SamLocusIterator.LocusInfo info) {
+ @Override
+ public void addInfo(final AbstractLocusInfo info, final ReferenceSequence ref, boolean referenceBaseN) {
+ if (referenceBaseN) {
+ return;
+ }
// Figure out the coverage while not counting overlapping reads twice, and excluding various things
final HashSet readNames = new HashSet<>(info.getRecordAndOffsets().size());
int pileupSize = 0;
@@ -653,78 +637,15 @@ public void addInfo(final SamLocusIterator.LocusInfo info) {
unfilteredDepth++;
}
- if (recs.getBaseQuality() < MINIMUM_BASE_QUALITY ||
+ if (recs.getBaseQuality() < collectWgsMetrics.MINIMUM_BASE_QUALITY ||
SequenceUtil.isNoCall(recs.getReadBase())) { ++basesExcludedByBaseq; continue; }
if (!readNames.add(recs.getRecord().getReadName())) { ++basesExcludedByOverlap; continue; }
-
pileupSize++;
}
-
final int highQualityDepth = Math.min(pileupSize, coverageCap);
if (highQualityDepth < pileupSize) basesExcludedByCapping += pileupSize - coverageCap;
highQualityDepthHistogramArray[highQualityDepth]++;
unfilteredDepthHistogramArray[unfilteredDepth]++;
}
-
- public void addToMetricsFile(final MetricsFile file,
- final boolean includeBQHistogram,
- final CountingFilter dupeFilter,
- final CountingFilter mapqFilter,
- final CountingPairedFilter pairFilter) {
- final WgsMetrics metrics = getMetrics(dupeFilter, mapqFilter, pairFilter);
-
- // add them to the file
- file.addMetric(metrics);
- file.addHistogram(getHighQualityDepthHistogram());
- if (includeBQHistogram) addBaseQHistogram(file);
-
-
- }
-
- protected void addBaseQHistogram(final MetricsFile file) {
- file.addHistogram(getUnfilteredBaseQHistogram());
- }
-
- protected Histogram getHighQualityDepthHistogram() {
- return getHistogram(highQualityDepthHistogramArray, "coverage", "high_quality_coverage_count");
- }
-
- protected Histogram getUnfilteredDepthHistogram(){
- return getHistogram(unfilteredDepthHistogramArray, "coverage", "unfiltered_coverage_count");
- }
-
- protected Histogram getUnfilteredBaseQHistogram() {
- return getHistogram(unfilteredBaseQHistogramArray, "baseq", "unfiltered_baseq_count");
- }
-
- protected Histogram getHistogram(final long[] array, final String binLabel, final String valueLabel) {
- final Histogram histogram = new Histogram<>(binLabel, valueLabel);
- for (int i = 0; i < array.length; ++i) {
- histogram.increment(i, array[i]);
- }
- return histogram;
- }
-
- protected WgsMetrics getMetrics(final CountingFilter dupeFilter,
- final CountingFilter mapqFilter,
- final CountingPairedFilter pairFilter) {
- return generateWgsMetrics(
- this.intervals,
- getHighQualityDepthHistogram(),
- getUnfilteredDepthHistogram(),
- getBasesExcludedBy(mapqFilter),
- getBasesExcludedBy(dupeFilter),
- getBasesExcludedBy(pairFilter),
- basesExcludedByBaseq,
- basesExcludedByOverlap,
- basesExcludedByCapping,
- coverageCap,
- getUnfilteredBaseQHistogram(),
- SAMPLE_SIZE
- );
- }
-
-
}
}
-
diff --git a/src/main/java/picard/analysis/CollectWgsMetricsWithNonZeroCoverage.java b/src/main/java/picard/analysis/CollectWgsMetricsWithNonZeroCoverage.java
index 2c872ff43..17e013afd 100644
--- a/src/main/java/picard/analysis/CollectWgsMetricsWithNonZeroCoverage.java
+++ b/src/main/java/picard/analysis/CollectWgsMetricsWithNonZeroCoverage.java
@@ -27,7 +27,11 @@
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.metrics.MetricsFile;
-import htsjdk.samtools.util.*;
+import htsjdk.samtools.util.Histogram;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.IntervalList;
+import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.StringUtil;
import picard.PicardException;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
@@ -125,7 +129,7 @@ protected int doWork() {
// Initialize the SamReader, so the header is available prior to super.doWork, for getIntervalsToExamine call. */
getSamReader();
- this.collector = new WgsMetricsWithNonZeroCoverageCollector(COVERAGE_CAP, getIntervalsToExamine());
+ this.collector = new WgsMetricsWithNonZeroCoverageCollector(this, COVERAGE_CAP, getIntervalsToExamine());
super.doWork();
final List readGroups = getSamFileHeader().getReadGroups();
@@ -187,8 +191,9 @@ protected WgsMetricsCollector getCollector(final int coverageCap, final Interval
Histogram highQualityDepthHistogram;
Histogram highQualityDepthHistogramNonZero;
- public WgsMetricsWithNonZeroCoverageCollector(final int coverageCap, final IntervalList intervals) {
- super(coverageCap, intervals);
+ public WgsMetricsWithNonZeroCoverageCollector(final CollectWgsMetricsWithNonZeroCoverage metrics,
+ final int coverageCap, final IntervalList intervals) {
+ super(metrics, coverageCap, intervals);
}
@Override
@@ -239,5 +244,4 @@ public boolean areHistogramsEmpty() {
return (highQualityDepthHistogram.isEmpty() || highQualityDepthHistogramNonZero.isEmpty());
}
}
-
-}
\ No newline at end of file
+}
diff --git a/src/main/java/picard/analysis/CounterManager.java b/src/main/java/picard/analysis/CounterManager.java
new file mode 100644
index 000000000..112536b13
--- /dev/null
+++ b/src/main/java/picard/analysis/CounterManager.java
@@ -0,0 +1,194 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.analysis;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+/**
+ * Class for managing a list of Counters of integer,
+ * provides methods to access data from Counters with respect to an offset.
+ * Each Counter represents a certain region of a processed sequence starting from offset and
+ * can accumulate some value for each locus of this sequence, for instance, read coverage.
+ * @author Mariia_Zueva@epam.com, EPAM Systems, Inc.
+ */
+
+public class CounterManager {
+
+ /**
+ * Length of inner Counter arrays
+ */
+ private final int arrayLength;
+ /**
+ * Proposed length of processed reads
+ */
+ private final int readLength;
+ /**
+     * Current offset from the start of the processed sequence. If offset = 100, this means that the value in
+     * Counter arrays at index 0 represents the 100th position of the sequence.
+ */
+ private int offset = 0;
+ /**
+ * List of Counter arrays for managing
+ */
+ private final ArrayList arrays = new ArrayList<>(2);
+
+ /**
+ * Constructor creates new CounterManager without any Counters,
+ * counters are added to CounterManager via newCounter() method
+ *
+ * @param arrayLength length of inner Counter arrays
+ * @param readLength proposed length of processed reads
+ */
+ public CounterManager(final int arrayLength, int readLength) {
+ this.arrayLength = arrayLength;
+ this.readLength = readLength;
+ }
+
+ /**
+ * Method added for use in unit tests
+ *
+     * @return the offset from the reference sequence that is represented by index 0 of {@link picard.analysis.CounterManager.Counter} arrays.
+ */
+ int getOffset() {
+ return offset;
+ }
+
+ /**
+ * Method added for use in unit tests
+ *
+     * @param offset the offset from the reference sequence that will be represented by index 0 of {@link picard.analysis.CounterManager.Counter} arrays.
+ */
+ void setOffset(int offset) {
+ this.offset = offset;
+ }
+
+ /**
+ * Method checks that new locus position is not out of bounds of Counter arrays and there is enough
+     * space in them to hold information on at least one more read of length {@link #readLength}.
+ * If there is no free space, but there is accumulated information in Counter arrays after new locus position
+ * that we may need, the arrays are rebased, so that 0 index of arrays represents new locus position.
+ * In other case, we just clear the arrays.
+ *
+ * @param locusPosition position in the reference sequence,
+ * that will be represented by index 0 of {@link picard.analysis.CounterManager.Counter} arrays.
+ */
+ public void checkOutOfBounds(int locusPosition) {
+ if (locusPosition - offset + readLength >= arrayLength) {
+ if (locusPosition - offset < arrayLength) {
+ rebase(locusPosition);
+ } else {
+ clear();
+ offset = locusPosition;
+ }
+ }
+ }
+
+ /**
+ * Rebases inner Counter arrays so that 0 index of array represents the new locus position
+ *
+ * @param locusPosition position in the reference sequence,
+ * that will be represented by index 0 of {@link picard.analysis.CounterManager.Counter} arrays.
+ */
+ private void rebase(int locusPosition) {
+ if (locusPosition < offset) {
+ throw new IllegalArgumentException("Position in the reference sequence is lesser than offset.");
+ }
+ for (Counter counter : arrays) {
+ final int[] array = counter.array;
+ final int skipLength = locusPosition - offset;
+ System.arraycopy(array, skipLength, array, 0, arrayLength - skipLength);
+ Arrays.fill(array, arrayLength - skipLength, arrayLength, 0);
+ }
+ offset = locusPosition;
+ }
+
+ /**
+ * Clears all inner Counter arrays
+ */
+ public void clear() {
+ for (Counter counter : arrays) {
+ final int[] array = counter.array;
+ Arrays.fill(array, 0);
+ }
+ offset = 0;
+ }
+
+ /**
+ * Creates a new Counter object and adds it to the list of managed Counters.
+ *
+ * @return {@link picard.analysis.CounterManager.Counter}, that will be managed by current {@link picard.analysis.CounterManager}
+ */
+ public Counter newCounter() {
+ final Counter counter = new Counter(arrayLength);
+ arrays.add(counter);
+ return counter;
+ }
+
+ /**
+ * Class represents an integer array with methods to increment and get the values from it with respect
+ * to offset of outer {@link picard.analysis.CounterManager}.
+ */
+ public class Counter {
+
+ /**
+ * Inner array to store accumulated values
+ */
+ private final int[] array;
+
+ /**
+ * @param arrayLength length of the inner array
+ */
+ private Counter(int arrayLength) {
+ array = new int[arrayLength];
+ }
+
+ /**
+ * Increments value corresponding to reference sequence index
+ *
+ * @param index in the reference sequence
+ */
+ public void increment(int index) {
+ checkIndex(index);
+ array[index - offset]++;
+ }
+
+ /**
+ * @param index in the reference sequence
+ * @return value from the inner array, corresponding to input reference index
+ */
+ public int get(int index) {
+ checkIndex(index);
+ return array[index - offset];
+ }
+
+ private void checkIndex(int index) {
+ if ((index - offset) < 0 || (index - offset) >= array.length) {
+ throw new ArrayIndexOutOfBoundsException("The requested index " + index + " is out of counter bounds. " +
+ "Possible cause of exception can be wrong READ_LENGTH parameter (much smaller than actual read length)");
+ }
+ }
+ }
+}
diff --git a/src/main/java/picard/analysis/FastWgsMetricsCollector.java b/src/main/java/picard/analysis/FastWgsMetricsCollector.java
new file mode 100644
index 000000000..989474d66
--- /dev/null
+++ b/src/main/java/picard/analysis/FastWgsMetricsCollector.java
@@ -0,0 +1,195 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.analysis;
+
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.util.AbstractLocusInfo;
+import htsjdk.samtools.util.AbstractRecordAndOffset;
+import htsjdk.samtools.util.EdgingRecordAndOffset;
+import htsjdk.samtools.util.IntervalList;
+import htsjdk.samtools.util.SequenceUtil;
+
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.HashMap;
+
+/**
+ * Class represents fast algorithm for collecting data from AbstractLocusInfo
+ * with a list of aligned {@link htsjdk.samtools.util.EdgingRecordAndOffset} objects. According to the algorithm
+ * we receive only two {@link htsjdk.samtools.util.EdgingRecordAndOffset} objects for each alignment block of a read:
+ * one for the start of block and one for the end. When meeting a {@link htsjdk.samtools.util.EdgingRecordAndOffset}
+ * with type {@link htsjdk.samtools.util.EdgingRecordAndOffset.Type#BEGIN}, all information from the alignment block is accumulated in the collector,
+ * read name is added to a map holding the names of processed reads for detecting overlapping positions.
+ * When meeting a {@link htsjdk.samtools.util.EdgingRecordAndOffset} with type {@link htsjdk.samtools.util.EdgingRecordAndOffset.Type#END},
+ * the read name is removed from the map with names of processed reads.
+ * @author Mariia_Zueva@epam.com, EPAM Systems, Inc.
+ */
+
+public class FastWgsMetricsCollector extends AbstractWgsMetricsCollector<EdgingRecordAndOffset> {
+
+ /**
+ * Index of the sequence of the previous processed AbstractLocusInfo
+ */
+ private int previousSequenceIndex;
+
+ /**
+ * Manager for {@link this#pileupSize} Counter.
+ */
+ private final CounterManager counterManager;
+ /**
+ * Counter, accumulating the data on coverage for the calculation of base quality histogram.
+ */
+ private final CounterManager.Counter pileupSize;
+ /**
+ * Counter, accumulating the data on unfiltered coverage (includes all but quality 2 bases)
+ * for the calculation of theoretical het sensitivity.
+ */
+ private final CounterManager.Counter unfilteredDepthSize;
+
+ /**
+ * Map, holding information on currently processed reads, that possibly have overlapping regions.
+ */
+ private Map<String, Set<EdgingRecordAndOffset>> readsNames;
+
+ /**
+ * Determines the size of created {@link picard.analysis.CounterManager.Counter} objects. The bigger {@link picard.analysis.CounterManager.Counter} objects
+ * are created, the less rebasing in {@link picard.analysis.CounterManager.Counter} will occur.
+ */
+ private final int ARRAY_SIZE_PER_READ_LENGTH = 2000;
+
+ /**
+ * Creates a collector and initializes the inner data structures
+ *
+ * @param collectWgsMetrics CollectWgsMetrics, that creates this collector
+ * @param coverageCap coverage cap
+ */
+ public FastWgsMetricsCollector(CollectWgsMetrics collectWgsMetrics, int coverageCap, final IntervalList intervals) {
+ super(collectWgsMetrics, coverageCap, intervals);
+ this.previousSequenceIndex = -1;
+ this.counterManager = new CounterManager(collectWgsMetrics.READ_LENGTH * ARRAY_SIZE_PER_READ_LENGTH, collectWgsMetrics.READ_LENGTH);
+ this.pileupSize = counterManager.newCounter();
+ this.unfilteredDepthSize = counterManager.newCounter();
+ }
+
+ @Override
+ public void addInfo(final AbstractLocusInfo<EdgingRecordAndOffset> info, final ReferenceSequence ref, boolean referenceBaseN) {
+ prepareCollector(info);
+ for (final EdgingRecordAndOffset record : info.getRecordAndOffsets()) {
+ final String readName = record.getReadName();
+ Optional<Set<EdgingRecordAndOffset>> recordsAndOffsetsForName = Optional.ofNullable((Set<EdgingRecordAndOffset>) readsNames.get(readName));
+ if (record.getType() == EdgingRecordAndOffset.Type.BEGIN) {
+ processRecord(info.getPosition(), ref, record, recordsAndOffsetsForName.orElse(new HashSet<>()));
+ } else {
+ recordsAndOffsetsForName.ifPresent(
+ edgingRecordAndOffsets -> removeRecordFromMap(record,
+ edgingRecordAndOffsets));
+ }
+ }
+ if (!referenceBaseN) {
+ final int readNamesSize = pileupSize.get(info.getPosition());
+ final int highQualityDepth = Math.min(readNamesSize, coverageCap);
+ if (highQualityDepth < readNamesSize) {
+ basesExcludedByCapping += readNamesSize - coverageCap;
+ }
+ highQualityDepthHistogramArray[highQualityDepth]++;
+ unfilteredDepthHistogramArray[unfilteredDepthSize.get(info.getPosition())]++;
+ }
+ }
+
+ private void processRecord(int position, ReferenceSequence ref, EdgingRecordAndOffset record, Set<EdgingRecordAndOffset> recordsAndOffsetsForName) {
+ long processedLoci = counter;
+ readsNames.put(record.getReadName(), recordsAndOffsetsForName);
+ final byte[] qualities = record.getBaseQualities();
+ final byte[] bases = record.getRecord().getReadBases();
+ for (int i = 0; i < record.getLength(); i++) {
+ final int index = i + position;
+ if (isReferenceBaseN(index, ref)) {
+ continue;
+ }
+ final byte quality = qualities[i + record.getOffset()];
+ if (quality <= 2) {
+ basesExcludedByBaseq++;
+ } else {
+ if (unfilteredDepthSize.get(index) < coverageCap) {
+ unfilteredBaseQHistogramArray[quality]++;
+ unfilteredDepthSize.increment(index);
+ }
+ if (quality < collectWgsMetrics.MINIMUM_BASE_QUALITY || SequenceUtil.isNoCall(bases[i + record.getOffset()])){
+ basesExcludedByBaseq++;
+ } else {
+ final int bsq = excludeByQuality(recordsAndOffsetsForName, index);
+ if (recordsAndOffsetsForName.size() - bsq > 0) {
+ basesExcludedByOverlap++;
+ } else {
+ pileupSize.increment(index);
+ }
+ }
+ }
+ if (isTimeToStop(++processedLoci)) {
+ break;
+ }
+ }
+ recordsAndOffsetsForName.add(record);
+ }
+
+ private void removeRecordFromMap(EdgingRecordAndOffset record, Set<EdgingRecordAndOffset> recordsAndOffsetsForName) {
+ if (recordsAndOffsetsForName.size() == 1) {
+ readsNames.remove(record.getReadName());
+ } else {
+ recordsAndOffsetsForName.remove(record.getStart());
+ }
+ }
+
+ /**
+ * Prepares the accumulator objects to process a new {@link htsjdk.samtools.util.AbstractLocusInfo}.
+ * If we switch to a new sequence, all accumulators are cleared.
+ *
+ * @param info the next {@link htsjdk.samtools.util.AbstractLocusInfo} to process
+ */
+ private void prepareCollector(AbstractLocusInfo<EdgingRecordAndOffset> info) {
+ if (readsNames == null) {
+ readsNames = new HashMap<>();
+ }
+ if (previousSequenceIndex != info.getSequenceIndex()) {
+ readsNames.clear();
+ counterManager.clear();
+ previousSequenceIndex = info.getSequenceIndex();
+ }
+ counterManager.checkOutOfBounds(info.getPosition());
+ }
+
+ private int excludeByQuality(final Set<EdgingRecordAndOffset> setForName, int position) {
+ int bsq = 0;
+ for (EdgingRecordAndOffset recordAndOffset : setForName) {
+ if (position - recordAndOffset.getRefPos() >= recordAndOffset.getLength()
+ || recordAndOffset.getBaseQuality(position) < collectWgsMetrics.MINIMUM_BASE_QUALITY) {
+ bsq++;
+ }
+ }
+ return bsq;
+ }
+}
diff --git a/src/main/java/picard/analysis/WgsMetricsProcessor.java b/src/main/java/picard/analysis/WgsMetricsProcessor.java
new file mode 100644
index 000000000..c4415c56d
--- /dev/null
+++ b/src/main/java/picard/analysis/WgsMetricsProcessor.java
@@ -0,0 +1,57 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.analysis;
+
+
+import htsjdk.samtools.metrics.MetricsFile;
+import picard.filter.CountingFilter;
+import picard.filter.CountingPairedFilter;
+
+/**
+ * Interface for processing data and generate result for CollectWgsMetrics
+ * @author Mariia_Zueva@epam.com, EPAM Systems, Inc.
+ */
+public interface WgsMetricsProcessor {
+
+ /**
+ * Method processes the input data and accumulates result data
+ */
+ void processFile();
+
+ /**
+ * Adds result metric's data to input file
+ *
+ * @param file MetricsFile for result of collector's work
+ * @param includeBQHistogram include base quality histogram
+ * @param dupeFilter counting filter for duplicate reads
+ * @param mapqFilter counting filter for mapping quality
+ * @param pairFilter counting filter for reads without a mapped mate pair
+ */
+ void addToMetricsFile(final MetricsFile<CollectWgsMetrics.WgsMetrics, Integer> file,
+ final boolean includeBQHistogram,
+ final CountingFilter dupeFilter,
+ final CountingFilter mapqFilter,
+ final CountingPairedFilter pairFilter);
+}
diff --git a/src/main/java/picard/analysis/WgsMetricsProcessorImpl.java b/src/main/java/picard/analysis/WgsMetricsProcessorImpl.java
new file mode 100644
index 000000000..d987f1685
--- /dev/null
+++ b/src/main/java/picard/analysis/WgsMetricsProcessorImpl.java
@@ -0,0 +1,129 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.analysis;
+
+import htsjdk.samtools.metrics.MetricsFile;
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
+import htsjdk.samtools.util.AbstractLocusInfo;
+import htsjdk.samtools.util.AbstractLocusIterator;
+import htsjdk.samtools.util.AbstractRecordAndOffset;
+import htsjdk.samtools.util.ProgressLogger;
+import htsjdk.samtools.util.Log;
+import picard.filter.CountingFilter;
+import picard.filter.CountingPairedFilter;
+
+import java.util.Arrays;
+import java.util.stream.LongStream;
+
+/**
+ * Implementation of {@link picard.analysis.WgsMetricsProcessor} that gets input data from a given iterator
+ * and processes it with a help of collector
+ * @author Mariia_Zueva@epam.com, EPAM Systems, Inc.
+ */
+public class WgsMetricsProcessorImpl<T extends AbstractRecordAndOffset> implements WgsMetricsProcessor {
+ /**
+ * Source of input data
+ */
+ private final AbstractLocusIterator<T, AbstractLocusInfo<T>> iterator;
+ /**
+ * Accumulates the data from iterator
+ */
+ private final AbstractWgsMetricsCollector<T> collector;
+ /**
+ * ReferenceWalker for a processed reference sequence
+ */
+ private final ReferenceSequenceFileWalker refWalker;
+ /**
+ * Logger for the progress of work
+ */
+ private final ProgressLogger progress;
+
+ private final Log log = Log.getInstance(WgsMetricsProcessorImpl.class);
+
+ /**
+ * @param iterator input {@link htsjdk.samtools.util.AbstractLocusIterator}
+ * @param refWalker over processed reference file
+ * @param collector input {@link picard.analysis.AbstractWgsMetricsCollector}
+ * @param progress logger
+ */
+ public WgsMetricsProcessorImpl(AbstractLocusIterator<T, AbstractLocusInfo<T>> iterator,
+ ReferenceSequenceFileWalker refWalker,
+ AbstractWgsMetricsCollector<T> collector,
+ ProgressLogger progress) {
+ this.iterator = iterator;
+ this.collector = collector;
+ this.refWalker = refWalker;
+ this.progress = progress;
+ }
+
+ /**
+ * Method gets the data from iterator for each locus and processes it with the help of collector.
+ */
+ @Override
+ public void processFile() {
+ long counter = 0;
+
+ while (iterator.hasNext()) {
+ final AbstractLocusInfo<T> info = iterator.next();
+ final ReferenceSequence ref = refWalker.get(info.getSequenceIndex());
+ boolean referenceBaseN = collector.isReferenceBaseN(info.getPosition(), ref);
+ collector.addInfo(info, ref, referenceBaseN);
+ if (referenceBaseN) {
+ continue;
+ }
+
+ progress.record(info.getSequenceName(), info.getPosition());
+ if (collector.isTimeToStop(++counter)) {
+ break;
+ }
+ collector.setCounter(counter);
+ }
+ // check that we added the same number of bases to the raw coverage histogram and the base quality histograms
+ final long sumBaseQ = Arrays.stream(collector.unfilteredBaseQHistogramArray).sum();
+ final long sumDepthHisto = LongStream.rangeClosed(0, collector.coverageCap).map(i -> (i * collector.unfilteredDepthHistogramArray[(int) i])).sum();
+ if (sumBaseQ != sumDepthHisto) {
+ log.error("Coverage and baseQ distributions contain different amount of bases!");
+ }
+ }
+
+ /**
+ * Adds result metric's data to input file
+ *
+ * @param file MetricsFile for result of collector's work
+ * @param includeBQHistogram include base quality histogram
+ * @param dupeFilter counting filter for duplicate reads
+ * @param mapqFilter counting filter for mapping quality
+ * @param pairFilter counting filter for reads without a mapped mate pair
+ */
+ @Override
+ public void addToMetricsFile(MetricsFile<CollectWgsMetrics.WgsMetrics, Integer> file,
+ boolean includeBQHistogram,
+ CountingFilter dupeFilter,
+ CountingFilter mapqFilter,
+ CountingPairedFilter pairFilter) {
+ collector.addToMetricsFile(file, includeBQHistogram, dupeFilter, mapqFilter, pairFilter);
+ }
+}
diff --git a/src/test/java/picard/analysis/AbstractWgsMetricsCollectorTest.java b/src/test/java/picard/analysis/AbstractWgsMetricsCollectorTest.java
new file mode 100644
index 000000000..e2bcc4f8f
--- /dev/null
+++ b/src/test/java/picard/analysis/AbstractWgsMetricsCollectorTest.java
@@ -0,0 +1,107 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.analysis;
+
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.util.AbstractLocusInfo;
+import org.testng.annotations.Test;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.assertFalse;
+import static picard.analysis.CollectWgsMetricsTestUtils.createIntervalList;
+
+public class AbstractWgsMetricsCollectorTest {
+
+ @Test
+ public void testForCollectorWithoutData(){
+ long[] templateQualHistogram = new long[127];
+ long[] templateHistogramArray = new long[11];
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ AbstractWgsMetricsCollector collector = new AbstractWgsMetricsCollector(collectWgsMetrics,
+ 10, createIntervalList()) {
+ @Override
+ public void addInfo(AbstractLocusInfo info, ReferenceSequence ref, boolean referenceBaseN) {
+ }
+ };
+ assertEquals(templateHistogramArray, collector.highQualityDepthHistogramArray);
+ assertEquals(templateQualHistogram, collector.unfilteredBaseQHistogramArray);
+ assertEquals(0, collector.basesExcludedByCapping);
+ assertEquals(0, collector.basesExcludedByOverlap);
+ assertEquals(0, collector.basesExcludedByBaseq);
+ }
+
+ @Test (expectedExceptions = IllegalArgumentException.class)
+ public void testForExceptionWithNegativeCoverage(){
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ new AbstractWgsMetricsCollector(collectWgsMetrics, -10, createIntervalList()) {
+ @Override
+ public void addInfo(AbstractLocusInfo info, ReferenceSequence ref, boolean referenceBaseN) {
+ }
+ };
+ }
+
+ @Test
+ public void testForSetCounter(){
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ AbstractWgsMetricsCollector collector = new AbstractWgsMetricsCollector(collectWgsMetrics,
+ 10, createIntervalList()) {
+ @Override
+ public void addInfo(AbstractLocusInfo info, ReferenceSequence ref, boolean referenceBaseN) {
+ }
+ };
+ long counter = 20;
+ collector.setCounter(counter);
+ assertEquals(20, collector.counter);
+ }
+
+ @Test
+ public void testForStop(){
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ collectWgsMetrics.STOP_AFTER = 10;
+ AbstractWgsMetricsCollector collector = new AbstractWgsMetricsCollector(collectWgsMetrics,
+ 10, createIntervalList()) {
+ @Override
+ public void addInfo(AbstractLocusInfo info, ReferenceSequence ref, boolean referenceBaseN) {
+ }
+ };
+ assertTrue(collector.isTimeToStop(10));
+ assertFalse(collector.isTimeToStop(2));
+ }
+
+ @Test
+ public void testForRefBaseN(){
+ byte[] refBasis = {'A', 'C', 'C', 'T', 'A', 'N', 'G', 'T', 'N', 'N'};
+ ReferenceSequence ref = new ReferenceSequence("test", 0, refBasis);
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ AbstractWgsMetricsCollector collector = new AbstractWgsMetricsCollector(collectWgsMetrics,
+ 10, createIntervalList()) {
+ @Override
+ public void addInfo(AbstractLocusInfo info, ReferenceSequence ref, boolean referenceBaseN) {
+ }
+ };
+ assertTrue(collector.isReferenceBaseN(6, ref));
+ assertFalse(collector.isReferenceBaseN(1, ref));
+ }
+}
diff --git a/src/test/java/picard/analysis/CollectWgsMetricsTest.java b/src/test/java/picard/analysis/CollectWgsMetricsTest.java
index b282d73db..1a24eeae6 100644
--- a/src/test/java/picard/analysis/CollectWgsMetricsTest.java
+++ b/src/test/java/picard/analysis/CollectWgsMetricsTest.java
@@ -75,12 +75,14 @@ public String getCommandLineProgramName() {
final String referenceFile = "testdata/picard/quality/chrM.reference.fasta";
return new Object[][] {
- {tempSamFile, outfile, referenceFile}
+ {tempSamFile, outfile, referenceFile, "false"},
+ {tempSamFile, outfile, referenceFile, "true"},
};
}
@Test(dataProvider = "wgsDataProvider")
- public void testMetricsFromWGS(final File input, final File outfile, final String referenceFile) throws IOException {
+ public void testMetricsFromWGS(final File input, final File outfile, final String referenceFile,
+ final String useFastAlgorithm) throws IOException {
outfile.deleteOnExit();
final int sampleSize = 1000;
@@ -88,7 +90,8 @@ public void testMetricsFromWGS(final File input, final File outfile, final Strin
"INPUT=" + input.getAbsolutePath(),
"OUTPUT=" + outfile.getAbsolutePath(),
"REFERENCE_SEQUENCE=" + referenceFile,
- "SAMPLE_SIZE=" + sampleSize
+ "SAMPLE_SIZE=" + sampleSize,
+ "USE_FAST_ALGORITHM=" + useFastAlgorithm
};
Assert.assertEquals(runPicardCommandLine(args), 0);
@@ -193,12 +196,19 @@ void setupBuilder() throws IOException {
sorter.instanceMain(args);
//create output files for tests
- outfile = File.createTempFile("testWgsMetrics", ".txt");
+ outfile = File.createTempFile("testWgsMetrics", ".txt", TEST_DIR);
outfile.deleteOnExit();
}
- @Test
- public void testLargeIntervals() throws IOException {
+ @DataProvider(name = "wgsAlgorithm")
+ public Object[][] wgsAlgorithm() {
+ return new Object[][] {
+ {"false"},
+ {"true"},
+ };
+ }
+ @Test(dataProvider = "wgsAlgorithm")
+ public void testLargeIntervals(final String useFastAlgorithm) throws IOException {
final File input = new File(TEST_DIR, "forMetrics.sam");
final File outfile = File.createTempFile("test", ".wgs_metrics");
final File ref = new File(TEST_DIR, "merger.fasta");
@@ -210,7 +220,8 @@ public void testLargeIntervals() throws IOException {
"OUTPUT=" + outfile.getAbsolutePath(),
"REFERENCE_SEQUENCE=" + ref.getAbsolutePath(),
"INTERVALS=" + intervals.getAbsolutePath(),
- "SAMPLE_SIZE=" + sampleSize
+ "SAMPLE_SIZE=" + sampleSize,
+ "USE_FAST_ALGORITHM=" + useFastAlgorithm
};
Assert.assertEquals(runPicardCommandLine(args), 0);
@@ -225,8 +236,8 @@ public void testLargeIntervals() throws IOException {
}
}
- @Test
- public void testExclusions() throws IOException {
+ @Test(dataProvider = "wgsAlgorithm")
+ public void testExclusions(final String useFastAlgorithm) throws IOException {
final File reference = new File("testdata/picard/sam/merger.fasta");
final File tempSamFile = File.createTempFile("CollectWgsMetrics", ".bam", TEST_DIR);
tempSamFile.deleteOnExit();
@@ -273,7 +284,8 @@ public void testExclusions() throws IOException {
"OUTPUT=" + outfile.getAbsolutePath(),
"REFERENCE_SEQUENCE=" + reference.getAbsolutePath(),
"INCLUDE_BQ_HISTOGRAM=true",
- "COVERAGE_CAP=3"
+ "COVERAGE_CAP=3",
+ "USE_FAST_ALGORITHM=" + useFastAlgorithm
};
Assert.assertEquals(runPicardCommandLine(args), 0);
@@ -289,8 +301,8 @@ public void testExclusions() throws IOException {
Assert.assertEquals((long) highQualityDepthHistogram.get(3).getValue(), 2*10);
}
- @Test
- public void testPoorQualityBases() throws IOException {
+ @Test(dataProvider = "wgsAlgorithm")
+ public void testPoorQualityBases(final String useFastAlgorithm) throws IOException {
final File reference = new File("testdata/picard/quality/chrM.reference.fasta");
final File testSamFile = File.createTempFile("CollectWgsMetrics", ".bam", TEST_DIR);
testSamFile.deleteOnExit();
@@ -336,7 +348,8 @@ public void testPoorQualityBases() throws IOException {
"OUTPUT=" + outfile.getAbsolutePath(),
"REFERENCE_SEQUENCE=" + reference.getAbsolutePath(),
"INCLUDE_BQ_HISTOGRAM=true",
- "COVERAGE_CAP=3"
+ "COVERAGE_CAP=3",
+ "USE_FAST_ALGORITHM=" + useFastAlgorithm
};
Assert.assertEquals(runPicardCommandLine(args), 0);
@@ -349,4 +362,47 @@ public void testPoorQualityBases() throws IOException {
Assert.assertEquals(metrics.PCT_EXC_BASEQ, 0.5);
Assert.assertEquals(metrics.PCT_EXC_CAPPED, 0.0);
}
+
+ @Test(dataProvider = "wgsAlgorithm")
+ public void testGiantDeletion(final String useFastAlgorithm) throws IOException {
+ final File reference = new File("testdata/picard/quality/chrM.reference.fasta");
+ final File testSamFile = File.createTempFile("CollectWgsMetrics", ".bam", TEST_DIR);
+ testSamFile.deleteOnExit();
+
+ final SAMRecordSetBuilder setBuilder = CollectWgsMetricsTestUtils.createTestSAMBuilder(reference, READ_GROUP_ID, SAMPLE, PLATFORM, LIBRARY);
+ setBuilder.setReadLength(10);
+ for (int i = 0; i < 3; i++){
+ setBuilder.addPair("Read:" + i, 0, 1, 30, false, false, "5M10000D5M", "1000D10M", false, true, 40);
+ }
+
+ final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(setBuilder.getHeader(), false, testSamFile);
+
+ for (final SAMRecord record : setBuilder) {
+ writer.addAlignment(record);
+ }
+
+ writer.close();
+
+ final File outfile = File.createTempFile("testGiantDeletion", ".txt");
+ outfile.deleteOnExit();
+
+ final String[] args = new String[] {
+ "INPUT=" + testSamFile.getAbsolutePath(),
+ "OUTPUT=" + outfile.getAbsolutePath(),
+ "REFERENCE_SEQUENCE=" + reference.getAbsolutePath(),
+ "INCLUDE_BQ_HISTOGRAM=true",
+ "COVERAGE_CAP=3",
+ "USE_FAST_ALGORITHM=" + useFastAlgorithm
+ };
+
+ Assert.assertEquals(runPicardCommandLine(args), 0);
+
+ final MetricsFile<CollectWgsMetrics.WgsMetrics, Integer> output = new MetricsFile<>();
+ output.read(new FileReader(outfile));
+
+ final CollectWgsMetrics.WgsMetrics metrics = output.getMetrics().get(0);
+
+ Assert.assertEquals(metrics.PCT_EXC_BASEQ, 0.0);
+ Assert.assertEquals(metrics.PCT_EXC_CAPPED, 0.0);
+ }
}
diff --git a/src/test/java/picard/analysis/CollectWgsMetricsTestUtils.java b/src/test/java/picard/analysis/CollectWgsMetricsTestUtils.java
index 468a3853f..e629cac6e 100644
--- a/src/test/java/picard/analysis/CollectWgsMetricsTestUtils.java
+++ b/src/test/java/picard/analysis/CollectWgsMetricsTestUtils.java
@@ -26,10 +26,24 @@
import htsjdk.samtools.*;
+import htsjdk.samtools.filter.SamRecordFilter;
+import htsjdk.samtools.filter.SecondaryAlignmentFilter;
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.reference.ReferenceSequenceFile;
+import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
+import htsjdk.samtools.util.AbstractLocusIterator;
+import htsjdk.samtools.util.EdgeReadIterator;
+import htsjdk.samtools.util.IntervalList;
import htsjdk.variant.utils.SAMSequenceDictionaryExtractor;
+import picard.filter.CountingDuplicateFilter;
+import picard.filter.CountingFilter;
+import picard.filter.CountingMapQFilter;
+import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
import java.util.stream.IntStream;
@@ -38,6 +52,14 @@
*/
public class CollectWgsMetricsTestUtils {
+
+ private static final String sqHeaderLN20 = "@HD\tSO:coordinate\tVN:1.0\n@SQ\tSN:chrM\tAS:HG18\tLN:20\n";
+ private static final String s1 = "3851612\t16\tchrM\t1\t255\t3M2D10M\t*\t0\t0\tACCTACGTTCAAT\tDDDDDDDDDDDDD\n";
+ private static final String sqHeaderLN100 = "@HD\tSO:coordinate\tVN:1.0\n@SQ\tSN:chrM\tAS:HG18\tLN:100\n";
+ private static final String s2 = "3851612\t16\tchrM\t2\t255\t10M1D5M\t*\t0\t0\tCCTACGTTCAATATT\tDDDDDDDDDDDDDDD\n";
+ static final String exampleSamOneRead = sqHeaderLN20+s1;
+ static final String exampleSamTwoReads = sqHeaderLN100+s1+s2;
+
protected static SAMRecordSetBuilder createTestSAMBuilder(final File reference,
final String readGroupId,
final String sample,
@@ -96,6 +118,97 @@ private static void createTestSAM(String testSamName) throws IOException {
writer.close();
}
+ static IntervalList createIntervalList() {
+ return new IntervalList(new SAMFileHeader());
+ }
+
+ static AbstractLocusIterator createReadEndsIterator(String exampleSam){
+ final List<SamRecordFilter> filters = new ArrayList<>();
+ final CountingFilter dupeFilter = new CountingDuplicateFilter();
+ final CountingFilter mapqFilter = new CountingMapQFilter(0);
+ filters.add(new SecondaryAlignmentFilter()); // Not a counting filter because we never want to count reads twice
+ filters.add(mapqFilter);
+ filters.add(dupeFilter);
+ SamReader samReader = createSamReader(exampleSam);
+ AbstractLocusIterator iterator = new EdgeReadIterator(samReader);
+ iterator.setSamFilters(filters);
+ iterator.setMappingQualityScoreCutoff(0); // Handled separately because we want to count bases
+ iterator.setIncludeNonPfReads(false);
+ return iterator;
+ }
+
+ static SamReader createSamReader(String samExample) {
+ ByteArrayInputStream inputStream = new ByteArrayInputStream(samExample.getBytes());
+ return SamReaderFactory.makeDefault().open(SamInputResource.of(inputStream));
+ }
+
+ static ReferenceSequenceFileWalker getReferenceSequenceFileWalker(String referenceString){
+ ReferenceSequenceFile referenceSequenceFile = createReferenceSequenceFile(referenceString);
+ return new ReferenceSequenceFileWalker(referenceSequenceFile);
+ }
+
+ static ReferenceSequenceFileWalker getReferenceSequenceFileWalker(){
+ String referenceString = ">ref\nACCTACGTTCAATATTCTTC";
+ return getReferenceSequenceFileWalker(referenceString);
+ }
+
+ static ReferenceSequenceFile createReferenceSequenceFile(String referenceString) {
+ final SAMSequenceRecord record = new SAMSequenceRecord("ref", referenceString.length());
+ final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
+ dictionary.addSequence(record);
+ return new ReferenceSequenceFile() {
+
+ boolean done = false;
+ @Override
+ public SAMSequenceDictionary getSequenceDictionary() {
+ return dictionary;
+ }
+
+ @Override
+ public ReferenceSequence nextSequence() {
+ if (!done) {
+ done = true;
+ return getSequence(record.getSequenceName());
+ }
+ return null;
+ }
+
+ @Override
+ public void reset() {
+ done=false;
+ }
+
+ @Override
+ public boolean isIndexed() {
+ return false;
+ }
+
+ @Override
+ public ReferenceSequence getSequence(String contig) {
+ if (contig.equals(record.getSequenceName())) {
+ return new ReferenceSequence(record.getSequenceName(), 0, referenceString.getBytes());
+ } else {
+ return null;
+ }
+ }
+
+ @Override
+ public ReferenceSequence getSubsequenceAt(String contig, long start, long stop) {
+ return null;
+ }
+
+ @Override
+ public String toString() {
+ return null;
+ }
+
+ @Override
+ public void close() throws IOException {
+
+ }
+ };
+ }
+
// call main (from IDE for example) to createTestSAM(), which creates a test SAM file
public static void main(String[] args) {
try { createTestSAM("TestSam"); } catch(IOException e) { ; }
diff --git a/src/test/java/picard/analysis/CounterManagerTest.java b/src/test/java/picard/analysis/CounterManagerTest.java
new file mode 100644
index 000000000..2077860ff
--- /dev/null
+++ b/src/test/java/picard/analysis/CounterManagerTest.java
@@ -0,0 +1,95 @@
+package picard.analysis;
+
+import org.testng.Assert;
+import org.testng.annotations.BeforeTest;
+import org.testng.annotations.Test;
+
+public class CounterManagerTest { // Unit tests for CounterManager/Counter: increment, clear, and offset rebasing via checkOutOfBounds().
+
+ private CounterManager testCounterManager;
+ private CounterManager.Counter testCounter;
+ private int arrayLength = 10;
+ private int readLength = 10;
+ private int OFFSET = 7;
+ private CounterManager secondTestCounterManager;
+ private CounterManager.Counter secondCounter;
+
+ @BeforeTest // NOTE(review): @BeforeTest runs once per TestNG <test> tag, not before each method; the tests below mutate shared managers (clear/checkOutOfBounds), making them order-dependent — @BeforeMethod would isolate them.
+ public void setUp() {
+ secondTestCounterManager = new CounterManager(arrayLength, readLength);
+ secondTestCounterManager.setOffset(OFFSET);
+ secondCounter = secondTestCounterManager.newCounter();
+ testCounterManager = new CounterManager(arrayLength, readLength);
+ testCounterManager.setOffset(OFFSET);
+ testCounter = testCounterManager.newCounter();
+
+ for (int i = 0; i < arrayLength; i++) {
+ testCounter.increment(i + OFFSET);
+ secondCounter.increment(i + OFFSET);
+ }
+ testCounter.increment(OFFSET); // index OFFSET ends with count 2; every other covered index has count 1
+ secondCounter.increment(OFFSET);
+ }
+
+ @Test
+ public void testCounterInc() {
+ Assert.assertEquals(2, testCounter.get(OFFSET), "Test method increment:"); // NOTE(review): TestNG assertEquals is (actual, expected, message); args here (and in the tests below) are in JUnit order, so failure messages will show the values swapped.
+ }
+
+ @Test
+ public void testForClearCounter() {
+ testCounterManager.clear();
+ Assert.assertEquals(0, testCounter.get(OFFSET), "The value of the array with index 0 must be 0 after clear manager:");
+ }
+
+ @Test
+ public void testForCorrectOffsetAfterRebase() {
+ secondTestCounterManager.checkOutOfBounds(11);
+ Assert.assertEquals(11, secondTestCounterManager.getOffset(), "After rebase offset must be new int");
+ }
+
+ @Test
+ public void testForCorrectCounterAfterRebase() {
+ secondTestCounterManager.checkOutOfBounds(11);
+ Assert.assertEquals(1, secondCounter.get(11), "The value of the array with index 0 must be 1 after rebase:");
+ }
+
+ @Test
+ public void testForOutOfBoundCounter() {
+ secondTestCounterManager.checkOutOfBounds(44);
+ Assert.assertEquals(44, secondTestCounterManager.getOffset(), "New offset after clear must be 44:");
+ }
+
+ @Test
+ public void testForCleanCounterAfter() {
+ testCounterManager.checkOutOfBounds(88);
+ Assert.assertEquals(0, testCounter.get(88), "The value of the array with index 0 must be 1 after clean:"); // NOTE(review): message says "must be 1" but the assertion expects 0 — misleading failure text.
+ }
+
+ @Test
+ public void testForCheckIncrement(){
+ CounterManager testCounterManager = new CounterManager(arrayLength, readLength); // locally-scoped manager, independent of the shared fixtures above
+ testCounterManager.setOffset(0);
+ CounterManager.Counter counter = testCounterManager.newCounter();
+ for (int i=0; i<10; i++){
+ counter.increment(1);
+ }
+ int[] testArray = new int[arrayLength];
+ for (int i = 0; i< arrayLength; i++){
+ testArray[i] = counter.get(i);
+ }
+ int[]templateArray = new int[arrayLength];
+ templateArray[1] = 10;
+ Assert.assertEquals(templateArray, testArray);
+ }
+
+ @Test(expectedExceptions = IndexOutOfBoundsException.class)
+ public void testForWrongIndexInInc(){
+ testCounter.increment(40); // 40 lies outside [OFFSET, OFFSET + arrayLength)
+ }
+
+ @Test(expectedExceptions = IndexOutOfBoundsException.class)
+ public void testForWrongIndexInGet(){
+ testCounter.get(40);
+ }
+}
diff --git a/src/test/java/picard/analysis/FastWgsMetricsCollectorTest.java b/src/test/java/picard/analysis/FastWgsMetricsCollectorTest.java
new file mode 100644
index 000000000..84675170b
--- /dev/null
+++ b/src/test/java/picard/analysis/FastWgsMetricsCollectorTest.java
@@ -0,0 +1,215 @@
+package picard.analysis;
+
+
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.util.AbstractLocusInfo;
+import htsjdk.samtools.util.AbstractLocusIterator;
+import htsjdk.samtools.util.EdgingRecordAndOffset;
+import org.testng.annotations.BeforeTest;
+import org.testng.annotations.Test;
+import static org.testng.Assert.assertEquals;
+import static picard.analysis.CollectWgsMetricsTestUtils.createIntervalList;
+import static picard.analysis.CollectWgsMetricsTestUtils.createReadEndsIterator;
+import static picard.analysis.CollectWgsMetricsTestUtils.exampleSamTwoReads;
+
+
+public class FastWgsMetricsCollectorTest { // Tests FastWgsMetricsCollector accounting: base-quality/overlap/capping exclusions and depth/quality histograms, driven by EdgingRecordAndOffset begin/end events.
+ private byte[] highQualities = {30, 30, 30 ,30, 30, 30, 30, 30, 30, 30, 50, 50, 50 ,50, 50, 50, 50, 50, 50, 60, 60, 60, 70 ,70, 70, 80, 80, 90, 90, 90};
+ private byte[] qualities = {2, 2, 3 ,3, 3, 4, 4, 4, 4, 1}; // all below typical MINIMUM_BASE_QUALITY — used to trigger basesExcludedByBaseq
+ private byte[] refBases = {'A', 'C', 'C', 'T', 'A', 'C', 'G', 'T', 'T', 'C', 'A', 'A', 'T', 'A', 'T', 'T', 'C', 'T', 'T', 'C', 'G', 'A', 'G', 'T', 'C', 'D' , 'G', 'T', 'C', 'D','A', 'G', 'T', 'C', 'T', 'T', 'C', 'G', 'A', 'G', 'T', 'C', 'T', 'T', 'C', 'G', 'C', 'T', 'T', 'C', 'G', 'A', 'G', 'T', 'C', 'D' , 'G', 'T', 'C', 'D','A', 'G', 'T', 'C', 'T', 'T', 'C', 'G', 'A', 'G', 'T', 'C', 'T', 'T', 'C', 'G', 'C', 'T', 'T', 'C', 'G', 'A', 'G', 'T', 'C', 'D' , 'G', 'T', 'C', 'D', 'G', 'A', 'G', 'T', 'C', 'D' , 'G', 'T', 'C', 'D'};
+ private SAMRecord record;
+ private SAMSequenceRecord sequence;
+ private SAMRecord secondRecord;
+ private SAMRecord thirdRecord;
+ private ReferenceSequence ref;
+
+ @BeforeTest // NOTE(review): runs once per TestNG <test> tag; fixtures here are read-only per test, but each test builds its own collector, so this is safe.
+ public void setUp(){
+ String referenceString = ">chrM\nACCTACGTTCAATATTCTTCACCTACGTTCAATATTCTTCACCTACGTTCAATATTCTTCACCTACGTTCAATATTCTTCACCTACGTTCAATATTCTTC";
+ ref = new ReferenceSequence("chrM", 0, referenceString.getBytes()); // NOTE(review): bases include the ">chrM\n" header chars — confirm the collector tolerates this.
+ sequence = new SAMSequenceRecord("chrM", 100);
+ record = new SAMRecord(new SAMFileHeader());
+ record.setReadName("test");
+ record.setBaseQualities(qualities); // low-quality read
+ record.setReadBases(refBases);
+ secondRecord = generateRecord("test1");
+ thirdRecord = generateRecord("test2");
+ }
+
+ private SAMRecord generateRecord(String name) { // Helper: a high-quality read sharing the common base sequence.
+ SAMRecord record = new SAMRecord(new SAMFileHeader());
+ record.setReadName(name);
+ record.setBaseQualities(highQualities);
+ record.setReadBases(refBases);
+ return record;
+ }
+
+ @Test
+ public void testAddInfoForQuality() throws Exception {
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ FastWgsMetricsCollector collector = new FastWgsMetricsCollector(collectWgsMetrics, 100, createIntervalList());
+ AbstractLocusInfo firstInfo = new AbstractLocusInfo<>(sequence, 1); // NOTE(review): raw AbstractLocusInfo — should be AbstractLocusInfo<EdgingRecordAndOffset> to avoid unchecked warnings (applies throughout this class).
+ AbstractLocusInfo secondInfo = new AbstractLocusInfo<>(sequence, 10);
+ EdgingRecordAndOffset record1 = EdgingRecordAndOffset.createBeginRecord(record, 0, 10, 0);
+ firstInfo.add(record1);
+ secondInfo.add(EdgingRecordAndOffset.createEndRecord(record1));
+ EdgingRecordAndOffset record2 = EdgingRecordAndOffset.createBeginRecord(record, 0, 10, 0);
+ firstInfo.add(record2);
+ secondInfo.add(EdgingRecordAndOffset.createEndRecord(record2));
+ collector.addInfo(firstInfo, ref, false);
+ collector.addInfo(secondInfo, ref, false);
+ assertEquals(20, collector.basesExcludedByBaseq); // NOTE(review): TestNG assertEquals is (actual, expected); args here (and throughout) are in JUnit order — equality still holds, but failure messages will swap the values.
+ }
+
+ @Test
+ public void testAddInfoForOverlap(){
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ FastWgsMetricsCollector collector = new FastWgsMetricsCollector(collectWgsMetrics, 100, createIntervalList());
+ AbstractLocusInfo firstInfo = new AbstractLocusInfo<>(sequence, 1);
+ AbstractLocusInfo secondInfo = new AbstractLocusInfo<>(sequence, 5);
+ AbstractLocusInfo thirdInfo = new AbstractLocusInfo<>(sequence, 10);
+ EdgingRecordAndOffset record1 = EdgingRecordAndOffset.createBeginRecord(secondRecord, 0, 10, 0);
+ firstInfo.add(record1);
+ thirdInfo.add(EdgingRecordAndOffset.createEndRecord(record1));
+ EdgingRecordAndOffset record2 = EdgingRecordAndOffset.createBeginRecord(secondRecord, 5, 5, 0); // same read name overlapping loci 5..9 — the 5 overlapped bases should be excluded
+ secondInfo.add(record2);
+ thirdInfo.add(EdgingRecordAndOffset.createEndRecord(record2));
+ collector.addInfo(firstInfo, ref, false);
+ collector.addInfo(secondInfo, ref, false);
+ collector.addInfo(thirdInfo, ref, false);
+ assertEquals(5, collector.basesExcludedByOverlap, "Excluded by overlap:");
+ }
+
+ @Test
+ public void testAddInfoForCapping(){
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ FastWgsMetricsCollector collector = new FastWgsMetricsCollector(collectWgsMetrics, 1, createIntervalList()); // coverage cap of 1: the second read's bases at each locus exceed the cap
+ AbstractLocusInfo firstInfo = new AbstractLocusInfo<>(sequence, 1);
+ AbstractLocusInfo secondInfo = new AbstractLocusInfo<>(sequence, 10);
+ EdgingRecordAndOffset record1 = EdgingRecordAndOffset.createBeginRecord(secondRecord, 0, 10, 0);
+ firstInfo.add(record1);
+ secondInfo.add(EdgingRecordAndOffset.createEndRecord(record1));
+ EdgingRecordAndOffset record2 = EdgingRecordAndOffset.createBeginRecord(secondRecord, 0, 10, 0);
+ firstInfo.add(record2);
+ secondInfo.add(EdgingRecordAndOffset.createEndRecord(record2));
+ collector.addInfo(firstInfo, ref, false);
+ collector.addInfo(secondInfo, ref, false);
+ assertEquals(1, collector.basesExcludedByCapping, "Excluded by capping:");
+ }
+
+ @Test
+ public void testForExcludedForQualityHistogramArray(){
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ collectWgsMetrics.INCLUDE_BQ_HISTOGRAM = true;
+ FastWgsMetricsCollector collector = new FastWgsMetricsCollector(collectWgsMetrics, 100, createIntervalList());
+ AbstractLocusInfo firstInfo = new AbstractLocusInfo<>(sequence, 1);
+ AbstractLocusInfo secondInfo = new AbstractLocusInfo<>(sequence, 30);
+ EdgingRecordAndOffset record = EdgingRecordAndOffset.createBeginRecord(secondRecord, 0, 30, 0);
+ firstInfo.add(record);
+ firstInfo.add(EdgingRecordAndOffset.createEndRecord(record)); // NOTE(review): begin and end are both added to firstInfo (cf. other tests, which put the end record on a later locus) — confirm intended.
+ collector.addInfo(firstInfo, ref, false);
+ collector.addInfo(secondInfo, ref, false);
+ long[] expectedResult = new long[127];
+ expectedResult[30] = 10; expectedResult[50] = 9; expectedResult[60] = 3; // counts mirror the highQualities array contents
+ expectedResult[70] = 3; expectedResult[80] = 2; expectedResult[90] = 3;
+ assertEquals(expectedResult, collector.unfilteredBaseQHistogramArray);
+ }
+
+ @Test
+ public void testForBaseQualityHetSens(){ // NOTE(review): this test builds expectedResult but never asserts anything — it currently verifies nothing; add an assertEquals against the collector's base-quality histogram.
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ collectWgsMetrics.INCLUDE_BQ_HISTOGRAM = true;
+ FastWgsMetricsCollector collector = new FastWgsMetricsCollector(collectWgsMetrics, 10, createIntervalList());
+ AbstractLocusInfo firstInfo = new AbstractLocusInfo<>(sequence, 1);
+ AbstractLocusInfo secondInfo = new AbstractLocusInfo<>(sequence, 1);
+ AbstractLocusInfo thirdInfo = new AbstractLocusInfo<>(sequence, 1);
+ EdgingRecordAndOffset record1 = EdgingRecordAndOffset.createBeginRecord(secondRecord, 0, 30, 0);
+ firstInfo.add(record1);
+ thirdInfo.add(EdgingRecordAndOffset.createEndRecord(record1));
+ EdgingRecordAndOffset record2 = EdgingRecordAndOffset.createBeginRecord(record, 0, 10, 0);
+ firstInfo.add(record2);
+ secondInfo.add(EdgingRecordAndOffset.createEndRecord(record2));
+ collector.addInfo(firstInfo, ref, false);
+ collector.addInfo(secondInfo, ref, false);
+ collector.addInfo(thirdInfo, ref, false);
+
+ long[] expectedResult = new long[127];
+ expectedResult[30] = 10; expectedResult[50] = 9; expectedResult[60] = 3;
+ expectedResult[70] = 3; expectedResult[80] = 2; expectedResult[90] = 3;
+ expectedResult[1] = 1; expectedResult[2] = 2; expectedResult[3] = 3;
+ expectedResult[4] = 4; // NOTE(review): missing assertion — expectedResult is dead data as written.
+ }
+
+ @Test
+ public void testForHistogramArray(){
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ FastWgsMetricsCollector collector = new FastWgsMetricsCollector(collectWgsMetrics, 10, createIntervalList());
+ long[] templateHistogramArray = {0,1,3,0,0,0,0,0,0,0,0}; // depth histogram: 1 locus at depth 1, 3 loci at depth 2 (given the staggered read ends below)
+ AbstractLocusInfo firstInfo = new AbstractLocusInfo<>(sequence, 1);
+ AbstractLocusInfo secondInfo = new AbstractLocusInfo<>(sequence, 10);
+ AbstractLocusInfo thirdInfo = new AbstractLocusInfo<>(sequence, 20);
+ AbstractLocusInfo fourthInfo = new AbstractLocusInfo<>(sequence, 30);
+ EdgingRecordAndOffset record1 = EdgingRecordAndOffset.createBeginRecord(record, 0, 10, 0);
+ firstInfo.add(record1);
+ secondInfo.add(EdgingRecordAndOffset.createEndRecord(record1));
+ EdgingRecordAndOffset record2 = EdgingRecordAndOffset.createBeginRecord(secondRecord, 0, 30, 0);
+ firstInfo.add(record2);
+ fourthInfo.add(EdgingRecordAndOffset.createEndRecord(record2));
+ EdgingRecordAndOffset record3 = EdgingRecordAndOffset.createBeginRecord(thirdRecord, 0, 20, 0);
+ firstInfo.add(record3);
+ thirdInfo.add(EdgingRecordAndOffset.createEndRecord(record3));
+ collector.addInfo(firstInfo, ref, false);
+ collector.addInfo(secondInfo, ref, false);
+ collector.addInfo(thirdInfo, ref, false);
+ collector.addInfo(fourthInfo, ref, false);
+ assertEquals(templateHistogramArray, collector.unfilteredDepthHistogramArray);
+ }
+
+ @Test
+ public void testForCollectorWithoutData(){ // Sanity check: a freshly-constructed collector reports all-zero histograms and counters.
+ long[] templateQualHistogram = new long[127];
+ long[] templateHistogramArray = new long[11];
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ FastWgsMetricsCollector collector = new FastWgsMetricsCollector(collectWgsMetrics, 10, createIntervalList());
+ assertEquals(templateHistogramArray, collector.unfilteredDepthHistogramArray);
+ assertEquals(templateQualHistogram, collector.unfilteredBaseQHistogramArray);
+ assertEquals(0, collector.basesExcludedByCapping);
+ assertEquals(0, collector.basesExcludedByOverlap);
+ assertEquals(0, collector.basesExcludedByBaseq);
+ }
+
+ @Test
+ public void testAddInfoWithoutOverlap(){ // NOTE(review): thirdInfo/fourthInfo are built but never passed to collector.addInfo — either pass them too, or drop them; as written only the first read is processed.
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ FastWgsMetricsCollector collector = new FastWgsMetricsCollector(collectWgsMetrics, 100, createIntervalList());
+ AbstractLocusInfo firstInfo = new AbstractLocusInfo<>(sequence, 1);
+ AbstractLocusInfo secondInfo = new AbstractLocusInfo<>(sequence, 5);
+ AbstractLocusInfo thirdInfo = new AbstractLocusInfo<>(sequence, 6);
+ AbstractLocusInfo fourthInfo = new AbstractLocusInfo<>(sequence, 10);
+ EdgingRecordAndOffset record1 = EdgingRecordAndOffset.createBeginRecord(secondRecord, 0, 5, 0);
+ firstInfo.add(record1);
+ secondInfo.add(EdgingRecordAndOffset.createEndRecord(record1));
+ EdgingRecordAndOffset record2 = EdgingRecordAndOffset.createBeginRecord(secondRecord, 6, 5, 6);
+ thirdInfo.add(record2);
+ fourthInfo.add(EdgingRecordAndOffset.createEndRecord(record2));
+
+ collector.addInfo(firstInfo, ref, false);
+ collector.addInfo(secondInfo, ref, false);
+ assertEquals(0, collector.basesExcludedByOverlap, "Excluded by overlap:");
+ }
+
+ @Test
+ public void testForComplicatedCigar(){ // End-to-end: drive the collector with a real iterator over a two-read SAM fixture containing non-trivial CIGARs.
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ FastWgsMetricsCollector collector = new FastWgsMetricsCollector(collectWgsMetrics, 100, createIntervalList());
+ AbstractLocusIterator sli = createReadEndsIterator(exampleSamTwoReads);
+ while(sli.hasNext()) {
+ AbstractLocusInfo info = sli.next();
+ collector.addInfo(info, ref, false);
+ }
+ assertEquals(11, collector.basesExcludedByOverlap, "Excluded by overlap:");
+ }
+}
diff --git a/src/test/java/picard/analysis/WgsMetricsProcessorImplTest.java b/src/test/java/picard/analysis/WgsMetricsProcessorImplTest.java
new file mode 100644
index 000000000..9bd9e20b8
--- /dev/null
+++ b/src/test/java/picard/analysis/WgsMetricsProcessorImplTest.java
@@ -0,0 +1,86 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.analysis;
+
+import htsjdk.samtools.reference.ReferenceSequenceFile;
+import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
+import htsjdk.samtools.util.AbstractLocusIterator;
+import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.ProgressLogger;
+import org.testng.annotations.BeforeTest;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+import static picard.analysis.CollectWgsMetricsTestUtils.createIntervalList;
+import static picard.analysis.CollectWgsMetricsTestUtils.createReadEndsIterator;
+import static picard.analysis.CollectWgsMetricsTestUtils.exampleSamOneRead;
+import static picard.analysis.CollectWgsMetricsTestUtils.getReferenceSequenceFileWalker;
+import static picard.analysis.CollectWgsMetricsTestUtils.createReferenceSequenceFile;
+
+public class WgsMetricsProcessorImplTest { // Tests WgsMetricsProcessorImpl.processFile(): base counting, N-base filtering, and the STOP_AFTER early-exit path.
+ private ProgressLogger progress;
+
+ @BeforeTest // runs once; progress logger is stateless enough to share across the tests below
+ public void setUp(){
+ progress = new ProgressLogger(Log.getInstance(WgsMetricsProcessorImpl.class));
+ }
+
+ @Test
+ public void testForProcessFile(){
+ AbstractLocusIterator iterator = createReadEndsIterator(exampleSamOneRead); // NOTE(review): raw type — parameterizing would avoid unchecked warnings (applies throughout).
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ FastWgsMetricsCollector collector = new FastWgsMetricsCollector(collectWgsMetrics, 100, createIntervalList());
+ WgsMetricsProcessorImpl wgsMetricsProcessor =
+ new WgsMetricsProcessorImpl(iterator, getReferenceSequenceFileWalker(), collector, progress);
+ wgsMetricsProcessor.processFile();
+ assertEquals(20, collector.counter); // NOTE(review): TestNG assertEquals is (actual, expected); args are in JUnit order here — equality still holds, but failure messages will swap the values.
+ }
+
+ @Test
+ public void testForFilteredBases(){
+ AbstractLocusIterator iterator = createReadEndsIterator(exampleSamOneRead);
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ FastWgsMetricsCollector collector = new FastWgsMetricsCollector(collectWgsMetrics, 100, createIntervalList());
+ String secondReferenceString = ">ref\nNNNNNNNNNNAATATTCTTC"; // first 10 reference bases are N — those positions should be filtered out
+ ReferenceSequenceFile referenceSequenceFile = createReferenceSequenceFile(secondReferenceString);
+ ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(referenceSequenceFile);
+ WgsMetricsProcessorImpl wgsMetricsProcessor =
+ new WgsMetricsProcessorImpl(iterator, refWalker, collector, progress);
+ wgsMetricsProcessor.processFile();
+ assertEquals(10, collector.counter); // only the 10 non-N positions are counted
+ }
+
+ @Test
+ public void testForExitAfter(){
+ AbstractLocusIterator iterator = createReadEndsIterator(exampleSamOneRead);
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ collectWgsMetrics.STOP_AFTER = 16; // NOTE(review): expected count below is 15, not 16 — presumably processing stops before handling the STOP_AFTER-th locus; confirm against WgsMetricsProcessorImpl.
+ AbstractWgsMetricsCollector collector = new FastWgsMetricsCollector(collectWgsMetrics, 100, null); // NOTE(review): null interval list here, unlike the other tests which use createIntervalList() — confirm intended.
+ WgsMetricsProcessorImpl wgsMetricsProcessor =
+ new WgsMetricsProcessorImpl(iterator, getReferenceSequenceFileWalker(), collector, progress);
+ wgsMetricsProcessor.processFile();
+ assertEquals(15, collector.counter);
+ }
+}