From 5d69b7723850dfda49cf433a2864b4560681fb33 Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Fri, 16 Jun 2017 14:44:41 -0700 Subject: [PATCH] DownsampleSam can optionally yield QualityYieldMetrics. --- .../analysis/CollectQualityYieldMetrics.java | 121 ++++++++++++++------- src/main/java/picard/sam/DownsampleSam.java | 15 +++ 2 files changed, 98 insertions(+), 38 deletions(-) diff --git a/src/main/java/picard/analysis/CollectQualityYieldMetrics.java b/src/main/java/picard/analysis/CollectQualityYieldMetrics.java index a704bfb89..e150da179 100644 --- a/src/main/java/picard/analysis/CollectQualityYieldMetrics.java +++ b/src/main/java/picard/analysis/CollectQualityYieldMetrics.java @@ -50,6 +50,8 @@ programGroup = Metrics.class ) public class CollectQualityYieldMetrics extends SinglePassSamProgram { + private QualityYieldMetricsCollector collector = null; + static final String USAGE_SUMMARY = "Collect metrics about reads that pass quality thresholds and Illumina-specific filters. "; static final String USAGE_DETAILS = "This tool evaluates the overall quality of reads within a bam file containing one read group. " + "The output indicates the total numbers of bases within a read group that pass a minimum base quality score threshold and " + @@ -87,64 +89,107 @@ "of bases if there are supplemental alignments in the input file.") public boolean INCLUDE_SUPPLEMENTAL_ALIGNMENTS = false; - // The metrics to be accumulated - private final QualityYieldMetrics metrics = new QualityYieldMetrics(); - /** Ensure that we get all reads regardless of alignment status. */ @Override protected boolean usesNoRefReads() { return true; } @Override protected void setup(final SAMFileHeader header, final File samFile) { IOUtil.assertFileIsWritable(OUTPUT); + this.collector = new QualityYieldMetricsCollector(USE_ORIGINAL_QUALITIES, INCLUDE_SECONDARY_ALIGNMENTS, INCLUDE_SUPPLEMENTAL_ALIGNMENTS); } @Override protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) { - if (!INCLUDE_SECONDARY_ALIGNMENTS && rec.getNotPrimaryAlignmentFlag()) return; - if (!INCLUDE_SUPPLEMENTAL_ALIGNMENTS && rec.getSupplementaryAlignmentFlag()) return; + this.collector.acceptRecord(rec, ref); + } - final int length = rec.getReadLength(); - metrics.TOTAL_READS++; - metrics.TOTAL_BASES += length; + @Override + protected void finish() { + final MetricsFile metricsFile = getMetricsFile(); + this.collector.finish(); + this.collector.addMetricsToFile(metricsFile); + metricsFile.write(OUTPUT); + } - final boolean isPfRead = !rec.getReadFailsVendorQualityCheckFlag(); - if (isPfRead) { - metrics.PF_READS++; - metrics.PF_BASES += length; - } + public static class QualityYieldMetricsCollector { + // If true, include bases from secondary alignments in metrics. Setting to true may cause double-counting + // of bases if there are secondary alignments in the input file. + private final boolean useOriginalQualities; + + // If true, include bases from secondary alignments in metrics. Setting to true may cause double-counting + // of bases if there are secondary alignments in the input file. + private final boolean includeSecondaryAlignments; - final byte[] quals; - if (USE_ORIGINAL_QUALITIES) { - byte[] tmp = rec.getOriginalBaseQualities(); - if (tmp == null) tmp = rec.getBaseQualities(); - quals = tmp; - } else { - quals = rec.getBaseQualities(); + // If true, include bases from supplemental alignments in metrics. Setting to true may cause double-counting + // of bases if there are supplemental alignments in the input file. + public final boolean includeSupplementalAlignments; + + // The metrics to be accumulated + private final QualityYieldMetrics metrics = new QualityYieldMetrics(); + + public QualityYieldMetricsCollector(final boolean useOriginalQualities, + final boolean includeSecondaryAlignments, + final boolean includeSupplementalAlignments) { + this.useOriginalQualities = useOriginalQualities; + this.includeSecondaryAlignments = includeSecondaryAlignments; + this.includeSupplementalAlignments = includeSupplementalAlignments; } - // add up quals, and quals >= 20 - for (final int qual : quals) { - metrics.Q20_EQUIVALENT_YIELD += qual; - if (qual >= 20) metrics.Q20_BASES++; - if (qual >= 30) metrics.Q30_BASES++; + public void acceptRecord(final SAMRecord rec, final ReferenceSequence ref) { + if (!this.includeSecondaryAlignments && rec.getNotPrimaryAlignmentFlag()) return; + if (!this.includeSupplementalAlignments && rec.getSupplementaryAlignmentFlag()) return; + + final int length = rec.getReadLength(); + metrics.TOTAL_READS++; + metrics.TOTAL_BASES += length; + final boolean isPfRead = !rec.getReadFailsVendorQualityCheckFlag(); if (isPfRead) { - metrics.PF_Q20_EQUIVALENT_YIELD += qual; - if (qual >= 20) metrics.PF_Q20_BASES++; - if (qual >= 30) metrics.PF_Q30_BASES++; + metrics.PF_READS++; + metrics.PF_BASES += length; + } + + final byte[] quals; + if (this.useOriginalQualities) { + byte[] tmp = rec.getOriginalBaseQualities(); + if (tmp == null) tmp = rec.getBaseQualities(); + quals = tmp; + } else { + quals = rec.getBaseQualities(); + } + + // add up quals, and quals >= 20 + for (final int qual : quals) { + metrics.Q20_EQUIVALENT_YIELD += qual; + + if (qual >= 30) { + metrics.Q20_BASES++; + metrics.Q30_BASES++; + } else if (qual >= 20) { + metrics.Q20_BASES++; + } + + if (isPfRead) { + metrics.PF_Q20_EQUIVALENT_YIELD += qual; + if (qual >= 30) { + metrics.PF_Q20_BASES++; + metrics.PF_Q30_BASES++; + } else if (qual >= 20) { + metrics.PF_Q20_BASES++; + } + } } } - } - @Override - protected void finish() { - final MetricsFile metricsFile = getMetricsFile(); - metrics.READ_LENGTH = metrics.TOTAL_READS == 0 ? 0 : (int) (metrics.TOTAL_BASES / metrics.TOTAL_READS); - metrics.Q20_EQUIVALENT_YIELD = metrics.Q20_EQUIVALENT_YIELD / 20; - metrics.PF_Q20_EQUIVALENT_YIELD = metrics.PF_Q20_EQUIVALENT_YIELD / 20; + public void finish() { + metrics.READ_LENGTH = metrics.TOTAL_READS == 0 ? 0 : (int) (metrics.TOTAL_BASES / metrics.TOTAL_READS); + metrics.Q20_EQUIVALENT_YIELD = metrics.Q20_EQUIVALENT_YIELD / 20; + metrics.PF_Q20_EQUIVALENT_YIELD = metrics.PF_Q20_EQUIVALENT_YIELD / 20; + } - metricsFile.addMetric(metrics); - metricsFile.write(OUTPUT); + public void addMetricsToFile(final MetricsFile metricsFile) { + metricsFile.addMetric(metrics); + } } /** A set of metrics used to describe the general quality of a BAM file */ @@ -180,7 +225,7 @@ protected void finish() { /** The sum of quality scores of all bases divided by 20 */ public long Q20_EQUIVALENT_YIELD = 0; - /** The sum of quality scores of all bases divided by 20 */ + /** The sum of quality scores of all bases in PF reads divided by 20 */ public long PF_Q20_EQUIVALENT_YIELD = 0; } } diff --git a/src/main/java/picard/sam/DownsampleSam.java b/src/main/java/picard/sam/DownsampleSam.java index 7f0e807d5..06b69d5c5 100644 --- a/src/main/java/picard/sam/DownsampleSam.java +++ b/src/main/java/picard/sam/DownsampleSam.java @@ -25,10 +25,13 @@ import htsjdk.samtools.*; import htsjdk.samtools.DownsamplingIteratorFactory.Strategy; +import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.ProgressLogger; +import picard.analysis.CollectQualityYieldMetrics.QualityYieldMetrics; +import picard.analysis.CollectQualityYieldMetrics.QualityYieldMetricsCollector; import picard.cmdline.CommandLineProgram; import picard.cmdline.CommandLineProgramProperties; import picard.cmdline.Option; @@ -94,6 +97,9 @@ "Higher accuracy will generally require more memory.") public double ACCURACY = 0.0001; + @Option(shortName = "M", doc = "The file to write metrics to (QualityYieldMetrics)") + public File METRICS_FILE; + private final Log log = Log.getInstance(DownsampleSam.class); public static void main(final String[] args) { @@ -115,10 +121,12 @@ protected int doWork() { final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(in.getFileHeader(), true, OUTPUT); final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Wrote"); final DownsamplingIterator iterator = DownsamplingIteratorFactory.make(in, STRATEGY, PROBABILITY, ACCURACY, RANDOM_SEED); + final QualityYieldMetricsCollector metricsCollector = new QualityYieldMetricsCollector(true, false, false); while (iterator.hasNext()) { final SAMRecord rec = iterator.next(); out.addAlignment(rec); + if (METRICS_FILE != null) metricsCollector.acceptRecord(rec, null); progress.record(rec); } @@ -128,6 +136,13 @@ protected int doWork() { log.info("Finished downsampling."); log.info("Kept ", iterator.getAcceptedCount(), " out of ", iterator.getSeenCount(), " reads (", fmt.format(iterator.getAcceptedFraction()), ")."); + if (METRICS_FILE != null) { + final MetricsFile metricsFile = getMetricsFile(); + metricsCollector.finish(); + metricsCollector.addMetricsToFile(metricsFile); + metricsFile.write(METRICS_FILE); + } + return 0; } }