diff --git a/src/main/java/picard/analysis/AlignmentSummaryMetrics.java b/src/main/java/picard/analysis/AlignmentSummaryMetrics.java index 4a7db22f5..8422a83b8 100644 --- a/src/main/java/picard/analysis/AlignmentSummaryMetrics.java +++ b/src/main/java/picard/analysis/AlignmentSummaryMetrics.java @@ -51,7 +51,7 @@ /** The number of PF reads where PF is defined as passing Illumina's filter. */ public long PF_READS; - /** The percentage of reads that are PF (PF_READS / TOTAL_READS) */ + /** The fraction of reads that are PF (PF_READS / TOTAL_READS) */ public double PCT_PF_READS; /** @@ -108,7 +108,7 @@ public double PF_MISMATCH_RATE; /** - * The percentage of bases that mismatch the reference in PF HQ aligned reads. + * The fraction of bases that mismatch the reference in PF HQ aligned reads. */ public double PF_HQ_ERROR_RATE; @@ -131,12 +131,23 @@ public long READS_ALIGNED_IN_PAIRS; /** - * The percentage of reads whose mate pair was also aligned to the reference. + * The fraction of reads whose mate pair was also aligned to the reference. * READS_ALIGNED_IN_PAIRS / PF_READS_ALIGNED */ public double PCT_READS_ALIGNED_IN_PAIRS; /** + * The number of (primary) aligned reads that are **not** "properly" aligned in pairs (as per SAM flag 0x2). + */ + public long PF_READS_IMPROPER_PAIRS; + + /** + * The fraction of (primary) reads that are *not* "properly" aligned in pairs (as per SAM flag 0x2). + * PF_READS_IMPROPER_PAIRS / PF_READS_ALIGNED + */ + public double PCT_PF_READS_IMPROPER_PAIRS; + + /** * The number of instrument cycles in which 80% or more of base calls were no-calls. */ public long BAD_CYCLES; @@ -148,13 +159,13 @@ public double STRAND_BALANCE; /** - * The percentage of reads that map outside of a maximum insert size (usually 100kb) or that have + * The fraction of reads that map outside of a maximum insert size (usually 100kb) or that have * the two ends mapping to different chromosomes. 
*/ public double PCT_CHIMERAS; /** - * The percentage of PF reads that are unaligned and match to a known adapter sequence right from the + * The fraction of PF reads that are unaligned and match to a known adapter sequence right from the * start of the read. */ public double PCT_ADAPTER; diff --git a/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java b/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java index 1dba3fe39..405f63487 100644 --- a/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java +++ b/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java @@ -53,7 +53,7 @@ private final boolean isBisulfiteSequenced; //The minimum mapping quality a base has to meet in order to be considered high quality - private final int MAPPING_QUALITY_THRESOLD = 20; + private final int MAPPING_QUALITY_THRESHOLD = 20; //The minimum quality a base has to meet in order to be consider hq_20 private final static int BASE_QUALITY_THRESHOLD = 20; @@ -67,7 +67,7 @@ public AlignmentSummaryMetricsCollector(final Set accum this.doRefMetrics = doRefMetrics; this.adapterUtility = new AdapterUtility(adapterSequence); this.maxInsertSize = maxInsertSize; - this.expectedOrientations = expectedOrientations; + this.expectedOrientations = expectedOrientations; this.isBisulfiteSequenced = isBisulfiteSequenced; setup(accumulationLevels, samRgRecords); } @@ -110,14 +110,12 @@ public void acceptRecord(final SAMRecordAndReference args) { if (rec.getReadPairedFlag()) { if (rec.getFirstOfPairFlag()) { firstOfPairCollector.addRecord(rec, ref); - } - else { + } else { secondOfPairCollector.addRecord(rec, ref); } pairCollector.addRecord(rec, ref); - } - else { + } else { unpairedCollector.addRecord(rec, ref); } } @@ -155,7 +153,7 @@ public void addMetricsToFile(final MetricsFile readLengthHistogram = new Histogram(); + private final Histogram readLengthHistogram = new Histogram<>(); private AlignmentSummaryMetrics metrics; private long chimeras; 
private long chimerasDenominator; @@ -164,9 +162,9 @@ public void addMetricsToFile(final MetricsFile mismatchHistogram = new Histogram(); - private final Histogram hqMismatchHistogram = new Histogram(); - private final Histogram badCycleHistogram = new Histogram(); + private final Histogram mismatchHistogram = new Histogram<>(); + private final Histogram hqMismatchHistogram = new Histogram<>(); + private final Histogram badCycleHistogram = new Histogram<>(); public IndividualAlignmentSummaryMetricsCollector(final AlignmentSummaryMetrics.Category pairingCategory, final String sample, @@ -201,14 +199,15 @@ public void onComplete() { metrics.BAD_CYCLES = 0; for (final Histogram.Bin cycleBin : badCycleHistogram.values()) { final double badCyclePercentage = cycleBin.getValue() / metrics.TOTAL_READS; - if (badCyclePercentage >= .8) { + if (badCyclePercentage >= 0.8) { metrics.BAD_CYCLES++; } } if(doRefMetrics) { if (metrics.PF_READS > 0) metrics.PCT_PF_READS_ALIGNED = (double) metrics.PF_READS_ALIGNED / (double) metrics.PF_READS; - if (metrics.PF_READS_ALIGNED > 0) metrics.PCT_READS_ALIGNED_IN_PAIRS = (double) metrics.READS_ALIGNED_IN_PAIRS/ (double) metrics.PF_READS_ALIGNED; + if (metrics.PF_READS_ALIGNED > 0) metrics.PCT_READS_ALIGNED_IN_PAIRS = (double) metrics.READS_ALIGNED_IN_PAIRS / (double) metrics.PF_READS_ALIGNED; + if (metrics.PF_READS_ALIGNED > 0) metrics.PCT_PF_READS_IMPROPER_PAIRS = (double) metrics.PF_READS_IMPROPER_PAIRS / (double) metrics.PF_READS_ALIGNED; if (metrics.PF_READS_ALIGNED > 0) metrics.STRAND_BALANCE = numPositiveStrand / (double) metrics.PF_READS_ALIGNED; if (this.chimerasDenominator > 0) metrics.PCT_CHIMERAS = this.chimeras / (double) this.chimerasDenominator; @@ -239,16 +238,16 @@ private void collectReadData(final SAMRecord record) { if (adapterUtility.isAdapterSequence(readBases)) { this.adapterReads++; } - } - else if(doRefMetrics) { + } else if(doRefMetrics) { metrics.PF_READS_ALIGNED++; + /* the proper-pair flag may only be queried on paired reads; unpaired reads are never counted as improper */ if (record.getReadPairedFlag() && !record.getProperPairFlag())
metrics.PF_READS_IMPROPER_PAIRS++; if (!record.getReadNegativeStrandFlag()) numPositiveStrand++; if (record.getReadPairedFlag() && !record.getMateUnmappedFlag()) { metrics.READS_ALIGNED_IN_PAIRS++; // Check that both ends have mapq > minimum final Integer mateMq = record.getIntegerAttribute(SAMTag.MQ.toString()); - if (mateMq == null || mateMq >= MAPPING_QUALITY_THRESOLD && record.getMappingQuality() >= MAPPING_QUALITY_THRESOLD) { + if (mateMq == null || mateMq >= MAPPING_QUALITY_THRESHOLD && record.getMappingQuality() >= MAPPING_QUALITY_THRESHOLD) { ++this.chimerasDenominator; // With both reads mapped we can see if this pair is chimeric @@ -256,10 +255,9 @@ else if(doRefMetrics) { ++this.chimeras; } } - } - else { // fragment reads or read pairs with one end that maps + } else { // fragment reads or read pairs with one end that maps // Consider chimeras that occur *within* the read using the SA tag - if (record.getMappingQuality() >= MAPPING_QUALITY_THRESOLD) { + if (record.getMappingQuality() >= MAPPING_QUALITY_THRESHOLD) { ++this.chimerasDenominator; if (record.getAttribute(SAMTag.SA.toString()) != null) ++this.chimeras; } @@ -348,7 +346,7 @@ private boolean isNoiseRead(final SAMRecord record) { private boolean isHighQualityMapping(final SAMRecord record) { return !record.getReadFailsVendorQualityCheckFlag() && - record.getMappingQuality() >= MAPPING_QUALITY_THRESOLD; + record.getMappingQuality() >= MAPPING_QUALITY_THRESHOLD; } public AlignmentSummaryMetrics getMetrics() { diff --git a/src/test/java/picard/analysis/CollectAlignmentSummaryMetricsTest.java b/src/test/java/picard/analysis/CollectAlignmentSummaryMetricsTest.java index 58e2da9c4..d9b0e3020 100644 --- a/src/test/java/picard/analysis/CollectAlignmentSummaryMetricsTest.java +++ b/src/test/java/picard/analysis/CollectAlignmentSummaryMetricsTest.java @@ -60,48 +60,59 @@ public void test() throws IOException { }; Assert.assertEquals(runPicardCommandLine(args), 0); - final MetricsFile> output = new 
MetricsFile>(); + final MetricsFile> output = new MetricsFile<>(); output.read(new FileReader(outfile)); - + + Assert.assertEquals(output.getMetrics().size(), 3); for (final AlignmentSummaryMetrics metrics : output.getMetrics()) { Assert.assertEquals(metrics.MEAN_READ_LENGTH, 101.0); switch (metrics.CATEGORY) { - case FIRST_OF_PAIR: - Assert.assertEquals(metrics.TOTAL_READS, 9); - Assert.assertEquals(metrics.PF_READS, 7); - Assert.assertEquals(metrics.PF_NOISE_READS, 1); - Assert.assertEquals(metrics.PF_HQ_ALIGNED_READS, 3); - Assert.assertEquals(metrics.PF_HQ_ALIGNED_Q20_BASES, 59); - Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 19.0); - Assert.assertEquals(metrics.PF_ALIGNED_BASES, 303); - Assert.assertEquals(metrics.PF_MISMATCH_RATE, /*58D/303D*/0.191419); - Assert.assertEquals(metrics.BAD_CYCLES, 19); - break; - case SECOND_OF_PAIR: - Assert.assertEquals(metrics.TOTAL_READS, 9); - Assert.assertEquals(metrics.PF_READS, 9); - Assert.assertEquals(metrics.PF_NOISE_READS, 1); - Assert.assertEquals(metrics.PF_HQ_ALIGNED_READS, 7); - Assert.assertEquals(metrics.PF_HQ_ALIGNED_Q20_BASES, 239); - Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 3.0); - Assert.assertEquals(metrics.PF_ALIGNED_BASES, 707); - Assert.assertEquals(metrics.PF_MISMATCH_RATE, /*19D/707D*/0.026874); - Assert.assertEquals(metrics.BAD_CYCLES, 3); - break; - case PAIR: - Assert.assertEquals(metrics.TOTAL_READS, 18); - Assert.assertEquals(metrics.PF_READS, 16); - Assert.assertEquals(metrics.PF_NOISE_READS, 2); - Assert.assertEquals(metrics.PF_HQ_ALIGNED_READS, 10); - Assert.assertEquals(metrics.PF_HQ_ALIGNED_Q20_BASES, 298); - Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 3.0); - Assert.assertEquals(metrics.PF_ALIGNED_BASES, 1010); - Assert.assertEquals(metrics.PF_MISMATCH_RATE, /*77D/1010D*/0.076238); - Assert.assertEquals(metrics.BAD_CYCLES, 22); - break; - case UNPAIRED: - default: - Assert.fail("Data does not contain this category: " + metrics.CATEGORY); + case FIRST_OF_PAIR: + 
Assert.assertEquals(metrics.TOTAL_READS, 9); + Assert.assertEquals(metrics.PF_READS, 7); + Assert.assertEquals(metrics.PF_NOISE_READS, 1); + Assert.assertEquals(metrics.PF_HQ_ALIGNED_READS, 3); + Assert.assertEquals(metrics.PF_HQ_ALIGNED_Q20_BASES, 59); + Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 19.0); + Assert.assertEquals(metrics.PF_READS_ALIGNED, 3); + Assert.assertEquals(metrics.PF_READS_IMPROPER_PAIRS, 1); + Assert.assertEquals(metrics.PCT_PF_READS_IMPROPER_PAIRS, 0.333333 /* 1/3 */); + Assert.assertEquals(metrics.PF_ALIGNED_BASES, 303); + Assert.assertEquals(metrics.PF_MISMATCH_RATE, /*58D/303D*/0.191419); + Assert.assertEquals(metrics.BAD_CYCLES, 19); + break; + case SECOND_OF_PAIR: + Assert.assertEquals(metrics.TOTAL_READS, 9); + Assert.assertEquals(metrics.PF_READS, 9); + Assert.assertEquals(metrics.PF_NOISE_READS, 1); + Assert.assertEquals(metrics.PF_HQ_ALIGNED_READS, 7); + Assert.assertEquals(metrics.PF_HQ_ALIGNED_Q20_BASES, 239); + Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 3.0); + Assert.assertEquals(metrics.PF_READS_ALIGNED, 7); + Assert.assertEquals(metrics.PF_READS_IMPROPER_PAIRS, 5); + Assert.assertEquals(metrics.PCT_PF_READS_IMPROPER_PAIRS, 0.714286 /* 5/7 */); + Assert.assertEquals(metrics.PF_ALIGNED_BASES, 707); + Assert.assertEquals(metrics.PCT_READS_ALIGNED_IN_PAIRS, 0.285714 /* 2D/7 */); + Assert.assertEquals(metrics.PF_MISMATCH_RATE, /*19D/707D*/0.026874); + Assert.assertEquals(metrics.BAD_CYCLES, 3); + break; + case PAIR: + Assert.assertEquals(metrics.TOTAL_READS, 18); + Assert.assertEquals(metrics.PF_READS, 16); + Assert.assertEquals(metrics.PF_NOISE_READS, 2); + Assert.assertEquals(metrics.PF_HQ_ALIGNED_READS, 10); + Assert.assertEquals(metrics.PF_HQ_ALIGNED_Q20_BASES, 298); + Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 3.0); + Assert.assertEquals(metrics.PF_READS_ALIGNED, 10); + Assert.assertEquals(metrics.PF_READS_IMPROPER_PAIRS, 6); + Assert.assertEquals(metrics.PCT_PF_READS_IMPROPER_PAIRS, 0.6 /* 6/10 
*/); + Assert.assertEquals(metrics.PF_ALIGNED_BASES, 1010); + Assert.assertEquals(metrics.PF_MISMATCH_RATE, /*77D/1010D*/0.076238); + Assert.assertEquals(metrics.BAD_CYCLES, 22); + break; + case UNPAIRED: + default: + Assert.fail("Data does not contain this category: " + metrics.CATEGORY); } } } @@ -123,7 +134,7 @@ public void testBisulfite() throws IOException { final NumberFormat format = NumberFormat.getInstance(); format.setMaximumFractionDigits(4); - final MetricsFile> output = new MetricsFile>(); + final MetricsFile> output = new MetricsFile<>(); output.read(new FileReader(outfile)); for (final AlignmentSummaryMetrics metrics : output.getMetrics()) { @@ -180,7 +191,7 @@ public void testNoReference() throws IOException { }; Assert.assertEquals(runPicardCommandLine(args), 0); - final MetricsFile> output = new MetricsFile>(); + final MetricsFile> output = new MetricsFile<>(); output.read(new FileReader(outfile)); for (final AlignmentSummaryMetrics metrics : output.getMetrics()) { @@ -237,7 +248,7 @@ public void testZeroLengthReads() throws IOException { }; Assert.assertEquals(runPicardCommandLine(args), 0); - final MetricsFile> output = new MetricsFile>(); + final MetricsFile> output = new MetricsFile<>(); output.read(new FileReader(outfile)); for (final AlignmentSummaryMetrics metrics : output.getMetrics()) { // test that it doesn't blow up @@ -259,7 +270,7 @@ public void testMultipleLevelsOfMetrics() throws IOException { }; Assert.assertEquals(runPicardCommandLine(args), 0); - final MetricsFile> output = new MetricsFile>(); + final MetricsFile> output = new MetricsFile<>(); output.read(new FileReader(outfile)); for (final AlignmentSummaryMetrics metrics : output.getMetrics()) { @@ -520,7 +531,7 @@ public void testChimeras() throws IOException { }; Assert.assertEquals(runPicardCommandLine(args), 0); - final MetricsFile> output = new MetricsFile>(); + final MetricsFile> output = new MetricsFile<>(); output.read(new FileReader(outfile)); for (final 
AlignmentSummaryMetrics metrics : output.getMetrics()) {