From 5a159845983fe7360800761af084155594f320e7 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Tue, 28 Mar 2017 15:43:27 -0400 Subject: [PATCH 1/4] reverted ezzat's change and added more stringent tests that should be passing --- .../analysis/AlignmentSummaryMetricsCollector.java | 22 +++--- .../CollectAlignmentSummaryMetricsTest.java | 85 +++++++++++++++++++--- .../sam/summary_alignment_bisulfite_test.sam | 6 +- 3 files changed, 88 insertions(+), 25 deletions(-) diff --git a/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java b/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java index 1dba3fe39..188d76e29 100644 --- a/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java +++ b/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java @@ -300,18 +300,18 @@ else if (!record.getReadFailsVendorQualityCheckFlag()) { final int readBaseIndex = readIndex + i; boolean mismatch = !SequenceUtil.basesEqual(readBases[readBaseIndex], refBases[refIndex+i]); boolean bisulfiteBase = false; - if (mismatch && isBisulfiteSequenced && - record.getReadNegativeStrandFlag() && - (refBases[refIndex + i] == 'G' || refBases[refIndex + i] == 'g') && - (readBases[readBaseIndex] == 'A' || readBases[readBaseIndex] == 'a') - || ((!record.getReadNegativeStrandFlag()) && - (refBases[refIndex + i] == 'C' || refBases[refIndex + i] == 'c') && - (readBases[readBaseIndex] == 'T') || readBases[readBaseIndex] == 't')) { - - bisulfiteBase = true; - mismatch = false; + if (mismatch && isBisulfiteSequenced) { + if ( (record.getReadNegativeStrandFlag() && + (refBases[refIndex+i] == 'G' || refBases[refIndex+i] =='g') && + (readBases[readBaseIndex] == 'A' || readBases[readBaseIndex] == 'a')) + || ((!record.getReadNegativeStrandFlag()) && + (refBases[refIndex+i] == 'C' || refBases[refIndex+i] == 'c') && + (readBases[readBaseIndex] == 'T') || readBases[readBaseIndex] == 't') ) { + + bisulfiteBase = true; + mismatch = false; + } } - if(mismatch) mismatchCount++; metrics.PF_ALIGNED_BASES++; diff --git a/src/test/java/picard/analysis/CollectAlignmentSummaryMetricsTest.java b/src/test/java/picard/analysis/CollectAlignmentSummaryMetricsTest.java index 58e2da9c4..d77b85925 100644 --- a/src/test/java/picard/analysis/CollectAlignmentSummaryMetricsTest.java +++ b/src/test/java/picard/analysis/CollectAlignmentSummaryMetricsTest.java @@ -134,32 +134,32 @@ public void testBisulfite() throws IOException { Assert.assertEquals(metrics.TOTAL_READS, 1); Assert.assertEquals(metrics.PF_READS, 1); Assert.assertEquals(metrics.PF_HQ_ALIGNED_BASES, 101); - Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 20.0); + Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 21.0); Assert.assertEquals(metrics.PF_ALIGNED_BASES, 101); - Assert.assertEquals(metrics.PF_MISMATCH_RATE, 20D/100D); - Assert.assertEquals(metrics.BAD_CYCLES, 20); - Assert.assertEquals(format.format(metrics.PF_HQ_ERROR_RATE), format.format(20/(double)100)); + Assert.assertEquals(metrics.PF_MISMATCH_RATE, 0.212121 /*21D/99D*/); + Assert.assertEquals(metrics.BAD_CYCLES, 21); + Assert.assertEquals(format.format(metrics.PF_HQ_ERROR_RATE), format.format(21/(double)99)); break; case SECOND_OF_PAIR: // Three no-calls, two potentially methylated bases Assert.assertEquals(metrics.TOTAL_READS, 1); Assert.assertEquals(metrics.PF_READS, 1); Assert.assertEquals(metrics.PF_HQ_ALIGNED_BASES, 101); - Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 3.0); + Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 4.0); Assert.assertEquals(metrics.PF_ALIGNED_BASES, 101); - Assert.assertEquals(metrics.PF_MISMATCH_RATE, /*3D/99D*/0.030303); - Assert.assertEquals(metrics.BAD_CYCLES, 3); - Assert.assertEquals(format.format(metrics.PF_HQ_ERROR_RATE), format.format(3/(double)99)); + Assert.assertEquals(metrics.PF_MISMATCH_RATE, /*4D/99D*/0.040404); + Assert.assertEquals(metrics.BAD_CYCLES, 4); + Assert.assertEquals(format.format(metrics.PF_HQ_ERROR_RATE), format.format(4/(double)99)); break; case PAIR: Assert.assertEquals(metrics.TOTAL_READS, 2); Assert.assertEquals(metrics.PF_READS, 2); Assert.assertEquals(metrics.PF_HQ_ALIGNED_BASES, 202); - Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 11.5); + Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 12.5D); Assert.assertEquals(metrics.PF_ALIGNED_BASES, 202); - Assert.assertEquals(metrics.PF_MISMATCH_RATE, /*23D/199D*/0.115578); - Assert.assertEquals(metrics.BAD_CYCLES, 23); - Assert.assertEquals(format.format(metrics.PF_HQ_ERROR_RATE), format.format(23/(double)199)); + Assert.assertEquals(metrics.PF_MISMATCH_RATE, 0.126263);// 25D/198D + Assert.assertEquals(metrics.BAD_CYCLES, 25); + Assert.assertEquals(format.format(metrics.PF_HQ_ERROR_RATE), format.format(25/(double)198)); break; case UNPAIRED: default: @@ -168,6 +168,67 @@ public void testBisulfite() throws IOException { } } + @Test + public void testBisulfiteButNot() throws IOException { + final File input = new File(TEST_DATA_DIR, "summary_alignment_bisulfite_test.sam"); + final File reference = new File(TEST_DATA_DIR, "summary_alignment_stats_test.fasta"); + final File outfile = File.createTempFile("alignmentMetrics", ".txt"); + outfile.deleteOnExit(); + final String[] args = new String[] { + "INPUT=" + input.getAbsolutePath(), + "OUTPUT=" + outfile.getAbsolutePath(), + "REFERENCE_SEQUENCE=" + reference.getAbsolutePath(), + "IS_BISULFITE_SEQUENCED=false" + }; + Assert.assertEquals(runPicardCommandLine(args), 0); + + final NumberFormat format = NumberFormat.getInstance(); + format.setMaximumFractionDigits(4); + + final MetricsFile> output = new MetricsFile<>(); + output.read(new FileReader(outfile)); + + for (final AlignmentSummaryMetrics metrics : output.getMetrics()) { + Assert.assertEquals(metrics.MEAN_READ_LENGTH, 101.0); + switch (metrics.CATEGORY) { + case FIRST_OF_PAIR: + // 19 no-calls, one potentially methylated base, one mismatch at a potentially methylated base + Assert.assertEquals(metrics.TOTAL_READS, 1); + Assert.assertEquals(metrics.PF_READS, 1); + Assert.assertEquals(metrics.PF_HQ_ALIGNED_BASES, 101); + Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 23.0); + Assert.assertEquals(metrics.PF_ALIGNED_BASES, 101); + Assert.assertEquals(metrics.PF_MISMATCH_RATE, 0.227723 /*23D/101D*/); + Assert.assertEquals(metrics.BAD_CYCLES, 23); + Assert.assertEquals(format.format(metrics.PF_HQ_ERROR_RATE), format.format(23/(double)101)); + break; + case SECOND_OF_PAIR: + // Three no-calls, two potentially methylated bases + Assert.assertEquals(metrics.TOTAL_READS, 1); + Assert.assertEquals(metrics.PF_READS, 1); + Assert.assertEquals(metrics.PF_HQ_ALIGNED_BASES, 101); + Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 6.0); + Assert.assertEquals(metrics.PF_ALIGNED_BASES, 101); + Assert.assertEquals(metrics.PF_MISMATCH_RATE, /*6D/101D*/0.059406); + Assert.assertEquals(metrics.BAD_CYCLES, 6); + Assert.assertEquals(format.format(metrics.PF_HQ_ERROR_RATE), format.format(6/(double)101)); + break; + case PAIR: + Assert.assertEquals(metrics.TOTAL_READS, 2); + Assert.assertEquals(metrics.PF_READS, 2); + Assert.assertEquals(metrics.PF_HQ_ALIGNED_BASES, 202); + Assert.assertEquals(metrics.PF_HQ_MEDIAN_MISMATCHES, 14.5D); + Assert.assertEquals(metrics.PF_ALIGNED_BASES, 202); + Assert.assertEquals(metrics.PF_MISMATCH_RATE, 0.143564);// 29D/202D + Assert.assertEquals(metrics.BAD_CYCLES, 29); + Assert.assertEquals(format.format(metrics.PF_HQ_ERROR_RATE), format.format(29/(double)202)); + break; + case UNPAIRED: + default: + Assert.fail("Data does not contain this category: " + metrics.CATEGORY); + } + } + } @Test public void testNoReference() throws IOException { diff --git a/testdata/picard/sam/summary_alignment_bisulfite_test.sam b/testdata/picard/sam/summary_alignment_bisulfite_test.sam index a1e05aa13..27e40ec06 100644 --- a/testdata/picard/sam/summary_alignment_bisulfite_test.sam +++ b/testdata/picard/sam/summary_alignment_bisulfite_test.sam @@ -8,5 +8,7 @@ @SQ SN:chr7 LN:202 @SQ SN:chr8 LN:202 @RG ID:0 SM:Hi,Mom! PL:ILLUMINA -SL-XAV:1:1:0:1721#0/1 83 chr7 1 255 101M = 102 40 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 -SL-XAV:1:1:0:1721#0/2 163 chr7 102 255 101M = 1 -40 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +@CO reference for /2 ACGCGGCATCCCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTCACTATGGTACATCTTGTCGTGCGGCCAGAGCATACA +@CO reference for /1 CAACAGAAGGGGGGATCTGTGTTTGTGTTTCGGATTTCCTGCTGAAAAGGTTTTCGGGTCCCCCCCCCATCCCGATTTCCTTCCGCAGCTTACCTCCCGAA +SL-XAV:1:1:0:1721#0/1 83 chr7 1 255 101M = 102 40 CAACAGAAGcnGGnATCTGTGTTTGTGTTTCGaATTTtCTGCTGAAnnGnTTnTCGnnTCnnnnnnnnATCCCGATTTCnTTCCGCAGCTnACCTCCCaAn )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +SL-XAV:1:1:0:1721#0/2 163 chr7 102 255 101M = 1 -40 nCGCGGCATCnCGATTTCTTTCCGCAaCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTtATTATGGTACATCTTGTCGTGCGGCnAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 \ No newline at end of file From 65eb90fe3cae7de0429085cae805f241f8b42095 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Tue, 28 Mar 2017 15:45:08 -0400 Subject: [PATCH 2/4] a commit who's sole purpose is to show that ezzat's change breaks the new test --- .../analysis/AlignmentSummaryMetricsCollector.java | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java b/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java index 188d76e29..1dba3fe39 100644 --- a/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java +++ b/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java @@ -300,18 +300,18 @@ else if (!record.getReadFailsVendorQualityCheckFlag()) { final int readBaseIndex = readIndex + i; boolean mismatch = !SequenceUtil.basesEqual(readBases[readBaseIndex], refBases[refIndex+i]); boolean bisulfiteBase = false; - if (mismatch && isBisulfiteSequenced) { - if ( (record.getReadNegativeStrandFlag() && - (refBases[refIndex+i] == 'G' || refBases[refIndex+i] =='g') && - (readBases[readBaseIndex] == 'A' || readBases[readBaseIndex] == 'a')) - || ((!record.getReadNegativeStrandFlag()) && - (refBases[refIndex+i] == 'C' || refBases[refIndex+i] == 'c') && - (readBases[readBaseIndex] == 'T') || readBases[readBaseIndex] == 't') ) { - - bisulfiteBase = true; - mismatch = false; - } + if (mismatch && isBisulfiteSequenced && + record.getReadNegativeStrandFlag() && + (refBases[refIndex + i] == 'G' || refBases[refIndex + i] == 'g') && + (readBases[readBaseIndex] == 'A' || readBases[readBaseIndex] == 'a') + || ((!record.getReadNegativeStrandFlag()) && + (refBases[refIndex + i] == 'C' || refBases[refIndex + i] == 'c') && + (readBases[readBaseIndex] == 'T') || readBases[readBaseIndex] == 't')) { + + bisulfiteBase = true; + mismatch = false; } + if(mismatch) mismatchCount++; metrics.PF_ALIGNED_BASES++; From 192b6d805a5dd813862d2675866a0b38bc080959 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Tue, 28 Mar 2017 15:46:42 -0400 Subject: [PATCH 3/4] a proposed change that cleans up the mess and still passes the tests --- .../analysis/AlignmentSummaryMetricsCollector.java | 24 ++++++++-------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java b/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java index 1dba3fe39..9fca560d2 100644 --- a/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java +++ b/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java @@ -298,28 +298,20 @@ else if (!record.getReadFailsVendorQualityCheckFlag()) { for (int i=0; i= BASE_QUALITY_THRESHOLD) metrics.PF_HQ_ALIGNED_Q20_BASES++; if (mismatch) hqMismatchCount++; } From 61d6d9e9cbfcf94d5c613a3218463e63a54796b8 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Wed, 12 Apr 2017 09:46:19 -0400 Subject: [PATCH 4/4] - spelling issues --- .../java/picard/analysis/AlignmentSummaryMetricsCollector.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java b/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java index 9fca560d2..4061a2f85 100644 --- a/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java +++ b/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java @@ -299,19 +299,19 @@ else if (!record.getReadFailsVendorQualityCheckFlag()) { for (int i=0; i= BASE_QUALITY_THRESHOLD) metrics.PF_HQ_ALIGNED_Q20_BASES++; if (mismatch) hqMismatchCount++; }