From c272532e0cd54c88a268d7eebf32a49c4841f172 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Fri, 3 Feb 2017 13:20:26 -0500 Subject: [PATCH 1/4] fix Exception thrown in MergeBamAlignment when NM tag was present and alignment information was copied out to tag. Added tests specifically covering this usecase. --- src/main/java/picard/sam/AbstractAlignmentMerger.java | 2 +- src/test/java/picard/sam/MergeBamAlignmentTest.java | 2 +- testdata/picard/sam/MergeBamAlignment/contam.aligned.sam | 10 +++++----- .../sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam | 14 +++++++------- .../sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam | 14 +++++++------- .../sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam | 4 ++-- 6 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/main/java/picard/sam/AbstractAlignmentMerger.java b/src/main/java/picard/sam/AbstractAlignmentMerger.java index e7a5390df..46d851aab 100644 --- a/src/main/java/picard/sam/AbstractAlignmentMerger.java +++ b/src/main/java/picard/sam/AbstractAlignmentMerger.java @@ -659,7 +659,7 @@ static private String encodeMappingInformation(SAMRecord rec) { ((Integer) rec.getAlignmentStart()).toString(), rec.getCigarString(), ((Integer) rec.getMappingQuality()).toString(), - rec.getStringAttribute(SAMTag.NM.name()))+";"; + Optional.ofNullable(rec.getIntegerAttribute(SAMTag.NM.name())).map(o->o.toString()).orElse(""))+";"; } /** diff --git a/src/test/java/picard/sam/MergeBamAlignmentTest.java b/src/test/java/picard/sam/MergeBamAlignmentTest.java index 2230923ec..4128337b9 100644 --- a/src/test/java/picard/sam/MergeBamAlignmentTest.java +++ b/src/test/java/picard/sam/MergeBamAlignmentTest.java @@ -1717,7 +1717,7 @@ public void testContaminationDetection(final AbstractAlignmentMerger.UnmappingRe false, true, false, 1, "0", "1.0", "align!", "myAligner", true, refFasta, mergedSam, - null, null, null, null, true, null, strategy); + null, null, null, null, true, SAMFileHeader.SortOrder.coordinate, strategy); assertSamValid(mergedSam); IOUtil.assertFilesEqual(expectedSam, mergedSam); diff --git a/testdata/picard/sam/MergeBamAlignment/contam.aligned.sam b/testdata/picard/sam/MergeBamAlignment/contam.aligned.sam index 1d0144849..0deabe1d4 100644 --- a/testdata/picard/sam/MergeBamAlignment/contam.aligned.sam +++ b/testdata/picard/sam/MergeBamAlignment/contam.aligned.sam @@ -8,11 +8,11 @@ @CO r1_clipped_r2_clipped should be marked contaminant because at least one segment is overclipped @CO r1_clipped_r2_perfect should be marked contaminant because at least one segment is overclipped @CO r1_clipped_r2_unmapped should be marked contaminant because at least one segment is overclipped -frag_multiple_primary_1 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_multiple_primary_1 0 chr1 1 15 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_multiple_primary_2 0 chr1 1 15 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_primary_clipped 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_multiple_primary_1 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 NM:i:0 +frag_multiple_primary_1 0 chr1 1 15 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 NM:i:0 +frag_multiple_primary_2 0 chr1 1 15 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 NM:i:0 +frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 NM:i:0 +frag_primary_clipped 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 NM:i:0 frag_primary_clipped 256 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 frag_primary_clipped 2048 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 frag_secondary_clipped 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 diff --git a/testdata/picard/sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam b/testdata/picard/sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam index 4dca588a6..e175bcfa6 100644 --- a/testdata/picard/sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam +++ b/testdata/picard/sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam @@ -2,15 +2,15 @@ @SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/net/sf/picard/sam/MergeBamAlignment/cliptest.fasta @RG ID:0 SM:Hi,Mom! PL:ILLUMINA @PG ID:0 VN:1.0 CL:align! PN:myAligner -frag_multiple_primary_1 4 chr1 1 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,null; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +frag_multiple_primary_1 4 chr1 1 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,0; PG:Z:0 RG:Z:0 NM:i:0 CO:Z:Cross-species contamination frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:50 PG:Z:0 RG:Z:0 NM:i:0 UQ:i:0 frag_multiple_primary_2 256 chr1 1 15 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:0T0T0C0A0T1C0T0G1 PG:Z:0 RG:Z:0 NM:i:8 UQ:i:240 -frag_primary_clipped 4 chr1 1 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,null; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +frag_primary_clipped 4 chr1 1 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,0; PG:Z:0 RG:Z:0 NM:i:0 CO:Z:Cross-species contamination frag_secondary_clipped 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:50 PG:Z:0 RG:Z:0 NM:i:0 UQ:i:0 frag_secondary_clipped 256 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:0T0T0C0A0T1C0T0G1 PG:Z:0 RG:Z:0 NM:i:8 UQ:i:240 -r1_clipped_r2_clipped 109 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,null; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination -r1_clipped_r2_perfect 109 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,null; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination -r1_clipped_r2_unmapped 77 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,null; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +r1_clipped_r2_clipped 109 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +r1_clipped_r2_perfect 109 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +r1_clipped_r2_unmapped 77 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination r1_clipped_r2_unmapped 141 * 0 0 * * 0 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 -r1_clipped_r2_clipped 157 * 0 0 20S10M20S * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PA:Z:chr1,51,20S10M20S,30,null; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination -r1_clipped_r2_perfect 157 * 0 0 50M * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PA:Z:chr1,51,50M,30,null; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +r1_clipped_r2_clipped 157 * 0 0 20S10M20S * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PA:Z:chr1,51,20S10M20S,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +r1_clipped_r2_perfect 157 * 0 0 50M * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PA:Z:chr1,51,50M,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination diff --git a/testdata/picard/sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam b/testdata/picard/sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam index 40c06f420..cfc0931fa 100644 --- a/testdata/picard/sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam +++ b/testdata/picard/sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam @@ -6,11 +6,11 @@ frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATG frag_multiple_primary_2 256 chr1 1 15 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:0T0T0C0A0T1C0T0G1 PG:Z:0 RG:Z:0 NM:i:8 UQ:i:240 frag_secondary_clipped 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:50 PG:Z:0 RG:Z:0 NM:i:0 UQ:i:0 frag_secondary_clipped 256 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:0T0T0C0A0T1C0T0G1 PG:Z:0 RG:Z:0 NM:i:8 UQ:i:240 -frag_multiple_primary_1 4 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,null; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination -frag_primary_clipped 4 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,null; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination -r1_clipped_r2_clipped 109 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,null; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination -r1_clipped_r2_perfect 109 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,null; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination -r1_clipped_r2_unmapped 77 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,null; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +frag_multiple_primary_1 4 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,0; PG:Z:0 RG:Z:0 NM:i:0 CO:Z:Cross-species contamination +frag_primary_clipped 4 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,0; PG:Z:0 RG:Z:0 NM:i:0 CO:Z:Cross-species contamination +r1_clipped_r2_clipped 109 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +r1_clipped_r2_perfect 109 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +r1_clipped_r2_unmapped 77 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination r1_clipped_r2_unmapped 141 * 0 0 * * 0 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 -r1_clipped_r2_clipped 157 * 0 0 * * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PA:Z:chr1,51,20S10M20S,30,null; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination -r1_clipped_r2_perfect 157 * 0 0 * * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PA:Z:chr1,51,50M,30,null; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +r1_clipped_r2_clipped 157 * 0 0 * * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PA:Z:chr1,51,20S10M20S,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +r1_clipped_r2_perfect 157 * 0 0 * * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PA:Z:chr1,51,50M,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination diff --git a/testdata/picard/sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam b/testdata/picard/sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam index 5bead4783..e574982dd 100644 --- a/testdata/picard/sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam +++ b/testdata/picard/sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam @@ -2,10 +2,10 @@ @SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/net/sf/picard/sam/MergeBamAlignment/cliptest.fasta @RG ID:0 SM:Hi,Mom! PL:ILLUMINA @PG ID:0 VN:1.0 CL:align! PN:myAligner -frag_multiple_primary_1 4 chr1 1 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +frag_multiple_primary_1 4 chr1 1 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 NM:i:0 CO:Z:Cross-species contamination frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:50 PG:Z:0 RG:Z:0 NM:i:0 UQ:i:0 frag_multiple_primary_2 256 chr1 1 15 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:0T0T0C0A0T1C0T0G1 PG:Z:0 RG:Z:0 NM:i:8 UQ:i:240 -frag_primary_clipped 4 chr1 1 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +frag_primary_clipped 4 chr1 1 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 NM:i:0 CO:Z:Cross-species contamination frag_secondary_clipped 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:50 PG:Z:0 RG:Z:0 NM:i:0 UQ:i:0 frag_secondary_clipped 256 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:0T0T0C0A0T1C0T0G1 PG:Z:0 RG:Z:0 NM:i:8 UQ:i:240 r1_clipped_r2_clipped 109 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination From 2d8c9602d1ca6424954ff7328c641def2ecaad82 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Fri, 3 Feb 2017 13:49:52 -0500 Subject: [PATCH 2/4] if asked to move alignment information to tag, NM should be reset. --- src/main/java/picard/sam/AbstractAlignmentMerger.java | 1 + testdata/picard/sam/MergeBamAlignment/contam.aligned.sam | 8 ++++---- .../picard/sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam | 2 +- .../picard/sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam | 6 +++--- .../picard/sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam | 2 +- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/main/java/picard/sam/AbstractAlignmentMerger.java b/src/main/java/picard/sam/AbstractAlignmentMerger.java index 46d851aab..c29b96de1 100644 --- a/src/main/java/picard/sam/AbstractAlignmentMerger.java +++ b/src/main/java/picard/sam/AbstractAlignmentMerger.java @@ -632,6 +632,7 @@ private void transferAlignmentInfoToFragment(final SAMRecord unaligned, final SA unaligned.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START); unaligned.setCigar(null); unaligned.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR); + unaligned.setAttribute("NM", null); } unaligned.setReadUnmappedFlag(true); diff --git a/testdata/picard/sam/MergeBamAlignment/contam.aligned.sam b/testdata/picard/sam/MergeBamAlignment/contam.aligned.sam index 0deabe1d4..8bde03203 100644 --- a/testdata/picard/sam/MergeBamAlignment/contam.aligned.sam +++ b/testdata/picard/sam/MergeBamAlignment/contam.aligned.sam @@ -9,16 +9,16 @@ @CO r1_clipped_r2_perfect should be marked contaminant because at least one segment is overclipped @CO r1_clipped_r2_unmapped should be marked contaminant because at least one segment is overclipped frag_multiple_primary_1 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 NM:i:0 -frag_multiple_primary_1 0 chr1 1 15 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 NM:i:0 -frag_multiple_primary_2 0 chr1 1 15 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 NM:i:0 -frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 NM:i:0 +frag_multiple_primary_1 0 chr1 1 15 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 NM:i:1 +frag_multiple_primary_2 0 chr1 1 15 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 NM:i:10 +frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 NM:i:35 frag_primary_clipped 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 NM:i:0 frag_primary_clipped 256 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 frag_primary_clipped 2048 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 frag_secondary_clipped 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 frag_secondary_clipped 256 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 r1_clipped_r2_clipped 97 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_clipped 145 chr1 51 30 20S10M20S chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_clipped 145 chr1 51 30 20S10M20S chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 NM:i:37 r1_clipped_r2_perfect 97 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 r1_clipped_r2_perfect 145 chr1 51 30 50M chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 r1_clipped_r2_unmapped 73 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 diff --git a/testdata/picard/sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam b/testdata/picard/sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam index e175bcfa6..6b47b2699 100644 --- a/testdata/picard/sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam +++ b/testdata/picard/sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam @@ -12,5 +12,5 @@ r1_clipped_r2_clipped 109 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGA r1_clipped_r2_perfect 109 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination r1_clipped_r2_unmapped 77 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination r1_clipped_r2_unmapped 141 * 0 0 * * 0 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 -r1_clipped_r2_clipped 157 * 0 0 20S10M20S * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PA:Z:chr1,51,20S10M20S,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +r1_clipped_r2_clipped 157 * 0 0 20S10M20S * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PA:Z:chr1,51,20S10M20S,30,37; PG:Z:0 RG:Z:0 NM:i:37 CO:Z:Cross-species contamination r1_clipped_r2_perfect 157 * 0 0 50M * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PA:Z:chr1,51,50M,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination diff --git a/testdata/picard/sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam b/testdata/picard/sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam index cfc0931fa..e3a9c761b 100644 --- a/testdata/picard/sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam +++ b/testdata/picard/sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam @@ -6,11 +6,11 @@ frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATG frag_multiple_primary_2 256 chr1 1 15 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:0T0T0C0A0T1C0T0G1 PG:Z:0 RG:Z:0 NM:i:8 UQ:i:240 frag_secondary_clipped 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:50 PG:Z:0 RG:Z:0 NM:i:0 UQ:i:0 frag_secondary_clipped 256 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:0T0T0C0A0T1C0T0G1 PG:Z:0 RG:Z:0 NM:i:8 UQ:i:240 -frag_multiple_primary_1 4 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,0; PG:Z:0 RG:Z:0 NM:i:0 CO:Z:Cross-species contamination -frag_primary_clipped 4 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,0; PG:Z:0 RG:Z:0 NM:i:0 CO:Z:Cross-species contamination +frag_multiple_primary_1 4 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,0; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +frag_primary_clipped 4 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,0; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination r1_clipped_r2_clipped 109 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination r1_clipped_r2_perfect 109 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination r1_clipped_r2_unmapped 77 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination r1_clipped_r2_unmapped 141 * 0 0 * * 0 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 -r1_clipped_r2_clipped 157 * 0 0 * * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PA:Z:chr1,51,20S10M20S,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +r1_clipped_r2_clipped 157 * 0 0 * * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PA:Z:chr1,51,20S10M20S,30,37; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination r1_clipped_r2_perfect 157 * 0 0 * * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PA:Z:chr1,51,50M,30,; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination diff --git a/testdata/picard/sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam b/testdata/picard/sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam index e574982dd..13b8d88e5 100644 --- a/testdata/picard/sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam +++ b/testdata/picard/sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam @@ -12,5 +12,5 @@ r1_clipped_r2_clipped 109 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGA r1_clipped_r2_perfect 109 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination r1_clipped_r2_unmapped 77 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination r1_clipped_r2_unmapped 141 * 0 0 * * 0 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 -r1_clipped_r2_clipped 157 * 0 0 20S10M20S * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +r1_clipped_r2_clipped 157 * 0 0 20S10M20S * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 NM:i:37 CO:Z:Cross-species contamination r1_clipped_r2_perfect 157 * 0 0 50M * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination From d501f5682970c43ce173b538e16021fcde7eaefa Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Fri, 3 Feb 2017 13:52:33 -0500 Subject: [PATCH 3/4] use SamTags value of NM --- src/main/java/picard/sam/AbstractAlignmentMerger.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/picard/sam/AbstractAlignmentMerger.java b/src/main/java/picard/sam/AbstractAlignmentMerger.java index c29b96de1..7ae141feb 100644 --- a/src/main/java/picard/sam/AbstractAlignmentMerger.java +++ b/src/main/java/picard/sam/AbstractAlignmentMerger.java @@ -632,7 +632,7 @@ private void transferAlignmentInfoToFragment(final SAMRecord unaligned, final SA unaligned.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START); unaligned.setCigar(null); unaligned.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR); - unaligned.setAttribute("NM", null); + unaligned.setAttribute(SAMTag.NM.name(), null); } unaligned.setReadUnmappedFlag(true); From cc66e2e2eb134dc873fd407a8ff69e4f398de6ea Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Fri, 3 Feb 2017 14:01:07 -0500 Subject: [PATCH 4/4] broke out long line into function to help clarify --- src/main/java/picard/sam/AbstractAlignmentMerger.java | 9 ++++++++- src/test/java/picard/sam/MergeBamAlignmentTest.java | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/main/java/picard/sam/AbstractAlignmentMerger.java b/src/main/java/picard/sam/AbstractAlignmentMerger.java index 7ae141feb..6b4e0ac43 100644 --- a/src/main/java/picard/sam/AbstractAlignmentMerger.java +++ b/src/main/java/picard/sam/AbstractAlignmentMerger.java @@ -660,7 +660,14 @@ static private String encodeMappingInformation(SAMRecord rec) { ((Integer) rec.getAlignmentStart()).toString(), rec.getCigarString(), ((Integer) rec.getMappingQuality()).toString(), - Optional.ofNullable(rec.getIntegerAttribute(SAMTag.NM.name())).map(o->o.toString()).orElse(""))+";"; + getStringOfNullable(rec.getIntegerAttribute(SAMTag.NM.name()))) + ";"; + } + + //returns the toString() of its input or an empty string if null. + static private String getStringOfNullable(final Object obj) { + return Optional.ofNullable(obj) + .map(Object::toString) + .orElse(""); } /** diff --git a/src/test/java/picard/sam/MergeBamAlignmentTest.java b/src/test/java/picard/sam/MergeBamAlignmentTest.java index 4128337b9..5c08ad9c4 100644 --- a/src/test/java/picard/sam/MergeBamAlignmentTest.java +++ b/src/test/java/picard/sam/MergeBamAlignmentTest.java @@ -1697,7 +1697,7 @@ private MostDistantStrategyAlignmentSpec(final boolean expectedPrimary, final St public Object[][] UnmappedReadStrategiesProvider() { return new Object[][] { {AbstractAlignmentMerger.UnmappingReadStrategy.DO_NOT_CHANGE, "contam.expected.NO_CHANGE.sam"}, - {null, "contam.expected.NO_CHANGE.sam"}, + {null, "contam.expected.NO_CHANGE.sam"}, {AbstractAlignmentMerger.UnmappingReadStrategy.COPY_TO_TAG, "contam.expected.COPY_TO_TAG.sam"}, {AbstractAlignmentMerger.UnmappingReadStrategy.MOVE_TO_TAG, "contam.expected.MOVE_TO_TAG.sam"} };