From 37db695a5d4d92bb566b2fa904e280e9020bed43 Mon Sep 17 00:00:00 2001 From: tfenne Date: Mon, 12 Dec 2016 18:13:28 -0500 Subject: [PATCH 1/2] Fix to handle overlapped read clipping in MergeBamAlignment when soft-clipping is already present. --- .../java/picard/sam/AbstractAlignmentMerger.java | 31 +++++++++---- .../picard/sam/AbstractAlignmentMergerTest.java | 52 ++++++++++++++++++++++ 2 files changed, 74 insertions(+), 9 deletions(-) create mode 100644 src/test/java/picard/sam/AbstractAlignmentMergerTest.java diff --git a/src/main/java/picard/sam/AbstractAlignmentMerger.java b/src/main/java/picard/sam/AbstractAlignmentMerger.java index 484a9b568..c10827e4a 100644 --- a/src/main/java/picard/sam/AbstractAlignmentMerger.java +++ b/src/main/java/picard/sam/AbstractAlignmentMerger.java @@ -58,6 +58,8 @@ import java.io.File; import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; import java.util.List; /** @@ -590,11 +592,10 @@ private void transferAlignmentInfoToPairedRead(final SAMRecord firstUnaligned, f * Checks to see whether the ends of the reads overlap and soft clips reads * them if necessary. */ - protected void clipForOverlappingReads(final SAMRecord read1, final SAMRecord read2) { + protected static void clipForOverlappingReads(final SAMRecord read1, final SAMRecord read2) { // If both reads are mapped, see if we need to clip the ends due to small // insert size if (!(read1.getReadUnmappedFlag() || read2.getReadUnmappedFlag())) { - if (read1.getReadNegativeStrandFlag() != read2.getReadNegativeStrandFlag()) { final SAMRecord pos = (read1.getReadNegativeStrandFlag()) ? read2 : read1; final SAMRecord neg = (read1.getReadNegativeStrandFlag()) ? read1 : read2; @@ -605,23 +606,35 @@ protected void clipForOverlappingReads(final SAMRecord read1, final SAMRecord re final int negDiff = pos.getAlignmentStart() - neg.getAlignmentStart(); if (posDiff > 0) { - CigarUtil.softClip3PrimeEndOfRead(pos, Math.min(pos.getReadLength(), - pos.getReadLength() - posDiff + 1)); + final List elems = new ArrayList<>(pos.getCigar().getCigarElements()); + Collections.reverse(elems); + final int clipped = lengthOfSoftClipping(elems.iterator()); + final int clipFrom = pos.getReadLength() - posDiff - clipped + 1; + CigarUtil.softClip3PrimeEndOfRead(pos, Math.min(pos.getReadLength(), clipFrom)); removeNmMdAndUqTags(pos); // these tags are now invalid! } if (negDiff > 0) { - CigarUtil.softClip3PrimeEndOfRead(neg, Math.min(neg.getReadLength(), - neg.getReadLength() - negDiff + 1)); + final int clipped = lengthOfSoftClipping(neg.getCigar().getCigarElements().iterator()); + final int clipFrom = neg.getReadLength() - negDiff - clipped + 1; + CigarUtil.softClip3PrimeEndOfRead(neg, Math.min(neg.getReadLength(), clipFrom)); removeNmMdAndUqTags(neg); // these tags are now invalid! } - } - } else { - // TODO: What about RR/FF pairs? } } + } + + /** Returns the number of soft-clipped bases until a non-soft-clipping element is encountered. */ + private static int lengthOfSoftClipping(Iterator iterator) { + int clipped = 0; + while (iterator.hasNext()) { + final CigarElement elem = iterator.next(); + if (elem.getOperator() != CigarOperator.SOFT_CLIP) break; + clipped = elem.getLength(); + } + return clipped; } /** diff --git a/src/test/java/picard/sam/AbstractAlignmentMergerTest.java b/src/test/java/picard/sam/AbstractAlignmentMergerTest.java new file mode 100644 index 000000000..6481a6bea --- /dev/null +++ b/src/test/java/picard/sam/AbstractAlignmentMergerTest.java @@ -0,0 +1,52 @@ +package picard.sam; + +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordSetBuilder; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.List; + +/** + * Tests related to code in AbstractAlignmentMerger + */ +public class AbstractAlignmentMergerTest { + @Test public void tesOverlappedReadClippingWithNonOverlappedReads() { + final SAMRecordSetBuilder set = new SAMRecordSetBuilder(); + set.setReadLength(110); + final List recs = set.addPair("q1", 0, 100, 200, false, false, "110M", "110M", false, true, 30); + final SAMRecord r1 = recs.get(0); + final SAMRecord r2 = recs.get(1); + AbstractAlignmentMerger.clipForOverlappingReads(r1, r2); + Assert.assertEquals(r1.getAlignmentStart(), 100); + Assert.assertEquals(r1.getCigarString(), "110M"); + Assert.assertEquals(r2.getAlignmentStart(), 200); + Assert.assertEquals(r2.getCigarString(), "110M"); + } + + @Test public void testBasicOverlappedReadClipping() { + final SAMRecordSetBuilder set = new SAMRecordSetBuilder(); + set.setReadLength(110); + final List recs = set.addPair("q1", 0, 100, 90, false, false, "110M", "110M", false, true, 30); + final SAMRecord r1 = recs.get(0); + final SAMRecord r2 = recs.get(1); + AbstractAlignmentMerger.clipForOverlappingReads(r1, r2); + Assert.assertEquals(r1.getAlignmentStart(), 100); + Assert.assertEquals(r1.getCigarString(), "100M10S"); + Assert.assertEquals(r2.getAlignmentStart(), 100); + Assert.assertEquals(r2.getCigarString(), "10S100M"); + } + + @Test public void testOverlappedReadClippingWithExistingSoftClipping() { + final SAMRecordSetBuilder set = new SAMRecordSetBuilder(); + set.setReadLength(120); + final List recs = set.addPair("q1", 0, 100, 95, false, false, "110M10S", "15S105M", false, true, 30); + final SAMRecord r1 = recs.get(0); + final SAMRecord r2 = recs.get(1); + AbstractAlignmentMerger.clipForOverlappingReads(r1, r2); + Assert.assertEquals(r1.getAlignmentStart(), 100); + Assert.assertEquals(r1.getCigarString(), "100M20S"); + Assert.assertEquals(r2.getAlignmentStart(), 100); + Assert.assertEquals(r2.getCigarString(), "20S100M"); + } +} From 535937c85666f1ea11a5a2024b95f509e85b923d Mon Sep 17 00:00:00 2001 From: tfenne Date: Tue, 13 Dec 2016 04:38:22 -0500 Subject: [PATCH 2/2] Added handling for hard-clipping outside of soft-clipping. --- src/main/java/picard/sam/AbstractAlignmentMerger.java | 4 ++-- src/test/java/picard/sam/AbstractAlignmentMergerTest.java | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/main/java/picard/sam/AbstractAlignmentMerger.java b/src/main/java/picard/sam/AbstractAlignmentMerger.java index c10827e4a..ec4dfae50 100644 --- a/src/main/java/picard/sam/AbstractAlignmentMerger.java +++ b/src/main/java/picard/sam/AbstractAlignmentMerger.java @@ -630,8 +630,8 @@ private static int lengthOfSoftClipping(Iterator iterator) { int clipped = 0; while (iterator.hasNext()) { final CigarElement elem = iterator.next(); - if (elem.getOperator() != CigarOperator.SOFT_CLIP) break; - clipped = elem.getLength(); + if (elem.getOperator() != CigarOperator.SOFT_CLIP && elem.getOperator() != CigarOperator.HARD_CLIP) break; + if (elem.getOperator() == CigarOperator.SOFT_CLIP) clipped = elem.getLength(); } return clipped; diff --git a/src/test/java/picard/sam/AbstractAlignmentMergerTest.java b/src/test/java/picard/sam/AbstractAlignmentMergerTest.java index 6481a6bea..828d1c466 100644 --- a/src/test/java/picard/sam/AbstractAlignmentMergerTest.java +++ b/src/test/java/picard/sam/AbstractAlignmentMergerTest.java @@ -49,4 +49,17 @@ Assert.assertEquals(r2.getAlignmentStart(), 100); Assert.assertEquals(r2.getCigarString(), "20S100M"); } + + @Test public void testOverlappedReadClippingWithExistingSoftClippingAndHardClipping() { + final SAMRecordSetBuilder set = new SAMRecordSetBuilder(); + set.setReadLength(120); + final List recs = set.addPair("q1", 0, 100, 95, false, false, "110M10S5H", "5H15S105M", false, true, 30); + final SAMRecord r1 = recs.get(0); + final SAMRecord r2 = recs.get(1); + AbstractAlignmentMerger.clipForOverlappingReads(r1, r2); + Assert.assertEquals(r1.getAlignmentStart(), 100); + Assert.assertEquals(r1.getCigarString(), "100M20S"); // Should ideally be 100M20S5H + Assert.assertEquals(r2.getAlignmentStart(), 100); + Assert.assertEquals(r2.getCigarString(), "20S100M"); // Should ideally be 5H20S100M + } }