diff --git a/src/main/java/picard/sam/AbstractAlignmentMerger.java b/src/main/java/picard/sam/AbstractAlignmentMerger.java index 484a9b568..ec4dfae50 100644 --- a/src/main/java/picard/sam/AbstractAlignmentMerger.java +++ b/src/main/java/picard/sam/AbstractAlignmentMerger.java @@ -58,6 +58,8 @@ import java.io.File; import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; import java.util.List; /** @@ -590,11 +592,10 @@ private void transferAlignmentInfoToPairedRead(final SAMRecord firstUnaligned, f * Checks to see whether the ends of the reads overlap and soft clips reads * them if necessary. */ - protected void clipForOverlappingReads(final SAMRecord read1, final SAMRecord read2) { + protected static void clipForOverlappingReads(final SAMRecord read1, final SAMRecord read2) { // If both reads are mapped, see if we need to clip the ends due to small // insert size if (!(read1.getReadUnmappedFlag() || read2.getReadUnmappedFlag())) { - if (read1.getReadNegativeStrandFlag() != read2.getReadNegativeStrandFlag()) { final SAMRecord pos = (read1.getReadNegativeStrandFlag()) ? read2 : read1; final SAMRecord neg = (read1.getReadNegativeStrandFlag()) ? read1 : read2; @@ -605,23 +606,35 @@ protected void clipForOverlappingReads(final SAMRecord read1, final SAMRecord re final int negDiff = pos.getAlignmentStart() - neg.getAlignmentStart(); if (posDiff > 0) { - CigarUtil.softClip3PrimeEndOfRead(pos, Math.min(pos.getReadLength(), - pos.getReadLength() - posDiff + 1)); + final List elems = new ArrayList<>(pos.getCigar().getCigarElements()); + Collections.reverse(elems); + final int clipped = lengthOfSoftClipping(elems.iterator()); + final int clipFrom = pos.getReadLength() - posDiff - clipped + 1; + CigarUtil.softClip3PrimeEndOfRead(pos, Math.min(pos.getReadLength(), clipFrom)); removeNmMdAndUqTags(pos); // these tags are now invalid! } if (negDiff > 0) { - CigarUtil.softClip3PrimeEndOfRead(neg, Math.min(neg.getReadLength(), - neg.getReadLength() - negDiff + 1)); + final int clipped = lengthOfSoftClipping(neg.getCigar().getCigarElements().iterator()); + final int clipFrom = neg.getReadLength() - negDiff - clipped + 1; + CigarUtil.softClip3PrimeEndOfRead(neg, Math.min(neg.getReadLength(), clipFrom)); removeNmMdAndUqTags(neg); // these tags are now invalid! } - } - } else { - // TODO: What about RR/FF pairs? } } + } + + /** Returns the number of soft-clipped bases until a non-soft-clipping element is encountered. */ + private static int lengthOfSoftClipping(Iterator iterator) { + int clipped = 0; + while (iterator.hasNext()) { + final CigarElement elem = iterator.next(); + if (elem.getOperator() != CigarOperator.SOFT_CLIP && elem.getOperator() != CigarOperator.HARD_CLIP) break; + if (elem.getOperator() == CigarOperator.SOFT_CLIP) clipped = elem.getLength(); + } + return clipped; } /** diff --git a/src/test/java/picard/sam/AbstractAlignmentMergerTest.java b/src/test/java/picard/sam/AbstractAlignmentMergerTest.java new file mode 100644 index 000000000..828d1c466 --- /dev/null +++ b/src/test/java/picard/sam/AbstractAlignmentMergerTest.java @@ -0,0 +1,65 @@ +package picard.sam; + +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordSetBuilder; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.List; + +/** + * Tests related to code in AbstractAlignmentMerger + */ +public class AbstractAlignmentMergerTest { + @Test public void tesOverlappedReadClippingWithNonOverlappedReads() { + final SAMRecordSetBuilder set = new SAMRecordSetBuilder(); + set.setReadLength(110); + final List recs = set.addPair("q1", 0, 100, 200, false, false, "110M", "110M", false, true, 30); + final SAMRecord r1 = recs.get(0); + final SAMRecord r2 = recs.get(1); + AbstractAlignmentMerger.clipForOverlappingReads(r1, r2); + Assert.assertEquals(r1.getAlignmentStart(), 100); + Assert.assertEquals(r1.getCigarString(), "110M"); + Assert.assertEquals(r2.getAlignmentStart(), 200); + Assert.assertEquals(r2.getCigarString(), "110M"); + } + + @Test public void testBasicOverlappedReadClipping() { + final SAMRecordSetBuilder set = new SAMRecordSetBuilder(); + set.setReadLength(110); + final List recs = set.addPair("q1", 0, 100, 90, false, false, "110M", "110M", false, true, 30); + final SAMRecord r1 = recs.get(0); + final SAMRecord r2 = recs.get(1); + AbstractAlignmentMerger.clipForOverlappingReads(r1, r2); + Assert.assertEquals(r1.getAlignmentStart(), 100); + Assert.assertEquals(r1.getCigarString(), "100M10S"); + Assert.assertEquals(r2.getAlignmentStart(), 100); + Assert.assertEquals(r2.getCigarString(), "10S100M"); + } + + @Test public void testOverlappedReadClippingWithExistingSoftClipping() { + final SAMRecordSetBuilder set = new SAMRecordSetBuilder(); + set.setReadLength(120); + final List recs = set.addPair("q1", 0, 100, 95, false, false, "110M10S", "15S105M", false, true, 30); + final SAMRecord r1 = recs.get(0); + final SAMRecord r2 = recs.get(1); + AbstractAlignmentMerger.clipForOverlappingReads(r1, r2); + Assert.assertEquals(r1.getAlignmentStart(), 100); + Assert.assertEquals(r1.getCigarString(), "100M20S"); + Assert.assertEquals(r2.getAlignmentStart(), 100); + Assert.assertEquals(r2.getCigarString(), "20S100M"); + } + + @Test public void testOverlappedReadClippingWithExistingSoftClippingAndHardClipping() { + final SAMRecordSetBuilder set = new SAMRecordSetBuilder(); + set.setReadLength(120); + final List recs = set.addPair("q1", 0, 100, 95, false, false, "110M10S5H", "5H15S105M", false, true, 30); + final SAMRecord r1 = recs.get(0); + final SAMRecord r2 = recs.get(1); + AbstractAlignmentMerger.clipForOverlappingReads(r1, r2); + Assert.assertEquals(r1.getAlignmentStart(), 100); + Assert.assertEquals(r1.getCigarString(), "100M20S"); // Should ideally be 100M20S5H + Assert.assertEquals(r2.getAlignmentStart(), 100); + Assert.assertEquals(r2.getCigarString(), "20S100M"); // Should ideally be 5H20S100M + } +}