diff --git a/src/main/java/htsjdk/samtools/SAMRecord.java b/src/main/java/htsjdk/samtools/SAMRecord.java index a22ded540..3bb11ec2b 100644 --- a/src/main/java/htsjdk/samtools/SAMRecord.java +++ b/src/main/java/htsjdk/samtools/SAMRecord.java @@ -26,17 +26,12 @@ import htsjdk.samtools.util.CoordMath; import htsjdk.samtools.util.Locatable; +import htsjdk.samtools.util.SequenceUtil; import htsjdk.samtools.util.StringUtil; import java.io.Serializable; import java.lang.reflect.Array; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; /** @@ -161,6 +156,16 @@ */ public static final int MAX_INSERT_SIZE = 1<<29; + /** + * Tags that are known to need the reverse complement if the read is reverse complemented. + */ + public static List TAGS_TO_REVERSE_COMPLEMENT = Arrays.asList(SAMTag.E2.name(), SAMTag.SQ.name()); + + /** + * Tags that are known to need the reverse if the read is reverse complemented. + */ + public static List TAGS_TO_REVERSE = Arrays.asList(SAMTag.OQ.name(), SAMTag.U2.name()); + private String mReadName = null; private byte[] mReadBases = NULL_SEQUENCE; private byte[] mBaseQualities = NULL_QUALS; @@ -2112,7 +2117,8 @@ private String buildMessage(final String baseMessage, final boolean isMate) { /** * Note that this does a shallow copy of everything, except for the attribute list, for which a copy of the list * is made, but the attributes themselves are copied by reference. This should be safe because callers should - * never modify a mutable value returned by any of the get() methods anyway. + * never modify a mutable value returned by any of the get() methods anyway. If one of the cloned record's SEQ or + * QUAL needs to be modified, a deeper copy should be made (e.g. Reverse Complement). 
*/ @Override public Object clone() throws CloneNotSupportedException { @@ -2248,5 +2254,125 @@ public final Object removeTransientAttribute(final Object key) { if (this.transientAttributes != null) return this.transientAttributes.remove(key); else return null; } -} + /** + * Reverse-complement bases and reverse quality scores along with known optional attributes that + * need the same treatment. Changes made after making a copy of the bases, qualities, + * and any attributes that will be altered. If in-place update is needed use + * {@link #reverseComplement(boolean)}. + * See {@link #TAGS_TO_REVERSE_COMPLEMENT} {@link #TAGS_TO_REVERSE} + * for the default set of tags that are handled. + */ + public void reverseComplement() { + reverseComplement(false); + } + + /** + * Reverse-complement bases and reverse quality scores along with known optional attributes that + * need the same treatment. Optionally makes a copy of the bases, qualities or attributes instead + * of altering them in-place. See {@link #TAGS_TO_REVERSE_COMPLEMENT} {@link #TAGS_TO_REVERSE} + * for the default set of tags that are handled. + * + * @param inplace Setting this to false will clone all attributes, bases and qualities before changing the values. + */ + public void reverseComplement(boolean inplace) { + reverseComplement(TAGS_TO_REVERSE_COMPLEMENT, TAGS_TO_REVERSE, inplace); + } + + /** + * Reverse complement bases and reverse quality scores. In addition reverse complement any + * non-null attributes specified by tagsToRevcomp and reverse any non-null attributes + * specified by tagsToReverse. + */ + public void reverseComplement(final Collection tagsToRevcomp, final Collection tagsToReverse, boolean inplace) { + final byte[] readBases = inplace ? getReadBases() : getReadBases().clone(); + SequenceUtil.reverseComplement(readBases); + setReadBases(readBases); + final byte qualities[] = inplace ? 
getBaseQualities() : getBaseQualities().clone(); + reverseArray(qualities); + setBaseQualities(qualities); + + // Deal with tags that need to be reverse complemented + if (tagsToRevcomp != null) { + for (final String tag: tagsToRevcomp) { + Object value = getAttribute(tag); + if (value != null) { + if (value instanceof byte[]) { + value = inplace ? value : ((byte[]) value).clone(); + SequenceUtil.reverseComplement((byte[]) value); + } else if (value instanceof String) { + //SequenceUtil.reverseComplement is in-place for bytes but copies Strings since they are immutable. + value = SequenceUtil.reverseComplement((String) value); + } else { + throw new UnsupportedOperationException("Don't know how to reverse complement: " + value); + } + setAttribute(tag, value); + } + } + } + + // Deal with tags that needed to just be reversed + if (tagsToReverse != null) { + for (final String tag : tagsToReverse) { + Object value = getAttribute(tag); + if (value != null) { + if (value instanceof String) { + value = StringUtil.reverseString((String) value); + } else if (value.getClass().isArray()) { + if (value instanceof byte[]) { + value = inplace ? value : ((byte[]) value).clone(); + reverseArray((byte[]) value); + } else if (value instanceof short[]) { + value = inplace ? value : ((short[]) value).clone(); + reverseArray((short[]) value); + } else if (value instanceof int[]) { + value = inplace ? value : ((int[]) value).clone(); + reverseArray((int[]) value); + } else if (value instanceof float[]) { + value = inplace ? 
value : ((float[]) value).clone(); + reverseArray((float[]) value); + } else { + throw new UnsupportedOperationException("Reversing array attribute of type " + value.getClass().getComponentType() + " not supported."); + } + } else { + throw new UnsupportedOperationException("Don't know how to reverse: " + value); + } + + setAttribute(tag, value); + } + } + } + } + + private static void reverseArray(final byte[] array) { + for (int i=0, j=array.length-1; i TAGS_TO_REVERSE_COMPLEMENT = Arrays.asList(SAMTag.E2.name(), SAMTag.SQ.name()); public static List TAGS_TO_REVERSE = Arrays.asList(SAMTag.OQ.name(), SAMTag.U2.name()); /** * Reverse-complement bases and reverse quality scores along with known optional attributes that - * need the same treatment. See {@link #TAGS_TO_REVERSE_COMPLEMENT} {@link #TAGS_TO_REVERSE} + * need the same treatment. Changes made in-place, instead of making a copy of the bases, qualities, + * or attributes. If a copy is needed use {@link #reverseComplement(SAMRecord, boolean)}. + * See {@link #TAGS_TO_REVERSE_COMPLEMENT} {@link #TAGS_TO_REVERSE} * for the default set of tags that are handled. */ public static void reverseComplement(final SAMRecord rec) { - reverseComplement(rec, TAGS_TO_REVERSE_COMPLEMENT, TAGS_TO_REVERSE); + rec.reverseComplement(TAGS_TO_REVERSE_COMPLEMENT, TAGS_TO_REVERSE, true); + } + + /** + * Reverse-complement bases and reverse quality scores along with known optional attributes that + * need the same treatment. Optionally makes a copy of the bases, qualities or attributes instead + * of altering them in-place. See {@link #TAGS_TO_REVERSE_COMPLEMENT} {@link #TAGS_TO_REVERSE} + * for the default set of tags that are handled. + * + * @param rec Record to reverse complement. + * @param inplace Setting this to false will clone all attributes, bases and qualities before changing the values. 
+ */ + public static void reverseComplement(final SAMRecord rec, boolean inplace) { + rec.reverseComplement(TAGS_TO_REVERSE_COMPLEMENT, TAGS_TO_REVERSE, inplace); } /** @@ -51,79 +71,7 @@ public static void reverseComplement(final SAMRecord rec) { * non-null attributes specified by tagsToRevcomp and reverse and non-null attributes * specified by tagsToReverse. */ - public static void reverseComplement(final SAMRecord rec, final Collection tagsToRevcomp, final Collection tagsToReverse) { - final byte[] readBases = rec.getReadBases(); - SequenceUtil.reverseComplement(readBases); - rec.setReadBases(readBases); - final byte qualities[] = rec.getBaseQualities(); - reverseArray(qualities); - rec.setBaseQualities(qualities); - - // Deal with tags that need to be reverse complemented - if (tagsToRevcomp != null) { - for (final String tag: tagsToRevcomp) { - Object value = rec.getAttribute(tag); - if (value != null) { - if (value instanceof byte[]) SequenceUtil.reverseComplement((byte[]) value); - else if (value instanceof String) value = SequenceUtil.reverseComplement((String) value); - else throw new UnsupportedOperationException("Don't know how to reverse complement: " + value); - rec.setAttribute(tag, value); - } - } - } - - // Deal with tags that needed to just be reversed - if (tagsToReverse != null) { - for (final String tag : tagsToReverse) { - Object value = rec.getAttribute(tag); - if (value != null) { - if (value instanceof String) { - value = StringUtil.reverseString((String) value); - } - else if (value.getClass().isArray()) { - if (value instanceof byte[]) reverseArray((byte[]) value); - else if (value instanceof short[]) reverseArray((short[]) value); - else if (value instanceof int[]) reverseArray((int[]) value); - else if (value instanceof float[]) reverseArray((float[]) value); - else throw new UnsupportedOperationException("Reversing array attribute of type " + value.getClass().getComponentType() + " not supported."); - } - else throw new 
UnsupportedOperationException("Don't know how to reverse: " + value); - - rec.setAttribute(tag, value); - } - } - } - } - - private static void reverseArray(final byte[] array) { - for (int i=0, j=array.length-1; i tagsToRevcomp, final Collection tagsToReverse, boolean inplace) { + rec.reverseComplement(tagsToRevcomp, tagsToReverse, inplace); } } diff --git a/src/test/java/htsjdk/samtools/SAMRecordUnitTest.java b/src/test/java/htsjdk/samtools/SAMRecordUnitTest.java index 15b732607..951ecee78 100644 --- a/src/test/java/htsjdk/samtools/SAMRecordUnitTest.java +++ b/src/test/java/htsjdk/samtools/SAMRecordUnitTest.java @@ -973,4 +973,69 @@ public void testResolveNameNullHeader() { SAMRecord.resolveNameFromIndex(1, null); } + @Test + public void testReverseComplement() { + final SAMRecord rec = createTestSamRec(); + + rec.reverseComplement(Arrays.asList("Y1"), Arrays.asList("X1", "X2", "X3", "X4", "X5"), false); + Assert.assertEquals(rec.getReadString(), "GTGTGTGTGT"); + Assert.assertEquals(rec.getBaseQualityString(), "IIIIIHHHHH"); + Assert.assertEquals(rec.getByteArrayAttribute("X1"), new byte[] {5,4,3,2,1}); + Assert.assertEquals(rec.getSignedShortArrayAttribute("X2"), new short[] {5,4,3,2,1}); + Assert.assertEquals(rec.getSignedIntArrayAttribute("X3"), new int[] {5,4,3,2,1}); + Assert.assertEquals(rec.getFloatArrayAttribute("X4"), new float[] {5.0f,4.0f,3.0f,2.0f,1.0f}); + Assert.assertEquals(rec.getStringAttribute("Y1"), "GTTTTCTTTT"); + } + + /** + * Note that since strings are immutable the Y1 attribute, which is a String, is not reversed in the original even + * if an in-place reverse complement occurred. The bases and qualities are byte[] so they are reversed if in-place + * is true. 
+ */ + @DataProvider + public Object [][] reverseComplementData() { + return new Object[][]{ + {false, "ACACACACAC", "HHHHHIIIII", "AAAAGAAAAC", new byte[] {1,2,3,4,5}, new short[] {1,2,3,4,5}, new int[] {1,2,3,4,5}, new float[] {1,2,3,4,5}}, + {true, "GTGTGTGTGT", "IIIIIHHHHH", "AAAAGAAAAC", new byte[] {5,4,3,2,1}, new short[] {5,4,3,2,1}, new int[] {5,4,3,2,1}, new float[] {5,4,3,2,1}}, + }; + } + + @Test(dataProvider = "reverseComplementData") + public void testSafeReverseComplement(boolean inplace, String bases, String quals, String y1, byte[] x1, short[] x2, int[] x3, float[] x4) throws CloneNotSupportedException { + final SAMRecord original = createTestSamRec(); + final SAMRecord cloneOfOriginal = (SAMRecord) original.clone(); + //Runs reverseComplement either on copies or in-place, as selected by the inplace parameter + cloneOfOriginal.reverseComplement(Arrays.asList("Y1"), Arrays.asList("X1", "X2", "X3", "X4", "X5"), inplace); + + Assert.assertEquals(original.getReadString(), bases); + Assert.assertEquals(original.getBaseQualityString(), quals); + Assert.assertEquals(original.getByteArrayAttribute("X1"), x1); + Assert.assertEquals(original.getSignedShortArrayAttribute("X2"), x2); + Assert.assertEquals(original.getSignedIntArrayAttribute("X3"), x3); + Assert.assertEquals(original.getFloatArrayAttribute("X4"), x4); + Assert.assertEquals(original.getStringAttribute("Y1"), y1); + + Assert.assertEquals(cloneOfOriginal.getReadString(), "GTGTGTGTGT"); + Assert.assertEquals(cloneOfOriginal.getBaseQualityString(), "IIIIIHHHHH"); + Assert.assertEquals(cloneOfOriginal.getByteArrayAttribute("X1"), new byte[] {5,4,3,2,1}); + Assert.assertEquals(cloneOfOriginal.getSignedShortArrayAttribute("X2"), new short[] {5,4,3,2,1}); + Assert.assertEquals(cloneOfOriginal.getSignedIntArrayAttribute("X3"), new int[] {5,4,3,2,1}); + Assert.assertEquals(cloneOfOriginal.getFloatArrayAttribute("X4"), new float[] {5.0f,4.0f,3.0f,2.0f,1.0f}); + Assert.assertEquals(cloneOfOriginal.getStringAttribute("Y1"), "GTTTTCTTTT"); + + } 
+ + public SAMRecord createTestSamRec() { + final SAMFileHeader header = new SAMFileHeader(); + final SAMRecord rec = new SAMRecord(header); + rec.setReadString("ACACACACAC"); + rec.setBaseQualityString("HHHHHIIIII"); + rec.setAttribute("X1", new byte[] {1,2,3,4,5}); + rec.setAttribute("X2", new short[] {1,2,3,4,5}); + rec.setAttribute("X3", new int[] {1,2,3,4,5}); + rec.setAttribute("X4", new float[] {1.0f,2.0f,3.0f,4.0f,5.0f}); + rec.setAttribute("Y1", "AAAAGAAAAC"); + + return(rec); + } } diff --git a/src/test/java/htsjdk/samtools/SAMRecordUtilTest.java b/src/test/java/htsjdk/samtools/SAMRecordUtilTest.java deleted file mode 100644 index eb3712fc1..000000000 --- a/src/test/java/htsjdk/samtools/SAMRecordUtilTest.java +++ /dev/null @@ -1,29 +0,0 @@ -package htsjdk.samtools; - -import org.testng.Assert; -import org.testng.annotations.Test; - -import java.util.Arrays; - -public class SAMRecordUtilTest { - @Test public void testReverseComplement() { - final SAMFileHeader header = new SAMFileHeader(); - final SAMRecord rec = new SAMRecord(header); - rec.setReadString("ACACACACAC"); - rec.setBaseQualityString("HHHHHIIIII"); - rec.setAttribute("X1", new byte[] {1,2,3,4,5}); - rec.setAttribute("X2", new short[] {1,2,3,4,5}); - rec.setAttribute("X3", new int[] {1,2,3,4,5}); - rec.setAttribute("X4", new float[] {1.0f,2.0f,3.0f,4.0f,5.0f}); - rec.setAttribute("Y1", "AAAAGAAAAC"); - - SAMRecordUtil.reverseComplement(rec, Arrays.asList("Y1"), Arrays.asList("X1", "X2", "X3", "X4", "X5")); - Assert.assertEquals(rec.getReadString(), "GTGTGTGTGT"); - Assert.assertEquals(rec.getBaseQualityString(), "IIIIIHHHHH"); - Assert.assertEquals(rec.getByteArrayAttribute("X1"), new byte[] {5,4,3,2,1}); - Assert.assertEquals(rec.getSignedShortArrayAttribute("X2"), new short[] {5,4,3,2,1}); - Assert.assertEquals(rec.getSignedIntArrayAttribute("X3"), new int[] {5,4,3,2,1}); - Assert.assertEquals(rec.getFloatArrayAttribute("X4"), new float[] {5.0f,4.0f,3.0f,2.0f,1.0f}); - 
Assert.assertEquals(rec.getStringAttribute("Y1"), "GTTTTCTTTT"); - } -}