diff --git a/src/main/java/htsjdk/samtools/BAMRecord.java b/src/main/java/htsjdk/samtools/BAMRecord.java index 672e802c3..14b629595 100644 --- a/src/main/java/htsjdk/samtools/BAMRecord.java +++ b/src/main/java/htsjdk/samtools/BAMRecord.java @@ -342,7 +342,12 @@ private String decodeReadName() { return NULL_SEQUENCE; } final int basesOffset = readNameSize() + cigarSize(); - return SAMUtils.compressedBasesToBytes(mReadLength, mRestOfBinaryData, basesOffset); + try { + return SAMUtils.compressedBasesToBytes(mReadLength, mRestOfBinaryData, basesOffset); + } catch ( final IllegalArgumentException ex ) { + final String msg = ex.getMessage() + " in read: " + getReadName(); + throw new IllegalStateException(msg, ex); + } } /* methods for computing disk size of variably-sized elements, in order to locate diff --git a/src/main/java/htsjdk/samtools/BAMRecordCodec.java b/src/main/java/htsjdk/samtools/BAMRecordCodec.java index 5b0a408f6..e363a5b95 100644 --- a/src/main/java/htsjdk/samtools/BAMRecordCodec.java +++ b/src/main/java/htsjdk/samtools/BAMRecordCodec.java @@ -154,7 +154,12 @@ public void encode(final SAMRecord alignment) { // that it is specced as a uint. this.binaryCodec.writeInt(cigarElement); } - this.binaryCodec.writeBytes(SAMUtils.bytesToCompressedBases(alignment.getReadBases())); + try { + this.binaryCodec.writeBytes(SAMUtils.bytesToCompressedBases(alignment.getReadBases())); + } catch ( final IllegalArgumentException ex ) { + final String msg = ex.getMessage() + " in read: " + alignment.getReadName(); + throw new IllegalStateException(msg, ex); + } byte[] qualities = alignment.getBaseQualities(); if (qualities.length == 0) { qualities = new byte[alignment.getReadLength()]; diff --git a/src/main/java/htsjdk/samtools/SAMUtils.java b/src/main/java/htsjdk/samtools/SAMUtils.java index 25b6799c7..d439a4a83 100644 --- a/src/main/java/htsjdk/samtools/SAMUtils.java +++ b/src/main/java/htsjdk/samtools/SAMUtils.java @@ -111,8 +111,8 @@ public static final int MAX_PHRED_SCORE = 93; /** - * Convert from a byte array containing =AaCcGgTtNn represented as ASCII, to a byte array half as long, - * with =, A, C, G, T converted to 0, 1, 2, 4, 8, 15. + * Convert from a byte array containing =AaCcGgTtNnMmRrSsVvWwYyHhKkDdBb represented as ASCII, to a byte array half as long, + * with for example, =, A, C, G, T converted to 0, 1, 2, 4, 8, 15. * * @param readBases Bases as ASCII bytes. * @return New byte array with bases represented as nybbles, in BAM binary format. @@ -126,13 +126,13 @@ } // Last nybble if (i == readBases.length) { - compressedBases[i / 2] = charToCompressedBaseHigh((char) readBases[i - 1]); + compressedBases[i / 2] = charToCompressedBaseHigh(readBases[i - 1]); } return compressedBases; } /** - * Convert from a byte array with basese stored in nybbles, with =, A, C, G, T represented as 0, 1, 2, 4, 8, 15, + * Convert from a byte array with bases stored in nybbles, with for example,=, A, C, G, T, N represented as 0, 1, 2, 4, 8, 15, * to a a byte array containing =AaCcGgTtNn represented as ASCII. * * @param length Number of bases (not bytes) to convert. @@ -158,10 +158,11 @@ /** * Convert from ASCII byte to BAM nybble representation of a base in low-order nybble. * - * @param base One of =AaCcGgTtNn. + * @param base One of =AaCcGgTtNnMmRrSsVvWwYyHhKkDdBb. * @return Low-order nybble-encoded equivalent. + * @throws IllegalArgumentException if the base is not one of =AaCcGgTtNnMmRrSsVvWwYyHhKkDdBb. */ - private static byte charToCompressedBaseLow(final int base) { + private static byte charToCompressedBaseLow(final byte base) { switch (base) { case '=': return COMPRESSED_EQUAL_LOW; @@ -214,17 +215,18 @@ private static byte charToCompressedBaseLow(final int base) { case 'b': return COMPRESSED_B_LOW; default: - throw new IllegalArgumentException("Bad byte passed to charToCompressedBase: " + base); + throw new IllegalArgumentException("Bad base passed to charToCompressedBaseLow: " + Character.toString((char)base) + "(" + base + ")"); } } /** * Convert from ASCII byte to BAM nybble representation of a base in high-order nybble. * - * @param base One of =AaCcGgTtNn. + * @param base One of =AaCcGgTtNnMmRrSsVvWwYyHhKkDdBb. * @return High-order nybble-encoded equivalent. + * @throws IllegalArgumentException if the base is not one of =AaCcGgTtNnMmRrSsVvWwYyHhKkDdBb. */ - private static byte charToCompressedBaseHigh(final int base) { + private static byte charToCompressedBaseHigh(final byte base) { switch (base) { case '=': return COMPRESSED_EQUAL_HIGH; @@ -277,20 +279,21 @@ private static byte charToCompressedBaseHigh(final int base) { case 'b': return COMPRESSED_B_HIGH; default: - throw new IllegalArgumentException("Bad byte passed to charToCompressedBase: " + base); + throw new IllegalArgumentException("Bad base passed to charToCompressedBaseHigh: " + Character.toString((char)base) + "(" + base + ")"); } } /** * Returns the byte corresponding to a certain nybble * @param base One of COMPRESSED_*_LOW, a low-order nybble encoded base. - * @return ASCII base, one of ACGTN=. + * @return ASCII base, one of =ACGTNMRSVWYHKDB. + * @throws IllegalArgumentException if the base is not one of =ACGTNMRSVWYHKDB. */ private static byte compressedBaseToByte(byte base){ try{ return COMPRESSED_LOOKUP_TABLE[base]; }catch(IndexOutOfBoundsException e){ - throw new IllegalArgumentException("Bad byte passed to charToCompressedBase: " + base); + throw new IllegalArgumentException("Bad base passed to charToCompressedBase: " + Character.toString((char)base) + "(" + base + ")"); } } diff --git a/src/test/java/htsjdk/samtools/SAMUtilsTest.java b/src/test/java/htsjdk/samtools/SAMUtilsTest.java index 3be7e390c..e3fe72656 100644 --- a/src/test/java/htsjdk/samtools/SAMUtilsTest.java +++ b/src/test/java/htsjdk/samtools/SAMUtilsTest.java @@ -24,8 +24,10 @@ package htsjdk.samtools; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.Arrays; import java.util.List; public class SAMUtilsTest { @@ -244,7 +246,41 @@ public void testOtherCanonicalAlignments() { Assert.assertEquals(other.getAttribute(SAMTagUtil.getSingleton().NM),null); Assert.assertEquals(other.getCigarString(),"8M2S"); Assert.assertEquals(other.getInferredInsertSize(),-91);//100(mate) - 191(other) + } + + @Test() + public void testBytesToCompressedBases() { + final byte[] bases = new byte[]{'=', 'a', 'A', 'c', 'C', 'g', 'G', 't', 'T', 'n', 'N', '.', 'M', 'm', + 'R', 'r', 'S', 's', 'V', 'v', 'W', 'w', 'Y', 'y', 'H', 'h', 'K', 'k', 'D', 'd', 'B', 'b'}; + final byte[] compressedBases = SAMUtils.bytesToCompressedBases(bases); + String expectedCompressedBases = "[1, 18, 36, 72, -113, -1, 51, 85, 102, 119, -103, -86, -69, -52, -35, -18]"; + Assert.assertEquals(Arrays.toString(compressedBases), expectedCompressedBases); + } + + @DataProvider + public Object[][] testBadBase() { + return new Object[][]{ + {new byte[]{'>', 'A'}, '>'}, + {new byte[]{'A', '>'} , '>'} + }; + } + @Test(dataProvider = "testBadBase", expectedExceptions = IllegalArgumentException.class) + public void testBytesToCompressedBasesException(final byte[] bases, final char failingBase) { + try { + SAMUtils.bytesToCompressedBases(bases); + } catch ( final IllegalArgumentException ex ) { + Assert.assertTrue(ex.getMessage().contains(Character.toString(failingBase))); + throw ex; + } } + @Test + public void testCompressedBasesToBytes() { + final byte[] compressedBases = new byte[]{1, 18, 36, 72, -113, -1, 51, 85, 102, 119, -103, -86, -69, -52, -35, -18}; + final byte[] bytes = SAMUtils.compressedBasesToBytes(2*compressedBases.length, compressedBases, 0); + final byte[] expectedBases = new byte[]{'=', 'A', 'A', 'C', 'C', 'G', 'G', 'T', 'T', 'N', 'N', 'N', 'M', 'M', + 'R', 'R', 'S', 'S', 'V', 'V', 'W', 'W', 'Y', 'Y', 'H', 'H', 'K', 'K', 'D', 'D', 'B', 'B'}; + Assert.assertEquals(new String(bytes), new String(expectedBases)); + } }