From 28bc84d9af689e3a4d5dfdf777a1199ea8c04968 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Fri, 15 Apr 2016 17:24:01 +0200 Subject: [PATCH 1/6] FastqRecord refactoring and FastqCodec for encode/decode --- .../htsjdk/samtools/fastq/BasicFastqWriter.java | 7 +- .../java/htsjdk/samtools/fastq/FastqCodec.java | 80 ++++++++ .../java/htsjdk/samtools/fastq/FastqConstants.java | 4 +- .../java/htsjdk/samtools/fastq/FastqRecord.java | 216 +++++++++++++++------ .../java/htsjdk/samtools/util/SequenceUtil.java | 3 +- .../java/htsjdk/samtools/fastq/FastqCodecTest.java | 75 +++++++ .../htsjdk/samtools/fastq/FastqRecordTest.java | 13 +- 7 files changed, 329 insertions(+), 69 deletions(-) create mode 100644 src/main/java/htsjdk/samtools/fastq/FastqCodec.java create mode 100644 src/test/java/htsjdk/samtools/fastq/FastqCodecTest.java diff --git a/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java b/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java index 8a5afd38a..ab4ad94f9 100644 --- a/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java +++ b/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java @@ -58,12 +58,7 @@ public BasicFastqWriter(final PrintStream writer) { @Override public void write(final FastqRecord rec) { - writer.print(FastqConstants.SEQUENCE_HEADER); - writer.println(rec.getReadHeader()); - writer.println(rec.getReadString()); - writer.print(FastqConstants.QUALITY_HEADER); - writer.println(rec.getBaseQualityHeader() == null ? 
"" : rec.getBaseQualityHeader()); - writer.println(rec.getBaseQualityString()); + writer.println(FastqCodec.encode(rec)); if (writer.checkError()) { throw new SAMException("Error in writing fastq file " + path); } diff --git a/src/main/java/htsjdk/samtools/fastq/FastqCodec.java b/src/main/java/htsjdk/samtools/fastq/FastqCodec.java new file mode 100644 index 000000000..772781fc3 --- /dev/null +++ b/src/main/java/htsjdk/samtools/fastq/FastqCodec.java @@ -0,0 +1,80 @@ +/* + * The MIT License + * + * Copyright (c) 2016 Daniel Gomez-Sanchez + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools.fastq; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.util.SequenceUtil; + +/** + * Codec for encode records into FASTQ format. + * + * @author Daniel Gomez-Sanchez (magicDGS) + */ +public class FastqCodec { + + /** + * Encodes a FastqRecord in the String FASTQ format. 
+ */ + public static String encode(final FastqRecord record) { + final String readName = record.getReadName(); + final String readString = record.getReadString(); + final String qualHeader = record.getBaseQualityHeader(); + final String qualityString = record.getBaseQualityString(); + return new StringBuilder() + .append(FastqConstants.SEQUENCE_HEADER).append(readName == null ? "" : readName).append('\n') + .append(readString == null ? "" : readString).append('\n') + .append(FastqConstants.QUALITY_HEADER).append(qualHeader == null ? "" : qualHeader).append('\n') + .append(qualityString == null ? "" : qualityString) + .toString(); + } + + /** + * Converts a {@link SAMRecord} into a {@link FastqRecord}. + */ + public static FastqRecord asFastqRecord(final SAMRecord record) { + String readName = record.getReadName(); + if(record.getReadPairedFlag() && (record.getFirstOfPairFlag() || record.getSecondOfPairFlag())) { + readName += (record.getFirstOfPairFlag()) ? FastqConstants.FIRST_OF_PAIR : FastqConstants.SECOND_OF_PAIR; + } + return new FastqRecord(readName, record.getReadString(), null, record.getBaseQualityString()); + } + + /** + * Converts a {@link FastqRecord} into a simple unmapped {@link SAMRecord}. 
+ */ + public static SAMRecord asSAMRecord(final FastqRecord record, final SAMFileHeader header) { + // construct the SAMRecord and set the unmapped flag + final SAMRecord samRecord = new SAMRecord(header); + samRecord.setReadUnmappedFlag(true); + // get the read name from the FastqRecord correctly formatted + final String readName = SequenceUtil.getSamReadNameFromFastqHeader(record.getReadName()); + // set the basic information from the FastqRecord + samRecord.setReadName(readName); + samRecord.setReadBases(record.getReadBases()); + samRecord.setBaseQualities(record.getBaseQualities()); + return samRecord; + } + +} diff --git a/src/main/java/htsjdk/samtools/fastq/FastqConstants.java b/src/main/java/htsjdk/samtools/fastq/FastqConstants.java index f5d4150ea..4e9b95e5b 100644 --- a/src/main/java/htsjdk/samtools/fastq/FastqConstants.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqConstants.java @@ -29,7 +29,9 @@ public class FastqConstants { public static final String SEQUENCE_HEADER = "@" ; public static final String QUALITY_HEADER = "+" ; - + public static final String FIRST_OF_PAIR = "/1"; + public static final String SECOND_OF_PAIR = "/2"; + public enum FastqExtensions { FASTQ(".fastq"), FASTQ_GZ(".fastq.gz"), diff --git a/src/main/java/htsjdk/samtools/fastq/FastqRecord.java b/src/main/java/htsjdk/samtools/fastq/FastqRecord.java index b1d3f7507..074b6c128 100755 --- a/src/main/java/htsjdk/samtools/fastq/FastqRecord.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqRecord.java @@ -23,62 +23,168 @@ */ package htsjdk.samtools.fastq; +import htsjdk.samtools.SAMUtils; +import htsjdk.samtools.util.StringUtil; + import java.io.Serializable; /** - * Represents a fastq record, fairly literally, i.e. without any conversion. 
+ * Simple representation of a FASTQ record, without any conversion */ public class FastqRecord implements Serializable { private static final long serialVersionUID = 1L; - private final String seqHeaderPrefix; - private final String seqLine; - private final String qualHeaderPrefix; - private final String qualLine; - - public FastqRecord(final String seqHeaderPrefix, final String seqLine, final String qualHeaderPrefix, final String qualLine) { - if (seqHeaderPrefix != null && !seqHeaderPrefix.isEmpty()) this.seqHeaderPrefix = seqHeaderPrefix; - else this.seqHeaderPrefix = null; - if (qualHeaderPrefix != null && !qualHeaderPrefix.isEmpty()) this.qualHeaderPrefix = qualHeaderPrefix; - else this.qualHeaderPrefix = null; - this.seqLine = seqLine ; - this.qualLine = qualLine ; + private final String readName; + private final String readString; + private final String qualityHeader; + private final String baseQualityString; + + /** + * Default constructor + * + * @param readName the read name (without {@link FastqConstants#SEQUENCE_HEADER}) + * @param readBases the read sequence bases + * @param qualityHeader the quality header (without {@link FastqConstants#QUALITY_HEADER}) + * @param baseQualities the base quality scores + */ + public FastqRecord(final String readName, final String readBases, final String qualityHeader, final String baseQualities) { + if (readName != null && !readName.isEmpty()) { + this.readName = readName; + } else { + this.readName = null; + } + if (qualityHeader != null && !qualityHeader.isEmpty()) { + this.qualityHeader = qualityHeader; + } else { + this.qualityHeader = null; + } + this.readString = readBases; + this.baseQualityString = baseQualities; } - - /** copy constructor */ + + /** + * Constructor for byte[] arrays + * + * @param readName the read name (without {@link FastqConstants#SEQUENCE_HEADER}) + * @param readBases the read sequence bases as ASCII bytes ACGTN=.
+ * @param qualityHeader the quality header (without {@link FastqConstants#QUALITY_HEADER}) + * @param baseQualities the base qualities as binary PHRED scores (not ASCII) + */ + public FastqRecord(final String readName, final byte[] readBases, final String qualityHeader, final byte[] baseQualities) { + this(readName, StringUtil.bytesToString(readBases), qualityHeader, SAMUtils.phredToFastq(baseQualities)); + } + + /** + * Copy constructor + * + * @param other record to copy + */ public FastqRecord(final FastqRecord other) { - if( other == null ) throw new IllegalArgumentException("new FastqRecord(null)"); - this.seqHeaderPrefix = other.seqHeaderPrefix; - this.seqLine = other.seqLine; - this.qualHeaderPrefix = other.qualHeaderPrefix; - this.qualLine = other.qualLine; + if (other == null) { + throw new IllegalArgumentException("new FastqRecord(null)"); + } + this.readName = other.readName; + this.readString = other.readString; + this.qualityHeader = other.qualityHeader; + this.baseQualityString = other.baseQualityString; + } + + /** + * @return the read name + * @deprecated use {@link #getReadName()} instead + */ + @Deprecated + public String getReadHeader() { + return getReadName(); + } + + /** + * Get the read name + * + * @return the read name + */ + public String getReadName() { + return readName; + } + + /** + * Get the DNA sequence + * + * @return read sequence as a string of ACGTN=. + */ + public String getReadString() { + return readString; + } + + /** + * Get the DNA sequence. + * + * @return read sequence as ASCII bytes ACGTN=.
+ */ + public byte[] getReadBases() { + return StringUtil.stringToBytes(readString); + } + + /** + * Get the base qualities encoded as a FASTQ string + * + * @return the quality string + */ + public String getBaseQualityString() { + return baseQualityString; + } + + /** + * Get the base qualities as binary PHRED scores (not ASCII) + * + * @return the base quality + */ + public byte[] getBaseQualities() { + return SAMUtils.fastqToPhred(baseQualityString); + } + + /** + * Get the read length + * + * @return number of bases in the read + */ + public int getReadLength() { + return (readString == null) ? 0 : readString.length(); + } + + /** + * Get the base quality header + * + * @return the base quality header + */ + public String getBaseQualityHeader() { + return qualityHeader; + } + + /** + * shortcut to getReadString().length() + * + * @deprecated use {@link #getReadLength()} instead + */ + @Deprecated + public int length() { + return getReadLength(); } - /** @return the read name */ - public String getReadHeader() { return seqHeaderPrefix; } - /** @return the read DNA sequence */ - public String getReadString() { return seqLine; } - /** @return the quality header */ - public String getBaseQualityHeader() { return qualHeaderPrefix; } - /** @return the quality string */ - public String getBaseQualityString() { return qualLine; } - /** shortcut to getReadString().length() */ - public int length() { return this.seqLine==null?0:this.seqLine.length();} - @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result - + ((qualHeaderPrefix == null) ? 0 : qualHeaderPrefix.hashCode()); + + ((qualityHeader == null) ? 0 : qualityHeader.hashCode()); result = prime * result - + ((qualLine == null) ? 0 : qualLine.hashCode()); + + ((baseQualityString == null) ? 0 : baseQualityString.hashCode()); result = prime * result - + ((seqHeaderPrefix == null) ? 0 : seqHeaderPrefix.hashCode()); - result = prime * result + ((seqLine == null) ? 
0 : seqLine.hashCode()); + + ((readName == null) ? 0 : readName.hashCode()); + result = prime * result + ((readString == null) ? 0 : readString.hashCode()); return result; } - + @Override public boolean equals(Object obj) { if (this == obj) @@ -88,37 +194,33 @@ public boolean equals(Object obj) { if (getClass() != obj.getClass()) return false; FastqRecord other = (FastqRecord) obj; - if (seqLine == null) { - if (other.seqLine != null) + if (readString == null) { + if (other.readString != null) return false; - } else if (!seqLine.equals(other.seqLine)) + } else if (!readString.equals(other.readString)) return false; - if (qualHeaderPrefix == null) { - if (other.qualHeaderPrefix != null) + if (qualityHeader == null) { + if (other.qualityHeader != null) return false; - } else if (!qualHeaderPrefix.equals(other.qualHeaderPrefix)) + } else if (!qualityHeader.equals(other.qualityHeader)) return false; - if (qualLine == null) { - if (other.qualLine != null) + if (baseQualityString == null) { + if (other.baseQualityString != null) return false; - } else if (!qualLine.equals(other.qualLine)) + } else if (!baseQualityString.equals(other.baseQualityString)) return false; - if (seqHeaderPrefix == null) { - if (other.seqHeaderPrefix != null) + if (readName == null) { + if (other.readName != null) return false; - } else if (!seqHeaderPrefix.equals(other.seqHeaderPrefix)) + } else if (!readName.equals(other.readName)) return false; - + return true; } - + + /** Simple toString() that gives a read name and length */ @Override public String toString() { - return new StringBuilder(). - append(FastqConstants.SEQUENCE_HEADER).append(this.seqHeaderPrefix==null?"":this.seqHeaderPrefix).append('\n'). - append(this.seqLine==null?"":this.seqLine).append('\n'). - append(FastqConstants.QUALITY_HEADER).append(this.qualHeaderPrefix==null?"":this.qualHeaderPrefix).append('\n'). - append(this.qualLine==null?"":this.qualLine). 
- toString(); - } + return String.format("%s: %s bp", readName, getReadLength()); + } } diff --git a/src/main/java/htsjdk/samtools/util/SequenceUtil.java b/src/main/java/htsjdk/samtools/util/SequenceUtil.java index 92c1a507d..7088217da 100644 --- a/src/main/java/htsjdk/samtools/util/SequenceUtil.java +++ b/src/main/java/htsjdk/samtools/util/SequenceUtil.java @@ -32,6 +32,7 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.SAMTag; +import htsjdk.samtools.fastq.FastqConstants; import java.io.File; import java.math.BigInteger; @@ -1005,7 +1006,7 @@ public static String getSamReadNameFromFastqHeader(final String fastqHeader) { // NOTE: the while loop isn't necessarily the most efficient way to handle this but we don't // expect this to ever happen more than once, just trapping pathological cases - while ((readName.endsWith("/1") || readName.endsWith("/2"))) { + while ((readName.endsWith(FastqConstants.FIRST_OF_PAIR) || readName.endsWith(FastqConstants.SECOND_OF_PAIR))) { // If this is an unpaired run we want to make sure that "/1" isn't tacked on the end of the read name, // as this can cause problems down the road (ex. in Picard's MergeBamAlignment). 
readName = readName.substring(0, readName.length() - 2); diff --git a/src/test/java/htsjdk/samtools/fastq/FastqCodecTest.java b/src/test/java/htsjdk/samtools/fastq/FastqCodecTest.java new file mode 100644 index 000000000..cfe36532b --- /dev/null +++ b/src/test/java/htsjdk/samtools/fastq/FastqCodecTest.java @@ -0,0 +1,75 @@ +/* + * The MIT License + * + * Copyright (c) 2016 Daniel Gomez-Sanchez + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +package htsjdk.samtools.fastq; + +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordSetBuilder; +import org.testng.Assert; +import org.testng.annotations.Test; + +/** + * @author Daniel Gomez-Sanchez (magicDGS) + */ +public class FastqCodecTest { + + @Test + public void testAsFastqRecord() throws Exception { + final SAMRecord record = new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, "10M", null, 2); + record.setReadPairedFlag(true); + // test first of pair encoding + record.setFirstOfPairFlag(true); + testRecord(record.getReadName() + FastqConstants.FIRST_OF_PAIR, FastqCodec.asFastqRecord(record), record); + record.setFirstOfPairFlag(false); + record.setSecondOfPairFlag(true); + testRecord(record.getReadName() + FastqConstants.SECOND_OF_PAIR, FastqCodec.asFastqRecord(record), record); + record.setSecondOfPairFlag(false); + testRecord(record.getReadName(), FastqCodec.asFastqRecord(record), record); + } + + private void testRecord(final String expectedReadName, final FastqRecord fastqRecord, final SAMRecord samRecord) { + Assert.assertEquals(fastqRecord.getReadName(), expectedReadName); + Assert.assertEquals(fastqRecord.getBaseQualities(), samRecord.getBaseQualities()); + Assert.assertEquals(fastqRecord.getReadBases(), samRecord.getReadBases()); + Assert.assertNull(fastqRecord.getBaseQualityHeader()); + } + + @Test + public void testAsSAMRecord() throws Exception { + // create a random record + final SAMRecord samRecord = new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, "10M", null, 2); + FastqRecord fastqRecord = new FastqRecord(samRecord.getReadName(), samRecord.getReadBases(), "", samRecord.getBaseQualities()); + testConvertedSAMRecord(FastqCodec.asSAMRecord(fastqRecord, samRecord.getHeader()), samRecord); + fastqRecord = new FastqRecord(samRecord.getReadName() + FastqConstants.FIRST_OF_PAIR, samRecord.getReadBases(), "", samRecord.getBaseQualities()); + testConvertedSAMRecord(FastqCodec.asSAMRecord(fastqRecord, 
samRecord.getHeader()), samRecord); + fastqRecord = new FastqRecord(samRecord.getReadName() + FastqConstants.SECOND_OF_PAIR, samRecord.getReadBases(), "", samRecord.getBaseQualities()); + testConvertedSAMRecord(FastqCodec.asSAMRecord(fastqRecord, samRecord.getHeader()), samRecord); + } + + private void testConvertedSAMRecord(final SAMRecord converted, final SAMRecord original) { + Assert.assertEquals(converted.getReadName(), original.getReadName()); + Assert.assertEquals(converted.getBaseQualities(), original.getBaseQualities()); + Assert.assertEquals(converted.getReadBases(), original.getReadBases()); + Assert.assertTrue(converted.getReadUnmappedFlag()); + } +} \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/fastq/FastqRecordTest.java b/src/test/java/htsjdk/samtools/fastq/FastqRecordTest.java index f6f238eab..97a3d3c8d 100644 --- a/src/test/java/htsjdk/samtools/fastq/FastqRecordTest.java +++ b/src/test/java/htsjdk/samtools/fastq/FastqRecordTest.java @@ -15,7 +15,7 @@ public void testBasic() { Assert.assertNull(fastqRecord.getBaseQualityHeader()); - Assert.assertEquals(fastqRecord.getReadHeader(), seqHeaderPrefix); + Assert.assertEquals(fastqRecord.getReadName(), seqHeaderPrefix); Assert.assertEquals(fastqRecord.getBaseQualityString(), qualLine); Assert.assertEquals(fastqRecord.getReadString(), seqLine); Assert.assertNotNull(fastqRecord.toString());//just check not nullness @@ -25,9 +25,9 @@ public void testBasic() { Assert.assertEquals(fastqRecord, fastqRecord); Assert.assertNotEquals(fastqRecord, "fred"); Assert.assertNotEquals("fred", fastqRecord); - Assert.assertEquals(fastqRecord.length(), seqLine.length()); + Assert.assertEquals(fastqRecord.getReadLength(), seqLine.length()); Assert.assertEquals(fastqRecord.getBaseQualityString().length(), fastqRecord.getReadString().length()); - Assert.assertEquals(fastqRecord.getReadString().length(), fastqRecord.length()); + Assert.assertEquals(fastqRecord.getReadString().length(), 
fastqRecord.getReadLength()); } @Test @@ -37,7 +37,7 @@ public void testBasicEmptyHeaderPrefix() { final String qualHeaderPrefix = ""; final String qualLine = ";<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"; final FastqRecord fastqRecord = new FastqRecord(seqHeaderPrefix, seqLine, qualHeaderPrefix, qualLine); - Assert.assertNull(fastqRecord.getReadHeader()); + Assert.assertNull(fastqRecord.getReadName()); Assert.assertNull(fastqRecord.getBaseQualityHeader()); } @@ -57,6 +57,11 @@ public void testCopy() { Assert.assertSame(fastqRecord.getBaseQualityHeader(), fastqRecordCopy.getBaseQualityHeader()); } + @Test(expectedExceptions = IllegalArgumentException.class) + public void testNullCopy() { + new FastqRecord(null); + } + @Test public void testNullSeq() { final String seqHeaderPrefix = "header"; From 0f19663867389730636ff8aadac8178407a329c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Fri, 24 Feb 2017 16:57:17 +0100 Subject: [PATCH 2/6] Addressing comments --- .../htsjdk/samtools/fastq/BasicFastqWriter.java | 2 +- .../fastq/{FastqCodec.java => FastqEncoder.java} | 16 ++++++++++++++-- .../java/htsjdk/samtools/fastq/FastqRecord.java | 21 +++++++++++++++++---- .../{FastqCodecTest.java => FastqEncoderTest.java} | 14 +++++++------- 4 files changed, 39 insertions(+), 14 deletions(-) rename src/main/java/htsjdk/samtools/fastq/{FastqCodec.java => FastqEncoder.java} (88%) rename src/test/java/htsjdk/samtools/fastq/{FastqCodecTest.java => FastqEncoderTest.java} (86%) diff --git a/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java b/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java index ab4ad94f9..d535cc178 100644 --- a/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java +++ b/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java @@ -58,7 +58,7 @@ public BasicFastqWriter(final PrintStream writer) { @Override public void write(final FastqRecord rec) { - writer.println(FastqCodec.encode(rec)); 
+ writer.println(FastqEncoder.encode(rec)); if (writer.checkError()) { throw new SAMException("Error in writing fastq file " + path); } diff --git a/src/main/java/htsjdk/samtools/fastq/FastqCodec.java b/src/main/java/htsjdk/samtools/fastq/FastqEncoder.java similarity index 88% rename from src/main/java/htsjdk/samtools/fastq/FastqCodec.java rename to src/main/java/htsjdk/samtools/fastq/FastqEncoder.java index 772781fc3..61fb3ec19 100644 --- a/src/main/java/htsjdk/samtools/fastq/FastqCodec.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqEncoder.java @@ -28,11 +28,14 @@ import htsjdk.samtools.util.SequenceUtil; /** - * Codec for encode records into FASTQ format. + * Codec for encoding records into FASTQ format. * * @author Daniel Gomez-Sanchez (magicDGS) */ -public class FastqCodec { +public final class FastqEncoder { + + // cannot be instantiated because it is a utility class + private FastqEncoder() {} /** * Encodes a FastqRecord in the String FASTQ format. @@ -51,6 +54,15 @@ public static String encode(final FastqRecord record) { } /** + * Encodes a SAMRecord in the String FASTQ format. + * @see #encode(FastqRecord) + * @see #asFastqRecord(SAMRecord) + */ + public static String encode(final SAMRecord record) { + return encode(asFastqRecord(record)); + } + + /** * Converts a {@link SAMRecord} into a {@link FastqRecord}.
*/ public static FastqRecord asFastqRecord(final SAMRecord record) { diff --git a/src/main/java/htsjdk/samtools/fastq/FastqRecord.java b/src/main/java/htsjdk/samtools/fastq/FastqRecord.java index 074b6c128..9974c259b 100755 --- a/src/main/java/htsjdk/samtools/fastq/FastqRecord.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqRecord.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.fastq; +import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMUtils; import htsjdk.samtools.util.StringUtil; @@ -90,7 +91,7 @@ public FastqRecord(final FastqRecord other) { /** * @return the read name - * @deprecated use {@link #getReadName()} instead + * @deprecated since 02/2017. Use {@link #getReadName()} instead */ @Deprecated public String getReadHeader() { @@ -163,7 +164,7 @@ public String getBaseQualityHeader() { /** * shortcut to getReadString().length() * - * @deprecated use {@link #getReadLength()} instead + * @deprecated since 02/2017. Use {@link #getReadLength()} instead */ @Deprecated public int length() { @@ -218,9 +219,21 @@ public boolean equals(Object obj) { return true; } - /** Simple toString() that gives a read name and length */ + /** + * Returns the record as the String FASTQ format. 
+ * @see FastqEncoder#encode(FastqRecord) + */ + public String toFastQString() { + return FastqEncoder.encode(this); + } + + /** + * Returns {@link #toFastQString()} + */ @Override public String toString() { - return String.format("%s: %s bp", readName, getReadLength()); + // TODO: this should be changed in the future for a simpler and more informative form such as + // TODO: return String.format("%s: %s bp", readName, getReadLength()); + return toFastQString(); } } diff --git a/src/test/java/htsjdk/samtools/fastq/FastqCodecTest.java b/src/test/java/htsjdk/samtools/fastq/FastqEncoderTest.java similarity index 86% rename from src/test/java/htsjdk/samtools/fastq/FastqCodecTest.java rename to src/test/java/htsjdk/samtools/fastq/FastqEncoderTest.java index cfe36532b..72e59cff7 100644 --- a/src/test/java/htsjdk/samtools/fastq/FastqCodecTest.java +++ b/src/test/java/htsjdk/samtools/fastq/FastqEncoderTest.java @@ -31,7 +31,7 @@ /** * @author Daniel Gomez-Sanchez (magicDGS) */ -public class FastqCodecTest { +public class FastqEncoderTest { @Test public void testAsFastqRecord() throws Exception { @@ -39,12 +39,12 @@ public void testAsFastqRecord() throws Exception { record.setReadPairedFlag(true); // test first of pair encoding record.setFirstOfPairFlag(true); - testRecord(record.getReadName() + FastqConstants.FIRST_OF_PAIR, FastqCodec.asFastqRecord(record), record); + testRecord(record.getReadName() + FastqConstants.FIRST_OF_PAIR, FastqEncoder.asFastqRecord(record), record); record.setFirstOfPairFlag(false); record.setSecondOfPairFlag(true); - testRecord(record.getReadName() + FastqConstants.SECOND_OF_PAIR, FastqCodec.asFastqRecord(record), record); + testRecord(record.getReadName() + FastqConstants.SECOND_OF_PAIR, FastqEncoder.asFastqRecord(record), record); record.setSecondOfPairFlag(false); - testRecord(record.getReadName(), FastqCodec.asFastqRecord(record), record); + testRecord(record.getReadName(), FastqEncoder.asFastqRecord(record), record); } private void
testRecord(final String expectedReadName, final FastqRecord fastqRecord, final SAMRecord samRecord) { @@ -59,11 +59,11 @@ public void testAsSAMRecord() throws Exception { // create a random record final SAMRecord samRecord = new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, "10M", null, 2); FastqRecord fastqRecord = new FastqRecord(samRecord.getReadName(), samRecord.getReadBases(), "", samRecord.getBaseQualities()); - testConvertedSAMRecord(FastqCodec.asSAMRecord(fastqRecord, samRecord.getHeader()), samRecord); + testConvertedSAMRecord(FastqEncoder.asSAMRecord(fastqRecord, samRecord.getHeader()), samRecord); fastqRecord = new FastqRecord(samRecord.getReadName() + FastqConstants.FIRST_OF_PAIR, samRecord.getReadBases(), "", samRecord.getBaseQualities()); - testConvertedSAMRecord(FastqCodec.asSAMRecord(fastqRecord, samRecord.getHeader()), samRecord); + testConvertedSAMRecord(FastqEncoder.asSAMRecord(fastqRecord, samRecord.getHeader()), samRecord); fastqRecord = new FastqRecord(samRecord.getReadName() + FastqConstants.SECOND_OF_PAIR, samRecord.getReadBases(), "", samRecord.getBaseQualities()); - testConvertedSAMRecord(FastqCodec.asSAMRecord(fastqRecord, samRecord.getHeader()), samRecord); + testConvertedSAMRecord(FastqEncoder.asSAMRecord(fastqRecord, samRecord.getHeader()), samRecord); } private void testConvertedSAMRecord(final SAMRecord converted, final SAMRecord original) { From 6fd2459bbf9762aef07d43cc63c12d4b56785729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Fri, 24 Feb 2017 18:13:03 +0100 Subject: [PATCH 3/6] Add @lindenb suggestion --- .../htsjdk/samtools/fastq/BasicFastqWriter.java | 5 +++- .../java/htsjdk/samtools/fastq/FastqEncoder.java | 29 +++++++++++++++++----- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java b/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java index d535cc178..0c9596a0b 100644 --- 
a/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java +++ b/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java @@ -58,7 +58,10 @@ public BasicFastqWriter(final PrintStream writer) { @Override public void write(final FastqRecord rec) { - writer.println(FastqEncoder.encode(rec)); + // encode without creating a String + FastqEncoder.write(writer, rec); + // and print a new line + writer.println(); if (writer.checkError()) { throw new SAMException("Error in writing fastq file " + path); } diff --git a/src/main/java/htsjdk/samtools/fastq/FastqEncoder.java b/src/main/java/htsjdk/samtools/fastq/FastqEncoder.java index 61fb3ec19..70326380b 100644 --- a/src/main/java/htsjdk/samtools/fastq/FastqEncoder.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqEncoder.java @@ -23,10 +23,13 @@ */ package htsjdk.samtools.fastq; +import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.util.SequenceUtil; +import java.io.IOException; + /** * Codec for encoding records into FASTQ format. * @@ -41,16 +44,30 @@ private FastqEncoder() {} * Encodes a FastqRecord in the String FASTQ format. */ public static String encode(final FastqRecord record) { + // reserve some memory based on the read length and read name + final int capacity = record.getReadLength() * 2 + record.getReadName().length() + 5; + return write(new StringBuilder(capacity), record).toString(); + } + + /** + * Writes a FastqRecord into the Appendable output. + * @throws SAMException if any I/O error occurs. + */ + public static Appendable write(final Appendable out,final FastqRecord record) { final String readName = record.getReadName(); final String readString = record.getReadString(); final String qualHeader = record.getBaseQualityHeader(); final String qualityString = record.getBaseQualityString(); - return new StringBuilder() - .append(FastqConstants.SEQUENCE_HEADER).append(readName == null ? 
"" : readName).append('\n') - .append(readString == null ? "" : readString).append('\n') - .append(FastqConstants.QUALITY_HEADER).append(qualHeader == null ? "" : qualHeader).append('\n') - .append(qualityString == null ? "" : qualityString) - .toString(); + try { + return out.append(FastqConstants.SEQUENCE_HEADER) + .append(readName == null ? "" : readName).append('\n') + .append(readString == null ? "" : readString).append('\n') + .append(FastqConstants.QUALITY_HEADER) + .append(qualHeader == null ? "" : qualHeader).append('\n') + .append(qualityString == null ? "" : qualityString); + } catch (IOException e) { + throw new SAMException(e); + } } /** From d03b390fd8d1994a34c91f2119303faecb5d082f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Mon, 27 Feb 2017 10:37:33 +0100 Subject: [PATCH 4/6] Removed unused import --- src/main/java/htsjdk/samtools/fastq/FastqRecord.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/htsjdk/samtools/fastq/FastqRecord.java b/src/main/java/htsjdk/samtools/fastq/FastqRecord.java index 9974c259b..001cdbcf4 100755 --- a/src/main/java/htsjdk/samtools/fastq/FastqRecord.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqRecord.java @@ -23,7 +23,6 @@ */ package htsjdk.samtools.fastq; -import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMUtils; import htsjdk.samtools.util.StringUtil; From 64e55a30008d42f803c04f9889f0e68390f3bc86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Mon, 27 Feb 2017 10:44:44 +0100 Subject: [PATCH 5/6] Fix NPE --- src/main/java/htsjdk/samtools/fastq/FastqEncoder.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/main/java/htsjdk/samtools/fastq/FastqEncoder.java b/src/main/java/htsjdk/samtools/fastq/FastqEncoder.java index 70326380b..fdbd02dcc 100644 --- a/src/main/java/htsjdk/samtools/fastq/FastqEncoder.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqEncoder.java @@ -44,8 +44,12 @@ private 
FastqEncoder() {} * Encodes a FastqRecord in the String FASTQ format. */ public static String encode(final FastqRecord record) { - // reserve some memory based on the read length and read name - final int capacity = record.getReadLength() * 2 + record.getReadName().length() + 5; + // reserve some memory based on the read length + int capacity = record.getReadLength() * 2 + 5; + // reserve some memory based on the read name + if (record.getReadName() != null) { + capacity += record.getReadName().length(); + } return write(new StringBuilder(capacity), record).toString(); } From a9904018944190111c0d09631882121dc31cffeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Mon, 27 Feb 2017 10:45:29 +0100 Subject: [PATCH 6/6] Non-null return for new getters --- src/main/java/htsjdk/samtools/fastq/FastqRecord.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/htsjdk/samtools/fastq/FastqRecord.java b/src/main/java/htsjdk/samtools/fastq/FastqRecord.java index 001cdbcf4..9fbcd3912 100755 --- a/src/main/java/htsjdk/samtools/fastq/FastqRecord.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqRecord.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.fastq; +import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMUtils; import htsjdk.samtools.util.StringUtil; @@ -118,10 +119,10 @@ public String getReadString() { /** * Get the DNA sequence. * - * @return read sequence as ASCII bytes ACGTN=. + * @return read sequence as ASCII bytes ACGTN=; {@link SAMRecord#NULL_SEQUENCE} if no bases are present. */ public byte[] getReadBases() { - return StringUtil.stringToBytes(readString); + return (readString == null) ? 
SAMRecord.NULL_SEQUENCE : StringUtil.stringToBytes(readString); } /** @@ -136,10 +137,10 @@ public String getBaseQualityString() { /** * Get the base qualities as binary PHRED scores (not ASCII) * - * @return the base quality + * @return the base quality; {@link SAMRecord#NULL_QUALS} if no bases are present. */ public byte[] getBaseQualities() { - return SAMUtils.fastqToPhred(baseQualityString); + return (baseQualityString == null) ? SAMRecord.NULL_QUALS : SAMUtils.fastqToPhred(baseQualityString); } /**