diff --git a/src/main/java/htsjdk/samtools/fastq/FastqReader.java b/src/main/java/htsjdk/samtools/fastq/FastqReader.java index 7988712f3..d5d8f1889 100755 --- a/src/main/java/htsjdk/samtools/fastq/FastqReader.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqReader.java @@ -41,6 +41,22 @@ * directly. It is provided so that this class can be used in Java for-each loop. */ public class FastqReader implements Iterator, Iterable, Closeable { + /** Enum of the types of lines we see in Fastq. */ + protected enum LineType { + SequenceHeader("Sequence Header"), + SequenceLine("Sequence Line"), + QualityHeader("Quality Header"), + QualityLine("Quality Line"); + + private String printable; + + LineType(String printable) { + this.printable = printable; + } + + @Override public String toString() { return this.printable; } + } + final private File fastqFile; final private BufferedReader reader; private FastqRecord nextRecord; @@ -58,10 +74,7 @@ public FastqReader(final File file) { * @param skipBlankLines should we skip blank lines ? 
*/ public FastqReader(final File file, final boolean skipBlankLines) { - this.skipBlankLines=skipBlankLines; - fastqFile = file; - reader = IOUtil.openFileForBufferedReading(fastqFile); - nextRecord = readNextRecord(); + this(file, IOUtil.openFileForBufferedReading(file), skipBlankLines); } public FastqReader(final BufferedReader reader) { @@ -87,7 +100,6 @@ public FastqReader(final File file, final BufferedReader reader) { private FastqRecord readNextRecord() { try { - // Read sequence header final String seqHeader = readLineConditionallySkippingBlanks(); if (seqHeader == null) return null ; @@ -95,23 +107,23 @@ private FastqRecord readNextRecord() { throw new SAMException(error("Missing sequence header")); } if (!seqHeader.startsWith(FastqConstants.SEQUENCE_HEADER)) { - throw new SAMException(error("Sequence header must start with "+ FastqConstants.SEQUENCE_HEADER+": "+seqHeader)); + throw new SAMException(error("Sequence header must start with " + FastqConstants.SEQUENCE_HEADER + ": " + seqHeader)); } // Read sequence line final String seqLine = readLineConditionallySkippingBlanks(); - checkLine(seqLine,"sequence line"); + checkLine(seqLine, LineType.SequenceLine); // Read quality header final String qualHeader = readLineConditionallySkippingBlanks(); - checkLine(qualHeader,"quality header"); + checkLine(qualHeader, LineType.QualityHeader); if (!qualHeader.startsWith(FastqConstants.QUALITY_HEADER)) { - throw new SAMException(error("Quality header must start with "+ FastqConstants.QUALITY_HEADER+": "+qualHeader)); + throw new SAMException(error("Quality header must start with " + FastqConstants.QUALITY_HEADER + ": "+ qualHeader)); } // Read quality line final String qualLine = readLineConditionallySkippingBlanks(); - checkLine(qualLine,"quality line"); + checkLine(qualLine, LineType.QualityLine); // Check sequence and quality lines are same length if (seqLine.length() != qualLine.length()) { @@ -165,21 +177,23 @@ public void close() { try { reader.close(); } 
catch (IOException e) { - throw new SAMException("IO problem in fastq file "+getAbsolutePath(), e); + throw new SAMException("IO problem in fastq file " + getAbsolutePath(), e); } } - private void checkLine(final String line, final String kind) { + /** Checks that the line is neither null (representing EOF) nor empty (blank line in file). */ + protected void checkLine(final String line, final LineType kind) { if (line == null) { - throw new SAMException(error("File is too short - missing "+kind+" line")); + throw new SAMException(error("File is too short - missing " + kind)); } if (StringUtil.isBlank(line)) { - throw new SAMException(error("Missing "+kind)); + throw new SAMException(error("Missing " + kind)); } } - private String error(final String msg) { - return msg + " at line "+line+" in fastq "+getAbsolutePath(); + /** Generates an error message with line number information. */ + protected String error(final String msg) { + return msg + " at line " + line + " in fastq " + getAbsolutePath(); } private String getAbsolutePath() { @@ -198,6 +212,6 @@ private String readLineConditionallySkippingBlanks() throws IOException { @Override public String toString() { - return "FastqReader["+(this.fastqFile == null?"":this.fastqFile)+ " Line:"+getLineNumber()+"]"; + return "FastqReader[" + (this.fastqFile == null ? 
"" : this.fastqFile) + " Line:" + getLineNumber() + "]"; } } diff --git a/src/test/java/htsjdk/samtools/fastq/FastqRecordTest.java b/src/test/java/htsjdk/samtools/fastq/FastqRecordTest.java index 5ace9ffc3..9a47a8688 100644 --- a/src/test/java/htsjdk/samtools/fastq/FastqRecordTest.java +++ b/src/test/java/htsjdk/samtools/fastq/FastqRecordTest.java @@ -1,9 +1,12 @@ package htsjdk.samtools.fastq; import htsjdk.HtsjdkTest; +import htsjdk.samtools.util.TestUtil; import org.testng.Assert; import org.testng.annotations.Test; +import java.util.ArrayList; + public final class FastqRecordTest extends HtsjdkTest { @Test @@ -207,4 +210,14 @@ public void testNotEqualLengths() { new FastqRecord("header", seqLine1, "qualHeaderPrefix", qualLine1); //Note: this does not blow up now but it will once we enforce that seqLine and qualLine be the same length } + + @Test + public void testFastqSerialize() throws Exception { + final ArrayList records = new ArrayList<>(); + records.add(new FastqRecord("q1", "ACGTACGT", "", "########")); + records.add(new FastqRecord("q2", "CCAGCGTAATA", "", "????????###")); + records.add(new FastqRecord("q3", "NNNNNNNNNNNN", "", "############")); + + Assert.assertEquals(TestUtil.serializeAndDeserialize(records),records); + } } diff --git a/src/test/java/htsjdk/samtools/fastq/FastqWriterTest.java b/src/test/java/htsjdk/samtools/fastq/FastqWriterTest.java deleted file mode 100644 index 22549e904..000000000 --- a/src/test/java/htsjdk/samtools/fastq/FastqWriterTest.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * The MIT License - * - * Pierre Lindenbaum PhD - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished 
to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package htsjdk.samtools.fastq; - -import htsjdk.HtsjdkTest; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import htsjdk.samtools.util.TestUtil; - -import java.io.File; -import java.util.ArrayList; - -/** - * test fastq - */ -public class FastqWriterTest extends HtsjdkTest { - private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools/util/QualityEncodingDetectorTest"); - - @DataProvider(name = "fastqsource") - public Object[][] createTestData() { - return new Object[][]{ - {"solexa_full_range_as_solexa.fastq"}, - {"5k-30BB2AAXX.3.aligned.sam.fastq"} - }; - } - - @Test(dataProvider = "fastqsource") - public void testReadReadWriteFastq(final String basename) throws Exception { - final File tmpFile = File.createTempFile("test.", ".fastq"); - tmpFile.deleteOnExit(); - final FastqReader fastqReader = new FastqReader(new File(TEST_DATA_DIR,basename)); - final FastqWriterFactory writerFactory = new FastqWriterFactory(); - final FastqWriter fastqWriter = writerFactory.newWriter(tmpFile); - for(final FastqRecord rec: fastqReader) fastqWriter.write(rec); - fastqWriter.close(); - fastqReader.close(); - } - - @Test(dataProvider = "fastqsource") - public void testFastqSerialize(final String basename) throws 
Exception { - //write - final ArrayList records = new ArrayList<>(); - final FastqReader fastqReader = new FastqReader(new File(TEST_DATA_DIR,basename)); - for(final FastqRecord rec: fastqReader) { - records.add(rec); - if(records.size()>100) break; - } - fastqReader.close(); - Assert.assertEquals(TestUtil.serializeAndDeserialize(records),records); - } -} diff --git a/src/test/scala/htsjdk/UnitSpec.scala b/src/test/scala/htsjdk/UnitSpec.scala index db533a12e..a2995d56c 100644 --- a/src/test/scala/htsjdk/UnitSpec.scala +++ b/src/test/scala/htsjdk/UnitSpec.scala @@ -1,6 +1,25 @@ package htsjdk +import java.nio.file.{Files, Path} + import org.scalatest.{FlatSpec, Matchers} /** Base class for all Scala tests. */ -class UnitSpec extends FlatSpec with Matchers +class UnitSpec extends FlatSpec with Matchers { + /** Make a temporary file that will get cleaned up at the end of testing. */ + protected def makeTempFile(prefix: String, suffix: String): Path = { + val path = Files.createTempFile(prefix, suffix) + path.toFile.deleteOnExit() + path + } + + /** Implicit conversion from Java to Scala iterator. */ + implicit def javaIteratorAsScalaIterator[A](iter: java.util.Iterator[A]): Iterator[A] = { + scala.collection.JavaConverters.asScalaIterator(iter) + } + + /** Implicit conversion from Java to Scala iterable. 
*/ + implicit def javaIterableAsScalaIterable[A](iterable: java.lang.Iterable[A]): Iterable[A] = { + scala.collection.JavaConverters.iterableAsScalaIterable(iterable) + } +} diff --git a/src/test/scala/htsjdk/samtools/fastq/FastqReaderWriterTest.scala b/src/test/scala/htsjdk/samtools/fastq/FastqReaderWriterTest.scala new file mode 100644 index 000000000..60e08efbd --- /dev/null +++ b/src/test/scala/htsjdk/samtools/fastq/FastqReaderWriterTest.scala @@ -0,0 +1,153 @@ +package htsjdk.samtools.fastq + +import java.io.{BufferedReader, File, StringReader} + +import htsjdk.UnitSpec +import htsjdk.samtools.SAMUtils +import htsjdk.samtools.util.IOUtil + +import scala.util.Random + +class FastqReaderWriterTest extends UnitSpec { + private val rng = new Random() + private val Bases = Array('A', 'C', 'G', 'T') + + /** Generates a random string of bases of the desired length. */ + def bases(length: Int): String = { + val chs = new Array[Char](length) + chs.indices.foreach(i => chs(i) = Bases(rng.nextInt(Bases.length))) + new String(chs) + } + + /** Generates a FastqRecord with random bases at a given length. 
*/ + def fq(name: String, length: Int, qual: Int = 30): FastqRecord = { + new FastqRecord(name, bases(length), "", SAMUtils.phredToFastq(qual).toString * length) + } + + "FastqWriter" should "write four lines per record to file" in { + val path = makeTempFile("test.", ".fastq") + val out = new FastqWriterFactory().newWriter(path.toFile) + val recs = Seq(fq("q1", 50), fq("q2", 48), fq("q3", 55)) + val Seq(q1, q2, q3) = recs + + recs.foreach(rec => out.write(rec)) + out.close() + + val lines = IOUtil.slurpLines(path.toFile) + lines should have size 12 + + lines.get(0) shouldBe "@q1" + lines.get(1) shouldBe q1.getReadString + lines.get(4) shouldBe "@q2" + lines.get(5) shouldBe q2.getReadString + lines.get(8) shouldBe "@q3" + lines.get(9) shouldBe q3.getReadString + } + + it should "write a record with only a single base" in { + val path = makeTempFile("test.", ".fastq") + val out = new FastqWriterFactory().newWriter(path.toFile) + out.write(fq("q1", 1)) + out.close() + val lines = IOUtil.slurpLines(path.toFile) + lines.get(1) should have length 1 + lines.get(3) should have length 1 + } + + it should "write a record with zero-length bases and quals" in { + val path = makeTempFile("test.", ".fastq") + val out = new FastqWriterFactory().newWriter(path.toFile) + out.write(fq("q1", 0)) + out.close() + val lines = IOUtil.slurpLines(path.toFile) + lines.get(1) should have length 0 + lines.get(3) should have length 0 + } + + + "FastqReader" should "read back a fastq file written by FastqWriter" in { + val path = makeTempFile("test.", ".fastq") + val out = new FastqWriterFactory().newWriter(path.toFile) + val recs = Seq(fq("q1", 50), fq("q2", 100), fq("q3", 150)) + recs.foreach(rec => out.write(rec)) + out.close() + + val in = new FastqReader(path.toFile) + val recs2 = in.iterator().toList + in.close() + recs2 should contain theSameElementsInOrderAs recs + } + + it should "throw an exception if the input fastq is garbled" in { + val fastq = + """ + |@q1 + |AACCGGTT + |+ + 
|######## + |@q2 + |ACGT + |#### + """.stripMargin.trim + + val in = new FastqReader(null, new BufferedReader(new StringReader(fastq))) + an[Exception] shouldBe thrownBy { in.next() } + } + + it should "throw an exception if the input file doesn't exist" in { + an[Exception] shouldBe thrownBy { new FastqReader(new File("/some/path/that/shouldnt/exist.fq"))} + } + + it should "read an empty file just fine" in { + val path = makeTempFile("empty.", ".fastq") + val in = new FastqReader(path.toFile) + while (in.hasNext) in.next() + an[Exception] shouldBe thrownBy { in.next() } + in.close() + } + + it should "fail on a truncated file" in { + val fastq = + """ + |@q1 + |AACCGGTT + |+ + |######## + """.stripMargin.trim + + Range.inclusive(1, 3).foreach { n => + val text = fastq.lines.take(n).mkString("\n") + val reader = new BufferedReader(new StringReader(text)) + an[Exception] shouldBe thrownBy { new FastqReader(null, reader).iterator().toSeq } + } + } + + it should "fail if the seq and qual lines are different lengths" in { + val fastq = + """ + |@q1 + |AACC + |+ + |######## + """.stripMargin.trim + + val reader = new BufferedReader(new StringReader(fastq)) + an[Exception] shouldBe thrownBy { new FastqReader(null, reader).iterator().toSeq } + } + + it should "fail if either header line is empty" in { + val fastq = + """ + |@q1 + |AACC + |+q1 + |######## + """.stripMargin.trim + + val noSeqHeader = new BufferedReader(new StringReader(fastq.replace("@q1", ""))) + val noQualHeader = new BufferedReader(new StringReader(fastq.replace("+q1", ""))) + an[Exception] shouldBe thrownBy { new FastqReader(noSeqHeader).iterator().toSeq } + an[Exception] shouldBe thrownBy { new FastqReader(noQualHeader).iterator().toSeq } + } + +}