From 260a62edadfb32ba0b9aabe72d0e8a5dc943bdd5 Mon Sep 17 00:00:00 2001 From: George Powley Date: Thu, 8 Dec 2016 13:48:46 -0500 Subject: [PATCH] add inflater factory --- src/main/java/htsjdk/samtools/BAMFileReader.java | 183 ++++++++++++++++++--- .../java/htsjdk/samtools/SamReaderFactory.java | 42 ++++- .../util/AsyncBlockCompressedInputStream.java | 20 ++- .../samtools/util/BlockCompressedInputStream.java | 68 +++++++- .../java/htsjdk/samtools/util/BlockGunzipper.java | 28 +++- .../htsjdk/samtools/util/zip/DeflaterFactory.java | 6 +- .../htsjdk/samtools/util/zip/InflaterFactory.java | 49 ++++++ .../java/htsjdk/samtools/SamReaderFactoryTest.java | 30 ++++ .../util/BlockCompressedInputStreamTest.java | 98 ++++++++++- .../util/BlockCompressedOutputStreamTest.java | 8 +- 10 files changed, 486 insertions(+), 46 deletions(-) create mode 100644 src/main/java/htsjdk/samtools/util/zip/InflaterFactory.java diff --git a/src/main/java/htsjdk/samtools/BAMFileReader.java b/src/main/java/htsjdk/samtools/BAMFileReader.java index 2de91c176..c2f9d4ea3 100644 --- a/src/main/java/htsjdk/samtools/BAMFileReader.java +++ b/src/main/java/htsjdk/samtools/BAMFileReader.java @@ -25,13 +25,8 @@ import htsjdk.samtools.seekablestream.SeekableStream; -import htsjdk.samtools.util.AsyncBlockCompressedInputStream; -import htsjdk.samtools.util.BinaryCodec; -import htsjdk.samtools.util.BlockCompressedInputStream; -import htsjdk.samtools.util.CloseableIterator; -import htsjdk.samtools.util.CoordMath; -import htsjdk.samtools.util.RuntimeIOException; -import htsjdk.samtools.util.StringLineReader; +import htsjdk.samtools.util.*; +import htsjdk.samtools.util.zip.InflaterFactory; import java.io.DataInputStream; import java.io.File; @@ -92,40 +87,94 @@ /** * Prepare to read BAM from a stream (not seekable) * @param stream source of bytes. + * @param indexFile BAM index file * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @throws IOException */ BAMFileReader(final InputStream stream, final File indexFile, final boolean eagerDecode, final boolean useAsynchronousIO, final ValidationStringency validationStringency, - final SAMRecordFactory factory) - throws IOException { + final SAMRecordFactory samRecordFactory) + throws IOException { + this(stream, indexFile, eagerDecode, useAsynchronousIO, validationStringency, samRecordFactory, + BlockGunzipper.getDefaultInflaterFactory()); + } + + /** + * Prepare to read BAM from a stream (not seekable) + * @param stream source of bytes. + * @param indexFile BAM index file + * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O + * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @param inflaterFactory InflaterFactory used by BlockCompressedInputStream + * @throws IOException + */ + BAMFileReader(final InputStream stream, + final File indexFile, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory, + final InflaterFactory inflaterFactory) + throws IOException { mIndexFile = indexFile; mIsSeekable = false; - mCompressedInputStream = useAsynchronousIO ? new AsyncBlockCompressedInputStream(stream) : new BlockCompressedInputStream(stream); + mCompressedInputStream = useAsynchronousIO ? new AsyncBlockCompressedInputStream(stream, inflaterFactory) : new BlockCompressedInputStream(stream, inflaterFactory); mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream)); this.eagerDecode = eagerDecode; this.mValidationStringency = validationStringency; - this.samRecordFactory = factory; + this.samRecordFactory = samRecordFactory; this.mFileHeader = readHeader(this.mStream, this.mValidationStringency, null); } /** * Prepare to read BAM from a file (seekable) * @param file source of bytes. + * @param indexFile BAM index file + * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O + * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @throws IOException + */ + BAMFileReader(final File file, + final File indexFile, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory) + throws IOException { + this(file, indexFile, eagerDecode, useAsynchronousIO, validationStringency, samRecordFactory, BlockGunzipper.getDefaultInflaterFactory()); + } + + /** + * Prepare to read BAM from a file (seekable) + * @param file source of bytes. + * @param indexFile BAM index file * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @param inflaterFactory InflaterFactory used by BlockCompressedInputStream + * @throws IOException */ BAMFileReader(final File file, final File indexFile, final boolean eagerDecode, final boolean useAsynchronousIO, final ValidationStringency validationStringency, - final SAMRecordFactory factory) + final SAMRecordFactory samRecordFactory, + final InflaterFactory inflaterFactory) throws IOException { - this(useAsynchronousIO ? new AsyncBlockCompressedInputStream(file) : new BlockCompressedInputStream(file), indexFile!=null ? indexFile : SamFiles.findIndex(file), eagerDecode, useAsynchronousIO, file.getAbsolutePath(), validationStringency, factory); + this(useAsynchronousIO ? new AsyncBlockCompressedInputStream(file, inflaterFactory) : new BlockCompressedInputStream(file, inflaterFactory), + indexFile!=null ? indexFile : SamFiles.findIndex(file), eagerDecode, useAsynchronousIO, file.getAbsolutePath(), validationStringency, samRecordFactory); if (mIndexFile != null && mIndexFile.lastModified() < file.lastModified()) { System.err.println("WARNING: BAM index file " + mIndexFile.getAbsolutePath() + " is older than BAM " + file.getAbsolutePath()); @@ -134,33 +183,110 @@ mStream.setInputFileName(file.getAbsolutePath()); } + /** + * Prepare to read BAM from a stream (seekable) + * @param strm source of bytes + * @param indexFile BAM index file + * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O + * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @throws IOException + */ BAMFileReader(final SeekableStream strm, final File indexFile, final boolean eagerDecode, final boolean useAsynchronousIO, final ValidationStringency validationStringency, - final SAMRecordFactory factory) + final SAMRecordFactory samRecordFactory) throws IOException { - this(useAsynchronousIO ? new AsyncBlockCompressedInputStream(strm) : new BlockCompressedInputStream(strm), indexFile, eagerDecode, useAsynchronousIO, strm.getSource(), validationStringency, factory); + this(strm, indexFile, eagerDecode, useAsynchronousIO, validationStringency, samRecordFactory, BlockGunzipper.getDefaultInflaterFactory()); } + /** + * Prepare to read BAM from a stream (seekable) + * @param strm source of bytes + * @param indexFile BAM index file + * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O + * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @param inflaterFactory InflaterFactory used by BlockCompressedInputStream + * @throws IOException + */ + BAMFileReader(final SeekableStream strm, + final File indexFile, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory, + final InflaterFactory inflaterFactory) + throws IOException { + this(useAsynchronousIO ? new AsyncBlockCompressedInputStream(strm, inflaterFactory) : new BlockCompressedInputStream(strm, inflaterFactory), + indexFile, eagerDecode, useAsynchronousIO, strm.getSource(), validationStringency, samRecordFactory); + } + + /** + * Prepare to read BAM from a stream (seekable) + * @param strm source of bytes + * @param indexStream BAM index stream + * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O + * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @throws IOException + */ BAMFileReader(final SeekableStream strm, final SeekableStream indexStream, final boolean eagerDecode, final boolean useAsynchronousIO, final ValidationStringency validationStringency, - final SAMRecordFactory factory) + final SAMRecordFactory samRecordFactory) throws IOException { - this(useAsynchronousIO ? new AsyncBlockCompressedInputStream(strm) : new BlockCompressedInputStream(strm), indexStream, eagerDecode, useAsynchronousIO, strm.getSource(), validationStringency, factory); + this(strm, indexStream, eagerDecode, useAsynchronousIO, validationStringency, samRecordFactory, BlockGunzipper.getDefaultInflaterFactory()); } + /** + * Prepare to read BAM from a stream (seekable) + * @param strm source of bytes + * @param indexStream BAM index stream + * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O + * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @param inflaterFactory InflaterFactory used by BlockCompressedInputStream + * @throws IOException + */ + BAMFileReader(final SeekableStream strm, + final SeekableStream indexStream, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory, + final InflaterFactory inflaterFactory) + throws IOException { + this(useAsynchronousIO ? new AsyncBlockCompressedInputStream(strm, inflaterFactory) : new BlockCompressedInputStream(strm, inflaterFactory), + indexStream, eagerDecode, useAsynchronousIO, strm.getSource(), validationStringency, samRecordFactory); + } + + /** + * Prepare to read BAM from a compressed stream (seekable) + * @param compressedInputStream source of bytes + * @param indexFile BAM index file + * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O + * @param source string used when reporting errors + * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @throws IOException + */ private BAMFileReader(final BlockCompressedInputStream compressedInputStream, final File indexFile, final boolean eagerDecode, final boolean useAsynchronousIO, final String source, final ValidationStringency validationStringency, - final SAMRecordFactory factory) + final SAMRecordFactory samRecordFactory) throws IOException { mIndexFile = indexFile; mIsSeekable = true; @@ -168,18 +294,29 @@ private BAMFileReader(final BlockCompressedInputStream compressedInputStream, mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream)); this.eagerDecode = eagerDecode; this.mValidationStringency = validationStringency; - this.samRecordFactory = factory; + this.samRecordFactory = samRecordFactory; this.mFileHeader = readHeader(this.mStream, this.mValidationStringency, source); mFirstRecordPointer = mCompressedInputStream.getFilePointer(); - } + } + /** + * Prepare to read BAM from a compressed stream (seekable) + * @param compressedInputStream source of bytes + * @param indexStream BAM index stream + * @param eagerDecode if true, decode all BAM fields as reading rather than lazily. + * @param useAsynchronousIO if true, use asynchronous I/O + * @param source string used when reporting errors + * @param validationStringency Controls how to handle invalidate reads or header lines. + * @param samRecordFactory SAM record factory + * @throws IOException + */ private BAMFileReader(final BlockCompressedInputStream compressedInputStream, final SeekableStream indexStream, final boolean eagerDecode, final boolean useAsynchronousIO, final String source, final ValidationStringency validationStringency, - final SAMRecordFactory factory) + final SAMRecordFactory samRecordFactory) throws IOException { mIndexStream = indexStream; mIsSeekable = true; @@ -187,7 +324,7 @@ private BAMFileReader(final BlockCompressedInputStream compressedInputStream, mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream)); this.eagerDecode = eagerDecode; this.mValidationStringency = validationStringency; - this.samRecordFactory = factory; + this.samRecordFactory = samRecordFactory; this.mFileHeader = readHeader(this.mStream, this.mValidationStringency, source); mFirstRecordPointer = mCompressedInputStream.getFilePointer(); } @@ -234,7 +371,7 @@ protected void enableIndexMemoryMapping(final boolean enabled) { this.mCompressedInputStream.setCheckCrcs(enabled); } - @Override void setSAMRecordFactory(final SAMRecordFactory factory) { this.samRecordFactory = factory; } + @Override void setSAMRecordFactory(final SAMRecordFactory samRecordFactory) { this.samRecordFactory = samRecordFactory; } @Override public SamReader.Type type() { diff --git a/src/main/java/htsjdk/samtools/SamReaderFactory.java b/src/main/java/htsjdk/samtools/SamReaderFactory.java index 8f203d5c0..466258aec 100644 --- a/src/main/java/htsjdk/samtools/SamReaderFactory.java +++ b/src/main/java/htsjdk/samtools/SamReaderFactory.java @@ -29,6 +29,7 @@ import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.sra.SRAAccession; import htsjdk.samtools.util.*; +import htsjdk.samtools.util.zip.InflaterFactory; import java.io.File; import java.io.IOException; @@ -115,6 +116,13 @@ public SamReader open(final Path path, /** Set this factory's {@link htsjdk.samtools.SAMRecordFactory} to the provided one, then returns itself. */ abstract public SamReaderFactory samRecordFactory(final SAMRecordFactory samRecordFactory); + /** + * Set this factory's {@link htsjdk.samtools.util.zip.InflaterFactory} to the provided one, then returns itself. + * Note: The inflaterFactory provided here is only used for BAM decompression implemented with {@link BAMFileReader}, + * it is not used for CRAM or other formats like a gzipped SAM file. + */ + abstract public SamReaderFactory inflaterFactory(final InflaterFactory inflaterFactory); + /** Enables the provided {@link Option}s, then returns itself. */ abstract public SamReaderFactory enable(final Option... options); @@ -146,12 +154,14 @@ public SamReader open(final Path path, abstract public SamReaderFactory setUseAsyncIo(final boolean asynchronousIO); private static SamReaderFactoryImpl DEFAULT = - new SamReaderFactoryImpl(Option.DEFAULTS, defaultValidationStringency, DefaultSAMRecordFactory.getInstance()); + new SamReaderFactoryImpl(Option.DEFAULTS, defaultValidationStringency, + DefaultSAMRecordFactory.getInstance(), BlockGunzipper.getDefaultInflaterFactory()); public static void setDefaultValidationStringency(final ValidationStringency defaultValidationStringency) { SamReaderFactory.defaultValidationStringency = defaultValidationStringency; // The default may have changed, so reset the default SamReader - DEFAULT = new SamReaderFactoryImpl(Option.DEFAULTS, defaultValidationStringency, DefaultSAMRecordFactory.getInstance()); + DEFAULT = new SamReaderFactoryImpl(Option.DEFAULTS, defaultValidationStringency, + DefaultSAMRecordFactory.getInstance(), BlockGunzipper.getDefaultInflaterFactory()); } /** Creates a copy of the default {@link SamReaderFactory}. */ @@ -164,7 +174,8 @@ public static SamReaderFactory makeDefault() { * no path wrapper, and {@link htsjdk.samtools.DefaultSAMRecordFactory}. */ public static SamReaderFactory make() { - return new SamReaderFactoryImpl(EnumSet.noneOf(Option.class), ValidationStringency.DEFAULT_STRINGENCY, DefaultSAMRecordFactory.getInstance()); + return new SamReaderFactoryImpl(EnumSet.noneOf(Option.class), ValidationStringency.DEFAULT_STRINGENCY, + DefaultSAMRecordFactory.getInstance(), BlockGunzipper.getDefaultInflaterFactory()); } private static class SamReaderFactoryImpl extends SamReaderFactory { @@ -175,12 +186,14 @@ public static SamReaderFactory make() { private SAMRecordFactory samRecordFactory; private CustomReaderFactory customReaderFactory; private CRAMReferenceSource referenceSource; + private InflaterFactory inflaterFactory; - private SamReaderFactoryImpl(final EnumSet