From 179b5e1a98dc358595fb85edf00f473f2418edec Mon Sep 17 00:00:00 2001 From: JP Martin Date: Mon, 28 Nov 2016 16:29:03 -0800 Subject: [PATCH 1/4] Support index as Path Includes passing test that accesses an indexed BAM from Google Cloud Storage. Sample output: [TestNG] Running: /usr/local/google/home/jpmartin/.IdeaIC2016.2/system/temp-testng-customsuite.xml Count is: 916 =============================================== Default Suite Total tests run: 1, Failures: 0, Skips: 0 =============================================== Process finished with exit code 0 --- build.gradle | 6 ++- src/main/java/htsjdk/samtools/BAMFileReader.java | 43 ++++++++++++++++++++++ .../java/htsjdk/samtools/SamReaderFactory.java | 19 +++++++++- 3 files changed, 65 insertions(+), 3 deletions(-) diff --git a/build.gradle b/build.gradle index 9e8f35154..e19094763 100644 --- a/build.gradle +++ b/build.gradle @@ -42,6 +42,10 @@ dependencies { compile "org.tukaani:xz:1.5" compile "gov.nih.nlm.ncbi:ngs-java:1.2.4" + // temporary, for testing + compile 'com.google.cloud:google-cloud-nio:0.5.1:shaded' + + testCompile "org.testng:testng:6.9.9" } @@ -50,7 +54,7 @@ targetCompatibility = 1.8 final isRelease = Boolean.getBoolean("release") final gitVersion = gitVersion().replaceAll(".dirty", "") -version = isRelease ? gitVersion : gitVersion + "-SNAPSHOT" +version = isRelease ? gitVersion : gitVersion + "-B-" + "-SNAPSHOT" logger.info("build for version:" + version) group = 'com.github.samtools' diff --git a/src/main/java/htsjdk/samtools/BAMFileReader.java b/src/main/java/htsjdk/samtools/BAMFileReader.java index 98bb74f63..c5b7b28b9 100644 --- a/src/main/java/htsjdk/samtools/BAMFileReader.java +++ b/src/main/java/htsjdk/samtools/BAMFileReader.java @@ -116,6 +116,24 @@ this.mFileHeader = readHeader(this.mStream, this.mValidationStringency, null); } + BAMFileReader(final InputStream stream, + final SeekableStream indexStream, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory factory) + throws IOException { + this.useAsynchronousIO = useAsynchronousIO; + mIndexStream = indexStream; + mIsSeekable = true; + mCompressedInputStream = new BlockCompressedInputStream(stream); + mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream)); + this.eagerDecode = eagerDecode; + this.mValidationStringency = validationStringency; + this.samRecordFactory = factory; + this.mFileHeader = readHeader(this.mStream, this.mValidationStringency, null); + } + /** * Prepare to read BAM from a file (seekable) * @param file source of bytes. @@ -138,6 +156,31 @@ mStream.setInputFileName(file.getAbsolutePath()); } + + + private BAMFileReader(final File file, + final SeekableStream indexStream, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory factory, + String onlyThereSoTheSignatureIsntAmbiguous) + throws IOException { + this(new BlockCompressedInputStream(file), indexStream, eagerDecode, useAsynchronousIO, indexStream.getSource(), validationStringency, factory); + // Provide better error message when there is an error reading. + mStream.setInputFileName(file.getAbsolutePath()); + } + + static BAMFileReader fromFileAndSeekable(final File file, + final SeekableStream indexStream, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory factory) + throws IOException { + return new BAMFileReader(file, indexStream, eagerDecode, useAsynchronousIO, validationStringency, factory, "please"); + } + BAMFileReader(final SeekableStream strm, final File indexFile, final boolean eagerDecode, diff --git a/src/main/java/htsjdk/samtools/SamReaderFactory.java b/src/main/java/htsjdk/samtools/SamReaderFactory.java index bf890d569..6b0d47f28 100644 --- a/src/main/java/htsjdk/samtools/SamReaderFactory.java +++ b/src/main/java/htsjdk/samtools/SamReaderFactory.java @@ -311,10 +311,25 @@ public SamReader open(final SamInputResource resource) { if (SamStreams.isBAMFile(bufferedStream)) { if (sourceFile == null || !sourceFile.isFile()) { // Handle case in which file is a named pipe, e.g. /dev/stdin or created by mkfifo - primitiveSamReader = new BAMFileReader(bufferedStream, indexFile, false, asynchronousIO, validationStringency, this.samRecordFactory); + if (indexFile!=null) { + primitiveSamReader = new BAMFileReader(bufferedStream, indexFile, false, asynchronousIO, validationStringency, this.samRecordFactory); + } else { + // index is not representable as a file (perhaps it's a NIO Path), get the stream instead. + // do not close bufferedStream because that's the same stream we're about to pass on to the BAMFileReader + // (asUnbufferedSeekableStream does not re-open). Instead, rewind it to the beginning. + SeekableStream sourceSeekable = data.asUnbufferedSeekableStream(); + sourceSeekable.seek(0); + primitiveSamReader = new BAMFileReader( + sourceSeekable, indexMaybe.asUnbufferedSeekableStream(), false, asynchronousIO, validationStringency, this.samRecordFactory); + } } else { bufferedStream.close(); - primitiveSamReader = new BAMFileReader(sourceFile, indexFile, false, asynchronousIO, validationStringency, this.samRecordFactory); + if (indexFile!=null) { + primitiveSamReader = new BAMFileReader(sourceFile, indexFile, false, asynchronousIO, validationStringency, this.samRecordFactory); + } else { + // index is not representable as a file (perhaps it's a NIO Path), get the stream instead. + primitiveSamReader = BAMFileReader.fromFileAndSeekable(sourceFile, indexMaybe.asUnbufferedSeekableStream(), false, asynchronousIO, validationStringency, this.samRecordFactory); + } } } else if (BlockCompressedInputStream.isValidFile(bufferedStream)) { primitiveSamReader = new SAMTextReader(new BlockCompressedInputStream(bufferedStream), validationStringency, this.samRecordFactory); From 99353cc33b2378719babed75d0b297ba8d924289 Mon Sep 17 00:00:00 2001 From: JP Martin Date: Mon, 28 Nov 2016 17:00:06 -0800 Subject: [PATCH 2/4] Remove temp test so I can remove its dependency --- build.gradle | 6 +----- src/main/java/htsjdk/samtools/BAMFileReader.java | 18 ------------------ 2 files changed, 1 insertion(+), 23 deletions(-) diff --git a/build.gradle b/build.gradle index e19094763..9e8f35154 100644 --- a/build.gradle +++ b/build.gradle @@ -42,10 +42,6 @@ dependencies { compile "org.tukaani:xz:1.5" compile "gov.nih.nlm.ncbi:ngs-java:1.2.4" - // temporary, for testing - compile 'com.google.cloud:google-cloud-nio:0.5.1:shaded' - - testCompile "org.testng:testng:6.9.9" } @@ -54,7 +50,7 @@ targetCompatibility = 1.8 final isRelease = Boolean.getBoolean("release") final gitVersion = gitVersion().replaceAll(".dirty", "") -version = isRelease ? gitVersion : gitVersion + "-B-" + "-SNAPSHOT" +version = isRelease ? gitVersion : gitVersion + "-SNAPSHOT" logger.info("build for version:" + version) group = 'com.github.samtools' diff --git a/src/main/java/htsjdk/samtools/BAMFileReader.java b/src/main/java/htsjdk/samtools/BAMFileReader.java index c5b7b28b9..f7c061631 100644 --- a/src/main/java/htsjdk/samtools/BAMFileReader.java +++ b/src/main/java/htsjdk/samtools/BAMFileReader.java @@ -116,24 +116,6 @@ this.mFileHeader = readHeader(this.mStream, this.mValidationStringency, null); } - BAMFileReader(final InputStream stream, - final SeekableStream indexStream, - final boolean eagerDecode, - final boolean useAsynchronousIO, - final ValidationStringency validationStringency, - final SAMRecordFactory factory) - throws IOException { - this.useAsynchronousIO = useAsynchronousIO; - mIndexStream = indexStream; - mIsSeekable = true; - mCompressedInputStream = new BlockCompressedInputStream(stream); - mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream)); - this.eagerDecode = eagerDecode; - this.mValidationStringency = validationStringency; - this.samRecordFactory = factory; - this.mFileHeader = readHeader(this.mStream, this.mValidationStringency, null); - } - /** * Prepare to read BAM from a file (seekable) * @param file source of bytes. From d9d25a94885e444da973af3ca8edde6e40af495c Mon Sep 17 00:00:00 2001 From: JP Martin Date: Mon, 28 Nov 2016 17:35:13 -0800 Subject: [PATCH 3/4] Bugfix handing of input type and null combinations --- src/main/java/htsjdk/samtools/BAMFileReader.java | 2 +- .../java/htsjdk/samtools/SamReaderFactory.java | 26 ++++++++++++---------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/main/java/htsjdk/samtools/BAMFileReader.java b/src/main/java/htsjdk/samtools/BAMFileReader.java index f7c061631..da0204804 100644 --- a/src/main/java/htsjdk/samtools/BAMFileReader.java +++ b/src/main/java/htsjdk/samtools/BAMFileReader.java @@ -212,7 +212,7 @@ private BAMFileReader(final BlockCompressedInputStream compressedInputStream, final SAMRecordFactory factory) throws IOException { mIndexStream = indexStream; - mIsSeekable = true; + mIsSeekable = null != indexStream; mCompressedInputStream = compressedInputStream; mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream)); this.eagerDecode = eagerDecode; diff --git a/src/main/java/htsjdk/samtools/SamReaderFactory.java b/src/main/java/htsjdk/samtools/SamReaderFactory.java index 6b0d47f28..affc3f75f 100644 --- a/src/main/java/htsjdk/samtools/SamReaderFactory.java +++ b/src/main/java/htsjdk/samtools/SamReaderFactory.java @@ -314,22 +314,24 @@ public SamReader open(final SamInputResource resource) { if (indexFile!=null) { primitiveSamReader = new BAMFileReader(bufferedStream, indexFile, false, asynchronousIO, validationStringency, this.samRecordFactory); } else { - // index is not representable as a file (perhaps it's a NIO Path), get the stream instead. - // do not close bufferedStream because that's the same stream we're about to pass on to the BAMFileReader - // (asUnbufferedSeekableStream does not re-open). Instead, rewind it to the beginning. + final SeekableStream indexSeekable = indexMaybe == null ? null : indexMaybe.asUnbufferedSeekableStream(); + // do not close bufferedStream, it's the same stream we're getting here. SeekableStream sourceSeekable = data.asUnbufferedSeekableStream(); - sourceSeekable.seek(0); - primitiveSamReader = new BAMFileReader( - sourceSeekable, indexMaybe.asUnbufferedSeekableStream(), false, asynchronousIO, validationStringency, this.samRecordFactory); + if (null != sourceSeekable && null != indexSeekable) { + // let's make sure the user can seek in this file. + // need to return to the beginning because it's the same stream we used earlier + // and read a bit from. + sourceSeekable.seek(0); + primitiveSamReader = new BAMFileReader( + sourceSeekable, indexSeekable, false, asynchronousIO, validationStringency, this.samRecordFactory); + } else { + primitiveSamReader = new BAMFileReader( + bufferedStream, indexFile, false, asynchronousIO, validationStringency, this.samRecordFactory); + } } } else { bufferedStream.close(); - if (indexFile!=null) { - primitiveSamReader = new BAMFileReader(sourceFile, indexFile, false, asynchronousIO, validationStringency, this.samRecordFactory); - } else { - // index is not representable as a file (perhaps it's a NIO Path), get the stream instead. - primitiveSamReader = BAMFileReader.fromFileAndSeekable(sourceFile, indexMaybe.asUnbufferedSeekableStream(), false, asynchronousIO, validationStringency, this.samRecordFactory); - } + primitiveSamReader = new BAMFileReader(sourceFile, indexFile, false, asynchronousIO, validationStringency, this.samRecordFactory); } } else if (BlockCompressedInputStream.isValidFile(bufferedStream)) { primitiveSamReader = new SAMTextReader(new BlockCompressedInputStream(bufferedStream), validationStringency, this.samRecordFactory); From 0708f2826e033d05e13175371d4a11ab4a3380c0 Mon Sep 17 00:00:00 2001 From: JP Martin Date: Wed, 30 Nov 2016 09:40:27 -0800 Subject: [PATCH 4/4] Simplify code Simplified "if" logic, removed unused code, set mIsSeekable in all constructors consistently. --- src/main/java/htsjdk/samtools/BAMFileReader.java | 27 +------------------ .../java/htsjdk/samtools/SamReaderFactory.java | 31 +++++++++++----------- 2 files changed, 16 insertions(+), 42 deletions(-) diff --git a/src/main/java/htsjdk/samtools/BAMFileReader.java b/src/main/java/htsjdk/samtools/BAMFileReader.java index da0204804..98bb74f63 100644 --- a/src/main/java/htsjdk/samtools/BAMFileReader.java +++ b/src/main/java/htsjdk/samtools/BAMFileReader.java @@ -138,31 +138,6 @@ mStream.setInputFileName(file.getAbsolutePath()); } - - - private BAMFileReader(final File file, - final SeekableStream indexStream, - final boolean eagerDecode, - final boolean useAsynchronousIO, - final ValidationStringency validationStringency, - final SAMRecordFactory factory, - String onlyThereSoTheSignatureIsntAmbiguous) - throws IOException { - this(new BlockCompressedInputStream(file), indexStream, eagerDecode, useAsynchronousIO, indexStream.getSource(), validationStringency, factory); - // Provide better error message when there is an error reading. - mStream.setInputFileName(file.getAbsolutePath()); - } - - static BAMFileReader fromFileAndSeekable(final File file, - final SeekableStream indexStream, - final boolean eagerDecode, - final boolean useAsynchronousIO, - final ValidationStringency validationStringency, - final SAMRecordFactory factory) - throws IOException { - return new BAMFileReader(file, indexStream, eagerDecode, useAsynchronousIO, validationStringency, factory, "please"); - } - BAMFileReader(final SeekableStream strm, final File indexFile, final boolean eagerDecode, @@ -212,7 +187,7 @@ private BAMFileReader(final BlockCompressedInputStream compressedInputStream, final SAMRecordFactory factory) throws IOException { mIndexStream = indexStream; - mIsSeekable = null != indexStream; + mIsSeekable = true; mCompressedInputStream = compressedInputStream; mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream)); this.eagerDecode = eagerDecode; diff --git a/src/main/java/htsjdk/samtools/SamReaderFactory.java b/src/main/java/htsjdk/samtools/SamReaderFactory.java index affc3f75f..2b57cbfb2 100644 --- a/src/main/java/htsjdk/samtools/SamReaderFactory.java +++ b/src/main/java/htsjdk/samtools/SamReaderFactory.java @@ -307,27 +307,26 @@ public SamReader open(final SamInputResource resource) { Math.max(Defaults.BUFFER_SIZE, BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE) ); File sourceFile = data.asFile(); + // calling asFile is safe even if indexMaybe is a Google Cloud Storage bucket + // (in that case we just get null) final File indexFile = indexMaybe == null ? null : indexMaybe.asFile(); if (SamStreams.isBAMFile(bufferedStream)) { if (sourceFile == null || !sourceFile.isFile()) { - // Handle case in which file is a named pipe, e.g. /dev/stdin or created by mkfifo - if (indexFile!=null) { + // check whether we can seek + final SeekableStream indexSeekable = indexMaybe == null ? null : indexMaybe.asUnbufferedSeekableStream(); + // do not close bufferedStream, it's the same stream we're getting here. + SeekableStream sourceSeekable = data.asUnbufferedSeekableStream(); + if (indexFile!=null || null == sourceSeekable || null == indexSeekable) { + // not seekable. + // it's OK that we consumed a bit of the stream already, this ctor expects it. primitiveSamReader = new BAMFileReader(bufferedStream, indexFile, false, asynchronousIO, validationStringency, this.samRecordFactory); } else { - final SeekableStream indexSeekable = indexMaybe == null ? null : indexMaybe.asUnbufferedSeekableStream(); - // do not close bufferedStream, it's the same stream we're getting here. - SeekableStream sourceSeekable = data.asUnbufferedSeekableStream(); - if (null != sourceSeekable && null != indexSeekable) { - // let's make sure the user can seek in this file. - // need to return to the beginning because it's the same stream we used earlier - // and read a bit from. - sourceSeekable.seek(0); - primitiveSamReader = new BAMFileReader( - sourceSeekable, indexSeekable, false, asynchronousIO, validationStringency, this.samRecordFactory); - } else { - primitiveSamReader = new BAMFileReader( - bufferedStream, indexFile, false, asynchronousIO, validationStringency, this.samRecordFactory); - } + // seekable. + // need to return to the beginning because it's the same stream we used earlier + // and read a bit from, and that form of the ctor expects the stream to start at 0. + sourceSeekable.seek(0); + primitiveSamReader = new BAMFileReader( + sourceSeekable, indexSeekable, false, asynchronousIO, validationStringency, this.samRecordFactory); } } else { bufferedStream.close();