From d75605ee264937bd5f066f9d9334bc4df02aac69 Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Wed, 1 Feb 2017 17:59:35 -0500 Subject: [PATCH 1/3] supporting NIO wrappers in tribble adding an overload of AbstractFeatureReader.getFeatureReader() that takes wrappers for gcs prefetching this required adding matching overloads in a number of other places removed an unused Field from SamReaderFactor test for both tabix and tribble indexes --- .../java/htsjdk/samtools/SamReaderFactory.java | 2 - .../seekablestream/ISeekableStreamFactory.java | 10 ++ .../seekablestream/SeekableStreamFactory.java | 14 +- .../samtools/util/BlockCompressedInputStream.java | 2 +- .../java/htsjdk/tribble/AbstractFeatureReader.java | 63 +++++--- src/main/java/htsjdk/tribble/FeatureCodec.java | 2 +- .../java/htsjdk/tribble/TabixFeatureReader.java | 41 ++++-- .../tribble/TribbleIndexedFeatureReader.java | 53 +++++-- .../java/htsjdk/tribble/index/IndexFactory.java | 24 +++- .../java/htsjdk/tribble/readers/TabixReader.java | 28 +++- .../java/htsjdk/tribble/util/ParsingUtils.java | 22 ++- .../htsjdk/tribble/AbstractFeatureReaderTest.java | 158 ++++++++++++++++++++- .../tribble/TribbleIndexFeatureReaderTest.java | 2 +- .../baseVariants.mangled.vcf.gz | Bin 0 -> 517 bytes .../baseVariants.mangled.vcf.gz.tbi | Bin 0 -> 155 bytes .../AbstractFeatureReaderTest/baseVariants.vcf | 37 +++++ .../AbstractFeatureReaderTest/baseVariants.vcf.gz | Bin 0 -> 516 bytes .../baseVariants.vcf.gz.tbi | Bin 0 -> 154 bytes .../AbstractFeatureReaderTest/baseVariants.vcf.idx | Bin 0 -> 507 bytes .../mangledBaseVariants.vcf | 37 +++++ .../mangledBaseVariants.vcf.idx | Bin 0 -> 509 bytes 21 files changed, 435 insertions(+), 60 deletions(-) create mode 100644 src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.mangled.vcf.gz create mode 100644 src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.mangled.vcf.gz.tbi create mode 100644 src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf create mode 100644 src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.gz create mode 100644 src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.gz.tbi create mode 100644 src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.idx create mode 100644 src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/mangledBaseVariants.vcf create mode 100644 src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/mangledBaseVariants.vcf.idx diff --git a/src/main/java/htsjdk/samtools/SamReaderFactory.java b/src/main/java/htsjdk/samtools/SamReaderFactory.java index 8f203d5c0..9e135ce70 100644 --- a/src/main/java/htsjdk/samtools/SamReaderFactory.java +++ b/src/main/java/htsjdk/samtools/SamReaderFactory.java @@ -77,8 +77,6 @@ private static ValidationStringency defaultValidationStringency = ValidationStringency.DEFAULT_STRINGENCY; - private Function pathWrapper = Function.identity(); - abstract public SamReader open(final File file); /** diff --git a/src/main/java/htsjdk/samtools/seekablestream/ISeekableStreamFactory.java b/src/main/java/htsjdk/samtools/seekablestream/ISeekableStreamFactory.java index dff28b0a0..8415b27b2 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/ISeekableStreamFactory.java +++ b/src/main/java/htsjdk/samtools/seekablestream/ISeekableStreamFactory.java @@ -2,6 +2,8 @@ import java.io.IOException; import java.net.URL; +import java.nio.channels.SeekableByteChannel; +import java.util.function.Function; /** * Factory for creating {@link SeekableStream}s based on URLs/paths. @@ -30,4 +32,12 @@ * @return */ public SeekableStream getBufferedStream(SeekableStream stream, int bufferSize); + + default SeekableStream getStreamFor(String path, Function wrapper) throws IOException { + if(wrapper != null) { + throw new UnsupportedOperationException("This factory doesn't support adding wrappers"); + } else { + return this.getStreamFor(path); + } + } } diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java index ec8b9526e..77b7b95d7 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java @@ -27,6 +27,8 @@ import java.io.File; import java.io.IOException; import java.net.URL; +import java.nio.channels.SeekableByteChannel; +import java.util.function.Function; /** * Singleton class for getting {@link SeekableStream}s from URL/paths @@ -65,11 +67,19 @@ public static boolean isFilePath(final String path) { private static class DefaultSeekableStreamFactory implements ISeekableStreamFactory { + @Override public SeekableStream getStreamFor(final URL url) throws IOException { return getStreamFor(url.toExternalForm()); } + @Override public SeekableStream getStreamFor(final String path) throws IOException { + return getStreamFor(path, null); + } + + @Override + public SeekableStream getStreamFor(final String path, + Function wrapper) throws IOException { // todo -- add support for SeekableBlockInputStream if (path.startsWith("http:") || path.startsWith("https:")) { @@ -80,16 +90,18 @@ public SeekableStream getStreamFor(final String path) throws IOException { } else if (path.startsWith("file:")) { return new SeekableFileStream(new File(new URL(path).getPath())); } else if (IOUtil.hasScheme(path)) { - return new SeekablePathStream(IOUtil.getPath(path)); + return new SeekablePathStream(IOUtil.getPath(path), wrapper); } else { return new SeekableFileStream(new File(path)); } } + @Override public SeekableStream getBufferedStream(SeekableStream stream){ return getBufferedStream(stream, SeekableBufferedStream.DEFAULT_BUFFER_SIZE); } + @Override public SeekableStream getBufferedStream(SeekableStream stream, int bufferSize){ if (bufferSize == 0) return stream; else return new SeekableBufferedStream(stream, bufferSize); diff --git a/src/main/java/htsjdk/samtools/util/BlockCompressedInputStream.java b/src/main/java/htsjdk/samtools/util/BlockCompressedInputStream.java index 8b5e922db..a74fab722 100755 --- a/src/main/java/htsjdk/samtools/util/BlockCompressedInputStream.java +++ b/src/main/java/htsjdk/samtools/util/BlockCompressedInputStream.java @@ -407,7 +407,7 @@ private void checkAndRethrowDecompressionException() throws IOException { /** * Attempt to reuse the buffer of the given block * @param block owning block - * @return null decompressiong buffer to resuse, null if no buffer is available + * @return null decompressing buffer to reuse, null if no buffer is available */ private byte[] getBufferForReuse(DecompressedBlock block) { if (block == null) return null; diff --git a/src/main/java/htsjdk/tribble/AbstractFeatureReader.java b/src/main/java/htsjdk/tribble/AbstractFeatureReader.java index 80d9a6c62..545ff560f 100644 --- a/src/main/java/htsjdk/tribble/AbstractFeatureReader.java +++ b/src/main/java/htsjdk/tribble/AbstractFeatureReader.java @@ -25,11 +25,13 @@ import java.io.File; import java.io.IOException; import java.net.URI; +import java.nio.channels.SeekableByteChannel; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.Iterator; import java.util.Set; +import java.util.function.Function; /** * jrobinso @@ -42,6 +44,8 @@ // the path to underlying data source String path; + final Function wrapper; + final Function indexWrapper; // the query source, codec, and header // protected final QuerySource querySource; @@ -60,11 +64,20 @@ } /** - * {@link #getFeatureReader(String, String, FeatureCodec, boolean)} with {@code null} for indexResource + * {@link #getFeatureReader(String, String, FeatureCodec, boolean, Function, Function)} with {@code null} for indexResource, wrapper, and indexWrapper * @throws TribbleException */ public static AbstractFeatureReader getFeatureReader(final String featureResource, final FeatureCodec codec, final boolean requireIndex) throws TribbleException { - return getFeatureReader(featureResource, null, codec, requireIndex); + return getFeatureReader(featureResource, null, codec, requireIndex, null, null); + } + + + /** + * {@link #getFeatureReader(String, String, FeatureCodec, boolean, Function, Function)} with {@code null} for wrapper, and indexWrapper + * @throws TribbleException + */ + public static AbstractFeatureReader getFeatureReader(final String featureResource, String indexResource, final FeatureCodec codec, final boolean requireIndex) throws TribbleException { + return getFeatureReader(featureResource, indexResource, codec, requireIndex, null, null); } /** @@ -73,25 +86,26 @@ * @param indexResource the index for the feature file. If null, will auto-generate (if necessary) * @param codec * @param requireIndex whether an index is required for this file + * @param wrapper + * @param indexWrapper * @return * @throws TribbleException */ - public static AbstractFeatureReader getFeatureReader(final String featureResource, String indexResource, final FeatureCodec codec, final boolean requireIndex) throws TribbleException { - + public static AbstractFeatureReader getFeatureReader(final String featureResource, String indexResource, final FeatureCodec codec, final boolean requireIndex, Function wrapper, Function indexWrapper) throws TribbleException { try { // Test for tabix index if (methods.isTabix(featureResource, indexResource)) { if ( ! (codec instanceof AsciiFeatureCodec) ) throw new TribbleException("Tabix indexed files only work with ASCII codecs, but received non-Ascii codec " + codec.getClass().getSimpleName()); - return new TabixFeatureReader(featureResource, indexResource, (AsciiFeatureCodec) codec); + return new TabixFeatureReader<>(featureResource, indexResource, (AsciiFeatureCodec) codec, wrapper, indexWrapper); } // Not tabix => tribble index file (might be gzipped, but not block gzipped) else { - return new TribbleIndexedFeatureReader(featureResource, indexResource, codec, requireIndex); + return new TribbleIndexedFeatureReader<>(featureResource, indexResource, codec, requireIndex, wrapper, indexWrapper); } - } catch (IOException e) { + } catch (final IOException e) { throw new TribbleException.MalformedFeatureFile("Unable to create BasicFeatureReader using feature file ", featureResource, e); - } catch (TribbleException e) { + } catch (final TribbleException e) { e.setSource(featureResource); throw e; } @@ -108,16 +122,24 @@ */ public static AbstractFeatureReader getFeatureReader(final String featureResource, final FeatureCodec codec, final Index index) throws TribbleException { try { - return new TribbleIndexedFeatureReader(featureResource, codec, index); - } catch (IOException e) { + return new TribbleIndexedFeatureReader<>(featureResource, codec, index); + } catch (final IOException e) { throw new TribbleException.MalformedFeatureFile("Unable to create AbstractFeatureReader using feature file ", featureResource, e); } } protected AbstractFeatureReader(final String path, final FeatureCodec codec) { + this(path, codec, null, null); + } + + protected AbstractFeatureReader(final String path, final FeatureCodec codec, + final Function wrapper, + final Function indexWrapper) { this.path = path; this.codec = codec; + this.wrapper = wrapper; + this.indexWrapper = indexWrapper; } /** @@ -169,25 +191,30 @@ public static boolean hasBlockCompressedExtension (final URI uri) { * * @return the header object we've read-in */ + @Override public Object getHeader() { return header.getHeaderValue(); } static class EmptyIterator implements CloseableTribbleIterator { - public Iterator iterator() { return this; } - public boolean hasNext() { return false; } - public T next() { return null; } - public void remove() { } + @Override public Iterator iterator() { return this; } + @Override public boolean hasNext() { return false; } + @Override public T next() { return null; } + @Override public void remove() { } @Override public void close() { } } + public static boolean isTabix(String resourcePath, String indexPath) throws IOException { + if(indexPath == null){ + indexPath = ParsingUtils.appendToPath(resourcePath, TabixUtils.STANDARD_INDEX_EXTENSION); + } + return hasBlockCompressedExtension(resourcePath) && ParsingUtils.resourceExists(indexPath); + } + public static class ComponentMethods{ public boolean isTabix(String resourcePath, String indexPath) throws IOException{ - if(indexPath == null){ - indexPath = ParsingUtils.appendToPath(resourcePath, TabixUtils.STANDARD_INDEX_EXTENSION); - } - return hasBlockCompressedExtension(resourcePath) && ParsingUtils.resourceExists(indexPath); + return AbstractFeatureReader.isTabix(resourcePath, indexPath); } } } diff --git a/src/main/java/htsjdk/tribble/FeatureCodec.java b/src/main/java/htsjdk/tribble/FeatureCodec.java index f14191a67..7fe6a2a16 100644 --- a/src/main/java/htsjdk/tribble/FeatureCodec.java +++ b/src/main/java/htsjdk/tribble/FeatureCodec.java @@ -125,7 +125,7 @@ * Define the tabix format for the feature, used for indexing. Default implementation throws an exception. * * Note that only {@link AsciiFeatureCodec} could read tabix files as defined in - * {@link AbstractFeatureReader#getFeatureReader(String, String, FeatureCodec, boolean)} + * {@link AbstractFeatureReader#getFeatureReader(String, String, FeatureCodec, boolean, java.util.function.Function, java.util.function.Function)} * * @return the format to use with tabix * @throws TribbleException if the format is not defined diff --git a/src/main/java/htsjdk/tribble/TabixFeatureReader.java b/src/main/java/htsjdk/tribble/TabixFeatureReader.java index 5d90295de..63ebd41c2 100644 --- a/src/main/java/htsjdk/tribble/TabixFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TabixFeatureReader.java @@ -23,6 +23,7 @@ */ package htsjdk.tribble; +import htsjdk.samtools.seekablestream.SeekableStreamFactory; import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.RuntimeIOException; import htsjdk.tribble.readers.*; @@ -30,9 +31,11 @@ import java.io.IOException; import java.io.InputStream; +import java.nio.channels.SeekableByteChannel; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.function.Function; /** * @author Jim Robinson @@ -50,10 +53,7 @@ * @throws IOException */ public TabixFeatureReader(final String featureFile, final AsciiFeatureCodec codec) throws IOException { - super(featureFile, codec); - tabixReader = new TabixReader(featureFile); - sequenceNames = new ArrayList(tabixReader.getChromosomes()); - readHeader(); + this(featureFile, null, codec, null, null); } /** @@ -64,9 +64,23 @@ public TabixFeatureReader(final String featureFile, final AsciiFeatureCodec code * @throws IOException */ public TabixFeatureReader(final String featureFile, final String indexFile, final AsciiFeatureCodec codec) throws IOException { - super(featureFile, codec); - tabixReader = new TabixReader(featureFile, indexFile); - sequenceNames = new ArrayList(tabixReader.getChromosomes()); + this(featureFile, indexFile, codec, null, null); + } + + /** + * + * @param featureFile - path to a feature file. Can be a local file, http url, or ftp url + * @param indexFile - path to the index file. + * @param codec + * @param wrapper + * @param indexWrapper @throws IOException + */ + public TabixFeatureReader(final String featureFile, final String indexFile, final AsciiFeatureCodec codec, + final Function wrapper, + final Function indexWrapper) throws IOException { + super(featureFile, codec, wrapper, indexWrapper); + tabixReader = new TabixReader(featureFile, indexFile, wrapper, indexWrapper); + sequenceNames = new ArrayList<>(tabixReader.getChromosomes()); readHeader(); } @@ -80,7 +94,7 @@ public TabixFeatureReader(final String featureFile, final String indexFile, fina private void readHeader() throws IOException { SOURCE source = null; try { - source = codec.makeSourceFromStream(new PositionalBufferedStream(new BlockCompressedInputStream(ParsingUtils.openInputStream(path)))); + source = codec.makeSourceFromStream(new PositionalBufferedStream(new BlockCompressedInputStream(ParsingUtils.openInputStream(path, wrapper)))); header = codec.readHeader(source); } catch (Exception e) { throw new TribbleException.MalformedFeatureFile("Unable to parse header with error: " + e.getMessage(), path, e); @@ -97,6 +111,7 @@ public boolean hasIndex(){ } + @Override public List getSequenceNames() { return sequenceNames; } @@ -110,6 +125,7 @@ public boolean hasIndex(){ * @return * @throws IOException */ + @Override public CloseableTribbleIterator query(final String chr, final int start, final int end) throws IOException { final List mp = getSequenceNames(); if (mp == null) throw new TribbleException.TabixReaderFailure("Unable to find sequence named " + chr + @@ -121,13 +137,15 @@ public boolean hasIndex(){ return new FeatureIterator(lineReader, start - 1, end); } + @Override public CloseableTribbleIterator iterator() throws IOException { - final InputStream is = new BlockCompressedInputStream(ParsingUtils.openInputStream(path)); + final InputStream is = new BlockCompressedInputStream(ParsingUtils.openInputStream(path, wrapper)); final PositionalBufferedStream stream = new PositionalBufferedStream(is); final LineReader reader = new SynchronousLineReader(stream); return new FeatureIterator(reader, 0, Integer.MAX_VALUE); } + @Override public void close() throws IOException { tabixReader.close(); } @@ -184,10 +202,12 @@ protected void readNextRecord() throws IOException { } + @Override public boolean hasNext() { return currentRecord != null; } + @Override public T next() { T ret = currentRecord; try { @@ -200,14 +220,17 @@ public T next() { } + @Override public void remove() { throw new UnsupportedOperationException("Remove is not supported in Iterators"); } + @Override public void close() { lineReader.close(); } + @Override public Iterator iterator() { return this; } diff --git a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java index f991936e1..e16d605fa 100644 --- a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java @@ -39,9 +39,11 @@ import java.net.URI; import java.net.URISyntaxException; import java.net.URLEncoder; +import java.nio.channels.SeekableByteChannel; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.function.Function; import java.util.zip.GZIPInputStream; /** @@ -80,8 +82,13 @@ * @throws IOException */ public TribbleIndexedFeatureReader(final String featurePath, final FeatureCodec codec, final boolean requireIndex) throws IOException { + this(featurePath, codec, requireIndex, null, null); + } - super(featurePath, codec); + public TribbleIndexedFeatureReader(final String featurePath, final FeatureCodec codec, final boolean requireIndex, + Function wrapper, + Function indexWrapper) throws IOException { + super(featurePath, codec, wrapper, indexWrapper); if (requireIndex) { this.loadIndex(); @@ -104,9 +111,22 @@ public TribbleIndexedFeatureReader(final String featurePath, final FeatureCodec< * @throws IOException */ public TribbleIndexedFeatureReader(final String featureFile, final String indexFile, final FeatureCodec codec, final boolean requireIndex) throws IOException { - this(featureFile, codec, false); // required to read the header + this(featureFile, indexFile, codec, requireIndex, null, null); + } + + /** + * @param featureFile - path to the feature file, can be a local file path, http url, or ftp url + * @param indexFile - path to the index file + * @param codec - codec to decode the features + * @param requireIndex - true if the reader will be queries for specific ranges. An index (idx) file must exist + * @throws IOException + */ + public TribbleIndexedFeatureReader(final String featureFile, final String indexFile, final FeatureCodec codec, final boolean requireIndex, + Function wrapper, + Function indexWrapper) throws IOException { + this(featureFile, codec, false, wrapper, indexWrapper); // required to read the header if (indexFile != null && ParsingUtils.resourceExists(indexFile)) { - index = IndexFactory.loadIndex(indexFile); + index = IndexFactory.loadIndex(indexFile, indexWrapper); this.needCheckForIndex = false; } else { if (requireIndex) { @@ -118,6 +138,8 @@ public TribbleIndexedFeatureReader(final String featureFile, final String indexF } } + + /** * @param featureFile - path to the feature file, can be a local file path, http url, or ftp url * @param codec - codec to decode the features @@ -139,12 +161,12 @@ public TribbleIndexedFeatureReader(final String featureFile, final FeatureCodec< private void loadIndex() throws IOException{ String indexFile = Tribble.indexFile(this.path); if (ParsingUtils.resourceExists(indexFile)) { - index = IndexFactory.loadIndex(indexFile); + index = IndexFactory.loadIndex(indexFile, indexWrapper); } else { // See if the index itself is gzipped indexFile = ParsingUtils.appendToPath(indexFile, ".gz"); if (ParsingUtils.resourceExists(indexFile)) { - index = IndexFactory.loadIndex(indexFile); + index = IndexFactory.loadIndex(indexFile, indexWrapper); } } this.needCheckForIndex = false; @@ -164,11 +186,11 @@ private SeekableStream getSeekableStream() throws IOException { final SeekableStream result; if (reuseStreamInQuery()) { // if the stream points to an underlying file, only create the underlying seekable stream once - if (seekableStream == null) seekableStream = SeekableStreamFactory.getInstance().getStreamFor(path); + if (seekableStream == null) seekableStream = SeekableStreamFactory.getInstance().getStreamFor(path, wrapper); result = seekableStream; } else { // we are not reusing the stream, so make a fresh copy each time we request it - result = SeekableStreamFactory.getInstance().getStreamFor(path); + result = SeekableStreamFactory.getInstance().getStreamFor(path, wrapper); } return result; @@ -183,6 +205,7 @@ private boolean reuseStreamInQuery() { return pathIsRegularFile; } + @Override public void close() throws IOException { // close the seekable stream if that's necessary if (seekableStream != null) seekableStream.close(); @@ -193,6 +216,7 @@ public void close() throws IOException { * * @return list of strings of the contig names */ + @Override public List getSequenceNames() { return !this.hasIndex() ? new ArrayList() : new ArrayList(index.getSequenceNames()); } @@ -218,7 +242,7 @@ private void readHeader() throws IOException { InputStream is = null; PositionalBufferedStream pbs = null; try { - is = ParsingUtils.openInputStream(path); + is = ParsingUtils.openInputStream(path, wrapper); if (hasBlockCompressedExtension(new URI(URLEncoder.encode(path, "UTF-8")))) { // TODO -- warning I don't think this can work, the buffered input stream screws up position is = new GZIPInputStream(new BufferedInputStream(is)); @@ -252,6 +276,7 @@ private void readHeader() throws IOException { * @return an iterator of records in this interval * @throws IOException */ + @Override public CloseableTribbleIterator query(final String chr, final int start, final int end) throws IOException { if (!this.hasIndex()) { @@ -271,6 +296,7 @@ private void readHeader() throws IOException { * @return Return an iterator to iterate over the entire file * @throws IOException */ + @Override public CloseableTribbleIterator iterator() throws IOException { return new WFIterator(); } @@ -288,7 +314,7 @@ private void readHeader() throws IOException { * @throws IOException */ public WFIterator() throws IOException { - final InputStream inputStream = ParsingUtils.openInputStream(path); + final InputStream inputStream = ParsingUtils.openInputStream(path, wrapper); final PositionalBufferedStream pbs; if (hasBlockCompressedExtension(path)) { @@ -299,9 +325,9 @@ public WFIterator() throws IOException { } else { pbs = new PositionalBufferedStream(inputStream, 512000); } - /** + /* * The header was already read from the original source in the constructor; don't read it again, since some codecs keep state - * about its initializagtion. Instead, skip that part of the stream. + * about its initialization. Instead, skip that part of the stream. */ pbs.skip(header.getHeaderEnd()); source = codec.makeSourceFromStream(pbs); @@ -399,10 +425,12 @@ public QueryIterator(final String chr, final int start, final int end, final Lis } + @Override public boolean hasNext() { return currentRecord != null; } + @Override public T next() { final T ret = currentRecord; try { @@ -486,11 +514,13 @@ private void readNextRecord() throws IOException { } + @Override public void remove() { throw new UnsupportedOperationException("Remove is not supported."); } + @Override public void close() { // Note that this depends on BlockStreamWrapper not actually closing the underlying stream codec.close(source); @@ -504,6 +534,7 @@ public void close() { } } + @Override public Iterator iterator() { return this; } diff --git a/src/main/java/htsjdk/tribble/index/IndexFactory.java b/src/main/java/htsjdk/tribble/index/IndexFactory.java index 4e23e934d..b7af13e54 100644 --- a/src/main/java/htsjdk/tribble/index/IndexFactory.java +++ b/src/main/java/htsjdk/tribble/index/IndexFactory.java @@ -51,9 +51,11 @@ import java.io.IOException; import java.io.InputStream; import java.lang.reflect.Constructor; +import java.nio.channels.SeekableByteChannel; import java.util.HashMap; import java.util.Iterator; import java.util.Map; +import java.util.function.Function; import java.util.zip.GZIPInputStream; /** @@ -90,9 +92,7 @@ public int getDefaultBinSize() { public IndexCreator getIndexCreator() { try { return indexCreatorClass.newInstance(); - } catch ( final InstantiationException e ) { - throw new TribbleException("Couldn't make index creator in " + this, e); - } catch ( final IllegalAccessException e ) { + } catch ( final InstantiationException | IllegalAccessException e ) { throw new TribbleException("Couldn't make index creator in " + this, e); } } @@ -162,11 +162,23 @@ public static IndexType getIndexType(final BufferedInputStream is) { * @param indexFile from which to load the index */ public static Index loadIndex(final String indexFile) { + return loadIndex(indexFile, null); + } + + /** + * Load in index from the specified file. The type of index (LinearIndex or IntervalTreeIndex) is determined + * at run time by reading the type flag in the file. + * + * @param indexFile from which to load the index + */ + public static Index loadIndex(final String indexFile, Function indexWrapper) { // Must be buffered, because getIndexType uses mark and reset - try (BufferedInputStream bufferedInputStream = new BufferedInputStream(indexFileInputStream(indexFile), Defaults.NON_ZERO_BUFFER_SIZE)){ + try (BufferedInputStream bufferedInputStream = new BufferedInputStream(indexFileInputStream(indexFile, indexWrapper), Defaults.NON_ZERO_BUFFER_SIZE)) { final Class indexClass = IndexType.getIndexType(bufferedInputStream).getIndexType(); final Constructor ctor = indexClass.getConstructor(InputStream.class); return ctor.newInstance(bufferedInputStream); + } catch (final TribbleException ex) { + throw ex; } catch (final IOException ex) { throw new TribbleException.UnableToReadIndexFile("Unable to read index file", indexFile, ex); } catch (final Exception ex) { @@ -174,8 +186,8 @@ public static Index loadIndex(final String indexFile) { } } - private static InputStream indexFileInputStream(final String indexFile) throws IOException { - final InputStream inputStreamInitial = ParsingUtils.openInputStream(indexFile); + private static InputStream indexFileInputStream(final String indexFile, Function indexWrapper) throws IOException { + final InputStream inputStreamInitial = ParsingUtils.openInputStream(indexFile, indexWrapper); if (indexFile.endsWith(".gz")) { return new GZIPInputStream(inputStreamInitial); } diff --git a/src/main/java/htsjdk/tribble/readers/TabixReader.java b/src/main/java/htsjdk/tribble/readers/TabixReader.java index 8867d076b..e098e7bf3 100644 --- a/src/main/java/htsjdk/tribble/readers/TabixReader.java +++ b/src/main/java/htsjdk/tribble/readers/TabixReader.java @@ -34,19 +34,22 @@ import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.channels.SeekableByteChannel; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.Set; +import java.util.function.Function; /** * @author Heng Li */ public class TabixReader { - private String mFn; - private String mIdxFn; - private BlockCompressedInputStream mFp; + private final String mFn; + private final String mIdxFn; + private final Function mIdxWrpr; + private final BlockCompressedInputStream mFp; private int mPreset; private int mSc; @@ -111,6 +114,17 @@ public TabixReader(final String fn, final String idxFn) throws IOException { } /** + * @param fn File name of the data file + * @param idxFn Full path to the index file. Auto-generated if null + */ + public TabixReader(final String fn, final String idxFn, + Function wrapper, + Function indexWrapper) throws IOException { + this(fn, idxFn, SeekableStreamFactory.getInstance().getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(fn, wrapper)), indexWrapper); + } + + + /** * @param fn File name of the data file (used for error messages only) * @param stream Seekable stream from which the data is read */ @@ -124,8 +138,14 @@ public TabixReader(final String fn, SeekableStream stream) throws IOException { * @param stream Seekable stream from which the data is read */ public TabixReader(final String fn, final String idxFn, SeekableStream stream) throws IOException { + this(fn, idxFn, stream, null); + } + + + public TabixReader(final String fn, final String idxFn, SeekableStream stream, Function idxWrpr) throws IOException { mFn = fn; mFp = new BlockCompressedInputStream(stream); + mIdxWrpr = idxWrpr; if(idxFn == null){ mIdxFn = ParsingUtils.appendToPath(fn, TabixUtils.STANDARD_INDEX_EXTENSION); } else { @@ -239,7 +259,7 @@ private void readIndex(SeekableStream fp) throws IOException { */ private void readIndex() throws IOException { ISeekableStreamFactory ssf = SeekableStreamFactory.getInstance(); - readIndex(ssf.getBufferedStream(ssf.getStreamFor(mIdxFn), 128000)); + readIndex(ssf.getBufferedStream(ssf.getStreamFor(mIdxFn, mIdxWrpr), 128000)); } /** diff --git a/src/main/java/htsjdk/tribble/util/ParsingUtils.java b/src/main/java/htsjdk/tribble/util/ParsingUtils.java index 38cf8ab7f..73657ad45 100644 --- a/src/main/java/htsjdk/tribble/util/ParsingUtils.java +++ b/src/main/java/htsjdk/tribble/util/ParsingUtils.java @@ -23,6 +23,7 @@ */ package htsjdk.tribble.util; +import htsjdk.samtools.seekablestream.SeekablePathStream; import htsjdk.samtools.util.IOUtil; import java.awt.Color; import java.io.File; @@ -32,6 +33,7 @@ import java.lang.reflect.Constructor; import java.net.MalformedURLException; import java.net.URL; +import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; @@ -42,6 +44,7 @@ import java.util.List; import java.util.Map; import java.util.WeakHashMap; +import java.util.function.Function; /** * @author jrobinso @@ -80,9 +83,7 @@ public static InputStream openInputStream(String path) throws IOException { - - InputStream inputStream; - + final InputStream inputStream; if (path.startsWith("http:") || path.startsWith("https:") || path.startsWith("ftp:")) { inputStream = getURLHelper(new URL(path)).openInputStream(); } else if (IOUtil.hasScheme(path)) { @@ -95,6 +96,21 @@ public static InputStream openInputStream(String path) return inputStream; } + public static InputStream openInputStream(String path, Function wrapper) + throws IOException { + + final InputStream inputStream; + if (path.startsWith("http:") || path.startsWith("https:") || path.startsWith("ftp:")) { + inputStream = getURLHelper(new URL(path)).openInputStream(); + } else if (IOUtil.hasScheme(path)) { + inputStream = new SeekablePathStream(IOUtil.getPath(path), wrapper); + } else { + File file = new File(path); + inputStream = new FileInputStream(file); + } + return inputStream; + } + //public static String join(String separator, Collection strings) { // return join( separator, strings.toArray(new String[0]) ); //} diff --git a/src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java b/src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java index 6d65e9dfc..75b4a6269 100644 --- a/src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java +++ b/src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java @@ -1,5 +1,8 @@ package htsjdk.tribble; +import com.google.common.jimfs.Configuration; +import com.google.common.jimfs.Jimfs; +import htsjdk.samtools.FileTruncatedException; import htsjdk.samtools.util.TestUtil; import htsjdk.tribble.bed.BEDCodec; import htsjdk.tribble.bed.BEDFeature; @@ -15,6 +18,10 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.nio.ByteBuffer; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.*; +import java.util.function.Function; import static org.testng.Assert.*; @@ -27,6 +34,19 @@ final static String HTTP_INDEXED_VCF_PATH = TestUtil.BASE_URL_FOR_HTTP_TESTS + "ex2.vcf"; final static String LOCAL_MIRROR_HTTP_INDEXED_VCF_PATH = VariantBaseTest.variantTestDataRoot + "ex2.vcf"; + //the "mangled" versions of the files have an extra byte added to the front of the file that makes them invalid + private static final String TEST_PATH = "src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/"; + private static final String MANGLED_VCF = TEST_PATH + "mangledBaseVariants.vcf"; + private static final String MANGLED_VCF_INDEX = TEST_PATH + "mangledBaseVariants.vcf.idx"; + private static final String VCF = TEST_PATH + "baseVariants.vcf"; + private static final String VCF_INDEX = TEST_PATH + "baseVariants.vcf.idx"; + private static final String VCF_TABIX = TEST_PATH + "baseVariants.vcf.gz"; + private static final String VCF_TABIX_INDEX = TEST_PATH + "baseVariants.vcf.gz.tbi"; + private static final String MANGLED_VCF_TABIX = TEST_PATH + "baseVariants.mangled.vcf.gz"; + private static final String MANGLED_VCF_TABIX_INDEX = TEST_PATH + "baseVariants.mangled.vcf.gz.tbi"; + + private static final Function WRAPPER = SkippingByteChannel::new; + /** * Asserts readability and correctness of VCF over HTTP. The VCF is indexed and requires and index. */ @@ -65,12 +85,12 @@ public void testLoadBEDFTP() throws Exception { }; } - @Test(enabled = true, dataProvider = "blockCompressedExtensionExtensionStrings") + @Test(dataProvider = "blockCompressedExtensionExtensionStrings") public void testBlockCompressionExtensionString(final String testString, final boolean expected) { Assert.assertEquals(AbstractFeatureReader.hasBlockCompressedExtension(testString), expected); } - @Test(enabled = true, dataProvider = "blockCompressedExtensionExtensionStrings") + @Test(dataProvider = "blockCompressedExtensionExtensionStrings") public void testBlockCompressionExtensionFile(final String testString, final boolean expected) { Assert.assertEquals(AbstractFeatureReader.hasBlockCompressedExtension(new File(testString)), expected); } @@ -103,10 +123,142 @@ public void testBlockCompressionExtensionFile(final String testString, final boo }; } - @Test(enabled = true, dataProvider = "blockCompressedExtensionExtensionURIStrings") + @Test(dataProvider = "blockCompressedExtensionExtensionURIStrings") public void testBlockCompressionExtension(final String testURIString, final boolean expected) throws URISyntaxException { URI testURI = URI.create(testURIString); Assert.assertEquals(AbstractFeatureReader.hasBlockCompressedExtension(testURI), expected); } + + @DataProvider(name = "vcfFileAndWrapperCombinations") + private static Object[][] vcfFileAndWrapperCombinations(){ + return new Object[][] { + {VCF, VCF_INDEX, null, null}, + {MANGLED_VCF, MANGLED_VCF_INDEX, WRAPPER, WRAPPER}, + {VCF, MANGLED_VCF_INDEX, null, WRAPPER}, + {MANGLED_VCF, VCF_INDEX, WRAPPER, null}, + {MANGLED_VCF_TABIX, MANGLED_VCF_TABIX_INDEX, WRAPPER, WRAPPER}, + {VCF_TABIX, MANGLED_VCF_TABIX_INDEX, null, WRAPPER}, + {MANGLED_VCF_TABIX, VCF_TABIX_INDEX, WRAPPER, null}, + {VCF_TABIX, VCF_TABIX_INDEX, null, null}, + }; + } + + @Test(dataProvider = "vcfFileAndWrapperCombinations") + public void testGetFeatureReaderWithPathAndWrappers(String file, String index, + Function wrapper, + Function indexWrapper) throws IOException, URISyntaxException { + try(FileSystem fs = Jimfs.newFileSystem("test", Configuration.unix())) { + final AbstractFeatureReader featureReader = getFeatureReader(file, index, wrapper, + indexWrapper, + new VCFCodec(), + fs); + Assert.assertTrue(featureReader.hasIndex()); + Assert.assertEquals(featureReader.iterator().toList().size(), 26); + Assert.assertEquals(featureReader.query("1", 190, 210).toList().size(), 3); + Assert.assertEquals(featureReader.query("2", 190, 210).toList().size(), 1); + } + } + + @DataProvider(name = "failsWithoutWrappers") + private static Object[][] failsWithoutWrappers(){ + return new Object[][] { + {MANGLED_VCF, VCF_INDEX, new VCFCodec()}, + {VCF, MANGLED_VCF_INDEX, new VCFCodec()}, + }; + } + + @Test(dataProvider = "failsWithoutWrappers", expectedExceptions = {TribbleException.class, FileTruncatedException.class}) + public void testFailureIfNoWrapper(String file, String index, FeatureCodec codec) throws IOException, URISyntaxException { + try(FileSystem fs = Jimfs.newFileSystem("test", Configuration.unix())) { + getFeatureReader(file, index, null, null, new VCFCodec(), fs); + } + } + + private static AbstractFeatureReader getFeatureReader(String vcf, String index, + Function wrapper, + Function indexWrapper, + FeatureCodec codec, + FileSystem fileSystem) throws IOException, URISyntaxException { + final Path vcfInJimfs = getTribbleFileInJimfs(vcf, index, fileSystem); + return AbstractFeatureReader.getFeatureReader( + vcfInJimfs.toUri().toString(), + null, + codec, + true, + wrapper, + indexWrapper); + } + + /** + * skip the first byte of a SeekableByteChannel + */ + private static class SkippingByteChannel implements SeekableByteChannel{ + private final int toSkip; + private final SeekableByteChannel input; + + private SkippingByteChannel(SeekableByteChannel input) { + this.toSkip = 1; + try { + this.input = input; + input.position(toSkip); + } catch (final IOException e){ + throw new RuntimeException(e); + } + } + + @Override + public boolean isOpen() { + return input.isOpen(); + } + + @Override + public void close() throws IOException { + input.close(); + } + + @Override + public int read(ByteBuffer dst) throws IOException { + return input.read(dst); + } + + @Override + public int write(ByteBuffer src) throws IOException { + throw new UnsupportedOperationException("Read only"); + } + + @Override + public long position() throws IOException { + return input.position() - toSkip; + } + + @Override + public SeekableByteChannel position(long newPosition) throws IOException { + if (newPosition < 0 ){ + throw new RuntimeException("negative position not allowed"); + } + return input.position( newPosition + toSkip); + } + + @Override + public long size() throws IOException { + return input.size() - toSkip; + } + + @Override + public SeekableByteChannel truncate(long size) throws IOException { + return input.truncate(size + toSkip); + } + }; + + private static Path getTribbleFileInJimfs(String vcf, String index, FileSystem fileSystem) throws IOException, URISyntaxException { + final FileSystem fs = fileSystem; + final Path root = fs.getPath("/"); + final Path vcfPath = Paths.get(vcf); + final Path idxPath = Paths.get(index); + final Path idxDestination = Paths.get(AbstractFeatureReader.isTabix(vcf, index) ? Tribble.tabixIndexFile(vcf) : Tribble.indexFile(vcf)); + Files.copy(idxPath, root.resolve(idxDestination.getFileName().toString())); + return Files.copy(vcfPath, root.resolve(vcfPath.getFileName().toString())); + } + } diff --git a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java index 2c48c6301..0223d41cd 100644 --- a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java +++ b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java @@ -30,7 +30,7 @@ public void testIndexedGZIPVCF(final String testPath, final int expectedCount) throws IOException { final VCFCodec codec = new VCFCodec(); try (final TribbleIndexedFeatureReader featureReader = - new TribbleIndexedFeatureReader(testPath, codec, false)) { + new TribbleIndexedFeatureReader<>(testPath, codec, false)) { final CloseableTribbleIterator localIterator = featureReader.iterator(); int count = 0; for (final Feature feat : featureReader.iterator()) { diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.mangled.vcf.gz b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.mangled.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..98f276a1742b76e68957ee55058eb8ced757e47a GIT binary patch literal 517 zcmV+g0{Z=EABzYC000000RIL6LPG)o=K-aX&5qhI6h_yVr?7@?gq&YMZ7M}Dp^+$1 zptCp0g``dFjBV)9r>~(?RYM(Ask_MM9)I@d^!xj&#=VdY)A7?JJp}`=*Y9VGbQ!@``|0XmwolqtikHf~3KEH|AO*I%d`FaMek~}{urn#y z3CRwT6Z4E3|Kf!9bfouxv#}v$)z~?W?9MWvQeO$w(n4SwRrNCeY33$FY2qIV|n+*&uWITo) z%w`7OBF{*w zMK`3-bRc7K3-Ycd3M&b{5L!D4M(}Ly$%81MAg$KWBqnbe +##INFO= +##INFO= +##INFO= +##contig= +##contig= +##contig= +##contig= +##source=SelectVariants +#CHROM POS ID REF ALT QUAL FILTER INFO +1 100 a G A 232.46 PASS . +1 199 b GG G 232.46 PASS . +1 200 c G A 232.46 PASS . +1 203 d GGGG G 232.46 PASS . +1 280 e G A 232.46 PASS . +1 284 f GGG G 232.46 PASS . +1 285 g G A 232.46 PASS . +1 286 h G A 232.46 PASS . +1 999 i G A 232.46 PASS . +1 1000 j G A 232.46 PASS . +1 1000 k GGGG G 232.46 PASS . +1 1076 l G A 232.46 PASS . +1 1150 m G A 232.46 PASS . +1 1176 n G A 232.46 PASS . +2 200 o G A 232.46 PASS . +2 525 p G A 232.46 PASS . +2 548 q GGG G 232.46 PASS . +2 640 r G A 232.46 PASS . +2 700 s G A 232.46 PASS . +3 1 t G A 232.46 PASS . +3 300 u G A 232.46 PASS . +3 300 v GGGG G 232.46 PASS . +3 400 w G A 232.46 PASS . +4 600 x G A 232.46 PASS . +4 775 y G A 232.46 PASS . +4 776 z GGGG G 232.46 PASS . diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.gz b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..03ad18b9c5af02edd38e12280dd31dc6cc735407 GIT binary patch literal 516 zcmV+f0{i_RiwFb&00000{{{d;LjnNj0i}}7j@mF3M%S08u!e1foL@j~Dn&4%ktk50 zvp30wq)qIMZRpUauc1>_LmgD9yU6DrfA;6}`}?ZKy^syl@zW$d1p}|w?`Mm28Nbh_ zaWZih$7Y8zPMqzz#W>>{4=9}}s$5pBt^|)ClDftkNiL3DlZulAa-q$%a){ZQAj?aV z%(sMH=bX!kE|DnKAQ7@aNthyar8E`~AKt{LH}PpLn6`$e5}%LA^YedaE`Ddacdon@ zn$?6~2Z`9f#;8k7e)%r`l<;=@>FQs$Pufs@leb#=JLxlWDzpv%~eD5W>hy9fPyF!&29ag;g68`DCrpO~x|*Q(c_ zd|WR-!D_jIYzphg6wI5z=P$_|(rmtcT*IZ9o(nFez+eUmc)m9XB3LDx4Gb=1Jcb?2 zW(M9O&q%pd@~97mk?h)t0@}4v0DI`NLO66-5tLRkHfOKww=%WC*KYK!>+mieMo`;D zH>A*XAY*Y0@~$NcD+#?2T003w@NDhLgD9XNt=7;aDGTwz1#MmTjdAQGCwoUe1O|S$ zXaShN6<(}lI1J(3K_d8JkNOFu8g&>y1pojaiwFb&00000{{{d;LjnLB00RI300000 G0001{IqPl! literal 0 HcmV?d00001 diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.gz.tbi b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..3441492b698a9be1194468842960f0d7737ca2f4 GIT binary patch literal 154 zcmb2|=3rp}f&Xj_PR>jWwG7;apHfm16A}`blH4?SnN9`FV(#FT?2+h^^f^#8&q<*4 zZ9r?nS%(LvoQuR`=kzm*D!g_z<6pK<^sG%uVVKUdgI5%0vj_j@198Q4o^4Gsd@mdG mklQWg^M#``H_rIZcUFjj=kdvZC5#LV@@Te8Gcbc410n#JhcVp% literal 0 HcmV?d00001 diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.idx b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/baseVariants.vcf.idx new file mode 100644 index 0000000000000000000000000000000000000000..6d5e546e29c1b5361724d7589fce1c8864bf4911 GIT binary patch literal 507 zcmZWmOHRWu5S{W7thfoaosZqJK~WQ+5)D#GTXs>Bni2>JvYp-laTadFl2dR7Y>1-@ zMw+qT%=5EnoJ_;J0RZf4M+?b?M8$KBUPSZA3nkG5ud1BaYc7x!1yWopB)Du^QE-VG zu|c_LvUOQYRjO9;-2=Sdlr=}Qwo>IX6J=JbRaQM|sXUXs%gEujxQFvYeZRKP(|5Eo z?=g0)J)BOGGXjKSjDLZ%3*24cb>JcjlEpkqucP=fxk_hI5Q95pE@pJ>bdk95hjyEU zX&5a54;f=V!-S1T)W?*Jz$m1az6|EmAf7~;NU=wW?~ZKjB_}tFI01*kfo_4o#}R;O u${=GW+TZqmwlL_DPR}x5Va$xgR$n`o+q0~eS#9;JOM3rUM$=a9tiJ)ydS_h# literal 0 HcmV?d00001 diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/mangledBaseVariants.vcf b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/mangledBaseVariants.vcf new file mode 100644 index 000000000..8a6df0d2e --- /dev/null +++ b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/mangledBaseVariants.vcf @@ -0,0 +1,37 @@ +!##fileformat=VCFv4.2 +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +##contig= +##contig= +##source=SelectVariants +#CHROM POS ID REF ALT QUAL FILTER INFO +1 100 a G A 232.46 PASS . +1 199 b GG G 232.46 PASS . +1 200 c G A 232.46 PASS . +1 203 d GGGG G 232.46 PASS . +1 280 e G A 232.46 PASS . +1 284 f GGG G 232.46 PASS . +1 285 g G A 232.46 PASS . +1 286 h G A 232.46 PASS . +1 999 i G A 232.46 PASS . +1 1000 j G A 232.46 PASS . +1 1000 k GGGG G 232.46 PASS . +1 1076 l G A 232.46 PASS . +1 1150 m G A 232.46 PASS . +1 1176 n G A 232.46 PASS . +2 200 o G A 232.46 PASS . +2 525 p G A 232.46 PASS . +2 548 q GGG G 232.46 PASS . +2 640 r G A 232.46 PASS . +2 700 s G A 232.46 PASS . +3 1 t G A 232.46 PASS . +3 300 u G A 232.46 PASS . +3 300 v GGGG G 232.46 PASS . +3 400 w G A 232.46 PASS . +4 600 x G A 232.46 PASS . +4 775 y G A 232.46 PASS . +4 776 z GGGG G 232.46 PASS . diff --git a/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/mangledBaseVariants.vcf.idx b/src/test/resources/htsjdk/tribble/AbstractFeatureReaderTest/mangledBaseVariants.vcf.idx new file mode 100644 index 0000000000000000000000000000000000000000..4a20ddcf88650308f2369f05af477693d26addd7 GIT binary patch literal 509 zcmZWmOHRWu5S{W7hy{1x0MvHU+ASLtC81QJK`Lp>E=p2U0wF=R!wnE;;WjKe1!us9 zxQbw;8T-vVKYPX}@ie&Y0l=Ch_iOt+ zeMc+v9%F~vgQ*{%5g-&}{0p2eaJyj8f{W0L7xOT=3ZqGUnan~j0=Lgx%;?x@BXQvm z?IsSAAY20OGscDt6E+^vA*N&mMj^5Er8l2?ksoRz#RE!)?#RZiq<_7LVsI$z=@tll y8~~W63^I1C{cY!G3%xe!Y+2?jjG2+x>TAn#w=An=R$KjQlY@UOqiL&l)<@rWj%SDf literal 0 HcmV?d00001 From 47b79d22135bb2cce60bf0992b76976345e13476 Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Fri, 3 Feb 2017 14:50:26 -0500 Subject: [PATCH 2/3] responding to comments --- .../seekablestream/ISeekableStreamFactory.java | 12 ++++ .../seekablestream/SeekableStreamFactory.java | 8 +++ .../java/htsjdk/tribble/AbstractFeatureReader.java | 14 +++-- .../java/htsjdk/tribble/TabixFeatureReader.java | 12 ++-- .../tribble/TribbleIndexedFeatureReader.java | 3 +- .../java/htsjdk/tribble/index/IndexFactory.java | 2 + .../java/htsjdk/tribble/readers/TabixReader.java | 71 ++++++++++++---------- .../java/htsjdk/tribble/util/ParsingUtils.java | 26 ++++---- .../htsjdk/tribble/AbstractFeatureReaderTest.java | 32 ++++++---- 9 files changed, 114 insertions(+), 66 deletions(-) diff --git a/src/main/java/htsjdk/samtools/seekablestream/ISeekableStreamFactory.java b/src/main/java/htsjdk/samtools/seekablestream/ISeekableStreamFactory.java index 8415b27b2..71807cf46 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/ISeekableStreamFactory.java +++ b/src/main/java/htsjdk/samtools/seekablestream/ISeekableStreamFactory.java @@ -33,6 +33,18 @@ */ public SeekableStream getBufferedStream(SeekableStream stream, int bufferSize); + /** + * Open a stream from the input path, applying the wrapper to the stream. + * + * The wrapper allows applying operations directly to the byte stream so that things like caching, prefetching, or decryption + * can be done at the raw byte level. + * + * The default implementation throws if wrapper != null, but implementations may support this wrapping operation + * + * @param path a uri like String representing a resource to open + * @param wrapper a wrapper to apply to the stream + * @return a stream opened path + */ default SeekableStream getStreamFor(String path, Function wrapper) throws IOException { if(wrapper != null) { throw new UnsupportedOperationException("This factory doesn't support adding wrappers"); diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java index 77b7b95d7..19d40e5e3 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java @@ -77,6 +77,14 @@ public SeekableStream getStreamFor(final String path) throws IOException { return getStreamFor(path, null); } + /** + * The wrapper will only be applied to the stream if the stream is treated as a {@link java.nio.file.Path} + * + * This currently means any uri with a scheme that is not http, https, ftp, or file will have the wrapper applied to it + * + * @param path a uri like String representing a resource to open + * @param wrapper a wrapper to apply to the stream allowing direct transformations on the byte stream to be applied + */ @Override public SeekableStream getStreamFor(final String path, Function wrapper) throws IOException { diff --git a/src/main/java/htsjdk/tribble/AbstractFeatureReader.java b/src/main/java/htsjdk/tribble/AbstractFeatureReader.java index 545ff560f..d65783fee 100644 --- a/src/main/java/htsjdk/tribble/AbstractFeatureReader.java +++ b/src/main/java/htsjdk/tribble/AbstractFeatureReader.java @@ -44,7 +44,10 @@ // the path to underlying data source String path; + + // a wrapper to apply to the raw stream of the Feature file to allow features like prefetching and caching to be injected final Function wrapper; + // a wrapper to apply to the raw stream of the index file final Function indexWrapper; // the query source, codec, and header @@ -84,11 +87,14 @@ * * @param featureResource the feature file to create from * @param indexResource the index for the feature file. If null, will auto-generate (if necessary) - * @param codec + * @param codec the codec to use to decode the individual features * @param requireIndex whether an index is required for this file - * @param wrapper - * @param indexWrapper - * @return + * @param wrapper a wrapper to apply to the byte stream from the featureResource allowing injecting features + * like caching and prefetching of the stream, may be null, will only be applied if featureResource + * is a uri representing a {@link java.nio.file.Path} + * @param indexWrapper a wrapper to apply to the byte stream from the indexResource, may be null, will only be + * applied if indexResource is a uri representing a {@link java.nio.file.Path} + * * @throws TribbleException */ public static AbstractFeatureReader getFeatureReader(final String featureResource, String indexResource, final FeatureCodec codec, final boolean requireIndex, Function wrapper, Function indexWrapper) throws TribbleException { diff --git a/src/main/java/htsjdk/tribble/TabixFeatureReader.java b/src/main/java/htsjdk/tribble/TabixFeatureReader.java index 63ebd41c2..889fdc22b 100644 --- a/src/main/java/htsjdk/tribble/TabixFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TabixFeatureReader.java @@ -69,11 +69,13 @@ public TabixFeatureReader(final String featureFile, final String indexFile, fina /** * - * @param featureFile - path to a feature file. Can be a local file, http url, or ftp url - * @param indexFile - path to the index file. - * @param codec - * @param wrapper - * @param indexWrapper @throws IOException + * @param featureFile path to a feature file. Can be a local file, http url, or ftp url + * @param indexFile path to the index file. + * @param wrapper a wrapper to apply to the byte stream from the featureResource allowing injecting features + * like caching and prefetching of the stream, may be null, will only be applied if featureFile + * is a uri representing a {@link java.nio.file.Path} + * @param indexWrapper a wrapper to apply to the byte stream from the indexResource, may be null, will only be + * applied if indexFile is a uri representing a {@link java.nio.file.Path} */ public TabixFeatureReader(final String featureFile, final String indexFile, final AsciiFeatureCodec codec, final Function wrapper, diff --git a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java index e16d605fa..365cc281a 100644 --- a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java @@ -115,7 +115,8 @@ public TribbleIndexedFeatureReader(final String featureFile, final String indexF } /** - * @param featureFile - path to the feature file, can be a local file path, http url, or ftp url + * @param featureFile - path to the feature file, can be a local file path, http url, or ftp url, or any other + * uri supported by a {@link java.nio.file.Path} plugin * @param indexFile - path to the index file * @param codec - codec to decode the features * @param requireIndex - true if the reader will be queries for specific ranges. An index (idx) file must exist diff --git a/src/main/java/htsjdk/tribble/index/IndexFactory.java b/src/main/java/htsjdk/tribble/index/IndexFactory.java index b7af13e54..f53d9a82d 100644 --- a/src/main/java/htsjdk/tribble/index/IndexFactory.java +++ b/src/main/java/htsjdk/tribble/index/IndexFactory.java @@ -170,6 +170,8 @@ public static Index loadIndex(final String indexFile) { * at run time by reading the type flag in the file. * * @param indexFile from which to load the index + * @param indexWrapper a wrapper to apply to the raw byte stream of the index file, only applied to uri's loaded as + * {@link java.nio.file.Path} */ public static Index loadIndex(final String indexFile, Function indexWrapper) { // Must be buffered, because getIndexType uses mark and reset diff --git a/src/main/java/htsjdk/tribble/readers/TabixReader.java b/src/main/java/htsjdk/tribble/readers/TabixReader.java index e098e7bf3..b4882e543 100644 --- a/src/main/java/htsjdk/tribble/readers/TabixReader.java +++ b/src/main/java/htsjdk/tribble/readers/TabixReader.java @@ -46,9 +46,9 @@ * @author Heng Li */ public class TabixReader { - private final String mFn; - private final String mIdxFn; - private final Function mIdxWrpr; + private final String mFilePath; + private final String mIndexPath; + private final Function mIndexWrapper; private final BlockCompressedInputStream mFp; private int mPreset; @@ -99,57 +99,64 @@ private static boolean less64(final long u, final long v) { // unsigned 64-bit c } /** - * @param fn File name of the data file + * @param filePath path to the data file/uri */ - public TabixReader(final String fn) throws IOException { - this(fn, null, SeekableStreamFactory.getInstance().getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(fn))); + public TabixReader(final String filePath) throws IOException { + this(filePath, null, SeekableStreamFactory.getInstance().getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(filePath))); } /** - * @param fn File name of the data file - * @param idxFn Full path to the index file. Auto-generated if null + * @param filePath path to the of the data file/uri + * @param indexPath Full path to the index file. Auto-generated if null */ - public TabixReader(final String fn, final String idxFn) throws IOException { - this(fn, idxFn, SeekableStreamFactory.getInstance().getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(fn))); + public TabixReader(final String filePath, final String indexPath) throws IOException { + this(filePath, indexPath, SeekableStreamFactory.getInstance().getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(filePath))); } /** - * @param fn File name of the data file - * @param idxFn Full path to the index file. Auto-generated if null + * @param filePath path to the data file/uri + * @param indexPath Full path to the index file. Auto-generated if null + * @param wrapper a wrapper to apply to the raw byte stream of the data file if is a uri representing a {@link java.nio.file.Path} + * @param indexWrapper a wrapper to apply to the raw byte stream of the index file if it is a uri representing a {@link java.nio.file.Path} */ - public TabixReader(final String fn, final String idxFn, - Function wrapper, - Function indexWrapper) throws IOException { - this(fn, idxFn, SeekableStreamFactory.getInstance().getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(fn, wrapper)), indexWrapper); + public TabixReader(final String filePath, final String indexPath, + final Function wrapper, + final Function indexWrapper) throws IOException { + this(filePath, indexPath, SeekableStreamFactory.getInstance().getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(filePath, wrapper)), indexWrapper); } /** - * @param fn File name of the data file (used for error messages only) + * @param filePath Path to the data file (used for error messages only) * @param stream Seekable stream from which the data is read */ - public TabixReader(final String fn, SeekableStream stream) throws IOException { - this(fn, null, stream); + public TabixReader(final String filePath, SeekableStream stream) throws IOException { + this(filePath, null, stream); } /** - * @param fn File name of the data file (used for error messages only) - * @param idxFn Full path to the index file. Auto-generated if null + * @param filePath Path to the data file (used for error messages only) + * @param indexPath Full path to the index file. Auto-generated if null * @param stream Seekable stream from which the data is read */ - public TabixReader(final String fn, final String idxFn, SeekableStream stream) throws IOException { - this(fn, idxFn, stream, null); + public TabixReader(final String filePath, final String indexPath, SeekableStream stream) throws IOException { + this(filePath, indexPath, stream, null); } - - public TabixReader(final String fn, final String idxFn, SeekableStream stream, Function idxWrpr) throws IOException { - mFn = fn; + /** + * @param filePath Path to the data file (used for error messages only) + * @param indexPath Full path to the index file. Auto-generated if null + * @param indexWrapper a wrapper to apply to the raw byte stream of the index file if it is a uri representing a {@link java.nio.file.Path} + * @param stream Seekable stream from which the data is read + */ + public TabixReader(final String filePath, final String indexPath, SeekableStream stream, Function indexWrapper) throws IOException { + mFilePath = filePath; mFp = new BlockCompressedInputStream(stream); - mIdxWrpr = idxWrpr; - if(idxFn == null){ - mIdxFn = ParsingUtils.appendToPath(fn, TabixUtils.STANDARD_INDEX_EXTENSION); + mIndexWrapper = indexWrapper; + if(indexPath == null){ + mIndexPath = ParsingUtils.appendToPath(filePath, TabixUtils.STANDARD_INDEX_EXTENSION); } else { - mIdxFn = idxFn; + mIndexPath = indexPath; } readIndex(); } @@ -157,7 +164,7 @@ public TabixReader(final String fn, final String idxFn, SeekableStream stream, F /** return the source (filename/URL) of that reader */ public String getSource() { - return this.mFn; + return this.mFilePath; } private static int reg2bins(final int beg, final int _end, final int[] list) { @@ -259,7 +266,7 @@ private void readIndex(SeekableStream fp) throws IOException { */ private void readIndex() throws IOException { ISeekableStreamFactory ssf = SeekableStreamFactory.getInstance(); - readIndex(ssf.getBufferedStream(ssf.getStreamFor(mIdxFn, mIdxWrpr), 128000)); + readIndex(ssf.getBufferedStream(ssf.getStreamFor(mIndexPath, mIndexWrapper), 128000)); } /** diff --git a/src/main/java/htsjdk/tribble/util/ParsingUtils.java b/src/main/java/htsjdk/tribble/util/ParsingUtils.java index 73657ad45..70c3a3d1d 100644 --- a/src/main/java/htsjdk/tribble/util/ParsingUtils.java +++ b/src/main/java/htsjdk/tribble/util/ParsingUtils.java @@ -81,21 +81,25 @@ } + /** + * @return an input stream from the given path + * @throws IOException + */ public static InputStream openInputStream(String path) throws IOException { - final InputStream inputStream; - if (path.startsWith("http:") || path.startsWith("https:") || path.startsWith("ftp:")) { - inputStream = getURLHelper(new URL(path)).openInputStream(); - } else if (IOUtil.hasScheme(path)) { - inputStream = Files.newInputStream(IOUtil.getPath(path)); - } else { - File file = new File(path); - inputStream = new FileInputStream(file); - } - - return inputStream; + return openInputStream(path, null); } + /** + * open an input stream from the given path and wrap the raw byte stream with a wrapper if given + * + * the wrapper will only be applied to paths that are not http, https, ftp, or file, i.e. any {@link java.nio.file.Path} + * using a custom filesystem plugin + * @param path a uri like string + * @param wrapper to wrap the input stream in, may be used to implement caching or prefetching, etc + * @return + * @throws IOException + */ public static InputStream openInputStream(String path, Function wrapper) throws IOException { diff --git a/src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java b/src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java index 75b4a6269..da0f84301 100644 --- a/src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java +++ b/src/test/java/htsjdk/tribble/AbstractFeatureReaderTest.java @@ -40,11 +40,12 @@ private static final String MANGLED_VCF_INDEX = TEST_PATH + "mangledBaseVariants.vcf.idx"; private static final String VCF = TEST_PATH + "baseVariants.vcf"; private static final String VCF_INDEX = TEST_PATH + "baseVariants.vcf.idx"; - private static final String VCF_TABIX = TEST_PATH + "baseVariants.vcf.gz"; + private static final String VCF_TABIX_BLOCK_GZIPPED = TEST_PATH + "baseVariants.vcf.gz"; private static final String VCF_TABIX_INDEX = TEST_PATH + "baseVariants.vcf.gz.tbi"; - private static final String MANGLED_VCF_TABIX = TEST_PATH + "baseVariants.mangled.vcf.gz"; + private static final String MANGLED_VCF_TABIX_BLOCK_GZIPPED = TEST_PATH + "baseVariants.mangled.vcf.gz"; private static final String MANGLED_VCF_TABIX_INDEX = TEST_PATH + "baseVariants.mangled.vcf.gz.tbi"; + //wrapper which skips the first byte of a file and leaves the rest unchanged private static final Function WRAPPER = SkippingByteChannel::new; /** @@ -137,10 +138,10 @@ public void testBlockCompressionExtension(final String testURIString, final bool {MANGLED_VCF, MANGLED_VCF_INDEX, WRAPPER, WRAPPER}, {VCF, MANGLED_VCF_INDEX, null, WRAPPER}, {MANGLED_VCF, VCF_INDEX, WRAPPER, null}, - {MANGLED_VCF_TABIX, MANGLED_VCF_TABIX_INDEX, WRAPPER, WRAPPER}, - {VCF_TABIX, MANGLED_VCF_TABIX_INDEX, null, WRAPPER}, - {MANGLED_VCF_TABIX, VCF_TABIX_INDEX, WRAPPER, null}, - {VCF_TABIX, VCF_TABIX_INDEX, null, null}, + {MANGLED_VCF_TABIX_BLOCK_GZIPPED, MANGLED_VCF_TABIX_INDEX, WRAPPER, WRAPPER}, + {VCF_TABIX_BLOCK_GZIPPED, MANGLED_VCF_TABIX_INDEX, null, WRAPPER}, + {MANGLED_VCF_TABIX_BLOCK_GZIPPED, VCF_TABIX_INDEX, WRAPPER, null}, + {VCF_TABIX_BLOCK_GZIPPED, VCF_TABIX_INDEX, null, null}, }; } @@ -148,11 +149,11 @@ public void testBlockCompressionExtension(final String testURIString, final bool public void testGetFeatureReaderWithPathAndWrappers(String file, String index, Function wrapper, Function indexWrapper) throws IOException, URISyntaxException { - try(FileSystem fs = Jimfs.newFileSystem("test", Configuration.unix())) { + try(FileSystem fs = Jimfs.newFileSystem("test", Configuration.unix()); final AbstractFeatureReader featureReader = getFeatureReader(file, index, wrapper, indexWrapper, new VCFCodec(), - fs); + fs)){ Assert.assertTrue(featureReader.hasIndex()); Assert.assertEquals(featureReader.iterator().toList().size(), 26); Assert.assertEquals(featureReader.query("1", 190, 210).toList().size(), 3); @@ -163,15 +164,20 @@ public void testGetFeatureReaderWithPathAndWrappers(String file, String index, @DataProvider(name = "failsWithoutWrappers") private static Object[][] failsWithoutWrappers(){ return new Object[][] { - {MANGLED_VCF, VCF_INDEX, new VCFCodec()}, - {VCF, MANGLED_VCF_INDEX, new VCFCodec()}, + {MANGLED_VCF, MANGLED_VCF_INDEX}, + {VCF, MANGLED_VCF_INDEX}, + {MANGLED_VCF, VCF_INDEX}, + {MANGLED_VCF_TABIX_BLOCK_GZIPPED, MANGLED_VCF_TABIX_INDEX}, + {VCF_TABIX_BLOCK_GZIPPED, MANGLED_VCF_TABIX_INDEX}, + {MANGLED_VCF_TABIX_BLOCK_GZIPPED, VCF_TABIX_INDEX}, }; } @Test(dataProvider = "failsWithoutWrappers", expectedExceptions = {TribbleException.class, FileTruncatedException.class}) - public void testFailureIfNoWrapper(String file, String index, FeatureCodec codec) throws IOException, URISyntaxException { - try(FileSystem fs = Jimfs.newFileSystem("test", Configuration.unix())) { - getFeatureReader(file, index, null, null, new VCFCodec(), fs); + public void testFailureIfNoWrapper(String file, String index) throws IOException, URISyntaxException { + try(final FileSystem fs = Jimfs.newFileSystem("test", Configuration.unix()); + final FeatureReader reader = getFeatureReader(file, index, null, null, new VCFCodec(), fs)){ + // should have exploded by now } } From 05692a69ab21c9a07557c46316d95b2bfed2d5a4 Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Fri, 3 Feb 2017 15:27:08 -0500 Subject: [PATCH 3/3] updating BCF2Codec.canDecode() to deal with NIO but not wrappers --- src/main/java/htsjdk/variant/bcf2/BCF2Codec.java | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java index a9457643f..4926c80fe 100644 --- a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java +++ b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java @@ -25,6 +25,7 @@ package htsjdk.variant.bcf2; +import htsjdk.samtools.util.IOUtil; import htsjdk.tribble.BinaryFeatureCodec; import htsjdk.tribble.Feature; import htsjdk.tribble.FeatureCodecHeader; @@ -44,10 +45,8 @@ import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLineType; -import java.io.ByteArrayInputStream; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; +import java.io.*; +import java.nio.file.Files; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -207,21 +206,11 @@ public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream @Override public boolean canDecode( final String path ) { - FileInputStream fis = null; - try { - fis = new FileInputStream(path); + try (InputStream fis = Files.newInputStream(IOUtil.getPath(path)) ){ final BCFVersion version = BCFVersion.readBCFVersion(fis); return version != null && version.getMajorVersion() == ALLOWED_MAJOR_VERSION; - } catch ( FileNotFoundException e ) { - return false; - } catch ( IOException e ) { + } catch ( final IOException e ) { return false; - } finally { - try { - if ( fis != null ) fis.close(); - } catch ( IOException e ) { - // do nothing - } } }