From 73845d630afe3f7e8caabe4388f6d816d89789ee Mon Sep 17 00:00:00 2001 From: JP Martin Date: Wed, 14 Dec 2016 17:04:35 -0800 Subject: [PATCH 1/3] Add optional wrapper for the underlying SeekableByteChannel This allows users to provide their own buffering or prefetching, without them having to change htsjdk. --- .../java/htsjdk/samtools/SamInputResource.java | 30 +++++++++++++++++-- .../java/htsjdk/samtools/SamReaderFactory.java | 35 ++++++++++++++++++---- .../seekablestream/SeekablePathStream.java | 11 +++++++ .../java/htsjdk/samtools/SamReaderFactoryTest.java | 3 +- 4 files changed, 70 insertions(+), 9 deletions(-) diff --git a/src/main/java/htsjdk/samtools/SamInputResource.java b/src/main/java/htsjdk/samtools/SamInputResource.java index f25d97bb6..496d1a363 100644 --- a/src/main/java/htsjdk/samtools/SamInputResource.java +++ b/src/main/java/htsjdk/samtools/SamInputResource.java @@ -39,9 +39,11 @@ import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URL; +import java.nio.channels.SeekableByteChannel; import java.nio.file.FileSystemNotFoundException; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.function.Function; /** * Describes a SAM-like resource, including its data (where the records are), and optionally an index. @@ -89,7 +91,9 @@ public String toString() { public static SamInputResource of(final File file) { return new SamInputResource(new FileInputResource(file)); } /** Creates a {@link SamInputResource} reading from the provided resource, with no index. */ - public static SamInputResource of(final Path path) { return new SamInputResource(new PathInputResource(path)); } + public static SamInputResource of(final Path path, Function<SeekableByteChannel, SeekableByteChannel> wrapper) { + return new SamInputResource(new PathInputResource(path, wrapper)); + } /** Creates a {@link SamInputResource} reading from the provided resource, with no index. 
*/ public static SamInputResource of(final InputStream inputStream) { return new SamInputResource(new InputStreamInputResource(inputStream)); } @@ -121,7 +125,7 @@ public SamInputResource index(final File file) { /** Updates the index to point at the provided resource, then returns itself. */ public SamInputResource index(final Path path) { - this.index = new PathInputResource(path); + this.index = new PathInputResource(path, Function.identity()); return this; } @@ -268,11 +272,12 @@ public SRAAccession asSRAAccession() { class PathInputResource extends InputResource { final Path pathResource; + final Function<SeekableByteChannel, SeekableByteChannel> wrapper; final Lazy lazySeekableStream = new Lazy(new Lazy.LazyInitializer() { @Override public SeekableStream make() { try { - return new SeekablePathStream(pathResource); + return new SeekablePathStream(pathResource, wrapper); } catch (final IOException e) { throw new RuntimeIOException(e); } @@ -281,8 +286,27 @@ public SeekableStream make() { PathInputResource(final Path pathResource) { + this(pathResource, Function.identity()); + } + + PathInputResource(final Path pathResource, Function<SeekableByteChannel, SeekableByteChannel> wrapper) { super(Type.PATH); this.pathResource = pathResource; + this.wrapper = wrapper; + } + + /** Returns a modified PathInputResource with the specific wrapper. After calling this, + * do NOT use the original object anymore. 
+ * + * @param wrapper + * @return wrapped PathInputResource + */ + PathInputResource wrap(Function<SeekableByteChannel, SeekableByteChannel> wrapper) { + if (lazySeekableStream.isInitialized()) { + // otherwise it'll be opened twice and that'll break + throw new IllegalStateException("Only call wrap before opening the resource"); + } + return new PathInputResource(this.pathResource, wrapper); } @Override diff --git a/src/main/java/htsjdk/samtools/SamReaderFactory.java b/src/main/java/htsjdk/samtools/SamReaderFactory.java index 8769f4879..51afea475 100644 --- a/src/main/java/htsjdk/samtools/SamReaderFactory.java +++ b/src/main/java/htsjdk/samtools/SamReaderFactory.java @@ -33,9 +33,11 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.nio.channels.SeekableByteChannel; import java.nio.file.Path; import java.util.Collections; import java.util.EnumSet; +import java.util.function.Function; import java.util.zip.GZIPInputStream; /** @@ -74,11 +76,13 @@ public abstract class SamReaderFactory { private static ValidationStringency defaultValidationStringency = ValidationStringency.DEFAULT_STRINGENCY; - + + protected Function<SeekableByteChannel, SeekableByteChannel> pathWrapper; + abstract public SamReader open(final File file); public SamReader open(final Path path) { - final SamInputResource r = SamInputResource.of(path); + final SamInputResource r = SamInputResource.of(path, getPathWrapper()); final Path indexMaybe = SamFiles.findIndex(path); if (indexMaybe != null) r.index(indexMaybe); return open(r); @@ -102,6 +106,21 @@ public SamReader open(final Path path) { /** Sets a specific Option to a boolean value. * */ abstract public SamReaderFactory setOption(final Option option, boolean value); + /** Sets a wrapper to modify the SeekableByteChannel from an opened Path, e.g. to add + * buffering or prefetching. This only works on Path inputs since we need a SeekableByteChannel. 
+ * + * @param wrapper how to modify the SeekableByteChannel (Function.identity to unset) + * @return this + */ + public SamReaderFactory setPathWrapper(Function<SeekableByteChannel, SeekableByteChannel> wrapper) { + this.pathWrapper = wrapper; + return this; + } + + public Function<SeekableByteChannel, SeekableByteChannel> getPathWrapper() { + return pathWrapper; + } + /** Sets the specified reference sequence * */ abstract public SamReaderFactory referenceSequence(File referenceSequence); @@ -138,8 +157,8 @@ public static SamReaderFactory makeDefault() { } /** - * Creates an "empty" factory with no enabled {@link Option}s, {@link ValidationStringency#DEFAULT_STRINGENCY}, and - * {@link htsjdk.samtools.DefaultSAMRecordFactory}. + * Creates an "empty" factory with no enabled {@link Option}s, {@link ValidationStringency#DEFAULT_STRINGENCY}, + * no path wrapper, and {@link htsjdk.samtools.DefaultSAMRecordFactory}. */ public static SamReaderFactory make() { return new SamReaderFactoryImpl(EnumSet.noneOf(Option.class), ValidationStringency.DEFAULT_STRINGENCY, DefaultSAMRecordFactory.getInstance()); @@ -155,10 +174,15 @@ public static SamReaderFactory make() { private CRAMReferenceSource referenceSource; private SamReaderFactoryImpl(final EnumSet