From 725151914dcc05ba82c64d60362392e34330ce8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Tue, 26 Jul 2016 12:37:50 +0200 Subject: [PATCH 1/5] added getTabixFormat method to FeatureCodec interface to index files with tabix when the format is defined in implementors --- .../java/htsjdk/tribble/AbstractFeatureCodec.java | 9 +++++++ src/main/java/htsjdk/tribble/FeatureCodec.java | 12 +++++++++ src/main/java/htsjdk/tribble/bed/BEDCodec.java | 6 +++++ .../java/htsjdk/tribble/index/IndexFactory.java | 31 +++++++++++++++++++--- .../java/htsjdk/variant/vcf/AbstractVCFCodec.java | 6 +++++ 5 files changed, 61 insertions(+), 3 deletions(-) diff --git a/src/main/java/htsjdk/tribble/AbstractFeatureCodec.java b/src/main/java/htsjdk/tribble/AbstractFeatureCodec.java index a1e2771f7..e0f4e2727 100644 --- a/src/main/java/htsjdk/tribble/AbstractFeatureCodec.java +++ b/src/main/java/htsjdk/tribble/AbstractFeatureCodec.java @@ -23,6 +23,8 @@ */ package htsjdk.tribble; +import htsjdk.tribble.index.tabix.TabixFormat; + import java.io.IOException; /** @@ -47,4 +49,11 @@ public Feature decodeLoc(final SOURCE source) throws IOException { public Class getFeatureType() { return myClass; } + + /** + * Default implementation throws an exception + */ + public TabixFormat getTabixFormat() { + throw new TribbleException(this.getClass().getSimpleName() + "does not have defined tabix format"); + } } diff --git a/src/main/java/htsjdk/tribble/FeatureCodec.java b/src/main/java/htsjdk/tribble/FeatureCodec.java index b45d8cf8c..f527eb0da 100644 --- a/src/main/java/htsjdk/tribble/FeatureCodec.java +++ b/src/main/java/htsjdk/tribble/FeatureCodec.java @@ -19,6 +19,7 @@ package htsjdk.tribble; import htsjdk.samtools.util.LocationAware; +import htsjdk.tribble.index.tabix.TabixFormat; import java.io.IOException; import java.io.InputStream; @@ -119,4 +120,15 @@ * @return true if potentialInput can be parsed, false otherwise */ public boolean canDecode(final String path); + + /** + * Define the tabix format for the feature, used for indexing. + * + * Note that only {@link AsciiFeatureCodec} could read tabix files as defined in + * {@link AbstractFeatureReader#getFeatureReader(String, String, FeatureCodec, boolean)} + * + * @return the format to use with tabix + * @throws TribbleException if the format is not defined + */ + public TabixFormat getTabixFormat(); } diff --git a/src/main/java/htsjdk/tribble/bed/BEDCodec.java b/src/main/java/htsjdk/tribble/bed/BEDCodec.java index 0e9185025..544992afe 100644 --- a/src/main/java/htsjdk/tribble/bed/BEDCodec.java +++ b/src/main/java/htsjdk/tribble/bed/BEDCodec.java @@ -25,6 +25,7 @@ import htsjdk.tribble.AsciiFeatureCodec; import htsjdk.tribble.annotation.Strand; +import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.readers.LineIterator; import htsjdk.tribble.util.ParsingUtils; @@ -224,4 +225,9 @@ public int value() { } } + @Override + public TabixFormat getTabixFormat() { + return TabixFormat.BED; + } + } diff --git a/src/main/java/htsjdk/tribble/index/IndexFactory.java b/src/main/java/htsjdk/tribble/index/IndexFactory.java index 85fbd72c7..ee1fee38b 100644 --- a/src/main/java/htsjdk/tribble/index/IndexFactory.java +++ b/src/main/java/htsjdk/tribble/index/IndexFactory.java @@ -260,11 +260,25 @@ public static LinearIndex createLinearIndex(final File inputFile, final FeatureC public static Index createIndex(final File inputFile, final FeatureCodec codec, final IndexType type) { + return createIndex(inputFile, codec, type, null); + } + + /** + * Create a index of the specified type with default binning parameters + * + * @param inputFile the input file to load features from + * @param codec the codec to use for decoding records + * @param type the type of index to create + * @param + */ + public static Index createIndex(final File inputFile, + final FeatureCodec codec, + final IndexType type, + final SAMSequenceDictionary sequenceDictionary) { switch (type) { case INTERVAL_TREE: return createIntervalIndex(inputFile, codec); case LINEAR: return createLinearIndex(inputFile, codec); - // Tabix index initialization requires additional information, so this construction method won't work. - case TABIX: throw new UnsupportedOperationException("Tabix indices cannot be created through a generic interface"); + case TABIX: return createTabixIndex(inputFile, codec, sequenceDictionary); } throw new IllegalArgumentException("Unrecognized IndexType " + type); } @@ -318,7 +332,18 @@ public static void writeIndex(final Index idx, final File idxFile) throws IOExce return (TabixIndex)createIndex(inputFile, new FeatureIterator(inputFile, codec), indexCreator); } - + /** + * @param inputFile The file to be indexed. + * @param codec Mechanism for reading inputFile. + * @param sequenceDictionary May be null, but if present may reduce memory footprint for index creation. Features + * in inputFile must be in the order defined by sequenceDictionary, if it is present. + * + */ + public static TabixIndex createTabixIndex(final File inputFile, + final FeatureCodec codec, + final SAMSequenceDictionary sequenceDictionary) { + return createTabixIndex(inputFile, codec, codec.getTabixFormat(), sequenceDictionary); + } private static Index createIndex(final File inputFile, final FeatureIterator iterator, final IndexCreator creator) { Feature lastFeature = null; diff --git a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java index 7b157ca7c..16857b4e6 100644 --- a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java +++ b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java @@ -30,6 +30,7 @@ import htsjdk.tribble.Feature; import htsjdk.tribble.NameAwareCodec; import htsjdk.tribble.TribbleException; +import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.util.ParsingUtils; import htsjdk.variant.utils.GeneralUtils; import htsjdk.variant.variantcontext.Allele; @@ -782,4 +783,9 @@ protected void generateException(String message) { protected static void generateException(String message, int lineNo) { throw new TribbleException(String.format("The provided VCF file is malformed at approximately line number %d: %s", lineNo, message)); } + + @Override + public TabixFormat getTabixFormat() { + return TabixFormat.VCF; + } } From e241a8e26b0e7915c63154f8b5f1f6d0e4dad812 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Tue, 26 Jul 2016 12:46:38 +0200 Subject: [PATCH 2/5] changed as a default method in FeatureCodec --- src/main/java/htsjdk/tribble/AbstractFeatureCodec.java | 6 ------ src/main/java/htsjdk/tribble/FeatureCodec.java | 6 ++++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/main/java/htsjdk/tribble/AbstractFeatureCodec.java b/src/main/java/htsjdk/tribble/AbstractFeatureCodec.java index e0f4e2727..899e73014 100644 --- a/src/main/java/htsjdk/tribble/AbstractFeatureCodec.java +++ b/src/main/java/htsjdk/tribble/AbstractFeatureCodec.java @@ -50,10 +50,4 @@ public Feature decodeLoc(final SOURCE source) throws IOException { return myClass; } - /** - * Default implementation throws an exception - */ - public TabixFormat getTabixFormat() { - throw new TribbleException(this.getClass().getSimpleName() + "does not have defined tabix format"); - } } diff --git a/src/main/java/htsjdk/tribble/FeatureCodec.java b/src/main/java/htsjdk/tribble/FeatureCodec.java index f527eb0da..f14191a67 100644 --- a/src/main/java/htsjdk/tribble/FeatureCodec.java +++ b/src/main/java/htsjdk/tribble/FeatureCodec.java @@ -122,7 +122,7 @@ public boolean canDecode(final String path); /** - * Define the tabix format for the feature, used for indexing. + * Define the tabix format for the feature, used for indexing. Default implementation throws an exception. * * Note that only {@link AsciiFeatureCodec} could read tabix files as defined in * {@link AbstractFeatureReader#getFeatureReader(String, String, FeatureCodec, boolean)} @@ -130,5 +130,7 @@ * @return the format to use with tabix * @throws TribbleException if the format is not defined */ - public TabixFormat getTabixFormat(); + default public TabixFormat getTabixFormat() { + throw new TribbleException(this.getClass().getSimpleName() + "does not have defined tabix format"); + } } From afa1838e4d1d739eb40fff9fd19f64d058951f26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Tue, 26 Jul 2016 13:25:09 +0200 Subject: [PATCH 3/5] added javadoc param description --- src/main/java/htsjdk/tribble/index/IndexFactory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/htsjdk/tribble/index/IndexFactory.java b/src/main/java/htsjdk/tribble/index/IndexFactory.java index ee1fee38b..8ff57aa97 100644 --- a/src/main/java/htsjdk/tribble/index/IndexFactory.java +++ b/src/main/java/htsjdk/tribble/index/IndexFactory.java @@ -269,7 +269,7 @@ public static LinearIndex createLinearIndex(final File inputFile, final FeatureC * @param inputFile the input file to load features from * @param codec the codec to use for decoding records * @param type the type of index to create - * @param + * @param sequenceDictionary May be null, but if present may reduce memory footprint for tabix index creation */ public static Index createIndex(final File inputFile, final FeatureCodec codec, From 0bc504c2ac91845c2999e3ee702d254b98798b4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Tue, 26 Jul 2016 14:34:48 +0200 Subject: [PATCH 4/5] added tests and final getTabixFormat for binary codecs --- src/main/java/htsjdk/tribble/BinaryFeatureCodec.java | 10 ++++++++++ src/test/java/htsjdk/tribble/BinaryFeaturesTest.java | 5 +++++ src/test/java/htsjdk/tribble/bed/BEDCodecTest.java | 6 ++++++ src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java | 8 ++++++++ 4 files changed, 29 insertions(+) diff --git a/src/main/java/htsjdk/tribble/BinaryFeatureCodec.java b/src/main/java/htsjdk/tribble/BinaryFeatureCodec.java index dfe1c9174..a7c2b47d7 100644 --- a/src/main/java/htsjdk/tribble/BinaryFeatureCodec.java +++ b/src/main/java/htsjdk/tribble/BinaryFeatureCodec.java @@ -3,6 +3,7 @@ import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.LocationAware; import htsjdk.samtools.util.RuntimeIOException; +import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.readers.PositionalBufferedStream; import java.io.IOException; @@ -40,4 +41,13 @@ public boolean isDone(final PositionalBufferedStream source) { throw new RuntimeIOException("Failure reading from stream.", e); } } + + /** + * Marked as final because binary features could not be tabix indexed + */ + @Override + public final TabixFormat getTabixFormat() { + throw new TribbleException("Binary codecs does not support tabix"); + } + } diff --git a/src/test/java/htsjdk/tribble/BinaryFeaturesTest.java b/src/test/java/htsjdk/tribble/BinaryFeaturesTest.java index 946609725..eff8939d8 100644 --- a/src/test/java/htsjdk/tribble/BinaryFeaturesTest.java +++ b/src/test/java/htsjdk/tribble/BinaryFeaturesTest.java @@ -54,4 +54,9 @@ public void testBinaryCodec(final File source, final FeatureCodec Date: Wed, 10 Aug 2016 12:22:50 +0200 Subject: [PATCH 5/5] addressed comments --- src/main/java/htsjdk/tribble/AbstractFeatureCodec.java | 3 --- src/main/java/htsjdk/tribble/BinaryFeatureCodec.java | 1 - src/main/java/htsjdk/tribble/bed/BEDCodec.java | 1 - src/main/java/htsjdk/tribble/index/IndexFactory.java | 4 ++-- src/test/java/htsjdk/tribble/bed/BEDCodecTest.java | 1 - src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java | 1 - 6 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/main/java/htsjdk/tribble/AbstractFeatureCodec.java b/src/main/java/htsjdk/tribble/AbstractFeatureCodec.java index 899e73014..a1e2771f7 100644 --- a/src/main/java/htsjdk/tribble/AbstractFeatureCodec.java +++ b/src/main/java/htsjdk/tribble/AbstractFeatureCodec.java @@ -23,8 +23,6 @@ */ package htsjdk.tribble; -import htsjdk.tribble.index.tabix.TabixFormat; - import java.io.IOException; /** @@ -49,5 +47,4 @@ public Feature decodeLoc(final SOURCE source) throws IOException { public Class getFeatureType() { return myClass; } - } diff --git a/src/main/java/htsjdk/tribble/BinaryFeatureCodec.java b/src/main/java/htsjdk/tribble/BinaryFeatureCodec.java index a7c2b47d7..dbd0afc47 100644 --- a/src/main/java/htsjdk/tribble/BinaryFeatureCodec.java +++ b/src/main/java/htsjdk/tribble/BinaryFeatureCodec.java @@ -49,5 +49,4 @@ public boolean isDone(final PositionalBufferedStream source) { public final TabixFormat getTabixFormat() { throw new TribbleException("Binary codecs does not support tabix"); } - } diff --git a/src/main/java/htsjdk/tribble/bed/BEDCodec.java b/src/main/java/htsjdk/tribble/bed/BEDCodec.java index 544992afe..62d202c19 100644 --- a/src/main/java/htsjdk/tribble/bed/BEDCodec.java +++ b/src/main/java/htsjdk/tribble/bed/BEDCodec.java @@ -229,5 +229,4 @@ public int value() { public TabixFormat getTabixFormat() { return TabixFormat.BED; } - } diff --git a/src/main/java/htsjdk/tribble/index/IndexFactory.java b/src/main/java/htsjdk/tribble/index/IndexFactory.java index 8ff57aa97..a588220dc 100644 --- a/src/main/java/htsjdk/tribble/index/IndexFactory.java +++ b/src/main/java/htsjdk/tribble/index/IndexFactory.java @@ -264,7 +264,7 @@ public static LinearIndex createLinearIndex(final File inputFile, final FeatureC } /** - * Create a index of the specified type with default binning parameters + * Create an index of the specified type with default binning parameters * * @param inputFile the input file to load features from * @param codec the codec to use for decoding records @@ -334,7 +334,7 @@ public static void writeIndex(final Index idx, final File idxFile) throws IOExce /** * @param inputFile The file to be indexed. - * @param codec Mechanism for reading inputFile. + * @param codec the codec to use for decoding records * @param sequenceDictionary May be null, but if present may reduce memory footprint for index creation. Features * in inputFile must be in the order defined by sequenceDictionary, if it is present. * diff --git a/src/test/java/htsjdk/tribble/bed/BEDCodecTest.java b/src/test/java/htsjdk/tribble/bed/BEDCodecTest.java index 14b1b61e8..c7b21931c 100644 --- a/src/test/java/htsjdk/tribble/bed/BEDCodecTest.java +++ b/src/test/java/htsjdk/tribble/bed/BEDCodecTest.java @@ -220,7 +220,6 @@ private void createIndex(File testFile, File idxFile) throws IOException { stream.close(); } } - } @Test diff --git a/src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java b/src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java index 93f09f6e0..9f81547ed 100644 --- a/src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java +++ b/src/test/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java @@ -57,5 +57,4 @@ public void testGetTabixFormat() { Assert.assertEquals(new VCFCodec().getTabixFormat(), TabixFormat.VCF); Assert.assertEquals(new VCF3Codec().getTabixFormat(), TabixFormat.VCF); } - }