From 7fae80cf63d6566b978bf01ce2eb79c038df9b08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 27 Jul 2016 15:30:12 +0200 Subject: [PATCH 1/8] added checking for AbstractFeatureReader.BLOCK_COMPRESSED_EXTENSIONS in TribbleIndexedFeatureReader --- src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java index ae278f40a..1e2f35ce0 100644 --- a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java @@ -217,7 +217,7 @@ private void readHeader() throws IOException { PositionalBufferedStream pbs = null; try { is = ParsingUtils.openInputStream(path); - if (isGZIPPath(path)) { + if (hasBlockCompressedExtension(path) || hasBlockCompressedExtension(new URI(path))) { // TODO -- warning I don't think this can work, the buffered input stream screws up position is = new GZIPInputStream(new BufferedInputStream(is)); } From b8390eebbacd200b68d4252bed6509499d9bb150 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 10 Aug 2016 12:02:31 +0200 Subject: [PATCH 2/8] addressing comment --- src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java index 1e2f35ce0..1bc5646b0 100644 --- a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java @@ -217,7 +217,7 @@ private void readHeader() throws IOException { PositionalBufferedStream pbs = null; try { is = ParsingUtils.openInputStream(path); - if (hasBlockCompressedExtension(path) || hasBlockCompressedExtension(new URI(path))) { + if (hasBlockCompressedExtension(new URI(path))) { // TODO -- warning I don't think this can work, the buffered input stream screws up position is = new GZIPInputStream(new BufferedInputStream(is)); } From 2bd96da1fca56e39054aacbd551e11d46cf08de4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 10 Aug 2016 12:04:07 +0200 Subject: [PATCH 3/8] fixing WFIterator checking of compressed file --- src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java index 1bc5646b0..aaedb71e3 100644 --- a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java @@ -310,7 +310,7 @@ public WFIterator() throws IOException { final InputStream inputStream = ParsingUtils.openInputStream(path); final PositionalBufferedStream pbs; - if (isGZIPPath(path)) { + if (hasBlockCompressedExtension(path)) { // Gzipped -- we need to buffer the GZIPInputStream methods as this class makes read() calls, // and seekableStream does not support single byte reads final InputStream is = new GZIPInputStream(new BufferedInputStream(inputStream, 512000)); From 326fc2309188157831fa9b71fdc05fdf309f2f34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 10 Aug 2016 12:05:34 +0200 Subject: [PATCH 4/8] isGZIPPath deprecation --- src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java index aaedb71e3..01ed64647 100644 --- a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java @@ -274,6 +274,7 @@ private void readHeader() throws IOException { } //Visible for testing + @Deprecated static boolean isGZIPPath(final String path) { if (path.toLowerCase().endsWith(".gz")) { return true; From bbbe1a00118f7ce246735b499c7030196014d9c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 10 Aug 2016 12:09:09 +0200 Subject: [PATCH 5/8] addedd test for new functionality --- .../htsjdk/tribble/TribbleIndexFeatureReaderTest.java | 3 ++- src/test/resources/htsjdk/tribble/test.vcf.bgz | Bin 0 -> 849 bytes 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 src/test/resources/htsjdk/tribble/test.vcf.bgz diff --git a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java index 76bd41068..cb74baac0 100644 --- a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java +++ b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java @@ -64,7 +64,8 @@ public void testGZExtension(final String testString, final boolean expected) thr public Object[][] createFeatureFileStrings() { return new Object[][]{ {TestUtils.DATA_DIR + "test.vcf", 5}, - {TestUtils.DATA_DIR + "test.vcf.gz", 5} + {TestUtils.DATA_DIR + "test.vcf.gz", 5}, + {TestUtils.DATA_DIR + "test.vcf.bgz", 5} }; } diff --git a/src/test/resources/htsjdk/tribble/test.vcf.bgz b/src/test/resources/htsjdk/tribble/test.vcf.bgz new file mode 100644 index 0000000000000000000000000000000000000000..44072dc94d11a640ed3c171ebe74ed1b43a75b55 GIT binary patch literal 849 zcmV-X1FrlZiwFb&00000{{{d;LjnLa1C5hiZ{j!X-=_xDMfsidrPY5ebx(GPckzudQ* zp)|@T2#wJQF}tVBQ@vAuexB#m$w-ry#j-9BwamZMR#dA*)vB;(8o3w_hg!W3-*)v7 zBZQ`^D08KkX;zxw7mp8f$_I(mMq0qGEKGXv|Coo?7iOwBn4`~$OWByaP-?C6ZFcq_ zD2Mr;`3WX`w@*frE41?x>W%jtkCKG*t=f{kgt}6@RB8WiR_3LnWtFNz-$5gPmbQqq z|MROgt&UQs#lf#+ZBm)po%teqX^U8F;Np~TRqbOKpDX3h3!@IIhN05CI<1-r{JZXm zQzp(v=2fP&^H*w)j*U;=eSMD$m!HAKrIDEvs>&R1@% zPd4*&j}Y_<*qGY3-a|B_GPSLtgY|lNxFXQC2-ySdx0}V%$yIYoAJYmt+f$nD9Zbfz ztQq-V)=XBNXMY`I^s;^q;ubNDKapc|(T)B1LA?6REx4+kOFMna=kC&4$^E;l^iDr} ztAhN8-o~GODC2Rqet8DVmkpSQVEruu!BY%Ye+EyWOBpl+2AvmJ1ol6V@a0J^Px^%* zfL-b#P-~8uOPB|yAOK_xmceEN78{>V!ljShNf7yHz$W3tWJbU=7SojomtwX8=ob?1 zNMOM*WEkLXIl3-~Sh!R$F@S^C&7%~70f?;&1@_kb>w!BDguRdav9OF0WXLXY^Da1p z6riE~X4fqlW5mb2?ZictG3zjeG2q@?FQoNBoe!lHq5B!CQwpiH=}E1`%`ukC06T(6fKZQ&pKVXp85E^L?C z^7C|8ms%QZIghp{`x^rr#f{atahkq4SrBfEv@b+pPmpdD$cFU~v{81FZ3X}UABzYC b000000RIL6LPG)o8vp|U0000000000n9!Tm literal 0 HcmV?d00001 From 5dd44c85ec59a52824e9a79a0c16714fa86e2c7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 10 Aug 2016 12:50:14 +0200 Subject: [PATCH 6/8] fixing URL encoding --- src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java index 01ed64647..7783fa247 100644 --- a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java @@ -37,6 +37,7 @@ import java.io.InputStream; import java.net.URI; import java.net.URISyntaxException; +import java.net.URLEncoder; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -217,7 +218,7 @@ private void readHeader() throws IOException { PositionalBufferedStream pbs = null; try { is = ParsingUtils.openInputStream(path); - if (hasBlockCompressedExtension(new URI(path))) { + if (hasBlockCompressedExtension(new URI(URLEncoder.encode(path, "UTF-8")))) { // TODO -- warning I don't think this can work, the buffered input stream screws up position is = new GZIPInputStream(new BufferedInputStream(is)); } From a21f906f597102e53ab4766ad2364f876850e24e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 10 Aug 2016 13:59:52 +0200 Subject: [PATCH 7/8] added no-remote test file with spaces --- .../tribble/TribbleIndexFeatureReaderTest.java | 3 ++- .../resources/htsjdk/tribble/test with spaces.vcf | 24 ++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 src/test/resources/htsjdk/tribble/test with spaces.vcf diff --git a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java index cb74baac0..afdd827e6 100644 --- a/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java +++ b/src/test/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java @@ -65,7 +65,8 @@ public void testGZExtension(final String testString, final boolean expected) thr return new Object[][]{ {TestUtils.DATA_DIR + "test.vcf", 5}, {TestUtils.DATA_DIR + "test.vcf.gz", 5}, - {TestUtils.DATA_DIR + "test.vcf.bgz", 5} + {TestUtils.DATA_DIR + "test.vcf.bgz", 5}, + {TestUtils.DATA_DIR + "test with spaces.vcf", 5} }; } diff --git a/src/test/resources/htsjdk/tribble/test with spaces.vcf b/src/test/resources/htsjdk/tribble/test with spaces.vcf new file mode 100644 index 000000000..27d45004c --- /dev/null +++ b/src/test/resources/htsjdk/tribble/test with spaces.vcf @@ -0,0 +1,24 @@ +##fileformat=VCFv4.1 +##fileDate=20090805 +##source=myImputationProgramV3.1 +##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta +##contig= +##phasing=partial +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. +20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3 +20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4 +20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2 +20 1234567 microsat1 GTC G,GTCT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3 From a4ea9aa5bdef6c30bf24a8776c7835757358836e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 10 Aug 2016 17:17:06 +0200 Subject: [PATCH 8/8] added comment for deprecated method --- src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java index 7783fa247..514782d1e 100644 --- a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java @@ -33,6 +33,7 @@ import htsjdk.tribble.util.ParsingUtils; import java.io.BufferedInputStream; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.net.URI; @@ -274,6 +275,9 @@ private void readHeader() throws IOException { return new WFIterator(); } + /** + * @deprecated use {@link #hasBlockCompressedExtension(String)} instead + */ //Visible for testing @Deprecated static boolean isGZIPPath(final String path) {