From 07f8467eb2f5a2baab1db7d5d01ed9007952c5a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 14 Dec 2016 16:07:53 +0100 Subject: [PATCH 1/3] added new methods to ReferenceSequenceFileFactory --- .../reference/ReferenceSequenceFileFactory.java | 63 ++++++++++++++++------ .../ReferenceSequenceFileFactoryTests.java | 19 +++++++ 2 files changed, 66 insertions(+), 16 deletions(-) diff --git a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java index 5978072d7..8a67bccf5 100644 --- a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java +++ b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java @@ -24,6 +24,8 @@ package htsjdk.samtools.reference; +import htsjdk.samtools.util.IOUtil; + import java.io.File; import java.io.FileNotFoundException; import java.nio.file.Path; @@ -113,24 +115,53 @@ public static ReferenceSequenceFile getReferenceSequenceFile(final Path path, fi * @param preferIndexed if true attempt to return an indexed reader that supports non-linear traversal, else return the non-indexed reader */ public static ReferenceSequenceFile getReferenceSequenceFile(final Path path, final boolean truncateNamesAtWhitespace, final boolean preferIndexed) { - final String name = path.getFileName().toString(); - for (final String ext : FASTA_EXTENSIONS) { - if (name.endsWith(ext)) { - // Using faidx requires truncateNamesAtWhitespace - if (truncateNamesAtWhitespace && preferIndexed && IndexedFastaSequenceFile.canCreateIndexedFastaReader(path)) { - try { - return new IndexedFastaSequenceFile(path); - } - catch (final FileNotFoundException e) { - throw new IllegalStateException("Should never happen, because existence of files has been checked.", e); - } - } - else { - return new FastaSequenceFile(path, truncateNamesAtWhitespace); - } + // this should throw an exception if the fasta file is not
supported + getFastaExtension(path); + // Using faidx requires truncateNamesAtWhitespace + if (truncateNamesAtWhitespace && preferIndexed && IndexedFastaSequenceFile.canCreateIndexedFastaReader(path)) { + try { + return new IndexedFastaSequenceFile(path); + } + catch (final FileNotFoundException e) { + throw new IllegalStateException("Should never happen, because existence of files has been checked.", e); } } + else { + return new FastaSequenceFile(path, truncateNamesAtWhitespace); + } + } + + /** + * Returns the default dictionary name for a FASTA file. + * + * @param file the reference sequence file on disk. + */ + public static File getDefaultDictionaryForReferenceSequence(final File file) { + return getDefaultDictionaryForReferenceSequence(file.toPath()).toFile(); + } + + /** + * Returns the default dictionary name for a FASTA file. + * + * @param path the reference sequence file path. + */ + public static Path getDefaultDictionaryForReferenceSequence(final Path path) { + final String name = path.getFileName().toString(); + final int extensionIndex = name.length() - getFastaExtension(path).length(); + return path.resolveSibling(name.substring(0, extensionIndex) + IOUtil.DICT_FILE_EXTENSION); + } - throw new IllegalArgumentException("File is not a supported reference file type: " + path.toAbsolutePath()); + /** + * Returns the FASTA extension for the path. + * + * @param path the reference sequence file path. + * + * @throws IllegalArgumentException if the file is not a supported reference file. 
+ */ + public static String getFastaExtension(final Path path) { + final String name = path.getFileName().toString(); + return FASTA_EXTENSIONS.stream().filter(name::endsWith).findFirst() + .orElseGet(() -> {throw new IllegalArgumentException("File is not a supported reference file type: " + path.toAbsolutePath());}); } + } diff --git a/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileFactoryTests.java b/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileFactoryTests.java index 6eeae7b99..ac6f07200 100644 --- a/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileFactoryTests.java +++ b/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileFactoryTests.java @@ -1,6 +1,7 @@ package htsjdk.samtools.reference; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; @@ -36,4 +37,22 @@ Assert.assertTrue(f instanceof IndexedFastaSequenceFile, "Got non-indexed reader by default."); } + + @DataProvider + public Object[][] fastaNames() { + return new Object[][] { + {"break.fa", "break.dict"}, + {"break.txt.txt", "break.txt.dict"}, + {"break.fasta.fasta", "break.fasta.dict"}, + {"break.fa.gz", "break.dict"}, + {"break.txt.gz.txt.gz", "break.txt.gz.dict"}, + {"break.fasta.gz.fasta.gz", "break.fasta.gz.dict"} + }; + } + + @Test(dataProvider = "fastaNames") + public void testGetDefaultDictionaryForReferenceSequence(final String fastaFile, final String expectedDict) throws Exception { + Assert.assertEquals(ReferenceSequenceFileFactory.getDefaultDictionaryForReferenceSequence(new File(fastaFile)), new File(expectedDict)); + } + } From 56f62438d287f71c07e86d96f0510d7d5ec51e8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 14 Dec 2016 16:12:38 +0100 Subject: [PATCH 2/3] using the methods to simplify code --- .../reference/AbstractFastaSequenceFile.java | 27 ++++++---------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff 
--git a/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java index 86f11fead..badcf1987 100644 --- a/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java +++ b/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java @@ -93,29 +93,16 @@ protected static Path findSequenceDictionary(final Path path) { if (path == null) { return null; } - // Try and locate the dictionary - Path dictionary = path.toAbsolutePath(); - Path dictionaryExt = path.toAbsolutePath(); - boolean fileTypeSupported = false; - for (final String extension : ReferenceSequenceFileFactory.FASTA_EXTENSIONS) { - String filename = dictionary.getFileName().toString(); - if (filename.endsWith(extension)) { - dictionaryExt = dictionary.resolveSibling(filename + IOUtil - .DICT_FILE_EXTENSION); - String filenameNoExt = filename.substring(0, filename.lastIndexOf(extension)); - dictionary = dictionary.resolveSibling(filenameNoExt+ IOUtil.DICT_FILE_EXTENSION); - fileTypeSupported = true; - break; - } - } - if (!fileTypeSupported) - throw new IllegalArgumentException("File is not a supported reference file type: " + path.toAbsolutePath()); - - if (Files.exists(dictionary)) + // Try and locate the dictionary with the default method + final Path dictionary = ReferenceSequenceFileFactory.getDefaultDictionaryForReferenceSequence(path); path.toAbsolutePath(); + if (Files.exists(dictionary)) { return dictionary; + } // try without removing the file extension - if (Files.exists(dictionaryExt)) + final Path dictionaryExt = path.resolveSibling(path.getFileName().toString() + IOUtil.DICT_FILE_EXTENSION); + if (Files.exists(dictionaryExt)) { return dictionaryExt; + } else return null; } From 3f30e346a2c6866187e8983b6b41c5fca01e987b Mon Sep 17 00:00:00 2001 From: magicDGS Date: Tue, 27 Dec 2016 12:04:24 +0100 Subject: [PATCH 3/3] address comments --- 
.../java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java | 3 +-- .../htsjdk/samtools/reference/ReferenceSequenceFileFactoryTests.java | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java index 8a67bccf5..2b0b1e7fc 100644 --- a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java +++ b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java @@ -125,8 +125,7 @@ public static ReferenceSequenceFile getReferenceSequenceFile(final Path path, fi catch (final FileNotFoundException e) { throw new IllegalStateException("Should never happen, because existence of files has been checked.", e); } - } - else { + } else { return new FastaSequenceFile(path, truncateNamesAtWhitespace); } } diff --git a/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileFactoryTests.java b/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileFactoryTests.java index ac6f07200..9ac2ea343 100644 --- a/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileFactoryTests.java +++ b/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileFactoryTests.java @@ -54,5 +54,4 @@ public void testGetDefaultDictionaryForReferenceSequence(final String fastaFile, final String expectedDict) throws Exception { Assert.assertEquals(ReferenceSequenceFileFactory.getDefaultDictionaryForReferenceSequence(new File(fastaFile)), new File(expectedDict)); } - }