From 34d67f9053315ecd56b25d253957bef17e6c0868 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 14 Dec 2016 14:28:51 +0100 Subject: [PATCH 1/3] default output for CreateSequenceDictionary --- src/main/java/picard/sam/CreateSequenceDictionary.java | 12 ++++++++++-- src/test/java/picard/sam/CreateSequenceDictionaryTest.java | 13 +++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/main/java/picard/sam/CreateSequenceDictionary.java b/src/main/java/picard/sam/CreateSequenceDictionary.java index 6aa587a79..484ce112e 100644 --- a/src/main/java/picard/sam/CreateSequenceDictionary.java +++ b/src/main/java/picard/sam/CreateSequenceDictionary.java @@ -82,8 +82,8 @@ @Option(doc = "Input reference fasta or fasta.gz", shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME) public File REFERENCE; - @Option(doc = "Output SAM or BAM file containing only the sequence dictionary", - shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME) + @Option(doc = "Output SAM file containing only the sequence dictionary. 
By default it will use the base name of the input reference with the .dict extension", + shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, optional = true) public File OUTPUT; @Option(doc = "Put into AS field of sequence dictionary entry if supplied", optional = true) @@ -146,6 +146,14 @@ public SAMSequenceDictionary makeSequenceDictionary(final File referenceFile) { if (URI == null) { URI = "file:" + REFERENCE.getAbsolutePath(); } + if (OUTPUT == null) { + // determine the name for the dict file in the same way as CachingIndexedFastaSequenceFile.checkAndCreate + final String name = REFERENCE.getName(); + final String fastaExt = ReferenceSequenceFileFactory.FASTA_EXTENSIONS.stream() + .filter(name::endsWith).findFirst().orElseGet(() -> ""); + OUTPUT = new File(REFERENCE.getParentFile(), + REFERENCE.getName().replace(fastaExt, IOUtil.DICT_FILE_EXTENSION)); + } return null; } diff --git a/src/test/java/picard/sam/CreateSequenceDictionaryTest.java b/src/test/java/picard/sam/CreateSequenceDictionaryTest.java index bb0821482..e371434f4 100644 --- a/src/test/java/picard/sam/CreateSequenceDictionaryTest.java +++ b/src/test/java/picard/sam/CreateSequenceDictionaryTest.java @@ -61,6 +61,19 @@ public void testBasic() throws Exception { } @Test + public void testDefaultOutputFile() throws Exception { + final File expectedDict = new File(TEST_DATA_DIR + "/sam", "basic.dict"); + expectedDict.deleteOnExit(); + Assert.assertFalse(expectedDict.exists()); + final String[] argv = { + "REFERENCE=" + BASIC_FASTA, + "TRUNCATE_NAMES_AT_WHITESPACE=false" + }; + Assert.assertEquals(runPicardCommandLine(argv), 0); + Assert.assertTrue(expectedDict.exists()); + } + + @Test public void testForEquivalence() throws Exception { final File outputDict = File.createTempFile("CreateSequenceDictionaryTest.", ".dict"); outputDict.delete(); From d84aaec4a9aec238a65d19a7101c08380f522c7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 14 Dec 2016 15:46:38 
+0100 Subject: [PATCH 2/3] addressing comments --- .../java/picard/sam/CreateSequenceDictionary.java | 26 +++++++++++++++++----- .../picard/sam/CreateSequenceDictionaryTest.java | 18 +++++++++++++++ 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/src/main/java/picard/sam/CreateSequenceDictionary.java b/src/main/java/picard/sam/CreateSequenceDictionary.java index 484ce112e..0602887bc 100644 --- a/src/main/java/picard/sam/CreateSequenceDictionary.java +++ b/src/main/java/picard/sam/CreateSequenceDictionary.java @@ -23,6 +23,7 @@ */ package picard.sam; +import com.google.common.annotations.VisibleForTesting; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceDictionaryCodec; import htsjdk.samtools.SAMSequenceRecord; @@ -32,6 +33,7 @@ import htsjdk.samtools.util.AsciiWriter; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.IOUtil; +import htsjdk.samtools.util.Log; import htsjdk.samtools.util.Md5CalculatingOutputStream; import htsjdk.samtools.util.RuntimeIOException; import htsjdk.samtools.util.SortingCollection; @@ -50,6 +52,7 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.List; +import java.util.Optional; import java.util.Set; /** @@ -79,6 +82,8 @@ "
"; // The following attributes define the command-line arguments + private static final Log logger = Log.getInstance(CreateSequenceDictionary.class); + @Option(doc = "Input reference fasta or fasta.gz", shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME) public File REFERENCE; @@ -147,16 +152,25 @@ public SAMSequenceDictionary makeSequenceDictionary(final File referenceFile) { URI = "file:" + REFERENCE.getAbsolutePath(); } if (OUTPUT == null) { - // determine the name for the dict file in the same way as CachingIndexedFastaSequenceFile.checkAndCreate - final String name = REFERENCE.getName(); - final String fastaExt = ReferenceSequenceFileFactory.FASTA_EXTENSIONS.stream() - .filter(name::endsWith).findFirst().orElseGet(() -> ""); - OUTPUT = new File(REFERENCE.getParentFile(), - REFERENCE.getName().replace(fastaExt, IOUtil.DICT_FILE_EXTENSION)); + OUTPUT = getDefaultDictionaryNameForFasta(REFERENCE); + logger.info("Output dictionary will be written in ", OUTPUT); } return null; } + // TODO: move this method to ReferenceSequenceFileFactory + @VisibleForTesting + static File getDefaultDictionaryNameForFasta(final File fastaFile) { + final String name = fastaFile.getName(); + final Optional extension = ReferenceSequenceFileFactory.FASTA_EXTENSIONS + .stream().filter(name::endsWith).findFirst(); + if (!extension.isPresent()) { + throw new IllegalArgumentException("File is not a supported reference file type: " + fastaFile.getAbsolutePath()); + } + final int extensionIndex = name.length() - extension.get().length(); + return new File(fastaFile.getParentFile(), name.substring(0, extensionIndex) + IOUtil.DICT_FILE_EXTENSION); + } + /** * Do the work after command line has been parsed. * RuntimeException may be thrown by this method, and are reported appropriately. 
diff --git a/src/test/java/picard/sam/CreateSequenceDictionaryTest.java b/src/test/java/picard/sam/CreateSequenceDictionaryTest.java index e371434f4..4ddfaa670 100644 --- a/src/test/java/picard/sam/CreateSequenceDictionaryTest.java +++ b/src/test/java/picard/sam/CreateSequenceDictionaryTest.java @@ -24,6 +24,7 @@ package picard.sam; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import picard.cmdline.CommandLineProgramTest; import picard.PicardException; @@ -47,6 +48,23 @@ public String getCommandLineProgramName() { return CreateSequenceDictionary.class.getSimpleName(); } + @DataProvider + public Object[][] fastaNames() { + return new Object[][] { + {"break.fa", "break.dict"}, + {"break.txt.txt", "break.txt.dict"}, + {"break.fasta.fasta", "break.fasta.dict"}, + {"break.fa.gz", "break.dict"}, + {"break.txt.gz.txt.gz", "break.txt.gz.dict"}, + {"break.fasta.gz.fasta.gz", "break.fasta.gz.dict"} + }; + } + + @Test(dataProvider = "fastaNames") + public void testGetDictionaryNameForFasta(final String fastaFile, final String expectedDict) throws Exception { + Assert.assertEquals(CreateSequenceDictionary.getDefaultDictionaryNameForFasta(new File(fastaFile)), new File(expectedDict)); + } + @Test public void testBasic() throws Exception { final File outputDict = File.createTempFile("CreateSequenceDictionaryTest.", ".dict"); From bdb4cd86f22c9c8dbbd9de7fdc7bf303e9a2c943 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 14 Dec 2016 16:23:25 +0100 Subject: [PATCH 3/3] changes from the HTSJDK patch --- src/main/java/picard/sam/CreateSequenceDictionary.java | 16 +++++++--------- .../java/picard/sam/CreateSequenceDictionaryTest.java | 4 ++-- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/main/java/picard/sam/CreateSequenceDictionary.java b/src/main/java/picard/sam/CreateSequenceDictionary.java index 0602887bc..90774a748 100644 --- 
a/src/main/java/picard/sam/CreateSequenceDictionary.java +++ b/src/main/java/picard/sam/CreateSequenceDictionary.java @@ -152,22 +152,20 @@ public SAMSequenceDictionary makeSequenceDictionary(final File referenceFile) { URI = "file:" + REFERENCE.getAbsolutePath(); } if (OUTPUT == null) { - OUTPUT = getDefaultDictionaryNameForFasta(REFERENCE); + // TODO: use the htsjdk method implemented in https://github.com/samtools/htsjdk/pull/774 + OUTPUT = getDefaultDictionaryForReferenceSequence(REFERENCE); logger.info("Output dictionary will be written in ", OUTPUT); } return null; } - // TODO: move this method to ReferenceSequenceFileFactory + // TODO: this method will be in htsjdk (https://github.com/samtools/htsjdk/pull/774). Swaps the matched FASTA extension of the file name for IOUtil.DICT_FILE_EXTENSION (same parent directory); throws IllegalArgumentException when no FASTA extension matches. @VisibleForTesting - static File getDefaultDictionaryNameForFasta(final File fastaFile) { + static File getDefaultDictionaryForReferenceSequence(final File fastaFile) { final String name = fastaFile.getName(); - final Optional extension = ReferenceSequenceFileFactory.FASTA_EXTENSIONS - .stream().filter(name::endsWith).findFirst(); - if (!extension.isPresent()) { - throw new IllegalArgumentException("File is not a supported reference file type: " + fastaFile.getAbsolutePath()); - } - final int extensionIndex = name.length() - extension.get().length(); + final String extension = ReferenceSequenceFileFactory.FASTA_EXTENSIONS.stream().filter(name::endsWith).findFirst() + .orElseThrow(() -> new IllegalArgumentException("File is not a supported reference file type: " + fastaFile.getAbsolutePath())); + final int extensionIndex = name.length() - extension.length(); return new File(fastaFile.getParentFile(), name.substring(0, extensionIndex) + IOUtil.DICT_FILE_EXTENSION); } diff --git a/src/test/java/picard/sam/CreateSequenceDictionaryTest.java b/src/test/java/picard/sam/CreateSequenceDictionaryTest.java index 4ddfaa670..802714528 100644 --- a/src/test/java/picard/sam/CreateSequenceDictionaryTest.java +++
b/src/test/java/picard/sam/CreateSequenceDictionaryTest.java @@ -61,8 +61,8 @@ public String getCommandLineProgramName() { } @Test(dataProvider = "fastaNames") - public void testGetDictionaryNameForFasta(final String fastaFile, final String expectedDict) throws Exception { - Assert.assertEquals(CreateSequenceDictionary.getDefaultDictionaryNameForFasta(new File(fastaFile)), new File(expectedDict)); + public void testGetDefaultDictionaryForReferenceSequence(final String fastaFile, final String expectedDict) throws Exception { + Assert.assertEquals(CreateSequenceDictionary.getDefaultDictionaryForReferenceSequence(new File(fastaFile)), new File(expectedDict)); } @Test