diff --git a/src/main/java/picard/sam/CreateSequenceDictionary.java b/src/main/java/picard/sam/CreateSequenceDictionary.java index 6aa587a79..90774a748 100644 --- a/src/main/java/picard/sam/CreateSequenceDictionary.java +++ b/src/main/java/picard/sam/CreateSequenceDictionary.java @@ -23,6 +23,7 @@ */ package picard.sam; +import com.google.common.annotations.VisibleForTesting; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceDictionaryCodec; import htsjdk.samtools.SAMSequenceRecord; @@ -32,6 +33,7 @@ import htsjdk.samtools.util.AsciiWriter; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.IOUtil; +import htsjdk.samtools.util.Log; import htsjdk.samtools.util.Md5CalculatingOutputStream; import htsjdk.samtools.util.RuntimeIOException; import htsjdk.samtools.util.SortingCollection; @@ -50,6 +52,7 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.List; +import java.util.Optional; import java.util.Set; /** @@ -79,11 +82,13 @@ "
"; // The following attributes define the command-line arguments + private static final Log logger = Log.getInstance(CreateSequenceDictionary.class); + @Option(doc = "Input reference fasta or fasta.gz", shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME) public File REFERENCE; - @Option(doc = "Output SAM or BAM file containing only the sequence dictionary", - shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME) + @Option(doc = "Output SAM file containing only the sequence dictionary. By default it will use the base name of the input reference with the .dict extension", + shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, optional = true) public File OUTPUT; @Option(doc = "Put into AS field of sequence dictionary entry if supplied", optional = true) @@ -146,9 +151,24 @@ public SAMSequenceDictionary makeSequenceDictionary(final File referenceFile) { if (URI == null) { URI = "file:" + REFERENCE.getAbsolutePath(); } + if (OUTPUT == null) { + // TODO: use the htsjdk method implemented in https://github.com/samtools/htsjdk/pull/774 + OUTPUT = getDefaultDictionaryForReferenceSequence(REFERENCE); + logger.info("Output dictionary will be written in ", OUTPUT); + } return null; } + // TODO: this method will be in htsjdk (https://github.com/samtools/htsjdk/pull/774) + @VisibleForTesting + static File getDefaultDictionaryForReferenceSequence(final File fastaFile) { + final String name = fastaFile.getName(); + final String extension = ReferenceSequenceFileFactory.FASTA_EXTENSIONS.stream().filter(name::endsWith).findFirst() + .orElseGet(() -> {throw new IllegalArgumentException("File is not a supported reference file type: " + fastaFile.getAbsolutePath());}); + final int extensionIndex = name.length() - extension.length(); + return new File(fastaFile.getParentFile(), name.substring(0, extensionIndex) + IOUtil.DICT_FILE_EXTENSION); + } + /** * Do the work after command line has been parsed. 
* RuntimeException may be thrown by this method, and are reported appropriately. diff --git a/src/test/java/picard/sam/CreateSequenceDictionaryTest.java b/src/test/java/picard/sam/CreateSequenceDictionaryTest.java index bb0821482..802714528 100644 --- a/src/test/java/picard/sam/CreateSequenceDictionaryTest.java +++ b/src/test/java/picard/sam/CreateSequenceDictionaryTest.java @@ -24,6 +24,7 @@ package picard.sam; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import picard.cmdline.CommandLineProgramTest; import picard.PicardException; @@ -47,6 +48,23 @@ public String getCommandLineProgramName() { return CreateSequenceDictionary.class.getSimpleName(); } + @DataProvider + public Object[][] fastaNames() { /* Each row is {reference file name, expected default dictionary file name}. */ + return new Object[][] { + {"break.fa", "break.dict"}, + {"break.txt.txt", "break.txt.dict"}, + {"break.fasta.fasta", "break.fasta.dict"}, + {"break.fa.gz", "break.dict"}, + {"break.txt.gz.txt.gz", "break.txt.gz.dict"}, + {"break.fasta.gz.fasta.gz", "break.fasta.gz.dict"} + }; + } + + @Test(dataProvider = "fastaNames") + public void testGetDefaultDictionaryForReferenceSequence(final String fastaFile, final String expectedDict) throws Exception { /* Compares the dictionary File derived from one data-provider row with the expected File. */ + Assert.assertEquals(CreateSequenceDictionary.getDefaultDictionaryForReferenceSequence(new File(fastaFile)), new File(expectedDict)); + } + @Test public void testBasic() throws Exception { final File outputDict = File.createTempFile("CreateSequenceDictionaryTest.", ".dict"); @@ -61,6 +79,19 @@ public void testBasic() throws Exception { } @Test + public void testDefaultOutputFile() throws Exception { /* Runs the tool with no OUTPUT argument and checks that basic.dict appears under TEST_DATA_DIR/sam; assumes BASIC_FASTA is a reference in that directory whose base name is basic (TODO confirm). */ + final File expectedDict = new File(TEST_DATA_DIR + "/sam", "basic.dict"); + expectedDict.deleteOnExit(); /* Registered before the run so the generated file is removed when the JVM exits. */ + Assert.assertFalse(expectedDict.exists()); + final String[] argv = { + "REFERENCE=" + BASIC_FASTA, + "TRUNCATE_NAMES_AT_WHITESPACE=false" + }; + Assert.assertEquals(runPicardCommandLine(argv), 0); + Assert.assertTrue(expectedDict.exists()); + } + + @Test public 
void testForEquivalence() throws Exception { final File outputDict = File.createTempFile("CreateSequenceDictionaryTest.", ".dict"); outputDict.delete();