diff --git a/src/main/java/picard/sam/CreateSequenceDictionary.java b/src/main/java/picard/sam/CreateSequenceDictionary.java
index 6aa587a79..90774a748 100644
--- a/src/main/java/picard/sam/CreateSequenceDictionary.java
+++ b/src/main/java/picard/sam/CreateSequenceDictionary.java
@@ -23,6 +23,7 @@
*/
package picard.sam;
+import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceDictionaryCodec;
import htsjdk.samtools.SAMSequenceRecord;
@@ -32,6 +33,7 @@
import htsjdk.samtools.util.AsciiWriter;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.Md5CalculatingOutputStream;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.samtools.util.SortingCollection;
@@ -50,6 +52,7 @@
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
+import java.util.Optional;
import java.util.Set;
/**
@@ -79,11 +82,13 @@
"
";
// The following attributes define the command-line arguments
+ private static final Log logger = Log.getInstance(CreateSequenceDictionary.class);
+
@Option(doc = "Input reference fasta or fasta.gz", shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME)
public File REFERENCE;
- @Option(doc = "Output SAM or BAM file containing only the sequence dictionary",
- shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME)
+ @Option(doc = "Output SAM file containing only the sequence dictionary. By default it will use the base name of the input reference with the .dict extension",
+ shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, optional = true)
public File OUTPUT;
@Option(doc = "Put into AS field of sequence dictionary entry if supplied", optional = true)
@@ -146,9 +151,24 @@ public SAMSequenceDictionary makeSequenceDictionary(final File referenceFile) {
if (URI == null) {
URI = "file:" + REFERENCE.getAbsolutePath();
}
+ if (OUTPUT == null) {
+ // TODO: use the htsjdk method implemented in https://github.com/samtools/htsjdk/pull/774
+ OUTPUT = getDefaultDictionaryForReferenceSequence(REFERENCE);
+ logger.info("Output dictionary will be written in ", OUTPUT);
+ }
return null;
}
+ /** Maps a FASTA file to its default dictionary: same parent and base name, with the trailing FASTA extension replaced by IOUtil.DICT_FILE_EXTENSION; throws IllegalArgumentException if the name ends with none of ReferenceSequenceFileFactory.FASTA_EXTENSIONS. TODO: this method will be in htsjdk (https://github.com/samtools/htsjdk/pull/774) */
+ @VisibleForTesting
+ static File getDefaultDictionaryForReferenceSequence(final File fastaFile) {
+ final String name = fastaFile.getName();
+ final String extension = ReferenceSequenceFileFactory.FASTA_EXTENSIONS.stream().filter(name::endsWith).findFirst()
+ .orElseThrow(() -> new IllegalArgumentException("File is not a supported reference file type: " + fastaFile.getAbsolutePath()));
+ final int extensionIndex = name.length() - extension.length(); // offset at which the matched extension starts
+ return new File(fastaFile.getParentFile(), name.substring(0, extensionIndex) + IOUtil.DICT_FILE_EXTENSION);
+ }
+
/**
* Do the work after command line has been parsed.
* RuntimeException may be thrown by this method, and are reported appropriately.
diff --git a/src/test/java/picard/sam/CreateSequenceDictionaryTest.java b/src/test/java/picard/sam/CreateSequenceDictionaryTest.java
index bb0821482..802714528 100644
--- a/src/test/java/picard/sam/CreateSequenceDictionaryTest.java
+++ b/src/test/java/picard/sam/CreateSequenceDictionaryTest.java
@@ -24,6 +24,7 @@
package picard.sam;
import org.testng.Assert;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.cmdline.CommandLineProgramTest;
import picard.PicardException;
@@ -47,6 +48,23 @@ public String getCommandLineProgramName() {
return CreateSequenceDictionary.class.getSimpleName();
}
+ @DataProvider
+ public Object[][] fastaNames() { // each row: {reference file name, expected dictionary file name}
+ return new Object[][] {
+ {"break.fa", "break.dict"},
+ {"break.txt.txt", "break.txt.dict"}, // doubled extension: only the trailing one is expected to be replaced
+ {"break.fasta.fasta", "break.fasta.dict"},
+ {"break.fa.gz", "break.dict"}, // compressed reference: the whole ".fa.gz" is expected to be replaced
+ {"break.txt.gz.txt.gz", "break.txt.gz.dict"},
+ {"break.fasta.gz.fasta.gz", "break.fasta.gz.dict"}
+ };
+ }
+
+ @Test(dataProvider = "fastaNames")
+ public void testGetDefaultDictionaryForReferenceSequence(final String fastaFile, final String expectedDict) throws Exception {
+ Assert.assertEquals(CreateSequenceDictionary.getDefaultDictionaryForReferenceSequence(new File(fastaFile)), new File(expectedDict)); // both sides are File objects built from the bare names supplied by fastaNames
+ }
+
@Test
public void testBasic() throws Exception {
final File outputDict = File.createTempFile("CreateSequenceDictionaryTest.", ".dict");
@@ -61,6 +79,19 @@ public void testBasic() throws Exception {
}
@Test
+ public void testDefaultOutputFile() throws Exception {
+ // No OUTPUT argument is passed below; the expected dictionary location is hard-coded here rather than derived from BASIC_FASTA.
+ final File defaultDict = new File(TEST_DATA_DIR + "/sam", "basic.dict");
+ // Register the clean-up first so the file is removed at JVM exit whatever the assertions below do.
+ defaultDict.deleteOnExit();
+ Assert.assertFalse(defaultDict.exists());
+ final String[] args = {"REFERENCE=" + BASIC_FASTA, "TRUNCATE_NAMES_AT_WHITESPACE=false"};
+ // The command line must return 0 and the dictionary must then exist at the expected location.
+ Assert.assertEquals(runPicardCommandLine(args), 0);
+ Assert.assertTrue(defaultDict.exists());
+ }
+
+ @Test
public void testForEquivalence() throws Exception {
final File outputDict = File.createTempFile("CreateSequenceDictionaryTest.", ".dict");
outputDict.delete();