it = reader.iterator(); it.hasNext(); ) {
@@ -127,8 +128,9 @@ protected int doWork() {
libraryToWriter.get(rg.getLibrary()).addAlignment(sam);
} else {
if (unknown == null) {
- unknown = factory.makeSAMOrBAMWriter(unknownHeader, true,
- new File(OUTPUT, "unknown" + extension));
+ unknown = factory.makeWriter(unknownHeader, true,
+ new File(OUTPUT, "unknown" + extension),
+ REFERENCE_SEQUENCE);
}
unknown.addAlignment(sam);
}
diff --git a/src/main/java/picard/sam/markduplicates/MarkDuplicates.java b/src/main/java/picard/sam/markduplicates/MarkDuplicates.java
index 0141c4bdc..cf267c2e8 100644
--- a/src/main/java/picard/sam/markduplicates/MarkDuplicates.java
+++ b/src/main/java/picard/sam/markduplicates/MarkDuplicates.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2009-2016 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -64,7 +64,7 @@
)
public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
static final String USAGE_SUMMARY = "Identifies duplicate reads. ";
- static final String USAGE_DETAILS = "This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are " +
+ static final String USAGE_DETAILS = "
This tool locates and tags duplicate reads in a SAM, BAM or CRAM file, where duplicate reads are " +
"defined as originating from a single fragment of DNA. Duplicates can arise during sample preparation e.g. library " +
"construction using PCR. See also " +
"EstimateLibraryComplexity" +
@@ -77,16 +77,16 @@
" collected, the tool differentiates the primary and duplicate reads using an algorithm that ranks reads by the sums " +
"of their base-quality scores (default method).
" +
- "The tool's main output is a new SAM or BAM file, in which duplicates have been identified in the SAM flags field for each" +
+ "
The tool's main output is a new SAM, BAM or CRAM file, in which duplicates have been identified in the SAM flags field for each" +
" read. Duplicates are marked with the hexadecimal value of 0x0400, which corresponds to a decimal value of 1024. " +
"If you are not familiar with this type of annotation, please see the following " +
"blog post for additional information.
" +
"" +
"Although the bitwise flag annotation indicates whether a read was marked as a duplicate, it does not identify the type of " +
"duplicate. To do this, a new tag called the duplicate type (DT) tag was recently added as an optional output in " +
- "the 'optional field' section of a SAM/BAM file. Invoking the TAGGING_POLICY option," +
+ "the 'optional field' section of a SAM/BAM/CRAM file. Invoking the TAGGING_POLICY option," +
" you can instruct the program to mark all the duplicates (All), only the optical duplicates (OpticalOnly), or no " +
- "duplicates (DontTag). The records within the output of a SAM/BAM file will have values for the 'DT' tag (depending on the invoked " +
+ "duplicates (DontTag). The records within the output of a SAM/BAM/CRAM file will have values for the 'DT' tag (depending on the invoked " +
"TAGGING_POLICY), as either library/PCR-generated duplicates (LB), or sequencing-platform artifact duplicates (SQ). " +
"This tool uses the READ_NAME_REGEX and the OPTICAL_DUPLICATE_PIXEL_DISTANCE options as the primary methods to identify " +
"and differentiate duplicate types. Set READ_NAME_REGEX to null to skip optical duplicate detection, e.g. for RNA-seq " +
@@ -119,7 +119,7 @@
/** Enum used to control how duplicates are flagged in the DT optional tag on each read. */
public enum DuplicateTaggingPolicy { DontTag, OpticalOnly, All }
- /** The optional attribute in SAM/BAM files used to store the duplicate type. */
+ /** The optional attribute in SAM/BAM/CRAM files used to store the duplicate type. */
public static final String DUPLICATE_TYPE_TAG = "DT";
/** The duplicate type tag value for duplicate type: library. */
public static final String DUPLICATE_TYPE_LIBRARY = "LB";
@@ -205,7 +205,7 @@ public static void main(final String[] args) {
}
/**
- * Main work method. Reads the BAM file once and collects sorted information about
+ * Main work method. Reads the input (SAM, BAM or CRAM) once and collects sorted information about
* the 5' ends of both ends of each read (or just one end in the case of pairs).
* Then makes a pass through those determining duplicates before re-reading the
* input file and writing it out with duplication flags set correctly.
@@ -250,9 +250,10 @@ protected int doWork() {
// Key: previous PG ID on a SAM Record (or null). Value: New PG ID to replace it.
final Map<String, String> chainedPgIds = getChainedPgIds(outputHeader);
- final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader,
+ final SAMFileWriter out = new SAMFileWriterFactory().makeWriter(outputHeader,
true,
- OUTPUT);
+ OUTPUT,
+ REFERENCE_SEQUENCE);
// Now copy over the file while marking all the necessary indexes as duplicates
long recordInFileIndex = 0;
diff --git a/src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java b/src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java
index fb9aff26a..564cb18a3 100644
--- a/src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java
+++ b/src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2014 The Broad Institute
+ * Copyright (c) 2014-2016 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -64,7 +64,7 @@
)
public class MarkDuplicatesWithMateCigar extends AbstractMarkDuplicatesCommandLineProgram {
static final String USAGE_SUMMARY = "Identifies duplicate reads, accounting for mate CIGAR. ";
- static final String USAGE_DETAILS = "This tool locates and tags duplicate reads (both PCR and optical) in a BAM or SAM file, where " +
+ static final String USAGE_DETAILS = "This tool locates and tags duplicate reads (both PCR and optical) in a BAM, SAM or CRAM file, where " +
"duplicate reads are defined as originating from the same original fragment of DNA, taking into account the CIGAR string of " +
"read mates.
" +
"" +
@@ -136,9 +136,10 @@ protected int doWork() {
final Map<String, String> chainedPgIds = getChainedPgIds(outputHeader);
// Open the output
- final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader,
+ final SAMFileWriter out = new SAMFileWriterFactory().makeWriter(outputHeader,
true,
- OUTPUT);
+ OUTPUT,
+ REFERENCE_SEQUENCE);
// Create the mark duplicate iterator. The duplicate marking is handled by the iterator, conveniently.
final MarkDuplicatesWithMateCigarIterator iterator = new MarkDuplicatesWithMateCigarIterator(headerAndIterator.header,
diff --git a/src/main/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java b/src/main/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java
index e8fe4d064..baac38a30 100644
--- a/src/main/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java
+++ b/src/main/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2015 The Broad Institute
+ * Copyright (c) 2015-2016 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -62,9 +62,9 @@
* @author nhomer
*/
@CommandLineProgramProperties(
- usage = "Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules. " +
+ usage = "Examines aligned records in the supplied SAM/BAM/CRAM file to locate duplicate molecules. " +
"All records are then written to the output file with the duplicate records flagged.",
- usageShort = "Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules.",
+ usageShort = "Examines aligned records in the supplied SAM/BAM/CRAM file to locate duplicate molecules.",
programGroup = Testing.class
)
public class SimpleMarkDuplicatesWithMateCigar extends MarkDuplicates {
@@ -110,9 +110,10 @@ protected int doWork() {
final Map<String, String> chainedPgIds = getChainedPgIds(outputHeader);
// Open the output
- final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader,
+ final SAMFileWriter out = new SAMFileWriterFactory().makeWriter(outputHeader,
false,
- OUTPUT);
+ OUTPUT,
+ REFERENCE_SEQUENCE);
final SAMRecordDuplicateComparator comparator = new SAMRecordDuplicateComparator(Collections.singletonList(headerAndIterator.header));
comparator.setScoringStrategy(this.DUPLICATE_SCORING_STRATEGY);
diff --git a/src/main/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java b/src/main/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java
index 5bf6972f4..3af3267dd 100644
--- a/src/main/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java
+++ b/src/main/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java
@@ -36,7 +36,6 @@
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.Histogram;
-import htsjdk.samtools.util.Log;
import picard.PicardException;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
@@ -52,14 +51,14 @@
/**
* Abstract class that holds parameters and methods common to classes that perform duplicate
- * detection and/or marking within SAM/BAM files.
+ * detection and/or marking within SAM/BAM/CRAM files.
*
* @author Nils Homer
*/
public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractOpticalDuplicateFinderCommandLineProgram {
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME,
- doc = "One or more input SAM or BAM files to analyze. Must be coordinate sorted.")
+ doc = "One or more input SAM, BAM or CRAM files to analyze. Must be coordinate sorted.")
public List<String> INPUT;
@Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME,
@@ -209,6 +208,7 @@ protected SamHeaderAndIterator openInputs() {
for (final String input : INPUT) {
SamReader reader = SamReaderFactory.makeDefault()
.enable(SamReaderFactory.Option.EAGERLY_DECODE)
+ .referenceSequence(REFERENCE_SEQUENCE)
.open(SamInputResource.of(input));
final SAMFileHeader header = reader.getFileHeader();
diff --git a/src/test/java/picard/sam/CramCompatibilityTest.java b/src/test/java/picard/sam/CramCompatibilityTest.java
new file mode 100644
index 000000000..930b84a12
--- /dev/null
+++ b/src/test/java/picard/sam/CramCompatibilityTest.java
@@ -0,0 +1,275 @@
+package picard.sam;
+
+import htsjdk.samtools.SamStreams;
+import htsjdk.samtools.cram.CRAMException;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.TestUtil;
+import org.testng.Assert;
+import org.testng.annotations.AfterTest;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import picard.cmdline.CommandLineProgram;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+
+public class CramCompatibilityTest {
+
+ public static final String CRAM_FILE = "testdata/picard/sam/test_cram_file_coordinate_sorted.cram";
+ public static final String CRAM_FILE_2 = "testdata/picard/sam/test_cram_file_header_only.cram";
+ public static final String CRAM_FILE_ONE_PAIR_MC = "testdata/picard/sam/MarkDuplicates/one_pair_mc.cram";
+
+ public static final String CRAM_FILE_QUERY_SORTED_UNMAPPED = "testdata/picard/sam/unmapped_queryname_sorted.cram";
+ public static final String CRAM_FILE_QUERY_SORTED = "testdata/picard/sam/test_cram_file_query_sorted.cram";
+
+ public static final String REFERENCE_FILE = "testdata/picard/sam/test_cram_file.ref.fa";
+ public static final String FASTQ_FILE = "testdata/picard/sam/fastq2bam/fastq-sanger/5k-v1-Rhodobacter_LW1.sam.fastq";
+
+ public static final String CRAM_UNMAPPED = "testdata/picard/sam/SamFileConverterTest/unmapped.cram";
+ public static final String CRAM_UNMAPPED_WITH_OQ_TAG = "testdata/picard/sam/unmapped_with_oq_tag.cram";
+
+ public static final String CRAM_UNMAPPED_PART_1 = "testdata/picard/sam/unmapped_part_1.cram";
+ public static final String CRAM_UNMAPPED_PART_2 = "testdata/picard/sam/unmapped_part_2.cram";
+
+ public static final String CRAM_SPLIT_UNMAPPED = "testdata/picard/sam/split_test_unmapped.cram";
+
+ public static final String MBA_ALIGNED_CRAM = "testdata/picard/sam/MergeBamAlignment/cliptest.aligned.cram";
+ public static final String MBA_UNMAPPED_CRAM = "testdata/picard/sam/MergeBamAlignment/cliptest.unmapped.cram";
+ public static final String MBA_REFERENCE = "testdata/picard/sam/MergeBamAlignment/cliptest.fasta";
+
+ private static final File outputDir = IOUtil.createTempDir("testdata/picard/sam/CramCompatibilityTest", ".tmp");
+
+ @AfterTest
+ public void tearDown() {
+ TestUtil.recursiveDelete(outputDir);
+ }
+
+ @DataProvider(name = "programArgsForCRAMWithReference")
+ public Object[][] getArgsForCRAMWithReference() {
+ return new Object[][] {
+ {"picard.sam.AddOrReplaceReadGroups",
+ "RGID=4 RGLB=lib1 RGPL=illumina RGPU=unit1 RGSM=20",
+ CRAM_FILE,
+ REFERENCE_FILE
+ },
+ {"picard.sam.CleanSam", null, CRAM_FILE, REFERENCE_FILE},
+ {"picard.sam.DownsampleSam", null, CRAM_FILE, REFERENCE_FILE},
+ {"picard.sam.FixMateInformation", null, CRAM_FILE, REFERENCE_FILE},
+ {"picard.sam.markduplicates.MarkDuplicates",
+ "M=" + createTempFile("MarkDuplicates", ".dir").getAbsolutePath(),
+ CRAM_FILE,
+ REFERENCE_FILE
+ },
+ {"picard.sam.MergeSamFiles", null, CRAM_FILE, REFERENCE_FILE},
+ {"picard.sam.PositionBasedDownsampleSam", "FRACTION=0.5", CRAM_FILE, REFERENCE_FILE},
+ {"picard.sam.SortSam", "SORT_ORDER=queryname", CRAM_FILE, REFERENCE_FILE},
+ {"picard.sam.ReplaceSamHeader", "HEADER=" + CRAM_FILE_2, CRAM_FILE, REFERENCE_FILE},
+ {"picard.sam.RevertOriginalBaseQualitiesAndAddMateCigar", null, CRAM_FILE_QUERY_SORTED, REFERENCE_FILE},
+ {"picard.sam.GatherBamFiles",
+ "I=" + new File(CRAM_UNMAPPED).getAbsolutePath(),
+ CRAM_FILE_QUERY_SORTED,
+ REFERENCE_FILE
+ },
+ {"picard.sam.markduplicates.MarkDuplicatesWithMateCigar",
+ "M=" + createTempFile("MarkDuplicatesWithMateCigar", ".txt").getAbsolutePath(),
+ CRAM_FILE,
+ REFERENCE_FILE
+ },
+ {"picard.sam.markduplicates.SimpleMarkDuplicatesWithMateCigar",
+ "M=" + createTempFile("SimpleMarkDuplicatesWithMateCigar", ".txt").getAbsolutePath(),
+ CRAM_FILE_ONE_PAIR_MC,
+ REFERENCE_FILE
+ },
+ {"picard.sam.ReorderSam",
+ null,
+ CRAM_FILE,
+ REFERENCE_FILE
+ },
+ {"picard.sam.SetNmMdAndUqTags", null, CRAM_FILE, REFERENCE_FILE},
+ {"picard.sam.MergeBamAlignment",
+ "UNMAPPED=" + new File(MBA_UNMAPPED_CRAM).getAbsolutePath() +
+ " ALIGNED=" + new File(MBA_ALIGNED_CRAM).getAbsolutePath(),
+ null,
+ MBA_REFERENCE
+ },
+ {"picard.illumina.MarkIlluminaAdapters",
+ "METRICS=" + createTempFile("picard.illumina.MarkIlluminaAdapters", ".txt").getAbsolutePath(),
+ CRAM_FILE_QUERY_SORTED,
+ REFERENCE_FILE
+ },
+ {"picard.sam.SplitSamByLibrary", null, CRAM_FILE, REFERENCE_FILE}
+ };
+ }
+
+ @Test(dataProvider = "programArgsForCRAMWithReference")
+ public void testShouldWriteCRAMWhenCRAMWithReference(String program,
+ String parameters,
+ String cramFile,
+ String reference) throws IOException, IllegalAccessException, InstantiationException, ClassNotFoundException {
+ if (!program.equals("picard.sam.SplitSamByLibrary")) {
+ final File outputFile = createTempCram(program);
+ launchProgram(program, cramFile, outputFile.getAbsolutePath(), parameters, reference);
+ assertCRAM(outputFile);
+ } else {
+ final File tmpDir = IOUtil.createTempDir(outputDir.getAbsolutePath(), program);
+ launchProgram(program, cramFile, tmpDir.getAbsolutePath(), parameters, reference);
+ assertCRAMs(tmpDir);
+ }
+ }
+
+ @DataProvider(name = "programArgsForCRAMWithoutReferenceToFail")
+ public Object[][] getArgsForCRAMWithoutReferenceToFail() {
+ return new Object[][] {
+ {"picard.sam.AddOrReplaceReadGroups",
+ "RGID=4 RGLB=lib1 RGPL=illumina RGPU=unit1 RGSM=20",
+ CRAM_FILE
+ },
+ {"picard.sam.CleanSam", null, CRAM_FILE},
+ {"picard.sam.DownsampleSam", null, CRAM_FILE},
+ {"picard.sam.FixMateInformation", null, CRAM_FILE},
+ {"picard.sam.markduplicates.MarkDuplicates",
+ "M=" + createTempFile("MarkDuplicates", ".dir").getAbsolutePath(),
+ CRAM_FILE
+ },
+ {"picard.sam.MergeSamFiles", null, CRAM_FILE},
+ {"picard.sam.PositionBasedDownsampleSam", "FRACTION=0.5", CRAM_FILE},
+ {"picard.sam.SortSam", "SORT_ORDER=queryname", CRAM_FILE},
+ {"picard.sam.ReplaceSamHeader", "HEADER=" + CRAM_FILE_2, CRAM_FILE},
+ {"picard.sam.RevertOriginalBaseQualitiesAndAddMateCigar", null, CRAM_FILE_QUERY_SORTED},
+ {"picard.sam.GatherBamFiles",
+ "I=" + new File(CRAM_UNMAPPED).getAbsolutePath(),
+ CRAM_FILE_QUERY_SORTED
+ },
+ {"picard.sam.markduplicates.MarkDuplicatesWithMateCigar",
+ "M=" + createTempFile("MarkDuplicatesWithMateCigar", ".txt").getAbsolutePath(),
+ CRAM_FILE},
+ {"picard.sam.markduplicates.SimpleMarkDuplicatesWithMateCigar",
+ "M=" + createTempFile("SimpleMarkDuplicatesWithMateCigar", ".txt").getAbsolutePath(),
+ CRAM_FILE_ONE_PAIR_MC},
+ {"picard.illumina.MarkIlluminaAdapters",
+ "METRICS=" + createTempFile("picard.illumina.MarkIlluminaAdapters", ".txt").getAbsolutePath(),
+ CRAM_FILE_QUERY_SORTED,
+ },
+ {"picard.sam.SplitSamByLibrary", null, CRAM_FILE}
+ };
+ }
+
+ @Test(dataProvider = "programArgsForCRAMWithoutReferenceToFail", expectedExceptions = CRAMException.class)
+ public void testShouldFailWhenCRAMWithoutReference(String program,
+ String parameters,
+ String cramFile) throws IOException, IllegalAccessException, InstantiationException, ClassNotFoundException {
+ if (!program.equals("picard.sam.SplitSamByLibrary")) {
+ final File outputFile = createTempCram(program);
+ launchProgram(program, cramFile, outputFile.getAbsolutePath(), parameters, null);
+ assertCRAM(outputFile);
+ } else {
+ final File tmpDir = IOUtil.createTempDir(outputDir.getAbsolutePath(), program);
+ launchProgram(program, cramFile, tmpDir.getAbsolutePath(), parameters, null);
+ assertCRAMs(tmpDir);
+ }
+ }
+
+ // test with CRAMs that don't need reference (unmapped CRAMs for input or output)
+ @DataProvider(name = "programArgsWithUnmappedCRAM")
+ public Object[][] getArgsWithUnmappedCRAM() {
+ return new Object[][] {
+ {"picard.sam.AddOrReplaceReadGroups", "RGID=4 RGLB=lib1 RGPL=illumina RGPU=unit1 RGSM=20", CRAM_UNMAPPED},
+ {"picard.sam.CleanSam", null, CRAM_UNMAPPED},
+ {"picard.sam.DownsampleSam", null, CRAM_UNMAPPED},
+ {"picard.sam.FixMateInformation", null, CRAM_UNMAPPED},
+ {"picard.sam.markduplicates.MarkDuplicates",
+ "M=" + createTempFile("MarkDuplicates", ".dir").getAbsolutePath(),
+ CRAM_UNMAPPED
+ },
+ {"picard.sam.MergeSamFiles", null, CRAM_UNMAPPED},
+ {"picard.sam.PositionBasedDownsampleSam", "FRACTION=0.5", CRAM_UNMAPPED},
+ {"picard.sam.SortSam", "SORT_ORDER=unsorted", CRAM_UNMAPPED},
+ {"picard.sam.ReplaceSamHeader", "HEADER=" + MBA_UNMAPPED_CRAM, CRAM_UNMAPPED},
+ {"picard.sam.RevertOriginalBaseQualitiesAndAddMateCigar", null, CRAM_UNMAPPED_WITH_OQ_TAG},
+ {"picard.sam.GatherBamFiles",
+ "I=" + new File(CRAM_UNMAPPED_PART_2).getAbsolutePath(),
+ CRAM_UNMAPPED_PART_1
+ },
+ {"picard.sam.FastqToSam", "F1=" + FASTQ_FILE + " SAMPLE_NAME=s1", null},
+ {"picard.illumina.IlluminaBasecallsToSam",
+ "BASECALLS_DIR=" + new File("testdata/picard/illumina/25T8B25T/Data/Intensities/BaseCalls") +
+ " LANE=1 READ_STRUCTURE=25S8S25T RUN_BARCODE=HiMom SAMPLE_ALIAS=HiDad LIBRARY_NAME=HelloWorld",
+ null
+ },
+ {"picard.illumina.MarkIlluminaAdapters",
+ "METRICS=" + createTempFile("picard.illumina.MarkIlluminaAdapters", ".txt").getAbsolutePath(),
+ CRAM_FILE_QUERY_SORTED_UNMAPPED
+ },
+ {"picard.sam.SplitSamByLibrary", null, CRAM_SPLIT_UNMAPPED}
+ };
+ }
+
+ @Test(dataProvider = "programArgsWithUnmappedCRAM")
+ public void testShouldWriteCRAMWhenUnmappedCRAMWithoutReference(String program,
+ String parameters,
+ String cramFile) throws IOException, IllegalAccessException, InstantiationException, ClassNotFoundException {
+ if (!program.equals("picard.sam.SplitSamByLibrary")) {
+ final File outputFile = createTempCram(program);
+ launchProgram(program, cramFile, outputFile.getAbsolutePath(), parameters, null);
+ assertCRAM(outputFile);
+ } else {
+ final File tmpDir = IOUtil.createTempDir(outputDir.getAbsolutePath(), program);
+ launchProgram(program, cramFile, tmpDir.getAbsolutePath(), parameters, null);
+ assertCRAMs(tmpDir);
+ }
+ }
+
+ private File createTempCram(String name) throws IOException {
+ return createTempFile(name, ".cram");
+ }
+
+    /** Creates a temp file named from name/extension inside outputDir and marks it for deletion on JVM exit. */
+    private static File createTempFile(String name, String extension) {
+        try {
+            final File file = File.createTempFile(name, extension, outputDir);
+            file.deleteOnExit();
+            return file;
+        } catch (IOException e) {
+            // Fail fast: returning null here would only resurface later as an NPE in a data provider.
+            throw new UncheckedIOException("Cannot create temp file " + name + extension + " in " + outputDir, e);
+        }
+    }
+
+ private void launchProgram(String programClassname,
+ String input,
+ String output,
+ String exParams,
+ String reference) throws ClassNotFoundException, IllegalAccessException, InstantiationException {
+ final Collection<String> args = new ArrayList<>();
+
+ if (input != null) {
+ args.add("INPUT=" + new File(input).getAbsolutePath());
+ }
+ args.add("OUTPUT=" + output);
+
+ if (exParams != null) {
+ args.addAll(Arrays.asList(exParams.split(" ")));
+ }
+
+ if (reference != null) {
+ args.add("REFERENCE_SEQUENCE=" + new File(reference).getAbsolutePath());
+ }
+
+ CommandLineProgram program = (CommandLineProgram) Class.forName(programClassname).newInstance();
+ program.instanceMain(args.toArray(new String[args.size()]));
+ }
+
+    static void assertCRAM(File outputFile) { // asserts that SamStreams.isCRAMFile accepts outputFile
+        try (InputStream in = new FileInputStream(outputFile)) {
+            Assert.assertTrue(SamStreams.isCRAMFile(new BufferedInputStream(in)), "File is not a CRAM.");
+        } catch (IOException e) { // an unreadable output must fail the test, not pass silently
+            throw new UncheckedIOException("Cannot read " + outputFile, e);
+        }
+    }
+
+ private static void assertCRAMs(File dir) {
+ Arrays.stream(dir.listFiles()).filter(file -> file.getName().endsWith("cram")).forEach(CramCompatibilityTest::assertCRAM);
+ }
+}
diff --git a/src/test/java/picard/sam/FilterSamReadsTest.java b/src/test/java/picard/sam/FilterSamReadsTest.java
index 71469c5ad..54dc3b53f 100644
--- a/src/test/java/picard/sam/FilterSamReadsTest.java
+++ b/src/test/java/picard/sam/FilterSamReadsTest.java
@@ -24,7 +24,10 @@
package picard.sam;
import htsjdk.samtools.*;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.TestUtil;
import org.testng.Assert;
+import org.testng.annotations.AfterTest;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@@ -35,15 +38,10 @@
import java.util.stream.StreamSupport;
public class FilterSamReadsTest extends CommandLineProgramTest {
- @Override
- public String getCommandLineProgramName() {
- return FilterSamReads.class.getSimpleName();
- }
-
private static final int READ_LENGTH = 151;
- private final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
- private final static File TEST_DIR = new File("testdata/picard/sam/FilterSamReads/");
+ private final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
+ private final static File TEST_DIR = IOUtil.createTempDir("testdata/picard/sam/FilterSamReads/output", ".tmp");
@BeforeTest
public void setUp() {
@@ -54,6 +52,16 @@ public void setUp() {
builder.addPair("one_of_pair", 0, 1, 1000); //first read should pass, second should not, but both will be kept in first test
}
+ @AfterTest
+ public void tearDown() {
+ TestUtil.recursiveDelete(TEST_DIR);
+ }
+
+ @Override
+ public String getCommandLineProgramName() {
+ return FilterSamReads.class.getSimpleName();
+ }
+
@DataProvider(name = "dataTestJsFilter")
public Object[][] dataTestJsFilter() {
return new Object[][]{
@@ -69,22 +77,38 @@ public void setUp() {
{"testdata/picard/sam/FilterSamReads/filter2.interval_list", 0}
};
}
-
+
/**
* filters a SAM using a javascript filter
*/
@Test(dataProvider = "dataTestJsFilter")
public void testJavaScriptFilters(final String samFilename, final String javascriptFilename,final int expectNumber) throws Exception {
- // input as SAM file
+ launchJavaScriptFilter(samFilename, javascriptFilename, expectNumber);
+ }
+
+ @Test
+ public void testJavaScriptFiltersWithCRAM() throws Exception {
+ final FilterSamReads program = setupProgram(
+ new File("testdata/picard/sam/FilterSamReads/filterOddStarts.js"),
+ new File(CramCompatibilityTest.CRAM_FILE),
+ FilterSamReads.Filter.includeJavascript,
+ CramCompatibilityTest.REFERENCE_FILE);
+ Assert.assertEquals(program.doWork(), 0);
+ CramCompatibilityTest.assertCRAM(program.OUTPUT);
+ }
+
+ private FilterSamReads launchJavaScriptFilter(String samFilename, String javascriptFilename, int expectNumber) throws Exception {
+ // input as SAM file
final File inputSam = new File(samFilename);
final File javascriptFile = new File(javascriptFilename);
- FilterSamReads filterTest = setupProgram(javascriptFile, inputSam, FilterSamReads.Filter.includeJavascript);
- Assert.assertEquals(filterTest.doWork(),0);
-
+ FilterSamReads filterTest = setupProgram(javascriptFile, inputSam, FilterSamReads.Filter.includeJavascript, null);
+ Assert.assertEquals(filterTest.doWork(), 0);
long count = getReadCount(filterTest);
Assert.assertEquals(count, expectNumber);
+
+ return filterTest;
}
/**
@@ -106,7 +130,7 @@ public void testPairedIntervalFilter(final String intervalFilename, final int ex
final File intervalFile = new File(intervalFilename);
- FilterSamReads filterTest = setupProgram(intervalFile, inputSam, FilterSamReads.Filter.includePairedIntervals);
+ FilterSamReads filterTest = setupProgram(intervalFile, inputSam, FilterSamReads.Filter.includePairedIntervals, null);
Assert.assertEquals(filterTest.doWork(),0);
long count = getReadCount(filterTest);
@@ -114,20 +138,23 @@ public void testPairedIntervalFilter(final String intervalFilename, final int ex
Assert.assertEquals(count, expectNumber);
}
- private FilterSamReads setupProgram(final File inputFile, final File inputSam, final FilterSamReads.Filter filter) throws Exception {
+ private FilterSamReads setupProgram(final File inputFile, final File inputSam, final FilterSamReads.Filter filter, final String reference) throws Exception {
final FilterSamReads program = new FilterSamReads();
program.INPUT = inputSam;
- program.OUTPUT = File.createTempFile("FilterSamReads.output.", ".sam");
+ program.OUTPUT = File.createTempFile("FilterSamReads.output.", getFilenameExtension(inputSam.getAbsolutePath()));
program.OUTPUT.deleteOnExit();
program.FILTER = filter;
- if(filter == FilterSamReads.Filter.includePairedIntervals) {
+ if (filter == FilterSamReads.Filter.includePairedIntervals) {
program.INTERVAL_LIST = inputFile;
- }
- else {
+ } else {
program.JAVASCRIPT_FILE = inputFile;
}
+ if (reference != null) {
+ program.REFERENCE_SEQUENCE = new File(reference);
+ }
+
return program;
}
@@ -140,4 +167,9 @@ private long getReadCount(FilterSamReads filterTest) throws Exception {
samReader.close();
return count;
}
+
+ private static String getFilenameExtension(String samFilename) {
+ final String[] split = samFilename.split("\\.");
+ return "." + split[split.length - 1];
+ }
}
diff --git a/testdata/picard/sam/MarkDuplicates/one_pair_mc.cram b/testdata/picard/sam/MarkDuplicates/one_pair_mc.cram
new file mode 100644
index 000000000..ea35315b1
Binary files /dev/null and b/testdata/picard/sam/MarkDuplicates/one_pair_mc.cram differ
diff --git a/testdata/picard/sam/MergeBamAlignment/cliptest.aligned.cram b/testdata/picard/sam/MergeBamAlignment/cliptest.aligned.cram
new file mode 100644
index 000000000..c66d98819
Binary files /dev/null and b/testdata/picard/sam/MergeBamAlignment/cliptest.aligned.cram differ
diff --git a/testdata/picard/sam/MergeBamAlignment/cliptest.fasta.fai b/testdata/picard/sam/MergeBamAlignment/cliptest.fasta.fai
new file mode 100644
index 000000000..1e6a48d89
--- /dev/null
+++ b/testdata/picard/sam/MergeBamAlignment/cliptest.fasta.fai
@@ -0,0 +1 @@
+chr1 1000 6 50 51
diff --git a/testdata/picard/sam/MergeBamAlignment/cliptest.unmapped.cram b/testdata/picard/sam/MergeBamAlignment/cliptest.unmapped.cram
new file mode 100644
index 000000000..691fcf68d
Binary files /dev/null and b/testdata/picard/sam/MergeBamAlignment/cliptest.unmapped.cram differ
diff --git a/testdata/picard/sam/split_test_unmapped.cram b/testdata/picard/sam/split_test_unmapped.cram
new file mode 100644
index 000000000..58b807967
Binary files /dev/null and b/testdata/picard/sam/split_test_unmapped.cram differ
diff --git a/testdata/picard/sam/test_cram_file.ref.dict b/testdata/picard/sam/test_cram_file.ref.dict
new file mode 100644
index 000000000..433246b66
--- /dev/null
+++ b/testdata/picard/sam/test_cram_file.ref.dict
@@ -0,0 +1,2 @@
+@HD VN:1.5 SO:unsorted
+@SQ SN:17 LN:4200 M5:f8c08a4411f07717451464d546b3706d UR:file:\testdata\picard\sam\test_cram_file.fa
diff --git a/testdata/picard/sam/test_cram_file.ref.fa b/testdata/picard/sam/test_cram_file.ref.fa
new file mode 100644
index 000000000..7c2ec2a88
--- /dev/null
+++ b/testdata/picard/sam/test_cram_file.ref.fa
@@ -0,0 +1,71 @@
+>17 17:1-4200
+AAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAA
+TGTGCTCTGGGGTCTCTGGGGTCTCACCCACGACCAACTCCCTGGGCCTGGCACCAGGGA
+GCTTAACAAACATCTGTCCAGCGAATACCTGCATCCCTAGAAGTGAAGCCACCGCCCAAA
+GACACGCCCATGTCCAGCTTAACCTGCATCCCTAGAAGTGAAGGCACCGCCCAAAGACAC
+GCCCATGTCCAGCTTATTCTGCCCAGTTCCTCTCCAGAAAGGCTGCATGGTTGACACACA
+GTGCCTGCGACAAAGCTGAATGCTATCATTTAAAAACTCCTTGCTGGTTTGAGAGGCAGA
+AAATGATATCTCATAGTTGCTTTACTTTGCATATTTTAAAATTGTGACTTTCATGGCATA
+AATAATACTGGTTTATTACAGAAGCACTAGAAAATGCATGTGGACAAAAGTTGGGATTAG
+GAGAGAGAAATGAAGACATATGTCCACACAAAAACCTGTTCATTGCAGCTTTCTACCATC
+ACCAAAAATTGCAAACAACCACACGCCCTTCAACTGGGGAACTCATCAACAACAAACTTG
+TGGTTTACCCACACAATGGAAGACCACTTAGCAACAAAAAGGACCAAACTCCTGGTACAT
+GCAACTGACAGATGAATCTCAAACGCATTCCTCCGTGTGAAAGAAGCCGGACTCACAGGG
+CAACACACTATCTGACTGTTTCATGGGAAAGTCTGGAAACGGCAACACCATTGAGACAGA
+AAACAGGTGAGTGGTTGCCTGGGGCCAGGGAACTTTCTGGGGTCATATTCTCTGTGTTGA
+TTCTGGTGGTGGAAACAAGACTGTCCCAGCCTGGGTGATACAGCGAGACCCCATCTCTAC
+CAAAAAATTAAAAATTAGCTGGGCATGGTGGTGCATGCCTGTAGTCCCAGCTATTCACAG
+TGCTGAGGTGGGAAGATGCTTGAGCCCAGGAGTTCAAGGCTGCAATGAGCTATGATTGCG
+CCACTGCACTTTGGCCTGGACAACAGAGCAAAACCCTGTCTCTAAAAAAAGAAAAGAAAA
+GAAAAACTCACTGGATATGAATGATACAGGTTGAGGATCCATTATCTGAAATGCTTGGAC
+CAGATGTTTTGAATTTTGGATTTTTTCATATTTTGTAATCTTTGCAGTATATTTACCAGT
+TCAGCATCCCTAACTCAAAAATTCAAAAATCTGAAATCCCAAACGCGCCAATAAGCATTC
+CCTTTGAGCGTCATGTCGGTGCTTGGAATGTTTGGGGTTTTGGATTTACAGCTTTGGGAC
+GCTCAACCTGTACCTCAATAAACCTGATTTTAAAAAAGTTTGGGGGGATTCCCCTAAGCC
+CGCCACCCGGAGACAGCGGATTTCCTTAGTTACTTACTATGCTCCTTGGCCATTTCTCTA
+GGTATTGGTATATTGTGTCTGCTGTGAACTGTCCTTGGCCTGTTTGGTGACGGGTGAGGA
+GCAGGGACAGAAGGGTCCTGCGTGCCCTGCCTTCACAAGCCCCTGGAAGGAAAGTTGTTT
+TGGGATCTCTGCACCCTCAGCCTGGACAACTTGTGCCCATCTGGTGACCCCTCACTCAGC
+CACCAGACTTCCACGACAGGCTCCAGCCTCGGCACCTTCAGCCATGGACAGTTCCGCCAG
+CGTTGCCCTCTGTTCTGCTGTTTTCTCTACCAGAAGTGCCCTTCCCTCCTCACCTGACCA
+CTCTGGGGAAATCCCTCAGCACCCTCCCTGAGCATACCCTACTCTGGCACAAGCCCACCC
+TGCAAAGCCCCTGAGGCCCGCCCTGTGGCGTCTCTCCCTCCCTTGCTGTCAGGACAGTGG
+TCCTGGCCACCGGGGCTCACGGAGCCGCCCTGTGCCGTGTACCTCTGAGCCCTCTGCACA
+GTGCCTTCTGCTTGCCTGTGGCTTTGAGAAGAAACCCCTTCTGGTTATACATAAGACAGC
+CAGAGAAGGGAGTTGCCCAGGGTGGCACAGCACGTTGCTGCCAGTTACTGCCATTTTCAC
+GGGCATGAAATGGAGATAACAACAGGAGCGACCGCACAGGCTGCTGAGCGCGTCACACGC
+AGCCATCGCGCAGCTCAGGGATATTACGTGTAACTCGACATGTCAGCGATTGTCACAGGC
+ACTGCTACTCCTGGGGTTTTCCATCAAACCCTCAAGAGCTGGGCCTGGGGTCAACTTCCG
+GCCTGGGGAAACTGGGGCAAGTATCACCAGAGATGAGCTTTATAAAAATAATGGTGCTAG
+CTGGGCATGGTGGCTTGCACCTGTAATCCCAGCACTTTGGGAGGCCGAGCTAGGAGGATC
+GTTTGAGTCCAGCAGTTTGAGACCAGCCTGGCCAATACGGCAAAACCCAGTCTCTACAAA
+AAATACAAAAAACAACTAGCCAGGCGTGGTGGTGCACACCTGTAGTCCCAGCTACTCAGG
+AGGCTGAGGGGGAAGGACTGCTTGAGCCCAGGAGTTTGAGGCTGCTGTGAGCTGTGATCG
+CATCACTGCATTCCAGCCCGGTGACAGAGTGAGTCACTGTCTCAAAAAAGAAAGGAAGAA
+ATAAAGAAAACAAATAAAAATAATAGTGCAGACAAAAGGCCTTGACCCATCTAGCTTTGG
+CCCTCAGCATCAACCGCTAGATACGTCCCTCCCTTTCTTCTGGGGCACAGGTCACACTCT
+CTTCCAGGTCTAGGATGCAGCTGAGGGGTGCCCCTCTTACCATCTAATCTGTGCCCTTAT
+TTCCTCTGCTTTAGTGAGGAAGAGGCCCCTGGTCCATGAAGGGGCCTTTCAGAGACGGGG
+ACCCCTGAGGAGCCCCGAGCAGCAGCCGTCGTGTCTCACCCAGGGTGTCTGAAACAGATG
+TGGAGGTCTCGGGTGAGGCGTGGCTCAGATACAGGGAGTGGCCCACAGCTCGGCCTGTCT
+TTGAAAGGCCACGTGACCTGGCCCACGGCTGGCAGGTGGGACCCAGCTGCAGGGGTCCAG
+CAGCACCCACAGCAGCCACCTGTGGCAGGGAGGAGCTTGTGGTACAGTGGACAGGCCCTG
+CCCAGATGGCCCCCCGCCTGCCTGTGGAAGTTGACCAGACCATCTGTCACAGCAGGTAAG
+ACTCTGCTTTCTGGGCAACCCAGCAGGTGACCCTGGAATTCCTGTCCATCTGGCAGGTGG
+GCATTGAAACTGGTTTAAAAATGTCACACCATAGGCCGGGCACAGTGGCTCACGCCTGTA
+ATCCCAGCCCTTTGGGAGGCCAGGGTGGGTGGATCACTTGAGGTCAGGAGTTCAAGACCA
+GCCTGGCCAACATGGTGAAACCCCGTCTACTAAAAATACAAAAATTAGCCTGGCGTGGTG
+GCGCATGCCTGTAATCCCAGCTACTTGGGAAGCTGAGGGATGAGAACTGCTTGAACCTGG
+GAGGCAGACGTTGCAGTGAGCTGAGATCACGCCACTGCACTCCAGCCTGGGCAACAGAGT
+AAGACTCTGTCTCAAAAAAAAAAAAATCACACCATTTTGGCTTCAGATTGCATATCCTCC
+TGCAAGGATATATACGCGTGAAATTCAAGTCAATGACAAATCAGAAGAAAAAACATATAT
+ATACGCAAACCAGTATCCTACTGTGTGTGTCGTTTGTTGTGTTTTCGACAGCTGTCCGTG
+TTATAATAATTCCTCTAGTTCAAATTTATTCATTTTTAACTTCATAGTACCACATTCTAC
+ACACTGCCCATGTCCCCTCAAGCTTCCCCTGGCTCCTGCAACCACAAATCTACTCTCTGC
+CTCTGTGGGTTGACCTATTCTGGACACGTCATAGAAATAGAGTCCTGCAACACGTGGCCG
+TCTGTGTCTGGCTTCTCTCGCTTAGCATCTTGTTTCCAAGGTCCTCCCACAGTGTAGCAT
+GCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAT
+GGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACACACCCGCT
+ACACTCCTTCTTAGGGCTGATATTCCACGCACCCGCTACACTCCTTCTTAGGGCTGATAT
+TCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTT
+CTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCAC
diff --git a/testdata/picard/sam/test_cram_file.ref.fa.fai b/testdata/picard/sam/test_cram_file.ref.fa.fai
new file mode 100644
index 000000000..c2112667e
--- /dev/null
+++ b/testdata/picard/sam/test_cram_file.ref.fa.fai
@@ -0,0 +1 @@
+17 4200 14 60 61
diff --git a/testdata/picard/sam/test_cram_file_coordinate_sorted.cram b/testdata/picard/sam/test_cram_file_coordinate_sorted.cram
new file mode 100644
index 000000000..4d312952c
Binary files /dev/null and b/testdata/picard/sam/test_cram_file_coordinate_sorted.cram differ
diff --git a/testdata/picard/sam/test_cram_file_coordinate_sorted.cram.bai b/testdata/picard/sam/test_cram_file_coordinate_sorted.cram.bai
new file mode 100644
index 000000000..62bacfd63
Binary files /dev/null and b/testdata/picard/sam/test_cram_file_coordinate_sorted.cram.bai differ
diff --git a/testdata/picard/sam/test_cram_file_header_only.cram b/testdata/picard/sam/test_cram_file_header_only.cram
new file mode 100644
index 000000000..606986d5f
Binary files /dev/null and b/testdata/picard/sam/test_cram_file_header_only.cram differ
diff --git a/testdata/picard/sam/test_cram_file_query_sorted.cram b/testdata/picard/sam/test_cram_file_query_sorted.cram
new file mode 100644
index 000000000..1f7655a4c
Binary files /dev/null and b/testdata/picard/sam/test_cram_file_query_sorted.cram differ
diff --git a/testdata/picard/sam/test_cram_file_query_sorted.cram.crai b/testdata/picard/sam/test_cram_file_query_sorted.cram.crai
new file mode 100644
index 000000000..bc15eb2ed
Binary files /dev/null and b/testdata/picard/sam/test_cram_file_query_sorted.cram.crai differ
diff --git a/testdata/picard/sam/unmapped_part_1.cram b/testdata/picard/sam/unmapped_part_1.cram
new file mode 100644
index 000000000..5b14a69b4
Binary files /dev/null and b/testdata/picard/sam/unmapped_part_1.cram differ
diff --git a/testdata/picard/sam/unmapped_part_2.cram b/testdata/picard/sam/unmapped_part_2.cram
new file mode 100644
index 000000000..ff842a4f6
Binary files /dev/null and b/testdata/picard/sam/unmapped_part_2.cram differ
diff --git a/testdata/picard/sam/unmapped_queryname_sorted.cram b/testdata/picard/sam/unmapped_queryname_sorted.cram
new file mode 100644
index 000000000..893e530ba
Binary files /dev/null and b/testdata/picard/sam/unmapped_queryname_sorted.cram differ
diff --git a/testdata/picard/sam/unmapped_with_oq_tag.cram b/testdata/picard/sam/unmapped_with_oq_tag.cram
new file mode 100644
index 000000000..8b9ac1402
Binary files /dev/null and b/testdata/picard/sam/unmapped_with_oq_tag.cram differ