diff --git a/src/main/java/picard/sam/AbstractAlignmentMerger.java b/src/main/java/picard/sam/AbstractAlignmentMerger.java index 7475b88fa..e7a5390df 100644 --- a/src/main/java/picard/sam/AbstractAlignmentMerger.java +++ b/src/main/java/picard/sam/AbstractAlignmentMerger.java @@ -23,37 +23,12 @@ */ package picard.sam; -import htsjdk.samtools.BAMRecordCodec; -import htsjdk.samtools.Cigar; -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.ReservedTagConstants; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMFileHeader.SortOrder; -import htsjdk.samtools.SAMFileWriter; -import htsjdk.samtools.SAMFileWriterFactory; -import htsjdk.samtools.SAMProgramRecord; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMRecordCoordinateComparator; -import htsjdk.samtools.SAMRecordQueryNameComparator; -import htsjdk.samtools.SAMSequenceDictionary; -import htsjdk.samtools.SAMSequenceRecord; -import htsjdk.samtools.SAMTag; -import htsjdk.samtools.SAMUtils; -import htsjdk.samtools.SamPairUtil; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SamReaderFactory; +import htsjdk.samtools.*; import htsjdk.samtools.filter.FilteringSamIterator; import htsjdk.samtools.filter.SamRecordFilter; import htsjdk.samtools.reference.ReferenceSequenceFileWalker; -import htsjdk.samtools.util.CigarUtil; -import htsjdk.samtools.util.CloseableIterator; -import htsjdk.samtools.util.CloserUtil; -import htsjdk.samtools.util.IOUtil; -import htsjdk.samtools.util.Log; -import htsjdk.samtools.util.ProgressLogger; -import htsjdk.samtools.util.SequenceUtil; -import htsjdk.samtools.util.SortingCollection; +import htsjdk.samtools.SAMFileHeader.SortOrder; +import htsjdk.samtools.util.*; import picard.PicardException; import java.io.File; @@ -92,15 +67,14 @@ private final File unmappedBamFile; private final File targetBamFile; - private final SAMSequenceDictionary sequenceDictionary; private ReferenceSequenceFileWalker refSeq = null; private final boolean clipAdapters; private final boolean bisulfiteSequence; private SAMProgramRecord programRecord; private final boolean alignedReadsOnly; private final SAMFileHeader header; - private final List attributesToRetain = new ArrayList(); - private final List attributesToRemove = new ArrayList(); + private final List attributesToRetain = new ArrayList<>(); + private final List attributesToRemove = new ArrayList<>(); private Set attributesToReverse = new TreeSet<>(SAMRecord.TAGS_TO_REVERSE); private Set attributesToReverseComplement = new TreeSet<>(SAMRecord.TAGS_TO_REVERSE_COMPLEMENT); protected final File referenceFasta; @@ -184,6 +158,8 @@ public boolean isPopulatePaTag() { } } + protected abstract SAMSequenceDictionary getDictionaryForMergedBam(); + protected abstract CloseableIterator getQuerynameSortedAlignedRecords(); protected boolean ignoreAlignment(final SAMRecord sam) { return false; } // default implementation @@ -203,7 +179,7 @@ public AbstractAlignmentMerger(final File unmappedBamFile, final File targetBamF final List attributesToRemove, final Integer read1BasesTrimmed, final Integer read2BasesTrimmed, final List expectedOrientations, - final SAMFileHeader.SortOrder sortOrder, + final SortOrder sortOrder, final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy, final boolean addMateCigar, final boolean unmapContaminantReads) { @@ -268,7 +244,7 @@ public AbstractAlignmentMerger(final File unmappedBamFile, final File targetBamF final List attributesToRemove, final Integer read1BasesTrimmed, final Integer read2BasesTrimmed, final List expectedOrientations, - final SAMFileHeader.SortOrder sortOrder, + final SortOrder sortOrder, final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy, final boolean addMateCigar, final boolean unmapContaminantReads, @@ -282,11 +258,6 @@ public AbstractAlignmentMerger(final File unmappedBamFile, final File targetBamF this.referenceFasta = referenceFasta; this.refSeq = new ReferenceSequenceFileWalker(referenceFasta); - this.sequenceDictionary = refSeq.getSequenceDictionary(); - if (this.sequenceDictionary == null) { - throw new PicardException("No sequence dictionary found for " + referenceFasta.getAbsolutePath() + - ". Use CreateSequenceDictionary.jar to create a sequence dictionary."); - } this.clipAdapters = clipAdapters; this.bisulfiteSequence = bisulfiteSequence; @@ -298,7 +269,7 @@ public AbstractAlignmentMerger(final File unmappedBamFile, final File targetBamF if (programRecord != null) { setProgramRecord(programRecord); } - header.setSequenceDictionary(this.sequenceDictionary); + if (attributesToRetain != null) { this.attributesToRetain.addAll(attributesToRetain); } @@ -306,12 +277,10 @@ public AbstractAlignmentMerger(final File unmappedBamFile, final File targetBamF this.attributesToRemove.addAll(attributesToRemove); // attributesToRemove overrides attributesToRetain if (!this.attributesToRetain.isEmpty()) { - for (String attribute : this.attributesToRemove) { - if (this.attributesToRetain.contains(attribute)) { - log.info("Overriding retaining the " + attribute + " tag since remove overrides retain."); - this.attributesToRetain.remove(attribute); - } - } + this.attributesToRemove.stream() + .filter(this.attributesToRetain::contains) + .peek(a->log.info("Overriding retaining the " + a + " tag since 'remove' overrides 'retain'.")) + .forEach(this.attributesToRetain::remove); } } this.read1BasesTrimmed = read1BasesTrimmed; @@ -372,6 +341,10 @@ public void mergeAlignment(final File referenceFasta) { // Get the aligned records and set up the first one alignedIterator = new MultiHitAlignedReadIterator(new FilteringSamIterator(getQuerynameSortedAlignedRecords(), alignmentFilter), primaryAlignmentSelectionStrategy); + + // once the aligned Iterator has been set-up we can merge the reference dictionary and that from the input files + this.header.setSequenceDictionary(getDictionaryForMergedBam()); + HitsForInsert nextAligned = nextAligned(); // Check that the program record we are going to insert is not already used in the unmapped SAM @@ -621,8 +594,6 @@ private HitsForInsert nextAligned() { return null; } - private int numCrossSpeciesContaminantWarnings = 0; - /** * Copies alignment info from aligned to unaligned read, clips as appropriate, and sets PG ID. * May also un-map the resulting read if the alignment is bad (e.g. no unclipped bases). @@ -880,8 +851,6 @@ protected void updateCigarForTrimmedOrClippedBases(final SAMRecord rec, final SA } } - protected SAMSequenceDictionary getSequenceDictionary() { return this.sequenceDictionary; } - protected SAMProgramRecord getProgramRecord() { return this.programRecord; } protected void setProgramRecord(final SAMProgramRecord pg) { diff --git a/src/main/java/picard/sam/MergeBamAlignment.java b/src/main/java/picard/sam/MergeBamAlignment.java index c1011baf2..45edbe82b 100644 --- a/src/main/java/picard/sam/MergeBamAlignment.java +++ b/src/main/java/picard/sam/MergeBamAlignment.java @@ -26,6 +26,7 @@ import htsjdk.samtools.SAMFileHeader.SortOrder; import htsjdk.samtools.SAMProgramRecord; import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SamPairUtil; import htsjdk.samtools.util.Log; import picard.PicardException; @@ -211,6 +212,9 @@ @Option(doc = "If UNMAP_CONTAMINANT_READS is set, require this many unclipped bases or else the read will be marked as contaminant.") public int MIN_UNCLIPPED_BASES = 32; + @Option(doc = "List of Sequence Records tags that must be equal (if present) in the reference dictionary and in the aligned file. Mismatching tags will cause an error if in this list, and a warning otherwise.") + public List MATCHING_DICTIONARY_TAGS = SAMSequenceDictionary.DEFAULT_DICTIONARY_EQUAL_TAG; + @Option(doc = "How to deal with alignment information in reads that are being unmapped (e.g. due to cross-species contamination.) Currently ignored unless UNMAP_CONTAMINANT_READS = true", optional = true) public AbstractAlignmentMerger.UnmappingReadStrategy UNMAPPED_READ_STRATEGY = AbstractAlignmentMerger.UnmappingReadStrategy.DO_NOT_CHANGE; @@ -257,9 +261,9 @@ protected int doWork() { } // TEMPORARY FIX until internal programs all specify EXPECTED_ORIENTATIONS if (JUMP_SIZE != null) { - EXPECTED_ORIENTATIONS = Arrays.asList(SamPairUtil.PairOrientation.RF); + EXPECTED_ORIENTATIONS = Collections.singletonList(SamPairUtil.PairOrientation.RF); } else if (EXPECTED_ORIENTATIONS == null || EXPECTED_ORIENTATIONS.isEmpty()) { - EXPECTED_ORIENTATIONS = Arrays.asList(SamPairUtil.PairOrientation.FR); + EXPECTED_ORIENTATIONS = Collections.singletonList(SamPairUtil.PairOrientation.FR); } final SamAlignmentMerger merger = new SamAlignmentMerger(UNMAPPED_BAM, OUTPUT, @@ -268,7 +272,7 @@ protected int doWork() { ATTRIBUTES_TO_RETAIN, ATTRIBUTES_TO_REMOVE, READ1_TRIM, READ2_TRIM, READ1_ALIGNED_BAM, READ2_ALIGNED_BAM, EXPECTED_ORIENTATIONS, SORT_ORDER, PRIMARY_ALIGNMENT_STRATEGY.newInstance(), ADD_MATE_CIGAR, UNMAP_CONTAMINANT_READS, - MIN_UNCLIPPED_BASES, UNMAPPED_READ_STRATEGY); + MIN_UNCLIPPED_BASES, UNMAPPED_READ_STRATEGY, MATCHING_DICTIONARY_TAGS); merger.setClipOverlappingReads(CLIP_OVERLAPPING_READS); merger.setMaxRecordsInRam(MAX_RECORDS_IN_RAM); merger.setKeepAlignerProperPairFlags(ALIGNER_PROPER_PAIR_FLAGS); @@ -311,10 +315,8 @@ protected int doWork() { if (ALIGNED_BAM == null || ALIGNED_BAM.isEmpty() && !(r1sExist && r2sExist)) { return new String[]{"Either ALIGNED_BAM or the combination of " + "READ1_ALIGNED_BAM and READ2_ALIGNED_BAM must be supplied."}; - } return null; } - } diff --git a/src/main/java/picard/sam/SamAlignmentMerger.java b/src/main/java/picard/sam/SamAlignmentMerger.java index 69278076b..0405d8559 100644 --- a/src/main/java/picard/sam/SamAlignmentMerger.java +++ b/src/main/java/picard/sam/SamAlignmentMerger.java @@ -1,25 +1,11 @@ package picard.sam; -import htsjdk.samtools.BAMRecordCodec; -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.MergingSamRecordIterator; -import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.*; import htsjdk.samtools.SAMFileHeader.SortOrder; -import htsjdk.samtools.SAMProgramRecord; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMRecordQueryNameComparator; -import htsjdk.samtools.SamFileHeaderMerger; -import htsjdk.samtools.SamPairUtil; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.filter.OverclippedReadFilter; -import htsjdk.samtools.util.CloseableIterator; -import htsjdk.samtools.util.DelegatingIterator; -import htsjdk.samtools.util.IOUtil; -import htsjdk.samtools.util.Log; -import htsjdk.samtools.util.PeekableIterator; -import htsjdk.samtools.util.SortingCollection; +import htsjdk.samtools.util.*; +import htsjdk.variant.utils.SAMSequenceDictionaryExtractor; +import picard.PicardException; import java.io.File; import java.util.ArrayList; @@ -45,6 +31,8 @@ private final int minUnclippedBases; private boolean forceSort = false; private final OverclippedReadFilter contaminationFilter; + private final List requiredMatchingDictionaryTags; + private SAMSequenceDictionary alignedSamDictionary; /** * Constructor with a default value for unmappingReadStrategy @@ -84,7 +72,9 @@ public SamAlignmentMerger(final File unmappedBamFile, final File targetBamFile, addMateCigar, unmapContaminantReads, minUnclippedBases, - UnmappingReadStrategy.DO_NOT_CHANGE); + UnmappingReadStrategy.DO_NOT_CHANGE, + SAMSequenceDictionary.DEFAULT_DICTIONARY_EQUAL_TAG + ); } /** @@ -131,6 +121,8 @@ public SamAlignmentMerger(final File unmappedBamFile, final File targetBamFile, * @param minUnclippedBases If unmapContaminantReads is set, require this many unclipped bases or else the read will be marked as contaminant. * @param unmappingReadStrategy An enum describing how to deal with reads whose mapping information are being removed (currently this happens due to cross-species * contamination). Ignored unless unmapContaminantReads is true. + * @param requiredMatchingDictionaryTags A list of SAMSequenceRecord tags that must be equal (if present) in the aligned bam and the reference dictionary. + * Program will issue a warning about other tags, if present in both files and are different. */ public SamAlignmentMerger(final File unmappedBamFile, final File targetBamFile, final File referenceFasta, final SAMProgramRecord programRecord, final boolean clipAdapters, final boolean bisulfiteSequence, @@ -145,7 +137,8 @@ public SamAlignmentMerger(final File unmappedBamFile, final File targetBamFile, final boolean addMateCigar, final boolean unmapContaminantReads, final int minUnclippedBases, - final UnmappingReadStrategy unmappingReadStrategy) { + final UnmappingReadStrategy unmappingReadStrategy, + final List requiredMatchingDictionaryTags) { super(unmappedBamFile, targetBamFile, referenceFasta, clipAdapters, bisulfiteSequence, alignedReadsOnly, programRecord, attributesToRetain, attributesToRemove, read1BasesTrimmed, @@ -172,11 +165,11 @@ public SamAlignmentMerger(final File unmappedBamFile, final File targetBamFile, this.maxGaps = maxGaps; this.minUnclippedBases = minUnclippedBases; this.contaminationFilter = new OverclippedReadFilter(minUnclippedBases, false); + this.requiredMatchingDictionaryTags = requiredMatchingDictionaryTags; log.info("Processing SAM file(s): " + ((alignedSamFile != null) ? alignedSamFile : (read1AlignedSamFile + "," + read2AlignedSamFile))); } - /** * Merges the alignment from the map file with the non-aligned records from the source BAM file. * Overrides mergeAlignment in AbstractAlignmentMerger. Tries first to proceed on the assumption @@ -194,6 +187,16 @@ public void mergeAlignment(final File referenceFasta) { } } + @Override + protected SAMSequenceDictionary getDictionaryForMergedBam() { + SAMSequenceDictionary referenceDict = SAMSequenceDictionaryExtractor.extractDictionary(referenceFasta); + if (referenceDict == null) { + throw new PicardException("No sequence dictionary found for " + referenceFasta.getAbsolutePath() + + ". Use Picard's CreateSequenceDictionary to create a sequence dictionary."); + } + return SAMSequenceDictionary.mergeDictionaries(alignedSamDictionary, referenceDict, requiredMatchingDictionaryTags); + } + /** * Reads the aligned SAM records into a SortingCollection and returns an iterator over that collection */ @@ -204,8 +207,8 @@ public void mergeAlignment(final File referenceFasta) { // When the alignment records, including both ends of a pair, are in SAM files if (alignedSamFile != null && !alignedSamFile.isEmpty()) { - final List headers = new ArrayList(alignedSamFile.size()); - final List readers = new ArrayList(alignedSamFile.size()); + final List headers = new ArrayList<>(alignedSamFile.size()); + final List readers = new ArrayList<>(alignedSamFile.size()); for (final File f : this.alignedSamFile) { final SamReader r = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(f); headers.add(r.getFileHeader()); @@ -218,6 +221,12 @@ public void mergeAlignment(final File referenceFasta) { } } + // assert that all the dictionaries are the same before merging the headers. + alignedSamDictionary = headers.get(0).getSequenceDictionary(); + headers.stream() + .map(SAMFileHeader::getSequenceDictionary) + .forEach(alignedSamDictionary::assertSameDictionary); + final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SortOrder.queryname, headers, false); mergingIterator = new MergingSamRecordIterator(headerMerger, readers, true); @@ -227,9 +236,12 @@ public void mergeAlignment(final File referenceFasta) { // When the ends are aligned separately and don't have firstOfPair information correctly // set we use this branch. else { + // this merging iterator already asserts that all the headers are the same mergingIterator = new SeparateEndAlignmentIterator(this.read1AlignedSamFile, this.read2AlignedSamFile, referenceFasta); header = ((SeparateEndAlignmentIterator) mergingIterator).getHeader(); + alignedSamDictionary = header.getSequenceDictionary(); + // As we're going through and opening the aligned files, if we don't have a @PG yet // and there is only a single one in the input file, use that! if (getProgramRecord() == null && header.getProgramRecords().size() == 1) { @@ -237,12 +249,10 @@ public void mergeAlignment(final File referenceFasta) { } } - if (!forceSort) { return mergingIterator; } - final SortingCollection alignmentSorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM); @@ -308,9 +318,9 @@ public void remove() { private final SAMFileHeader header; public SeparateEndAlignmentIterator(final List read1Alignments, final List read2Alignments, File referenceFasta) { - final List headers = new ArrayList(); - final List read1 = new ArrayList(read1Alignments.size()); - final List read2 = new ArrayList(read2Alignments.size()); + final List headers = new ArrayList<>(); + final List read1 = new ArrayList<>(read1Alignments.size()); + final List read2 = new ArrayList<>(read2Alignments.size()); for (final File f : read1Alignments) { final SamReader r = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(f); headers.add(r.getFileHeader()); @@ -323,9 +333,9 @@ public SeparateEndAlignmentIterator(final List read1Alignments, final List } final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, headers, false); - read1Iterator = new PeekableIterator( + read1Iterator = new PeekableIterator<>( new SuffixTrimingSamRecordIterator(new MergingSamRecordIterator(headerMerger, read1, true), "/1")); - read2Iterator = new PeekableIterator( + read2Iterator = new PeekableIterator<>( new SuffixTrimingSamRecordIterator(new MergingSamRecordIterator(headerMerger, read2, true), "/2")); header = headerMerger.getMergedHeader(); diff --git a/src/test/java/picard/sam/MergeBamAlignmentTest.java b/src/test/java/picard/sam/MergeBamAlignmentTest.java index b0c22c997..2230923ec 100644 --- a/src/test/java/picard/sam/MergeBamAlignmentTest.java +++ b/src/test/java/picard/sam/MergeBamAlignmentTest.java @@ -26,6 +26,7 @@ import htsjdk.samtools.*; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.IOUtil; +import htsjdk.variant.utils.SAMSequenceDictionaryExtractor; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -34,13 +35,11 @@ import picard.sam.testers.ValidateSamTester; import java.io.File; +import java.io.FileDescriptor; +import java.io.FileInputStream; import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.lang.reflect.Field; +import java.util.*; /** * Test for the MergeBamAlignment class @@ -63,11 +62,11 @@ private static final File secondReadAlignedBam_firstHalf = new File(TEST_DATA_DIR, "firsthalf.read2.trimmed.aligned.sam"); private static final File secondReadAlignedBam_secondHalf = new File(TEST_DATA_DIR, "secondhalf.read2.trimmed.aligned.sam"); private static final File supplementalReadAlignedBam = new File(TEST_DATA_DIR, "aligned.supplement.sam"); - private static final File alignedQuerynameSortedBam = - new File("testdata/picard/sam/aligned_queryname_sorted.sam"); + private static final File alignedQuerynameSortedBam = new File("testdata/picard/sam/aligned_queryname_sorted.sam"); private static final File fasta = new File("testdata/picard/sam/merger.fasta"); private static final String bigSequenceName = "chr7"; // The longest sequence in merger.fasta private static final File sequenceDict = new File("testdata/picard/sam/merger.dict"); + private static final File sequenceDict2 = new File("testdata/picard/sam/merger.2.dict"); private static final File badorderUnmappedBam = new File(TEST_DATA_DIR, "unmapped.badorder.sam"); private static final File badorderAlignedBam = new File(TEST_DATA_DIR, "aligned.badorder.sam"); private static final File multipleStrandsAlignedBam = new File(TEST_DATA_DIR, "aligned.both.strands.sam"); @@ -280,7 +279,7 @@ public void testMergerFromMultipleFiles() throws Exception { // MIN_ADAPTER_BASES hanging off the end else if (sam.getReadName().equals("both_reads_align_min_adapter_bases_exceeded")) { Assert.assertEquals(sam.getReferenceName(), "chr7"); - Assert.assertTrue(sam.getCigarString().indexOf("S") == -1, + Assert.assertTrue(!sam.getCigarString().contains("S"), "Read was clipped when it should not be."); } else if (sam.getReadName().equals("neither_read_aligns_or_present")) { Assert.assertTrue(sam.getReadUnmappedFlag(), "Read should be unmapped but isn't"); @@ -306,10 +305,10 @@ public void testSortingOnSamAlignmentMerger(final File unmapped, final File alig final File target = File.createTempFile("target", "bam"); target.deleteOnExit(); final SamAlignmentMerger merger = new SamAlignmentMerger(unmapped, target, fasta, null, true, false, - false, Arrays.asList(aligned), 1, null, null, null, null, null, null, - Arrays.asList(SamPairUtil.PairOrientation.FR), + false, Collections.singletonList(aligned), 1, null, null, null, null, null, null, + Collections.singletonList(SamPairUtil.PairOrientation.FR), coordinateSorted ? SAMFileHeader.SortOrder.coordinate : SAMFileHeader.SortOrder.queryname, - new BestMapqPrimaryAlignmentSelectionStrategy(), false, false, 30, AbstractAlignmentMerger.UnmappingReadStrategy.DO_NOT_CHANGE); + new BestMapqPrimaryAlignmentSelectionStrategy(), false, false, 30); merger.mergeAlignment(Defaults.REFERENCE_FASTA); Assert.assertEquals(sorted, !merger.getForceSort()); @@ -954,7 +953,7 @@ public void testEarliestFragmentStrategyPaired() throws Exception { alignedSam.deleteOnExit(); // Populate the header with SAMSequenceRecords - header.getSequenceDictionary().addSequence(new SAMSequenceRecord("chr1", 1000000)); + header.setSequenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(sequenceDict2)); // Create 2 alignments for each end of pair final SAMFileWriter alignedWriter = factory.makeSAMWriter(header, false, alignedSam); @@ -1108,7 +1107,7 @@ public void testEarliestFragmentStrategy(final String testName, final MultipleAl final String sequence = "chr1"; // Populate the header with SAMSequenceRecords - header.getSequenceDictionary().addSequence(new SAMSequenceRecord(sequence, 1000000)); + header.setSequenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(sequenceDict2)); final SAMFileWriter alignedWriter = factory.makeSAMWriter(header, false, alignedSam); for (final MultipleAlignmentSpec spec : specs) { @@ -1233,7 +1232,7 @@ private void testBestFragmentMapqStrategy(final String testName, final int[] fir final String sequence = "chr1"; // Populate the header with SAMSequenceRecords - header.getSequenceDictionary().addSequence(new SAMSequenceRecord(sequence, 1000000)); + header.setSequenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(sequenceDict2)); final SAMFileWriter alignedWriter = factory.makeSAMWriter(header, false, alignedSam); @@ -1248,7 +1247,7 @@ private void testBestFragmentMapqStrategy(final String testName, final int[] fir false, true, false, 1, "0", "1.0", "align!", "myAligner", true, - new File(TEST_DATA_DIR, "cliptest.fasta"), output, + fasta, output, SamPairUtil.PairOrientation.FR, MergeBamAlignment.PrimaryAlignmentStrategy.BestEndMapq, null, includeSecondary, null, null); @@ -1287,7 +1286,7 @@ private void testBestFragmentMapqStrategy(final String testName, final int[] fir Assert.assertEquals(numSecondRecords, Math.max(1, secondMapQs.length)); } } - + private void doMergeAlignment(final File unmappedBam, final List alignedBams, final List read1AlignedBams, final List read2AlignedBams, final Integer read1Trim, final Integer read2Trim, final boolean alignReadsOnly, final boolean clipAdapters, final boolean isBisulfiteSequence, final int maxInsOrDels, @@ -1749,15 +1748,14 @@ public void testRemoveNmMdAndUqOnOverlappingReads() throws IOException { if (hasTags) { Assert.assertNull(rec.getAttribute("MD")); Assert.assertNull(rec.getAttribute("NM")); - } - else { + } else { Assert.assertNotNull(rec.getAttribute("MD")); Assert.assertNotNull(rec.getAttribute("NM")); } } result.close(); } - + @Test public void testMappedToMultipleStrands() throws Exception { final File outputMappedToMultipleStands = File.createTempFile("mappedToMultipleStrands", ".sam"); @@ -1799,4 +1797,114 @@ public void testMappedToMultipleStrands() throws Exception { } } } + + @Test + public void testMergeHeaderMappedAndReference() throws IOException { + final File unmappedSam = new File(TEST_DATA_DIR, "specialHeader.unmapped.sam"); + final File alignedSam = new File(TEST_DATA_DIR, "specialHeader.aligned.sam"); + final File expectedSam = new File(TEST_DATA_DIR, "specialHeader.expected.sam"); + final File refFasta = new File(TEST_DATA_DIR, "specialHeader.fasta"); + final File mergedSam = File.createTempFile("merged", ".sam"); + mergedSam.deleteOnExit(); + + doMergeAlignment(unmappedSam, Collections.singletonList(alignedSam), + null, null, null, null, + false, true, false, 1, + "0", "1.0", "align!", "myAligner", + true, refFasta, mergedSam, + null, null, null, null, true, null); + + assertSamValid(mergedSam); + IOUtil.assertFilesEqual(expectedSam, mergedSam); + } + + @Test + public void testMergeHeaderMappedAndReferenceWithAlignedAsNamedStream() throws IOException, InterruptedException, NoSuchFieldException, IllegalAccessException { + final File unmappedSam = new File(TEST_DATA_DIR, "specialHeader.unmapped.sam"); + final File alignedSam = new File(TEST_DATA_DIR, "specialHeader.aligned.sam"); + final File expectedSam = new File(TEST_DATA_DIR, "specialHeader.expected.sam"); + final File refFasta = new File(TEST_DATA_DIR, "specialHeader.fasta"); + final File mergedSam = File.createTempFile("merged", ".sam"); + mergedSam.deleteOnExit(); + + FileInputStream stream = new FileInputStream(alignedSam); + + // Ugliness required to read from a stream given as a string on the commandline. + // Since the actual fd number is private inside FileDescriptor, need reflection + // in order to pull it out. + + final Field fdField = FileDescriptor.class.getDeclaredField("fd"); + fdField.setAccessible(true); + final File inputStream = new File("/dev/fd/" + fdField.getInt(stream.getFD())); + + doMergeAlignment(unmappedSam, Collections.singletonList(inputStream), + null, null, null, null, + false, true, false, 1, + "0", "1.0", "align!", "myAligner", + true, refFasta, mergedSam, + null, null, null, null, true, null); + + assertSamValid(mergedSam); + IOUtil.assertFilesEqual(expectedSam, mergedSam); + } + + @Test + public void testMergeHeaderMappedAndReferenceWithUnmappedAsNamedStream() throws IOException, InterruptedException, NoSuchFieldException, IllegalAccessException { + final File unmappedSam = new File(TEST_DATA_DIR, "specialHeader.unmapped.sam"); + final File alignedSam = new File(TEST_DATA_DIR, "specialHeader.aligned.sam"); + final File expectedSam = new File(TEST_DATA_DIR, "specialHeader.expected.sam"); + final File refFasta = new File(TEST_DATA_DIR, "specialHeader.fasta"); + final File mergedSam = File.createTempFile("merged", ".sam"); + mergedSam.deleteOnExit(); + + FileInputStream stream = new FileInputStream(unmappedSam); + + // Ugliness required to read from a stream given as a string on the commandline. + // Since the actual fd number is private inside FileDescriptor, need reflection + // in order to pull it out. + + final Field fdField = FileDescriptor.class.getDeclaredField("fd"); + fdField.setAccessible(true); + final File inputStream = new File("/dev/fd/" + fdField.getInt(stream.getFD())); + + doMergeAlignment(inputStream, Collections.singletonList(alignedSam), + null, null, null, null, + false, true, false, 1, + "0", "1.0", "align!", "myAligner", + true, refFasta, mergedSam, + null, null, null, null, true, null); + + assertSamValid(mergedSam); + IOUtil.assertFilesEqual(expectedSam, mergedSam); + } + + + @DataProvider(name = "brokenAlignedFiles") + Object[][] brokenAlignedFiles() { + return new Object[][]{ + new Object[]{"specialHeader.aligned.breaks.length.sam"}, + new Object[]{"specialHeader.aligned.breaks.md5.sam"} + }; + } + + @Test(dataProvider = "brokenAlignedFiles", expectedExceptions = IllegalArgumentException.class) + public void testHeaderFromMappedBreaks(final String filename) throws IOException { + final File unmappedSam = new File(TEST_DATA_DIR, "specialHeader.unmapped.sam"); + final File alignedSam = new File(TEST_DATA_DIR, filename); + final File expectedSam = new File(TEST_DATA_DIR, "specialHeader.expected.sam"); + final File refFasta = new File(TEST_DATA_DIR, "specialHeader.fasta"); + final File mergedSam = File.createTempFile("merged", ".sam"); + mergedSam.deleteOnExit(); + + doMergeAlignment(unmappedSam, Collections.singletonList(alignedSam), + null, null, null, null, + false, true, false, 1, + "0", "1.0", "align!", "myAligner", + true, refFasta, mergedSam, + null, null, null, null, true, null); + + assertSamValid(mergedSam); + IOUtil.assertFilesEqual(expectedSam, mergedSam); + } } + diff --git a/testdata/picard/sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam b/testdata/picard/sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam index 76ef1af05..4dca588a6 100644 --- a/testdata/picard/sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam +++ b/testdata/picard/sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam @@ -1,5 +1,5 @@ @HD VN:1.5 SO:coordinate -@SQ SN:chr1 LN:1000 UR:file:testdata/net/sf/picard/sam/MergeBamAlignment/cliptest.fasta M5:17522ddd273279f4595f50fea9864734 +@SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/net/sf/picard/sam/MergeBamAlignment/cliptest.fasta @RG ID:0 SM:Hi,Mom! PL:ILLUMINA @PG ID:0 VN:1.0 CL:align! PN:myAligner frag_multiple_primary_1 4 chr1 1 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,null; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination diff --git a/testdata/picard/sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam b/testdata/picard/sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam index af6f9e6c8..40c06f420 100644 --- a/testdata/picard/sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam +++ b/testdata/picard/sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam @@ -1,5 +1,5 @@ @HD VN:1.5 SO:coordinate -@SQ SN:chr1 LN:1000 UR:file:testdata/net/sf/picard/sam/MergeBamAlignment/cliptest.fasta M5:17522ddd273279f4595f50fea9864734 +@SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/net/sf/picard/sam/MergeBamAlignment/cliptest.fasta @RG ID:0 SM:Hi,Mom! PL:ILLUMINA @PG ID:0 VN:1.0 CL:align! PN:myAligner frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:50 PG:Z:0 RG:Z:0 NM:i:0 UQ:i:0 diff --git a/testdata/picard/sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam b/testdata/picard/sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam index 2a3352fc2..5bead4783 100644 --- a/testdata/picard/sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam +++ b/testdata/picard/sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam @@ -1,5 +1,5 @@ @HD VN:1.5 SO:coordinate -@SQ SN:chr1 LN:1000 UR:file:testdata/net/sf/picard/sam/MergeBamAlignment/cliptest.fasta M5:17522ddd273279f4595f50fea9864734 +@SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/net/sf/picard/sam/MergeBamAlignment/cliptest.fasta @RG ID:0 SM:Hi,Mom! PL:ILLUMINA @PG ID:0 VN:1.0 CL:align! PN:myAligner frag_multiple_primary_1 4 chr1 1 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination diff --git a/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.breaks.length.sam b/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.breaks.length.sam new file mode 100644 index 000000000..5b5276231 --- /dev/null +++ b/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.breaks.length.sam @@ -0,0 +1,26 @@ +@HD VN:1.0 SO:queryname +@SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta +@SQ SN:chr1_alt AH:* LN:201 M5:8e0c728a0fb8a73feb55f9a447f4b144 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta +@RG ID:0 SM:Hi,Mom! PL:ILLUMINA +@CO frag_multiple_primary_1 should be marked contaminant because the overclipped alignment has higher MAPQ, and the other alignment should be omitted +@CO frag_multiple_primary_2 should NOT be marked contaminant because the good alignment has higher MAPQ, and the overclipped alignment should be marked as secondary +@CO frag_primary_clipped should be marked contaminant because primary alignment is overclipped, and the secondary / supplementary should be omitted +@CO frag_secondary_clipped should NOT be marked contaminant because only secondary is overclipped, and will be preserved as-is +@CO r1_clipped_r2_clipped should be marked contaminant because at least one segment is overclipped +@CO r1_clipped_r2_perfect should be marked contaminant because at least one segment is overclipped +@CO r1_clipped_r2_unmapped should be marked contaminant because at least one segment is overclipped +frag_multiple_primary_1 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_multiple_primary_1 0 chr1 1 15 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_multiple_primary_2 0 chr1 1 15 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_primary_clipped 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_primary_clipped 256 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_primary_clipped 2048 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_secondary_clipped 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_secondary_clipped 256 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_clipped 97 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_clipped 145 chr1 51 30 20S10M20S chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_perfect 97 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_perfect 145 chr1 51 30 50M chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_unmapped 73 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_unmapped 133 chr1 51 0 * chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 diff --git a/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.breaks.md5.sam b/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.breaks.md5.sam new file mode 100644 index 000000000..f237b692e --- /dev/null +++ b/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.breaks.md5.sam @@ -0,0 +1,26 @@ +@HD VN:1.0 SO:queryname +@SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta +@SQ SN:chr1_alt AH:* LN:200 M5:dummy! UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta +@RG ID:0 SM:Hi,Mom! PL:ILLUMINA +@CO frag_multiple_primary_1 should be marked contaminant because the overclipped alignment has higher MAPQ, and the other alignment should be omitted +@CO frag_multiple_primary_2 should NOT be marked contaminant because the good alignment has higher MAPQ, and the overclipped alignment should be marked as secondary +@CO frag_primary_clipped should be marked contaminant because primary alignment is overclipped, and the secondary / supplementary should be omitted +@CO frag_secondary_clipped should NOT be marked contaminant because only secondary is overclipped, and will be preserved as-is +@CO r1_clipped_r2_clipped should be marked contaminant because at least one segment is overclipped +@CO r1_clipped_r2_perfect should be marked contaminant because at least one segment is overclipped +@CO r1_clipped_r2_unmapped should be marked contaminant because at least one segment is overclipped +frag_multiple_primary_1 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_multiple_primary_1 0 chr1 1 15 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_multiple_primary_2 0 chr1 1 15 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_primary_clipped 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_primary_clipped 256 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_primary_clipped 2048 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_secondary_clipped 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_secondary_clipped 256 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_clipped 97 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_clipped 145 chr1 51 30 20S10M20S chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_perfect 97 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_perfect 145 chr1 51 30 50M chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_unmapped 73 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_unmapped 133 chr1 51 0 * chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 diff --git a/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.sam b/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.sam new file mode 100644 index 000000000..2325869aa --- /dev/null +++ b/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.sam @@ -0,0 +1,26 @@ +@HD VN:1.0 SO:queryname +@SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta +@SQ SN:chr1_alt AH:* LN:200 M5:8e0c728a0fb8a73feb55f9a447f4b144 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta +@RG ID:0 SM:Hi,Mom! PL:ILLUMINA +@CO frag_multiple_primary_1 should be marked contaminant because the overclipped alignment has higher MAPQ, and the other alignment should be omitted +@CO frag_multiple_primary_2 should NOT be marked contaminant because the good alignment has higher MAPQ, and the overclipped alignment should be marked as secondary +@CO frag_primary_clipped should be marked contaminant because primary alignment is overclipped, and the secondary / supplementary should be omitted +@CO frag_secondary_clipped should NOT be marked contaminant because only secondary is overclipped, and will be preserved as-is +@CO r1_clipped_r2_clipped should be marked contaminant because at least one segment is overclipped +@CO r1_clipped_r2_perfect should be marked contaminant because at least one segment is overclipped +@CO r1_clipped_r2_unmapped should be marked contaminant because at least one segment is overclipped +frag_multiple_primary_1 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_multiple_primary_1 0 chr1 1 15 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_multiple_primary_2 0 chr1 1 15 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_primary_clipped 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_primary_clipped 256 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_primary_clipped 2048 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_secondary_clipped 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_secondary_clipped 256 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_clipped 97 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_clipped 145 chr1 51 30 20S10M20S chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_perfect 97 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_perfect 145 chr1 51 30 50M chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_unmapped 73 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_unmapped 133 chr1 51 0 * chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 diff --git a/testdata/picard/sam/MergeBamAlignment/specialHeader.dict b/testdata/picard/sam/MergeBamAlignment/specialHeader.dict new file mode 100644 index 000000000..bc68c964a --- /dev/null +++ b/testdata/picard/sam/MergeBamAlignment/specialHeader.dict @@ -0,0 +1,3 @@ +@HD VN:1.5 SO:unsorted +@SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta +@SQ SN:chr1_alt LN:200 M5:8e0c728a0fb8a73feb55f9a447f4b144 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta diff --git a/testdata/picard/sam/MergeBamAlignment/specialHeader.expected.sam b/testdata/picard/sam/MergeBamAlignment/specialHeader.expected.sam new file mode 100644 index 000000000..e5344ec88 --- /dev/null +++ b/testdata/picard/sam/MergeBamAlignment/specialHeader.expected.sam @@ -0,0 +1,17 @@ +@HD VN:1.5 SO:coordinate +@SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta +@SQ SN:chr1_alt LN:200 AH:* M5:8e0c728a0fb8a73feb55f9a447f4b144 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta +@RG ID:0 SM:Hi,Mom! PL:ILLUMINA +@PG ID:0 VN:1.0 CL:align! PN:myAligner +frag_multiple_primary_1 4 chr1 1 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:50 PG:Z:0 RG:Z:0 NM:i:0 UQ:i:0 +frag_multiple_primary_2 256 chr1 1 15 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:0T0T0C0A0T1C0T0G1 PG:Z:0 RG:Z:0 NM:i:8 UQ:i:240 +frag_primary_clipped 4 chr1 1 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +frag_secondary_clipped 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:50 PG:Z:0 RG:Z:0 NM:i:0 UQ:i:0 +frag_secondary_clipped 256 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:0T0T0C0A0T1C0T0G1 PG:Z:0 RG:Z:0 NM:i:8 UQ:i:240 +r1_clipped_r2_clipped 109 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +r1_clipped_r2_perfect 109 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +r1_clipped_r2_unmapped 77 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +r1_clipped_r2_unmapped 141 * 0 0 * * 0 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 +r1_clipped_r2_clipped 157 * 0 0 20S10M20S * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination +r1_clipped_r2_perfect 157 * 0 0 50M * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination diff --git a/testdata/picard/sam/MergeBamAlignment/specialHeader.fasta b/testdata/picard/sam/MergeBamAlignment/specialHeader.fasta new file mode 100644 index 000000000..8694bfccf --- /dev/null +++ b/testdata/picard/sam/MergeBamAlignment/specialHeader.fasta @@ -0,0 +1,26 @@ +>chr1 +TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC +TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA +TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC +TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA +TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC +TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA +TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC +TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA +TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC +TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA +TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC +TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA +TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC +TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA +TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC +TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA +TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC +TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA +TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC +TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA +>chr1_alt +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT \ No newline at end of file diff --git a/testdata/picard/sam/MergeBamAlignment/specialHeader.unmapped.sam b/testdata/picard/sam/MergeBamAlignment/specialHeader.unmapped.sam new file mode 100644 index 000000000..ceb5f1917 --- /dev/null +++ b/testdata/picard/sam/MergeBamAlignment/specialHeader.unmapped.sam @@ -0,0 +1,12 @@ +@HD VN:1.0 SO:queryname +@RG ID:0 SM:Hi,Mom! PL:ILLUMINA +frag_multiple_primary_1 4 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_multiple_primary_2 4 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_primary_clipped 4 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +frag_secondary_clipped 4 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_clipped 77 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_clipped 141 * 0 0 * * 0 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_perfect 77 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_perfect 141 * 0 0 * * 0 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_unmapped 77 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 +r1_clipped_r2_unmapped 141 * 0 0 * * 0 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 diff --git a/testdata/picard/sam/merger.2.dict b/testdata/picard/sam/merger.2.dict new file mode 100644 index 000000000..2d46e4cfc --- /dev/null +++ b/testdata/picard/sam/merger.2.dict @@ -0,0 +1,9 @@ +@HD VN:1.0 SO:unsorted +@SQ SN:chr1 LN:101 UR:merger.fasta AH:* +@SQ SN:chr2 LN:101 UR:merger.fasta AH:* +@SQ SN:chr3 LN:101 UR:merger.fasta AH:* +@SQ SN:chr4 LN:101 UR:merger.fasta AH:* +@SQ SN:chr5 LN:101 UR:merger.fasta AH:* +@SQ SN:chr6 LN:101 UR:merger.fasta M5:7be2f5e7ee39e60a6c3b5b6a41178c6d +@SQ SN:chr7 LN:404 UR:merger.fasta M5:da488fc432cdaf2c20c96da473a7b630 +@SQ SN:chr8 LN:202 UR:merger.fasta M5:d339678efce576d5546e88b49a487b63 diff --git a/testdata/picard/sam/merger.dict b/testdata/picard/sam/merger.dict index 797fb6fe2..30fe241ff 100644 --- a/testdata/picard/sam/merger.dict +++ b/testdata/picard/sam/merger.dict @@ -1,9 +1,9 @@ @HD VN:1.0 SO:unsorted -@SQ SN:chr1 LN:101 UR:merger.fasta M5:bd01f7e11515bb6beda8f7257902aa67 -@SQ SN:chr2 LN:101 UR:merger.fasta M5:31c33e2155b3de5e2554b693c475b310 -@SQ SN:chr3 LN:101 UR:merger.fasta M5:631593c6dd2048ae88dcce2bd505d295 -@SQ SN:chr4 LN:101 UR:merger.fasta M5:c60cb92f1ee5b78053c92bdbfa19abf1 -@SQ SN:chr5 LN:101 UR:merger.fasta M5:07ebc213c7611db0eacbb1590c3e9bda -@SQ SN:chr6 LN:101 UR:merger.fasta M5:7be2f5e7ee39e60a6c3b5b6a41178c6d -@SQ SN:chr7 LN:404 UR:merger.fasta M5:da488fc432cdaf2c20c96da473a7b630 -@SQ SN:chr8 LN:202 UR:merger.fasta M5:d339678efce576d5546e88b49a487b63 +@SQ SN:chr1 LN:101 UR:file:testdata/net/sf/picard/sam/merger.fasta M5:bd01f7e11515bb6beda8f7257902aa67 +@SQ SN:chr2 LN:101 UR:file:testdata/net/sf/picard/sam/merger.fasta M5:31c33e2155b3de5e2554b693c475b310 +@SQ SN:chr3 LN:101 UR:file:testdata/net/sf/picard/sam/merger.fasta M5:631593c6dd2048ae88dcce2bd505d295 +@SQ SN:chr4 LN:101 UR:file:testdata/net/sf/picard/sam/merger.fasta M5:c60cb92f1ee5b78053c92bdbfa19abf1 +@SQ SN:chr5 LN:101 UR:file:testdata/net/sf/picard/sam/merger.fasta M5:07ebc213c7611db0eacbb1590c3e9bda +@SQ SN:chr6 LN:101 UR:file:testdata/net/sf/picard/sam/merger.fasta M5:7be2f5e7ee39e60a6c3b5b6a41178c6d +@SQ SN:chr7 LN:404 UR:file:testdata/net/sf/picard/sam/merger.fasta M5:da488fc432cdaf2c20c96da473a7b630 +@SQ SN:chr8 LN:202 UR:file:testdata/net/sf/picard/sam/merger.fasta M5:d339678efce576d5546e88b49a487b63