From 5273b57595fa9ee57f5e08ef7640b87929e63b1f Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Wed, 18 Jan 2017 20:11:37 -0500 Subject: [PATCH] Revert "Fix sorting of lifted intervals in LiftOverIntervalList (#726) (#682)" This reverts commit afb4af37be4a38eb94b50a09ed6bf275de151472. --- .../java/picard/sam/AbstractAlignmentMerger.java | 63 ++++++++++++++---- src/main/java/picard/sam/MergeBamAlignment.java | 13 ++-- src/main/java/picard/sam/SamAlignmentMerger.java | 70 ++++++++------------ .../java/picard/sam/MergeBamAlignmentTest.java | 77 ++++------------------ .../contam.expected.COPY_TO_TAG.sam | 2 +- .../contam.expected.MOVE_TO_TAG.sam | 2 +- .../contam.expected.NO_CHANGE.sam | 2 +- .../specialHeader.aligned.breaks.length.sam | 26 -------- .../specialHeader.aligned.breaks.md5.sam | 26 -------- .../MergeBamAlignment/specialHeader.aligned.sam | 26 -------- .../sam/MergeBamAlignment/specialHeader.dict | 3 - .../MergeBamAlignment/specialHeader.expected.sam | 17 ----- .../sam/MergeBamAlignment/specialHeader.fasta | 26 -------- .../MergeBamAlignment/specialHeader.unmapped.sam | 12 ---- testdata/picard/sam/merger.2.dict | 9 --- testdata/picard/sam/merger.dict | 16 ++--- 16 files changed, 108 insertions(+), 282 deletions(-) delete mode 100644 testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.breaks.length.sam delete mode 100644 testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.breaks.md5.sam delete mode 100644 testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.sam delete mode 100644 testdata/picard/sam/MergeBamAlignment/specialHeader.dict delete mode 100644 testdata/picard/sam/MergeBamAlignment/specialHeader.expected.sam delete mode 100644 testdata/picard/sam/MergeBamAlignment/specialHeader.fasta delete mode 100644 testdata/picard/sam/MergeBamAlignment/specialHeader.unmapped.sam delete mode 100644 testdata/picard/sam/merger.2.dict diff --git a/src/main/java/picard/sam/AbstractAlignmentMerger.java b/src/main/java/picard/sam/AbstractAlignmentMerger.java index c4240714b..7475b88fa 100644 --- a/src/main/java/picard/sam/AbstractAlignmentMerger.java +++ b/src/main/java/picard/sam/AbstractAlignmentMerger.java @@ -23,12 +23,37 @@ */ package picard.sam; -import htsjdk.samtools.*; +import htsjdk.samtools.BAMRecordCodec; +import htsjdk.samtools.Cigar; +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import htsjdk.samtools.ReservedTagConstants; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMFileHeader.SortOrder; +import htsjdk.samtools.SAMFileWriter; +import htsjdk.samtools.SAMFileWriterFactory; +import htsjdk.samtools.SAMProgramRecord; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordCoordinateComparator; +import htsjdk.samtools.SAMRecordQueryNameComparator; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.SAMTag; +import htsjdk.samtools.SAMUtils; +import htsjdk.samtools.SamPairUtil; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.filter.FilteringSamIterator; import htsjdk.samtools.filter.SamRecordFilter; import htsjdk.samtools.reference.ReferenceSequenceFileWalker; -import htsjdk.samtools.SAMFileHeader.SortOrder; -import htsjdk.samtools.util.*; +import htsjdk.samtools.util.CigarUtil; +import htsjdk.samtools.util.CloseableIterator; +import htsjdk.samtools.util.CloserUtil; +import htsjdk.samtools.util.IOUtil; +import htsjdk.samtools.util.Log; +import htsjdk.samtools.util.ProgressLogger; +import htsjdk.samtools.util.SequenceUtil; +import htsjdk.samtools.util.SortingCollection; import picard.PicardException; import java.io.File; @@ -67,14 +92,15 @@ private final File unmappedBamFile; private final File targetBamFile; + private final SAMSequenceDictionary sequenceDictionary; private ReferenceSequenceFileWalker refSeq = null; private final boolean clipAdapters; private final boolean bisulfiteSequence; private SAMProgramRecord programRecord; private final boolean alignedReadsOnly; private final SAMFileHeader header; - private final List attributesToRetain = new ArrayList<>(); - private final List attributesToRemove = new ArrayList<>(); + private final List attributesToRetain = new ArrayList(); + private final List attributesToRemove = new ArrayList(); private Set attributesToReverse = new TreeSet<>(SAMRecord.TAGS_TO_REVERSE); private Set attributesToReverseComplement = new TreeSet<>(SAMRecord.TAGS_TO_REVERSE_COMPLEMENT); protected final File referenceFasta; @@ -157,7 +183,6 @@ public boolean isPopulatePaTag() { return populatePATag; } } - protected abstract SAMSequenceDictionary getDictionaryForMergedBam(); protected abstract CloseableIterator getQuerynameSortedAlignedRecords(); @@ -178,7 +203,7 @@ public AbstractAlignmentMerger(final File unmappedBamFile, final File targetBamF final List attributesToRemove, final Integer read1BasesTrimmed, final Integer read2BasesTrimmed, final List expectedOrientations, - final SortOrder sortOrder, + final SAMFileHeader.SortOrder sortOrder, final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy, final boolean addMateCigar, final boolean unmapContaminantReads) { @@ -243,7 +268,7 @@ public AbstractAlignmentMerger(final File unmappedBamFile, final File targetBamF final List attributesToRemove, final Integer read1BasesTrimmed, final Integer read2BasesTrimmed, final List expectedOrientations, - final SortOrder sortOrder, + final SAMFileHeader.SortOrder sortOrder, final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy, final boolean addMateCigar, final boolean unmapContaminantReads, @@ -257,6 +282,11 @@ public AbstractAlignmentMerger(final File unmappedBamFile, final File targetBamF this.referenceFasta = referenceFasta; this.refSeq = new ReferenceSequenceFileWalker(referenceFasta); + this.sequenceDictionary = refSeq.getSequenceDictionary(); + if (this.sequenceDictionary == null) { + throw new PicardException("No sequence dictionary found for " + referenceFasta.getAbsolutePath() + + ". Use CreateSequenceDictionary.jar to create a sequence dictionary."); + } this.clipAdapters = clipAdapters; this.bisulfiteSequence = bisulfiteSequence; @@ -268,7 +298,7 @@ public AbstractAlignmentMerger(final File unmappedBamFile, final File targetBamF if (programRecord != null) { setProgramRecord(programRecord); } - + header.setSequenceDictionary(this.sequenceDictionary); if (attributesToRetain != null) { this.attributesToRetain.addAll(attributesToRetain); } @@ -276,10 +306,12 @@ public AbstractAlignmentMerger(final File unmappedBamFile, final File targetBamF this.attributesToRemove.addAll(attributesToRemove); // attributesToRemove overrides attributesToRetain if (!this.attributesToRetain.isEmpty()) { - this.attributesToRemove.stream() - .filter(this.attributesToRetain::contains) - .peek(a->log.info("Overriding retaining the " + a + " tag since 'remove' overrides 'retain'.")) - .forEach(this.attributesToRetain::remove); + for (String attribute : this.attributesToRemove) { + if (this.attributesToRetain.contains(attribute)) { + log.info("Overriding retaining the " + attribute + " tag since remove overrides retain."); + this.attributesToRetain.remove(attribute); + } + } } } this.read1BasesTrimmed = read1BasesTrimmed; @@ -333,7 +365,6 @@ public void mergeAlignment(final File referenceFasta) { final SamReader unmappedSam = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(this.unmappedBamFile); final CloseableIterator unmappedIterator = unmappedSam.iterator(); - this.header.setSequenceDictionary(getDictionaryForMergedBam()); this.header.setReadGroups(unmappedSam.getFileHeader().getReadGroups()); int aligned = 0; @@ -590,6 +621,8 @@ private HitsForInsert nextAligned() { return null; } + private int numCrossSpeciesContaminantWarnings = 0; + /** * Copies alignment info from aligned to unaligned read, clips as appropriate, and sets PG ID. * May also un-map the resulting read if the alignment is bad (e.g. no unclipped bases). @@ -847,6 +880,8 @@ protected void updateCigarForTrimmedOrClippedBases(final SAMRecord rec, final SA } } + protected SAMSequenceDictionary getSequenceDictionary() { return this.sequenceDictionary; } + protected SAMProgramRecord getProgramRecord() { return this.programRecord; } protected void setProgramRecord(final SAMProgramRecord pg) { diff --git a/src/main/java/picard/sam/MergeBamAlignment.java b/src/main/java/picard/sam/MergeBamAlignment.java index 87a69bac6..c1011baf2 100644 --- a/src/main/java/picard/sam/MergeBamAlignment.java +++ b/src/main/java/picard/sam/MergeBamAlignment.java @@ -26,7 +26,6 @@ import htsjdk.samtools.SAMFileHeader.SortOrder; import htsjdk.samtools.SAMProgramRecord; import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SamPairUtil; import htsjdk.samtools.util.Log; import picard.PicardException; @@ -39,7 +38,6 @@ import java.io.File; import java.util.*; - /** * A command-line tool to merge BAM/SAM alignment info from a third-party aligner with the data in an * unmapped BAM file, producing a third BAM file that has alignment data and all the additional data @@ -213,9 +211,6 @@ @Option(doc = "If UNMAP_CONTAMINANT_READS is set, require this many unclipped bases or else the read will be marked as contaminant.") public int MIN_UNCLIPPED_BASES = 32; - @Option(doc = "List of Sequence Records tags that must be equal (if present) in the reference dictionary and in the aligned file. Mismatching tags will cause an error if in this list, and a warning otherwise.") - public List MATCHING_DICTIONARY_TAGS = SAMSequenceDictionary.DEFAULT_DICTIONARY_EQUAL_TAG; - @Option(doc = "How to deal with alignment information in reads that are being unmapped (e.g. due to cross-species contamination.) Currently ignored unless UNMAP_CONTAMINANT_READS = true", optional = true) public AbstractAlignmentMerger.UnmappingReadStrategy UNMAPPED_READ_STRATEGY = AbstractAlignmentMerger.UnmappingReadStrategy.DO_NOT_CHANGE; @@ -262,9 +257,9 @@ protected int doWork() { } // TEMPORARY FIX until internal programs all specify EXPECTED_ORIENTATIONS if (JUMP_SIZE != null) { - EXPECTED_ORIENTATIONS = Collections.singletonList(SamPairUtil.PairOrientation.RF); + EXPECTED_ORIENTATIONS = Arrays.asList(SamPairUtil.PairOrientation.RF); } else if (EXPECTED_ORIENTATIONS == null || EXPECTED_ORIENTATIONS.isEmpty()) { - EXPECTED_ORIENTATIONS = Collections.singletonList(SamPairUtil.PairOrientation.FR); + EXPECTED_ORIENTATIONS = Arrays.asList(SamPairUtil.PairOrientation.FR); } final SamAlignmentMerger merger = new SamAlignmentMerger(UNMAPPED_BAM, OUTPUT, @@ -273,7 +268,7 @@ protected int doWork() { ATTRIBUTES_TO_RETAIN, ATTRIBUTES_TO_REMOVE, READ1_TRIM, READ2_TRIM, READ1_ALIGNED_BAM, READ2_ALIGNED_BAM, EXPECTED_ORIENTATIONS, SORT_ORDER, PRIMARY_ALIGNMENT_STRATEGY.newInstance(), ADD_MATE_CIGAR, UNMAP_CONTAMINANT_READS, - MIN_UNCLIPPED_BASES, UNMAPPED_READ_STRATEGY, MATCHING_DICTIONARY_TAGS); + MIN_UNCLIPPED_BASES, UNMAPPED_READ_STRATEGY); merger.setClipOverlappingReads(CLIP_OVERLAPPING_READS); merger.setMaxRecordsInRam(MAX_RECORDS_IN_RAM); merger.setKeepAlignerProperPairFlags(ALIGNER_PROPER_PAIR_FLAGS); @@ -316,8 +311,10 @@ protected int doWork() { if (ALIGNED_BAM == null || ALIGNED_BAM.isEmpty() && !(r1sExist && r2sExist)) { return new String[]{"Either ALIGNED_BAM or the combination of " + "READ1_ALIGNED_BAM and READ2_ALIGNED_BAM must be supplied."}; + } return null; } + } diff --git a/src/main/java/picard/sam/SamAlignmentMerger.java b/src/main/java/picard/sam/SamAlignmentMerger.java index 3f9442463..69278076b 100644 --- a/src/main/java/picard/sam/SamAlignmentMerger.java +++ b/src/main/java/picard/sam/SamAlignmentMerger.java @@ -1,11 +1,25 @@ package picard.sam; -import htsjdk.samtools.*; +import htsjdk.samtools.BAMRecordCodec; +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import htsjdk.samtools.MergingSamRecordIterator; +import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFileHeader.SortOrder; +import htsjdk.samtools.SAMProgramRecord; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordQueryNameComparator; +import htsjdk.samtools.SamFileHeaderMerger; +import htsjdk.samtools.SamPairUtil; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.filter.OverclippedReadFilter; -import htsjdk.samtools.util.*; -import htsjdk.variant.utils.SAMSequenceDictionaryExtractor; -import picard.PicardException; +import htsjdk.samtools.util.CloseableIterator; +import htsjdk.samtools.util.DelegatingIterator; +import htsjdk.samtools.util.IOUtil; +import htsjdk.samtools.util.Log; +import htsjdk.samtools.util.PeekableIterator; +import htsjdk.samtools.util.SortingCollection; import java.io.File; import java.util.ArrayList; @@ -31,7 +45,6 @@ private final int minUnclippedBases; private boolean forceSort = false; private final OverclippedReadFilter contaminationFilter; - private final List requiredMatchingDictionaryTags; /** * Constructor with a default value for unmappingReadStrategy @@ -71,9 +84,7 @@ public SamAlignmentMerger(final File unmappedBamFile, final File targetBamFile, addMateCigar, unmapContaminantReads, minUnclippedBases, - UnmappingReadStrategy.DO_NOT_CHANGE, - SAMSequenceDictionary.DEFAULT_DICTIONARY_EQUAL_TAG - ); + UnmappingReadStrategy.DO_NOT_CHANGE); } /** @@ -120,8 +131,6 @@ public SamAlignmentMerger(final File unmappedBamFile, final File targetBamFile, * @param minUnclippedBases If unmapContaminantReads is set, require this many unclipped bases or else the read will be marked as contaminant. * @param unmappingReadStrategy An enum describing how to deal with reads whose mapping information are being removed (currently this happens due to cross-species * contamination). Ignored unless unmapContaminantReads is true. - * @param requiredMatchingDictionaryTags A list of SAMSequenceRecord tags that must be equal (if present) in the aligned bam and the reference dictionary. - * Program will issue a warning about other tags, if present in both files and are different. */ public SamAlignmentMerger(final File unmappedBamFile, final File targetBamFile, final File referenceFasta, final SAMProgramRecord programRecord, final boolean clipAdapters, final boolean bisulfiteSequence, @@ -136,8 +145,7 @@ public SamAlignmentMerger(final File unmappedBamFile, final File targetBamFile, final boolean addMateCigar, final boolean unmapContaminantReads, final int minUnclippedBases, - final UnmappingReadStrategy unmappingReadStrategy, - final List requiredMatchingDictionaryTags) { + final UnmappingReadStrategy unmappingReadStrategy) { super(unmappedBamFile, targetBamFile, referenceFasta, clipAdapters, bisulfiteSequence, alignedReadsOnly, programRecord, attributesToRetain, attributesToRemove, read1BasesTrimmed, @@ -164,11 +172,11 @@ public SamAlignmentMerger(final File unmappedBamFile, final File targetBamFile, this.maxGaps = maxGaps; this.minUnclippedBases = minUnclippedBases; this.contaminationFilter = new OverclippedReadFilter(minUnclippedBases, false); - this.requiredMatchingDictionaryTags = requiredMatchingDictionaryTags; log.info("Processing SAM file(s): " + ((alignedSamFile != null) ? alignedSamFile : (read1AlignedSamFile + "," + read2AlignedSamFile))); } + /** * Merges the alignment from the map file with the non-aligned records from the source BAM file. * Overrides mergeAlignment in AbstractAlignmentMerger. Tries first to proceed on the assumption @@ -186,27 +194,6 @@ public void mergeAlignment(final File referenceFasta) { } } - @Override - protected SAMSequenceDictionary getDictionaryForMergedBam() { - SAMSequenceDictionary finalDict; - if (alignedSamFile != null && !alignedSamFile.isEmpty()) { - finalDict = SAMSequenceDictionaryExtractor.extractDictionary(alignedSamFile.get(0)); - alignedSamFile.stream() - .map(SAMSequenceDictionaryExtractor::extractDictionary) - .forEach(finalDict::assertSameDictionary); - } else { - final SeparateEndAlignmentIterator mergingIterator = new SeparateEndAlignmentIterator(this.read1AlignedSamFile, this.read2AlignedSamFile, referenceFasta); - finalDict = mergingIterator.getHeader().getSequenceDictionary(); - } - - SAMSequenceDictionary referenceDict = SAMSequenceDictionaryExtractor.extractDictionary(referenceFasta); - if (referenceDict == null) { - throw new PicardException("No sequence dictionary found for " + referenceFasta.getAbsolutePath() + - ". Use Picard's CreateSequenceDictionary to create a sequence dictionary."); - } - return SAMSequenceDictionary.mergeDictionaries(finalDict, referenceDict, requiredMatchingDictionaryTags); - } - /** * Reads the aligned SAM records into a SortingCollection and returns an iterator over that collection */ @@ -217,8 +204,8 @@ protected SAMSequenceDictionary getDictionaryForMergedBam() { // When the alignment records, including both ends of a pair, are in SAM files if (alignedSamFile != null && !alignedSamFile.isEmpty()) { - final List headers = new ArrayList<>(alignedSamFile.size()); - final List readers = new ArrayList<>(alignedSamFile.size()); + final List headers = new ArrayList(alignedSamFile.size()); + final List readers = new ArrayList(alignedSamFile.size()); for (final File f : this.alignedSamFile) { final SamReader r = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(f); headers.add(r.getFileHeader()); @@ -255,6 +242,7 @@ protected SAMSequenceDictionary getDictionaryForMergedBam() { return mergingIterator; } + final SortingCollection alignmentSorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM); @@ -320,9 +308,9 @@ public void remove() { private final SAMFileHeader header; public SeparateEndAlignmentIterator(final List read1Alignments, final List read2Alignments, File referenceFasta) { - final List headers = new ArrayList<>(); - final List read1 = new ArrayList<>(read1Alignments.size()); - final List read2 = new ArrayList<>(read2Alignments.size()); + final List headers = new ArrayList(); + final List read1 = new ArrayList(read1Alignments.size()); + final List read2 = new ArrayList(read2Alignments.size()); for (final File f : read1Alignments) { final SamReader r = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(f); headers.add(r.getFileHeader()); @@ -335,9 +323,9 @@ public SeparateEndAlignmentIterator(final List read1Alignments, final List } final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, headers, false); - read1Iterator = new PeekableIterator<>( + read1Iterator = new PeekableIterator( new SuffixTrimingSamRecordIterator(new MergingSamRecordIterator(headerMerger, read1, true), "/1")); - read2Iterator = new PeekableIterator<>( + read2Iterator = new PeekableIterator( new SuffixTrimingSamRecordIterator(new MergingSamRecordIterator(headerMerger, read2, true), "/2")); header = headerMerger.getMergedHeader(); diff --git a/src/test/java/picard/sam/MergeBamAlignmentTest.java b/src/test/java/picard/sam/MergeBamAlignmentTest.java index fced5e37e..b0c22c997 100644 --- a/src/test/java/picard/sam/MergeBamAlignmentTest.java +++ b/src/test/java/picard/sam/MergeBamAlignmentTest.java @@ -41,7 +41,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; /** * Test for the MergeBamAlignment class @@ -64,11 +63,11 @@ private static final File secondReadAlignedBam_firstHalf = new File(TEST_DATA_DIR, "firsthalf.read2.trimmed.aligned.sam"); private static final File secondReadAlignedBam_secondHalf = new File(TEST_DATA_DIR, "secondhalf.read2.trimmed.aligned.sam"); private static final File supplementalReadAlignedBam = new File(TEST_DATA_DIR, "aligned.supplement.sam"); - private static final File alignedQuerynameSortedBam = new File("testdata/picard/sam/aligned_queryname_sorted.sam"); + private static final File alignedQuerynameSortedBam = + new File("testdata/picard/sam/aligned_queryname_sorted.sam"); private static final File fasta = new File("testdata/picard/sam/merger.fasta"); private static final String bigSequenceName = "chr7"; // The longest sequence in merger.fasta private static final File sequenceDict = new File("testdata/picard/sam/merger.dict"); - private static final File sequenceDict2 = new File("testdata/picard/sam/merger.2.dict"); private static final File badorderUnmappedBam = new File(TEST_DATA_DIR, "unmapped.badorder.sam"); private static final File badorderAlignedBam = new File(TEST_DATA_DIR, "aligned.badorder.sam"); private static final File multipleStrandsAlignedBam = new File(TEST_DATA_DIR, "aligned.both.strands.sam"); @@ -281,7 +280,7 @@ public void testMergerFromMultipleFiles() throws Exception { // MIN_ADAPTER_BASES hanging off the end else if (sam.getReadName().equals("both_reads_align_min_adapter_bases_exceeded")) { Assert.assertEquals(sam.getReferenceName(), "chr7"); - Assert.assertTrue(!sam.getCigarString().contains("S"), + Assert.assertTrue(sam.getCigarString().indexOf("S") == -1, "Read was clipped when it should not be."); } else if (sam.getReadName().equals("neither_read_aligns_or_present")) { Assert.assertTrue(sam.getReadUnmappedFlag(), "Read should be unmapped but isn't"); @@ -307,10 +306,10 @@ public void testSortingOnSamAlignmentMerger(final File unmapped, final File alig final File target = File.createTempFile("target", "bam"); target.deleteOnExit(); final SamAlignmentMerger merger = new SamAlignmentMerger(unmapped, target, fasta, null, true, false, - false, Collections.singletonList(aligned), 1, null, null, null, null, null, null, - Collections.singletonList(SamPairUtil.PairOrientation.FR), + false, Arrays.asList(aligned), 1, null, null, null, null, null, null, + Arrays.asList(SamPairUtil.PairOrientation.FR), coordinateSorted ? SAMFileHeader.SortOrder.coordinate : SAMFileHeader.SortOrder.queryname, - new BestMapqPrimaryAlignmentSelectionStrategy(), false, false, 30); + new BestMapqPrimaryAlignmentSelectionStrategy(), false, false, 30, AbstractAlignmentMerger.UnmappingReadStrategy.DO_NOT_CHANGE); merger.mergeAlignment(Defaults.REFERENCE_FASTA); Assert.assertEquals(sorted, !merger.getForceSort()); @@ -955,7 +954,7 @@ public void testEarliestFragmentStrategyPaired() throws Exception { alignedSam.deleteOnExit(); // Populate the header with SAMSequenceRecords - header.setSequenceDictionary(SamReaderFactory.makeDefault().getFileHeader(sequenceDict2).getSequenceDictionary()); + header.getSequenceDictionary().addSequence(new SAMSequenceRecord("chr1", 1000000)); // Create 2 alignments for each end of pair final SAMFileWriter alignedWriter = factory.makeSAMWriter(header, false, alignedSam); @@ -1109,7 +1108,7 @@ public void testEarliestFragmentStrategy(final String testName, final MultipleAl final String sequence = "chr1"; // Populate the header with SAMSequenceRecords - header.setSequenceDictionary(SamReaderFactory.makeDefault().getFileHeader(sequenceDict2).getSequenceDictionary()); + header.getSequenceDictionary().addSequence(new SAMSequenceRecord(sequence, 1000000)); final SAMFileWriter alignedWriter = factory.makeSAMWriter(header, false, alignedSam); for (final MultipleAlignmentSpec spec : specs) { @@ -1234,7 +1233,7 @@ private void testBestFragmentMapqStrategy(final String testName, final int[] fir final String sequence = "chr1"; // Populate the header with SAMSequenceRecords - header.setSequenceDictionary(SamReaderFactory.makeDefault().getFileHeader(sequenceDict2).getSequenceDictionary()); + header.getSequenceDictionary().addSequence(new SAMSequenceRecord(sequence, 1000000)); final SAMFileWriter alignedWriter = factory.makeSAMWriter(header, false, alignedSam); @@ -1249,7 +1248,7 @@ private void testBestFragmentMapqStrategy(final String testName, final int[] fir false, true, false, 1, "0", "1.0", "align!", "myAligner", true, - fasta, output, + new File(TEST_DATA_DIR, "cliptest.fasta"), output, SamPairUtil.PairOrientation.FR, MergeBamAlignment.PrimaryAlignmentStrategy.BestEndMapq, null, includeSecondary, null, null); @@ -1288,7 +1287,7 @@ private void testBestFragmentMapqStrategy(final String testName, final int[] fir Assert.assertEquals(numSecondRecords, Math.max(1, secondMapQs.length)); } } - + private void doMergeAlignment(final File unmappedBam, final List alignedBams, final List read1AlignedBams, final List read2AlignedBams, final Integer read1Trim, final Integer read2Trim, final boolean alignReadsOnly, final boolean clipAdapters, final boolean isBisulfiteSequence, final int maxInsOrDels, @@ -1750,14 +1749,15 @@ public void testRemoveNmMdAndUqOnOverlappingReads() throws IOException { if (hasTags) { Assert.assertNull(rec.getAttribute("MD")); Assert.assertNull(rec.getAttribute("NM")); - } else { + } + else { Assert.assertNotNull(rec.getAttribute("MD")); Assert.assertNotNull(rec.getAttribute("NM")); } } result.close(); } - + @Test public void testMappedToMultipleStrands() throws Exception { final File outputMappedToMultipleStands = File.createTempFile("mappedToMultipleStrands", ".sam"); @@ -1799,53 +1799,4 @@ public void testMappedToMultipleStrands() throws Exception { } } } - - @Test - public void testMergeHeaderMappedAndReference() throws IOException { - final File unmappedSam = new File(TEST_DATA_DIR, "specialHeader.unmapped.sam"); - final File alignedSam = new File(TEST_DATA_DIR, "specialHeader.aligned.sam"); - final File expectedSam = new File(TEST_DATA_DIR, "specialHeader.expected.sam"); - final File refFasta = new File(TEST_DATA_DIR, "specialHeader.fasta"); - final File mergedSam = File.createTempFile("merged", ".sam"); - mergedSam.deleteOnExit(); - - doMergeAlignment(unmappedSam, Collections.singletonList(alignedSam), - null, null, null, null, - false, true, false, 1, - "0", "1.0", "align!", "myAligner", - true, refFasta, mergedSam, - null, null, null, null, true, null); - - assertSamValid(mergedSam); - IOUtil.assertFilesEqual(expectedSam, mergedSam); - } - - @DataProvider(name = "brokenAlignedFiles") - Object[][] brokenAlignedFiles() { - return new Object[][]{ - new Object[]{"specialHeader.aligned.breaks.length.sam"}, - new Object[]{"specialHeader.aligned.breaks.md5.sam"} - }; - } - - @Test(dataProvider = "brokenAlignedFiles", expectedExceptions = IllegalArgumentException.class) - public void testHeaderFromMappedBreaks(final String filename) throws IOException { - final File unmappedSam = new File(TEST_DATA_DIR, "specialHeader.unmapped.sam"); - final File alignedSam = new File(TEST_DATA_DIR, filename); - final File expectedSam = new File(TEST_DATA_DIR, "specialHeader.expected.sam"); - final File refFasta = new File(TEST_DATA_DIR, "specialHeader.fasta"); - final File mergedSam = File.createTempFile("merged", ".sam"); - mergedSam.deleteOnExit(); - - doMergeAlignment(unmappedSam, Collections.singletonList(alignedSam), - null, null, null, null, - false, true, false, 1, - "0", "1.0", "align!", "myAligner", - true, refFasta, mergedSam, - null, null, null, null, true, null); - - assertSamValid(mergedSam); - IOUtil.assertFilesEqual(expectedSam, mergedSam); - } } - diff --git a/testdata/picard/sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam b/testdata/picard/sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam index 4dca588a6..76ef1af05 100644 --- a/testdata/picard/sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam +++ b/testdata/picard/sam/MergeBamAlignment/contam.expected.COPY_TO_TAG.sam @@ -1,5 +1,5 @@ @HD VN:1.5 SO:coordinate -@SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/net/sf/picard/sam/MergeBamAlignment/cliptest.fasta +@SQ SN:chr1 LN:1000 UR:file:testdata/net/sf/picard/sam/MergeBamAlignment/cliptest.fasta M5:17522ddd273279f4595f50fea9864734 @RG ID:0 SM:Hi,Mom! PL:ILLUMINA @PG ID:0 VN:1.0 CL:align! PN:myAligner frag_multiple_primary_1 4 chr1 1 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PA:Z:chr1,1,20S10M20S,30,null; PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination diff --git a/testdata/picard/sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam b/testdata/picard/sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam index 40c06f420..af6f9e6c8 100644 --- a/testdata/picard/sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam +++ b/testdata/picard/sam/MergeBamAlignment/contam.expected.MOVE_TO_TAG.sam @@ -1,5 +1,5 @@ @HD VN:1.5 SO:coordinate -@SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/net/sf/picard/sam/MergeBamAlignment/cliptest.fasta +@SQ SN:chr1 LN:1000 UR:file:testdata/net/sf/picard/sam/MergeBamAlignment/cliptest.fasta M5:17522ddd273279f4595f50fea9864734 @RG ID:0 SM:Hi,Mom! PL:ILLUMINA @PG ID:0 VN:1.0 CL:align! PN:myAligner frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:50 PG:Z:0 RG:Z:0 NM:i:0 UQ:i:0 diff --git a/testdata/picard/sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam b/testdata/picard/sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam index 5bead4783..2a3352fc2 100644 --- a/testdata/picard/sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam +++ b/testdata/picard/sam/MergeBamAlignment/contam.expected.NO_CHANGE.sam @@ -1,5 +1,5 @@ @HD VN:1.5 SO:coordinate -@SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/net/sf/picard/sam/MergeBamAlignment/cliptest.fasta +@SQ SN:chr1 LN:1000 UR:file:testdata/net/sf/picard/sam/MergeBamAlignment/cliptest.fasta M5:17522ddd273279f4595f50fea9864734 @RG ID:0 SM:Hi,Mom! PL:ILLUMINA @PG ID:0 VN:1.0 CL:align! PN:myAligner frag_multiple_primary_1 4 chr1 1 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination diff --git a/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.breaks.length.sam b/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.breaks.length.sam deleted file mode 100644 index 5b5276231..000000000 --- a/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.breaks.length.sam +++ /dev/null @@ -1,26 +0,0 @@ -@HD VN:1.0 SO:queryname -@SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta -@SQ SN:chr1_alt AH:* LN:201 M5:8e0c728a0fb8a73feb55f9a447f4b144 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta -@RG ID:0 SM:Hi,Mom! PL:ILLUMINA -@CO frag_multiple_primary_1 should be marked contaminant because the overclipped alignment has higher MAPQ, and the other alignment should be omitted -@CO frag_multiple_primary_2 should NOT be marked contaminant because the good alignment has higher MAPQ, and the overclipped alignment should be marked as secondary -@CO frag_primary_clipped should be marked contaminant because primary alignment is overclipped, and the secondary / supplementary should be omitted -@CO frag_secondary_clipped should NOT be marked contaminant because only secondary is overclipped, and will be preserved as-is -@CO r1_clipped_r2_clipped should be marked contaminant because at least one segment is overclipped -@CO r1_clipped_r2_perfect should be marked contaminant because at least one segment is overclipped -@CO r1_clipped_r2_unmapped should be marked contaminant because at least one segment is overclipped -frag_multiple_primary_1 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_multiple_primary_1 0 chr1 1 15 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_multiple_primary_2 0 chr1 1 15 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_primary_clipped 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_primary_clipped 256 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_primary_clipped 2048 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_secondary_clipped 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_secondary_clipped 256 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_clipped 97 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_clipped 145 chr1 51 30 20S10M20S chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_perfect 97 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_perfect 145 chr1 51 30 50M chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_unmapped 73 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_unmapped 133 chr1 51 0 * chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 diff --git a/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.breaks.md5.sam b/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.breaks.md5.sam deleted file mode 100644 index f237b692e..000000000 --- a/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.breaks.md5.sam +++ /dev/null @@ -1,26 +0,0 @@ -@HD VN:1.0 SO:queryname -@SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta -@SQ SN:chr1_alt AH:* LN:200 M5:dummy! UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta -@RG ID:0 SM:Hi,Mom! PL:ILLUMINA -@CO frag_multiple_primary_1 should be marked contaminant because the overclipped alignment has higher MAPQ, and the other alignment should be omitted -@CO frag_multiple_primary_2 should NOT be marked contaminant because the good alignment has higher MAPQ, and the overclipped alignment should be marked as secondary -@CO frag_primary_clipped should be marked contaminant because primary alignment is overclipped, and the secondary / supplementary should be omitted -@CO frag_secondary_clipped should NOT be marked contaminant because only secondary is overclipped, and will be preserved as-is -@CO r1_clipped_r2_clipped should be marked contaminant because at least one segment is overclipped -@CO r1_clipped_r2_perfect should be marked contaminant because at least one segment is overclipped -@CO r1_clipped_r2_unmapped should be marked contaminant because at least one segment is overclipped -frag_multiple_primary_1 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_multiple_primary_1 0 chr1 1 15 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_multiple_primary_2 0 chr1 1 15 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_primary_clipped 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_primary_clipped 256 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_primary_clipped 2048 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_secondary_clipped 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_secondary_clipped 256 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_clipped 97 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_clipped 145 chr1 51 30 20S10M20S chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_perfect 97 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_perfect 145 chr1 51 30 50M chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_unmapped 73 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_unmapped 133 chr1 51 0 * chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 diff --git a/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.sam b/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.sam deleted file mode 100644 index 2325869aa..000000000 --- a/testdata/picard/sam/MergeBamAlignment/specialHeader.aligned.sam +++ /dev/null @@ -1,26 +0,0 @@ -@HD VN:1.0 SO:queryname -@SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta -@SQ SN:chr1_alt AH:* LN:200 M5:8e0c728a0fb8a73feb55f9a447f4b144 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta -@RG ID:0 SM:Hi,Mom! PL:ILLUMINA -@CO frag_multiple_primary_1 should be marked contaminant because the overclipped alignment has higher MAPQ, and the other alignment should be omitted -@CO frag_multiple_primary_2 should NOT be marked contaminant because the good alignment has higher MAPQ, and the overclipped alignment should be marked as secondary -@CO frag_primary_clipped should be marked contaminant because primary alignment is overclipped, and the secondary / supplementary should be omitted -@CO frag_secondary_clipped should NOT be marked contaminant because only secondary is overclipped, and will be preserved as-is -@CO r1_clipped_r2_clipped should be marked contaminant because at least one segment is overclipped -@CO r1_clipped_r2_perfect should be marked contaminant because at least one segment is overclipped -@CO r1_clipped_r2_unmapped should be marked contaminant because at least one segment is overclipped -frag_multiple_primary_1 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_multiple_primary_1 0 chr1 1 15 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_multiple_primary_2 0 chr1 1 15 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_primary_clipped 0 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_primary_clipped 256 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_primary_clipped 2048 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_secondary_clipped 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_secondary_clipped 256 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_clipped 97 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_clipped 145 chr1 51 30 20S10M20S chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_perfect 97 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_perfect 145 chr1 51 30 50M chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_unmapped 73 chr1 1 30 20S10M20S chr1 51 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_unmapped 133 chr1 51 0 * chr1 1 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 diff --git a/testdata/picard/sam/MergeBamAlignment/specialHeader.dict b/testdata/picard/sam/MergeBamAlignment/specialHeader.dict deleted file mode 100644 index bc68c964a..000000000 --- a/testdata/picard/sam/MergeBamAlignment/specialHeader.dict +++ /dev/null @@ -1,3 +0,0 @@ -@HD VN:1.5 SO:unsorted -@SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta -@SQ SN:chr1_alt LN:200 M5:8e0c728a0fb8a73feb55f9a447f4b144 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta diff --git a/testdata/picard/sam/MergeBamAlignment/specialHeader.expected.sam b/testdata/picard/sam/MergeBamAlignment/specialHeader.expected.sam deleted file mode 100644 index e5344ec88..000000000 --- a/testdata/picard/sam/MergeBamAlignment/specialHeader.expected.sam +++ /dev/null @@ -1,17 +0,0 @@ -@HD VN:1.5 SO:coordinate -@SQ SN:chr1 LN:1000 M5:17522ddd273279f4595f50fea9864734 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta -@SQ SN:chr1_alt LN:200 AH:* M5:8e0c728a0fb8a73feb55f9a447f4b144 UR:file:testdata/picard/sam/MergeBamAlignment/specialHeaderTest.fasta -@RG ID:0 SM:Hi,Mom! PL:ILLUMINA -@PG ID:0 VN:1.0 CL:align! PN:myAligner -frag_multiple_primary_1 4 chr1 1 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination -frag_multiple_primary_2 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:50 PG:Z:0 RG:Z:0 NM:i:0 UQ:i:0 -frag_multiple_primary_2 256 chr1 1 15 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:0T0T0C0A0T1C0T0G1 PG:Z:0 RG:Z:0 NM:i:8 UQ:i:240 -frag_primary_clipped 4 chr1 1 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination -frag_secondary_clipped 0 chr1 1 30 50M * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:50 PG:Z:0 RG:Z:0 NM:i:0 UQ:i:0 -frag_secondary_clipped 256 chr1 1 30 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? MD:Z:0T0T0C0A0T1C0T0G1 PG:Z:0 RG:Z:0 NM:i:8 UQ:i:240 -r1_clipped_r2_clipped 109 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination -r1_clipped_r2_perfect 109 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination -r1_clipped_r2_unmapped 77 * 0 0 20S10M20S * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination -r1_clipped_r2_unmapped 141 * 0 0 * * 0 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 -r1_clipped_r2_clipped 157 * 0 0 20S10M20S * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination -r1_clipped_r2_perfect 157 * 0 0 50M * 0 0 TGGAGTGTTAACGTACTCTATTATTGTATTGTTTTTTTTTTGCCCTTAAA ?????????????????????????????????????????????????? PG:Z:0 RG:Z:0 CO:Z:Cross-species contamination diff --git a/testdata/picard/sam/MergeBamAlignment/specialHeader.fasta b/testdata/picard/sam/MergeBamAlignment/specialHeader.fasta deleted file mode 100644 index 8694bfccf..000000000 --- a/testdata/picard/sam/MergeBamAlignment/specialHeader.fasta +++ /dev/null @@ -1,26 +0,0 @@ ->chr1 -TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC -TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA -TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC -TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA -TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC -TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA -TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC -TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA -TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC -TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA -TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC -TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA -TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC -TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA -TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC -TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA -TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC -TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA -TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC -TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ->chr1_alt -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC -GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG -TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT \ No newline at end of file diff --git a/testdata/picard/sam/MergeBamAlignment/specialHeader.unmapped.sam b/testdata/picard/sam/MergeBamAlignment/specialHeader.unmapped.sam deleted file mode 100644 index ceb5f1917..000000000 --- a/testdata/picard/sam/MergeBamAlignment/specialHeader.unmapped.sam +++ /dev/null @@ -1,12 +0,0 @@ -@HD VN:1.0 SO:queryname -@RG ID:0 SM:Hi,Mom! PL:ILLUMINA -frag_multiple_primary_1 4 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_multiple_primary_2 4 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_primary_clipped 4 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -frag_secondary_clipped 4 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_clipped 77 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_clipped 141 * 0 0 * * 0 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_perfect 77 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_perfect 141 * 0 0 * * 0 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_unmapped 77 * 0 0 * * 0 0 TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC ?????????????????????????????????????????????????? RG:Z:0 -r1_clipped_r2_unmapped 141 * 0 0 * * 0 0 TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA ?????????????????????????????????????????????????? RG:Z:0 diff --git a/testdata/picard/sam/merger.2.dict b/testdata/picard/sam/merger.2.dict deleted file mode 100644 index 2d46e4cfc..000000000 --- a/testdata/picard/sam/merger.2.dict +++ /dev/null @@ -1,9 +0,0 @@ -@HD VN:1.0 SO:unsorted -@SQ SN:chr1 LN:101 UR:merger.fasta AH:* -@SQ SN:chr2 LN:101 UR:merger.fasta AH:* -@SQ SN:chr3 LN:101 UR:merger.fasta AH:* -@SQ SN:chr4 LN:101 UR:merger.fasta AH:* -@SQ SN:chr5 LN:101 UR:merger.fasta AH:* -@SQ SN:chr6 LN:101 UR:merger.fasta M5:7be2f5e7ee39e60a6c3b5b6a41178c6d -@SQ SN:chr7 LN:404 UR:merger.fasta M5:da488fc432cdaf2c20c96da473a7b630 -@SQ SN:chr8 LN:202 UR:merger.fasta M5:d339678efce576d5546e88b49a487b63 diff --git a/testdata/picard/sam/merger.dict b/testdata/picard/sam/merger.dict index 30fe241ff..797fb6fe2 100644 --- a/testdata/picard/sam/merger.dict +++ b/testdata/picard/sam/merger.dict @@ -1,9 +1,9 @@ @HD VN:1.0 SO:unsorted -@SQ SN:chr1 LN:101 UR:file:testdata/net/sf/picard/sam/merger.fasta M5:bd01f7e11515bb6beda8f7257902aa67 -@SQ SN:chr2 LN:101 UR:file:testdata/net/sf/picard/sam/merger.fasta M5:31c33e2155b3de5e2554b693c475b310 -@SQ SN:chr3 LN:101 UR:file:testdata/net/sf/picard/sam/merger.fasta M5:631593c6dd2048ae88dcce2bd505d295 -@SQ SN:chr4 LN:101 UR:file:testdata/net/sf/picard/sam/merger.fasta M5:c60cb92f1ee5b78053c92bdbfa19abf1 -@SQ SN:chr5 LN:101 UR:file:testdata/net/sf/picard/sam/merger.fasta M5:07ebc213c7611db0eacbb1590c3e9bda -@SQ SN:chr6 LN:101 UR:file:testdata/net/sf/picard/sam/merger.fasta M5:7be2f5e7ee39e60a6c3b5b6a41178c6d -@SQ SN:chr7 LN:404 UR:file:testdata/net/sf/picard/sam/merger.fasta M5:da488fc432cdaf2c20c96da473a7b630 -@SQ SN:chr8 LN:202 UR:file:testdata/net/sf/picard/sam/merger.fasta M5:d339678efce576d5546e88b49a487b63 +@SQ SN:chr1 LN:101 UR:merger.fasta M5:bd01f7e11515bb6beda8f7257902aa67 +@SQ SN:chr2 LN:101 UR:merger.fasta M5:31c33e2155b3de5e2554b693c475b310 +@SQ SN:chr3 LN:101 UR:merger.fasta M5:631593c6dd2048ae88dcce2bd505d295 +@SQ SN:chr4 LN:101 UR:merger.fasta M5:c60cb92f1ee5b78053c92bdbfa19abf1 +@SQ SN:chr5 LN:101 UR:merger.fasta M5:07ebc213c7611db0eacbb1590c3e9bda +@SQ SN:chr6 LN:101 UR:merger.fasta M5:7be2f5e7ee39e60a6c3b5b6a41178c6d +@SQ SN:chr7 LN:404 UR:merger.fasta M5:da488fc432cdaf2c20c96da473a7b630 +@SQ SN:chr8 LN:202 UR:merger.fasta M5:d339678efce576d5546e88b49a487b63