diff --git a/src/main/java/picard/sam/AbstractAlignmentMerger.java b/src/main/java/picard/sam/AbstractAlignmentMerger.java index f9bd6ab31..7475b88fa 100644 --- a/src/main/java/picard/sam/AbstractAlignmentMerger.java +++ b/src/main/java/picard/sam/AbstractAlignmentMerger.java @@ -57,11 +57,7 @@ import picard.PicardException; import java.io.File; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.Optional; +import java.util.*; /** * Abstract class that coordinates the general task of taking in a set of alignment information, @@ -105,6 +101,8 @@ private final SAMFileHeader header; private final List attributesToRetain = new ArrayList(); private final List attributesToRemove = new ArrayList(); + private Set attributesToReverse = new TreeSet<>(SAMRecord.TAGS_TO_REVERSE); + private Set attributesToReverseComplement = new TreeSet<>(SAMRecord.TAGS_TO_REVERSE_COMPLEMENT); protected final File referenceFasta; private final Integer read1BasesTrimmed; private final Integer read2BasesTrimmed; @@ -327,6 +325,20 @@ public AbstractAlignmentMerger(final File unmappedBamFile, final File targetBamF this.unmappingReadsStrategy = unmappingReadsStrategy; } + /** Gets the set of attributes to be reversed on reads marked as negative strand. */ + public Set getAttributesToReverse() { return attributesToReverse; } + + /** Sets the set of attributes to be reversed on reads marked as negative strand. */ + public void setAttributesToReverse(Set attributesToReverse) { this.attributesToReverse = attributesToReverse; } + + /** Gets the set of attributes to be reverse complemented on reads marked as negative strand. */ + public Set getAttributesToReverseComplement() { return attributesToReverseComplement; } + + /** Sets the set of attributes to be reverse complemented on reads marked as negative strand. */ + public void setAttributesToReverseComplement(Set attributesToReverseComplement) { + this.attributesToReverseComplement = attributesToReverseComplement; + } + /** Allows the caller to override the maximum records in RAM. */ public void setMaxRecordsInRam(final int maxRecordsInRam) { this.maxRecordsInRam = maxRecordsInRam; @@ -788,11 +800,7 @@ protected void setValuesFromAlignment(final SAMRecord rec, final SAMRecord align // If it's on the negative strand, reverse complement the bases // and reverse the order of the qualities if (rec.getReadNegativeStrandFlag()) { - if(needsSafeReverseComplement) { - rec.reverseComplement(); - } else { - rec.reverseComplement(true); - } + rec.reverseComplement(attributesToReverseComplement, attributesToReverse, !needsSafeReverseComplement); } } diff --git a/src/main/java/picard/sam/MergeBamAlignment.java b/src/main/java/picard/sam/MergeBamAlignment.java index 2ed07d872..c1011baf2 100644 --- a/src/main/java/picard/sam/MergeBamAlignment.java +++ b/src/main/java/picard/sam/MergeBamAlignment.java @@ -25,6 +25,7 @@ import htsjdk.samtools.SAMFileHeader.SortOrder; import htsjdk.samtools.SAMProgramRecord; +import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SamPairUtil; import htsjdk.samtools.util.Log; import picard.PicardException; @@ -35,9 +36,7 @@ import picard.cmdline.programgroups.SamOrBam; import java.io.File; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; +import java.util.*; /** * A command-line tool to merge BAM/SAM alignment info from a third-party aligner with the data in an @@ -154,6 +153,12 @@ " This overrides ATTRIBUTES_TO_RETAIN if they share common tags.") public List ATTRIBUTES_TO_REMOVE = new ArrayList(); + @Option(shortName="RV", doc="Attributes on negative strand reads that need to be reversed.") + public Set ATTRIBUTES_TO_REVERSE = new TreeSet<>(SAMRecord.TAGS_TO_REVERSE); + + @Option(shortName="RC", doc="Attributes on negative strand reads that need to be reverse complemented.") + public Set ATTRIBUTES_TO_REVERSE_COMPLEMENT = new TreeSet<>(SAMRecord.TAGS_TO_REVERSE_COMPLEMENT); + @Option(shortName = "R1_TRIM", doc = "The number of bases trimmed from the beginning of read 1 prior to alignment") public int READ1_TRIM = 0; @@ -268,6 +273,8 @@ protected int doWork() { merger.setMaxRecordsInRam(MAX_RECORDS_IN_RAM); merger.setKeepAlignerProperPairFlags(ALIGNER_PROPER_PAIR_FLAGS); merger.setIncludeSecondaryAlignments(INCLUDE_SECONDARY_ALIGNMENTS); + merger.setAttributesToReverse(ATTRIBUTES_TO_REVERSE); + merger.setAttributesToReverseComplement(ATTRIBUTES_TO_REVERSE_COMPLEMENT); merger.mergeAlignment(REFERENCE_SEQUENCE); merger.close(); diff --git a/src/test/java/picard/sam/MergeBamAlignmentTest.java b/src/test/java/picard/sam/MergeBamAlignmentTest.java index f3a031e38..b0c22c997 100644 --- a/src/test/java/picard/sam/MergeBamAlignmentTest.java +++ b/src/test/java/picard/sam/MergeBamAlignmentTest.java @@ -213,6 +213,13 @@ else if (sam.getReadName().equals("both_reads_present_only_first_aligns") || throw new Exception("Unexpected read name: " + sam.getReadName()); } + final boolean pos = !sam.getReadNegativeStrandFlag(); + Assert.assertEquals(sam.getAttribute("aa"), pos ? "Hello" : "olleH"); + Assert.assertEquals(sam.getAttribute("ab"), pos ? "ATTCGG" : "CCGAAT"); + Assert.assertEquals(sam.getAttribute("ac"), pos ? new byte[] {1,2,3} : new byte[] {3,2,1}); + Assert.assertEquals(sam.getAttribute("as"), pos ? new short[]{1,2,3} : new short[]{3,2,1}); + Assert.assertEquals(sam.getAttribute("ai"), pos ? new int[] {1,2,3} : new int[] {3,2,1}); + Assert.assertEquals(sam.getAttribute("af"), pos ? new float[]{1,2,3} : new float[]{3,2,1}); } // Quick test to make sure the program record gets picked up from the file if not specified @@ -1310,6 +1317,11 @@ private void doMergeAlignment(final File unmappedBam, final List alignedBa final SAMFileHeader.SortOrder sortOrder, final AbstractAlignmentMerger.UnmappingReadStrategy unmappingReadStrategy) { + final List tagsToRc = new ArrayList<>(SAMRecord.TAGS_TO_REVERSE_COMPLEMENT); + final List tagsToRev = new ArrayList<>(SAMRecord.TAGS_TO_REVERSE); + tagsToRc.add("ab"); + tagsToRev.addAll(Arrays.asList("aa", "ac", "as", "ai", "af")); + final List args = new ArrayList<>(Arrays.asList( "UNMAPPED_BAM=" + unmappedBam.getAbsolutePath(), "ALIGNED_READS_ONLY=" + alignReadsOnly, @@ -1362,6 +1374,12 @@ private void doMergeAlignment(final File unmappedBam, final List alignedBa if (attributesToRetain != null) { args.add("ATTRIBUTES_TO_RETAIN=" + attributesToRetain); } + for (final String t : tagsToRc) { + args.add("ATTRIBUTES_TO_REVERSE_COMPLEMENT=" + t); + } + for (final String t : tagsToRev) { + args.add("ATTRIBUTES_TO_REVERSE=" + t); + } if (includeSecondary != null) { args.add("INCLUDE_SECONDARY_ALIGNMENTS=" + includeSecondary); } diff --git a/testdata/picard/sam/unmapped.sam b/testdata/picard/sam/unmapped.sam index b47ad2bff..eaf1337b1 100644 --- a/testdata/picard/sam/unmapped.sam +++ b/testdata/picard/sam/unmapped.sam @@ -1,12 +1,12 @@ @HD VN:1.0 SO:queryname @RG ID:0 SM:Hi,Mom! PL:ILLUMINA -both_reads_align_clip_adapter 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 -both_reads_align_clip_adapter 141 * 0 0 * * 0 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 -both_reads_align_clip_marked 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 XT:i:97 -both_reads_align_clip_marked 141 * 0 0 * * 0 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 XT:i:97 -both_reads_present_only_first_aligns 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 -both_reads_present_only_first_aligns 141 * 0 0 * * 0 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 -neither_read_aligns_or_present 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 -neither_read_aligns_or_present 141 * 0 0 * * 0 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 -read_2_too_many_gaps 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 -read_2_too_many_gaps 141 * 0 0 * * 0 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +both_reads_align_clip_adapter 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 aa:Z:Hello ab:Z:ATTCGG ac:B:c,1,2,3 as:B:s,1,2,3 ai:B:i,1,2,3 af:B:f,1,2,3 +both_reads_align_clip_adapter 141 * 0 0 * * 0 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 aa:Z:Hello ab:Z:ATTCGG ac:B:c,1,2,3 as:B:s,1,2,3 ai:B:i,1,2,3 af:B:f,1,2,3 +both_reads_align_clip_marked 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 XT:i:97 aa:Z:Hello ab:Z:ATTCGG ac:B:c,1,2,3 as:B:s,1,2,3 ai:B:i,1,2,3 af:B:f,1,2,3 +both_reads_align_clip_marked 141 * 0 0 * * 0 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 XT:i:97 aa:Z:Hello ab:Z:ATTCGG ac:B:c,1,2,3 as:B:s,1,2,3 ai:B:i,1,2,3 af:B:f,1,2,3 +both_reads_present_only_first_aligns 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 aa:Z:Hello ab:Z:ATTCGG ac:B:c,1,2,3 as:B:s,1,2,3 ai:B:i,1,2,3 af:B:f,1,2,3 +both_reads_present_only_first_aligns 141 * 0 0 * * 0 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 aa:Z:Hello ab:Z:ATTCGG ac:B:c,1,2,3 as:B:s,1,2,3 ai:B:i,1,2,3 af:B:f,1,2,3 +neither_read_aligns_or_present 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 aa:Z:Hello ab:Z:ATTCGG ac:B:c,1,2,3 as:B:s,1,2,3 ai:B:i,1,2,3 af:B:f,1,2,3 +neither_read_aligns_or_present 141 * 0 0 * * 0 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 aa:Z:Hello ab:Z:ATTCGG ac:B:c,1,2,3 as:B:s,1,2,3 ai:B:i,1,2,3 af:B:f,1,2,3 +read_2_too_many_gaps 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 aa:Z:Hello ab:Z:ATTCGG ac:B:c,1,2,3 as:B:s,1,2,3 ai:B:i,1,2,3 af:B:f,1,2,3 +read_2_too_many_gaps 141 * 0 0 * * 0 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 aa:Z:Hello ab:Z:ATTCGG ac:B:c,1,2,3 as:B:s,1,2,3 ai:B:i,1,2,3 af:B:f,1,2,3