From 3418ea0df3160847d83f780eb63354d0b89de209 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Thu, 20 Apr 2017 10:58:20 -0400 Subject: [PATCH 1/7] - added a "unknown" enum value to the SortOrder enum. It behaves exactly like unsorted, but the sam-spec allows for it. - wrapped getSortOrder() with a try-catch to translate any unknown values to 'unknown' --- src/main/java/htsjdk/samtools/SAMFileHeader.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/main/java/htsjdk/samtools/SAMFileHeader.java b/src/main/java/htsjdk/samtools/SAMFileHeader.java index b94598185..9acf660bf 100644 --- a/src/main/java/htsjdk/samtools/SAMFileHeader.java +++ b/src/main/java/htsjdk/samtools/SAMFileHeader.java @@ -24,6 +24,7 @@ package htsjdk.samtools; +import htsjdk.samtools.util.Log; import htsjdk.samtools.util.StringLineReader; import java.io.StringWriter; @@ -50,6 +51,7 @@ public static final Set ACCEPTABLE_VERSIONS = new HashSet(Arrays.asList("1.0", "1.3", "1.4", "1.5")); + private static final Log log = Log.getInstance(SAMFileHeader.class); /** * These tags are of known type, so don't need a type field in the text representation. */ @@ -65,8 +67,8 @@ * Ways in which a SAM or BAM may be sorted. */ public enum SortOrder { - unsorted(null), + unknown(null), queryname(SAMRecordQueryNameComparator.class), coordinate(SAMRecordCoordinateComparator.class), duplicate(SAMRecordDuplicateComparator.class); // NB: this is not in the SAM spec! @@ -253,7 +255,13 @@ public SortOrder getSortOrder() { if (so == null || so.equals("unknown")) { return SortOrder.unsorted; } - return SortOrder.valueOf((String) so); + try { + SortOrder sortOrder = SortOrder.valueOf(so); + return sortOrder; + } catch (IllegalArgumentException e) { + log.warn("Found non conforming header SO tag: "+ so + ". Treating as 'unknown'."); + return SortOrder.unknown; + } } public void setSortOrder(final SortOrder so) { From cb0fe3065c91d036f7e120e27544ff1639bb35e9 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Thu, 20 Apr 2017 11:25:18 -0400 Subject: [PATCH 2/7] - added an error for non-conforming tags. - added to the validation a check for both SO and GO to see that they are legal values. --- src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java | 18 ++++++++++++++++++ src/main/java/htsjdk/samtools/SAMValidationError.java | 3 +++ 2 files changed, 21 insertions(+) diff --git a/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java b/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java index 402ea3ce8..15feed2ff 100644 --- a/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java +++ b/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java @@ -228,6 +228,24 @@ private void parseHDLine(final ParsedHeaderLine parsedHeaderLine) { if (!parsedHeaderLine.requireTag(SAMFileHeader.VERSION_TAG)) { return; } + final String SOString = parsedHeaderLine.getValue(SAMFileHeader.SORT_ORDER_TAG); + try { + SAMFileHeader.SortOrder.valueOf(SOString); + } catch (IllegalArgumentException e) { + reportErrorParsingLine(HEADER_LINE_START + parsedHeaderLine.getHeaderRecordType() + + " line has non-conforming SO tag value: "+ SOString + ".", + SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE, null); + } + + final String GOString = parsedHeaderLine.getValue(SAMFileHeader.GROUP_ORDER_TAG); + try { + SAMFileHeader.GroupOrder.valueOf(GOString); + } catch (IllegalArgumentException e) { + reportErrorParsingLine(HEADER_LINE_START + parsedHeaderLine.getHeaderRecordType() + + " line has non-conforming GO tag value: "+ GOString + ".", + SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE, null); + } + transferAttributes(mFileHeader, parsedHeaderLine.mKeyValuePairs); } diff --git a/src/main/java/htsjdk/samtools/SAMValidationError.java b/src/main/java/htsjdk/samtools/SAMValidationError.java index d560b119e..452e92cf5 100644 --- a/src/main/java/htsjdk/samtools/SAMValidationError.java +++ b/src/main/java/htsjdk/samtools/SAMValidationError.java @@ -171,6 +171,9 @@ HEADER_RECORD_MISSING_REQUIRED_TAG, + /** Header tag contains illegal value */ + HEADER_TAG_NON_CONFORMING_VALUE, + /** Date string is not ISO-8601 */ INVALID_DATE_STRING(Severity.WARNING), From 0e1d1b164e515f9adb154839f5933ed6509678c3 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Thu, 20 Apr 2017 11:33:57 -0400 Subject: [PATCH 3/7] - aesthetic changes --- src/main/java/htsjdk/samtools/SAMFileHeader.java | 28 +++++++++++------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/main/java/htsjdk/samtools/SAMFileHeader.java b/src/main/java/htsjdk/samtools/SAMFileHeader.java index 9acf660bf..ba13fce43 100644 --- a/src/main/java/htsjdk/samtools/SAMFileHeader.java +++ b/src/main/java/htsjdk/samtools/SAMFileHeader.java @@ -49,14 +49,14 @@ public static final String GROUP_ORDER_TAG = "GO"; public static final String CURRENT_VERSION = "1.5"; public static final Set ACCEPTABLE_VERSIONS = - new HashSet(Arrays.asList("1.0", "1.3", "1.4", "1.5")); + new HashSet<>(Arrays.asList("1.0", "1.3", "1.4", "1.5")); private static final Log log = Log.getInstance(SAMFileHeader.class); /** * These tags are of known type, so don't need a type field in the text representation. */ public static final Set STANDARD_TAGS = - new HashSet(Arrays.asList(VERSION_TAG, SORT_ORDER_TAG, GROUP_ORDER_TAG)); + new HashSet<>(Arrays.asList(VERSION_TAG, SORT_ORDER_TAG, GROUP_ORDER_TAG)); @Override Set getStandardTags() { @@ -109,15 +109,15 @@ public SAMRecordComparator getComparatorInstance() { } private List mReadGroups = - new ArrayList(); - private List mProgramRecords = new ArrayList(); + new ArrayList<>(); + private List mProgramRecords = new ArrayList<>(); private final Map mReadGroupMap = - new HashMap(); - private final Map mProgramRecordMap = new HashMap(); + new HashMap<>(); + private final Map mProgramRecordMap = new HashMap<>(); private SAMSequenceDictionary mSequenceDictionary = new SAMSequenceDictionary(); - final private List mComments = new ArrayList(); + final private List mComments = new ArrayList<>(); private String textHeader; - private final List mValidationErrors = new ArrayList(); + private final List mValidationErrors = new ArrayList<>(); public SAMFileHeader() { setAttribute(VERSION_TAG, CURRENT_VERSION); @@ -130,11 +130,11 @@ public SAMFileHeader(final SAMSequenceDictionary dict) { } public String getVersion() { - return (String) getAttribute("VN"); + return getAttribute("VN"); } public String getCreator() { - return (String) getAttribute("CR"); + return getAttribute("CR"); } public SAMSequenceDictionary getSequenceDictionary() { @@ -256,8 +256,7 @@ public SortOrder getSortOrder() { return SortOrder.unsorted; } try { - SortOrder sortOrder = SortOrder.valueOf(so); - return sortOrder; + return SortOrder.valueOf(so); } catch (IllegalArgumentException e) { log.warn("Found non conforming header SO tag: "+ so + ". Treating as 'unknown'."); return SortOrder.unknown; @@ -272,7 +271,7 @@ public GroupOrder getGroupOrder() { if (getAttribute("GO") == null) { return GroupOrder.none; } - return GroupOrder.valueOf((String)getAttribute("GO")); + return GroupOrder.valueOf(getAttribute("GO")); } public void setGroupOrder(final GroupOrder go) { @@ -380,7 +379,7 @@ public String getSAMString() { public static class PgIdGenerator { private int recordCounter; - private final Set idsThatAreAlreadyTaken = new HashSet(); + private final Set idsThatAreAlreadyTaken = new HashSet<>(); public PgIdGenerator(final SAMFileHeader header) { for (final SAMProgramRecord pgRecord : header.getProgramRecords()) { @@ -408,7 +407,6 @@ public String getNonCollidingId(final String recordId) { idsThatAreAlreadyTaken.add(newId); return newId; } - } } } From 460912dc6491f93d8e158c50f4163a1920236978 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Thu, 20 Apr 2017 11:39:42 -0400 Subject: [PATCH 4/7] - lowercased variable names --- src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java b/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java index 15feed2ff..908e8360b 100644 --- a/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java +++ b/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java @@ -228,21 +228,22 @@ private void parseHDLine(final ParsedHeaderLine parsedHeaderLine) { if (!parsedHeaderLine.requireTag(SAMFileHeader.VERSION_TAG)) { return; } - final String SOString = parsedHeaderLine.getValue(SAMFileHeader.SORT_ORDER_TAG); + + final String soString = parsedHeaderLine.getValue(SAMFileHeader.SORT_ORDER_TAG); try { - SAMFileHeader.SortOrder.valueOf(SOString); + if (soString != null) SAMFileHeader.SortOrder.valueOf(soString); } catch (IllegalArgumentException e) { reportErrorParsingLine(HEADER_LINE_START + parsedHeaderLine.getHeaderRecordType() + - " line has non-conforming SO tag value: "+ SOString + ".", + " line has non-conforming SO tag value: "+ soString + ".", SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE, null); } - final String GOString = parsedHeaderLine.getValue(SAMFileHeader.GROUP_ORDER_TAG); + final String goString = parsedHeaderLine.getValue(SAMFileHeader.GROUP_ORDER_TAG); try { - SAMFileHeader.GroupOrder.valueOf(GOString); + if (goString != null) SAMFileHeader.GroupOrder.valueOf(goString); } catch (IllegalArgumentException e) { reportErrorParsingLine(HEADER_LINE_START + parsedHeaderLine.getHeaderRecordType() + - " line has non-conforming GO tag value: "+ GOString + ".", + " line has non-conforming GO tag value: "+ goString + ".", SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE, null); } From 701323163e860feec50e89a4466c989cee8acf98 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Thu, 20 Apr 2017 16:27:15 -0400 Subject: [PATCH 5/7] - added tests covering SO and GO tags --- src/test/java/htsjdk/samtools/ValidateSamFileTest.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/test/java/htsjdk/samtools/ValidateSamFileTest.java b/src/test/java/htsjdk/samtools/ValidateSamFileTest.java index 16bd6e1ce..292758b8c 100644 --- a/src/test/java/htsjdk/samtools/ValidateSamFileTest.java +++ b/src/test/java/htsjdk/samtools/ValidateSamFileTest.java @@ -499,10 +499,24 @@ public void duplicateReadsOutOfOrder() throws Exception { "@RG\tID:0\tSM:Hi,Mom!\n" + "E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA"; + final String SOTagCorrectlyProcessTestData = + "@HD\tVN:1.0\tSO:NOTKNOWN\n" + + "@SQ\tSN:chr1\tLN:101\n" + + "@RG\tID:0\tSM:Hi,Mom!\n" + + "E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA"; + + final String GOTagCorrectlyProcessTestData = + "@HD\tVN:1.0\tGO:NOTKNOWN\n" + + "@SQ\tSN:chr1\tLN:101\n" + + "@RG\tID:0\tSM:Hi,Mom!\n" + + "E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA"; + return new Object[][]{ {E2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.E2_BASE_EQUALS_PRIMARY_BASE}, {E2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_E2_LENGTH}, - {U2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_U2_LENGTH} + {U2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_U2_LENGTH}, + {SOTagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE}, + {GOTagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE} }; } From e0b7ef222360f998a17772e37acc492bc8ee40dc Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Thu, 20 Apr 2017 16:52:24 -0400 Subject: [PATCH 6/7] - fixed a test that was broken for a while... --- .../java/htsjdk/samtools/cram/lossy/QualityScorePreservationTest.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/test/java/htsjdk/samtools/cram/lossy/QualityScorePreservationTest.java b/src/test/java/htsjdk/samtools/cram/lossy/QualityScorePreservationTest.java index 575485e82..a33766762 100644 --- a/src/test/java/htsjdk/samtools/cram/lossy/QualityScorePreservationTest.java +++ b/src/test/java/htsjdk/samtools/cram/lossy/QualityScorePreservationTest.java @@ -97,12 +97,10 @@ public void test2() { } } - private SAMFileHeader samFileHeader = new SAMFileHeader(); - private SAMRecord buildSAMRecord(String seqName, String line) { ByteArrayOutputStream baos = new ByteArrayOutputStream(); try { - baos.write("@HD\tVN:1.0\tGO:none SO:coordinate\n".getBytes()); + baos.write("@HD\tVN:1.0\tGO:none\tSO:coordinate\n".getBytes()); baos.write(("@SQ\tSN:" + seqName + "\tLN:247249719\n").getBytes()); baos.write(line.replaceAll("\\s+", "\t").getBytes()); baos.close(); From 2296653fc2afb51022a9e299a2c40286ec3b4a7a Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Fri, 21 Apr 2017 07:22:44 -0400 Subject: [PATCH 7/7] - responding to review comments --- src/main/java/htsjdk/samtools/SAMFileHeader.java | 63 +++++++++++++++--------- 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/src/main/java/htsjdk/samtools/SAMFileHeader.java b/src/main/java/htsjdk/samtools/SAMFileHeader.java index ba13fce43..f2750d4cc 100644 --- a/src/main/java/htsjdk/samtools/SAMFileHeader.java +++ b/src/main/java/htsjdk/samtools/SAMFileHeader.java @@ -24,6 +24,7 @@ package htsjdk.samtools; +import htsjdk.samtools.util.CollectionUtil; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.StringLineReader; @@ -48,8 +49,10 @@ public static final String SORT_ORDER_TAG = "SO"; public static final String GROUP_ORDER_TAG = "GO"; public static final String CURRENT_VERSION = "1.5"; - public static final Set ACCEPTABLE_VERSIONS = - new HashSet<>(Arrays.asList("1.0", "1.3", "1.4", "1.5")); + public static final Set ACCEPTABLE_VERSIONS = CollectionUtil.makeSet("1.0", "1.3", "1.4", "1.5"); + + private SortOrder sortOrder = null; + private GroupOrder groupOrder = null; private static final Log log = Log.getInstance(SAMFileHeader.class); /** @@ -68,10 +71,10 @@ */ public enum SortOrder { unsorted(null), - unknown(null), queryname(SAMRecordQueryNameComparator.class), coordinate(SAMRecordCoordinateComparator.class), - duplicate(SAMRecordDuplicateComparator.class); // NB: this is not in the SAM spec! + duplicate(SAMRecordDuplicateComparator.class), // NB: this is not in the SAM spec! + unknown(null); private final Class comparator; @@ -108,11 +111,9 @@ public SAMRecordComparator getComparatorInstance() { none, query, reference } - private List mReadGroups = - new ArrayList<>(); + private List mReadGroups = new ArrayList<>(); private List mProgramRecords = new ArrayList<>(); - private final Map mReadGroupMap = - new HashMap<>(); + private final Map mReadGroupMap = new HashMap<>(); private final Map mProgramRecordMap = new HashMap<>(); private SAMSequenceDictionary mSequenceDictionary = new SAMSequenceDictionary(); final private List mComments = new ArrayList<>(); @@ -130,7 +131,7 @@ public SAMFileHeader(final SAMSequenceDictionary dict) { } public String getVersion() { - return getAttribute("VN"); + return getAttribute(VERSION_TAG); } public String getCreator() { @@ -251,31 +252,47 @@ public SAMProgramRecord createProgramRecord() { } public SortOrder getSortOrder() { - final String so = getAttribute("SO"); - if (so == null || so.equals("unknown")) { - return SortOrder.unsorted; - } - try { - return SortOrder.valueOf(so); - } catch (IllegalArgumentException e) { - log.warn("Found non conforming header SO tag: "+ so + ". Treating as 'unknown'."); - return SortOrder.unknown; + if (sortOrder == null) { + final String so = getAttribute(SORT_ORDER_TAG); + if (so == null) { + sortOrder = SortOrder.unsorted; + } else { + try { + return SortOrder.valueOf(so); + } catch (IllegalArgumentException e) { + log.warn("Found non conforming header SO tag: " + so + ". Treating as 'unknown'."); + sortOrder = SortOrder.unknown; + } + } } + return sortOrder; } public void setSortOrder(final SortOrder so) { - setAttribute("SO", so.name()); + sortOrder = so; + setAttribute(SORT_ORDER_TAG, so.name()); } public GroupOrder getGroupOrder() { - if (getAttribute("GO") == null) { - return GroupOrder.none; + if (groupOrder == null) { + final String go = getAttribute(GROUP_ORDER_TAG); + if (go == null) { + groupOrder = GroupOrder.none; + } else { + try { + return GroupOrder.valueOf(go); + } catch (IllegalArgumentException e) { + log.warn("Found non conforming header GO tag: " + go + ". Treating as 'none'."); + groupOrder = GroupOrder.none; + } + } } - return GroupOrder.valueOf(getAttribute("GO")); + return groupOrder; } public void setGroupOrder(final GroupOrder go) { - setAttribute("GO", go.name()); + groupOrder = go; + setAttribute(GROUP_ORDER_TAG, go.name()); } /**