diff --git a/src/main/java/htsjdk/samtools/SAMFileHeader.java b/src/main/java/htsjdk/samtools/SAMFileHeader.java index b94598185..f2750d4cc 100644 --- a/src/main/java/htsjdk/samtools/SAMFileHeader.java +++ b/src/main/java/htsjdk/samtools/SAMFileHeader.java @@ -24,6 +24,8 @@ package htsjdk.samtools; +import htsjdk.samtools.util.CollectionUtil; +import htsjdk.samtools.util.Log; import htsjdk.samtools.util.StringLineReader; import java.io.StringWriter; @@ -47,14 +49,17 @@ public static final String SORT_ORDER_TAG = "SO"; public static final String GROUP_ORDER_TAG = "GO"; public static final String CURRENT_VERSION = "1.5"; - public static final Set ACCEPTABLE_VERSIONS = - new HashSet(Arrays.asList("1.0", "1.3", "1.4", "1.5")); + public static final Set ACCEPTABLE_VERSIONS = CollectionUtil.makeSet("1.0", "1.3", "1.4", "1.5"); + private SortOrder sortOrder = null; + private GroupOrder groupOrder = null; + + private static final Log log = Log.getInstance(SAMFileHeader.class); /** * These tags are of known type, so don't need a type field in the text representation. */ public static final Set STANDARD_TAGS = - new HashSet(Arrays.asList(VERSION_TAG, SORT_ORDER_TAG, GROUP_ORDER_TAG)); + new HashSet<>(Arrays.asList(VERSION_TAG, SORT_ORDER_TAG, GROUP_ORDER_TAG)); @Override Set getStandardTags() { @@ -65,11 +70,11 @@ * Ways in which a SAM or BAM may be sorted. */ public enum SortOrder { - unsorted(null), queryname(SAMRecordQueryNameComparator.class), coordinate(SAMRecordCoordinateComparator.class), - duplicate(SAMRecordDuplicateComparator.class); // NB: this is not in the SAM spec! + duplicate(SAMRecordDuplicateComparator.class), // NB: this is not in the SAM spec! 
+ unknown(null); private final Class comparator; @@ -106,16 +111,14 @@ public SAMRecordComparator getComparatorInstance() { none, query, reference } - private List mReadGroups = - new ArrayList(); - private List mProgramRecords = new ArrayList(); - private final Map mReadGroupMap = - new HashMap(); - private final Map mProgramRecordMap = new HashMap(); + private List mReadGroups = new ArrayList<>(); + private List mProgramRecords = new ArrayList<>(); + private final Map mReadGroupMap = new HashMap<>(); + private final Map mProgramRecordMap = new HashMap<>(); private SAMSequenceDictionary mSequenceDictionary = new SAMSequenceDictionary(); - final private List mComments = new ArrayList(); + final private List mComments = new ArrayList<>(); private String textHeader; - private final List mValidationErrors = new ArrayList(); + private final List mValidationErrors = new ArrayList<>(); public SAMFileHeader() { setAttribute(VERSION_TAG, CURRENT_VERSION); @@ -128,11 +131,11 @@ public SAMFileHeader(final SAMSequenceDictionary dict) { } public String getVersion() { - return (String) getAttribute("VN"); + return getAttribute(VERSION_TAG); } public String getCreator() { - return (String) getAttribute("CR"); + return getAttribute("CR"); } public SAMSequenceDictionary getSequenceDictionary() { @@ -249,26 +252,47 @@ public SAMProgramRecord createProgramRecord() { } public SortOrder getSortOrder() { - final String so = getAttribute("SO"); - if (so == null || so.equals("unknown")) { - return SortOrder.unsorted; + if (sortOrder == null) { + final String so = getAttribute(SORT_ORDER_TAG); + if (so == null) { + sortOrder = SortOrder.unsorted; + } else { + try { + sortOrder = SortOrder.valueOf(so); /* cache the parsed value like the other branches; falls through to the shared return below */ + } catch (IllegalArgumentException e) { + log.warn("Found non conforming header SO tag: " + so + ". 
Treating as 'unknown'."); + sortOrder = SortOrder.unknown; + } + } } - return SortOrder.valueOf((String) so); + return sortOrder; } public void setSortOrder(final SortOrder so) { - setAttribute("SO", so.name()); + sortOrder = so; + setAttribute(SORT_ORDER_TAG, so.name()); } public GroupOrder getGroupOrder() { - if (getAttribute("GO") == null) { - return GroupOrder.none; + if (groupOrder == null) { + final String go = getAttribute(GROUP_ORDER_TAG); + if (go == null) { + groupOrder = GroupOrder.none; + } else { + try { + groupOrder = GroupOrder.valueOf(go); /* cache the parsed value like the other branches; falls through to the shared return below */ + } catch (IllegalArgumentException e) { + log.warn("Found non conforming header GO tag: " + go + ". Treating as 'none'."); + groupOrder = GroupOrder.none; + } + } } - return GroupOrder.valueOf((String)getAttribute("GO")); + return groupOrder; } public void setGroupOrder(final GroupOrder go) { - setAttribute("GO", go.name()); + groupOrder = go; + setAttribute(GROUP_ORDER_TAG, go.name()); } /** @@ -372,7 +396,7 @@ public String getSAMString() { public static class PgIdGenerator { private int recordCounter; - private final Set idsThatAreAlreadyTaken = new HashSet(); + private final Set idsThatAreAlreadyTaken = new HashSet<>(); public PgIdGenerator(final SAMFileHeader header) { for (final SAMProgramRecord pgRecord : header.getProgramRecords()) { @@ -400,7 +424,6 @@ public String getNonCollidingId(final String recordId) { idsThatAreAlreadyTaken.add(newId); return newId; } - } } } diff --git a/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java b/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java index 402ea3ce8..908e8360b 100644 --- a/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java +++ b/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java @@ -228,6 +228,25 @@ private void parseHDLine(final ParsedHeaderLine parsedHeaderLine) { if (!parsedHeaderLine.requireTag(SAMFileHeader.VERSION_TAG)) { return; } + + final String soString = parsedHeaderLine.getValue(SAMFileHeader.SORT_ORDER_TAG); + try { + if (soString != 
null) SAMFileHeader.SortOrder.valueOf(soString); + } catch (IllegalArgumentException e) { + reportErrorParsingLine(HEADER_LINE_START + parsedHeaderLine.getHeaderRecordType() + + " line has non-conforming SO tag value: "+ soString + ".", + SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE, null); + } + + final String goString = parsedHeaderLine.getValue(SAMFileHeader.GROUP_ORDER_TAG); + try { + if (goString != null) SAMFileHeader.GroupOrder.valueOf(goString); + } catch (IllegalArgumentException e) { + reportErrorParsingLine(HEADER_LINE_START + parsedHeaderLine.getHeaderRecordType() + + " line has non-conforming GO tag value: "+ goString + ".", + SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE, null); + } + transferAttributes(mFileHeader, parsedHeaderLine.mKeyValuePairs); } diff --git a/src/main/java/htsjdk/samtools/SAMValidationError.java b/src/main/java/htsjdk/samtools/SAMValidationError.java index d560b119e..452e92cf5 100644 --- a/src/main/java/htsjdk/samtools/SAMValidationError.java +++ b/src/main/java/htsjdk/samtools/SAMValidationError.java @@ -171,6 +171,9 @@ HEADER_RECORD_MISSING_REQUIRED_TAG, + /** Header tag contains illegal value */ + HEADER_TAG_NON_CONFORMING_VALUE, + /** Date string is not ISO-8601 */ INVALID_DATE_STRING(Severity.WARNING), diff --git a/src/test/java/htsjdk/samtools/ValidateSamFileTest.java b/src/test/java/htsjdk/samtools/ValidateSamFileTest.java index 16bd6e1ce..292758b8c 100644 --- a/src/test/java/htsjdk/samtools/ValidateSamFileTest.java +++ b/src/test/java/htsjdk/samtools/ValidateSamFileTest.java @@ -499,10 +499,24 @@ public void duplicateReadsOutOfOrder() throws Exception { "@RG\tID:0\tSM:Hi,Mom!\n" + "E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA"; + final String SOTagCorrectlyProcessTestData = + "@HD\tVN:1.0\tSO:NOTKNOWN\n" + + "@SQ\tSN:chr1\tLN:101\n" + + "@RG\tID:0\tSM:Hi,Mom!\n" + + "E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA"; + + final String 
GOTagCorrectlyProcessTestData = + "@HD\tVN:1.0\tGO:NOTKNOWN\n" + + "@SQ\tSN:chr1\tLN:101\n" + + "@RG\tID:0\tSM:Hi,Mom!\n" + + "E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA"; + return new Object[][]{ {E2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.E2_BASE_EQUALS_PRIMARY_BASE}, {E2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_E2_LENGTH}, - {U2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_U2_LENGTH} + {U2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_U2_LENGTH}, + {SOTagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE}, + {GOTagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE} }; } diff --git a/src/test/java/htsjdk/samtools/cram/lossy/QualityScorePreservationTest.java b/src/test/java/htsjdk/samtools/cram/lossy/QualityScorePreservationTest.java index 575485e82..a33766762 100644 --- a/src/test/java/htsjdk/samtools/cram/lossy/QualityScorePreservationTest.java +++ b/src/test/java/htsjdk/samtools/cram/lossy/QualityScorePreservationTest.java @@ -97,12 +97,10 @@ public void test2() { } } - private SAMFileHeader samFileHeader = new SAMFileHeader(); - private SAMRecord buildSAMRecord(String seqName, String line) { ByteArrayOutputStream baos = new ByteArrayOutputStream(); try { - baos.write("@HD\tVN:1.0\tGO:none SO:coordinate\n".getBytes()); + baos.write("@HD\tVN:1.0\tGO:none\tSO:coordinate\n".getBytes()); baos.write(("@SQ\tSN:" + seqName + "\tLN:247249719\n").getBytes()); baos.write(line.replaceAll("\\s+", "\t").getBytes()); baos.close();