diff --git a/src/main/java/htsjdk/samtools/SamFileValidator.java b/src/main/java/htsjdk/samtools/SamFileValidator.java index cf18a7f8b..1bfc8db85 100644 --- a/src/main/java/htsjdk/samtools/SamFileValidator.java +++ b/src/main/java/htsjdk/samtools/SamFileValidator.java @@ -1,7 +1,7 @@ /* * The MIT License * - * Copyright (c) 2009 The Broad Institute + * Copyright (c) 2009-2016 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -286,8 +286,7 @@ private void validateSamRecordsAndQualityFormat(final Iterable samRec if (cigarIsValid) { try { validateNmTag(record, recordNumber); - } - catch (SAMException e) { + } catch (SAMException e) { if (hasValidSortOrder) { // If a CRAM file has an invalid sort order, the ReferenceFileWalker will throw a // SAMException due to an out of order request when retrieving reference bases during NM @@ -539,12 +538,12 @@ private void validateHeader(final SAMFileHeader fileHeader) { "A platform (PL) attribute was not found for read group ", readGroupID)); } - else { + else { // NB: cannot be null, so not catching a NPE try { SAMReadGroupRecord.PlatformValue.valueOf(platformValue.toUpperCase()); } catch (IllegalArgumentException e) { - addError(new SAMValidationError(Type.INVALID_PLATFORM_VALUE, + addError(new SAMValidationError(Type.INVALID_PLATFORM_VALUE, "The platform (PL) attribute (" + platformValue + ") + was not one of the valid values for read group ", readGroupID)); } diff --git a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileWalker.java b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileWalker.java index d66f0f870..ac7588818 100644 --- a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileWalker.java +++ b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileWalker.java @@ -1,7 +1,7 @@ /* * The MIT License * - * Copyright (c) 2009 The Broad Institute + * Copyright (c) 2009-2016 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -83,7 +83,7 @@ public ReferenceSequence get(final int sequenceIndex) { } referenceSequence = null; - if(referenceSequenceFile.isIndexed()) { + if(referenceSequenceFile.isIndexed() && referenceSequenceFile.getSequenceDictionary() != null) { final SAMSequenceRecord samSequenceRecord = referenceSequenceFile.getSequenceDictionary().getSequence(sequenceIndex); if(samSequenceRecord != null) { referenceSequence = referenceSequenceFile.getSequence(samSequenceRecord.getSequenceName()) ; diff --git a/src/test/java/htsjdk/samtools/ValidateSamFileTest.java b/src/test/java/htsjdk/samtools/ValidateSamFileTest.java index 33a34da3e..406024cda 100644 --- a/src/test/java/htsjdk/samtools/ValidateSamFileTest.java +++ b/src/test/java/htsjdk/samtools/ValidateSamFileTest.java @@ -1,7 +1,7 @@ /* * The MIT License * - * Copyright (c) 2009 The Broad Institute + * Copyright (c) 2009-2016 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -27,6 +27,7 @@ import htsjdk.samtools.BamIndexValidator.IndexValidationStringency; import htsjdk.samtools.metrics.MetricBase; import htsjdk.samtools.metrics.MetricsFile; +import htsjdk.samtools.reference.FastaSequenceFile; import htsjdk.samtools.reference.ReferenceSequence; import htsjdk.samtools.reference.ReferenceSequenceFile; import htsjdk.samtools.util.CloserUtil; @@ -64,6 +65,20 @@ public void testValidSamFile() throws Exception { } @Test + public void testValidCRAMFileWithoutSeqDict() throws Exception { + final File reference = new File(TEST_DATA_DIR, "nm_tag_validation.fa"); + final SamReader samReader = SamReaderFactory + .makeDefault() + .validationStringency(ValidationStringency.SILENT) + .referenceSequence(reference) + .open(new File(TEST_DATA_DIR, "nm_tag_validation.cram")); + final Histogram results = executeValidation(samReader, + new FastaSequenceFile(reference, true), + IndexValidationStringency.EXHAUSTIVE); + Assert.assertTrue(!results.isEmpty()); + } + + @Test public void testSamFileVersion1pt5() throws Exception { final SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(new File(TEST_DATA_DIR, "test_samfile_version_1pt5.bam")); final Histogram results = executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE); @@ -231,26 +246,32 @@ public void testNmFlagValidation() throws IOException { final Histogram results = executeValidation(samBuilder.getSamReader(), new ReferenceSequenceFile() { private int index = 0; + @Override public SAMSequenceDictionary getSequenceDictionary() { return null; } + @Override public ReferenceSequence nextSequence() { final byte[] bases = new byte[10000]; Arrays.fill(bases, (byte) 'A'); return new ReferenceSequence("foo", index++, bases); } + @Override public void reset() { this.index = 0; } + @Override public boolean isIndexed() { return false; } + @Override public ReferenceSequence getSequence(final String contig) { throw new UnsupportedOperationException(); } + @Override public ReferenceSequence getSubsequenceAt(final String contig, final long start, final long stop) { throw new UnsupportedOperationException(); } diff --git a/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.cram b/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.cram new file mode 100644 index 000000000..57c58dfd0 Binary files /dev/null and b/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.cram differ diff --git a/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.fa b/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.fa new file mode 100644 index 000000000..7c2ec2a88 --- /dev/null +++ b/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.fa @@ -0,0 +1,71 @@ +>17 17:1-4200 +AAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAA +TGTGCTCTGGGGTCTCTGGGGTCTCACCCACGACCAACTCCCTGGGCCTGGCACCAGGGA +GCTTAACAAACATCTGTCCAGCGAATACCTGCATCCCTAGAAGTGAAGCCACCGCCCAAA +GACACGCCCATGTCCAGCTTAACCTGCATCCCTAGAAGTGAAGGCACCGCCCAAAGACAC +GCCCATGTCCAGCTTATTCTGCCCAGTTCCTCTCCAGAAAGGCTGCATGGTTGACACACA +GTGCCTGCGACAAAGCTGAATGCTATCATTTAAAAACTCCTTGCTGGTTTGAGAGGCAGA +AAATGATATCTCATAGTTGCTTTACTTTGCATATTTTAAAATTGTGACTTTCATGGCATA +AATAATACTGGTTTATTACAGAAGCACTAGAAAATGCATGTGGACAAAAGTTGGGATTAG +GAGAGAGAAATGAAGACATATGTCCACACAAAAACCTGTTCATTGCAGCTTTCTACCATC +ACCAAAAATTGCAAACAACCACACGCCCTTCAACTGGGGAACTCATCAACAACAAACTTG +TGGTTTACCCACACAATGGAAGACCACTTAGCAACAAAAAGGACCAAACTCCTGGTACAT +GCAACTGACAGATGAATCTCAAACGCATTCCTCCGTGTGAAAGAAGCCGGACTCACAGGG +CAACACACTATCTGACTGTTTCATGGGAAAGTCTGGAAACGGCAACACCATTGAGACAGA +AAACAGGTGAGTGGTTGCCTGGGGCCAGGGAACTTTCTGGGGTCATATTCTCTGTGTTGA +TTCTGGTGGTGGAAACAAGACTGTCCCAGCCTGGGTGATACAGCGAGACCCCATCTCTAC +CAAAAAATTAAAAATTAGCTGGGCATGGTGGTGCATGCCTGTAGTCCCAGCTATTCACAG +TGCTGAGGTGGGAAGATGCTTGAGCCCAGGAGTTCAAGGCTGCAATGAGCTATGATTGCG +CCACTGCACTTTGGCCTGGACAACAGAGCAAAACCCTGTCTCTAAAAAAAGAAAAGAAAA +GAAAAACTCACTGGATATGAATGATACAGGTTGAGGATCCATTATCTGAAATGCTTGGAC +CAGATGTTTTGAATTTTGGATTTTTTCATATTTTGTAATCTTTGCAGTATATTTACCAGT +TCAGCATCCCTAACTCAAAAATTCAAAAATCTGAAATCCCAAACGCGCCAATAAGCATTC +CCTTTGAGCGTCATGTCGGTGCTTGGAATGTTTGGGGTTTTGGATTTACAGCTTTGGGAC +GCTCAACCTGTACCTCAATAAACCTGATTTTAAAAAAGTTTGGGGGGATTCCCCTAAGCC +CGCCACCCGGAGACAGCGGATTTCCTTAGTTACTTACTATGCTCCTTGGCCATTTCTCTA +GGTATTGGTATATTGTGTCTGCTGTGAACTGTCCTTGGCCTGTTTGGTGACGGGTGAGGA +GCAGGGACAGAAGGGTCCTGCGTGCCCTGCCTTCACAAGCCCCTGGAAGGAAAGTTGTTT +TGGGATCTCTGCACCCTCAGCCTGGACAACTTGTGCCCATCTGGTGACCCCTCACTCAGC +CACCAGACTTCCACGACAGGCTCCAGCCTCGGCACCTTCAGCCATGGACAGTTCCGCCAG +CGTTGCCCTCTGTTCTGCTGTTTTCTCTACCAGAAGTGCCCTTCCCTCCTCACCTGACCA +CTCTGGGGAAATCCCTCAGCACCCTCCCTGAGCATACCCTACTCTGGCACAAGCCCACCC +TGCAAAGCCCCTGAGGCCCGCCCTGTGGCGTCTCTCCCTCCCTTGCTGTCAGGACAGTGG +TCCTGGCCACCGGGGCTCACGGAGCCGCCCTGTGCCGTGTACCTCTGAGCCCTCTGCACA +GTGCCTTCTGCTTGCCTGTGGCTTTGAGAAGAAACCCCTTCTGGTTATACATAAGACAGC +CAGAGAAGGGAGTTGCCCAGGGTGGCACAGCACGTTGCTGCCAGTTACTGCCATTTTCAC +GGGCATGAAATGGAGATAACAACAGGAGCGACCGCACAGGCTGCTGAGCGCGTCACACGC +AGCCATCGCGCAGCTCAGGGATATTACGTGTAACTCGACATGTCAGCGATTGTCACAGGC +ACTGCTACTCCTGGGGTTTTCCATCAAACCCTCAAGAGCTGGGCCTGGGGTCAACTTCCG +GCCTGGGGAAACTGGGGCAAGTATCACCAGAGATGAGCTTTATAAAAATAATGGTGCTAG +CTGGGCATGGTGGCTTGCACCTGTAATCCCAGCACTTTGGGAGGCCGAGCTAGGAGGATC +GTTTGAGTCCAGCAGTTTGAGACCAGCCTGGCCAATACGGCAAAACCCAGTCTCTACAAA +AAATACAAAAAACAACTAGCCAGGCGTGGTGGTGCACACCTGTAGTCCCAGCTACTCAGG +AGGCTGAGGGGGAAGGACTGCTTGAGCCCAGGAGTTTGAGGCTGCTGTGAGCTGTGATCG +CATCACTGCATTCCAGCCCGGTGACAGAGTGAGTCACTGTCTCAAAAAAGAAAGGAAGAA +ATAAAGAAAACAAATAAAAATAATAGTGCAGACAAAAGGCCTTGACCCATCTAGCTTTGG +CCCTCAGCATCAACCGCTAGATACGTCCCTCCCTTTCTTCTGGGGCACAGGTCACACTCT +CTTCCAGGTCTAGGATGCAGCTGAGGGGTGCCCCTCTTACCATCTAATCTGTGCCCTTAT +TTCCTCTGCTTTAGTGAGGAAGAGGCCCCTGGTCCATGAAGGGGCCTTTCAGAGACGGGG +ACCCCTGAGGAGCCCCGAGCAGCAGCCGTCGTGTCTCACCCAGGGTGTCTGAAACAGATG +TGGAGGTCTCGGGTGAGGCGTGGCTCAGATACAGGGAGTGGCCCACAGCTCGGCCTGTCT +TTGAAAGGCCACGTGACCTGGCCCACGGCTGGCAGGTGGGACCCAGCTGCAGGGGTCCAG +CAGCACCCACAGCAGCCACCTGTGGCAGGGAGGAGCTTGTGGTACAGTGGACAGGCCCTG +CCCAGATGGCCCCCCGCCTGCCTGTGGAAGTTGACCAGACCATCTGTCACAGCAGGTAAG +ACTCTGCTTTCTGGGCAACCCAGCAGGTGACCCTGGAATTCCTGTCCATCTGGCAGGTGG +GCATTGAAACTGGTTTAAAAATGTCACACCATAGGCCGGGCACAGTGGCTCACGCCTGTA +ATCCCAGCCCTTTGGGAGGCCAGGGTGGGTGGATCACTTGAGGTCAGGAGTTCAAGACCA +GCCTGGCCAACATGGTGAAACCCCGTCTACTAAAAATACAAAAATTAGCCTGGCGTGGTG +GCGCATGCCTGTAATCCCAGCTACTTGGGAAGCTGAGGGATGAGAACTGCTTGAACCTGG +GAGGCAGACGTTGCAGTGAGCTGAGATCACGCCACTGCACTCCAGCCTGGGCAACAGAGT +AAGACTCTGTCTCAAAAAAAAAAAAATCACACCATTTTGGCTTCAGATTGCATATCCTCC +TGCAAGGATATATACGCGTGAAATTCAAGTCAATGACAAATCAGAAGAAAAAACATATAT +ATACGCAAACCAGTATCCTACTGTGTGTGTCGTTTGTTGTGTTTTCGACAGCTGTCCGTG +TTATAATAATTCCTCTAGTTCAAATTTATTCATTTTTAACTTCATAGTACCACATTCTAC +ACACTGCCCATGTCCCCTCAAGCTTCCCCTGGCTCCTGCAACCACAAATCTACTCTCTGC +CTCTGTGGGTTGACCTATTCTGGACACGTCATAGAAATAGAGTCCTGCAACACGTGGCCG +TCTGTGTCTGGCTTCTCTCGCTTAGCATCTTGTTTCCAAGGTCCTCCCACAGTGTAGCAT +GCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAT +GGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACACACCCGCT +ACACTCCTTCTTAGGGCTGATATTCCACGCACCCGCTACACTCCTTCTTAGGGCTGATAT +TCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTT +CTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCAC diff --git a/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.fa.fai b/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.fa.fai new file mode 100644 index 000000000..c2112667e --- /dev/null +++ b/src/test/resources/htsjdk/samtools/ValidateSamFileTest/nm_tag_validation.fa.fai @@ -0,0 +1 @@ +17 4200 14 60 61