From 5d69cbcc73b3c75f6a774810519c5b3e2b93309b Mon Sep 17 00:00:00 2001 From: Ron Levine Date: Fri, 26 May 2017 17:56:13 -0400 Subject: [PATCH] ValidateSamFile returns informative error codes --- build.gradle | 2 +- src/main/java/picard/sam/ValidateSamFile.java | 164 +++++++++----- src/test/java/picard/sam/ValidateSamFileTest.java | 35 +++ src/test/java/picard/util/FifoBufferTest.java | 3 +- .../ValidateSamFile/bad/grouped-unpaired-mate.sam | 9 + .../sam/ValidateSamFile/bad/missing-rg-info.sam | 246 +++++++++++++++++++++ .../ValidateSamFile/bad/sorted-pair-missing-rg.sam | 12 + .../sam/ValidateSamFile/bad/unpaired-mate.sam | 13 ++ .../sam/ValidateSamFile/good/sorted-pair.sam | 12 + 9 files changed, 438 insertions(+), 58 deletions(-) create mode 100644 src/test/java/picard/sam/ValidateSamFileTest.java create mode 100755 testdata/picard/sam/ValidateSamFile/bad/grouped-unpaired-mate.sam create mode 100644 testdata/picard/sam/ValidateSamFile/bad/missing-rg-info.sam create mode 100644 testdata/picard/sam/ValidateSamFile/bad/sorted-pair-missing-rg.sam create mode 100644 testdata/picard/sam/ValidateSamFile/bad/unpaired-mate.sam create mode 100644 testdata/picard/sam/ValidateSamFile/good/sorted-pair.sam diff --git a/build.gradle b/build.gradle index a2da01c82..6e12edd5d 100644 --- a/build.gradle +++ b/build.gradle @@ -47,7 +47,7 @@ jacoco { toolVersion = "0.7.5.201505241946" } -final htsjdkVersion = System.getProperty('htsjdk.version', '2.9.1') +final htsjdkVersion = System.getProperty('htsjdk.version', '2.10.0') dependencies { compile 'com.google.guava:guava:15.0' diff --git a/src/main/java/picard/sam/ValidateSamFile.java b/src/main/java/picard/sam/ValidateSamFile.java index 479d4dd6d..8e7c5aa70 100644 --- a/src/main/java/picard/sam/ValidateSamFile.java +++ b/src/main/java/picard/sam/ValidateSamFile.java @@ -33,6 +33,7 @@ import htsjdk.samtools.reference.ReferenceSequenceFile; import htsjdk.samtools.reference.ReferenceSequenceFileFactory; import htsjdk.samtools.util.IOUtil; +import htsjdk.samtools.util.Log; import picard.PicardException; import picard.cmdline.CommandLineProgram; import picard.cmdline.CommandLineProgramProperties; @@ -57,6 +58,7 @@ programGroup = SamOrBam.class ) public class ValidateSamFile extends CommandLineProgram { + private static final Log log = Log.getInstance(ValidateSamFile.class); static final String USAGE_SUMMARY = "Validates a SAM or BAM file. "; static final String USAGE_DETAILS = "

This tool reports on the validity of a SAM or BAM file relative to the SAM format " + "specification. This is useful for troubleshooting errors encountered with other tools that may be caused by improper " + @@ -133,75 +135,125 @@ public static void main(final String[] args) { System.exit(new ValidateSamFile().instanceMain(args)); } + /** + * Return types for doWork() + */ + enum ReturnTypes { + FAILED(-1), // failed to complete execution + SUCCESSFUL(0), // ran successfully + WARNINGS(1), // warnings but no errors + ERRORS_WARNINGS(2), // errors and warnings + ERRORS(3); // errors but no warnings + + private final int value; + + ReturnTypes(final int value) { + this.value = value; + } + + int value() { + return value; + } + } + + /** + * Do the work after command line has been parsed. + * + * @return -1 - failed to complete execution, 0 - ran successfully without warnings or errors, 1 - warnings but no errors, + * 2 - errors and warnings, 3 - errors but no warnings + * + */ @Override protected int doWork() { - IOUtil.assertFileIsReadable(INPUT); - ReferenceSequenceFile reference = null; - if (REFERENCE_SEQUENCE != null) { - IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); - reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE); + try { + IOUtil.assertFileIsReadable(INPUT); + ReferenceSequenceFile reference = null; + if (REFERENCE_SEQUENCE != null) { + IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); + reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE); - } - final PrintWriter out; - if (OUTPUT != null) { - IOUtil.assertFileIsWritable(OUTPUT); - try { - out = new PrintWriter(OUTPUT); - } catch (FileNotFoundException e) { - // we already asserted this so we should not get here - throw new PicardException("Unexpected exception", e); } - } else { - out = new PrintWriter(System.out); - } + final PrintWriter out; + if (OUTPUT != null) { + IOUtil.assertFileIsWritable(OUTPUT); + try { + out = new PrintWriter(OUTPUT); + } catch (FileNotFoundException e) { + // we already asserted this so we should not get here + throw new PicardException("Unexpected exception", e); + } + } else { + out = new PrintWriter(System.out); + } - boolean result; + boolean result; - final SamReaderFactory factory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE) - .validationStringency(ValidationStringency.SILENT) - .enable(SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS); - final SamReader samReader = factory.open(INPUT); + final SamReaderFactory factory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE) + .validationStringency(ValidationStringency.SILENT) + .enable(SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS); + final SamReader samReader = factory.open(INPUT); - if (samReader.type() != SamReader.Type.BAM_TYPE) VALIDATE_INDEX = false; + if (samReader.type() != SamReader.Type.BAM_TYPE) VALIDATE_INDEX = false; - factory.setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, VALIDATE_INDEX); - factory.reapplyOptions(samReader); + factory.setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, VALIDATE_INDEX); + factory.reapplyOptions(samReader); - final SamFileValidator validator = new SamFileValidator(out, MAX_OPEN_TEMP_FILES); - validator.setErrorsToIgnore(IGNORE); + final SamFileValidator validator = new SamFileValidator(out, MAX_OPEN_TEMP_FILES); + validator.setErrorsToIgnore(IGNORE); - if (IGNORE_WARNINGS) { - validator.setIgnoreWarnings(IGNORE_WARNINGS); - } - if (MODE == Mode.SUMMARY) { - validator.setVerbose(false, 0); - } else { - validator.setVerbose(true, MAX_OUTPUT); - } - if (IS_BISULFITE_SEQUENCED) { - validator.setBisulfiteSequenced(IS_BISULFITE_SEQUENCED); - } - if (VALIDATE_INDEX) { - validator.setIndexValidationStringency(VALIDATE_INDEX ? IndexValidationStringency.EXHAUSTIVE : IndexValidationStringency.NONE); - } - if (IOUtil.isRegularPath(INPUT)) { - // Do not check termination if reading from a stream - validator.validateBamFileTermination(INPUT); - } + if (IGNORE_WARNINGS) { + validator.setIgnoreWarnings(IGNORE_WARNINGS); + } + if (MODE == Mode.SUMMARY) { + validator.setVerbose(false, 0); + } else { + validator.setVerbose(true, MAX_OUTPUT); + } + if (IS_BISULFITE_SEQUENCED) { + validator.setBisulfiteSequenced(IS_BISULFITE_SEQUENCED); + } + if (VALIDATE_INDEX) { + validator.setIndexValidationStringency(VALIDATE_INDEX ? IndexValidationStringency.EXHAUSTIVE : IndexValidationStringency.NONE); + } + if (IOUtil.isRegularPath(INPUT)) { + // Do not check termination if reading from a stream + validator.validateBamFileTermination(INPUT); + } - result = false; + result = false; - switch (MODE) { - case SUMMARY: - result = validator.validateSamFileSummary(samReader, reference); - break; - case VERBOSE: - result = validator.validateSamFileVerbose(samReader, reference); - break; - } - out.flush(); + switch (MODE) { + case SUMMARY: + result = validator.validateSamFileSummary(samReader, reference); + break; + case VERBOSE: + result = validator.validateSamFileVerbose(samReader, reference); + break; + } + out.flush(); - return result ? 0 : 1; + if (result) { + return ReturnTypes.SUCCESSFUL.value(); // ran successfully with no warnings or errors + } else { + if (validator.getNumErrors() == 0) { + if (validator.getNumWarnings() > 0) { + return ReturnTypes.WARNINGS.value(); // warnings but no errors + } else { + log.error("SAM file validation fails without warnings or errors."); + return ReturnTypes.FAILED.value(); + } + } else { + if (validator.getNumWarnings() > 0) { + return ReturnTypes.ERRORS_WARNINGS.value(); // errors and warnings + } else { + return ReturnTypes.ERRORS.value(); // errors but no warnings + } + } + } + } catch (Exception e) { + log.error(e.getMessage()); + return ReturnTypes.FAILED.value(); // failed to complete execution + } } @Override diff --git a/src/test/java/picard/sam/ValidateSamFileTest.java b/src/test/java/picard/sam/ValidateSamFileTest.java new file mode 100644 index 000000000..81db4adec --- /dev/null +++ b/src/test/java/picard/sam/ValidateSamFileTest.java @@ -0,0 +1,35 @@ +package picard.sam; + +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; +import picard.cmdline.CommandLineProgramTest; + +import java.io.File; + +public class ValidateSamFileTest extends CommandLineProgramTest { + + private static final String TEST_DATA_DIR = "testdata/picard/sam/ValidateSamFile/"; + + @Override + public String getCommandLineProgramName() { + return ValidateSamFile.class.getSimpleName(); + } + + @DataProvider + public Object[][] samFiles() { + return new Object[][] { + {"nofile", ValidateSamFile.ReturnTypes.FAILED.value()}, + {"good/sorted-pair.sam", ValidateSamFile.ReturnTypes.SUCCESSFUL.value()}, + {"bad/unpaired-mate.sam", ValidateSamFile.ReturnTypes.ERRORS.value()}, + {"bad/missing-rg-info.sam", ValidateSamFile.ReturnTypes.ERRORS_WARNINGS.value()}, + {"bad/sorted-pair-missing-rg.sam", ValidateSamFile.ReturnTypes.WARNINGS.value()} + }; + } + + @Test(dataProvider = "samFiles") + public void test(final String samFileName, final int exitStatus) { + final int validateExitStatus = runPicardCommandLine(new String[]{"I=" + new File(TEST_DATA_DIR + samFileName).getAbsolutePath()}); + Assert.assertEquals(validateExitStatus, exitStatus); + } +} diff --git a/src/test/java/picard/util/FifoBufferTest.java b/src/test/java/picard/util/FifoBufferTest.java index 4f67877aa..41b2cc5e6 100644 --- a/src/test/java/picard/util/FifoBufferTest.java +++ b/src/test/java/picard/util/FifoBufferTest.java @@ -20,7 +20,8 @@ public class FifoBufferTest { /* Simply invokes the real test method many times with different inputs. */ - @Test public void testFifoBuffer() throws IOException { + @Test + public void testFifoBuffer() throws IOException { test(1); test(2); test(3); diff --git a/testdata/picard/sam/ValidateSamFile/bad/grouped-unpaired-mate.sam b/testdata/picard/sam/ValidateSamFile/bad/grouped-unpaired-mate.sam new file mode 100755 index 000000000..82b961d5c --- /dev/null +++ b/testdata/picard/sam/ValidateSamFile/bad/grouped-unpaired-mate.sam @@ -0,0 +1,9 @@ +@HD VN:1.0 SO:unsorted +@RG ID:rg1 SM:s1 PU:rg1 PL:ILLUMINA +@RG ID:rg2 SM:s2 PU:rg2 PL:ILLUMINA +foo:record:1 77 * 0 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +foo:record:1 141 * 0 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +foo:record:2 141 * 0 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +foo:record:3 77 * 0 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:1 77 * 0 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 +bar:record:2 141 * 0 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg2 diff --git a/testdata/picard/sam/ValidateSamFile/bad/missing-rg-info.sam b/testdata/picard/sam/ValidateSamFile/bad/missing-rg-info.sam new file mode 100644 index 000000000..7199599b9 --- /dev/null +++ b/testdata/picard/sam/ValidateSamFile/bad/missing-rg-info.sam @@ -0,0 +1,246 @@ +@HD VN:1.5 SO:queryname +@SQ SN:20 LN:63025520 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta AS:GRCh37 M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens +@PG ID:GATK IndelRealigner VN:3.1-144-g00f68a3 CL:knownAlleles=[(RodBinding name=knownAlleles source=/ref/b37.dbsnp.vcf), (RodBinding name=knownAlleles2 source=/ref/b37.known_indels.vcf), (RodBinding name=knownAlleles3 source=/ref/b37.variantEvalGoldStandard.vcf)] targetIntervals=/seq/picard_aggregation/D5185/Solexa-272222/v1/1/NA12878.merged.intervals LODThresholdForCleaning=5.0 consensusDeterminationModel=USE_READS entropyThreshold=0.15 maxReadsInMemory=150000 maxIsizeForMovement=3000 maxPositionalMoveAllowed=200 maxConsensuses=30 maxReadsForConsensuses=120 maxReadsForRealignment=20000 noOriginalAlignmentTags=false nWayOut=null generate_nWayOut_md5s=false check_early=false noPGTag=false keepPGTags=false indelsFileForDebugging=null statisticsFileForDebugging=/seq/picard_aggregation/D5185/Solexa-272222/v1/1/NA12878.indel.stats SNPsFileForDebugging=null +@PG ID:MarkDuplicates PN:MarkDuplicates PP:bwamem VN:1.750(2cd40696172f5af6d80af6a794d9daa9d6d48a1e_1408563114) CL:picard.sam.MarkDuplicates INPUT=[/H0164ALXX/C1-310_2014-08-20_2014-08-23/2/Solexa-272222/H0164ALXX.2.aligned.bam] OUTPUT=/H0164ALXX/C1-310_2014-08-20_2014-08-23/2/Solexa-272222/H0164ALXX.2.aligned.duplicates_marked.bam METRICS_FILE=/H0164ALXX/C1-310_2014-08-20_2014-08-23/2/Solexa-272222/H0164ALXX.2.duplicate_metrics OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 TMP_DIR=[/local/scratch/H0164ALXX, /seq/picardtemp4/pod4/tmp/proctmp/H0164ALXX] VALIDATION_STRINGENCY=SILENT CREATE_INDEX=true CREATE_MD5_FILE=true PROGRAM_RECORD_ID=MarkDuplicates PROGRAM_GROUP_NAME=MarkDuplicates REMOVE_DUPLICATES=false ASSUME_SORTED=false MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=50000 MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=8000 SORTING_COLLECTION_SIZE_RATIO=0.25 READ_NAME_REGEX=[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).* VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 +@PG ID:bwamem PN:bwamem VN:0.7.7-r441 CL:/seq/software/picard/1.750/3rd_party/bwa_mem/bwa mem -M -t 10 -p /ref/b37.fasta /dev/stdin > /dev/stdout +@PG ID:GATK PrintReads VN:3.6-0-g89b7209 CL:readGroup=null platform=null number=-1 sample_file=[] sample_name=[] simplify=false no_pg_tag=false +H0164ALXX140820:2:1101:17727:54981 83 20 10000954 48 151M = 10000786 -319 TAATATTTGTAACTTACAATTACTTCAACTGAATAATAAAAGAATTGGACTAGATTTCTCCAACATCTCTCTCTTTTGGCTTTATGTTAGATAATGCTAAATTTTCATCATATCCAAACATGCTATATAATTTTATGAACTGTTACAGAGT A-