diff --git a/src/main/java/picard/illumina/CollectIlluminaLaneMetrics.java b/src/main/java/picard/illumina/CollectIlluminaLaneMetrics.java index 913180c63..fdbd4f8d7 100644 --- a/src/main/java/picard/illumina/CollectIlluminaLaneMetrics.java +++ b/src/main/java/picard/illumina/CollectIlluminaLaneMetrics.java @@ -24,6 +24,7 @@ package picard.illumina; +import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.metrics.MetricBase; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.util.IOUtil; @@ -118,7 +119,7 @@ protected int doWork() { } } - IlluminaLaneMetricsCollector.collectLaneMetrics(RUN_DIRECTORY, OUTPUT_DIRECTORY, OUTPUT_PREFIX, laneMetricsFile, phasingMetricsFile, READ_STRUCTURE, FILE_EXTENSION == null ? "" : FILE_EXTENSION); + IlluminaLaneMetricsCollector.collectLaneMetrics(RUN_DIRECTORY, OUTPUT_DIRECTORY, OUTPUT_PREFIX, laneMetricsFile, phasingMetricsFile, READ_STRUCTURE, FILE_EXTENSION == null ? "" : FILE_EXTENSION, VALIDATION_STRINGENCY); return 0; } @@ -134,10 +135,13 @@ public static void main(final String[] args) { private final static Log LOG = Log.getInstance(IlluminaLaneMetricsCollector.class); /** Returns a partitioned collection of lane number to Tile objects from the provided basecall directory. */ - public static Map> readLaneTiles(final File illuminaRunDirectory, final ReadStructure readStructure) { + public static Map> readLaneTiles(final File illuminaRunDirectory, final ReadStructure readStructure, final ValidationStringency validationStringency) { final Collection tiles; try { - tiles = TileMetricsUtil.parseTileMetrics(TileMetricsUtil.renderTileMetricsFileFromBasecallingDirectory(illuminaRunDirectory), readStructure); + tiles = TileMetricsUtil.parseTileMetrics(TileMetricsUtil.renderTileMetricsFileFromBasecallingDirectory(illuminaRunDirectory), + readStructure, + validationStringency + ); } catch (final FileNotFoundException e) { throw new PicardException("Unable to open laneMetrics file.", e); } @@ -149,8 +153,9 @@ public static void main(final String[] args) { public static void collectLaneMetrics(final File runDirectory, final File outputDirectory, final String outputPrefix, final MetricsFile> laneMetricsFile, final MetricsFile> phasingMetricsFile, - final ReadStructure readStructure, final String fileExtension) { - final Map> laneTiles = readLaneTiles(runDirectory, readStructure); + final ReadStructure readStructure, final String fileExtension, + final ValidationStringency validationStringency) { + final Map> laneTiles = readLaneTiles(runDirectory, readStructure, validationStringency); writeLaneMetrics(laneTiles, outputDirectory, outputPrefix, laneMetricsFile, fileExtension); writePhasingMetrics(laneTiles, outputDirectory, outputPrefix, phasingMetricsFile, fileExtension); } @@ -189,10 +194,10 @@ private static double calculateLaneDensityFromTiles(final Collection tiles double area = 0; double clusters = 0; for (final Tile tile : tiles) { - area += (tile.getClusterCount() / tile.getClusterDensity()); + if (tile.getClusterDensity() > 0) area += (tile.getClusterCount() / tile.getClusterDensity()); clusters += tile.getClusterCount(); } - return clusters / area; + return (area > 0) ? clusters / area : 0.0; } } } diff --git a/src/main/java/picard/illumina/parser/TileMetricsUtil.java b/src/main/java/picard/illumina/parser/TileMetricsUtil.java index 35863d3b2..656c7be86 100644 --- a/src/main/java/picard/illumina/parser/TileMetricsUtil.java +++ b/src/main/java/picard/illumina/parser/TileMetricsUtil.java @@ -24,9 +24,12 @@ package picard.illumina.parser; +import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.util.CollectionUtil; import htsjdk.samtools.util.IterableAdapter; +import htsjdk.samtools.util.Log; import picard.PicardException; +import picard.illumina.CollectIlluminaLaneMetrics; import picard.illumina.parser.readers.TileMetricsOutReader; import picard.illumina.parser.readers.TileMetricsOutReader.IlluminaTileMetrics; @@ -54,6 +57,8 @@ /** The expected name of the tile metrics output file. */ public static String TILE_METRICS_OUT_FILE_NAME = "TileMetricsOut.bin"; + private final static Log LOG = Log.getInstance(TileMetricsUtil.class); + /** Returns the path to the TileMetrics file given the basecalling directory. */ public static File renderTileMetricsFileFromBasecallingDirectory(final File illuminaRunDirectory) { return new File(new File(illuminaRunDirectory, INTEROP_SUBDIRECTORY_NAME), TILE_METRICS_OUT_FILE_NAME); @@ -68,7 +73,8 @@ public static File renderTileMetricsFileFromBasecallingDirectory(final File illu * - Phasing & Prephasing for first template read (if available) * - Phasing & Prephasing for second template read (if available) */ - public static Collection parseTileMetrics(final File tileMetricsOutFile, final ReadStructure readStructure) throws FileNotFoundException { + public static Collection parseTileMetrics(final File tileMetricsOutFile, final ReadStructure readStructure, + final ValidationStringency validationStringency) throws FileNotFoundException { // Get the tile metrics lines from TileMetricsOut, keeping only the last value for any Lane/Tile/Code combination final Collection tileMetrics = determineLastValueForLaneTileMetricsCode(new TileMetricsOutReader (tileMetricsOutFile)); @@ -91,7 +97,7 @@ public static File renderTileMetricsFileFromBasecallingDirectory(final File illu final IlluminaTileMetrics clusterRecord = CollectionUtil.getSoleElement(codeMetricsMap.get(IlluminaMetricsCode.CLUSTER_ID.getMetricsCode())); // Snag the phasing data for each read in the read structure. For both types of phasing values, this is the median of all of the individual values seen - final Collection tilePhasingValues = getTilePhasingValues(codeMetricsMap, readStructure); + final Collection tilePhasingValues = getTilePhasingValues(codeMetricsMap, readStructure, validationStringency); tiles.add(new Tile(densityRecord.getLaneNumber(), densityRecord.getTileNumber(), densityRecord.getMetricValue(), clusterRecord.getMetricValue(), tilePhasingValues.toArray(new TilePhasingValue[tilePhasingValues.size()]))); @@ -101,7 +107,7 @@ public static File renderTileMetricsFileFromBasecallingDirectory(final File illu } /** Pulls out the phasing & prephasing value for the template reads and returns a collection of TilePhasingValues representing these */ - private static Collection getTilePhasingValues(final Map> codeMetricsMap, final ReadStructure readStructure) { + private static Collection getTilePhasingValues(final Map> codeMetricsMap, final ReadStructure readStructure, final ValidationStringency validationStringency) { boolean isFirstRead = true; final Collection tilePhasingValues = new ArrayList<>(); for (int descriptorIndex = 0; descriptorIndex < readStructure.descriptors.size(); descriptorIndex++) { @@ -111,13 +117,32 @@ public static File renderTileMetricsFileFromBasecallingDirectory(final File illu final int phasingCode = IlluminaMetricsCode.getPhasingCode(descriptorIndex, IlluminaMetricsCode.PHASING_BASE); final int prePhasingCode = IlluminaMetricsCode.getPhasingCode(descriptorIndex, IlluminaMetricsCode.PREPHASING_BASE); - if (!(codeMetricsMap.containsKey(phasingCode) && codeMetricsMap.containsKey(prePhasingCode))) { - throw new PicardException("Don't have both phasing and prephasing values for tile"); + final float phasingValue, prePhasingValue; + + // If both the phasing and pre-phasing data are missing, then likely something went wrong when imaging + // this tile, for example a grain of sand disrupting the path of light to the sensor. If only one of them + // is missing, then likely the data is corrupt. + if (codeMetricsMap.containsKey(phasingCode) && codeMetricsMap.containsKey(prePhasingCode)) { + phasingValue = CollectionUtil.getSoleElement(codeMetricsMap.get(phasingCode)).getMetricValue(); + prePhasingValue = CollectionUtil.getSoleElement(codeMetricsMap.get(prePhasingCode)).getMetricValue(); + } else { + final String message = String.format( + "Don't have both phasing and prephasing values for %s read cycle %s. Phasing code was %d and prephasing code was %d.", + tileTemplateRead.toString(), descriptorIndex + 1, phasingCode, prePhasingCode + ); + if (!codeMetricsMap.containsKey(phasingCode) && !codeMetricsMap.containsKey(prePhasingCode) && validationStringency != ValidationStringency.STRICT) { + // Ignore the error, and use the default (zero) for the phasing values + if (validationStringency == ValidationStringency.LENIENT) { + LOG.warn(message); + } + } else { + throw new PicardException(message); + } + phasingValue = 0; + prePhasingValue = 0; } - tilePhasingValues.add(new TilePhasingValue(tileTemplateRead, - CollectionUtil.getSoleElement(codeMetricsMap.get(phasingCode)).getMetricValue(), - CollectionUtil.getSoleElement(codeMetricsMap.get(prePhasingCode)).getMetricValue())); + tilePhasingValues.add(new TilePhasingValue(tileTemplateRead, phasingValue, prePhasingValue)); isFirstRead = false; } } diff --git a/src/test/java/picard/illumina/IlluminaLaneMetricsCollectorTest.java b/src/test/java/picard/illumina/IlluminaLaneMetricsCollectorTest.java index 3910852f4..bf6b86d0d 100644 --- a/src/test/java/picard/illumina/IlluminaLaneMetricsCollectorTest.java +++ b/src/test/java/picard/illumina/IlluminaLaneMetricsCollectorTest.java @@ -1,17 +1,23 @@ package picard.illumina; +import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.util.IOUtil; +import picard.PicardException; import picard.illumina.parser.ReadStructure; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.io.File;import java.lang.Exception;import java.lang.Object;import java.lang.String; +import java.io.File; +import java.io.IOException; +import java.lang.Exception;import java.lang.Object;import java.lang.String; +import java.nio.file.Files; import java.util.Arrays; /** @author mccowan */ public class IlluminaLaneMetricsCollectorTest { final static File TEST_DIRECTORY = new File("testdata/picard/illumina/IlluminaLaneMetricsCollectorTest"); final static File TILE_RUN_DIRECTORY = new File(TEST_DIRECTORY, "tileRuns"); + final static File TEST_MISSING_PHASING_DIRECTORY = new File(TEST_DIRECTORY, "missing_phasing"); private static File buildOutputFile(final File directory, final String prefix, final String extension) { return new File(directory, String.format("%s.%s", prefix, extension)); @@ -56,9 +62,9 @@ public void testCollectIlluminaLaneMetrics(final String testRun, final ReadStruc if (useReadStructure) clp.READ_STRUCTURE = readStructure; clp.doWork(); - final File phasingMetricsPhile = buildOutputFile(clp.OUTPUT_DIRECTORY, clp.OUTPUT_PREFIX, IlluminaPhasingMetrics.getExtension()); - final File canonicalPhasingPhile = buildOutputFile(runDirectory, testRun, IlluminaPhasingMetrics.getExtension()); - IOUtil.assertFilesEqual(canonicalPhasingPhile, phasingMetricsPhile); + final File phasingMetricsFile = buildOutputFile(clp.OUTPUT_DIRECTORY, clp.OUTPUT_PREFIX, IlluminaPhasingMetrics.getExtension()); + final File canonicalPhasingFile = buildOutputFile(runDirectory, testRun, IlluminaPhasingMetrics.getExtension()); + IOUtil.assertFilesEqual(canonicalPhasingFile, phasingMetricsFile); final File laneMetricsFile = buildOutputFile(clp.OUTPUT_DIRECTORY, clp.OUTPUT_PREFIX, IlluminaLaneMetrics.getExtension()); final File canonicalLaneFile = buildOutputFile(runDirectory, testRun, IlluminaLaneMetrics.getExtension()); @@ -77,4 +83,54 @@ public void testCollectIlluminaLaneMetrics(final String testRun, final ReadStruc {"A67HY", new ReadStructure("8B8B")} }; } + + /** Ensures that an exception is thrown when we encounter a tile without phasing/pre-phasing metrics. */ + @Test(expectedExceptions = PicardException.class) + public void testMissingPhasingValuesStrict() { + final ReadStructure readStructure = new ReadStructure("151T8B8B151T"); + for (final boolean useReadStructure : Arrays.asList(true, false)) { + final File runDirectory = TEST_MISSING_PHASING_DIRECTORY; + final CollectIlluminaLaneMetrics clp = new CollectIlluminaLaneMetrics(); + clp.OUTPUT_DIRECTORY = IOUtil.createTempDir("illuminaLaneMetricsCollectorTest", null); + clp.RUN_DIRECTORY = runDirectory; + clp.OUTPUT_PREFIX = "test"; + clp.VALIDATION_STRINGENCY = ValidationStringency.STRICT; + if (useReadStructure) clp.READ_STRUCTURE = readStructure; + clp.doWork(); + + final File phasingMetricsFile = buildOutputFile(clp.OUTPUT_DIRECTORY, clp.OUTPUT_PREFIX, IlluminaPhasingMetrics.getExtension()); + final File canonicalPhasingFile = buildOutputFile(runDirectory, runDirectory.getName(), IlluminaPhasingMetrics.getExtension()); + IOUtil.assertFilesEqual(canonicalPhasingFile, phasingMetricsFile); + + final File laneMetricsFile = buildOutputFile(clp.OUTPUT_DIRECTORY, clp.OUTPUT_PREFIX, IlluminaLaneMetrics.getExtension()); + final File canonicalLaneFile = buildOutputFile(runDirectory, runDirectory.getName(), IlluminaLaneMetrics.getExtension()); + IOUtil.assertFilesEqual(canonicalLaneFile, laneMetricsFile); + IOUtil.deleteDirectoryTree(clp.OUTPUT_DIRECTORY); + } + } + + /** Silently continue if we encounter a tile without phasing/pre-phasing metrics. */ + @Test + public void testMissingPhasingValuesSilent() throws IOException { + final ReadStructure readStructure = new ReadStructure("151T8B8B151T"); + for (final boolean useReadStructure : Arrays.asList(true, false)) { + final File runDirectory = TEST_MISSING_PHASING_DIRECTORY; + final CollectIlluminaLaneMetrics clp = new CollectIlluminaLaneMetrics(); + clp.OUTPUT_DIRECTORY = IOUtil.createTempDir("illuminaLaneMetricsCollectorTest", null); + clp.RUN_DIRECTORY = runDirectory; + clp.OUTPUT_PREFIX = "test"; + clp.VALIDATION_STRINGENCY = ValidationStringency.SILENT; + if (useReadStructure) clp.READ_STRUCTURE = readStructure; + clp.doWork(); + + final File phasingMetricsFile = buildOutputFile(clp.OUTPUT_DIRECTORY, clp.OUTPUT_PREFIX, IlluminaPhasingMetrics.getExtension()); + final File canonicalPhasingFile = buildOutputFile(runDirectory, runDirectory.getName(), IlluminaPhasingMetrics.getExtension()); + IOUtil.assertFilesEqual(canonicalPhasingFile, phasingMetricsFile); + + final File laneMetricsFile = buildOutputFile(clp.OUTPUT_DIRECTORY, clp.OUTPUT_PREFIX, IlluminaLaneMetrics.getExtension()); + final File canonicalLaneFile = buildOutputFile(runDirectory, runDirectory.getName(), IlluminaLaneMetrics.getExtension()); + IOUtil.assertFilesEqual(canonicalLaneFile, laneMetricsFile); + IOUtil.deleteDirectoryTree(clp.OUTPUT_DIRECTORY); + } + } } diff --git a/src/test/java/picard/sam/SplitSamByLibraryTest.java b/src/test/java/picard/sam/SplitSamByLibraryTest.java index 67ec32bd7..06a3989dc 100755 --- a/src/test/java/picard/sam/SplitSamByLibraryTest.java +++ b/src/test/java/picard/sam/SplitSamByLibraryTest.java @@ -55,7 +55,7 @@ public void basicPositiveTest() { Assert.assertEquals(splitter.doWork(), 0, "SAM file split should have succeeded but didn't."); File f = new File("unknown.sam"); - Assert.assertTrue(f.exists(), "uknown.sam should exist but doesn't"); + Assert.assertTrue(f.exists(), "unknown.sam should exist but doesn't"); Assert.assertEquals(countReads(f), 2, "unknown.sam has the wrong number of reads"); f.delete(); @@ -83,7 +83,7 @@ public void testNoUnknownFile() { // The unknown file should exist and have two reads File f = new File("unknown.sam"); - Assert.assertFalse(f.exists(), "uknown.sam should not exist but does"); + Assert.assertFalse(f.exists(), "unknown.sam should not exist but does"); if (f.exists()) f.delete(); f = new File("lib-1.sam"); diff --git a/testdata/picard/illumina/IlluminaLaneMetricsCollectorTest/missing_phasing/InterOp/TileMetricsOut.bin b/testdata/picard/illumina/IlluminaLaneMetricsCollectorTest/missing_phasing/InterOp/TileMetricsOut.bin new file mode 100644 index 000000000..8c10c8f85 Binary files /dev/null and b/testdata/picard/illumina/IlluminaLaneMetricsCollectorTest/missing_phasing/InterOp/TileMetricsOut.bin differ diff --git a/testdata/picard/illumina/IlluminaLaneMetricsCollectorTest/missing_phasing/RunInfo.xml b/testdata/picard/illumina/IlluminaLaneMetricsCollectorTest/missing_phasing/RunInfo.xml new file mode 100644 index 000000000..3569087a8 --- /dev/null +++ b/testdata/picard/illumina/IlluminaLaneMetricsCollectorTest/missing_phasing/RunInfo.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/testdata/picard/illumina/IlluminaLaneMetricsCollectorTest/missing_phasing/missing_phasing.illumina_lane_metrics b/testdata/picard/illumina/IlluminaLaneMetricsCollectorTest/missing_phasing/missing_phasing.illumina_lane_metrics new file mode 100644 index 000000000..48b063e55 --- /dev/null +++ b/testdata/picard/illumina/IlluminaLaneMetricsCollectorTest/missing_phasing/missing_phasing.illumina_lane_metrics @@ -0,0 +1,9 @@ + +## METRICS CLASS picard.illumina.IlluminaLaneMetrics +CLUSTER_DENSITY LANE +148913.651644 1 +144193.648422 2 +148975.949702 3 +145278.617619 4 + + diff --git a/testdata/picard/illumina/IlluminaLaneMetricsCollectorTest/missing_phasing/missing_phasing.illumina_phasing_metrics b/testdata/picard/illumina/IlluminaLaneMetricsCollectorTest/missing_phasing/missing_phasing.illumina_phasing_metrics new file mode 100644 index 000000000..4edd34fea --- /dev/null +++ b/testdata/picard/illumina/IlluminaLaneMetricsCollectorTest/missing_phasing/missing_phasing.illumina_phasing_metrics @@ -0,0 +1,13 @@ + +## METRICS CLASS picard.illumina.IlluminaPhasingMetrics +LANE TYPE_NAME PHASING_APPLIED PREPHASING_APPLIED +1 FIRST 0.068819 0.052681 +1 SECOND 0.133818 0.099066 +2 FIRST 0.062264 0.053854 +2 SECOND 0.130112 0.10689 +3 FIRST 0.068925 0.052412 +3 SECOND 0.1306 0.101055 +4 FIRST 0.054477 0.045521 +4 SECOND 0.128735 0.095261 + +