diff --git a/src/main/java/picard/fingerprint/CrosscheckFingerprints.java b/src/main/java/picard/fingerprint/CrosscheckFingerprints.java index b737b6d94..cf59db706 100644 --- a/src/main/java/picard/fingerprint/CrosscheckFingerprints.java +++ b/src/main/java/picard/fingerprint/CrosscheckFingerprints.java @@ -59,15 +59,15 @@ * @author Yossi Farjoun */ @CommandLineProgramProperties( - usage = "Checks if all fingerprints within a set of files appear to come from the same individual." + - "The fingerprints are calculated initially at the readgroup level (if present) but can be" + + usage = "Checks if all fingerprints within a set of files appear to come from the same individual. " + + "The fingerprints are calculated initially at the readgroup level (if present) but can be " + "\"rolled-up\" by library, sample or file, to increase power and provide results at the " + "desired resolution. Regular output is in a \"Moltenized\" format, one row per comparison. " + "In this format the output will include the LOD score and also tumor-aware LOD score which can " + - "help assess identity even in the presence of a severe LOH sample with high purity." + - "A matrix output is also availalble to facilitate visual inspection of crosscheck results." + + "help assess identity even in the presence of a severe LOH sample with high purity. " + + "A matrix output is also available to facilitate visual inspection of crosscheck results." + "\n" + - "A separate CLP, ClusterCrosscheckMetrics, can cluster the results as a connected graph" + + "A separate CLP, ClusterCrosscheckMetrics, can cluster the results as a connected graph " + "according to LOD greater than a threshold. ", usageShort = "Checks if all fingerprints appear to come from the same individual.", programGroup = Fingerprinting.class @@ -84,7 +84,7 @@ @Option(shortName = "MO", optional = true, doc = "Optional output file to write matrix of LOD scores to. This is less informative than the metrics output " + - "and only contains Normal-Normal LOD score (i.e. doesn't account for Loss of heterogeneity)." + + "and only contains Normal-Normal LOD score (i.e. doesn't account for Loss of heterogeneity). " + "It is however sometimes easier to use visually.") public File MATRIX_OUTPUT = null; @@ -100,8 +100,8 @@ "\n\n" + "LOD score 0 means equal likelihood" + "that the groups match vs. come from different individuals, negative LOD scores mean N logs more likely " + - "that the groups are from different individuals, and positive numbers mean N logs more likely that" + - " the groups are from the sample individual. ") + "that the groups are from different individuals, and positive numbers mean N logs more likely that " + + "the groups are from the sample individual. ") public double LOD_THRESHOLD = 0; @Option(doc = "Specificies which data-type should be used as the basic comparison unit. Fingerprints from readgroups can " + @@ -116,8 +116,8 @@ "marking has been overly aggressive and coverage is low.") public boolean ALLOW_DUPLICATE_READS = false; - @Option(doc = "Assumed genotyping error rate that provides a floor on the probability that a genotype comes from" + - " the expected sample.") + @Option(doc = "Assumed genotyping error rate that provides a floor on the probability that a genotype comes from " + + "the expected sample.") public double GENOTYPING_ERROR_RATE = 0.01; @Option(doc = "If true then only groups that do not relate to each other as expected will have their LODs reported.") diff --git a/src/main/java/picard/fingerprint/CrosscheckReadGroupFingerprints.java b/src/main/java/picard/fingerprint/CrosscheckReadGroupFingerprints.java index b314b7a64..a959b323c 100644 --- a/src/main/java/picard/fingerprint/CrosscheckReadGroupFingerprints.java +++ b/src/main/java/picard/fingerprint/CrosscheckReadGroupFingerprints.java @@ -93,14 +93,16 @@ protected int doWork() { EXPECT_ALL_GROUPS_TO_MATCH = EXPECT_ALL_READ_GROUPS_TO_MATCH; } - if (CROSSCHECK_LIBRARIES) { + if (CROSSCHECK_LIBRARIES) { // if cross-checking library we want matrix output CROSSCHECK_BY = CrosscheckMetric.DataType.LIBRARY; - } else if (CROSSCHECK_SAMPLES) { + MATRIX_OUTPUT = OUTPUT; + OUTPUT = new File("/dev/null"); + } else if (CROSSCHECK_SAMPLES) { // if cross-checking sample we want matrix output CROSSCHECK_BY = CrosscheckMetric.DataType.SAMPLE; - } else { - CROSSCHECK_BY = CrosscheckMetric.DataType.READGROUP; MATRIX_OUTPUT = OUTPUT; OUTPUT = new File("/dev/null"); + } else { + CROSSCHECK_BY = CrosscheckMetric.DataType.READGROUP; } return super.doWork(); diff --git a/src/test/java/picard/fingerprint/CrosscheckReadGroupFingerprintsTest.java b/src/test/java/picard/fingerprint/CrosscheckReadGroupFingerprintsTest.java deleted file mode 100644 index 4627e1617..000000000 --- a/src/test/java/picard/fingerprint/CrosscheckReadGroupFingerprintsTest.java +++ /dev/null @@ -1,271 +0,0 @@ -package picard.fingerprint; - -import htsjdk.samtools.metrics.MetricsFile; -import org.testng.Assert; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; -import picard.vcf.SamTestUtils; - -import java.io.*; -import java.lang.reflect.Field; -import java.nio.charset.Charset; -import java.nio.file.Files; -import java.util.*; -import java.util.stream.Stream; - -import static picard.fingerprint.FingerprintIdDetails.multipleValuesString; - -/** - * Tests for CrosscheckReadGroupFingerprints - */ -public class CrosscheckReadGroupFingerprintsTest { - - private final static File TEST_DIR = new File("testdata/picard/fingerprint/"); - private final static File HAPLOTYPE_MAP = new File(TEST_DIR, "Homo_sapiens_assembly19.haplotype_database.subset.txt"); - - static private final File NA12891_r1_sam = new File(TEST_DIR, "NA12891.over.fingerprints.r1.sam"); - static private final File NA12891_r2_sam = new File(TEST_DIR, "NA12891.over.fingerprints.r2.sam"); - - //this is a copy of a previous one, but with a different sample name - static private final File NA12891_named_NA12892_r1_sam = new File(TEST_DIR, "NA12891_named_NA12892.over.fingerprints.r1.sam"); - - static private final File NA12892_r1_sam = new File(TEST_DIR, "NA12892.over.fingerprints.r1.sam"); - static private final File NA12892_r2_sam = new File(TEST_DIR, "NA12892.over.fingerprints.r2.sam"); - - static private File NA12891_r1, NA12891_r2, NA12891_named_NA12892_r1, NA12892_r1, NA12892_r2; - - static private final int NA12891_r1_RGs = 27; - static private final int NA12891_r2_RGs = 26; - static private final int NA12892_r1_RGs = 25; - static private final int NA12892_r2_RGs = 26; - - private static final Map> lookupMap = new HashMap<>(4); - - @BeforeTest - static public void setup() throws IOException { - NA12891_r1 = SamTestUtils.createIndexedBam(NA12891_r1_sam, NA12891_r1_sam); - NA12891_r2 = SamTestUtils.createIndexedBam(NA12891_r2_sam, NA12891_r2_sam); - NA12891_named_NA12892_r1 = SamTestUtils.createIndexedBam(NA12891_named_NA12892_r1_sam, NA12891_named_NA12892_r1_sam); - NA12892_r1 = SamTestUtils.createIndexedBam(NA12892_r1_sam, NA12892_r1_sam); - NA12892_r2 = SamTestUtils.createIndexedBam(NA12892_r2_sam, NA12892_r2_sam); - - lookupMap.put(CrosscheckMetric.DataType.FILE, new ArrayList<>()); - lookupMap.get(CrosscheckMetric.DataType.FILE).addAll(Arrays.asList("LEFT_FILE", "RIGHT_FILE")); - - lookupMap.put(CrosscheckMetric.DataType.SAMPLE, new ArrayList<>()); - lookupMap.get(CrosscheckMetric.DataType.SAMPLE).addAll(Arrays.asList("LEFT_SAMPLE", "RIGHT_SAMPLE")); - lookupMap.get(CrosscheckMetric.DataType.SAMPLE).addAll(lookupMap.get(CrosscheckMetric.DataType.FILE)); - - lookupMap.put(CrosscheckMetric.DataType.LIBRARY, new ArrayList<>()); - lookupMap.get(CrosscheckMetric.DataType.LIBRARY).addAll(Arrays.asList("LEFT_LIBRARY", "RIGHT_LIBRARY")); - lookupMap.get(CrosscheckMetric.DataType.LIBRARY).addAll(lookupMap.get(CrosscheckMetric.DataType.SAMPLE)); - - lookupMap.put(CrosscheckMetric.DataType.READGROUP, new ArrayList<>()); - lookupMap.get(CrosscheckMetric.DataType.READGROUP).addAll(Arrays.asList("LEFT_RUN_BARCODE", "LEFT_LANE", - "LEFT_MOLECULAR_BARCODE_SEQUENCE","RIGHT_RUN_BARCODE", - "RIGHT_LANE", "RIGHT_MOLECULAR_BARCODE_SEQUENCE")); - lookupMap.get(CrosscheckMetric.DataType.READGROUP).addAll(lookupMap.get(CrosscheckMetric.DataType.LIBRARY)); - } - - @DataProvider(name = "bamFilesRGs") - public Object[][] bamFilesRGs() { - return new Object[][] { - {NA12891_r1, NA12891_r2, false, 0, (NA12891_r1_RGs + NA12891_r2_RGs) + 1}, - {NA12891_r1, NA12892_r1, false, 0, (NA12891_r1_RGs + NA12892_r1_RGs) + 1}, - {NA12891_r1, NA12892_r2, false, 0, (NA12891_r1_RGs + NA12892_r2_RGs) + 1}, - {NA12892_r1, NA12892_r2, false, 0, (NA12892_r1_RGs + NA12892_r2_RGs) + 1}, - {NA12892_r2, NA12891_r2, false, 0, (NA12892_r2_RGs + NA12891_r2_RGs) + 1}, - {NA12892_r2, NA12891_r1, false, 0, (NA12892_r2_RGs + NA12891_r1_RGs) + 1}, - {NA12891_r1, NA12891_r2, true, 0, (NA12891_r1_RGs + NA12891_r2_RGs) + 1}, - {NA12891_r1, NA12892_r1, true, 1, (NA12891_r1_RGs + NA12892_r1_RGs) + 1}, - {NA12891_r1, NA12892_r2, true, 1, (NA12891_r1_RGs + NA12892_r2_RGs) + 1}, - {NA12892_r1, NA12892_r2, true, 0, (NA12892_r1_RGs + NA12892_r2_RGs) + 1}, - {NA12892_r2, NA12891_r2, true, 1, (NA12892_r2_RGs + NA12891_r2_RGs) + 1}, - {NA12892_r2, NA12891_r1, true, 1, (NA12892_r2_RGs + NA12891_r1_RGs) + 1} - }; - } - - @Test(dataProvider = "bamFilesRGs") - public void testCrossCheckRGs(final File file1, final File file2, final boolean expectAllMatch, final int expectedRetVal, final int expectedNMetrics) throws IOException { - - File metrics = File.createTempFile("Fingerprinting", "NA1291.RG.crosscheck_metrics"); - metrics.deleteOnExit(); - - final String[] args = new String[]{ - "INPUT=" + file1.getAbsolutePath(), - "INPUT=" + file2.getAbsolutePath(), - "OUTPUT=" + metrics.getAbsolutePath(), - "HAPLOTYPE_MAP=" + HAPLOTYPE_MAP, - "LOD_THRESHOLD=" + -2.0, - "EXPECT_ALL_GROUPS_TO_MATCH=" + expectAllMatch - }; - - doMatrixTest(args, metrics, expectedRetVal, expectedNMetrics, CrosscheckMetric.DataType.READGROUP, expectAllMatch); - } - - @DataProvider(name = "bamFilesLBs") - public Object[][] bamFilesLBs() { - - return new Object[][]{ - {NA12891_r1, NA12891_r2, 0}, - {NA12891_r1, NA12892_r1, 0}, - {NA12892_r2, NA12891_r2, 0}, - {NA12892_r2, NA12891_r1, 0}, - {NA12891_r1, NA12891_named_NA12892_r1_sam, 1}, //error since expected match but found a mismatch - {NA12892_r1, NA12891_named_NA12892_r1_sam, 1}, //error since expected mismatch but found a match - {NA12891_r2, NA12891_named_NA12892_r1_sam, 1}, //error since expected match but found a mismatch - {NA12892_r2, NA12891_named_NA12892_r1_sam, 1}, //error since expected mismatch but found a match - }; - } - - @Test(dataProvider = "bamFilesLBs") - public void testCrossCheckLBs(final File file1, final File file2, final int expectedRetVal) throws IOException { - File metrics = File.createTempFile("Fingerprinting", "NA1291.LB.crosscheck_metrics"); - metrics.deleteOnExit(); - - final String[] args = new String[]{ - "INPUT=" + file1.getAbsolutePath(), - "INPUT=" + file2.getAbsolutePath(), - "OUTPUT=" + metrics.getAbsolutePath(), - "HAPLOTYPE_MAP=" + HAPLOTYPE_MAP, - "LOD_THRESHOLD=" + -1.0, - "CROSSCHECK_LIBRARIES=true" - }; - final int numLibs=2; - doTest(args, metrics, expectedRetVal, numLibs * (numLibs + 1) / 2, CrosscheckMetric.DataType.LIBRARY); - } - - @DataProvider(name = "bamFilesSources") - public Object[][] bamFilesSources() { - - return new Object[][]{ - {NA12891_r1, NA12891_r2, 0}, - {NA12892_r1, NA12892_r2, 0}, - {NA12891_r1, NA12892_r1, 0}, - {NA12892_r2, NA12891_r2, 0}, - {NA12892_r2, NA12891_named_NA12892_r1, 1}, // the two files contain different samples one has wrong name - {NA12891_r2, NA12891_named_NA12892_r1, 1}, // unexpected match - {NA12892_r1, NA12891_named_NA12892_r1, 1}, // the two files contain different samples one has wrong name - {NA12891_r1, NA12891_named_NA12892_r1, 1}, // unexpected match - }; - } - - @DataProvider(name = "bamFilesSMs") - public Object[][] bamFilesSMs() { - - return new Object[][] { - {NA12891_r1, NA12891_r2, 0, 1}, - {NA12891_r1, NA12892_r1, 0, 2}, - {NA12892_r2, NA12891_r2, 0, 2}, - {NA12892_r2, NA12891_named_NA12892_r1, 0, 1}, // no error since only one sample in aggregate - {NA12891_r2, NA12891_named_NA12892_r1, 1, 2}, // unexpected match - {NA12892_r1, NA12891_named_NA12892_r1, 0, 1}, // no error since only one sample in aggregate - {NA12891_r1, NA12891_named_NA12892_r1, 1, 2}, // unexpected match - }; - } - - @Test(dataProvider = "bamFilesSMs") - public void testCrossCheckSMs(final File file1, final File file2, final int expectedRetVal, final int numberOfSamples) throws IOException { - File metrics = File.createTempFile("Fingerprinting", "NA1291.SM.crosscheck_metrics"); - metrics.deleteOnExit(); - - final String[] args = new String[]{ - "INPUT=" + file1.getAbsolutePath(), - "INPUT=" + file2.getAbsolutePath(), - "OUTPUT=" + metrics.getAbsolutePath(), - "HAPLOTYPE_MAP=" + HAPLOTYPE_MAP, - "LOD_THRESHOLD=" + -1.0, - "CROSSCHECK_SAMPLES=true" - }; - doTest(args, metrics, expectedRetVal, numberOfSamples * (numberOfSamples + 1) / 2, CrosscheckMetric.DataType.SAMPLE); - } - - private void doTest(final String[] args, final File metrics, final int expectedRetVal, final int expectedNMetrics, final CrosscheckMetric.DataType expectedType) throws IOException { - doTest(args, metrics, expectedRetVal, expectedNMetrics, expectedType, false); - } - - private void doTest(final String[] args, final File metrics, final int expectedRetVal, final int expectedNMetrics, final CrosscheckMetric.DataType expectedType, final boolean expectAllMatch) throws IOException { - - final CrosscheckReadGroupFingerprints crossChecker = new CrosscheckReadGroupFingerprints(); - Assert.assertEquals(crossChecker.instanceMain(args), expectedRetVal); - - final MetricsFile> metricsOutput = new MetricsFile<>(); - metricsOutput.read(new FileReader(metrics)); - - Assert.assertFalse(metricsOutput.getMetrics().stream() - .anyMatch(m -> m.DATA_TYPE != expectedType)); - - Assert.assertFalse(metricsOutput.getMetrics().stream() - .anyMatch(m-> m.LOD_SCORE_NORMAL_TUMOR == null)); - Assert.assertFalse(metricsOutput.getMetrics().stream() - .anyMatch(m-> m.LOD_SCORE == null)); - Assert.assertFalse(metricsOutput.getMetrics().stream() - .anyMatch(m-> m.LOD_SCORE_TUMOR_NORMAL == null)); - - if (expectAllMatch) { - Assert.assertTrue(metricsOutput.getMetrics().stream() - .allMatch(m -> m.RESULT == CrosscheckMetric.FingerprintResult.INCONCLUSIVE || - m.RESULT.isMatch() == m.LEFT_SAMPLE.equals(m.RIGHT_SAMPLE))); - } else if (expectedRetVal == 0) { - Assert.assertTrue(metricsOutput.getMetrics().stream() - .allMatch(m -> m.RESULT == CrosscheckMetric.FingerprintResult.INCONCLUSIVE || - m.RESULT.isExpected())); - } else { - Assert.assertTrue(metricsOutput.getMetrics().stream() - .anyMatch(m -> !m.RESULT.isExpected())); - } - - Assert.assertEquals(metricsOutput.getMetrics().size(), expectedNMetrics); - - // at the readgroup level things aren't always conclusive... - if (!metricsOutput.getMetrics().isEmpty() && expectedType != CrosscheckMetric.DataType.READGROUP) { - Assert.assertTrue(metricsOutput.getMetrics().stream() - .anyMatch(m -> m.RESULT != CrosscheckMetric.FingerprintResult.INCONCLUSIVE)); - } - - //check that fields that should have an actual value, indeed do - for(final String fieldName : lookupMap.get(expectedType)) { - try { - final Field field = CrosscheckMetric.class.getField(fieldName); - Assert.assertTrue(metricsOutput.getMetrics().stream().allMatch(m -> { - try { - return field.get(m) != multipleValuesString && field.get(m) != null; - } catch (IllegalAccessException e) { - e.printStackTrace(); - return false; - } - })); - - } catch (NoSuchFieldException e) { - e.printStackTrace(); - assert false; - } - } - } - - private void doMatrixTest(final String[] args, final File metrics, final int expectedRetVal, final int expectedNMetrics, final CrosscheckMetric.DataType expectedType, final boolean expectAllMatch) throws IOException { - - final CrosscheckReadGroupFingerprints crossChecker = new CrosscheckReadGroupFingerprints(); - Assert.assertEquals(crossChecker.instanceMain(args), expectedRetVal); - Assert.assertTrue(metrics.canRead()); - - try (Stream lines = Files.lines(metrics.toPath(), Charset.defaultCharset())) { - long numOfLines = lines.count(); - Assert.assertEquals(numOfLines, expectedNMetrics); - } - } - - @Test - public void canWriteToDevNull() throws IOException { - File f = new File("/dev/null"); - Assert.assertTrue(f.canRead()); - - final OutputStream stream = new FileOutputStream(f); - final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(stream)); - - writer.write("Just a test"); - writer.close(); - - } -} \ No newline at end of file