diff --git a/src/main/java/picard/sam/markduplicates/UmiAwareDuplicateSetIterator.java b/src/main/java/picard/sam/markduplicates/UmiAwareDuplicateSetIterator.java index ee1ddfa76..40def6f4b 100644 --- a/src/main/java/picard/sam/markduplicates/UmiAwareDuplicateSetIterator.java +++ b/src/main/java/picard/sam/markduplicates/UmiAwareDuplicateSetIterator.java @@ -1,7 +1,7 @@ /* * The MIT License * - * Copyright (c) 2016 The Broad Institute + * Copyright (c) 2017 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -36,11 +36,14 @@ import htsjdk.samtools.DuplicateSet; import htsjdk.samtools.DuplicateSetIterator; +import htsjdk.samtools.SAMRecord; import htsjdk.samtools.util.CloseableIterator; import picard.PicardException; import java.util.*; +import static htsjdk.samtools.util.StringUtil.hammingDistance; + /** * UmiAwareDuplicateSetIterator is an iterator that wraps a duplicate set iterator * in such a way that each duplicate set may be broken up into subsets according @@ -55,22 +58,28 @@ private final String inferredUmiTag; private final boolean allowMissingUmis; private boolean isOpen = false; + private UmiMetrics metrics; + private boolean haveWeSeenFirstRead = false; + + private long observedUmiBases = 0; /** * Creates a UMI aware duplicate set iterator * - * @param wrappedIterator UMI aware duplicate set iterator is a wrapper + * @param wrappedIterator Iterator of DuplicateSets to use and break up by UMI. 
* @param maxEditDistanceToJoin The edit distance between UMIs that will be used to union UMIs into groups - * @param umiTag The tag used in the bam file that designates the UMI - * @param assignedUmiTag The tag in the bam file that designates the assigned UMI + * @param umiTag The tag used in the bam file that designates the UMI + * @param assignedUmiTag The tag in the bam file that designates the assigned UMI */ UmiAwareDuplicateSetIterator(final DuplicateSetIterator wrappedIterator, final int maxEditDistanceToJoin, - final String umiTag, final String assignedUmiTag, final boolean allowMissingUmis) { + final String umiTag, final String assignedUmiTag, final boolean allowMissingUmis, + final UmiMetrics metrics) { this.wrappedIterator = wrappedIterator; this.maxEditDistanceToJoin = maxEditDistanceToJoin; this.umiTag = umiTag; this.inferredUmiTag = assignedUmiTag; this.allowMissingUmis = allowMissingUmis; + this.metrics = metrics; isOpen = true; nextSetsIterator = Collections.emptyIterator(); } @@ -79,18 +88,17 @@ public void close() { isOpen = false; wrappedIterator.close(); + metrics.calculateDerivedFields(); } @Override public boolean hasNext() { - if(!isOpen) { + if (!isOpen) { return false; - } - else { - if(nextSetsIterator.hasNext() || wrappedIterator.hasNext()) { + } else { + if (nextSetsIterator.hasNext() || wrappedIterator.hasNext()) { return true; - } - else { + } else { isOpen = false; return false; } @@ -119,6 +127,43 @@ private void process(final DuplicateSet set) { } final UmiGraph umiGraph = new UmiGraph(set, umiTag, inferredUmiTag, allowMissingUmis); - nextSetsIterator = umiGraph.joinUmisIntoDuplicateSets(maxEditDistanceToJoin).iterator(); + + List duplicateSets = umiGraph.joinUmisIntoDuplicateSets(maxEditDistanceToJoin); + + // Collect statistics on numbers of observed and inferred UMIs + // and total numbers of observed and inferred UMIs + for (DuplicateSet ds : duplicateSets) { + List records = ds.getRecords(); + SAMRecord representativeRead = 
ds.getRepresentative(); + String inferredUmi = representativeRead.getStringAttribute(inferredUmiTag); + + for (SAMRecord rec : records) { + String currentUmi = rec.getStringAttribute(umiTag); + + if (currentUmi != null) { + // All UMIs should be the same length, the code presently does not support variable length UMIs + // TODO: Add support for variable length UMIs + if (!haveWeSeenFirstRead) { + metrics.MEAN_UMI_LENGTH = currentUmi.length(); + haveWeSeenFirstRead = true; + } else { + if (metrics.MEAN_UMI_LENGTH != currentUmi.length()) { + throw new PicardException("UMIs of differing lengths were found."); + } + } + + // Update UMI metrics associated with each record + metrics.OBSERVED_BASE_ERRORS += hammingDistance(currentUmi, inferredUmi); + observedUmiBases += currentUmi.length(); + metrics.addUmiObservation(currentUmi, inferredUmi); + } + } + } + + // Update UMI metrics associated with each duplicate set + metrics.DUPLICATE_SETS_WITH_UMI += duplicateSets.size(); + metrics.DUPLICATE_SETS_IGNORING_UMI++; + + nextSetsIterator = duplicateSets.iterator(); } } diff --git a/src/main/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigar.java b/src/main/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigar.java index aba8799f9..bdaeec724 100644 --- a/src/main/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigar.java +++ b/src/main/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigar.java @@ -1,7 +1,7 @@ /* * The MIT License * - * Copyright (c) 2016 The Broad Institute + * Copyright (c) 2017 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -27,11 +27,14 @@ import htsjdk.samtools.DuplicateSet; import htsjdk.samtools.DuplicateSetIterator; import htsjdk.samtools.SAMRecordDuplicateComparator; +import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.util.*; import 
picard.cmdline.CommandLineProgramProperties; import picard.cmdline.Option; import picard.cmdline.programgroups.Alpha; +import java.io.File; + /** * This is a simple tool to mark duplicates making use of UMIs in the reads. * @@ -65,6 +68,10 @@ @Option(shortName = "MAX_EDIT_DISTANCE_TO_JOIN", doc = "Largest edit distance that UMIs must have in order to be considered as coming from distinct source molecules.", optional = true) public int MAX_EDIT_DISTANCE_TO_JOIN = 1; + // The UMI_METRICS file provides various statistical measurements collected about the UMIs during deduplication. + @Option(shortName = "UMI_METRICS", doc = "UMI Metrics") + public File UMI_METRICS_FILE; + @Option(shortName = "UMI_TAG_NAME", doc = "Tag name to use for UMI", optional = true) public String UMI_TAG_NAME = "RX"; @@ -78,6 +85,22 @@ public boolean ALLOW_MISSING_UMIS = false; private final Log log = Log.getInstance(UmiAwareMarkDuplicatesWithMateCigar.class); + private UmiMetrics metrics = new UmiMetrics(); + + @Override + protected int doWork() { + // Before we do anything, make sure the UMI_METRICS_FILE can be written to. 
+ IOUtil.assertFileIsWritable(UMI_METRICS_FILE); + + // Perform Mark Duplicates work + int retval = super.doWork(); + + // Write metrics specific to UMIs + MetricsFile metricsFile = getMetricsFile(); + metricsFile.addMetric(metrics); + metricsFile.write(UMI_METRICS_FILE); + return retval; + } @Override protected CloseableIterator getDuplicateSetIterator(final SamHeaderAndIterator headerAndIterator, final SAMRecordDuplicateComparator comparator) { @@ -85,6 +108,6 @@ new DuplicateSetIterator(headerAndIterator.iterator, headerAndIterator.header, false, - comparator), MAX_EDIT_DISTANCE_TO_JOIN, UMI_TAG_NAME, ASSIGNED_UMI_TAG, ALLOW_MISSING_UMIS); + comparator), MAX_EDIT_DISTANCE_TO_JOIN, UMI_TAG_NAME, ASSIGNED_UMI_TAG, ALLOW_MISSING_UMIS, metrics); } } diff --git a/src/main/java/picard/sam/markduplicates/UmiMetrics.java b/src/main/java/picard/sam/markduplicates/UmiMetrics.java new file mode 100644 index 000000000..8c43fb8c1 --- /dev/null +++ b/src/main/java/picard/sam/markduplicates/UmiMetrics.java @@ -0,0 +1,129 @@ +/* + * The MIT License + * + * Copyright (c) 2017 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +package picard.sam.markduplicates; + +import java.util.stream.Collectors; +import htsjdk.samtools.metrics.MetricBase; +import htsjdk.samtools.util.Histogram; +import htsjdk.samtools.util.QualityUtil; +import picard.util.MathUtil; + +/** + * Metrics that are calculated during the process of marking duplicates + * within a stream of SAMRecords using the UmiAwareDuplicateSetIterator. + */ +public class UmiMetrics extends MetricBase { + private final Histogram observedUmis = new Histogram<>(); + private final Histogram inferredUmis = new Histogram<>(); + private long observedUmiBases = 0; + + /** Number of bases in each UMI */ + public double MEAN_UMI_LENGTH = 0.0; + + /** Number of different UMI sequences observed */ + public long OBSERVED_UNIQUE_UMIS = 0; + + /** Number of different inferred UMI sequences derived */ + public long INFERRED_UNIQUE_UMIS = 0; + + /** Number of errors inferred by comparing the observed and inferred UMIs */ + public long OBSERVED_BASE_ERRORS = 0; + + /** Number of duplicate sets found before taking UMIs into account */ + public long DUPLICATE_SETS_IGNORING_UMI = 0; + + /** Number of duplicate sets found after taking UMIs into account */ + public long DUPLICATE_SETS_WITH_UMI = 0; + + /** + * Entropy (in base 4) of the observed UMI sequences, indicating the + * effective number of bases in the UMIs. If this is significantly + * smaller than MEAN_UMI_LENGTH, it indicates that the UMIs are not + * distributed uniformly. + */ + public double OBSERVED_UMI_ENTROPY = 0; + + /** Entropy (in base 4) of the inferred UMI sequences, indicating the + * effective number of bases in the inferred UMIs. 
If this is significantly + * smaller than MEAN_UMI_LENGTH, it indicates that the UMIs are not + * distributed uniformly. + */ + public double INFERRED_UMI_ENTROPY = 0; + + /** Estimation of Phred scaled quality scores for UMIs */ + public double UMI_BASE_QUALITIES = 0.0; + + public UmiMetrics() {} + + public UmiMetrics(final double length, final int observedUniqueUmis, final int inferredUniqueUmis, + final int observedBaseErrors, final int duplicateSetsWithoutUmi, + final int duplicateSetsWithUmi, final double effectiveLengthOfInferredUmis, + final double effectiveLengthOfObservedUmis, final double estimatedBaseQualityOfUmis) { + MEAN_UMI_LENGTH = length; + OBSERVED_UNIQUE_UMIS = observedUniqueUmis; + INFERRED_UNIQUE_UMIS = inferredUniqueUmis; + OBSERVED_BASE_ERRORS = observedBaseErrors; + DUPLICATE_SETS_IGNORING_UMI = duplicateSetsWithoutUmi; + DUPLICATE_SETS_WITH_UMI = duplicateSetsWithUmi; + INFERRED_UMI_ENTROPY = effectiveLengthOfInferredUmis; + OBSERVED_UMI_ENTROPY = effectiveLengthOfObservedUmis; + UMI_BASE_QUALITIES = estimatedBaseQualityOfUmis; + } + + public void calculateDerivedFields() { + OBSERVED_UNIQUE_UMIS = observedUmis.size(); + INFERRED_UNIQUE_UMIS = inferredUmis.size(); + + OBSERVED_UMI_ENTROPY = effectiveNumberOfBases(observedUmis); + INFERRED_UMI_ENTROPY = effectiveNumberOfBases(inferredUmis); + + UMI_BASE_QUALITIES = QualityUtil.getPhredScoreFromErrorProbability((double) OBSERVED_BASE_ERRORS / (double) observedUmiBases); + } + + /** + * Add an observation of a UMI to the metrics + * @param observedUmi String containing the observed UMI + * @param inferredUmi String containing the UMI inferred after error correcting the observed UMI + */ + public void addUmiObservation(String observedUmi, String inferredUmi) { + observedUmis.increment(observedUmi); + inferredUmis.increment(inferredUmi); + observedUmiBases += observedUmi.length(); + } + + private double effectiveNumberOfBases(Histogram observations) { + double totalObservations = 
observations.getSumOfValues(); + + // Convert to log base 4 so that the entropy is now a measure + // of the effective number of DNA bases. If we used log(2.0) + // our result would be in bits. + double entropyBaseE = observations.values().stream().collect(Collectors.summingDouble( + v -> {double p = v.getValue() / totalObservations; + return -p * Math.log(p);})); + + return entropyBaseE / MathUtil.LOG_4_BASE_E; + } +} + diff --git a/src/main/java/picard/util/MathUtil.java b/src/main/java/picard/util/MathUtil.java index acaf80c75..b890c733f 100644 --- a/src/main/java/picard/util/MathUtil.java +++ b/src/main/java/picard/util/MathUtil.java @@ -41,6 +41,10 @@ /** The double value closest to 1 while still being less than 1. */ public static final double MAX_PROB_BELOW_ONE = 0.9999999999999999d; + /** Constant to convert between the natural base e and 4.0. Useful for + * entropy calculations on DNA. */ + public static final double LOG_4_BASE_E = Math.log(4.0); + /** * this function mimics the behavior of log_1p but resulting in log _base 10_ of (1+x) instead of natural log of 1+x */ diff --git a/src/test/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigarTest.java b/src/test/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigarTest.java index b78bbab73..eda1476d5 100644 --- a/src/test/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigarTest.java +++ b/src/test/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigarTest.java @@ -1,7 +1,7 @@ /* * The MIT License * - * Copyright (c) 2016 The Broad Institute + * Copyright (c) 2017 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -26,6 +26,7 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import htsjdk.samtools.util.QualityUtil; import picard.PicardException; import java.util.*; @@ -163,4 +164,80 @@ 
public void testBadUmis(List umis, List assignedUmi, final List< } tester.setExpectedAssignedUmis(assignedUmi).runTest(); } + + @DataProvider(name = "testUmiMetricsDataProvider") + private Object[][] testUmiMetricsDataProvider() { + + // Calculate values of metrics by hand to ensure they are right + // effectiveLength4_1 is the effective UMI length observing 5 UMIs where 4 are the same + double effectiveLength4_1 = -(4./5.)*Math.log(4./5.)/Math.log(4.) -(1./5.)*Math.log(1./5.)/Math.log(4.); + // effectiveLength3_1_1 is the effective UMI length observing 5 UMIs where 3 are the same and the other two are + // unique + double effectiveLength3_1_1 = -(3./5.)*Math.log(3./5.)/Math.log(4.) -2*(1./5.)*Math.log(1./5.)/Math.log(4.); + + // estimatedBaseQualityk_n is the phred scaled base quality score where k of n bases are incorrect + double estimatedBaseQuality1_20 = QualityUtil.getPhredScoreFromErrorProbability(1./20.); + double estimatedBaseQuality3_20 = QualityUtil.getPhredScoreFromErrorProbability(3./20.); + + return new Object[][]{{ + // Test basic error correction using edit distance of 1 + Arrays.asList(new String[]{"AAAA", "AAAA", "ATTA", "AAAA", "AAAT"}), // Observed UMI + Arrays.asList(new String[]{"AAAA", "AAAA", "ATTA", "AAAA", "AAAA"}), // Expected inferred UMI + Arrays.asList(new Boolean[]{false, true, false, true, true}), // Should it be marked as duplicate? 
+ 1, // Edit Distance to Join + new UmiMetrics(4.0, // MEAN_UMI_LENGTH + 3, // OBSERVED_UNIQUE_UMIS + 2, // INFERRED_UNIQUE_UMIS + 2, // OBSERVED_BASE_ERRORS (Note: This is 2 rather than 1 because we are using paired end reads) + 2, // DUPLICATE_SETS_IGNORING_UMI + 4, // DUPLICATE_SETS_WITH_UMI + effectiveLength4_1, // INFERRED_UMI_ENTROPY + effectiveLength3_1_1, // OBSERVED_UMI_ENTROPY + estimatedBaseQuality1_20) // UMI_BASE_QUALITIES + }, { + // Test basic error correction using edit distance of 2 + Arrays.asList(new String[]{"AAAA", "AAAA", "ATTA", "AAAA", "AAAT"}), + Arrays.asList(new String[]{"AAAA", "AAAA", "AAAA", "AAAA", "AAAA"}), + Arrays.asList(new Boolean[]{false, true, true, true, true}), + 2, + new UmiMetrics(4.0, // MEAN_UMI_LENGTH + 3, // OBSERVED_UNIQUE_UMIS + 1, // INFERRED_UNIQUE_UMIS + 6, // OBSERVED_BASE_ERRORS + 2, // DUPLICATE_SETS_IGNORING_UMI + 2, // DUPLICATE_SETS_WITH_UMI + 0.0, // INFERRED_UMI_ENTROPY + effectiveLength3_1_1, // OBSERVED_UMI_ENTROPY + estimatedBaseQuality3_20) // UMI_BASE_QUALITIES + }, { + // Test maximum entropy (INFERRED_UMI_ENTROPY) + Arrays.asList(new String[]{"AA", "AT", "AC", "AG", "TA", "TT", "TC", "TG", "CA", "CT", "CC", "CG", "GA", "GT", "GC", "GG"}), + Arrays.asList(new String[]{"AA", "AT", "AC", "AG", "TA", "TT", "TC", "TG", "CA", "CT", "CC", "CG", "GA", "GT", "GC", "GG"}), + Arrays.asList(new Boolean[]{false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false}), + 0, + new UmiMetrics(2.0, // MEAN_UMI_LENGTH + 16, // OBSERVED_UNIQUE_UMIS + 16, // INFERRED_UNIQUE_UMIS + 0, // OBSERVED_BASE_ERRORS + 2, // DUPLICATE_SETS_IGNORING_UMI + 32, // DUPLICATE_SETS_WITH_UMI + 2.0, // INFERRED_UMI_ENTROPY + 2, // OBSERVED_UMI_ENTROPY + -1) // UMI_BASE_QUALITIES + }}; + } + + @Test(dataProvider = "testUmiMetricsDataProvider") + public void 
testUmiMetrics(List umis, List assignedUmi, final List isDuplicate, + final int editDistanceToJoin, final UmiMetrics expectedMetrics) { + UmiAwareMarkDuplicatesWithMateCigarTester tester = getTester(false); + tester.addArg("MAX_EDIT_DISTANCE_TO_JOIN=" + editDistanceToJoin); + + for( int i = 0;i < umis.size();i++ ) { + tester.addMatePairWithUmi(umis.get(i), assignedUmi.get(i), isDuplicate.get(i), isDuplicate.get(i)); + } + tester.setExpectedAssignedUmis(assignedUmi); + tester.setExpectedMetrics(expectedMetrics); + tester.runTest(); + } } diff --git a/src/test/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigarTester.java b/src/test/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigarTester.java index bdf166b03..a34e5bd0a 100644 --- a/src/test/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigarTester.java +++ b/src/test/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigarTester.java @@ -27,9 +27,13 @@ import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; +import htsjdk.samtools.metrics.MetricsFile; import org.testng.Assert; import picard.cmdline.CommandLineProgram; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; import java.util.List; /** @@ -42,6 +46,8 @@ public class UmiAwareMarkDuplicatesWithMateCigarTester extends AbstractMarkDuplicatesCommandLineProgramTester { private int readNameCounter = 0; private List expectedAssignedUmis; + private UmiMetrics expectedMetrics; + private File umiMetricsFile = new File(getOutputDir(), "umi_metrics.txt"); // This tag is only used for testing, it indicates what we expect to see in the inferred UMI tag. private final String expectedUmiTag = "RE"; @@ -50,10 +56,13 @@ // AbstractMarkDuplicatesCommandLineProgramTester. Since those tests use // reads that don't have UMIs we enable the ALLOW_MISSING_UMIS option. 
UmiAwareMarkDuplicatesWithMateCigarTester() { + addArg("UMI_METRICS_FILE=" + umiMetricsFile); addArg("ALLOW_MISSING_UMIS=" + true); } UmiAwareMarkDuplicatesWithMateCigarTester(final boolean allowMissingUmis) { + addArg("UMI_METRICS_FILE=" + umiMetricsFile); + if (allowMissingUmis) { addArg("ALLOW_MISSING_UMIS=" + true); } @@ -146,6 +155,11 @@ UmiAwareMarkDuplicatesWithMateCigarTester setExpectedAssignedUmis(final List> metricsOutput = new MetricsFile>(); + try { + metricsOutput.read(new FileReader(umiMetricsFile)); + } + catch (final FileNotFoundException ex) { + System.err.println("Metrics file not found: " + ex); + } + double tolerance = 1e-6; + Assert.assertEquals(metricsOutput.getMetrics().size(), 1); + final UmiMetrics observedMetrics = metricsOutput.getMetrics().get(0); + Assert.assertEquals(observedMetrics.MEAN_UMI_LENGTH, expectedMetrics.MEAN_UMI_LENGTH, "UMI_LENGTH does not match expected"); + Assert.assertEquals(observedMetrics.OBSERVED_UNIQUE_UMIS, expectedMetrics.OBSERVED_UNIQUE_UMIS, "OBSERVED_UNIQUE_UMIS does not match expected"); + Assert.assertEquals(observedMetrics.INFERRED_UNIQUE_UMIS, expectedMetrics.INFERRED_UNIQUE_UMIS, "INFERRED_UNIQUE_UMIS does not match expected"); + Assert.assertEquals(observedMetrics.OBSERVED_BASE_ERRORS, expectedMetrics.OBSERVED_BASE_ERRORS, "OBSERVED_BASE_ERRORS does not match expected"); + Assert.assertEquals(observedMetrics.DUPLICATE_SETS_IGNORING_UMI, expectedMetrics.DUPLICATE_SETS_IGNORING_UMI, "DUPLICATE_SETS_IGNORING_UMI does not match expected"); + Assert.assertEquals(observedMetrics.DUPLICATE_SETS_WITH_UMI, expectedMetrics.DUPLICATE_SETS_WITH_UMI, "DUPLICATE_SETS_WITH_UMI does not match expected"); + Assert.assertEquals(observedMetrics.INFERRED_UMI_ENTROPY, expectedMetrics.INFERRED_UMI_ENTROPY, tolerance, "INFERRED_UMI_ENTROPY does not match expected"); + Assert.assertEquals(observedMetrics.OBSERVED_UMI_ENTROPY, expectedMetrics.OBSERVED_UMI_ENTROPY, tolerance, "OBSERVED_UMI_ENTROPY does not match expected"); + 
Assert.assertEquals(observedMetrics.UMI_BASE_QUALITIES, expectedMetrics.UMI_BASE_QUALITIES, tolerance, "UMI_BASE_QUALITIES does not match expected"); + } + // Also do tests from AbstractMarkDuplicatesCommandLineProgramTester super.test(); }