From 00ea4c87cbb2532ddf5c29d9a4fc8293de12a3b1 Mon Sep 17 00:00:00 2001 From: Zaal Date: Thu, 8 Sep 2016 12:22:29 +0300 Subject: [PATCH 1/7] Fix for issues 342 and 138: introduced ALSO_IGNORE_DUPLICATES option --- .../java/picard/analysis/CollectGcBiasMetrics.java | 41 ++++-- .../picard/analysis/GcBiasMetricsCollector.java | 161 +++++++++++++-------- .../java/picard/metrics/MultiLevelCollector.java | 1 + src/main/resources/picard/analysis/gcBias.R | 7 +- .../picard/analysis/CollectGcBiasMetricsTest.java | 97 +++++++++++-- testdata/picard/metrics/chrM_NO_SEQ.sam | 5 + 6 files changed, 229 insertions(+), 83 deletions(-) create mode 100644 testdata/picard/metrics/chrM_NO_SEQ.sam diff --git a/src/main/java/picard/analysis/CollectGcBiasMetrics.java b/src/main/java/picard/analysis/CollectGcBiasMetrics.java index a0ca80e3a..509d1b99f 100644 --- a/src/main/java/picard/analysis/CollectGcBiasMetrics.java +++ b/src/main/java/picard/analysis/CollectGcBiasMetrics.java @@ -34,6 +34,7 @@ import picard.cmdline.Option; import picard.cmdline.programgroups.Metrics; import picard.metrics.GcBiasMetrics; +import picard.metrics.MultiLevelCollector; import picard.util.RExecutor; import java.io.File; @@ -93,24 +94,26 @@ "mean coverage (e.g. 3.1 means this GC bin has 3.1 times more reads per window than average). " + "" + "This tool also tracks mean base-quality scores of the reads within each GC content bin, enabling the user to determine " + - "how base quality scores vary with GC content.

"+ + "how base quality scores vary with GC content.

" + "" + "The chart output associated with this data table plots the NORMALIZED_COVERAGE, the distribution of WINDOWs corresponding " + - "to GC percentages, and base qualities corresponding to each %GC bin."+ + "to GC percentages, and base qualities corresponding to each %GC bin." + "

Note: Metrics labeled as percentages are actually expressed as fractions!

" + - "

Usage Example:

"+ + "

Usage Example:

" + "
" +
-            "java -jar picard.jar CollectGcBiasMetrics \\
"+ - " I=input.bam \\
"+ - " O=gc_bias_metrics.txt \\
"+ - " CHART=gc_bias_metrics.pdf \\
"+ - " S=summary_metrics.txt \\
"+ - " R=reference_sequence.fasta"+ - "
"+ + "java -jar picard.jar CollectGcBiasMetrics \\
" + + " I=input.bam \\
" + + " O=gc_bias_metrics.txt \\
" + + " CHART=gc_bias_metrics.pdf \\
" + + " S=summary_metrics.txt \\
" + + " R=reference_sequence.fasta" + + "" + "Please see " + "the GcBiasMetrics documentation for further explanations of each metric." + "
"; - /** The location of the R script to do the plotting. */ + /** + * The location of the R script to do the plotting. + */ private static final String R_SCRIPT = "picard/analysis/gcBias.R"; // Usage and parameters @@ -133,6 +136,9 @@ @Option(shortName = "LEVEL", doc = "The level(s) at which to accumulate metrics.") public Set METRIC_ACCUMULATION_LEVEL = CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS); + @Option(shortName = "ALSO_IGNORE_DUPLICATES", doc = "Use to get additional results without duplicates.") + public boolean ALSO_IGNORE_DUPLICATES = false; + // Calculates GcBiasMetrics for all METRIC_ACCUMULATION_LEVELs provided private GcBiasMetricsCollector multiCollector; @@ -158,11 +164,14 @@ protected void setup(final SAMFileHeader header, final File samFile) { IOUtil.assertFileIsWritable(SUMMARY_OUTPUT); IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); + MultiLevelCollector.IS_NON_DUPLICATES = ALSO_IGNORE_DUPLICATES; + //Calculate windowsByGc for the reference sequence final int[] windowsByGc = GcBiasUtils.calculateRefWindowsByGc(BINS, REFERENCE_SEQUENCE, SCAN_WINDOW_SIZE); //Delegate actual collection to GcBiasMetricCollector - multiCollector = new GcBiasMetricsCollector(METRIC_ACCUMULATION_LEVEL, windowsByGc, header.getReadGroups(), SCAN_WINDOW_SIZE, IS_BISULFITE_SEQUENCED); + multiCollector = new GcBiasMetricsCollector(METRIC_ACCUMULATION_LEVEL, windowsByGc, header.getReadGroups(), + SCAN_WINDOW_SIZE, IS_BISULFITE_SEQUENCED); } //////////////////////////////////////////////////////////////////////////// @@ -179,14 +188,18 @@ protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) { @Override protected void finish() { multiCollector.finish(); + writeResultsToFiles(OUTPUT, SUMMARY_OUTPUT, CHART_OUTPUT); + } + + private void writeResultsToFiles(final File OUTPUT, final File SUMMARY_OUTPUT, final File CHART_OUTPUT) { final MetricsFile file = getMetricsFile(); final MetricsFile detailMetricsFile = getMetricsFile(); final MetricsFile summaryMetricsFile = getMetricsFile(); multiCollector.addAllLevelsToFile(file); final List gcBiasMetricsList = file.getMetrics(); - for(final GcBiasMetrics gcbm : gcBiasMetricsList){ + for (final GcBiasMetrics gcbm : gcBiasMetricsList) { final List gcDetailList = gcbm.DETAILS.getMetrics(); - for(final GcBiasDetailMetrics d : gcDetailList) { + for (final GcBiasDetailMetrics d : gcDetailList) { detailMetricsFile.addMetric(d); } summaryMetricsFile.addMetric(gcbm.SUMMARY); diff --git a/src/main/java/picard/analysis/GcBiasMetricsCollector.java b/src/main/java/picard/analysis/GcBiasMetricsCollector.java index e372da632..d455e108e 100644 --- a/src/main/java/picard/analysis/GcBiasMetricsCollector.java +++ b/src/main/java/picard/analysis/GcBiasMetricsCollector.java @@ -28,6 +28,7 @@ import htsjdk.samtools.SAMRecord; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.reference.ReferenceSequence; +import htsjdk.samtools.util.Log; import htsjdk.samtools.util.QualityUtil; import htsjdk.samtools.util.SequenceUtil; import htsjdk.samtools.util.StringUtil; @@ -56,14 +57,20 @@ private byte [] gc = null; private int referenceIndex = -1; private byte [] refBases = null; + private final static String NON_DUPS_PREFIX = "Non-Duplicates "; + + private static final Log log = Log.getInstance(GcBiasMetricsCollector.class); + public GcBiasMetricsCollector(final Set accumulationLevels, final int[] windowsByGc, - final List samRgRecords, final int scanWindowSize, final boolean bisulfite) { + final List samRgRecords, final int scanWindowSize, + final boolean bisulfite) { this.scanWindowSize = scanWindowSize; this.bisulfite = bisulfite; this.windowsByGc = windowsByGc; setup(accumulationLevels, samRgRecords); } + ///////////////////////////////////////////////////////////////////////////// // This method is called once Per samRecord ///////////////////////////////////////////////////////////////////////////// @@ -80,20 +87,19 @@ protected GcBiasCollectorArgs makeArg(final SAMRecord rec, final ReferenceSequen return new PerUnitGcBiasMetricsCollector(sample, library, readGroup); } - @Override - public void acceptRecord(final SAMRecord rec, final ReferenceSequence ref) {super.acceptRecord(rec, ref);} - ///////////////////////////////////////////////////////////////////////////// //A collector for individual GcBiasMetrics for a given SAMPLE or SAMPLE/LIBRARY //or SAMPLE/LIBRARY/READ_GROUP (depending on aggregation levels) ///////////////////////////////////////////////////////////////////////////// public class PerUnitGcBiasMetricsCollector implements PerUnitMetricCollector { - Map gcData = new HashMap(); + private final Map gcData; + private Map gcDataNonDups; private final String sample; private final String library; private final String readGroup; private static final String allReads = "All_Reads"; + ///////////////////////////////////////////////////////////////////////////// //Records the accumulation level for each level of collection and initializes // a GcObject for this accumulation level @@ -102,19 +108,9 @@ public PerUnitGcBiasMetricsCollector(final String sample, final String library, this.sample = sample; this.library = library; this.readGroup = readGroup; - final String prefix; - if (this.readGroup != null) { - prefix = this.readGroup; - gcData.put(prefix, new GcObject()); - } else if (this.library != null) { - prefix = this.library; - gcData.put(prefix, new GcObject()); - } else if (this.sample != null) { - prefix = this.sample; - gcData.put(prefix, new GcObject()); - } else { - prefix = allReads; - gcData.put(prefix, new GcObject()); + this.gcData = prepareGcData(); + if (IS_NON_DUPLICATES) { + this.gcDataNonDups = prepareGcData(); } } @@ -122,50 +118,101 @@ public PerUnitGcBiasMetricsCollector(final String sample, final String library, //Takes each record and sends them to addRead to calculate gc metrics for // that read for each accumulation level ///////////////////////////////////////////////////////////////////////////// + @Override public void acceptRecord(final GcBiasCollectorArgs args) { final SAMRecord rec = args.getRec(); - final String type; + if (rec.getReadBases().length == 0) { + log.warn("Omitting read " + rec.getReadName() + " with '*' in SEQ field."); + return; + } if (!rec.getReadUnmappedFlag()) { - if(referenceIndex != rec.getReferenceIndex() || gc == null){ + if (referenceIndex != rec.getReferenceIndex() || gc == null) { final ReferenceSequence ref = args.getRef(); refBases = ref.getBases(); StringUtil.toUpperCase(refBases); final int refLength = refBases.length; final int lastWindowStart = refLength - scanWindowSize; gc = GcBiasUtils.calculateAllGcs(refBases, lastWindowStart, scanWindowSize); - referenceIndex=rec.getReferenceIndex(); + referenceIndex = rec.getReferenceIndex(); } - final String group; - if (this.readGroup != null) { - type = this.readGroup; - group = "Read Group"; - addRead(gcData.get(type), rec, group, gc, refBases); - } else if (this.library != null) { - type = this.library; - group = "Library"; - addRead(gcData.get(type), rec, group, gc, refBases); - } else if (this.sample != null) { - type = this.sample; - group = "Sample"; - addRead(gcData.get(type), rec, group, gc, refBases); - } else { - type = allReads; - group = "All Reads"; - addRead(gcData.get(type), rec, group, gc, refBases); + addReadToGcData(rec, this.gcData, ""); + if (IS_NON_DUPLICATES && !rec.getDuplicateReadFlag()) { + addReadToGcData(rec, this.gcDataNonDups, NON_DUPS_PREFIX); } - } - else { - for (final Map.Entry entry : gcData.entrySet()) { - final GcObject gcCur = entry.getValue(); - if (!rec.getReadPairedFlag() || rec.getFirstOfPairFlag()) ++gcCur.totalClusters; + } else { + updTotalClusters(rec, this.gcData); + if (IS_NON_DUPLICATES && !rec.getDuplicateReadFlag()) { + updTotalClusters(rec, this.gcDataNonDups); } } } + @Override public void finish() {} ///////////////////////////////////////////////////////////////////////////// + //Called to add metrics to the output file for each level of collection + // these metrics are used for graphing gc bias in R script + ///////////////////////////////////////////////////////////////////////////// + @Override + public void addMetricsToFile(final MetricsFile file) { + addGcDataToFile(file, this.gcData, ""); + if (IS_NON_DUPLICATES) { + addGcDataToFile(file, this.gcDataNonDups, NON_DUPS_PREFIX); + } + } + + private void updTotalClusters(final SAMRecord rec, final Map gcData) { + for (final Map.Entry entry : gcData.entrySet()) { + final GcObject gcCur = entry.getValue(); + if (!rec.getReadPairedFlag() || rec.getFirstOfPairFlag()) ++gcCur.totalClusters; + } + } + + private Map prepareGcData() { + final String prefix; + final Map gcData = new HashMap<>(); + if (this.readGroup != null) { + prefix = this.readGroup; + gcData.put(prefix, new GcObject()); + } else if (this.library != null) { + prefix = this.library; + gcData.put(prefix, new GcObject()); + } else if (this.sample != null) { + prefix = this.sample; + gcData.put(prefix, new GcObject()); + } else { + prefix = allReads; + gcData.put(prefix, new GcObject()); + } + return gcData; + } + + private void addReadToGcData(final SAMRecord rec, final Map gcData, + final String nonDupsPrefix) { + final String type; + final String group; + if (this.readGroup != null) { + type = this.readGroup; + group = nonDupsPrefix + "Read Group"; + addRead(gcData.get(type), rec, group, gc, refBases); + } else if (this.library != null) { + type = this.library; + group = nonDupsPrefix + "Library"; + addRead(gcData.get(type), rec, group, gc, refBases); + } else if (this.sample != null) { + type = this.sample; + group = nonDupsPrefix + "Sample"; + addRead(gcData.get(type), rec, group, gc, refBases); + } else { + type = allReads; + group = nonDupsPrefix + "All Reads"; + addRead(gcData.get(type), rec, group, gc, refBases); + } + } + + ///////////////////////////////////////////////////////////////////////////// // Sums the values in an int[]. ///////////////////////////////////////////////////////////////////////////// private double sum(final int[] values) { @@ -178,11 +225,8 @@ private double sum(final int[] values) { return total; } - ///////////////////////////////////////////////////////////////////////////// - //Called to add metrics to the output file for each level of collection - // these metrics are used for graphing gc bias in R script - ///////////////////////////////////////////////////////////////////////////// - public void addMetricsToFile(final MetricsFile file) { + private void addGcDataToFile(final MetricsFile file, final Map gcData, + final String nonDupsPrefix) { for (final Map.Entry entry : gcData.entrySet()) { final GcObject gcCur = entry.getValue(); final String gcType = entry.getKey(); @@ -217,18 +261,18 @@ public void addMetricsToFile(final MetricsFile file) { detail.ERROR_BAR_WIDTH = 0; } detail.ACCUMULATION_LEVEL = group; - if (group.equals("Read Group")) {detail.READ_GROUP = gcType;} - else if (group.equals("Sample")) {detail.SAMPLE = gcType;} - else if (group.equals("Library")) {detail.LIBRARY = gcType;} + if (group.equals(nonDupsPrefix + "Read Group")) {detail.READ_GROUP = gcType;} + else if (group.equals(nonDupsPrefix + "Sample")) {detail.SAMPLE = gcType;} + else if (group.equals(nonDupsPrefix + "Library")) {detail.LIBRARY = gcType;} metrics.DETAILS.addMetric(detail); } // Synthesize the high level summary metrics final GcBiasSummaryMetrics summary = new GcBiasSummaryMetrics(); - if (group.equals("Read Group")) {summary.READ_GROUP = gcType;} - else if (group.equals("Sample")) {summary.SAMPLE = gcType;} - else if (group.equals("Library")) {summary.LIBRARY = gcType;} + if (group.equals(nonDupsPrefix + "Read Group")) {summary.READ_GROUP = gcType;} + else if (group.equals(nonDupsPrefix + "Sample")) {summary.SAMPLE = gcType;} + else if (group.equals(nonDupsPrefix + "Library")) {summary.LIBRARY = gcType;} summary.ACCUMULATION_LEVEL = group; summary.WINDOW_SIZE = scanWindowSize; @@ -240,7 +284,6 @@ public void addMetricsToFile(final MetricsFile file) { summary.GC_NC_60_79 = calculateGcNormCoverage(meanReadsPerWindow, readsByGc, 60, 79); summary.GC_NC_80_100 = calculateGcNormCoverage(meanReadsPerWindow, readsByGc, 80, 100); - calculateDropoutMetrics(metrics.DETAILS.getMetrics(), summary); metrics.SUMMARY = summary; @@ -254,7 +297,8 @@ public void addMetricsToFile(final MetricsFile file) { ///////////////////////////////////////////////////////////////////////////// // Calculates the normalized coverage over a given gc content region ///////////////////////////////////////////////////////////////////////////// - private double calculateGcNormCoverage(final double meanReadsPerWindow, final int[] readsByGc, final int start, final int end) { + private double calculateGcNormCoverage(final double meanReadsPerWindow, final int[] readsByGc, + final int start, final int end) { int windowsTotal = 0; double sum = 0.0; for (int i = start; i <= end; i++) { @@ -266,9 +310,8 @@ private double calculateGcNormCoverage(final double meanReadsPerWindow, final in if (windowsTotal == 0) { return 0.0; - } - else { - return (sum / (windowsTotal*meanReadsPerWindow)); + } else { + return (sum / (windowsTotal * meanReadsPerWindow)); } } diff --git a/src/main/java/picard/metrics/MultiLevelCollector.java b/src/main/java/picard/metrics/MultiLevelCollector.java index 914adad7b..74e2bbb49 100644 --- a/src/main/java/picard/metrics/MultiLevelCollector.java +++ b/src/main/java/picard/metrics/MultiLevelCollector.java @@ -67,6 +67,7 @@ public abstract class MultiLevelCollector { public static final String UNKNOWN = "unknown"; + public static boolean IS_NON_DUPLICATES; //The collector that will accept all records (allReads is NULL if !calculateAll) private PerUnitMetricCollector allReadCollector; diff --git a/src/main/resources/picard/analysis/gcBias.R b/src/main/resources/picard/analysis/gcBias.R index 503e73751..cefce39ff 100644 --- a/src/main/resources/picard/analysis/gcBias.R +++ b/src/main/resources/picard/analysis/gcBias.R @@ -68,7 +68,12 @@ for (k in 1:(num.plots)){ else{ if(accLevel == "Sample"){datasetName <- summaryMetrics[k, "SAMPLE"]} if(accLevel == "Library"){datasetName <- summaryMetrics[k, "LIBRARY"]} - else(datasetName <- summaryMetrics[k, "READ_GROUP"])} + if(accLevel == "Read Group"){datasetName <- summaryMetrics[k, "READ_GROUP"]} + if(accLevel == "Non-Duplicates All Reads"){datasetName <- summaryMetrics[k, "ALL_READS"]} + if(accLevel == "Non-Duplicates Sample"){datasetName <- summaryMetrics[k, "SAMPLE"]} + if(accLevel == "Non-Duplicates Library"){datasetName <- summaryMetrics[k, "LUBRARY"]} + if(accLevel == "Non-Duplicates Read Group"){datasetName <- summaryMetrics[k, "READ_GROUP"]} + } subtitle = cat("Total clusters: ",summaryMetrics[k,"TOTAL_CLUSTERS"],", Aligned reads: ",summaryMetrics[k, "ALIGNED_READS"]); # Do the main plot of the normalized coverage by GC plot(type="p", x=metrics$GC, y=metrics$NORMALIZED_COVERAGE, diff --git a/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java b/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java index 16b405fc4..4e535155f 100644 --- a/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java +++ b/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java @@ -41,6 +41,8 @@ private final static File TEST_DIR = new File("testdata/picard/sam/CollectGcBiasMetrics/"); private final File dict = new File(TEST_DIR, "MNOheader.dict"); + private final String REFERENCE_FILE_1 = "testdata/picard/metrics/chrMNO.reference.fasta"; + private final String REFERENCE_FILE_2 = "testdata/picard/metrics/chrM.reference.fasta"; File tempSamFileChrM_O; File tempSamFileAllChr; @@ -115,7 +117,7 @@ public void runGcBiasMultiLevelTest() throws IOException { outfile.deleteOnExit(); detailsOutfile.deleteOnExit(); - runGcBias(tempSamFileChrM_O, outfile, detailsOutfile); + runGcBias(tempSamFileChrM_O, REFERENCE_FILE_1, outfile, detailsOutfile, false); final MetricsFile> output = new MetricsFile>(); output.read(new FileReader(outfile)); @@ -204,8 +206,8 @@ public void runWindowsComparisonTest() throws IOException { detailsOutfile.deleteOnExit(); allChrDetailsOutfile.deleteOnExit(); - runGcBias(tempSamFileChrM_O, outfile, detailsOutfile); - runGcBias(tempSamFileAllChr, allChrOutFile, allChrDetailsOutfile); + runGcBias(tempSamFileChrM_O, REFERENCE_FILE_1, outfile, detailsOutfile, false); + runGcBias(tempSamFileAllChr, REFERENCE_FILE_1, allChrOutFile, allChrDetailsOutfile, false); final MetricsFile> outputDetails = new MetricsFile>(); outputDetails.read(new FileReader(detailsOutfile)); @@ -238,7 +240,7 @@ public File build (final List setBuilder, final File unsort final SAMFileWriter writer = new SAMFileWriterFactory() .setCreateIndex(true).makeBAMWriter(header, false, unsortedSam); - for( final SAMRecordSetBuilder subSetBuilder : setBuilder){ + for (final SAMRecordSetBuilder subSetBuilder : setBuilder) { for (final SAMRecord record : subSetBuilder) { writer.addAlignment(record); } @@ -260,8 +262,8 @@ public File build (final List setBuilder, final File unsort ///////////////////////////////////////////////////////////////////////////// // Runs CollectGcBias with input Sam file and outputs details and summary files for truth assertion. ///////////////////////////////////////////////////////////////////////////// - public void runGcBias (final File input, final File outfile, final File detailsOutfile) throws IOException { - final String referenceFile = "testdata/picard/metrics/chrMNO.reference.fasta"; + public void runGcBias (final File input, final String referenceFile, final File summaryOutfile, final File detailsOutfile, + final boolean nonDups) throws IOException { final File pdf = File.createTempFile("test", ".pdf"); pdf.deleteOnExit(); @@ -274,7 +276,7 @@ public void runGcBias (final File input, final File outfile, final File detailsO "INPUT=" + input.getAbsolutePath(), "OUTPUT=" + detailsOutfile.getAbsolutePath(), "REFERENCE_SEQUENCE=" + referenceFile, - "SUMMARY_OUTPUT=" + outfile.getAbsolutePath(), + "SUMMARY_OUTPUT=" + summaryOutfile.getAbsolutePath(), "CHART_OUTPUT=" + pdf.getAbsolutePath(), "SCAN_WINDOW_SIZE=" + windowSize, "MINIMUM_GENOME_FRACTION=" + minGenFraction, @@ -282,9 +284,86 @@ public void runGcBias (final File input, final File outfile, final File detailsO "LEVEL=ALL_READS", "LEVEL=SAMPLE", "LEVEL=READ_GROUP", - "ASSUME_SORTED=" + assumeSorted + "ASSUME_SORTED=" + assumeSorted, + "ALSO_IGNORE_DUPLICATES=" + nonDups }; - Assert.assertEquals(runPicardCommandLine(args), 0); + runPicardCommandLine(args); + } + + /** + * Compares metric's results by summary files without duplicates. + * @throws IOException + */ + @Test + public void runNonDupsComparisonTest() throws IOException { + final File inputFileWithDuplicates = new File("testdata/picard/metrics/chrMReads.sam"); + final File detailsOutfile = File.createTempFile("test", ".gc_bias_detail_metrics"); + final File summaryOutfile = File.createTempFile("test", ".gc_bias_summary_metrics"); + detailsOutfile.deleteOnExit(); + summaryOutfile.deleteOnExit(); + + + + runGcBias(inputFileWithDuplicates, REFERENCE_FILE_2, summaryOutfile, detailsOutfile, true); + + final MetricsFile> outputSummary = new MetricsFile<>(); + outputSummary.read(new FileReader(summaryOutfile)); + + for (final GcBiasSummaryMetrics summary : outputSummary.getMetrics()) { + if (summary.ACCUMULATION_LEVEL.equals("Non-Duplicates All Reads")) { //ALL_READS level for case without duplicates + Assert.assertEquals(summary.TOTAL_CLUSTERS, 3); + Assert.assertEquals(summary.ALIGNED_READS, 3); + Assert.assertEquals(summary.AT_DROPOUT, 79.180328); + Assert.assertEquals(summary.GC_DROPOUT, 12.28901); + Assert.assertEquals(summary.GC_NC_0_19, 0.0); + Assert.assertEquals(summary.GC_NC_20_39, 0.0); + Assert.assertEquals(summary.GC_NC_40_59, 1.246783); + Assert.assertEquals(summary.GC_NC_60_79, 0.0); + Assert.assertEquals(summary.GC_NC_80_100, 0.0); + } + if (summary.ACCUMULATION_LEVEL.equals("All Reads")) { //ALL_READS level + Assert.assertEquals(summary.TOTAL_CLUSTERS, 5); + Assert.assertEquals(summary.ALIGNED_READS, 5); + Assert.assertEquals(summary.AT_DROPOUT, 79.180328); + Assert.assertEquals(summary.GC_DROPOUT, 10.37037); + Assert.assertEquals(summary.GC_NC_0_19, 0.0); + Assert.assertEquals(summary.GC_NC_20_39, 0.0); + Assert.assertEquals(summary.GC_NC_40_59, 1.246783); + Assert.assertEquals(summary.GC_NC_60_79, 0.0); + Assert.assertEquals(summary.GC_NC_80_100, 0.0); + } + } + } + + /** + * If SAM/BAM file with '*' in SEQ field omit this read. + */ + @Test + public void runCheckingNoSEQTest() throws IOException { + final File input = new File("testdata/picard/metrics/chrM_NO_SEQ.sam"); + final File summaryOutfile = File.createTempFile("test", ".gc_bias.summary_metrics"); + final File detailsOutfile = File.createTempFile("test", ".gc_bias.detail_metrics"); + summaryOutfile.deleteOnExit(); + detailsOutfile.deleteOnExit(); + + runGcBias(input, REFERENCE_FILE_2, summaryOutfile, detailsOutfile, false); + + final MetricsFile> output = new MetricsFile<>(); + output.read(new FileReader(summaryOutfile)); + + for (final GcBiasSummaryMetrics metrics : output.getMetrics()) { + if (metrics.ACCUMULATION_LEVEL.equals("All Reads")) { //ALL_READS level + Assert.assertEquals(metrics.TOTAL_CLUSTERS, 0); + Assert.assertEquals(metrics.ALIGNED_READS, 1); + Assert.assertEquals(metrics.AT_DROPOUT, 78.682453); + Assert.assertEquals(metrics.GC_DROPOUT, 14.693382); + Assert.assertEquals(metrics.GC_NC_0_19, 0.0); + Assert.assertEquals(metrics.GC_NC_20_39, 0.0); + Assert.assertEquals(metrics.GC_NC_40_59, 1.246783); + Assert.assertEquals(metrics.GC_NC_60_79, 0.0); + Assert.assertEquals(metrics.GC_NC_80_100, 0.0); + } + } } ///////////////////////////////////////////////////////////////////////////// diff --git a/testdata/picard/metrics/chrM_NO_SEQ.sam b/testdata/picard/metrics/chrM_NO_SEQ.sam new file mode 100644 index 000000000..cae527e80 --- /dev/null +++ b/testdata/picard/metrics/chrM_NO_SEQ.sam @@ -0,0 +1,5 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chrM LN:16299 AS:mm9 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz M5:11c8af2a2528b25f2c080ab7da42edda SP:Mus musculus +@RG ID:62A40.2 PL:illumina PU:62A40AAXX101028.2 LB:Solexa-45345 DT:2010-10-28T00:00:00-0400 SM:RRBS885 CN:BI +NRUSCA-WDL30341:66:000000000-ABTGC:1:2111:13507:4770 417 chrM 14225 60 188H112M = 14225 197 CTTGACCTGTGCCGGGTACGGCGGCGGACCGGCGACTACCCGGAGGCTGGCGAGCTGGCCCAGCGGGCCTACGACCTCTACCACAGCCTCGAAAACCGCCTCCGCCAGGCCC DBFFDFFB>>):BFFBBBFFBFFF>B>>FB;F;36>B;??FFFFFF?(-122:?0>6:BB<>FFF Date: Wed, 5 Oct 2016 12:21:33 +0300 Subject: [PATCH 2/7] EPMCMBIPHO-468: changes for github --- .../java/picard/analysis/CollectGcBiasMetrics.java | 27 ++++++++++------------ .../picard/analysis/GcBiasMetricsCollector.java | 14 ++++++----- src/main/resources/picard/analysis/gcBias.R | 2 +- .../picard/analysis/CollectGcBiasMetricsTest.java | 2 -- 4 files changed, 21 insertions(+), 24 deletions(-) diff --git a/src/main/java/picard/analysis/CollectGcBiasMetrics.java b/src/main/java/picard/analysis/CollectGcBiasMetrics.java index 509d1b99f..d222dc7f2 100644 --- a/src/main/java/picard/analysis/CollectGcBiasMetrics.java +++ b/src/main/java/picard/analysis/CollectGcBiasMetrics.java @@ -94,26 +94,24 @@ "mean coverage (e.g. 3.1 means this GC bin has 3.1 times more reads per window than average). " + "" + "This tool also tracks mean base-quality scores of the reads within each GC content bin, enabling the user to determine " + - "how base quality scores vary with GC content.

" + + "how base quality scores vary with GC content.

"+ "" + "The chart output associated with this data table plots the NORMALIZED_COVERAGE, the distribution of WINDOWs corresponding " + - "to GC percentages, and base qualities corresponding to each %GC bin." + + "to GC percentages, and base qualities corresponding to each %GC bin."+ "

Note: Metrics labeled as percentages are actually expressed as fractions!

" + - "

Usage Example:

" + + "

Usage Example:

"+ "
" +
-            "java -jar picard.jar CollectGcBiasMetrics \\
" + - " I=input.bam \\
" + - " O=gc_bias_metrics.txt \\
" + - " CHART=gc_bias_metrics.pdf \\
" + - " S=summary_metrics.txt \\
" + - " R=reference_sequence.fasta" + - "
" + + "java -jar picard.jar CollectGcBiasMetrics \\
"+ + " I=input.bam \\
"+ + " O=gc_bias_metrics.txt \\
"+ + " CHART=gc_bias_metrics.pdf \\
"+ + " S=summary_metrics.txt \\
"+ + " R=reference_sequence.fasta"+ + ""+ "Please see " + "the GcBiasMetrics documentation for further explanations of each metric." + "
"; - /** - * The location of the R script to do the plotting. - */ + /** The location of the R script to do the plotting. */ private static final String R_SCRIPT = "picard/analysis/gcBias.R"; // Usage and parameters @@ -170,8 +168,7 @@ protected void setup(final SAMFileHeader header, final File samFile) { final int[] windowsByGc = GcBiasUtils.calculateRefWindowsByGc(BINS, REFERENCE_SEQUENCE, SCAN_WINDOW_SIZE); //Delegate actual collection to GcBiasMetricCollector - multiCollector = new GcBiasMetricsCollector(METRIC_ACCUMULATION_LEVEL, windowsByGc, header.getReadGroups(), - SCAN_WINDOW_SIZE, IS_BISULFITE_SEQUENCED); + multiCollector = new GcBiasMetricsCollector(METRIC_ACCUMULATION_LEVEL, windowsByGc, header.getReadGroups(), SCAN_WINDOW_SIZE, IS_BISULFITE_SEQUENCED); } //////////////////////////////////////////////////////////////////////////// diff --git a/src/main/java/picard/analysis/GcBiasMetricsCollector.java b/src/main/java/picard/analysis/GcBiasMetricsCollector.java index d455e108e..d0bbe60c4 100644 --- a/src/main/java/picard/analysis/GcBiasMetricsCollector.java +++ b/src/main/java/picard/analysis/GcBiasMetricsCollector.java @@ -61,7 +61,6 @@ private static final Log log = Log.getInstance(GcBiasMetricsCollector.class); - public GcBiasMetricsCollector(final Set accumulationLevels, final int[] windowsByGc, final List samRgRecords, final int scanWindowSize, final boolean bisulfite) { @@ -98,7 +97,7 @@ protected GcBiasCollectorArgs makeArg(final SAMRecord rec, final ReferenceSequen private final String library; private final String readGroup; private static final String allReads = "All_Reads"; - + private int logCounter; ///////////////////////////////////////////////////////////////////////////// //Records the accumulation level for each level of collection and initializes @@ -121,8 +120,11 @@ public PerUnitGcBiasMetricsCollector(final String sample, final String library, @Override public void acceptRecord(final GcBiasCollectorArgs args) { final SAMRecord rec = args.getRec(); - if (rec.getReadBases().length == 0) { + if (rec.getReadBases().length == 0 && logCounter < 100) { log.warn("Omitting read " + rec.getReadName() + " with '*' in SEQ field."); + if (++logCounter == 100) { + log.warn("You have more than 100 reads with '*' in SEQ field."); + } return; } if (!rec.getReadUnmappedFlag()) { @@ -141,9 +143,9 @@ public void acceptRecord(final GcBiasCollectorArgs args) { addReadToGcData(rec, this.gcDataNonDups, NON_DUPS_PREFIX); } } else { - updTotalClusters(rec, this.gcData); + updateTotalClusters(rec, this.gcData); if (IS_NON_DUPLICATES && !rec.getDuplicateReadFlag()) { - updTotalClusters(rec, this.gcDataNonDups); + updateTotalClusters(rec, this.gcDataNonDups); } } } @@ -163,7 +165,7 @@ public void addMetricsToFile(final MetricsFile file) { } } - private void updTotalClusters(final SAMRecord rec, final Map gcData) { + private void updateTotalClusters(final SAMRecord rec, final Map gcData) { for (final Map.Entry entry : gcData.entrySet()) { final GcObject gcCur = entry.getValue(); if (!rec.getReadPairedFlag() || rec.getFirstOfPairFlag()) ++gcCur.totalClusters; diff --git a/src/main/resources/picard/analysis/gcBias.R b/src/main/resources/picard/analysis/gcBias.R index cefce39ff..568477acb 100644 --- a/src/main/resources/picard/analysis/gcBias.R +++ b/src/main/resources/picard/analysis/gcBias.R @@ -71,7 +71,7 @@ for (k in 1:(num.plots)){ if(accLevel == "Read Group"){datasetName <- summaryMetrics[k, "READ_GROUP"]} if(accLevel == "Non-Duplicates All Reads"){datasetName <- summaryMetrics[k, "ALL_READS"]} if(accLevel == "Non-Duplicates Sample"){datasetName <- summaryMetrics[k, "SAMPLE"]} - if(accLevel == "Non-Duplicates Library"){datasetName <- summaryMetrics[k, "LUBRARY"]} + if(accLevel == "Non-Duplicates Library"){datasetName <- summaryMetrics[k, "LIBRARY"]} if(accLevel == "Non-Duplicates Read Group"){datasetName <- summaryMetrics[k, "READ_GROUP"]} } subtitle = cat("Total clusters: ",summaryMetrics[k,"TOTAL_CLUSTERS"],", Aligned reads: ",summaryMetrics[k, "ALIGNED_READS"]); diff --git a/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java b/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java index 4e535155f..698ff1cf4 100644 --- a/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java +++ b/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java @@ -302,8 +302,6 @@ public void runNonDupsComparisonTest() throws IOException { detailsOutfile.deleteOnExit(); summaryOutfile.deleteOnExit(); - - runGcBias(inputFileWithDuplicates, REFERENCE_FILE_2, summaryOutfile, detailsOutfile, true); final MetricsFile> outputSummary = new MetricsFile<>(); From 5b39d0da03a6212c819fe8dea1399aa1c3c88546 Mon Sep 17 00:00:00 2001 From: Ekaterina Kazachkova Date: Thu, 6 Oct 2016 12:06:12 +0300 Subject: [PATCH 3/7] EPMCMBIPHO-251: #2 comment --- src/main/java/picard/analysis/GcBiasMetricsCollector.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/picard/analysis/GcBiasMetricsCollector.java b/src/main/java/picard/analysis/GcBiasMetricsCollector.java index d0bbe60c4..5b9f1fc93 100644 --- a/src/main/java/picard/analysis/GcBiasMetricsCollector.java +++ b/src/main/java/picard/analysis/GcBiasMetricsCollector.java @@ -120,10 +120,10 @@ public PerUnitGcBiasMetricsCollector(final String sample, final String library, @Override public void acceptRecord(final GcBiasCollectorArgs args) { final SAMRecord rec = args.getRec(); - if (rec.getReadBases().length == 0 && logCounter < 100) { + if (logCounter < 100 && rec.getReadBases().length == 0) { log.warn("Omitting read " + rec.getReadName() + " with '*' in SEQ field."); if (++logCounter == 100) { - log.warn("You have more than 100 reads with '*' in SEQ field."); + log.warn("There are more than 100 reads with '*' in SEQ field in file."); } return; } From 06131b52042da4beac08436876cc3027610d1d29 Mon Sep 17 00:00:00 2001 From: Ekaterina Kazachkova Date: Thu, 6 Oct 2016 12:23:12 +0300 Subject: [PATCH 4/7] EPMCMBIPHO-251: #4 comment clean up --- src/main/java/picard/analysis/GcBiasMetricsCollector.java | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/main/java/picard/analysis/GcBiasMetricsCollector.java b/src/main/java/picard/analysis/GcBiasMetricsCollector.java index 5b9f1fc93..774b79185 100644 --- a/src/main/java/picard/analysis/GcBiasMetricsCollector.java +++ b/src/main/java/picard/analysis/GcBiasMetricsCollector.java @@ -194,24 +194,21 @@ private void updateTotalClusters(final SAMRecord rec, final Map gcData, final String nonDupsPrefix) { final String type; - final String group; + String group = nonDupsPrefix; if (this.readGroup != null) { type = this.readGroup; - group = nonDupsPrefix + "Read Group"; - addRead(gcData.get(type), rec, group, gc, refBases); + group += "Read Group"; } else if (this.library != null) { type = this.library; - group = nonDupsPrefix + "Library"; - addRead(gcData.get(type), rec, group, gc, refBases); + group += "Library"; } else if (this.sample != null) { type = this.sample; - group = nonDupsPrefix + "Sample"; - addRead(gcData.get(type), rec, group, gc, refBases); + group += "Sample"; } else { type = allReads; - group = nonDupsPrefix + "All Reads"; - addRead(gcData.get(type), rec, group, gc, refBases); + group += "All Reads"; } + addRead(gcData.get(type), rec, group, gc, refBases); } ///////////////////////////////////////////////////////////////////////////// From 932a55771b34efd5c62b6fe40d55f2ad6eaf0230 Mon Sep 17 00:00:00 2001 From: Ekaterina Kazachkova Date: Wed, 2 Nov 2016 19:39:12 +0300 Subject: [PATCH 5/7] fix for issue 342 --- .../java/picard/analysis/CollectGcBiasMetrics.java | 11 ++- .../picard/analysis/CollectMultipleMetrics.java | 1 + .../picard/analysis/GcBiasMetricsCollector.java | 82 ++++++++++++++-------- .../java/picard/analysis/GcBiasSummaryMetrics.java | 2 +- .../java/picard/metrics/MultiLevelCollector.java | 1 - src/main/resources/picard/analysis/gcBias.R | 16 ++--- 6 files changed, 66 insertions(+), 47 deletions(-) diff --git a/src/main/java/picard/analysis/CollectGcBiasMetrics.java b/src/main/java/picard/analysis/CollectGcBiasMetrics.java index d222dc7f2..30ed75677 100644 --- a/src/main/java/picard/analysis/CollectGcBiasMetrics.java +++ b/src/main/java/picard/analysis/CollectGcBiasMetrics.java @@ -134,7 +134,8 @@ @Option(shortName = "LEVEL", doc = "The level(s) at which to accumulate metrics.") public Set METRIC_ACCUMULATION_LEVEL = CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS); - @Option(shortName = "ALSO_IGNORE_DUPLICATES", doc = "Use to get additional results without duplicates.") + @Option(shortName = "ALSO_IGNORE_DUPLICATES", doc = "Use to get additional results without duplicates. This option " + + "allows to gain two plots per level at the same time: one is the usual one and the other excludes duplicates.") public boolean ALSO_IGNORE_DUPLICATES = false; // Calculates GcBiasMetrics for all METRIC_ACCUMULATION_LEVELs provided @@ -162,13 +163,11 @@ protected void setup(final SAMFileHeader header, final File samFile) { IOUtil.assertFileIsWritable(SUMMARY_OUTPUT); IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); - MultiLevelCollector.IS_NON_DUPLICATES = ALSO_IGNORE_DUPLICATES; - //Calculate windowsByGc for the reference sequence final int[] windowsByGc = GcBiasUtils.calculateRefWindowsByGc(BINS, REFERENCE_SEQUENCE, SCAN_WINDOW_SIZE); //Delegate actual collection to GcBiasMetricCollector - multiCollector = new GcBiasMetricsCollector(METRIC_ACCUMULATION_LEVEL, windowsByGc, header.getReadGroups(), SCAN_WINDOW_SIZE, IS_BISULFITE_SEQUENCED); + multiCollector = new GcBiasMetricsCollector(METRIC_ACCUMULATION_LEVEL, windowsByGc, header.getReadGroups(), SCAN_WINDOW_SIZE, IS_BISULFITE_SEQUENCED, ALSO_IGNORE_DUPLICATES); } //////////////////////////////////////////////////////////////////////////// @@ -185,10 +184,10 @@ protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) { @Override protected void finish() { multiCollector.finish(); - writeResultsToFiles(OUTPUT, SUMMARY_OUTPUT, CHART_OUTPUT); + writeResultsToFiles(); } - private void writeResultsToFiles(final File OUTPUT, final File SUMMARY_OUTPUT, final File CHART_OUTPUT) { + private void writeResultsToFiles() { final MetricsFile file = getMetricsFile(); final MetricsFile detailMetricsFile = getMetricsFile(); final MetricsFile summaryMetricsFile = getMetricsFile(); diff --git a/src/main/java/picard/analysis/CollectMultipleMetrics.java b/src/main/java/picard/analysis/CollectMultipleMetrics.java index 3c5cf6ab8..8757b6392 100644 --- a/src/main/java/picard/analysis/CollectMultipleMetrics.java +++ b/src/main/java/picard/analysis/CollectMultipleMetrics.java @@ -241,6 +241,7 @@ public SinglePassSamProgram makeInstance(final String outbase, final String oute program.MINIMUM_GENOME_FRACTION = 1.0E-5; program.IS_BISULFITE_SEQUENCED = false; program.ASSUME_SORTED = false; + program.ALSO_IGNORE_DUPLICATES = false; //GC_Bias actually uses the class-level REFERENCE_SEQUENCE variable. program.REFERENCE_SEQUENCE = reference; diff --git a/src/main/java/picard/analysis/GcBiasMetricsCollector.java b/src/main/java/picard/analysis/GcBiasMetricsCollector.java index 774b79185..c0dc6d61a 100644 --- a/src/main/java/picard/analysis/GcBiasMetricsCollector.java +++ b/src/main/java/picard/analysis/GcBiasMetricsCollector.java @@ -52,21 +52,28 @@ private final boolean bisulfite; private int[] windowsByGc = new int[BINS]; private static final int BINS = 101; + //Use to calculate additional results without duplicates + private boolean ignoreDuplicates; //will hold the relevant gc information per contig private byte [] gc = null; private int referenceIndex = -1; private byte [] refBases = null; - private final static String NON_DUPS_PREFIX = "Non-Duplicates "; - private static final Log log = Log.getInstance(GcBiasMetricsCollector.class); public GcBiasMetricsCollector(final Set accumulationLevels, final int[] windowsByGc, final List samRgRecords, final int scanWindowSize, final boolean bisulfite) { + this(accumulationLevels, windowsByGc, samRgRecords, scanWindowSize, bisulfite, false); + } + + public GcBiasMetricsCollector(final Set accumulationLevels, final int[] windowsByGc, + final List samRgRecords, final int scanWindowSize, + final boolean bisulfite, final boolean ignoreDuplicates) { this.scanWindowSize = scanWindowSize; this.bisulfite = bisulfite; this.windowsByGc = windowsByGc; + this.ignoreDuplicates = ignoreDuplicates; setup(accumulationLevels, samRgRecords); } @@ -86,12 +93,32 @@ protected GcBiasCollectorArgs makeArg(final SAMRecord rec, final ReferenceSequen return new PerUnitGcBiasMetricsCollector(sample, library, readGroup); } + private enum GroupsType { + ALL_READS ("All Reads", "Non-Duplicates All Reads"), + LIBRARY ("Library", "Non-Duplicates Library"), + SAMPLE ("Sample", "Non-Duplicates Sample"), + READ_GROUP ("Read Group", "Non-Duplicates Read Group"); + + private final String groupsType; + private final String nonDuplicatesGroupType; + + GroupsType(final String groupType, final String nonDuplicatesGroupType) { + this.groupsType = groupType; + this.nonDuplicatesGroupType = nonDuplicatesGroupType; + } + + public String getGroupType(final boolean includeDuplicates) { + return includeDuplicates ? groupsType : nonDuplicatesGroupType; + } + } + ///////////////////////////////////////////////////////////////////////////// //A collector for individual GcBiasMetrics for a given SAMPLE or SAMPLE/LIBRARY //or SAMPLE/LIBRARY/READ_GROUP (depending on aggregation levels) ///////////////////////////////////////////////////////////////////////////// public class PerUnitGcBiasMetricsCollector implements PerUnitMetricCollector { private final Map gcData; + // Additional object to store data without duplicates (null if option ALSO_IGNORE_DUPLICATES is not specified) private Map gcDataNonDups; private final String sample; private final String library; @@ -108,7 +135,7 @@ public PerUnitGcBiasMetricsCollector(final String sample, final String library, this.library = library; this.readGroup = readGroup; this.gcData = prepareGcData(); - if (IS_NON_DUPLICATES) { + if (ignoreDuplicates) { this.gcDataNonDups = prepareGcData(); } } @@ -138,13 +165,13 @@ public void acceptRecord(final GcBiasCollectorArgs args) { referenceIndex = rec.getReferenceIndex(); } - addReadToGcData(rec, this.gcData, ""); - if (IS_NON_DUPLICATES && !rec.getDuplicateReadFlag()) { - addReadToGcData(rec, this.gcDataNonDups, NON_DUPS_PREFIX); + addReadToGcData(rec, this.gcData, true); + if (ignoreDuplicates && !rec.getDuplicateReadFlag()) { + addReadToGcData(rec, this.gcDataNonDups, false); } } else { updateTotalClusters(rec, this.gcData); - if (IS_NON_DUPLICATES && !rec.getDuplicateReadFlag()) { + if (ignoreDuplicates && !rec.getDuplicateReadFlag()) { updateTotalClusters(rec, this.gcDataNonDups); } } @@ -159,9 +186,9 @@ public void finish() {} ///////////////////////////////////////////////////////////////////////////// @Override public void addMetricsToFile(final MetricsFile file) { - addGcDataToFile(file, this.gcData, ""); - if (IS_NON_DUPLICATES) { - addGcDataToFile(file, this.gcDataNonDups, NON_DUPS_PREFIX); + addGcDataToFile(file, this.gcData, true); + if (ignoreDuplicates) { + addGcDataToFile(file, this.gcDataNonDups, false); } } @@ -177,36 +204,33 @@ private void updateTotalClusters(final SAMRecord rec, final Map gcData = new HashMap<>(); if (this.readGroup != null) { prefix = this.readGroup; - gcData.put(prefix, new GcObject()); } else if (this.library != null) { prefix = this.library; - gcData.put(prefix, new GcObject()); } else if (this.sample != null) { prefix = this.sample; - gcData.put(prefix, new GcObject()); } else { prefix = allReads; - gcData.put(prefix, new GcObject()); } + gcData.put(prefix, new GcObject()); return gcData; } private void addReadToGcData(final SAMRecord rec, final Map gcData, - final String nonDupsPrefix) { + final boolean includeDuplicates) { final String type; - String group = nonDupsPrefix; + String group; if (this.readGroup != null) { type = this.readGroup; - group += "Read Group"; + group = GroupsType.READ_GROUP.getGroupType(includeDuplicates); } else if (this.library != null) { type = this.library; - group += "Library"; + group = GroupsType.LIBRARY.getGroupType(includeDuplicates); } else if (this.sample != null) { type = this.sample; - group += "Sample"; + group = GroupsType.SAMPLE.getGroupType(includeDuplicates); } else { type = allReads; - group += "All Reads"; + group = GroupsType.ALL_READS.getGroupType(includeDuplicates); } addRead(gcData.get(type), rec, group, gc, refBases); } @@ -225,7 +249,7 @@ private double sum(final int[] values) { } private void addGcDataToFile(final MetricsFile file, final Map gcData, - final String nonDupsPrefix) { + final boolean includeDuplicates) { for (final Map.Entry entry : gcData.entrySet()) { final GcObject gcCur = entry.getValue(); final String gcType = entry.getKey(); @@ -233,7 +257,7 @@ private void addGcDataToFile(final MetricsFile file, fin final int[] readsByGc = gcCur.readsByGc; final long[] errorsByGc = gcCur.errorsByGc; final long[] basesByGc = gcCur.basesByGc; - final int totalClusters = gcCur.totalClusters; + final long totalClusters = gcCur.totalClusters; final long totalAlignedReads = gcCur.totalAlignedReads; final String group = gcCur.group; @@ -260,18 +284,18 @@ private void addGcDataToFile(final MetricsFile file, fin detail.ERROR_BAR_WIDTH = 0; } detail.ACCUMULATION_LEVEL = group; - if (group.equals(nonDupsPrefix + "Read Group")) {detail.READ_GROUP = gcType;} - else if (group.equals(nonDupsPrefix + "Sample")) {detail.SAMPLE = gcType;} - else if (group.equals(nonDupsPrefix + "Library")) {detail.LIBRARY = gcType;} + if (group.equals(GroupsType.READ_GROUP.getGroupType(includeDuplicates))) {detail.READ_GROUP = gcType;} + else if (group.equals(GroupsType.SAMPLE.getGroupType(includeDuplicates))) {detail.SAMPLE = gcType;} + else if (group.equals(GroupsType.LIBRARY.getGroupType(includeDuplicates))) {detail.LIBRARY = gcType;} metrics.DETAILS.addMetric(detail); } // Synthesize the high level summary metrics final GcBiasSummaryMetrics summary = new GcBiasSummaryMetrics(); - if (group.equals(nonDupsPrefix + "Read Group")) {summary.READ_GROUP = gcType;} - else if (group.equals(nonDupsPrefix + "Sample")) {summary.SAMPLE = gcType;} - else if (group.equals(nonDupsPrefix + "Library")) {summary.LIBRARY = gcType;} + if (group.equals(GroupsType.READ_GROUP.getGroupType(includeDuplicates))) {summary.READ_GROUP = gcType;} + else if (group.equals(GroupsType.SAMPLE.getGroupType(includeDuplicates))) {summary.SAMPLE = gcType;} + else if (group.equals(GroupsType.LIBRARY.getGroupType(includeDuplicates))) {summary.LIBRARY = gcType;} summary.ACCUMULATION_LEVEL = group; summary.WINDOW_SIZE = scanWindowSize; @@ -350,7 +374,7 @@ private void calculateDropoutMetrics(final Collection detai //Keeps track of each level of GcCalculation ///////////////////////////////////////////////////////////////////////////// class GcObject { - int totalClusters = 0; + long totalClusters = 0; long totalAlignedReads = 0; int[] readsByGc = new int[BINS]; long[] basesByGc = new long[BINS]; diff --git a/src/main/java/picard/analysis/GcBiasSummaryMetrics.java b/src/main/java/picard/analysis/GcBiasSummaryMetrics.java index 0808776a3..2b9ef4136 100644 --- a/src/main/java/picard/analysis/GcBiasSummaryMetrics.java +++ b/src/main/java/picard/analysis/GcBiasSummaryMetrics.java @@ -38,7 +38,7 @@ public int WINDOW_SIZE; /** The total number of clusters that were seen in the gc bias calculation. */ - public int TOTAL_CLUSTERS; + public long TOTAL_CLUSTERS; /** The total number of aligned reads used to compute the gc bias metrics. */ public long ALIGNED_READS; diff --git a/src/main/java/picard/metrics/MultiLevelCollector.java b/src/main/java/picard/metrics/MultiLevelCollector.java index 74e2bbb49..914adad7b 100644 --- a/src/main/java/picard/metrics/MultiLevelCollector.java +++ b/src/main/java/picard/metrics/MultiLevelCollector.java @@ -67,7 +67,6 @@ public abstract class MultiLevelCollector { public static final String UNKNOWN = "unknown"; - public static boolean IS_NON_DUPLICATES; //The collector that will accept all records (allReads is NULL if !calculateAll) private PerUnitMetricCollector allReadCollector; diff --git a/src/main/resources/picard/analysis/gcBias.R b/src/main/resources/picard/analysis/gcBias.R index 568477acb..1172df8bf 100644 --- a/src/main/resources/picard/analysis/gcBias.R +++ b/src/main/resources/picard/analysis/gcBias.R @@ -64,16 +64,12 @@ for (k in 1:(num.plots)){ par(mar = c(5, 4, 4, 4)); accLevel = summaryMetrics[k,"ACCUMULATION_LEVEL"]; - if(accLevel=="All Reads"){datasetName <- "All Reads";} - else{ - if(accLevel == "Sample"){datasetName <- summaryMetrics[k, "SAMPLE"]} - if(accLevel == "Library"){datasetName <- summaryMetrics[k, "LIBRARY"]} - if(accLevel == "Read Group"){datasetName <- summaryMetrics[k, "READ_GROUP"]} - if(accLevel == "Non-Duplicates All Reads"){datasetName <- summaryMetrics[k, "ALL_READS"]} - if(accLevel == "Non-Duplicates Sample"){datasetName <- summaryMetrics[k, "SAMPLE"]} - if(accLevel == "Non-Duplicates Library"){datasetName <- summaryMetrics[k, "LIBRARY"]} - if(accLevel == "Non-Duplicates Read Group"){datasetName <- summaryMetrics[k, "READ_GROUP"]} - } + if(accLevel=="All Reads" || accLevel=="Non-Duplicates All Reads"){datasetName <- "All Reads"; + } else { + if(accLevel == "Sample" || accLevel == "Non-Duplicates Sample"){datasetName <- summaryMetrics[k, "SAMPLE"]} + if(accLevel == "Library" || accLevel == "Non-Duplicates Library"){datasetName <- summaryMetrics[k, "LIBRARY"]} + if(accLevel == "Read Group" || accLevel == "Non-Duplicates Read Group"){datasetName <- summaryMetrics[k, "READ_GROUP"]} + } subtitle = cat("Total clusters: ",summaryMetrics[k,"TOTAL_CLUSTERS"],", Aligned reads: ",summaryMetrics[k, "ALIGNED_READS"]); # Do the main plot of the normalized coverage by GC plot(type="p", x=metrics$GC, y=metrics$NORMALIZED_COVERAGE, From 9a4d0cd4cf287b56e75290bda10cafc8a68e74cf Mon Sep 17 00:00:00 2001 From: Ekaterina Kazachkova Date: Fri, 11 Nov 2016 12:31:16 +0300 Subject: [PATCH 6/7] made changes according to Yossi's comments --- .../java/picard/analysis/GcBiasDetailMetrics.java | 2 + .../picard/analysis/GcBiasMetricsCollector.java | 56 +++++++++------------- .../java/picard/analysis/GcBiasSummaryMetrics.java | 3 ++ src/main/resources/picard/analysis/gcBias.R | 14 +++--- .../picard/analysis/CollectGcBiasMetricsTest.java | 4 +- 5 files changed, 37 insertions(+), 42 deletions(-) diff --git a/src/main/java/picard/analysis/GcBiasDetailMetrics.java b/src/main/java/picard/analysis/GcBiasDetailMetrics.java index 422886b4f..de8af4a93 100644 --- a/src/main/java/picard/analysis/GcBiasDetailMetrics.java +++ b/src/main/java/picard/analysis/GcBiasDetailMetrics.java @@ -34,6 +34,8 @@ */ public class GcBiasDetailMetrics extends MultilevelMetrics { public String ACCUMULATION_LEVEL; + /** This option is used to mark including or excluding duplicates. */ + public String READS_USED; /** The G+C content of the reference sequence represented by this bin. Values are from 0% to 100% */ public int GC; diff --git a/src/main/java/picard/analysis/GcBiasMetricsCollector.java b/src/main/java/picard/analysis/GcBiasMetricsCollector.java index c0dc6d61a..69e7e4bb4 100644 --- a/src/main/java/picard/analysis/GcBiasMetricsCollector.java +++ b/src/main/java/picard/analysis/GcBiasMetricsCollector.java @@ -93,25 +93,6 @@ protected GcBiasCollectorArgs makeArg(final SAMRecord rec, final ReferenceSequen return new PerUnitGcBiasMetricsCollector(sample, library, readGroup); } - private enum GroupsType { - ALL_READS ("All Reads", "Non-Duplicates All Reads"), - LIBRARY ("Library", "Non-Duplicates Library"), - SAMPLE ("Sample", "Non-Duplicates Sample"), - READ_GROUP ("Read Group", "Non-Duplicates Read Group"); - - private final String groupsType; - private final String nonDuplicatesGroupType; - - GroupsType(final String groupType, final String nonDuplicatesGroupType) { - this.groupsType = groupType; - this.nonDuplicatesGroupType = nonDuplicatesGroupType; - } - - public String getGroupType(final boolean includeDuplicates) { - return includeDuplicates ? groupsType : nonDuplicatesGroupType; - } - } - ///////////////////////////////////////////////////////////////////////////// //A collector for individual GcBiasMetrics for a given SAMPLE or SAMPLE/LIBRARY //or SAMPLE/LIBRARY/READ_GROUP (depending on aggregation levels) @@ -124,6 +105,12 @@ public String getGroupType(final boolean includeDuplicates) { private final String library; private final String readGroup; private static final String allReads = "All_Reads"; + private final static String ACCUMULATION_LEVEL_ALL_READS = "All Reads"; + private final static String ACCUMULATION_LEVEL_LIBRARY = "Library"; + private final static String ACCUMULATION_LEVEL_SAMPLE = "Sample"; + private final static String ACCUMULATION_LEVEL_READ_GROUP = "Read Group"; + private final static String READS_USED_ALL = "ALL"; + private final static String READS_USED_UNIQUE = "UNIQUE"; private int logCounter; ///////////////////////////////////////////////////////////////////////////// @@ -165,9 +152,9 @@ public void acceptRecord(final GcBiasCollectorArgs args) { referenceIndex = rec.getReferenceIndex(); } - addReadToGcData(rec, this.gcData, true); + addReadToGcData(rec, this.gcData); if (ignoreDuplicates && !rec.getDuplicateReadFlag()) { - addReadToGcData(rec, this.gcDataNonDups, false); + addReadToGcData(rec, this.gcDataNonDups); } } else { updateTotalClusters(rec, this.gcData); @@ -215,22 +202,21 @@ private void updateTotalClusters(final SAMRecord rec, final Map gcData, - final boolean includeDuplicates) { + private void addReadToGcData(final SAMRecord rec, final Map gcData) { final String type; String group; if (this.readGroup != null) { type = this.readGroup; - group = GroupsType.READ_GROUP.getGroupType(includeDuplicates); + group = ACCUMULATION_LEVEL_READ_GROUP; } else if (this.library != null) { type = this.library; - group = GroupsType.LIBRARY.getGroupType(includeDuplicates); + group = ACCUMULATION_LEVEL_LIBRARY; } else if (this.sample != null) { type = this.sample; - group = GroupsType.SAMPLE.getGroupType(includeDuplicates); + group = ACCUMULATION_LEVEL_SAMPLE; } else { type = allReads; - group = GroupsType.ALL_READS.getGroupType(includeDuplicates); + group = ACCUMULATION_LEVEL_ALL_READS; } addRead(gcData.get(type), rec, group, gc, refBases); } @@ -284,18 +270,22 @@ private void addGcDataToFile(final MetricsFile file, fin detail.ERROR_BAR_WIDTH = 0; } detail.ACCUMULATION_LEVEL = group; - if (group.equals(GroupsType.READ_GROUP.getGroupType(includeDuplicates))) {detail.READ_GROUP = gcType;} - else if (group.equals(GroupsType.SAMPLE.getGroupType(includeDuplicates))) {detail.SAMPLE = gcType;} - else if (group.equals(GroupsType.LIBRARY.getGroupType(includeDuplicates))) {detail.LIBRARY = gcType;} + if (group.equals(ACCUMULATION_LEVEL_READ_GROUP)) {detail.READ_GROUP = gcType;} + else if (group.equals(ACCUMULATION_LEVEL_SAMPLE)) {detail.SAMPLE = gcType;} + else if (group.equals(ACCUMULATION_LEVEL_LIBRARY)) {detail.LIBRARY = gcType;} + + detail.READS_USED = includeDuplicates ? READS_USED_ALL : READS_USED_UNIQUE; metrics.DETAILS.addMetric(detail); } // Synthesize the high level summary metrics final GcBiasSummaryMetrics summary = new GcBiasSummaryMetrics(); - if (group.equals(GroupsType.READ_GROUP.getGroupType(includeDuplicates))) {summary.READ_GROUP = gcType;} - else if (group.equals(GroupsType.SAMPLE.getGroupType(includeDuplicates))) {summary.SAMPLE = gcType;} - else if (group.equals(GroupsType.LIBRARY.getGroupType(includeDuplicates))) {summary.LIBRARY = gcType;} + if (group.equals(ACCUMULATION_LEVEL_READ_GROUP)) {summary.READ_GROUP = gcType;} + else if (group.equals(ACCUMULATION_LEVEL_SAMPLE)) {summary.SAMPLE = gcType;} + else if (group.equals(ACCUMULATION_LEVEL_LIBRARY)) {summary.LIBRARY = gcType;} + + summary.READS_USED = includeDuplicates ? READS_USED_ALL : READS_USED_UNIQUE; summary.ACCUMULATION_LEVEL = group; summary.WINDOW_SIZE = scanWindowSize; diff --git a/src/main/java/picard/analysis/GcBiasSummaryMetrics.java b/src/main/java/picard/analysis/GcBiasSummaryMetrics.java index 2b9ef4136..c8cf4fd94 100644 --- a/src/main/java/picard/analysis/GcBiasSummaryMetrics.java +++ b/src/main/java/picard/analysis/GcBiasSummaryMetrics.java @@ -34,6 +34,9 @@ public class GcBiasSummaryMetrics extends MultilevelMetrics { public String ACCUMULATION_LEVEL; + /** This option is used to mark including or excluding duplicates. */ + public String READS_USED; + /** The window size on the genome used to calculate the GC of the sequence. */ public int WINDOW_SIZE; diff --git a/src/main/resources/picard/analysis/gcBias.R b/src/main/resources/picard/analysis/gcBias.R index 1172df8bf..2dcb39746 100644 --- a/src/main/resources/picard/analysis/gcBias.R +++ b/src/main/resources/picard/analysis/gcBias.R @@ -64,12 +64,12 @@ for (k in 1:(num.plots)){ par(mar = c(5, 4, 4, 4)); accLevel = summaryMetrics[k,"ACCUMULATION_LEVEL"]; - if(accLevel=="All Reads" || accLevel=="Non-Duplicates All Reads"){datasetName <- "All Reads"; - } else { - if(accLevel == "Sample" || accLevel == "Non-Duplicates Sample"){datasetName <- summaryMetrics[k, "SAMPLE"]} - if(accLevel == "Library" || accLevel == "Non-Duplicates Library"){datasetName <- summaryMetrics[k, "LIBRARY"]} - if(accLevel == "Read Group" || accLevel == "Non-Duplicates Read Group"){datasetName <- summaryMetrics[k, "READ_GROUP"]} - } + if(accLevel=="All Reads"){datasetName <- "All Reads";} + else{ + if(accLevel == "Sample"){datasetName <- summaryMetrics[k, "SAMPLE"]} + if(accLevel == "Library"){datasetName <- summaryMetrics[k, "LIBRARY"]} + else(datasetName <- summaryMetrics[k, "READ_GROUP"])} + duplicatesMarker <- ifelse(summaryMetrics[k, "READS_USED"] == "ALL", "", " (duplicates excluded) "); subtitle = cat("Total clusters: ",summaryMetrics[k,"TOTAL_CLUSTERS"],", Aligned reads: ",summaryMetrics[k, "ALIGNED_READS"]); # Do the main plot of the normalized coverage by GC plot(type="p", x=metrics$GC, y=metrics$NORMALIZED_COVERAGE, @@ -78,7 +78,7 @@ for (k in 1:(num.plots)){ xlim=c(0,100), ylim=c(0, Y_AXIS_LIM), col=COLORS[1], - main=paste(accLevel, "Level:", datasetName, "GC Bias Plot", "\n", subtitle) + main=paste(accLevel, "Level:", datasetName, duplicatesMarker, "GC Bias Plot", "\n", subtitle) ); # Add lines at the 50% GC and coverage=1 diff --git a/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java b/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java index 698ff1cf4..d68877a6b 100644 --- a/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java +++ b/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java @@ -308,7 +308,7 @@ public void runNonDupsComparisonTest() throws IOException { outputSummary.read(new FileReader(summaryOutfile)); for (final GcBiasSummaryMetrics summary : outputSummary.getMetrics()) { - if (summary.ACCUMULATION_LEVEL.equals("Non-Duplicates All Reads")) { //ALL_READS level for case without duplicates + if (summary.ACCUMULATION_LEVEL.equals("All Reads") && summary.READS_USED.equals("UNIQUE")) { //ALL_READS level for case without duplicates Assert.assertEquals(summary.TOTAL_CLUSTERS, 3); Assert.assertEquals(summary.ALIGNED_READS, 3); Assert.assertEquals(summary.AT_DROPOUT, 79.180328); @@ -319,7 +319,7 @@ public void runNonDupsComparisonTest() throws IOException { Assert.assertEquals(summary.GC_NC_60_79, 0.0); Assert.assertEquals(summary.GC_NC_80_100, 0.0); } - if (summary.ACCUMULATION_LEVEL.equals("All Reads")) { //ALL_READS level + if (summary.ACCUMULATION_LEVEL.equals("All Reads") && summary.READS_USED.equals("ALL")) { //ALL_READS level Assert.assertEquals(summary.TOTAL_CLUSTERS, 5); Assert.assertEquals(summary.ALIGNED_READS, 5); Assert.assertEquals(summary.AT_DROPOUT, 79.180328); From 4df16dbb1d30707af191cd9893b55c43a52ffd13 Mon Sep 17 00:00:00 2001 From: Ekaterina Kazachkova Date: Mon, 21 Nov 2016 20:34:03 +0300 Subject: [PATCH 7/7] fix for constants in CollectGcBiasMetricsTest and CollectMultipleMetricsTest --- src/main/java/picard/analysis/GcBiasMetricsCollector.java | 12 ++++++------ src/test/java/picard/analysis/CollectGcBiasMetricsTest.java | 11 ++++++----- .../java/picard/analysis/CollectMultipleMetricsTest.java | 3 ++- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/main/java/picard/analysis/GcBiasMetricsCollector.java b/src/main/java/picard/analysis/GcBiasMetricsCollector.java index 69e7e4bb4..17454b71b 100644 --- a/src/main/java/picard/analysis/GcBiasMetricsCollector.java +++ b/src/main/java/picard/analysis/GcBiasMetricsCollector.java @@ -105,12 +105,12 @@ protected GcBiasCollectorArgs makeArg(final SAMRecord rec, final ReferenceSequen private final String library; private final String readGroup; private static final String allReads = "All_Reads"; - private final static String ACCUMULATION_LEVEL_ALL_READS = "All Reads"; - private final static String ACCUMULATION_LEVEL_LIBRARY = "Library"; - private final static String ACCUMULATION_LEVEL_SAMPLE = "Sample"; - private final static String ACCUMULATION_LEVEL_READ_GROUP = "Read Group"; - private final static String READS_USED_ALL = "ALL"; - private final static String READS_USED_UNIQUE = "UNIQUE"; + final static String ACCUMULATION_LEVEL_ALL_READS = "All Reads"; + final static String ACCUMULATION_LEVEL_LIBRARY = "Library"; + final static String ACCUMULATION_LEVEL_SAMPLE = "Sample"; + final static String ACCUMULATION_LEVEL_READ_GROUP = "Read Group"; + final static String READS_USED_ALL = "ALL"; + final static String READS_USED_UNIQUE = "UNIQUE"; private int logCounter; ///////////////////////////////////////////////////////////////////////////// diff --git a/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java b/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java index d68877a6b..51ed42436 100644 --- a/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java +++ b/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java @@ -14,6 +14,7 @@ import org.testng.annotations.Test; import picard.cmdline.CommandLineProgramTest; import picard.sam.SortSam; +import static picard.analysis.GcBiasMetricsCollector.PerUnitGcBiasMetricsCollector.*; import java.io.File; import java.io.FileReader; @@ -123,7 +124,7 @@ public void runGcBiasMultiLevelTest() throws IOException { output.read(new FileReader(outfile)); for (final GcBiasSummaryMetrics metrics : output.getMetrics()) { - if (metrics.ACCUMULATION_LEVEL.equals("All Reads")) { //ALL_READS level + if (metrics.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS)) { //ALL_READS level Assert.assertEquals(metrics.TOTAL_CLUSTERS, 300); Assert.assertEquals(metrics.ALIGNED_READS, 600); Assert.assertEquals(metrics.AT_DROPOUT, 21.624498); @@ -220,7 +221,7 @@ public void runWindowsComparisonTest() throws IOException { //Output for the two sam files are only the same for the "All Reads" level for (final GcBiasDetailMetrics metrics : outputAllChrDetails.getMetrics()) { - if (metrics.ACCUMULATION_LEVEL.equals("All Reads")) { + if (metrics.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS)) { Assert.assertEquals(metrics.WINDOWS, details.get(i).WINDOWS); i++; } @@ -308,7 +309,7 @@ public void runNonDupsComparisonTest() throws IOException { outputSummary.read(new FileReader(summaryOutfile)); for (final GcBiasSummaryMetrics summary : outputSummary.getMetrics()) { - if (summary.ACCUMULATION_LEVEL.equals("All Reads") && summary.READS_USED.equals("UNIQUE")) { //ALL_READS level for case without duplicates + if (summary.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS) && summary.READS_USED.equals(READS_USED_UNIQUE)) { //ALL_READS level for case without duplicates Assert.assertEquals(summary.TOTAL_CLUSTERS, 3); Assert.assertEquals(summary.ALIGNED_READS, 3); Assert.assertEquals(summary.AT_DROPOUT, 79.180328); @@ -319,7 +320,7 @@ public void runNonDupsComparisonTest() throws IOException { Assert.assertEquals(summary.GC_NC_60_79, 0.0); Assert.assertEquals(summary.GC_NC_80_100, 0.0); } - if (summary.ACCUMULATION_LEVEL.equals("All Reads") && summary.READS_USED.equals("ALL")) { //ALL_READS level + if (summary.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS) && summary.READS_USED.equals(READS_USED_ALL)) { //ALL_READS level Assert.assertEquals(summary.TOTAL_CLUSTERS, 5); Assert.assertEquals(summary.ALIGNED_READS, 5); Assert.assertEquals(summary.AT_DROPOUT, 79.180328); @@ -350,7 +351,7 @@ public void runCheckingNoSEQTest() throws IOException { output.read(new FileReader(summaryOutfile)); for (final GcBiasSummaryMetrics metrics : output.getMetrics()) { - if (metrics.ACCUMULATION_LEVEL.equals("All Reads")) { //ALL_READS level + if (metrics.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS)) { //ALL_READS level Assert.assertEquals(metrics.TOTAL_CLUSTERS, 0); Assert.assertEquals(metrics.ALIGNED_READS, 1); Assert.assertEquals(metrics.AT_DROPOUT, 78.682453); diff --git a/src/test/java/picard/analysis/CollectMultipleMetricsTest.java b/src/test/java/picard/analysis/CollectMultipleMetricsTest.java index 517e41ec6..b37611232 100644 --- a/src/test/java/picard/analysis/CollectMultipleMetricsTest.java +++ b/src/test/java/picard/analysis/CollectMultipleMetricsTest.java @@ -14,6 +14,7 @@ import org.testng.annotations.Test; import picard.cmdline.CommandLineProgramTest; import picard.sam.SortSam; +import static picard.analysis.GcBiasMetricsCollector.PerUnitGcBiasMetricsCollector.*; import java.io.File; import java.io.FileInputStream; @@ -287,7 +288,7 @@ public void runGcTest(final File input) throws IOException { output.read(new FileReader(outfile + ".gc_bias.summary_metrics")); for (final GcBiasSummaryMetrics metrics : output.getMetrics()) { - if (metrics.ACCUMULATION_LEVEL.equals("All Reads")) { //ALL_READS level + if (metrics.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS)) { //ALL_READS level Assert.assertEquals(metrics.TOTAL_CLUSTERS, 300); Assert.assertEquals(metrics.ALIGNED_READS, 600); Assert.assertEquals(metrics.AT_DROPOUT, 7.234062);