diff --git a/src/main/java/picard/analysis/CollectGcBiasMetrics.java b/src/main/java/picard/analysis/CollectGcBiasMetrics.java index a0ca80e3a..30ed75677 100644 --- a/src/main/java/picard/analysis/CollectGcBiasMetrics.java +++ b/src/main/java/picard/analysis/CollectGcBiasMetrics.java @@ -34,6 +34,7 @@ import picard.cmdline.Option; import picard.cmdline.programgroups.Metrics; import picard.metrics.GcBiasMetrics; +import picard.metrics.MultiLevelCollector; import picard.util.RExecutor; import java.io.File; @@ -133,6 +134,10 @@ @Option(shortName = "LEVEL", doc = "The level(s) at which to accumulate metrics.") public Set METRIC_ACCUMULATION_LEVEL = CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS); + @Option(shortName = "ALSO_IGNORE_DUPLICATES", doc = "Use to get additional results without duplicates. This option " + + "allows to gain two plots per level at the same time: one is the usual one and the other excludes duplicates.") + public boolean ALSO_IGNORE_DUPLICATES = false; + // Calculates GcBiasMetrics for all METRIC_ACCUMULATION_LEVELs provided private GcBiasMetricsCollector multiCollector; @@ -162,7 +167,7 @@ protected void setup(final SAMFileHeader header, final File samFile) { final int[] windowsByGc = GcBiasUtils.calculateRefWindowsByGc(BINS, REFERENCE_SEQUENCE, SCAN_WINDOW_SIZE); //Delegate actual collection to GcBiasMetricCollector - multiCollector = new GcBiasMetricsCollector(METRIC_ACCUMULATION_LEVEL, windowsByGc, header.getReadGroups(), SCAN_WINDOW_SIZE, IS_BISULFITE_SEQUENCED); + multiCollector = new GcBiasMetricsCollector(METRIC_ACCUMULATION_LEVEL, windowsByGc, header.getReadGroups(), SCAN_WINDOW_SIZE, IS_BISULFITE_SEQUENCED, ALSO_IGNORE_DUPLICATES); } //////////////////////////////////////////////////////////////////////////// @@ -179,14 +184,18 @@ protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) { @Override protected void finish() { multiCollector.finish(); + writeResultsToFiles(); + } + + private void writeResultsToFiles() { final MetricsFile file = getMetricsFile(); final MetricsFile detailMetricsFile = getMetricsFile(); final MetricsFile summaryMetricsFile = getMetricsFile(); multiCollector.addAllLevelsToFile(file); final List gcBiasMetricsList = file.getMetrics(); - for(final GcBiasMetrics gcbm : gcBiasMetricsList){ + for (final GcBiasMetrics gcbm : gcBiasMetricsList) { final List gcDetailList = gcbm.DETAILS.getMetrics(); - for(final GcBiasDetailMetrics d : gcDetailList) { + for (final GcBiasDetailMetrics d : gcDetailList) { detailMetricsFile.addMetric(d); } summaryMetricsFile.addMetric(gcbm.SUMMARY); diff --git a/src/main/java/picard/analysis/CollectMultipleMetrics.java b/src/main/java/picard/analysis/CollectMultipleMetrics.java index 3c5cf6ab8..8757b6392 100644 --- a/src/main/java/picard/analysis/CollectMultipleMetrics.java +++ b/src/main/java/picard/analysis/CollectMultipleMetrics.java @@ -241,6 +241,7 @@ public SinglePassSamProgram makeInstance(final String outbase, final String oute program.MINIMUM_GENOME_FRACTION = 1.0E-5; program.IS_BISULFITE_SEQUENCED = false; program.ASSUME_SORTED = false; + program.ALSO_IGNORE_DUPLICATES = false; //GC_Bias actually uses the class-level REFERENCE_SEQUENCE variable. program.REFERENCE_SEQUENCE = reference; diff --git a/src/main/java/picard/analysis/GcBiasDetailMetrics.java b/src/main/java/picard/analysis/GcBiasDetailMetrics.java index 422886b4f..de8af4a93 100644 --- a/src/main/java/picard/analysis/GcBiasDetailMetrics.java +++ b/src/main/java/picard/analysis/GcBiasDetailMetrics.java @@ -34,6 +34,8 @@ */ public class GcBiasDetailMetrics extends MultilevelMetrics { public String ACCUMULATION_LEVEL; + /** This option is used to mark including or excluding duplicates. */ + public String READS_USED; /** The G+C content of the reference sequence represented by this bin. Values are from 0% to 100% */ public int GC; diff --git a/src/main/java/picard/analysis/GcBiasMetricsCollector.java b/src/main/java/picard/analysis/GcBiasMetricsCollector.java index e372da632..17454b71b 100644 --- a/src/main/java/picard/analysis/GcBiasMetricsCollector.java +++ b/src/main/java/picard/analysis/GcBiasMetricsCollector.java @@ -28,6 +28,7 @@ import htsjdk.samtools.SAMRecord; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.reference.ReferenceSequence; +import htsjdk.samtools.util.Log; import htsjdk.samtools.util.QualityUtil; import htsjdk.samtools.util.SequenceUtil; import htsjdk.samtools.util.StringUtil; @@ -51,19 +52,31 @@ private final boolean bisulfite; private int[] windowsByGc = new int[BINS]; private static final int BINS = 101; + //Use to calculate additional results without duplicates + private boolean ignoreDuplicates; //will hold the relevant gc information per contig private byte [] gc = null; private int referenceIndex = -1; private byte [] refBases = null; + private static final Log log = Log.getInstance(GcBiasMetricsCollector.class); public GcBiasMetricsCollector(final Set accumulationLevels, final int[] windowsByGc, - final List samRgRecords, final int scanWindowSize, final boolean bisulfite) { + final List samRgRecords, final int scanWindowSize, + final boolean bisulfite) { + this(accumulationLevels, windowsByGc, samRgRecords, scanWindowSize, bisulfite, false); + } + + public GcBiasMetricsCollector(final Set accumulationLevels, final int[] windowsByGc, + final List samRgRecords, final int scanWindowSize, + final boolean bisulfite, final boolean ignoreDuplicates) { this.scanWindowSize = scanWindowSize; this.bisulfite = bisulfite; this.windowsByGc = windowsByGc; + this.ignoreDuplicates = ignoreDuplicates; setup(accumulationLevels, samRgRecords); } + ///////////////////////////////////////////////////////////////////////////// // This method is called once Per samRecord ///////////////////////////////////////////////////////////////////////////// @@ -80,19 +93,25 @@ protected GcBiasCollectorArgs makeArg(final SAMRecord rec, final ReferenceSequen return new PerUnitGcBiasMetricsCollector(sample, library, readGroup); } - @Override - public void acceptRecord(final SAMRecord rec, final ReferenceSequence ref) {super.acceptRecord(rec, ref);} - ///////////////////////////////////////////////////////////////////////////// //A collector for individual GcBiasMetrics for a given SAMPLE or SAMPLE/LIBRARY //or SAMPLE/LIBRARY/READ_GROUP (depending on aggregation levels) ///////////////////////////////////////////////////////////////////////////// public class PerUnitGcBiasMetricsCollector implements PerUnitMetricCollector { - Map gcData = new HashMap(); + private final Map gcData; + // Additional object to store data without duplicates (null if option ALSO_IGNORE_DUPLICATES is not specified) + private Map gcDataNonDups; private final String sample; private final String library; private final String readGroup; private static final String allReads = "All_Reads"; + final static String ACCUMULATION_LEVEL_ALL_READS = "All Reads"; + final static String ACCUMULATION_LEVEL_LIBRARY = "Library"; + final static String ACCUMULATION_LEVEL_SAMPLE = "Sample"; + final static String ACCUMULATION_LEVEL_READ_GROUP = "Read Group"; + final static String READS_USED_ALL = "ALL"; + final static String READS_USED_UNIQUE = "UNIQUE"; + private int logCounter; ///////////////////////////////////////////////////////////////////////////// //Records the accumulation level for each level of collection and initializes @@ -102,19 +121,9 @@ public PerUnitGcBiasMetricsCollector(final String sample, final String library, this.sample = sample; this.library = library; this.readGroup = readGroup; - final String prefix; - if (this.readGroup != null) { - prefix = this.readGroup; - gcData.put(prefix, new GcObject()); - } else if (this.library != null) { - prefix = this.library; - gcData.put(prefix, new GcObject()); - } else if (this.sample != null) { - prefix = this.sample; - gcData.put(prefix, new GcObject()); - } else { - prefix = allReads; - gcData.put(prefix, new GcObject()); + this.gcData = prepareGcData(); + if (ignoreDuplicates) { + this.gcDataNonDups = prepareGcData(); } } @@ -122,50 +131,97 @@ public PerUnitGcBiasMetricsCollector(final String sample, final String library, //Takes each record and sends them to addRead to calculate gc metrics for // that read for each accumulation level ///////////////////////////////////////////////////////////////////////////// + @Override public void acceptRecord(final GcBiasCollectorArgs args) { final SAMRecord rec = args.getRec(); - final String type; + if (logCounter < 100 && rec.getReadBases().length == 0) { + log.warn("Omitting read " + rec.getReadName() + " with '*' in SEQ field."); + if (++logCounter == 100) { + log.warn("There are more than 100 reads with '*' in SEQ field in file."); + } + return; + } if (!rec.getReadUnmappedFlag()) { - if(referenceIndex != rec.getReferenceIndex() || gc == null){ + if (referenceIndex != rec.getReferenceIndex() || gc == null) { final ReferenceSequence ref = args.getRef(); refBases = ref.getBases(); StringUtil.toUpperCase(refBases); final int refLength = refBases.length; final int lastWindowStart = refLength - scanWindowSize; gc = GcBiasUtils.calculateAllGcs(refBases, lastWindowStart, scanWindowSize); - referenceIndex=rec.getReferenceIndex(); + referenceIndex = rec.getReferenceIndex(); } - final String group; - if (this.readGroup != null) { - type = this.readGroup; - group = "Read Group"; - addRead(gcData.get(type), rec, group, gc, refBases); - } else if (this.library != null) { - type = this.library; - group = "Library"; - addRead(gcData.get(type), rec, group, gc, refBases); - } else if (this.sample != null) { - type = this.sample; - group = "Sample"; - addRead(gcData.get(type), rec, group, gc, refBases); - } else { - type = allReads; - group = "All Reads"; - addRead(gcData.get(type), rec, group, gc, refBases); + addReadToGcData(rec, this.gcData); + if (ignoreDuplicates && !rec.getDuplicateReadFlag()) { + addReadToGcData(rec, this.gcDataNonDups); } - } - else { - for (final Map.Entry entry : gcData.entrySet()) { - final GcObject gcCur = entry.getValue(); - if (!rec.getReadPairedFlag() || rec.getFirstOfPairFlag()) ++gcCur.totalClusters; + } else { + updateTotalClusters(rec, this.gcData); + if (ignoreDuplicates && !rec.getDuplicateReadFlag()) { + updateTotalClusters(rec, this.gcDataNonDups); } } } + @Override public void finish() {} ///////////////////////////////////////////////////////////////////////////// + //Called to add metrics to the output file for each level of collection + // these metrics are used for graphing gc bias in R script + ///////////////////////////////////////////////////////////////////////////// + @Override + public void addMetricsToFile(final MetricsFile file) { + addGcDataToFile(file, this.gcData, true); + if (ignoreDuplicates) { + addGcDataToFile(file, this.gcDataNonDups, false); + } + } + + private void updateTotalClusters(final SAMRecord rec, final Map gcData) { + for (final Map.Entry entry : gcData.entrySet()) { + final GcObject gcCur = entry.getValue(); + if (!rec.getReadPairedFlag() || rec.getFirstOfPairFlag()) ++gcCur.totalClusters; + } + } + + private Map prepareGcData() { + final String prefix; + final Map gcData = new HashMap<>(); + if (this.readGroup != null) { + prefix = this.readGroup; + } else if (this.library != null) { + prefix = this.library; + } else if (this.sample != null) { + prefix = this.sample; + } else { + prefix = allReads; + } + gcData.put(prefix, new GcObject()); + return gcData; + } + + private void addReadToGcData(final SAMRecord rec, final Map gcData) { + final String type; + String group; + if (this.readGroup != null) { + type = this.readGroup; + group = ACCUMULATION_LEVEL_READ_GROUP; + } else if (this.library != null) { + type = this.library; + group = ACCUMULATION_LEVEL_LIBRARY; + } else if (this.sample != null) { + type = this.sample; + group = ACCUMULATION_LEVEL_SAMPLE; + } else { + type = allReads; + group = ACCUMULATION_LEVEL_ALL_READS; + } + addRead(gcData.get(type), rec, group, gc, refBases); + } + + ///////////////////////////////////////////////////////////////////////////// // Sums the values in an int[]. ///////////////////////////////////////////////////////////////////////////// private double sum(final int[] values) { @@ -178,11 +234,8 @@ private double sum(final int[] values) { return total; } - ///////////////////////////////////////////////////////////////////////////// - //Called to add metrics to the output file for each level of collection - // these metrics are used for graphing gc bias in R script - ///////////////////////////////////////////////////////////////////////////// - public void addMetricsToFile(final MetricsFile file) { + private void addGcDataToFile(final MetricsFile file, final Map gcData, + final boolean includeDuplicates) { for (final Map.Entry entry : gcData.entrySet()) { final GcObject gcCur = entry.getValue(); final String gcType = entry.getKey(); @@ -190,7 +243,7 @@ public void addMetricsToFile(final MetricsFile file) { final int[] readsByGc = gcCur.readsByGc; final long[] errorsByGc = gcCur.errorsByGc; final long[] basesByGc = gcCur.basesByGc; - final int totalClusters = gcCur.totalClusters; + final long totalClusters = gcCur.totalClusters; final long totalAlignedReads = gcCur.totalAlignedReads; final String group = gcCur.group; @@ -217,18 +270,22 @@ public void addMetricsToFile(final MetricsFile file) { detail.ERROR_BAR_WIDTH = 0; } detail.ACCUMULATION_LEVEL = group; - if (group.equals("Read Group")) {detail.READ_GROUP = gcType;} - else if (group.equals("Sample")) {detail.SAMPLE = gcType;} - else if (group.equals("Library")) {detail.LIBRARY = gcType;} + if (group.equals(ACCUMULATION_LEVEL_READ_GROUP)) {detail.READ_GROUP = gcType;} + else if (group.equals(ACCUMULATION_LEVEL_SAMPLE)) {detail.SAMPLE = gcType;} + else if (group.equals(ACCUMULATION_LEVEL_LIBRARY)) {detail.LIBRARY = gcType;} + + detail.READS_USED = includeDuplicates ? READS_USED_ALL : READS_USED_UNIQUE; metrics.DETAILS.addMetric(detail); } // Synthesize the high level summary metrics final GcBiasSummaryMetrics summary = new GcBiasSummaryMetrics(); - if (group.equals("Read Group")) {summary.READ_GROUP = gcType;} - else if (group.equals("Sample")) {summary.SAMPLE = gcType;} - else if (group.equals("Library")) {summary.LIBRARY = gcType;} + if (group.equals(ACCUMULATION_LEVEL_READ_GROUP)) {summary.READ_GROUP = gcType;} + else if (group.equals(ACCUMULATION_LEVEL_SAMPLE)) {summary.SAMPLE = gcType;} + else if (group.equals(ACCUMULATION_LEVEL_LIBRARY)) {summary.LIBRARY = gcType;} + + summary.READS_USED = includeDuplicates ? READS_USED_ALL : READS_USED_UNIQUE; summary.ACCUMULATION_LEVEL = group; summary.WINDOW_SIZE = scanWindowSize; @@ -240,7 +297,6 @@ public void addMetricsToFile(final MetricsFile file) { summary.GC_NC_60_79 = calculateGcNormCoverage(meanReadsPerWindow, readsByGc, 60, 79); summary.GC_NC_80_100 = calculateGcNormCoverage(meanReadsPerWindow, readsByGc, 80, 100); - calculateDropoutMetrics(metrics.DETAILS.getMetrics(), summary); metrics.SUMMARY = summary; @@ -254,7 +310,8 @@ public void addMetricsToFile(final MetricsFile file) { ///////////////////////////////////////////////////////////////////////////// // Calculates the normalized coverage over a given gc content region ///////////////////////////////////////////////////////////////////////////// - private double calculateGcNormCoverage(final double meanReadsPerWindow, final int[] readsByGc, final int start, final int end) { + private double calculateGcNormCoverage(final double meanReadsPerWindow, final int[] readsByGc, + final int start, final int end) { int windowsTotal = 0; double sum = 0.0; for (int i = start; i <= end; i++) { @@ -266,9 +323,8 @@ private double calculateGcNormCoverage(final double meanReadsPerWindow, final in if (windowsTotal == 0) { return 0.0; - } - else { - return (sum / (windowsTotal*meanReadsPerWindow)); + } else { + return (sum / (windowsTotal * meanReadsPerWindow)); } } @@ -308,7 +364,7 @@ private void calculateDropoutMetrics(final Collection detai //Keeps track of each level of GcCalculation ///////////////////////////////////////////////////////////////////////////// class GcObject { - int totalClusters = 0; + long totalClusters = 0; long totalAlignedReads = 0; int[] readsByGc = new int[BINS]; long[] basesByGc = new long[BINS]; diff --git a/src/main/java/picard/analysis/GcBiasSummaryMetrics.java b/src/main/java/picard/analysis/GcBiasSummaryMetrics.java index 0808776a3..c8cf4fd94 100644 --- a/src/main/java/picard/analysis/GcBiasSummaryMetrics.java +++ b/src/main/java/picard/analysis/GcBiasSummaryMetrics.java @@ -34,11 +34,14 @@ public class GcBiasSummaryMetrics extends MultilevelMetrics { public String ACCUMULATION_LEVEL; + /** This option is used to mark including or excluding duplicates. */ + public String READS_USED; + /** The window size on the genome used to calculate the GC of the sequence. */ public int WINDOW_SIZE; /** The total number of clusters that were seen in the gc bias calculation. */ - public int TOTAL_CLUSTERS; + public long TOTAL_CLUSTERS; /** The total number of aligned reads used to compute the gc bias metrics. */ public long ALIGNED_READS; diff --git a/src/main/resources/picard/analysis/gcBias.R b/src/main/resources/picard/analysis/gcBias.R index 503e73751..2dcb39746 100644 --- a/src/main/resources/picard/analysis/gcBias.R +++ b/src/main/resources/picard/analysis/gcBias.R @@ -69,6 +69,7 @@ for (k in 1:(num.plots)){ if(accLevel == "Sample"){datasetName <- summaryMetrics[k, "SAMPLE"]} if(accLevel == "Library"){datasetName <- summaryMetrics[k, "LIBRARY"]} else(datasetName <- summaryMetrics[k, "READ_GROUP"])} + duplicatesMarker <- ifelse(summaryMetrics[k, "READS_USED"] == "ALL", "", " (duplicates excluded) "); subtitle = cat("Total clusters: ",summaryMetrics[k,"TOTAL_CLUSTERS"],", Aligned reads: ",summaryMetrics[k, "ALIGNED_READS"]); # Do the main plot of the normalized coverage by GC plot(type="p", x=metrics$GC, y=metrics$NORMALIZED_COVERAGE, @@ -77,7 +78,7 @@ for (k in 1:(num.plots)){ xlim=c(0,100), ylim=c(0, Y_AXIS_LIM), col=COLORS[1], - main=paste(accLevel, "Level:", datasetName, "GC Bias Plot", "\n", subtitle) + main=paste(accLevel, "Level:", datasetName, duplicatesMarker, "GC Bias Plot", "\n", subtitle) ); # Add lines at the 50% GC and coverage=1 diff --git a/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java b/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java index 16b405fc4..51ed42436 100644 --- a/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java +++ b/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java @@ -14,6 +14,7 @@ import org.testng.annotations.Test; import picard.cmdline.CommandLineProgramTest; import picard.sam.SortSam; +import static picard.analysis.GcBiasMetricsCollector.PerUnitGcBiasMetricsCollector.*; import java.io.File; import java.io.FileReader; @@ -41,6 +42,8 @@ private final static File TEST_DIR = new File("testdata/picard/sam/CollectGcBiasMetrics/"); private final File dict = new File(TEST_DIR, "MNOheader.dict"); + private final String REFERENCE_FILE_1 = "testdata/picard/metrics/chrMNO.reference.fasta"; + private final String REFERENCE_FILE_2 = "testdata/picard/metrics/chrM.reference.fasta"; File tempSamFileChrM_O; File tempSamFileAllChr; @@ -115,13 +118,13 @@ public void runGcBiasMultiLevelTest() throws IOException { outfile.deleteOnExit(); detailsOutfile.deleteOnExit(); - runGcBias(tempSamFileChrM_O, outfile, detailsOutfile); + runGcBias(tempSamFileChrM_O, REFERENCE_FILE_1, outfile, detailsOutfile, false); final MetricsFile> output = new MetricsFile>(); output.read(new FileReader(outfile)); for (final GcBiasSummaryMetrics metrics : output.getMetrics()) { - if (metrics.ACCUMULATION_LEVEL.equals("All Reads")) { //ALL_READS level + if (metrics.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS)) { //ALL_READS level Assert.assertEquals(metrics.TOTAL_CLUSTERS, 300); Assert.assertEquals(metrics.ALIGNED_READS, 600); Assert.assertEquals(metrics.AT_DROPOUT, 21.624498); @@ -204,8 +207,8 @@ public void runWindowsComparisonTest() throws IOException { detailsOutfile.deleteOnExit(); allChrDetailsOutfile.deleteOnExit(); - runGcBias(tempSamFileChrM_O, outfile, detailsOutfile); - runGcBias(tempSamFileAllChr, allChrOutFile, allChrDetailsOutfile); + runGcBias(tempSamFileChrM_O, REFERENCE_FILE_1, outfile, detailsOutfile, false); + runGcBias(tempSamFileAllChr, REFERENCE_FILE_1, allChrOutFile, allChrDetailsOutfile, false); final MetricsFile> outputDetails = new MetricsFile>(); outputDetails.read(new FileReader(detailsOutfile)); @@ -218,7 +221,7 @@ public void runWindowsComparisonTest() throws IOException { //Output for the two sam files are only the same for the "All Reads" level for (final GcBiasDetailMetrics metrics : outputAllChrDetails.getMetrics()) { - if (metrics.ACCUMULATION_LEVEL.equals("All Reads")) { + if (metrics.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS)) { Assert.assertEquals(metrics.WINDOWS, details.get(i).WINDOWS); i++; } @@ -238,7 +241,7 @@ public File build (final List setBuilder, final File unsort final SAMFileWriter writer = new SAMFileWriterFactory() .setCreateIndex(true).makeBAMWriter(header, false, unsortedSam); - for( final SAMRecordSetBuilder subSetBuilder : setBuilder){ + for (final SAMRecordSetBuilder subSetBuilder : setBuilder) { for (final SAMRecord record : subSetBuilder) { writer.addAlignment(record); } @@ -260,8 +263,8 @@ public File build (final List setBuilder, final File unsort ///////////////////////////////////////////////////////////////////////////// // Runs CollectGcBias with input Sam file and outputs details and summary files for truth assertion. ///////////////////////////////////////////////////////////////////////////// - public void runGcBias (final File input, final File outfile, final File detailsOutfile) throws IOException { - final String referenceFile = "testdata/picard/metrics/chrMNO.reference.fasta"; + public void runGcBias (final File input, final String referenceFile, final File summaryOutfile, final File detailsOutfile, + final boolean nonDups) throws IOException { final File pdf = File.createTempFile("test", ".pdf"); pdf.deleteOnExit(); @@ -274,7 +277,7 @@ public void runGcBias (final File input, final File outfile, final File detailsO "INPUT=" + input.getAbsolutePath(), "OUTPUT=" + detailsOutfile.getAbsolutePath(), "REFERENCE_SEQUENCE=" + referenceFile, - "SUMMARY_OUTPUT=" + outfile.getAbsolutePath(), + "SUMMARY_OUTPUT=" + summaryOutfile.getAbsolutePath(), "CHART_OUTPUT=" + pdf.getAbsolutePath(), "SCAN_WINDOW_SIZE=" + windowSize, "MINIMUM_GENOME_FRACTION=" + minGenFraction, @@ -282,9 +285,84 @@ public void runGcBias (final File input, final File outfile, final File detailsO "LEVEL=ALL_READS", "LEVEL=SAMPLE", "LEVEL=READ_GROUP", - "ASSUME_SORTED=" + assumeSorted + "ASSUME_SORTED=" + assumeSorted, + "ALSO_IGNORE_DUPLICATES=" + nonDups }; - Assert.assertEquals(runPicardCommandLine(args), 0); + runPicardCommandLine(args); + } + + /** + * Compares metric's results by summary files without duplicates. + * @throws IOException + */ + @Test + public void runNonDupsComparisonTest() throws IOException { + final File inputFileWithDuplicates = new File("testdata/picard/metrics/chrMReads.sam"); + final File detailsOutfile = File.createTempFile("test", ".gc_bias_detail_metrics"); + final File summaryOutfile = File.createTempFile("test", ".gc_bias_summary_metrics"); + detailsOutfile.deleteOnExit(); + summaryOutfile.deleteOnExit(); + + runGcBias(inputFileWithDuplicates, REFERENCE_FILE_2, summaryOutfile, detailsOutfile, true); + + final MetricsFile> outputSummary = new MetricsFile<>(); + outputSummary.read(new FileReader(summaryOutfile)); + + for (final GcBiasSummaryMetrics summary : outputSummary.getMetrics()) { + if (summary.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS) && summary.READS_USED.equals(READS_USED_UNIQUE)) { //ALL_READS level for case without duplicates + Assert.assertEquals(summary.TOTAL_CLUSTERS, 3); + Assert.assertEquals(summary.ALIGNED_READS, 3); + Assert.assertEquals(summary.AT_DROPOUT, 79.180328); + Assert.assertEquals(summary.GC_DROPOUT, 12.28901); + Assert.assertEquals(summary.GC_NC_0_19, 0.0); + Assert.assertEquals(summary.GC_NC_20_39, 0.0); + Assert.assertEquals(summary.GC_NC_40_59, 1.246783); + Assert.assertEquals(summary.GC_NC_60_79, 0.0); + Assert.assertEquals(summary.GC_NC_80_100, 0.0); + } + if (summary.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS) && summary.READS_USED.equals(READS_USED_ALL)) { //ALL_READS level + Assert.assertEquals(summary.TOTAL_CLUSTERS, 5); + Assert.assertEquals(summary.ALIGNED_READS, 5); + Assert.assertEquals(summary.AT_DROPOUT, 79.180328); + Assert.assertEquals(summary.GC_DROPOUT, 10.37037); + Assert.assertEquals(summary.GC_NC_0_19, 0.0); + Assert.assertEquals(summary.GC_NC_20_39, 0.0); + Assert.assertEquals(summary.GC_NC_40_59, 1.246783); + Assert.assertEquals(summary.GC_NC_60_79, 0.0); + Assert.assertEquals(summary.GC_NC_80_100, 0.0); + } + } + } + + /** + * If SAM/BAM file with '*' in SEQ field omit this read. + */ + @Test + public void runCheckingNoSEQTest() throws IOException { + final File input = new File("testdata/picard/metrics/chrM_NO_SEQ.sam"); + final File summaryOutfile = File.createTempFile("test", ".gc_bias.summary_metrics"); + final File detailsOutfile = File.createTempFile("test", ".gc_bias.detail_metrics"); + summaryOutfile.deleteOnExit(); + detailsOutfile.deleteOnExit(); + + runGcBias(input, REFERENCE_FILE_2, summaryOutfile, detailsOutfile, false); + + final MetricsFile> output = new MetricsFile<>(); + output.read(new FileReader(summaryOutfile)); + + for (final GcBiasSummaryMetrics metrics : output.getMetrics()) { + if (metrics.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS)) { //ALL_READS level + Assert.assertEquals(metrics.TOTAL_CLUSTERS, 0); + Assert.assertEquals(metrics.ALIGNED_READS, 1); + Assert.assertEquals(metrics.AT_DROPOUT, 78.682453); + Assert.assertEquals(metrics.GC_DROPOUT, 14.693382); + Assert.assertEquals(metrics.GC_NC_0_19, 0.0); + Assert.assertEquals(metrics.GC_NC_20_39, 0.0); + Assert.assertEquals(metrics.GC_NC_40_59, 1.246783); + Assert.assertEquals(metrics.GC_NC_60_79, 0.0); + Assert.assertEquals(metrics.GC_NC_80_100, 0.0); + } + } } ///////////////////////////////////////////////////////////////////////////// diff --git a/src/test/java/picard/analysis/CollectMultipleMetricsTest.java b/src/test/java/picard/analysis/CollectMultipleMetricsTest.java index 517e41ec6..b37611232 100644 --- a/src/test/java/picard/analysis/CollectMultipleMetricsTest.java +++ b/src/test/java/picard/analysis/CollectMultipleMetricsTest.java @@ -14,6 +14,7 @@ import org.testng.annotations.Test; import picard.cmdline.CommandLineProgramTest; import picard.sam.SortSam; +import static picard.analysis.GcBiasMetricsCollector.PerUnitGcBiasMetricsCollector.*; import java.io.File; import java.io.FileInputStream; @@ -287,7 +288,7 @@ public void runGcTest(final File input) throws IOException { output.read(new FileReader(outfile + ".gc_bias.summary_metrics")); for (final GcBiasSummaryMetrics metrics : output.getMetrics()) { - if (metrics.ACCUMULATION_LEVEL.equals("All Reads")) { //ALL_READS level + if (metrics.ACCUMULATION_LEVEL.equals(ACCUMULATION_LEVEL_ALL_READS)) { //ALL_READS level Assert.assertEquals(metrics.TOTAL_CLUSTERS, 300); Assert.assertEquals(metrics.ALIGNED_READS, 600); Assert.assertEquals(metrics.AT_DROPOUT, 7.234062); diff --git a/testdata/picard/metrics/chrM_NO_SEQ.sam b/testdata/picard/metrics/chrM_NO_SEQ.sam new file mode 100644 index 000000000..cae527e80 --- /dev/null +++ b/testdata/picard/metrics/chrM_NO_SEQ.sam @@ -0,0 +1,5 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chrM LN:16299 AS:mm9 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz M5:11c8af2a2528b25f2c080ab7da42edda SP:Mus musculus +@RG ID:62A40.2 PL:illumina PU:62A40AAXX101028.2 LB:Solexa-45345 DT:2010-10-28T00:00:00-0400 SM:RRBS885 CN:BI +NRUSCA-WDL30341:66:000000000-ABTGC:1:2111:13507:4770 417 chrM 14225 60 188H112M = 14225 197 CTTGACCTGTGCCGGGTACGGCGGCGGACCGGCGACTACCCGGAGGCTGGCGAGCTGGCCCAGCGGGCCTACGACCTCTACCACAGCCTCGAAAACCGCCTCCGCCAGGCCC DBFFDFFB>>):BFFBBBFFBFFF>B>>FB;F;36>B;??FFFFFF?(-122:?0>6:BB<>FFF