diff --git a/src/main/java/picard/vcf/CallingMetricAccumulator.java b/src/main/java/picard/vcf/CallingMetricAccumulator.java
index 8841b4a54..9a439fdd6 100644
--- a/src/main/java/picard/vcf/CallingMetricAccumulator.java
+++ b/src/main/java/picard/vcf/CallingMetricAccumulator.java
@@ -109,7 +109,15 @@ public CallingMetricAccumulator(final DbSnpBitSetUtil.DbSnpBitSets dbsnp) {
     }
 
+    /**
+     * Looks up every genotype sample named in the VCF header in {@code sampleMetricsMap}.
+     *
+     * @param vcfHeader header whose genotype sample names are looked up
+     */
     public void setup(final VCFHeader vcfHeader) {
-        //noop.
+        // Call sampleMetricsMap.get for each sample in case the sample is never otherwise put in the map
+        // (for example, because all of its calls are HomRef).
+        // NOTE(review): presumably the map creates a default entry on get -- confirm against its declaration.
+        vcfHeader.getGenotypeSamples().forEach(sampleMetricsMap::get);
     }
 
     /** Incorporates the provided variant's data into the metric analysis. */
diff --git a/src/test/java/picard/vcf/CollectVariantCallingMetricsTest.java b/src/test/java/picard/vcf/CollectVariantCallingMetricsTest.java
index 66c3167b5..f252063ae 100644
--- a/src/test/java/picard/vcf/CollectVariantCallingMetricsTest.java
+++ b/src/test/java/picard/vcf/CollectVariantCallingMetricsTest.java
@@ -191,4 +191,42 @@ public void testMetricsTinyGVCF() throws IOException {
 
         Assert.assertEquals(detailMetrics.size(), 1, "Did not parse the expected number of detail metrics.");
     }
+
+    /**
+     * Runs CollectVariantCallingMetrics on allHomRef.vcf and expects the detail metrics to contain a row for
+     * HG00116 (the sample this test treats as having only hom-ref genotypes) with TOTAL_SNPS equal to 0.
+     */
+    @Test
+    public void testAllHomRefVCF() throws IOException {
+        final File dbSnpFile = new File(TEST_DATA_DIR, "mini.dbsnp.vcf");
+        final File vcfFile = new File(TEST_DATA_DIR, "allHomRef.vcf");
+        final File indexedVcfFile = VcfTestUtils.createTemporaryIndexedVcfFromInput(vcfFile, "allHomRef.tmp.");
+        final File outFile = new File(TEST_DATA_DIR, "vcmetrics_allHomRef");
+        final File summaryFile = new File(outFile + ".variant_calling_summary_metrics");
+        final File detailFile = new File(outFile + ".variant_calling_detail_metrics");
+
+        outFile.deleteOnExit();
+        summaryFile.deleteOnExit();
+        detailFile.deleteOnExit();
+
+        final CollectVariantCallingMetrics program = new CollectVariantCallingMetrics();
+        program.INPUT = indexedVcfFile;
+        program.DBSNP = dbSnpFile;
+        program.OUTPUT = outFile;
+        Assert.assertEquals(program.doWork(), 0);
+
+        final MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, Comparable<?>> detail = new MetricsFile<>();
+        // Close the reader as soon as the metrics have been parsed instead of leaking the file handle.
+        try (final FileReader detailReader = new FileReader(detailFile)) {
+            detail.read(detailReader);
+        }
+        boolean seenSampleWithOnlyHomRefs = false;
+        for (final CollectVariantCallingMetrics.VariantCallingDetailMetrics metrics : detail.getMetrics()) {
+            if (metrics.SAMPLE_ALIAS.equals("HG00116")) {
+                seenSampleWithOnlyHomRefs = true;
+                Assert.assertEquals(metrics.TOTAL_SNPS, 0);
+            }
+        }
+        Assert.assertTrue(seenSampleWithOnlyHomRefs, "No detail metrics row was written for sample HG00116.");
+    }
 }
diff --git a/testdata/picard/vcf/allHomRef.vcf b/testdata/picard/vcf/allHomRef.vcf
new file mode 100755
index 000000000..9d61e91f0
--- /dev/null
+++ b/testdata/picard/vcf/allHomRef.vcf
@@ -0,0 +1,147 @@
+##fileformat=VCFv4.1
+##ApplyRecalibration="analysis_type=ApplyRecalibration input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null tag=NA read_filter=[] intervals=[/seq/tng/mccowan/VARIANT_CALLING_FAUXCELL/v1/VARIANT_CALLING_FAUXCELL.padded.interval_list] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta nonDeterministicRandomSeed=false disableRandomization=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 use_legacy_downsampler=false baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false performanceLog=null useOriginalQualities=false BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 defaultBaseQualities=-1 validation_strictness=SILENT remove_program_records=false keep_program_records=false unsafe=null num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT 
allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false input=[(RodBinding name=input source=/seq/tng/mccowan/VARIANT_CALLING_FAUXCELL/v1/VARIANT_CALLING_FAUXCELL.snps.unfiltered.vcf)] recal_file=(RodBinding name=recal_file source=/seq/tng/mccowan/VARIANT_CALLING_FAUXCELL/v1/VARIANT_CALLING_FAUXCELL.snps.recal) tranches_file=/seq/tng/mccowan/VARIANT_CALLING_FAUXCELL/v1/VARIANT_CALLING_FAUXCELL.snps.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=98.5 ignore_filter=null mode=SNP filter_mismatching_base_and_quals=false" +##FILTER=200.0"> +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##OriginalSnpEffCmd="SnpEff eff -v -onlyCoding true -c /seq/references/Homo_sapiens_assembly19/v1/snpEff/Homo_sapiens_assembly19.snpEff.config -i vcf -o vcf GRCh37.64 /seq/tng/mccowan/VARIANT_CALLING_FAUXCELL/v1/VARIANT_CALLING_FAUXCELL.unannotated.vcf " +##OriginalSnpEffVersion="2.0.5 (build 2011-12-24), by Pablo Cingolani" +##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[/seq/tng/mccowan/VARIANT_CALLING_FAUXCELL/v1/VARIANT_CALLING_FAUXCELL.bam.list] read_buffer_size=null phone_home=STANDARD gatk_key=null tag=NA read_filter=[] 
intervals=[/seq/tng/mccowan/VARIANT_CALLING_FAUXCELL/v1/scatter/temp_0001_of_50/scattered.intervals] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta nonDeterministicRandomSeed=false disableRandomization=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=75 use_legacy_downsampler=false baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false performanceLog=null useOriginalQualities=false BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 defaultBaseQualities=-1 validation_strictness=SILENT remove_program_records=false keep_program_records=false unsafe=null num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false genotype_likelihoods_model=BOTH pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=ORIGINAL min_base_quality_score=17 max_deletion_fraction=0.05 min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indel_heterozygosity=1.25E-4 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false heterozygosity=0.0010 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY 
standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=30.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 p_nonref_model=EXACT_INDEPENDENT contamination_fraction_to_filter=0.0 contamination_percentage_per_sample_file=/seq/tng/mccowan/VARIANT_CALLING_FAUXCELL/v1/alleleBiasedDownsamplingPerSample.txt logRemovedReadsFromContaminationFiltering=null exactcallslog=null dbsnp=(RodBinding name=dbsnp source=/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.dbsnp.vcf) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_mismatching_base_and_quals=false" +##VariantAnnotator="analysis_type=VariantAnnotator input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null tag=NA read_filter=[] intervals=[/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=50 reference_sequence=/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta nonDeterministicRandomSeed=false disableRandomization=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 use_legacy_downsampler=false baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false performanceLog=null useOriginalQualities=false BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 defaultBaseQualities=-1 validation_strictness=SILENT 
remove_program_records=false keep_program_records=false unsafe=null num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=/seq/tng/mccowan/VARIANT_CALLING_FAUXCELL/v1/VARIANT_CALLING_FAUXCELL.unannotated.vcf) snpEffFile=(RodBinding name=snpEffFile source=/seq/tng/mccowan/VARIANT_CALLING_FAUXCELL/v1/VARIANT_CALLING_FAUXCELL.snpeff.vcf) dbsnp=(RodBinding name= source=UNBOUND) comp=[] resource=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub annotation=[SnpEff] excludeAnnotation=[] group=[] expression=[] useAllAnnotations=false list=false alwaysAppendDbsnpId=false MendelViolationGenotypeQualityThreshold=0.0 requireStrictAlleleMatch=false filter_mismatching_base_and_quals=false" +##VariantFiltration="analysis_type=VariantFiltration input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null tag=NA read_filter=[] intervals=[/seq/tng/mccowan/VARIANT_CALLING_FAUXCELL/v1/VARIANT_CALLING_FAUXCELL.padded.interval_list] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta nonDeterministicRandomSeed=false disableRandomization=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 use_legacy_downsampler=false baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false 
performanceLog=null useOriginalQualities=false BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 defaultBaseQualities=-1 validation_strictness=SILENT remove_program_records=false keep_program_records=false unsafe=null num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=/seq/tng/mccowan/VARIANT_CALLING_FAUXCELL/v1/VARIANT_CALLING_FAUXCELL.indels.unfiltered.vcf) mask=(RodBinding name= source=UNBOUND) out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub filterExpression=[FS>200.0, QD<2.0, ReadPosRankSum<-20.0, InbreedingCoeff<-0.8] filterName=[Indel_FS, Indel_QD, Indel_ReadPosRankSum, Indel_InbreedingCoeff] genotypeFilterExpression=[] genotypeFilterName=[] clusterSize=3 clusterWindowSize=0 maskExtension=0 maskName=Mask missingValuesInExpressionsShouldEvaluateAsFailing=false invalidatePreviousFilters=false filter_mismatching_base_and_quals=false" +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= 
+##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##reference=file:///seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00116 HG00123 HG00158 HG00160 HG00265 HG00311 HG00371 HG00380 HG00404 HG00551 HG00610 HG00628 HG00635 HG01048 HG01079 HG01094 HG01247 HG01256 HG01260 HG01461 HG01488 NA06989 NA11918 NA11919 NA12341 NA18510 NA18520 NA18522 NA18574 NA18634 NA18867 NA18960 NA18986 NA19004 NA19067 NA19092 NA19102 NA19474 NA19675 NA19703 NA19711 NA19725 NA19819 NA19908 NA19920 NA20787 NA20798 NA20801 NA20803 NA20805 +1 69270 . A G 569.98 PASS AC=23;AF=0.767;AN=30;BaseQRankSum=-2.920;DP=4121;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;InbreedingCoeff=0.3609;MLEAC=24;MLEAF=0.800;MQ=3.91;MQ0=1518;MQRankSum=0.523;QD=0.55;ReadPosRankSum=-0.861;SNPEFF_AMINO_ACID_CHANGE=S108;SNPEFF_CODON_CHANGE=tcA/tcG;SNPEFF_EFFECT=SYNONYMOUS_CODING;SNPEFF_EXON_ID=exon_1_69037_69829;SNPEFF_FUNCTIONAL_CLASS=SILENT;SNPEFF_GENE_BIOTYPE=protein_coding;SNPEFF_GENE_NAME=OR4F5;SNPEFF_IMPACT=LOW;SNPEFF_TRANSCRIPT_ID=ENST00000534990;VQSLOD=-8.590e+00;culprit=MQ GT:AD:DP:GQ:PL 0/0:60,18:75:9:0,9,89 ./. ./. ./. ./. ./. ./. ./. ./. ./. 1/1:51,23:71:3:24,3,0 ./. ./. ./. 1/1:59,15:72:3:33,3,0 ./. ./. ./. ./. ./. 0/0:2,0:2:6:0,6,45 0/0:66,8:71:3:0,3,24 ./. ./. ./. 0/0:75,0:75:9:0,9,81 1/1:183,17:197:9:82,9,0 0/1:56,19:63:18:18,0,18 ./. 1/1:22,22:43:3:31,3,0 ./. ./. 1/1:51,24:73:6:57,6,0 1/1:52,25:74:12:101,12,0 ./. ./. ./. 1/1:63,12:72:6:47,6,0 ./. ./. ./. ./. ./. ./. ./. ./. 1/1:40,56:93:9:78,9,0 1/1:63,14:74:6:47,6,0 1/1:79,11:87:3:24,3,0 ./.