use of org.broadinstitute.hellbender.tools.exome.TargetCollection in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method buildAndAnnotateTruthOverlappingGenotype.
private Genotype buildAndAnnotateTruthOverlappingGenotype(final String sample, final TargetCollection<Target> targets, final Genotype truthGenotype, final int truthCopyNumber, final CopyNumberTriStateAllele truthAllele, final List<Pair<VariantContext, Genotype>> calls) {
final Set<CopyNumberTriStateAllele> calledAlleles = calls.stream().map(pair -> CopyNumberTriStateAllele.valueOf(pair.getRight().getAllele(0))).collect(Collectors.toSet());
final Allele calledAllele = calledAlleles.size() == 1 ? calledAlleles.iterator().next() : Allele.NO_CALL;
final GenotypeBuilder builder = new GenotypeBuilder(sample);
// Set the call allele.
builder.alleles(Collections.singletonList(calledAllele));
// Set the truth allele.
builder.attribute(VariantEvaluationContext.TRUTH_GENOTYPE_KEY, CopyNumberTriStateAllele.ALL_ALLELES.indexOf(truthAllele));
// Annotate the genotype with the number of calls.
builder.attribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, calls.size());
// When there is more than one qualified type of event we indicate how many.
builder.attribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, CopyNumberTriStateAllele.ALL_ALLELES.stream().mapToInt(allele -> (int) calls.stream().filter(pair -> pair.getRight().getAllele(0).equals(allele, true)).count()).toArray());
// Calculate the length in targets of the call as the sum across all calls.
builder.attribute(VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, calls.stream().mapToInt(pair -> getTargetCount(targets, pair.getLeft(), pair.getRight())).sum());
// Calculate call quality-- if there is more than one overlapping call we take the maximum qual one.
builder.attribute(VariantEvaluationContext.CALL_QUALITY_KEY, calls.stream().mapToDouble(pair -> GATKProtectedVariantContextUtils.calculateGenotypeQualityFromPLs(pair.getRight())).max().orElse(0.0));
// Calculate the truth copy fraction.
builder.attribute(VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, truthGenotype.getExtendedAttribute(GS_COPY_NUMBER_FRACTION_KEY));
// Calculate the truth call quality.
final double truthQuality = calculateTruthQuality(truthGenotype, truthCopyNumber);
builder.attribute(VariantEvaluationContext.TRUTH_QUALITY_KEY, truthQuality);
// Set genotype filters:
final boolean truthPassQualityMinimum = truthQuality >= filterArguments.minimumTruthSegmentQuality;
builder.filter(truthPassQualityMinimum ? EvaluationFilter.PASS : EvaluationFilter.LowQuality.acronym);
// Calculate the evaluation class (TP, FN, etc.). Only if there is actually either a truth or a call that is not ref.
if (calledAlleles.contains(CopyNumberTriStateAllele.DEL) || calledAlleles.contains(CopyNumberTriStateAllele.DUP) || truthAllele != CopyNumberTriStateAllele.REF) {
final EvaluationClass evaluationClass;
if (calledAlleles.isEmpty() || (calledAlleles.size() == 1 && calledAlleles.contains(CopyNumberTriStateAllele.REF))) {
evaluationClass = EvaluationClass.FALSE_NEGATIVE;
} else if (calledAlleles.size() == 1) {
evaluationClass = calledAlleles.contains(truthAllele) ? EvaluationClass.TRUE_POSITIVE : truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE : /* else */
EvaluationClass.DISCORDANT_POSITIVE;
} else {
evaluationClass = truthAllele == CopyNumberTriStateAllele.REF ? EvaluationClass.FALSE_POSITIVE : EvaluationClass.MIXED_POSITIVE;
}
builder.attribute(VariantEvaluationContext.EVALUATION_CLASS_KEY, evaluationClass.acronym);
}
return builder.make();
}
use of org.broadinstitute.hellbender.tools.exome.TargetCollection in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method buildAndAnnotateTruthOverlappingGenotype.
private Genotype buildAndAnnotateTruthOverlappingGenotype(final String sample, final VariantContext truth, final List<VariantContext> calls, final TargetCollection<Target> targets) {
final Genotype truthGenotype = truth.getGenotype(sample);
// if there is no truth genotype for that sample, we output the "empty" genotype.
if (truthGenotype == null) {
return GenotypeBuilder.create(sample, Collections.emptyList());
}
final int truthCopyNumber = GATKProtectedVariantContextUtils.getAttributeAsInt(truthGenotype, GS_COPY_NUMBER_FORMAT_KEY, truthNeutralCopyNumber);
final CopyNumberTriStateAllele truthAllele = copyNumberToTrueAllele(truthCopyNumber);
final List<Pair<VariantContext, Genotype>> allCalls = calls.stream().map(vc -> new ImmutablePair<>(vc, vc.getGenotype(sample))).filter(pair -> pair.getRight() != null).filter(pair -> GATKProtectedVariantContextUtils.getAttributeAsString(pair.getRight(), XHMMSegmentGenotyper.DISCOVERY_KEY, XHMMSegmentGenotyper.DISCOVERY_FALSE).equals(XHMMSegmentGenotyper.DISCOVERY_TRUE)).collect(Collectors.toList());
final List<Pair<VariantContext, Genotype>> qualifiedCalls = composeQualifyingCallsList(targets, allCalls);
return buildAndAnnotateTruthOverlappingGenotype(sample, targets, truthGenotype, truthCopyNumber, truthAllele, qualifiedCalls);
}
use of org.broadinstitute.hellbender.tools.exome.TargetCollection in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputTruthConcordance.
private void checkOutputTruthConcordance(final File truthFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
final List<VariantContext> truthVariants = readVCFFile(truthFile);
final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
for (final VariantContext truth : truthVariants) {
final List<Target> overlappingTargets = targets.targets(truth);
final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(truth)).collect(Collectors.toList());
if (overlappingTargets.isEmpty()) {
Assert.assertTrue(overlappingOutput.isEmpty());
continue;
}
Assert.assertFalse(overlappingOutput.isEmpty());
Assert.assertEquals(overlappingOutput.stream().filter(vc -> new SimpleInterval(truth).equals(new SimpleInterval(vc))).count(), 1);
@SuppressWarnings("all") final Optional<VariantContext> prospectiveMatchingOutput = overlappingOutput.stream().filter(vc -> new SimpleInterval(truth).equals(new SimpleInterval(vc))).findFirst();
Assert.assertTrue(prospectiveMatchingOutput.isPresent());
final VariantContext matchingOutput = prospectiveMatchingOutput.get();
final int[] truthAC = calculateACFromTruth(truth);
final long truthAN = MathUtils.sum(truthAC);
final double[] truthAF = IntStream.of(Arrays.copyOfRange(truthAC, 1, truthAC.length)).mapToDouble(d -> d / (double) truthAN).toArray();
Assert.assertEquals(matchingOutput.getAttributeAsInt(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, -1), truthAN);
assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.TRUTH_ALLELE_FREQUENCY_KEY, () -> new double[2], 0.0), truthAF, 0.001);
assertOutputVariantFilters(filteringOptions, overlappingTargets, matchingOutput, truthAF);
for (final String sample : outputSamples) {
final Genotype outputGenotype = matchingOutput.getGenotype(sample);
final Genotype truthGenotype = truth.getGenotype(sample);
final int truthCN = GATKProtectedVariantContextUtils.getAttributeAsInt(truthGenotype, ConvertGSVariantsToSegments.GS_COPY_NUMBER_FORMAT, -1);
final int truthGT = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.TRUTH_GENOTYPE_KEY, -1);
final Object truthQualObject = outputGenotype.getAnyAttribute(VariantEvaluationContext.TRUTH_QUALITY_KEY);
Assert.assertNotNull(truthQualObject, "" + truthGenotype);
final double truthQual = Double.parseDouble(String.valueOf(truthQualObject));
if (truthQual < filteringOptions.minimumTruthSegmentQuality) {
Assert.assertEquals(outputGenotype.getFilters(), EvaluationFilter.LowQuality.acronym);
} else {
Assert.assertTrue(outputGenotype.getFilters() == null || outputGenotype.getFilters().equals(VCFConstants.PASSES_FILTERS_v4));
}
final double[] truthPosteriors = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(truthGenotype, ConvertGSVariantsToSegments.GS_COPY_NUMBER_POSTERIOR, () -> new double[0], Double.NEGATIVE_INFINITY);
final double truthDelPosterior = MathUtils.log10SumLog10(truthPosteriors, 0, EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT);
final double truthRefPosterior = truthPosteriors[EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT];
final double truthDupPosterior = truthPosteriors.length < EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT ? Double.NEGATIVE_INFINITY : MathUtils.log10SumLog10(truthPosteriors, EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT + 1, truthPosteriors.length);
final CopyNumberTriStateAllele truthAllele = truthGT == -1 ? null : CopyNumberTriStateAllele.ALL_ALLELES.get(truthGT);
if (truthCN < EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT) {
Assert.assertEquals(truthAllele, CopyNumberTriStateAllele.DEL);
Assert.assertEquals(truthQual * -.1, MathUtils.log10SumLog10(new double[] { truthRefPosterior, truthDupPosterior }), 0.01);
} else if (truthCN > EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT) {
Assert.assertEquals(truthAllele, CopyNumberTriStateAllele.DUP);
Assert.assertEquals(truthQual * -.1, MathUtils.log10SumLog10(new double[] { truthDelPosterior, truthRefPosterior }), 0.01, "" + truthGenotype + " " + outputGenotype);
} else {
Assert.assertEquals(truthAllele, CopyNumberTriStateAllele.REF);
Assert.assertEquals(truthQual * -.1, MathUtils.log10SumLog10(new double[] { truthDelPosterior, truthDupPosterior }), 0.01);
}
final double outputTruthFraction = GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, -1);
final double inputTruthFraction = GATKProtectedVariantContextUtils.getAttributeAsDouble(truthGenotype, ConvertGSVariantsToSegments.GS_COPY_NUMBER_FRACTION, -1);
Assert.assertEquals(outputTruthFraction, inputTruthFraction, 0.01);
}
}
}
use of org.broadinstitute.hellbender.tools.exome.TargetCollection in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method doWork.
@Override
protected Object doWork() {
final TargetCollection<Target> targets = targetArguments.readTargetCollection(false);
final VCFFileReader truthReader = openVCFReader(truthFile);
final VCFFileReader callsReader = openVCFReader(callsFile);
final GenotypeEvaluationRecordWriter caseWriter = openGenotypeEvaluationOutputWriter(caseDetailOutputFile);
if (samples.isEmpty()) {
samples = composeSetOfSamplesToEvaluate(callsReader);
}
final VariantContextWriter outputWriter = openVCFWriter(outputFile, samples);
final Map<String, EvaluationSampleSummaryRecord> sampleStats = samples.stream().collect(Collectors.toMap(s -> s, EvaluationSampleSummaryRecord::new));
final List<SimpleInterval> intervals = composeListOfProcessingIntervalsFromInputs(truthReader, callsReader);
for (final SimpleInterval interval : intervals) {
for (final VariantEvaluationContext vc : processInterval(truthReader, callsReader, interval, targets)) {
outputWriter.add(vc);
outputCases(caseWriter, vc, targets);
updateSampleStats(sampleStats, vc);
}
}
truthReader.close();
callsReader.close();
outputWriter.close();
closeCaseRecordWriter(caseWriter);
writeSampleSummaryFile(sampleSummaryOutputFile, sampleStats);
return "SUCCESS";
}
use of org.broadinstitute.hellbender.tools.exome.TargetCollection in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method processInterval.
/**
* Processes a cluster of truth and called variants that may overlap over a genome region.
* <p>
* It returns the list of {@link VariantEvaluationContext} instances to be output for the given interval. These
* are already sorted by coordinates.
* </p>
* @param truthReader reader to the truth variants.
* @param callsReader reader to the called variants.
* @param interval the interval to analyze.
* @return never {@code null}.
*/
private List<VariantEvaluationContext> processInterval(final VCFFileReader truthReader, final VCFFileReader callsReader, final SimpleInterval interval, final TargetCollection<Target> targets) {
final List<VariantContext> truthVariants = variantQueryToList(truthReader, interval);
final List<VariantContext> callsVariants = variantQueryToList(callsReader, interval);
final List<VariantEvaluationContext> evaluatedVariants = new ArrayList<>(truthVariants.size() + callsVariants.size());
for (final VariantContext truth : truthVariants) {
// skip truth that does not overlap a single target.
if (targets.targetCount(truth) == 0) {
continue;
}
final List<VariantContext> overlappingCalls = callsVariants.stream().filter(vc -> IntervalUtils.overlaps(truth, vc)).collect(Collectors.toList());
evaluatedVariants.add(composeTruthOverlappingVariantContext(truth, overlappingCalls, targets));
}
for (final VariantContext call : callsVariants) {
// skip call that does not overlap a single target (the user might want to call on a smaller set of targets)
if (targets.targetCount(call) == 0) {
continue;
}
final List<VariantContext> overlappingTruth = truthVariants.stream().filter(vc -> IntervalUtils.overlaps(call, vc)).collect(Collectors.toList());
if (overlappingTruth.isEmpty()) {
evaluatedVariants.add(composeNonTruthOverlappingVariantContext(call, targets));
}
}
return evaluatedVariants.stream().sorted(IntervalUtils.LEXICOGRAPHICAL_ORDER_COMPARATOR).map(this::applyVariantContextFilters).collect(Collectors.toList());
}
Aggregations