use of htsjdk.variant.variantcontext.Genotype in project gatk-protected by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method buildAndAnnotateTruthOverlappingGenotype.
private Genotype buildAndAnnotateTruthOverlappingGenotype(final String sample, final VariantContext truth, final List<VariantContext> calls, final TargetCollection<Target> targets) {
final Genotype truthGenotype = truth.getGenotype(sample);
// if there is no truth genotype for that sample, we output the "empty" genotype.
if (truthGenotype == null) {
return GenotypeBuilder.create(sample, Collections.emptyList());
}
final int truthCopyNumber = GATKProtectedVariantContextUtils.getAttributeAsInt(truthGenotype, GS_COPY_NUMBER_FORMAT_KEY, truthNeutralCopyNumber);
final CopyNumberTriStateAllele truthAllele = copyNumberToTrueAllele(truthCopyNumber);
final List<Pair<VariantContext, Genotype>> allCalls = calls.stream().map(vc -> new ImmutablePair<>(vc, vc.getGenotype(sample))).filter(pair -> pair.getRight() != null).filter(pair -> GATKProtectedVariantContextUtils.getAttributeAsString(pair.getRight(), XHMMSegmentGenotyper.DISCOVERY_KEY, XHMMSegmentGenotyper.DISCOVERY_FALSE).equals(XHMMSegmentGenotyper.DISCOVERY_TRUE)).collect(Collectors.toList());
final List<Pair<VariantContext, Genotype>> qualifiedCalls = composeQualifyingCallsList(targets, allCalls);
return buildAndAnnotateTruthOverlappingGenotype(sample, targets, truthGenotype, truthCopyNumber, truthAllele, qualifiedCalls);
}
use of htsjdk.variant.variantcontext.Genotype in project gatk-protected by broadinstitute.
the class ConvertGSVariantsToSegments method apply.
@Override
public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) {
final SimpleInterval interval = new SimpleInterval(variant);
final int targetCount = targets.indexRange(interval).size();
final int[] callCounts = new int[CopyNumberTriState.values().length];
for (final Genotype genotype : variant.getGenotypes().iterateInSampleNameOrder()) {
final String sample = genotype.getSampleName();
final double mean = doubleFrom(genotype.getExtendedAttribute(GS_COPY_NUMBER_FRACTION));
final int copyNumber = intFrom(genotype.getExtendedAttribute(GS_COPY_NUMBER_FORMAT));
final CopyNumberTriState call = copyNumber == neutralCopyNumber ? CopyNumberTriState.NEUTRAL : (copyNumber < neutralCopyNumber) ? CopyNumberTriState.DELETION : CopyNumberTriState.DUPLICATION;
callCounts[call.ordinal()]++;
final double[] probs = doubleArrayFrom(genotype.getExtendedAttribute(GS_COPY_NUMBER_POSTERIOR));
final double log10PostQualCall = calculateLog10CallQuality(probs, call);
final double log10PostQualNonRef = calculateLog10CallQualityNonRef(probs);
final double phredProbCall = -10.0 * log10PostQualCall;
final double phredProbNonRef = -10.0 * log10PostQualNonRef;
final HiddenStateSegment<CopyNumberTriState, Target> segment = new HiddenStateSegment<>(interval, targetCount, mean, // GS VCF does not contain any stddev or var estimate for coverage fraction.
0.0, call, // GS does not provide an EQ, we approximate it to be the 1 - sum of all call compatible CN corresponding posterior probs
phredProbCall, // GS does not provide a SQ, we leave is a NaN.
Double.NaN, // GS does not provide a START Q.
Double.NaN, // GS does not provide a END Q.
Double.NaN, phredProbNonRef);
final HiddenStateSegmentRecord<CopyNumberTriState, Target> record = new HiddenStateSegmentRecord<>(sample, segment);
try {
outputWriter.writeRecord(record);
} catch (final IOException ex) {
throw new UserException.CouldNotCreateOutputFile(outputFile, ex);
}
}
}
use of htsjdk.variant.variantcontext.Genotype in project gatk-protected by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method outputCases.
private void outputCases(final GenotypeEvaluationRecordWriter caseWriter, final VariantEvaluationContext vc, final TargetCollection<Target> targets) {
if (caseWriter == null) {
return;
}
for (final Genotype g : vc.getGenotypes()) {
final EvaluationClass evalClass = GATKProtectedVariantContextUtils.getAttributeAsObject(g, VariantEvaluationContext.EVALUATION_CLASS_KEY, EvaluationClass::parseString, null);
if (evalClass == null) {
continue;
}
final String sample = g.getSampleName();
final SimpleInterval interval = new SimpleInterval(vc);
final int targetCount = getTargetCount(targets, interval, g);
final Set<String> variantFilters = vc.getFilters();
final Genotype genotype = vc.getGenotype(sample);
final Set<String> genotypeFilterArray = genotype.getFilters() == null ? Collections.emptySet() : Stream.of(genotype.getFilters().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)).collect(Collectors.toSet());
final Set<String> allFilterStrings = new LinkedHashSet<>();
allFilterStrings.addAll(variantFilters);
allFilterStrings.addAll(genotypeFilterArray);
final Set<EvaluationFilter> allFilters = allFilterStrings.stream().map(EvaluationFilter::fromString).filter(Objects::nonNull).collect(Collectors.toSet());
final GenotypeEvaluationRecord record = new GenotypeEvaluationRecord(sample, interval, targetCount, evalClass, allFilters, vc);
try {
caseWriter.writeRecord(record);
} catch (final IOException ex) {
throw new UserException.CouldNotCreateOutputFile(caseDetailOutputFile, ex);
}
}
}
use of htsjdk.variant.variantcontext.Genotype in project gatk-protected by broadinstitute.
the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputTruthConcordance.
private void checkOutputTruthConcordance(final File truthFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
final List<VariantContext> truthVariants = readVCFFile(truthFile);
final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
for (final VariantContext truth : truthVariants) {
final List<Target> overlappingTargets = targets.targets(truth);
final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(truth)).collect(Collectors.toList());
if (overlappingTargets.isEmpty()) {
Assert.assertTrue(overlappingOutput.isEmpty());
continue;
}
Assert.assertFalse(overlappingOutput.isEmpty());
Assert.assertEquals(overlappingOutput.stream().filter(vc -> new SimpleInterval(truth).equals(new SimpleInterval(vc))).count(), 1);
@SuppressWarnings("all") final Optional<VariantContext> prospectiveMatchingOutput = overlappingOutput.stream().filter(vc -> new SimpleInterval(truth).equals(new SimpleInterval(vc))).findFirst();
Assert.assertTrue(prospectiveMatchingOutput.isPresent());
final VariantContext matchingOutput = prospectiveMatchingOutput.get();
final int[] truthAC = calculateACFromTruth(truth);
final long truthAN = MathUtils.sum(truthAC);
final double[] truthAF = IntStream.of(Arrays.copyOfRange(truthAC, 1, truthAC.length)).mapToDouble(d -> d / (double) truthAN).toArray();
Assert.assertEquals(matchingOutput.getAttributeAsInt(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, -1), truthAN);
assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.TRUTH_ALLELE_FREQUENCY_KEY, () -> new double[2], 0.0), truthAF, 0.001);
assertOutputVariantFilters(filteringOptions, overlappingTargets, matchingOutput, truthAF);
for (final String sample : outputSamples) {
final Genotype outputGenotype = matchingOutput.getGenotype(sample);
final Genotype truthGenotype = truth.getGenotype(sample);
final int truthCN = GATKProtectedVariantContextUtils.getAttributeAsInt(truthGenotype, ConvertGSVariantsToSegments.GS_COPY_NUMBER_FORMAT, -1);
final int truthGT = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.TRUTH_GENOTYPE_KEY, -1);
final Object truthQualObject = outputGenotype.getAnyAttribute(VariantEvaluationContext.TRUTH_QUALITY_KEY);
Assert.assertNotNull(truthQualObject, "" + truthGenotype);
final double truthQual = Double.parseDouble(String.valueOf(truthQualObject));
if (truthQual < filteringOptions.minimumTruthSegmentQuality) {
Assert.assertEquals(outputGenotype.getFilters(), EvaluationFilter.LowQuality.acronym);
} else {
Assert.assertTrue(outputGenotype.getFilters() == null || outputGenotype.getFilters().equals(VCFConstants.PASSES_FILTERS_v4));
}
final double[] truthPosteriors = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(truthGenotype, ConvertGSVariantsToSegments.GS_COPY_NUMBER_POSTERIOR, () -> new double[0], Double.NEGATIVE_INFINITY);
final double truthDelPosterior = MathUtils.log10SumLog10(truthPosteriors, 0, EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT);
final double truthRefPosterior = truthPosteriors[EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT];
final double truthDupPosterior = truthPosteriors.length < EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT ? Double.NEGATIVE_INFINITY : MathUtils.log10SumLog10(truthPosteriors, EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT + 1, truthPosteriors.length);
final CopyNumberTriStateAllele truthAllele = truthGT == -1 ? null : CopyNumberTriStateAllele.ALL_ALLELES.get(truthGT);
if (truthCN < EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT) {
Assert.assertEquals(truthAllele, CopyNumberTriStateAllele.DEL);
Assert.assertEquals(truthQual * -.1, MathUtils.log10SumLog10(new double[] { truthRefPosterior, truthDupPosterior }), 0.01);
} else if (truthCN > EvaluateCopyNumberTriStateCalls.REFERENCE_COPY_NUMBER_DEFAULT) {
Assert.assertEquals(truthAllele, CopyNumberTriStateAllele.DUP);
Assert.assertEquals(truthQual * -.1, MathUtils.log10SumLog10(new double[] { truthDelPosterior, truthRefPosterior }), 0.01, "" + truthGenotype + " " + outputGenotype);
} else {
Assert.assertEquals(truthAllele, CopyNumberTriStateAllele.REF);
Assert.assertEquals(truthQual * -.1, MathUtils.log10SumLog10(new double[] { truthDelPosterior, truthDupPosterior }), 0.01);
}
final double outputTruthFraction = GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, -1);
final double inputTruthFraction = GATKProtectedVariantContextUtils.getAttributeAsDouble(truthGenotype, ConvertGSVariantsToSegments.GS_COPY_NUMBER_FRACTION, -1);
Assert.assertEquals(outputTruthFraction, inputTruthFraction, 0.01);
}
}
}
use of htsjdk.variant.variantcontext.Genotype in project gatk-protected by broadinstitute.
the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputCallsWithOverlappingTruthConcordance.
private void checkOutputCallsWithOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
final List<VariantContext> truthVariants = readVCFFile(truthFile);
final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
final List<VariantContext> callsVariants = readVCFFile(callsFile);
final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
for (final VariantContext truth : truthVariants) {
final List<Target> overlappingTargets = targets.targets(truth);
final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(truth)).collect(Collectors.toList());
final List<VariantContext> overlappingCalls = callsVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(truth)).collect(Collectors.toList());
if (overlappingTargets.isEmpty()) {
Assert.assertTrue(overlappingOutput.isEmpty());
continue;
}
@SuppressWarnings("all") final VariantContext matchingOutput = overlappingOutput.stream().filter(vc -> new SimpleInterval(truth).equals(new SimpleInterval(vc))).findAny().get();
final int[] sampleCallsCount = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
for (final String sample : outputSamples) {
final Genotype outputGenotype = matchingOutput.getGenotype(sample);
final List<Pair<VariantContext, Genotype>> sampleCalls = overlappingCalls.stream().map(vc -> new ImmutablePair<>(vc, vc.getGenotype(sample))).filter(p -> XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(p.getRight().getExtendedAttribute(XHMMSegmentGenotyper.DISCOVERY_KEY))).filter(p -> callPassFilters(p.getLeft(), p.getRight(), targets, filteringOptions)).collect(Collectors.toList());
final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
sampleCalls.forEach(p -> {
expectedCounts[CopyNumberTriStateAllele.valueOf(p.getRight().getAllele(0)).index()]++;
});
final int[] actualCounts = GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0);
Assert.assertEquals(actualCounts, expectedCounts, Arrays.toString(actualCounts) + " " + Arrays.toString(expectedCounts));
final int expectedTotalCount = (int) MathUtils.sum(expectedCounts);
final int actualTotalCount = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1);
Assert.assertEquals(actualTotalCount, expectedTotalCount);
final int expectedTargetCount = sampleCalls.stream().mapToInt(p -> targets.targetCount(p.getLeft())).sum();
final int observedTargetCount = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, -1);
Assert.assertEquals(observedTargetCount, expectedTargetCount);
final Allele truthCallAllele = outputTruthAllele(outputGenotype);
final boolean isMixed = IntStream.of(actualCounts).filter(i -> i > 0).count() > 1;
final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
if (sampleCalls.size() > 0 && !isMixed) {
final Pair<VariantContext, Genotype> bestCall = sampleCalls.stream().sorted((p1, p2) -> -Double.compare(callGQ(p1.getRight()), callGQ(p2.getRight()))).findFirst().get();
final CopyNumberTriStateAllele expectedCall = CopyNumberTriStateAllele.valueOf(bestCall.getRight().getAllele(0));
final CopyNumberTriStateAllele actualCall = CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0));
Assert.assertEquals(actualCall, expectedCall);
sampleCallsCount[expectedCall.index()]++;
if (!truthCallAllele.isReference()) {
if (truthCallAllele.equals(actualCall)) {
Assert.assertEquals(evalClass, EvaluationClass.TRUE_POSITIVE.acronym);
} else if (!truthCallAllele.isNoCall()) {
Assert.assertEquals(evalClass, EvaluationClass.DISCORDANT_POSITIVE.acronym);
} else {
Assert.assertNull(evalClass);
}
} else if (truthCallAllele.isReference()) {
Assert.assertEquals(evalClass, EvaluationClass.FALSE_POSITIVE.acronym);
}
} else {
Assert.assertEquals(Allele.NO_CALL, outputGenotype.getAllele(0));
if (sampleCalls.isEmpty()) {
Assert.assertEquals(evalClass, !truthCallAllele.isReference() && truthCallAllele.isCalled() ? EvaluationClass.FALSE_NEGATIVE.acronym : null);
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, -1), 0.0);
} else {
Assert.assertEquals(evalClass, !truthCallAllele.isReference() && truthCallAllele.isCalled() ? EvaluationClass.MIXED_POSITIVE.acronym : EvaluationClass.FALSE_POSITIVE.acronym);
final Pair<VariantContext, Genotype> bestCall = sampleCalls.stream().sorted((p1, p2) -> -Double.compare(callGQ(p1.getRight()), callGQ(p2.getRight()))).findFirst().get();
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, -1), callGQ(bestCall.getRight()));
}
}
}
final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
Assert.assertEquals(observedAN, expectedAN);
final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0).toArray(), 1, sampleCallsCount.length);
final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
Assert.assertNotNull(observedAF);
assertEquals(observedAF, expectedAF, 0.01);
}
}
Aggregations