use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputCallsWithoutOverlappingTruthConcordance.
/**
 * Checks the evaluation output for every input call that does not overlap any truth variant.
 * <p>
 * For each such call an output variant with exactly the same interval must exist. Per sample, the
 * called allele, the called allele counts, the called segment count, the evaluation class and the
 * call quality of the output genotype are compared against values derived from the input call
 * genotype. Per variant, the calls allele number and allele frequencies are compared against the
 * per-sample tallies, and the truth allele number is required to be 0.
 * </p>
 *
 * @param truthFile the truth VCF file.
 * @param callsFile the input calls VCF file.
 * @param targetsFile the targets file.
 * @param vcfOutput the VCF file produced by the tool under test.
 * @param filteringOptions the filtering options used in the run; not consulted by this check.
 */
private void checkOutputCallsWithoutOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
    final List<VariantContext> truthVariants = readVCFFile(truthFile);
    final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
    final List<VariantContext> callsVariants = readVCFFile(callsFile);
    // Fail with a readable message rather than an IndexOutOfBoundsException on an empty output.
    Assert.assertFalse(outputVariants.isEmpty(), "the output VCF does not contain any variant");
    final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
    final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
    for (final VariantContext call : callsVariants) {
        // NOTE(review): the overlapping targets are looked up but not used by any assertion below.
        final List<Target> overlappingTargets = targets.targets(call);
        final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
        final List<VariantContext> overlappingTruth = truthVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
        // This check only covers calls without any overlapping truth.
        if (!overlappingTruth.isEmpty()) {
            continue;
        }
        final VariantContext matchingOutput = overlappingOutput.stream()
                .filter(vc -> new SimpleInterval(call).equals(new SimpleInterval(vc)))
                .findAny()
                .orElseThrow(() -> new AssertionError("no output variant has the same interval as call " + call));
        final int[] sampleCallsCount = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
        for (final String sample : outputSamples) {
            final Genotype outputGenotype = matchingOutput.getGenotype(sample);
            final Genotype callGenotype = call.getGenotype(sample);
            // A null here stands for "no call" on the first allele.
            final Allele expectedCall = callGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(callGenotype.getAllele(0)) : null;
            final Allele actualCall = outputGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0)) : null;
            Assert.assertEquals(actualCall, expectedCall);
            // Null-safe: a no-call genotype is simply "not called" instead of a NullPointerException.
            final boolean expectedIsCalled = expectedCall != null && expectedCall.isCalled();
            final boolean expectedDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
            // NOTE(review): this reads the input call genotype, exactly like expectedDiscovered, so the
            // assertion below can never fail. Reading outputGenotype looks like the intent -- confirm
            // that the output genotype carries the discovery attribute before changing it.
            final boolean actualDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
            Assert.assertEquals(actualDiscovered, expectedDiscovered);
            final boolean expectedQualifiedCall = expectedIsCalled && actualDiscovered;
            final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
            if (expectedQualifiedCall) {
                expectedCounts[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
            }
            if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY)) {
                Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0), expectedCounts);
            }
            if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY)) {
                Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1), expectedQualifiedCall ? 1 : 0);
            }
            final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
            // Without overlapping truth, only a discovered non-reference call gets an evaluation class.
            Assert.assertEquals(evalClass, expectedQualifiedCall && expectedCall.isNonReference() ? EvaluationClass.UNKNOWN_POSITIVE.acronym : null);
            if (expectedIsCalled) {
                sampleCallsCount[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
            }
            Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, 0.0), callGQ(callGenotype), 0.01);
        }
        final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
        final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
        Assert.assertEquals(observedAN, expectedAN);
        // Index 0 is dropped so that only the remaining alleles contribute a frequency.
        final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0).toArray(), 1, sampleCallsCount.length);
        final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
        Assert.assertNotNull(observedAF);
        assertEquals(observedAF, expectedAF, 0.01);
        Assert.assertEquals(matchingOutput.getAttributeAsInt(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, -1), 0);
    }
}
use of htsjdk.variant.variantcontext.Genotype in project gatk-protected by broadinstitute.
the class ReferenceConfidenceModelUnitTest method checkReferenceModelResult.
/**
 * Walks every position of the extended active region span and checks the reference model
 * variant context found there against the expected call or the expected depth.
 * <p>
 * Positions outside the (non-extended) span must have no reference model context. At the end,
 * every position of the span must have been covered exactly once by a call or a reference
 * model context.
 * </p>
 *
 * @param data the test data providing the active region and the reference haplotype.
 * @param contexts the reference model variant contexts to check; must not be {@code null}.
 * @param expectedDPs expected DP values, indexed by offset from the start of the span.
 * @param calls the variant calls expected to show up in the reference model output.
 */
private void checkReferenceModelResult(final RefConfData data, final List<VariantContext> contexts, final List<Integer> expectedDPs, final List<VariantContext> calls) {
    Assert.assertNotNull(contexts);
    final SimpleInterval extendedSpan = data.getActiveRegion().getExtendedSpan();
    final List<Boolean> covered = new ArrayList<>(Collections.nCopies(data.getActiveRegion().getSpan().size(), false));
    final int firstPosition = extendedSpan.getStart();
    final int positionCount = extendedSpan.size();
    for (int position = firstPosition; position < firstPosition + positionCount; position++) {
        final GenomeLoc curPos = parser.createGenomeLoc(extendedSpan.getContig(), position);
        final VariantContext call = model.getOverlappingVariantContext(curPos, calls);
        final VariantContext refModel = model.getOverlappingVariantContext(curPos, contexts);
        if (!data.getActiveRegion().getSpan().contains(curPos)) {
            // Inside the extended interval only: nothing must be emitted here.
            Assert.assertNull(refModel);
            continue;
        }
        if (call == null) {
            // No call at this position: expect a single-base reference block with full annotations.
            final int expectedDP = expectedDPs.get(curPos.getStart() - data.getActiveRegion().getSpan().getStart());
            Assert.assertEquals(refModel.getStart(), position);
            Assert.assertEquals(refModel.getEnd(), position);
            Assert.assertFalse(refModel.hasLog10PError());
            Assert.assertEquals(refModel.getAlternateAlleles().size(), 1);
            Assert.assertEquals(refModel.getAlternateAllele(0), GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
            Assert.assertTrue(refModel.hasGenotype(sample));
            final Genotype genotype = refModel.getGenotype(sample);
            Assert.assertTrue(genotype.hasAD());
            Assert.assertTrue(genotype.hasDP());
            Assert.assertEquals(genotype.getDP(), expectedDP);
            Assert.assertTrue(genotype.hasGQ());
            Assert.assertTrue(genotype.hasPL());
        } else if (call.isVariant() && refModel.getType() == VariantContext.Type.SYMBOLIC) {
            // A symbolic reference block under a variant call: the call must be a deletion ...
            Assert.assertTrue(call.getReference().length() > 1);
            // ... that starts before the reference block does.
            Assert.assertTrue(call.getStart() < refModel.getStart());
            // The block's reference base must match the deleted base at the same position.
            final int offsetInDeletion = refModel.getStart() - call.getStart();
            final String deletedBase = call.getReference().getBaseString().substring(offsetInDeletion, offsetInDeletion + 1);
            Assert.assertEquals(deletedBase, refModel.getReference().getBaseString(), "" + data.getRefHap());
            // No confidence in a hom-ref call across the deletion.
            Assert.assertTrue(refModel.getGenotype(0).getGQ() <= 0);
            // Only the reference and the lone <NON_REF> allele are present.
            Assert.assertEquals(refModel.getAlleles().size(), 2);
            Assert.assertEquals(refModel.getAlleles().get(1), GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
        } else {
            Assert.assertEquals(refModel, call, "Should have found call " + call + " but found " + refModel + " instead");
        }
        final VariantContext vc = call != null ? call : refModel;
        // Mark the bases covered by this context once, when standing on its first position.
        if (curPos.getStart() == vc.getStart()) {
            for (int pos = vc.getStart(); pos <= vc.getEnd(); pos++) {
                final int spanOffset = pos - data.getActiveRegion().getSpan().getStart();
                Assert.assertFalse(covered.get(spanOffset));
                covered.set(spanOffset, true);
            }
        }
    }
    // Every base of the span must have been covered.
    for (final boolean wasCovered : covered) {
        Assert.assertEquals(wasCovered, true);
    }
}
use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
the class ConvertGSVariantsToSegments method apply.
/**
 * Converts each genotype of the input variant into a copy-number segment record and writes it
 * to the output writer.
 * <p>
 * The call is NEUTRAL when the genotype copy number equals the neutral copy number, DELETION
 * when it is lower and DUPLICATION otherwise. Qualities are expressed in Phred scale, computed
 * as -10 times the log10 qualities derived from the copy-number posteriors.
 * </p>
 *
 * @param variant the variant whose genotypes are converted; it also defines the segment interval.
 * @param readsContext not used.
 * @param referenceContext not used.
 * @param featureContext not used.
 * @throws UserException.CouldNotCreateOutputFile if a record cannot be written to the output.
 */
@Override
public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) {
    final SimpleInterval interval = new SimpleInterval(variant);
    final int targetCount = targets.indexRange(interval).size();
    for (final Genotype genotype : variant.getGenotypes().iterateInSampleNameOrder()) {
        final String sample = genotype.getSampleName();
        final double mean = doubleFrom(genotype.getExtendedAttribute(GS_COPY_NUMBER_FRACTION));
        final int copyNumber = intFrom(genotype.getExtendedAttribute(GS_COPY_NUMBER_FORMAT));
        final CopyNumberTriState call = copyNumber == neutralCopyNumber ? CopyNumberTriState.NEUTRAL : (copyNumber < neutralCopyNumber) ? CopyNumberTriState.DELETION : CopyNumberTriState.DUPLICATION;
        final double[] probs = doubleArrayFrom(genotype.getExtendedAttribute(GS_COPY_NUMBER_POSTERIOR));
        final double log10PostQualCall = calculateLog10CallQuality(probs, call);
        final double log10PostQualNonRef = calculateLog10CallQualityNonRef(probs);
        final double phredProbCall = -10.0 * log10PostQualCall;
        final double phredProbNonRef = -10.0 * log10PostQualNonRef;
        final HiddenStateSegment<CopyNumberTriState, Target> segment = new HiddenStateSegment<>(interval, targetCount, mean,
                // GS VCF does not contain any stddev or var estimate for coverage fraction.
                0.0,
                call,
                // GS does not provide an EQ, we approximate it to be 1 - the sum of the posterior
                // probabilities of all copy numbers compatible with the call.
                phredProbCall,
                // GS does not provide a SQ, we leave it as NaN.
                Double.NaN,
                // GS does not provide a START Q.
                Double.NaN,
                // GS does not provide an END Q.
                Double.NaN,
                phredProbNonRef);
        final HiddenStateSegmentRecord<CopyNumberTriState, Target> record = new HiddenStateSegmentRecord<>(sample, segment);
        try {
            outputWriter.writeRecord(record);
        } catch (final IOException ex) {
            throw new UserException.CouldNotCreateOutputFile(outputFile, ex);
        }
    }
}
use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method buildAndAnnotateTruthOverlappingGenotype.
/**
 * Builds the output genotype of a sample for a truth variant, annotated with the summary of the
 * qualifying calls that overlap it.
 * <p>
 * The genotype allele is the called allele when all calls agree on a single one, and no-call
 * otherwise. An evaluation class is only annotated when some call is a deletion or duplication,
 * or the truth is not the reference allele.
 * </p>
 *
 * @param sample the sample name.
 * @param targets the target collection used to count the targets covered by each call.
 * @param truthGenotype the truth genotype of the sample.
 * @param truthCopyNumber the truth copy number of the sample.
 * @param truthAllele the truth allele of the sample.
 * @param calls the qualifying calls, each paired with the sample's genotype in that call.
 * @return the annotated genotype.
 */
private Genotype buildAndAnnotateTruthOverlappingGenotype(final String sample, final TargetCollection<Target> targets, final Genotype truthGenotype, final int truthCopyNumber, final CopyNumberTriStateAllele truthAllele, final List<Pair<VariantContext, Genotype>> calls) {
    final Set<CopyNumberTriStateAllele> calledAlleles = calls.stream()
            .map(pair -> CopyNumberTriStateAllele.valueOf(pair.getRight().getAllele(0)))
            .collect(Collectors.toSet());
    final int distinctCalledAlleleCount = calledAlleles.size();

    // The call allele is only defined when every call agrees on it.
    final Allele consensusAllele;
    if (distinctCalledAlleleCount == 1) {
        consensusAllele = calledAlleles.iterator().next();
    } else {
        consensusAllele = Allele.NO_CALL;
    }

    // Number of calls per allele, in ALL_ALLELES order.
    final int[] callCountsPerAllele = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
    int alleleIndex = 0;
    for (final Allele allele : CopyNumberTriStateAllele.ALL_ALLELES) {
        int matchingCalls = 0;
        for (final Pair<VariantContext, Genotype> pair : calls) {
            if (pair.getRight().getAllele(0).equals(allele, true)) {
                matchingCalls++;
            }
        }
        callCountsPerAllele[alleleIndex++] = matchingCalls;
    }

    // Total length in targets, summed across all the calls.
    int calledTargetCount = 0;
    for (final Pair<VariantContext, Genotype> pair : calls) {
        calledTargetCount += getTargetCount(targets, pair.getLeft(), pair.getRight());
    }

    // With several overlapping calls the best quality one wins; no calls means 0.
    final double callQuality = calls.stream()
            .mapToDouble(pair -> GATKProtectedVariantContextUtils.calculateGenotypeQualityFromPLs(pair.getRight()))
            .max()
            .orElse(0.0);

    final GenotypeBuilder builder = new GenotypeBuilder(sample);
    builder.alleles(Collections.singletonList(consensusAllele));
    builder.attribute(VariantEvaluationContext.TRUTH_GENOTYPE_KEY, CopyNumberTriStateAllele.ALL_ALLELES.indexOf(truthAllele));
    builder.attribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, calls.size());
    builder.attribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, callCountsPerAllele);
    builder.attribute(VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, calledTargetCount);
    builder.attribute(VariantEvaluationContext.CALL_QUALITY_KEY, callQuality);
    builder.attribute(VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, truthGenotype.getExtendedAttribute(GS_COPY_NUMBER_FRACTION_KEY));

    final double truthQuality = calculateTruthQuality(truthGenotype, truthCopyNumber);
    builder.attribute(VariantEvaluationContext.TRUTH_QUALITY_KEY, truthQuality);

    // Genotype filter: flag truth genotypes below the minimum truth segment quality.
    if (truthQuality >= filterArguments.minimumTruthSegmentQuality) {
        builder.filter(EvaluationFilter.PASS);
    } else {
        builder.filter(EvaluationFilter.LowQuality.acronym);
    }

    // Evaluation class (TP, FN, etc.): only when the truth or some call is not the reference.
    final boolean truthIsReference = truthAllele == CopyNumberTriStateAllele.REF;
    if (calledAlleles.contains(CopyNumberTriStateAllele.DEL) || calledAlleles.contains(CopyNumberTriStateAllele.DUP) || !truthIsReference) {
        final EvaluationClass evaluationClass;
        if (distinctCalledAlleleCount == 0 || (distinctCalledAlleleCount == 1 && calledAlleles.contains(CopyNumberTriStateAllele.REF))) {
            evaluationClass = EvaluationClass.FALSE_NEGATIVE;
        } else if (distinctCalledAlleleCount == 1) {
            if (calledAlleles.contains(truthAllele)) {
                evaluationClass = EvaluationClass.TRUE_POSITIVE;
            } else if (truthIsReference) {
                evaluationClass = EvaluationClass.FALSE_POSITIVE;
            } else {
                evaluationClass = EvaluationClass.DISCORDANT_POSITIVE;
            }
        } else if (truthIsReference) {
            evaluationClass = EvaluationClass.FALSE_POSITIVE;
        } else {
            evaluationClass = EvaluationClass.MIXED_POSITIVE;
        }
        builder.attribute(VariantEvaluationContext.EVALUATION_CLASS_KEY, evaluationClass.acronym);
    }
    return builder.make();
}
use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCalls method buildAndAnnotateTruthOverlappingGenotype.
/**
 * Builds the output genotype of a sample for a truth variant given the call variants to
 * consider.
 * <p>
 * Only calls that have a genotype for the sample and whose discovery attribute equals
 * {@code XHMMSegmentGenotyper.DISCOVERY_TRUE} are considered; those are then reduced to the
 * qualifying ones before the genotype is annotated.
 * </p>
 *
 * @param sample the sample name.
 * @param truth the truth variant.
 * @param calls the call variants to consider.
 * @param targets the target collection.
 * @return the "empty" genotype when the truth has no genotype for the sample, the annotated
 *         genotype otherwise.
 */
private Genotype buildAndAnnotateTruthOverlappingGenotype(final String sample, final VariantContext truth, final List<VariantContext> calls, final TargetCollection<Target> targets) {
    final Genotype truthGenotype = truth.getGenotype(sample);
    if (truthGenotype != null) {
        final int truthCopyNumber = GATKProtectedVariantContextUtils.getAttributeAsInt(truthGenotype, GS_COPY_NUMBER_FORMAT_KEY, truthNeutralCopyNumber);
        final CopyNumberTriStateAllele truthAllele = copyNumberToTrueAllele(truthCopyNumber);
        // Keep the discovered calls that actually have a genotype for this sample.
        final List<Pair<VariantContext, Genotype>> discoveredCalls = calls.stream()
                .map(vc -> new ImmutablePair<>(vc, vc.getGenotype(sample)))
                .filter(pair -> pair.getRight() != null
                        && GATKProtectedVariantContextUtils.getAttributeAsString(pair.getRight(), XHMMSegmentGenotyper.DISCOVERY_KEY, XHMMSegmentGenotyper.DISCOVERY_FALSE).equals(XHMMSegmentGenotyper.DISCOVERY_TRUE))
                .collect(Collectors.toList());
        return buildAndAnnotateTruthOverlappingGenotype(sample, targets, truthGenotype, truthCopyNumber, truthAllele,
                composeQualifyingCallsList(targets, discoveredCalls));
    }
    // No truth genotype for that sample: output the "empty" genotype.
    return GenotypeBuilder.create(sample, Collections.emptyList());
}
Aggregations