use of htsjdk.variant.variantcontext.VariantContext in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputCallsWithoutOverlappingTruthConcordance.
private void checkOutputCallsWithoutOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
final List<VariantContext> truthVariants = readVCFFile(truthFile);
final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
final List<VariantContext> callsVariants = readVCFFile(callsFile);
final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
for (final VariantContext call : callsVariants) {
final List<Target> overlappingTargets = targets.targets(call);
final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
final List<VariantContext> overlappingTruth = truthVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
if (!overlappingTruth.isEmpty()) {
continue;
}
@SuppressWarnings("all") final Optional<VariantContext> matchingOutputOptional = overlappingOutput.stream().filter(vc -> new SimpleInterval(call).equals(new SimpleInterval(vc))).findAny();
final VariantContext matchingOutput = matchingOutputOptional.get();
final int[] sampleCallsCount = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
for (final String sample : outputSamples) {
final Genotype outputGenotype = matchingOutput.getGenotype(sample);
final Genotype callGenotype = call.getGenotype(sample);
final Allele expectedCall = callGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(callGenotype.getAllele(0)) : null;
final Allele actualCall = outputGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0)) : null;
Assert.assertEquals(expectedCall, actualCall);
final boolean expectedDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
final boolean actualDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
Assert.assertEquals(actualDiscovered, expectedDiscovered);
final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
if (expectedCall.isCalled() && actualDiscovered) {
expectedCounts[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
}
if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY)) {
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0), expectedCounts);
}
if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY)) {
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1), expectedCall.isCalled() && actualDiscovered ? 1 : 0);
}
final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
Assert.assertEquals(evalClass, expectedCall.isCalled() && actualDiscovered && expectedCall.isNonReference() ? EvaluationClass.UNKNOWN_POSITIVE.acronym : null);
if (expectedCall.isCalled()) {
sampleCallsCount[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
}
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, 0.0), callGQ(callGenotype), 0.01);
}
final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
Assert.assertEquals(observedAN, expectedAN);
final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0).toArray(), 1, sampleCallsCount.length);
final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
Assert.assertNotNull(observedAF);
assertEquals(observedAF, expectedAF, 0.01);
Assert.assertEquals(matchingOutput.getAttributeAsInt(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, -1), 0);
}
}
use of htsjdk.variant.variantcontext.VariantContext in project gatk-protected by broadinstitute.
the class ReferenceConfidenceModelUnitTest method testRefConfidencePartialReads.
@Test
public void testRefConfidencePartialReads() {
final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples, 2);
final IndependentSampleGenotypesModel genotypingModel = new IndependentSampleGenotypesModel();
final String ref = "ACGTAACCGGTT";
for (int readLen = 3; readLen < ref.length(); readLen++) {
for (int start = 0; start < ref.length() - readLen; start++) {
final RefConfData data = new RefConfData(ref, 0);
final List<Haplotype> haplotypes = Arrays.asList(data.getRefHap());
final List<VariantContext> calls = Collections.emptyList();
data.getActiveRegion().add(data.makeRead(start, readLen));
final ReadLikelihoods<Haplotype> likelihoods = createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion());
final List<Integer> expectedDPs = new ArrayList<>(Collections.nCopies(data.getActiveRegion().getSpan().size(), 0));
for (int i = start; i < readLen + start; i++) expectedDPs.set(i, 1);
final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, ploidyModel, calls);
checkReferenceModelResult(data, contexts, expectedDPs, calls);
}
}
}
use of htsjdk.variant.variantcontext.VariantContext in project gatk-protected by broadinstitute.
the class ReferenceConfidenceModelUnitTest method checkReferenceModelResult.
private void checkReferenceModelResult(final RefConfData data, final List<VariantContext> contexts, final List<Integer> expectedDPs, final List<VariantContext> calls) {
Assert.assertNotNull(contexts);
final SimpleInterval loc = data.getActiveRegion().getExtendedSpan();
final List<Boolean> seenBP = new ArrayList<>(Collections.nCopies(data.getActiveRegion().getSpan().size(), false));
for (int i = 0; i < loc.size(); i++) {
final GenomeLoc curPos = parser.createGenomeLoc(loc.getContig(), loc.getStart() + i);
final VariantContext call = model.getOverlappingVariantContext(curPos, calls);
final VariantContext refModel = model.getOverlappingVariantContext(curPos, contexts);
if (!data.getActiveRegion().getSpan().contains(curPos)) {
// part of the extended interval, but not the full interval
Assert.assertNull(refModel);
continue;
}
if (call != null) {
if (call.isVariant() && refModel.getType() == VariantContext.Type.SYMBOLIC) {
//Assert.assertEquals(refModel, call, "Should have found call " + call + " but found " + refModel + " instead");
// must be a deletion.
Assert.assertTrue(call.getReference().length() > 1);
// the deletion must not start at the same position
Assert.assertTrue(call.getStart() < refModel.getStart());
Assert.assertEquals(call.getReference().getBaseString().substring(refModel.getStart() - call.getStart(), refModel.getStart() - call.getStart() + 1), refModel.getReference().getBaseString(), // the reference must be the same.
"" + data.getRefHap());
// No confidence in the reference hom-ref call across the deletion
Assert.assertTrue(refModel.getGenotype(0).getGQ() <= 0);
// the reference and the lonelly <NON_REF>
Assert.assertEquals(refModel.getAlleles().size(), 2);
Assert.assertEquals(refModel.getAlleles().get(1), GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
} else {
Assert.assertEquals(refModel, call, "Should have found call " + call + " but found " + refModel + " instead");
}
} else {
final int expectedDP = expectedDPs.get(curPos.getStart() - data.getActiveRegion().getSpan().getStart());
Assert.assertEquals(refModel.getStart(), loc.getStart() + i);
Assert.assertEquals(refModel.getEnd(), loc.getStart() + i);
Assert.assertFalse(refModel.hasLog10PError());
Assert.assertEquals(refModel.getAlternateAlleles().size(), 1);
Assert.assertEquals(refModel.getAlternateAllele(0), GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
Assert.assertTrue(refModel.hasGenotype(sample));
final Genotype g = refModel.getGenotype(sample);
Assert.assertTrue(g.hasAD());
Assert.assertTrue(g.hasDP());
Assert.assertEquals(g.getDP(), expectedDP);
Assert.assertTrue(g.hasGQ());
Assert.assertTrue(g.hasPL());
}
final VariantContext vc = call == null ? refModel : call;
if (curPos.getStart() == vc.getStart()) {
for (int pos = vc.getStart(); pos <= vc.getEnd(); pos++) {
final int j = pos - data.getActiveRegion().getSpan().getStart();
Assert.assertFalse(seenBP.get(j));
seenBP.set(j, true);
}
}
}
for (int i = 0; i < seenBP.size(); i++) {
Assert.assertEquals((boolean) seenBP.get(i), true);
}
}
use of htsjdk.variant.variantcontext.VariantContext in project gatk-protected by broadinstitute.
the class ReferenceConfidenceModelUnitTest method testOverlappingVariantContext.
@Test
public void testOverlappingVariantContext() {
final VariantContext vc10 = GATKVariantContextUtils.makeFromAlleles("test", "1", 10, Arrays.asList("A", "C"));
final VariantContext vc13 = GATKVariantContextUtils.makeFromAlleles("test", "1", 13, Arrays.asList("A", "C"));
final VariantContext vc12_15 = GATKVariantContextUtils.makeFromAlleles("test", "1", 12, Arrays.asList("ACAT", "A"));
final VariantContext vc18 = GATKVariantContextUtils.makeFromAlleles("test", "1", 18, Arrays.asList("A", "ACAT"));
final List<VariantContext> calls = Arrays.asList(vc13, vc12_15, vc18, vc10);
checkOverlapping(8, calls, null);
checkOverlapping(9, calls, null);
checkOverlapping(10, calls, vc10);
checkOverlapping(11, calls, null);
checkOverlapping(12, calls, vc12_15);
checkOverlapping(13, calls, vc13);
checkOverlapping(14, calls, vc12_15);
checkOverlapping(15, calls, vc12_15);
checkOverlapping(16, calls, null);
checkOverlapping(17, calls, null);
checkOverlapping(18, calls, vc18);
checkOverlapping(19, calls, null);
checkOverlapping(20, calls, null);
}
use of htsjdk.variant.variantcontext.VariantContext in project gatk-protected by broadinstitute.
the class ReferenceConfidenceModelUnitTest method checkOverlapping.
private void checkOverlapping(final int pos, Collection<VariantContext> calls, final VariantContext expected) {
final GenomeLoc loc = parser.createGenomeLoc(parser.getSequenceDictionary().getSequences().get(0).getSequenceName(), pos, pos);
final VariantContext actual = model.getOverlappingVariantContext(loc, calls);
Assert.assertEquals(actual, expected);
}
Aggregations