use of htsjdk.variant.variantcontext.Genotype in project gatk-protected by broadinstitute.
the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputCallsWithoutOverlappingTruthConcordance.
private void checkOutputCallsWithoutOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
final List<VariantContext> truthVariants = readVCFFile(truthFile);
final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
final List<VariantContext> callsVariants = readVCFFile(callsFile);
final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
for (final VariantContext call : callsVariants) {
final List<Target> overlappingTargets = targets.targets(call);
final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
final List<VariantContext> overlappingTruth = truthVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
if (!overlappingTruth.isEmpty()) {
continue;
}
@SuppressWarnings("all") final Optional<VariantContext> matchingOutputOptional = overlappingOutput.stream().filter(vc -> new SimpleInterval(call).equals(new SimpleInterval(vc))).findAny();
final VariantContext matchingOutput = matchingOutputOptional.get();
final int[] sampleCallsCount = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
for (final String sample : outputSamples) {
final Genotype outputGenotype = matchingOutput.getGenotype(sample);
final Genotype callGenotype = call.getGenotype(sample);
final Allele expectedCall = callGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(callGenotype.getAllele(0)) : null;
final Allele actualCall = outputGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0)) : null;
Assert.assertEquals(expectedCall, actualCall);
final boolean expectedDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
final boolean actualDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
Assert.assertEquals(actualDiscovered, expectedDiscovered);
final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
if (expectedCall.isCalled() && actualDiscovered) {
expectedCounts[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
}
if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY)) {
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0), expectedCounts);
}
if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY)) {
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1), expectedCall.isCalled() && actualDiscovered ? 1 : 0);
}
final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
Assert.assertEquals(evalClass, expectedCall.isCalled() && actualDiscovered && expectedCall.isNonReference() ? EvaluationClass.UNKNOWN_POSITIVE.acronym : null);
if (expectedCall.isCalled()) {
sampleCallsCount[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
}
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, 0.0), callGQ(callGenotype), 0.01);
}
final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
Assert.assertEquals(observedAN, expectedAN);
final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0).toArray(), 1, sampleCallsCount.length);
final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
Assert.assertNotNull(observedAF);
assertEquals(observedAF, expectedAF, 0.01);
Assert.assertEquals(matchingOutput.getAttributeAsInt(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, -1), 0);
}
}
use of htsjdk.variant.variantcontext.Genotype in project gatk-protected by broadinstitute.
the class ConvertGSVariantsToSegmentsIntegrationTest method composeExpectedSegments.
private List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> composeExpectedSegments(final File vcf, final TargetCollection<Target> targets) throws IOException {
final VCFFileReader reader = new VCFFileReader(vcf, false);
final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> result = new ArrayList<>();
reader.iterator().forEachRemaining(vc -> {
final int targetCount = targets.indexRange(vc).size();
for (final Genotype genotype : vc.getGenotypes()) {
final int cn = Integer.parseInt(genotype.getExtendedAttribute("CN").toString());
final double[] cnp = Stream.of(genotype.getExtendedAttribute("CNP").toString().replaceAll("\\[\\]", "").split(",")).mapToDouble(Double::parseDouble).toArray();
final double cnpSum = MathUtils.approximateLog10SumLog10(cnp);
final CopyNumberTriState call = expectedCall(cn);
final double exactLog10Prob = expectedExactLog10(call, cnp);
final HiddenStateSegment<CopyNumberTriState, Target> expectedSegment = new HiddenStateSegment<>(new SimpleInterval(vc), targetCount, Double.parseDouble(genotype.getExtendedAttribute("CNF").toString()), 0.000, call, -10.0 * exactLog10Prob, Double.NaN, Double.NaN, Double.NaN, -10.0 * (cnp[ConvertGSVariantsToSegments.NEUTRAL_COPY_NUMBER_DEFAULT] - cnpSum));
result.add(new HiddenStateSegmentRecord<>(genotype.getSampleName(), expectedSegment));
}
});
return result;
}
use of htsjdk.variant.variantcontext.Genotype in project gatk-protected by broadinstitute.
the class OrientationBiasFiltererUnitTest method testAnnotateVariantContextWithPreprocessingValuesMultiArtifact.
@Test
public void testAnnotateVariantContextWithPreprocessingValuesMultiArtifact() {
final FeatureDataSource<VariantContext> featureDataSource = new FeatureDataSource<>(new File(smallM2VcfMore));
SortedSet<Transition> relevantTransitions = new TreeSet<>();
relevantTransitions.add(Transition.transitionOf('G', 'T'));
relevantTransitions.add(Transition.transitionOf('C', 'T'));
final Map<Transition, Double> preAdapterQFakeScoreMap = new HashMap<>();
final double amGTPreAdapterQ = 20.0;
final double amCTPreAdapterQ = 25.0;
// preAdapterQ suppression will do nothing.
preAdapterQFakeScoreMap.put(relevantTransitions.first(), amGTPreAdapterQ);
// preAdapterQ suppression will do nothing.
preAdapterQFakeScoreMap.put(relevantTransitions.last(), amCTPreAdapterQ);
for (final VariantContext vc : featureDataSource) {
final VariantContext updatedVariantContext = OrientationBiasFilterer.annotateVariantContextWithPreprocessingValues(vc, relevantTransitions, preAdapterQFakeScoreMap);
final Genotype genotypeTumor = updatedVariantContext.getGenotype("TUMOR");
final Genotype genotypeNormal = updatedVariantContext.getGenotype("NORMAL");
// This is mostly just to make sure that nobody breaks the test itself. I.e. that this test will test all tumor genotype paths be artifact or non-artifact.
boolean wasGenotypeTumorTested = false;
// Check whether this genotype is reverse complement or actual artifact mode
wasGenotypeTumorTested |= assertArtifact(amGTPreAdapterQ, genotypeTumor, relevantTransitions.first());
wasGenotypeTumorTested |= assertArtifact(amCTPreAdapterQ, genotypeTumor, relevantTransitions.last());
// Check any variants that are not an artifact mode but are SNP
if (!OrientationBiasUtils.isGenotypeInTransitionsWithComplement(genotypeTumor, relevantTransitions)) {
assertNotTransition(genotypeTumor);
wasGenotypeTumorTested = true;
} else {
// Check attributes common to all variants in artifact mode
Assert.assertNotEquals(genotypeTumor.getExtendedAttribute(OrientationBiasFilterConstants.FOB, VCFConstants.EMPTY_ALLELE), VCFConstants.EMPTY_ALLELE);
Assert.assertNotEquals(genotypeTumor.getExtendedAttribute(OrientationBiasFilterConstants.P_ARTIFACT_FIELD_NAME, VCFConstants.EMPTY_ALLELE), VCFConstants.EMPTY_ALLELE);
}
// The NORMAL is always ref/ref in the example file.
assertNormal(genotypeNormal);
Assert.assertTrue(wasGenotypeTumorTested, "The test seems to be broken... A variant context was tested, but it had no tumor genotype.");
}
}
use of htsjdk.variant.variantcontext.Genotype in project gatk-protected by broadinstitute.
the class OrientationBiasFiltererUnitTest method testAnnotateVariantContextWithPreprocessingValues.
@Test
public void testAnnotateVariantContextWithPreprocessingValues() {
final FeatureDataSource<VariantContext> featureDataSource = new FeatureDataSource<>(new File(smallM2Vcf));
SortedSet<Transition> relevantTransitions = new TreeSet<>();
relevantTransitions.add(Transition.transitionOf('G', 'T'));
final Map<Transition, Double> preAdapterQFakeScoreMap = new HashMap<>();
final double amGTPreAdapterQ = 20.0;
// preAdapterQ suppression will do nothing.
preAdapterQFakeScoreMap.put(Transition.transitionOf('G', 'T'), amGTPreAdapterQ);
for (final VariantContext vc : featureDataSource) {
final VariantContext updatedVariantContext = OrientationBiasFilterer.annotateVariantContextWithPreprocessingValues(vc, relevantTransitions, preAdapterQFakeScoreMap);
final Genotype genotypeTumor = updatedVariantContext.getGenotype("TUMOR");
final Genotype genotypeNormal = updatedVariantContext.getGenotype("NORMAL");
Assert.assertNotEquals(genotypeTumor.getExtendedAttribute(OrientationBiasFilterConstants.FOB, VCFConstants.EMPTY_ALLELE), VCFConstants.EMPTY_ALLELE);
Assert.assertNotEquals(genotypeTumor.getExtendedAttribute(OrientationBiasFilterConstants.P_ARTIFACT_FIELD_NAME, VCFConstants.EMPTY_ALLELE), VCFConstants.EMPTY_ALLELE);
assertArtifact(amGTPreAdapterQ, genotypeTumor, Transition.transitionOf('G', 'T'));
// The NORMAL is always ref/ref in the example file.
assertNormal(genotypeNormal);
}
}
use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputCallsWithOverlappingTruthConcordance.
private void checkOutputCallsWithOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
final List<VariantContext> truthVariants = readVCFFile(truthFile);
final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
final List<VariantContext> callsVariants = readVCFFile(callsFile);
final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
for (final VariantContext truth : truthVariants) {
final List<Target> overlappingTargets = targets.targets(truth);
final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(truth)).collect(Collectors.toList());
final List<VariantContext> overlappingCalls = callsVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(truth)).collect(Collectors.toList());
if (overlappingTargets.isEmpty()) {
Assert.assertTrue(overlappingOutput.isEmpty());
continue;
}
@SuppressWarnings("all") final VariantContext matchingOutput = overlappingOutput.stream().filter(vc -> new SimpleInterval(truth).equals(new SimpleInterval(vc))).findAny().get();
final int[] sampleCallsCount = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
for (final String sample : outputSamples) {
final Genotype outputGenotype = matchingOutput.getGenotype(sample);
final List<Pair<VariantContext, Genotype>> sampleCalls = overlappingCalls.stream().map(vc -> new ImmutablePair<>(vc, vc.getGenotype(sample))).filter(p -> XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(p.getRight().getExtendedAttribute(XHMMSegmentGenotyper.DISCOVERY_KEY))).filter(p -> callPassFilters(p.getLeft(), p.getRight(), targets, filteringOptions)).collect(Collectors.toList());
final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
sampleCalls.forEach(p -> {
expectedCounts[CopyNumberTriStateAllele.valueOf(p.getRight().getAllele(0)).index()]++;
});
final int[] actualCounts = GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0);
Assert.assertEquals(actualCounts, expectedCounts, Arrays.toString(actualCounts) + " " + Arrays.toString(expectedCounts));
final int expectedTotalCount = (int) MathUtils.sum(expectedCounts);
final int actualTotalCount = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1);
Assert.assertEquals(actualTotalCount, expectedTotalCount);
final int expectedTargetCount = sampleCalls.stream().mapToInt(p -> targets.targetCount(p.getLeft())).sum();
final int observedTargetCount = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, -1);
Assert.assertEquals(observedTargetCount, expectedTargetCount);
final Allele truthCallAllele = outputTruthAllele(outputGenotype);
final boolean isMixed = IntStream.of(actualCounts).filter(i -> i > 0).count() > 1;
final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
if (sampleCalls.size() > 0 && !isMixed) {
final Pair<VariantContext, Genotype> bestCall = sampleCalls.stream().sorted((p1, p2) -> -Double.compare(callGQ(p1.getRight()), callGQ(p2.getRight()))).findFirst().get();
final CopyNumberTriStateAllele expectedCall = CopyNumberTriStateAllele.valueOf(bestCall.getRight().getAllele(0));
final CopyNumberTriStateAllele actualCall = CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0));
Assert.assertEquals(actualCall, expectedCall);
sampleCallsCount[expectedCall.index()]++;
if (!truthCallAllele.isReference()) {
if (truthCallAllele.equals(actualCall)) {
Assert.assertEquals(evalClass, EvaluationClass.TRUE_POSITIVE.acronym);
} else if (!truthCallAllele.isNoCall()) {
Assert.assertEquals(evalClass, EvaluationClass.DISCORDANT_POSITIVE.acronym);
} else {
Assert.assertNull(evalClass);
}
} else if (truthCallAllele.isReference()) {
Assert.assertEquals(evalClass, EvaluationClass.FALSE_POSITIVE.acronym);
}
} else {
Assert.assertEquals(Allele.NO_CALL, outputGenotype.getAllele(0));
if (sampleCalls.isEmpty()) {
Assert.assertEquals(evalClass, !truthCallAllele.isReference() && truthCallAllele.isCalled() ? EvaluationClass.FALSE_NEGATIVE.acronym : null);
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, -1), 0.0);
} else {
Assert.assertEquals(evalClass, !truthCallAllele.isReference() && truthCallAllele.isCalled() ? EvaluationClass.MIXED_POSITIVE.acronym : EvaluationClass.FALSE_POSITIVE.acronym);
final Pair<VariantContext, Genotype> bestCall = sampleCalls.stream().sorted((p1, p2) -> -Double.compare(callGQ(p1.getRight()), callGQ(p2.getRight()))).findFirst().get();
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, -1), callGQ(bestCall.getRight()));
}
}
}
final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
Assert.assertEquals(observedAN, expectedAN);
final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0).toArray(), 1, sampleCallsCount.length);
final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
Assert.assertNotNull(observedAF);
assertEquals(observedAF, expectedAF, 0.01);
}
}
Aggregations