use of htsjdk.variant.variantcontext.Allele in project gatk by broadinstitute.
the class ReadThreadingAssembler method composeGivenHaplotypes.
/**
* Create the list of artificial GGA-mode haplotypes by injecting each of the provided alternate alleles into the reference haplotype
*
* @param refHaplotype the reference haplotype
* @param givenHaplotypes the list of alternate alleles in VariantContexts
* @param activeRegionWindow the window containing the reference haplotype
*
* @return a non-null list of haplotypes
*/
private static List<Haplotype> composeGivenHaplotypes(final Haplotype refHaplotype, final List<VariantContext> givenHaplotypes, final SimpleInterval activeRegionWindow) {
Utils.nonNull(refHaplotype, "the reference haplotype cannot be null");
Utils.nonNull(givenHaplotypes, "given haplotypes cannot be null");
Utils.nonNull(activeRegionWindow, "active region window cannot be null");
Utils.validateArg(activeRegionWindow.size() == refHaplotype.length(), "inconsistent reference haplotype and active region window");
final Set<Haplotype> returnHaplotypes = new LinkedHashSet<>();
final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
for (final VariantContext compVC : givenHaplotypes) {
Utils.validateArg(GATKVariantContextUtils.overlapsRegion(compVC, activeRegionWindow), " some variant provided does not overlap with active region window");
for (final Allele compAltAllele : compVC.getAlternateAlleles()) {
final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart());
if (insertedRefHaplotype != null) {
// can be null if the requested allele can't be inserted into the haplotype
returnHaplotypes.add(insertedRefHaplotype);
}
}
}
return new ArrayList<>(returnHaplotypes);
}
use of htsjdk.variant.variantcontext.Allele in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputCallsWithOverlappingTruthConcordance.
private void checkOutputCallsWithOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
final List<VariantContext> truthVariants = readVCFFile(truthFile);
final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
final List<VariantContext> callsVariants = readVCFFile(callsFile);
final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
for (final VariantContext truth : truthVariants) {
final List<Target> overlappingTargets = targets.targets(truth);
final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(truth)).collect(Collectors.toList());
final List<VariantContext> overlappingCalls = callsVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(truth)).collect(Collectors.toList());
if (overlappingTargets.isEmpty()) {
Assert.assertTrue(overlappingOutput.isEmpty());
continue;
}
@SuppressWarnings("all") final VariantContext matchingOutput = overlappingOutput.stream().filter(vc -> new SimpleInterval(truth).equals(new SimpleInterval(vc))).findAny().get();
final int[] sampleCallsCount = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
for (final String sample : outputSamples) {
final Genotype outputGenotype = matchingOutput.getGenotype(sample);
final List<Pair<VariantContext, Genotype>> sampleCalls = overlappingCalls.stream().map(vc -> new ImmutablePair<>(vc, vc.getGenotype(sample))).filter(p -> XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(p.getRight().getExtendedAttribute(XHMMSegmentGenotyper.DISCOVERY_KEY))).filter(p -> callPassFilters(p.getLeft(), p.getRight(), targets, filteringOptions)).collect(Collectors.toList());
final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
sampleCalls.forEach(p -> {
expectedCounts[CopyNumberTriStateAllele.valueOf(p.getRight().getAllele(0)).index()]++;
});
final int[] actualCounts = GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0);
Assert.assertEquals(actualCounts, expectedCounts, Arrays.toString(actualCounts) + " " + Arrays.toString(expectedCounts));
final int expectedTotalCount = (int) MathUtils.sum(expectedCounts);
final int actualTotalCount = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1);
Assert.assertEquals(actualTotalCount, expectedTotalCount);
final int expectedTargetCount = sampleCalls.stream().mapToInt(p -> targets.targetCount(p.getLeft())).sum();
final int observedTargetCount = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, -1);
Assert.assertEquals(observedTargetCount, expectedTargetCount);
final Allele truthCallAllele = outputTruthAllele(outputGenotype);
final boolean isMixed = IntStream.of(actualCounts).filter(i -> i > 0).count() > 1;
final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
if (sampleCalls.size() > 0 && !isMixed) {
final Pair<VariantContext, Genotype> bestCall = sampleCalls.stream().sorted((p1, p2) -> -Double.compare(callGQ(p1.getRight()), callGQ(p2.getRight()))).findFirst().get();
final CopyNumberTriStateAllele expectedCall = CopyNumberTriStateAllele.valueOf(bestCall.getRight().getAllele(0));
final CopyNumberTriStateAllele actualCall = CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0));
Assert.assertEquals(actualCall, expectedCall);
sampleCallsCount[expectedCall.index()]++;
if (!truthCallAllele.isReference()) {
if (truthCallAllele.equals(actualCall)) {
Assert.assertEquals(evalClass, EvaluationClass.TRUE_POSITIVE.acronym);
} else if (!truthCallAllele.isNoCall()) {
Assert.assertEquals(evalClass, EvaluationClass.DISCORDANT_POSITIVE.acronym);
} else {
Assert.assertNull(evalClass);
}
} else if (truthCallAllele.isReference()) {
Assert.assertEquals(evalClass, EvaluationClass.FALSE_POSITIVE.acronym);
}
} else {
Assert.assertEquals(Allele.NO_CALL, outputGenotype.getAllele(0));
if (sampleCalls.isEmpty()) {
Assert.assertEquals(evalClass, !truthCallAllele.isReference() && truthCallAllele.isCalled() ? EvaluationClass.FALSE_NEGATIVE.acronym : null);
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, -1), 0.0);
} else {
Assert.assertEquals(evalClass, !truthCallAllele.isReference() && truthCallAllele.isCalled() ? EvaluationClass.MIXED_POSITIVE.acronym : EvaluationClass.FALSE_POSITIVE.acronym);
final Pair<VariantContext, Genotype> bestCall = sampleCalls.stream().sorted((p1, p2) -> -Double.compare(callGQ(p1.getRight()), callGQ(p2.getRight()))).findFirst().get();
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, -1), callGQ(bestCall.getRight()));
}
}
}
final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
Assert.assertEquals(observedAN, expectedAN);
final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0).toArray(), 1, sampleCallsCount.length);
final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
Assert.assertNotNull(observedAF);
assertEquals(observedAF, expectedAF, 0.01);
}
}
use of htsjdk.variant.variantcontext.Allele in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputCallsWithoutOverlappingTruthConcordance.
private void checkOutputCallsWithoutOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
final List<VariantContext> truthVariants = readVCFFile(truthFile);
final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
final List<VariantContext> callsVariants = readVCFFile(callsFile);
final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
for (final VariantContext call : callsVariants) {
final List<Target> overlappingTargets = targets.targets(call);
final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
final List<VariantContext> overlappingTruth = truthVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
if (!overlappingTruth.isEmpty()) {
continue;
}
@SuppressWarnings("all") final Optional<VariantContext> matchingOutputOptional = overlappingOutput.stream().filter(vc -> new SimpleInterval(call).equals(new SimpleInterval(vc))).findAny();
final VariantContext matchingOutput = matchingOutputOptional.get();
final int[] sampleCallsCount = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
for (final String sample : outputSamples) {
final Genotype outputGenotype = matchingOutput.getGenotype(sample);
final Genotype callGenotype = call.getGenotype(sample);
final Allele expectedCall = callGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(callGenotype.getAllele(0)) : null;
final Allele actualCall = outputGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0)) : null;
Assert.assertEquals(expectedCall, actualCall);
final boolean expectedDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
final boolean actualDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
Assert.assertEquals(actualDiscovered, expectedDiscovered);
final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
if (expectedCall.isCalled() && actualDiscovered) {
expectedCounts[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
}
if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY)) {
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0), expectedCounts);
}
if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY)) {
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1), expectedCall.isCalled() && actualDiscovered ? 1 : 0);
}
final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
Assert.assertEquals(evalClass, expectedCall.isCalled() && actualDiscovered && expectedCall.isNonReference() ? EvaluationClass.UNKNOWN_POSITIVE.acronym : null);
if (expectedCall.isCalled()) {
sampleCallsCount[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
}
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, 0.0), callGQ(callGenotype), 0.01);
}
final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
Assert.assertEquals(observedAN, expectedAN);
final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0).toArray(), 1, sampleCallsCount.length);
final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
Assert.assertNotNull(observedAF);
assertEquals(observedAF, expectedAF, 0.01);
Assert.assertEquals(matchingOutput.getAttributeAsInt(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, -1), 0);
}
}
use of htsjdk.variant.variantcontext.Allele in project gatk by broadinstitute.
the class CopyNumberTriStateUnitTest method testAlleleListsLogic.
@Test
public void testAlleleListsLogic() {
Assert.assertEquals(CopyNumberTriState.ALL_ALLELES.size(), CopyNumberTriState.values().length);
Assert.assertEquals(CopyNumberTriState.ALL_ALLELES.subList(1, CopyNumberTriState.ALL_ALLELES.size()), CopyNumberTriState.ALTERNATIVE_ALLELES);
Assert.assertFalse(CopyNumberTriState.ALTERNATIVE_ALLELES.stream().anyMatch(Allele::isReference));
Assert.assertTrue(CopyNumberTriState.ALL_ALLELES.get(0).isReference());
Assert.assertEquals(CopyNumberTriState.ALL_ALLELES.get(0), CopyNumberTriState.NEUTRAL.allele);
final List<Allele> expectedOrder = new ArrayList<>();
for (final CopyNumberTriState state : CopyNumberTriState.values()) {
if (state == CopyNumberTriState.NEUTRAL) {
continue;
}
expectedOrder.add(state.allele);
}
Assert.assertEquals(CopyNumberTriState.ALTERNATIVE_ALLELES, expectedOrder);
}
use of htsjdk.variant.variantcontext.Allele in project gatk-protected by broadinstitute.
the class GermlineProbabilityCalculatorUnitTest method testGetGermlineAltAlleleFrequencies.
@Test
public void testGetGermlineAltAlleleFrequencies() {
final double defaultAF = 0.001;
final double nonDefaultAF1 = 0.1;
final double nonDefaultAF2 = 0.01;
final Allele Aref = Allele.create("A", true);
final Allele C = Allele.create("C");
final Allele G = Allele.create("G");
final Allele T = Allele.create("T");
final String source = "SOURCE";
final int start = 1;
final int stop = 1;
//biallelic, vc has the same alt allele
final List<Allele> altAlleles1 = Arrays.asList(C);
final VariantContext vc1 = new VariantContextBuilder(source, "1", start, stop, Arrays.asList(Aref, C)).attribute(VCFConstants.ALLELE_FREQUENCY_KEY, new double[] { nonDefaultAF1 }).make();
final double[] af1 = GermlineProbabilityCalculator.getGermlineAltAlleleFrequencies(altAlleles1, Optional.of(vc1), defaultAF);
Assert.assertEquals(af1.length, altAlleles1.size());
Assert.assertEquals(af1[0], nonDefaultAF1, 0.00001);
//biallelic, vc has different alt allele
final List<Allele> altAlleles2 = Arrays.asList(C);
final VariantContext vc2 = new VariantContextBuilder(source, "1", start, stop, Arrays.asList(Aref, G)).attribute(VCFConstants.ALLELE_FREQUENCY_KEY, new double[] { nonDefaultAF1 }).make();
final double[] af2 = GermlineProbabilityCalculator.getGermlineAltAlleleFrequencies(altAlleles2, Optional.of(vc2), defaultAF);
Assert.assertEquals(af2.length, altAlleles2.size());
Assert.assertEquals(af2[0], defaultAF, 0.00001);
//triallelic, same alt alleles
final List<Allele> altAlleles3 = Arrays.asList(C, G);
final VariantContext vc3 = new VariantContextBuilder(source, "1", start, stop, Arrays.asList(Aref, C, G)).attribute(VCFConstants.ALLELE_FREQUENCY_KEY, new double[] { nonDefaultAF1, nonDefaultAF2 }).make();
final double[] af3 = GermlineProbabilityCalculator.getGermlineAltAlleleFrequencies(altAlleles3, Optional.of(vc3), defaultAF);
Assert.assertEquals(af3.length, altAlleles3.size());
Assert.assertEquals(af3[0], nonDefaultAF1, 0.00001);
Assert.assertEquals(af3[1], nonDefaultAF2, 0.00001);
//triallelic, same alt alleles in different order
final List<Allele> altAlleles4 = Arrays.asList(C, G);
final VariantContext vc4 = new VariantContextBuilder(source, "1", start, stop, Arrays.asList(Aref, G, C)).attribute(VCFConstants.ALLELE_FREQUENCY_KEY, new double[] { nonDefaultAF1, nonDefaultAF2 }).make();
final double[] af4 = GermlineProbabilityCalculator.getGermlineAltAlleleFrequencies(altAlleles4, Optional.of(vc4), defaultAF);
Assert.assertEquals(af4.length, altAlleles4.size());
Assert.assertEquals(af4[0], nonDefaultAF2, 0.00001);
Assert.assertEquals(af4[1], nonDefaultAF1, 0.00001);
//triallelic, only one allele in common
final List<Allele> altAlleles5 = Arrays.asList(C, G);
final VariantContext vc5 = new VariantContextBuilder(source, "1", start, stop, Arrays.asList(Aref, C, T)).attribute(VCFConstants.ALLELE_FREQUENCY_KEY, new double[] { nonDefaultAF1, nonDefaultAF2 }).make();
final double[] af5 = GermlineProbabilityCalculator.getGermlineAltAlleleFrequencies(altAlleles5, Optional.of(vc5), defaultAF);
Assert.assertEquals(af5.length, altAlleles5.size());
Assert.assertEquals(af5[0], nonDefaultAF1, 0.00001);
Assert.assertEquals(af5[1], defaultAF, 0.00001);
}
Aggregations