use of htsjdk.variant.variantcontext.Allele in project gatk by broadinstitute.
the class ReadThreadingAssemblerUnitTest method testAssembleRefAndInsertion.
@Test(dataProvider = "AssembleIntervalsWithVariantData")
public void testAssembleRefAndInsertion(final ReadThreadingAssembler assembler, final SimpleInterval loc, final int nReadsToUse, final int variantSite) {
final byte[] refBases = seq.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getEnd()).getBases();
for (int insertionLength = 1; insertionLength < 10; insertionLength++) {
final Allele refBase = Allele.create(refBases[variantSite], false);
final Allele altBase = Allele.create(new String(refBases).substring(variantSite, variantSite + insertionLength + 1), true);
final VariantContextBuilder vcb = new VariantContextBuilder("x", loc.getContig(), variantSite, variantSite + insertionLength, Arrays.asList(refBase, altBase));
testAssemblyWithVariant(assembler, refBases, loc, nReadsToUse, vcb.make());
}
}
use of htsjdk.variant.variantcontext.Allele in project gatk by broadinstitute.
the class GermlineProbabilityCalculator method calculateAnnotations.
public static Map<String, Object> calculateAnnotations(List<VariantContext> germlineResourceVariants, final List<Allele> altAlleles, final double[] tumorLog10Odds, final Optional<double[]> normalLog10Odds, final double afOfAllelesNotInGermlineResource, final double log10PriorProbOfSomaticEvent) {
final double[] normalLog10OddsOrFlat = normalLog10Odds.orElseGet(() -> MathUtils.applyToArray(tumorLog10Odds, x -> 0));
final Optional<VariantContext> germlineVC = germlineResourceVariants.isEmpty() ? Optional.empty() : // assume only one VC per site
Optional.of(germlineResourceVariants.get(0));
final double[] populationAlleleFrequencies = getGermlineAltAlleleFrequencies(altAlleles, germlineVC, afOfAllelesNotInGermlineResource);
// note the minus sign required because Mutect has the convention that this is log odds of allele *NOT* being in the normal
final double[] germlineLog10Posteriors = new IndexRange(0, altAlleles.size()).mapToDouble(n -> log10PosteriorProbabilityOfGermlineVariant(-normalLog10OddsOrFlat[n], tumorLog10Odds[n], populationAlleleFrequencies[n], log10PriorProbOfSomaticEvent));
return ImmutableMap.of(POPULATION_AF_VCF_ATTRIBUTE, populationAlleleFrequencies, GERMLINE_POSTERIORS_VCF_ATTRIBUTE, germlineLog10Posteriors);
}
use of htsjdk.variant.variantcontext.Allele in project gatk by broadinstitute.
the class ReadThreadingAssembler method composeGivenHaplotypes.
/**
* Create the list of artificial GGA-mode haplotypes by injecting each of the provided alternate alleles into the reference haplotype
*
* @param refHaplotype the reference haplotype
* @param givenHaplotypes the list of alternate alleles in VariantContexts
* @param activeRegionWindow the window containing the reference haplotype
*
* @return a non-null list of haplotypes
*/
private static List<Haplotype> composeGivenHaplotypes(final Haplotype refHaplotype, final List<VariantContext> givenHaplotypes, final SimpleInterval activeRegionWindow) {
Utils.nonNull(refHaplotype, "the reference haplotype cannot be null");
Utils.nonNull(givenHaplotypes, "given haplotypes cannot be null");
Utils.nonNull(activeRegionWindow, "active region window cannot be null");
Utils.validateArg(activeRegionWindow.size() == refHaplotype.length(), "inconsistent reference haplotype and active region window");
final Set<Haplotype> returnHaplotypes = new LinkedHashSet<>();
final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
for (final VariantContext compVC : givenHaplotypes) {
Utils.validateArg(GATKVariantContextUtils.overlapsRegion(compVC, activeRegionWindow), " some variant provided does not overlap with active region window");
for (final Allele compAltAllele : compVC.getAlternateAlleles()) {
final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart());
if (insertedRefHaplotype != null) {
// can be null if the requested allele can't be inserted into the haplotype
returnHaplotypes.add(insertedRefHaplotype);
}
}
}
return new ArrayList<>(returnHaplotypes);
}
use of htsjdk.variant.variantcontext.Allele in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputCallsWithOverlappingTruthConcordance.
private void checkOutputCallsWithOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
final List<VariantContext> truthVariants = readVCFFile(truthFile);
final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
final List<VariantContext> callsVariants = readVCFFile(callsFile);
final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
for (final VariantContext truth : truthVariants) {
final List<Target> overlappingTargets = targets.targets(truth);
final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(truth)).collect(Collectors.toList());
final List<VariantContext> overlappingCalls = callsVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(truth)).collect(Collectors.toList());
if (overlappingTargets.isEmpty()) {
Assert.assertTrue(overlappingOutput.isEmpty());
continue;
}
@SuppressWarnings("all") final VariantContext matchingOutput = overlappingOutput.stream().filter(vc -> new SimpleInterval(truth).equals(new SimpleInterval(vc))).findAny().get();
final int[] sampleCallsCount = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
for (final String sample : outputSamples) {
final Genotype outputGenotype = matchingOutput.getGenotype(sample);
final List<Pair<VariantContext, Genotype>> sampleCalls = overlappingCalls.stream().map(vc -> new ImmutablePair<>(vc, vc.getGenotype(sample))).filter(p -> XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(p.getRight().getExtendedAttribute(XHMMSegmentGenotyper.DISCOVERY_KEY))).filter(p -> callPassFilters(p.getLeft(), p.getRight(), targets, filteringOptions)).collect(Collectors.toList());
final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
sampleCalls.forEach(p -> {
expectedCounts[CopyNumberTriStateAllele.valueOf(p.getRight().getAllele(0)).index()]++;
});
final int[] actualCounts = GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0);
Assert.assertEquals(actualCounts, expectedCounts, Arrays.toString(actualCounts) + " " + Arrays.toString(expectedCounts));
final int expectedTotalCount = (int) MathUtils.sum(expectedCounts);
final int actualTotalCount = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1);
Assert.assertEquals(actualTotalCount, expectedTotalCount);
final int expectedTargetCount = sampleCalls.stream().mapToInt(p -> targets.targetCount(p.getLeft())).sum();
final int observedTargetCount = GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, -1);
Assert.assertEquals(observedTargetCount, expectedTargetCount);
final Allele truthCallAllele = outputTruthAllele(outputGenotype);
final boolean isMixed = IntStream.of(actualCounts).filter(i -> i > 0).count() > 1;
final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
if (sampleCalls.size() > 0 && !isMixed) {
final Pair<VariantContext, Genotype> bestCall = sampleCalls.stream().sorted((p1, p2) -> -Double.compare(callGQ(p1.getRight()), callGQ(p2.getRight()))).findFirst().get();
final CopyNumberTriStateAllele expectedCall = CopyNumberTriStateAllele.valueOf(bestCall.getRight().getAllele(0));
final CopyNumberTriStateAllele actualCall = CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0));
Assert.assertEquals(actualCall, expectedCall);
sampleCallsCount[expectedCall.index()]++;
if (!truthCallAllele.isReference()) {
if (truthCallAllele.equals(actualCall)) {
Assert.assertEquals(evalClass, EvaluationClass.TRUE_POSITIVE.acronym);
} else if (!truthCallAllele.isNoCall()) {
Assert.assertEquals(evalClass, EvaluationClass.DISCORDANT_POSITIVE.acronym);
} else {
Assert.assertNull(evalClass);
}
} else if (truthCallAllele.isReference()) {
Assert.assertEquals(evalClass, EvaluationClass.FALSE_POSITIVE.acronym);
}
} else {
Assert.assertEquals(Allele.NO_CALL, outputGenotype.getAllele(0));
if (sampleCalls.isEmpty()) {
Assert.assertEquals(evalClass, !truthCallAllele.isReference() && truthCallAllele.isCalled() ? EvaluationClass.FALSE_NEGATIVE.acronym : null);
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, -1), 0.0);
} else {
Assert.assertEquals(evalClass, !truthCallAllele.isReference() && truthCallAllele.isCalled() ? EvaluationClass.MIXED_POSITIVE.acronym : EvaluationClass.FALSE_POSITIVE.acronym);
final Pair<VariantContext, Genotype> bestCall = sampleCalls.stream().sorted((p1, p2) -> -Double.compare(callGQ(p1.getRight()), callGQ(p2.getRight()))).findFirst().get();
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, -1), callGQ(bestCall.getRight()));
}
}
}
final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
Assert.assertEquals(observedAN, expectedAN);
final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0).toArray(), 1, sampleCallsCount.length);
final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
Assert.assertNotNull(observedAF);
assertEquals(observedAF, expectedAF, 0.01);
}
}
use of htsjdk.variant.variantcontext.Allele in project gatk by broadinstitute.
the class EvaluateCopyNumberTriStateCallsIntegrationTest method checkOutputCallsWithoutOverlappingTruthConcordance.
private void checkOutputCallsWithoutOverlappingTruthConcordance(final File truthFile, final File callsFile, final File targetsFile, final File vcfOutput, final EvaluationFiltersArgumentCollection filteringOptions) {
final List<VariantContext> truthVariants = readVCFFile(truthFile);
final List<VariantContext> outputVariants = readVCFFile(vcfOutput);
final List<VariantContext> callsVariants = readVCFFile(callsFile);
final Set<String> outputSamples = outputVariants.get(0).getSampleNames();
final TargetCollection<Target> targets = TargetArgumentCollection.readTargetCollection(targetsFile);
for (final VariantContext call : callsVariants) {
final List<Target> overlappingTargets = targets.targets(call);
final List<VariantContext> overlappingOutput = outputVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
final List<VariantContext> overlappingTruth = truthVariants.stream().filter(vc -> new SimpleInterval(vc).overlaps(call)).collect(Collectors.toList());
if (!overlappingTruth.isEmpty()) {
continue;
}
@SuppressWarnings("all") final Optional<VariantContext> matchingOutputOptional = overlappingOutput.stream().filter(vc -> new SimpleInterval(call).equals(new SimpleInterval(vc))).findAny();
final VariantContext matchingOutput = matchingOutputOptional.get();
final int[] sampleCallsCount = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
for (final String sample : outputSamples) {
final Genotype outputGenotype = matchingOutput.getGenotype(sample);
final Genotype callGenotype = call.getGenotype(sample);
final Allele expectedCall = callGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(callGenotype.getAllele(0)) : null;
final Allele actualCall = outputGenotype.getAllele(0).isCalled() ? CopyNumberTriStateAllele.valueOf(outputGenotype.getAllele(0)) : null;
Assert.assertEquals(expectedCall, actualCall);
final boolean expectedDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
final boolean actualDiscovered = XHMMSegmentGenotyper.DISCOVERY_TRUE.equals(GATKProtectedVariantContextUtils.getAttributeAsString(callGenotype, XHMMSegmentGenotyper.DISCOVERY_KEY, "N"));
Assert.assertEquals(actualDiscovered, expectedDiscovered);
final int[] expectedCounts = new int[CopyNumberTriStateAllele.ALL_ALLELES.size()];
if (expectedCall.isCalled() && actualDiscovered) {
expectedCounts[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
}
if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY)) {
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsIntArray(outputGenotype, VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, () -> new int[CopyNumberTriStateAllele.ALL_ALLELES.size()], 0), expectedCounts);
}
if (outputGenotype.hasExtendedAttribute(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY)) {
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsInt(outputGenotype, VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, -1), expectedCall.isCalled() && actualDiscovered ? 1 : 0);
}
final String evalClass = GATKProtectedVariantContextUtils.getAttributeAsString(outputGenotype, VariantEvaluationContext.EVALUATION_CLASS_KEY, null);
Assert.assertEquals(evalClass, expectedCall.isCalled() && actualDiscovered && expectedCall.isNonReference() ? EvaluationClass.UNKNOWN_POSITIVE.acronym : null);
if (expectedCall.isCalled()) {
sampleCallsCount[CopyNumberTriStateAllele.valueOf(expectedCall).index()]++;
}
Assert.assertEquals(GATKProtectedVariantContextUtils.getAttributeAsDouble(outputGenotype, VariantEvaluationContext.CALL_QUALITY_KEY, 0.0), callGQ(callGenotype), 0.01);
}
final int expectedAN = (int) MathUtils.sum(sampleCallsCount);
final int observedAN = matchingOutput.getAttributeAsInt(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, -1);
Assert.assertEquals(observedAN, expectedAN);
final double[] expectedAF = Arrays.copyOfRange(IntStream.of(sampleCallsCount).mapToDouble(i -> expectedAN > 0 ? i / (double) expectedAN : 0.0).toArray(), 1, sampleCallsCount.length);
final double[] observedAF = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(matchingOutput, VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, () -> new double[matchingOutput.getAlternateAlleles().size()], 0.0);
Assert.assertNotNull(observedAF);
assertEquals(observedAF, expectedAF, 0.01);
Assert.assertEquals(matchingOutput.getAttributeAsInt(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, -1), 0);
}
}
Aggregations