Use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
The class AFCalculatorProvider, method getInstance.
/**
 * Returns an AF calculator capable of handling a particular variant context.
 * @param variantContext the target variant context.
 * @param defaultPloidy the assumed ploidy in case there is no GT call present to determine it.
 * @param maximumAltAlleles the maximum number of alternative alleles to support; capped by the context's own alternative allele count.
 * @return never {@code null}
 */
public AFCalculator getInstance(final VariantContext variantContext, final int defaultPloidy, final int maximumAltAlleles) {
    Utils.nonNull(variantContext, "variant context cannot be null");
    final int sampleCount = variantContext.getNSamples();
    if (sampleCount == 0) {
        return getInstance(defaultPloidy, maximumAltAlleles);
    }
    final GenotypesContext genotypes = variantContext.getGenotypes();
    final Genotype firstGenotype = genotypes.get(0);
    int ploidy = firstGenotype.getPloidy();
    if (ploidy <= 0) {
        ploidy = defaultPloidy;
    }
    for (int i = 1; i < sampleCount; i++) {
        final Genotype genotype = genotypes.get(i);
        final int declaredPloidy = genotype.getPloidy();
        final int actualPloidy = declaredPloidy <= 0 ? defaultPloidy : declaredPloidy;
        if (actualPloidy != ploidy) {
            // samples have mixed ploidies: fall back to the unbound-ploidy calculator
            ploidy = AFCalculatorImplementation.UNBOUND_PLOIDY;
            break;
        }
    }
    return getInstance(ploidy, Math.min(variantContext.getNAlleles() - 1, maximumAltAlleles));
}
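A caller-side sketch may help put this method in context. It is only an illustration under assumptions: provider stands in for any concrete AFCalculatorProvider, and the sample name, contig, and coordinates are made up; the builder calls themselves are standard htsjdk.
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import java.util.Arrays;

// provider: any concrete AFCalculatorProvider instance (hypothetical variable, not part of the snippet above)
static AFCalculator calculatorForSimpleSnp(final AFCalculatorProvider provider) {
    final Allele ref = Allele.create("A", true);
    final Allele alt = Allele.create("C", false);
    final Genotype gt = GenotypeBuilder.create("sample1", Arrays.asList(ref, alt));
    final VariantContext vc = new VariantContextBuilder("source", "1", 1000, 1000, Arrays.asList(ref, alt)).genotypes(gt).make();
    // defaultPloidy = 2 is only used if a genotype reports ploidy <= 0; maximumAltAlleles = 6 caps the model size
    return provider.getInstance(vc, 2, 6);
}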
Use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
The class AlleleFrequencyCalculator, method getLog10PNonRef.
//TODO: this should be a class of static methods once the old AFCalculator is gone.
/**
 * Computes the probability of the alleles segregating, given the genotype likelihoods of the samples in vc.
 *
 * @param vc the VariantContext holding the alleles and sample information; it must have at least one
 * alternative allele.
 * @param defaultPloidy the ploidy to assume for a genotype whose declared ploidy is 0.
 * @param maximumAlternativeAlleles the maximum number of alternative alleles to consider (not used by this implementation).
 * @param refSnpIndelPseudocounts a total hack: a length-3 vector containing Dirichlet prior pseudocounts to
 * be given to ref, alt SNP, and alt indel alleles. The hack won't be necessary once the old AF calculators are removed.
 * @return the allele frequency calculation result (never {@code null})
 */
@Override
public AFCalculationResult getLog10PNonRef(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles, final double[] refSnpIndelPseudocounts) {
    Utils.nonNull(vc, "VariantContext cannot be null");
    final int numAlleles = vc.getNAlleles();
    final List<Allele> alleles = vc.getAlleles();
    Utils.validateArg(numAlleles > 1, () -> "VariantContext has only a single reference allele, but getLog10PNonRef requires at least one alt allele " + vc);
    final double[] priorPseudocounts = alleles.stream().mapToDouble(a -> a.isReference() ? refPseudocount : (a.length() > 1 ? snpPseudocount : indelPseudocount)).toArray();
    double[] alleleCounts = new double[numAlleles];
    // log10(1/numAlleles)
    final double flatLog10AlleleFrequency = -MathUtils.log10(numAlleles);
    double[] log10AlleleFrequencies = new IndexRange(0, numAlleles).mapToDouble(n -> flatLog10AlleleFrequency);
    double alleleCountsMaximumDifference = Double.POSITIVE_INFINITY;
    while (alleleCountsMaximumDifference > THRESHOLD_FOR_ALLELE_COUNT_CONVERGENCE) {
        final double[] newAlleleCounts = effectiveAlleleCounts(vc, log10AlleleFrequencies);
        alleleCountsMaximumDifference = Arrays.stream(MathArrays.ebeSubtract(alleleCounts, newAlleleCounts)).map(Math::abs).max().getAsDouble();
        alleleCounts = newAlleleCounts;
        final double[] posteriorPseudocounts = MathArrays.ebeAdd(priorPseudocounts, alleleCounts);
        // first iteration uses flat prior in order to avoid local minimum where the prior + no pseudocounts gives such a low
        // effective allele frequency that it overwhelms the genotype likelihood of a real variant
        // basically, we want a chance to get non-zero pseudocounts before using a prior that's biased against a variant
        log10AlleleFrequencies = new Dirichlet(posteriorPseudocounts).log10MeanWeights();
    }
    double[] log10POfZeroCountsByAllele = new double[numAlleles];
    double log10PNoVariant = 0;
    for (final Genotype g : vc.getGenotypes()) {
        if (!g.hasLikelihoods()) {
            continue;
        }
        final int ploidy = g.getPloidy() == 0 ? defaultPloidy : g.getPloidy();
        final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, numAlleles);
        final double[] log10GenotypePosteriors = log10NormalizedGenotypePosteriors(g, glCalc, log10AlleleFrequencies);
        // add this sample's hom-ref posterior to the running log10 probability that no sample carries a variant
        log10PNoVariant += log10GenotypePosteriors[HOM_REF_GENOTYPE_INDEX];
        // per allele non-log space probabilities of zero counts for this sample
        // for each allele calculate the total probability of genotypes containing at least one copy of the allele
        final double[] log10ProbabilityOfNonZeroAltAlleles = new double[numAlleles];
        Arrays.fill(log10ProbabilityOfNonZeroAltAlleles, Double.NEGATIVE_INFINITY);
        for (int genotype = 0; genotype < glCalc.genotypeCount(); genotype++) {
            final double log10GenotypePosterior = log10GenotypePosteriors[genotype];
            glCalc.genotypeAlleleCountsAt(genotype).forEachAlleleIndexAndCount((alleleIndex, count) -> log10ProbabilityOfNonZeroAltAlleles[alleleIndex] = MathUtils.log10SumLog10(log10ProbabilityOfNonZeroAltAlleles[alleleIndex], log10GenotypePosterior));
        }
        for (int allele = 0; allele < numAlleles; allele++) {
            // if prob of non hom ref == 1 up to numerical precision, short-circuit to avoid NaN
            if (log10ProbabilityOfNonZeroAltAlleles[allele] >= 0) {
                log10POfZeroCountsByAllele[allele] = Double.NEGATIVE_INFINITY;
            } else {
                log10POfZeroCountsByAllele[allele] += MathUtils.log10OneMinusPow10(log10ProbabilityOfNonZeroAltAlleles[allele]);
            }
        }
    }
    // unfortunately AFCalculationResult expects integers for the MLE. We really should emit the EM non-integer values
    // which are valuable (e.g. in CombineGVCFs) as the sufficient statistics of the Dirichlet posterior on allele frequencies
    final int[] integerAlleleCounts = Arrays.stream(alleleCounts).mapToInt(x -> (int) Math.round(x)).toArray();
    final int[] integerAltAlleleCounts = Arrays.copyOfRange(integerAlleleCounts, 1, numAlleles);
    // skip the ref allele (index 0)
    final Map<Allele, Double> log10PRefByAllele = IntStream.range(1, numAlleles).boxed().collect(Collectors.toMap(alleles::get, a -> log10POfZeroCountsByAllele[a]));
    // we compute posteriors here and don't have the same prior that AFCalculationResult expects. Therefore, we
    // give it our posterior as its "likelihood" along with a flat dummy prior
    //TODO: HACK must be negative for AFCalcResult
    final double[] dummyFlatPrior = { -1e-10, -1e-10 };
    final double[] log10PosteriorOfNoVariantYesVariant = { log10PNoVariant, MathUtils.log10OneMinusPow10(log10PNoVariant) };
    return new AFCalculationResult(integerAltAlleleCounts, alleles, log10PosteriorOfNoVariantYesVariant, dummyFlatPrior, log10PRefByAllele);
}
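The while-loop above alternates between computing expected allele counts and refreshing the log10 allele frequencies as the log10 mean of a Dirichlet whose pseudocounts are the prior plus the current counts. A minimal stand-alone sketch of that update, assuming Dirichlet.log10MeanWeights() returns log10(alpha_i / sum(alpha)) (this is not GATK's Dirichlet class):
import java.util.Arrays;

// Stand-in for the frequency update: log10 of the Dirichlet mean for each component.
static double[] log10MeanWeightsSketch(final double[] pseudocounts) {
    final double sum = Arrays.stream(pseudocounts).sum();
    return Arrays.stream(pseudocounts).map(alpha -> Math.log10(alpha / sum)).toArray();
}
// With flat pseudocounts {1, 1, 1} every entry equals log10(1/3) = -0.477..., matching the
// flatLog10AlleleFrequency starting point of -MathUtils.log10(numAlleles) above.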
Use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
The class AlleleFrequencyCalculator, method log10NormalizedGenotypePosteriors.
private static double[] log10NormalizedGenotypePosteriors(final Genotype g, final GenotypeLikelihoodCalculator glCalc, final double[] log10AlleleFrequencies) {
    final double[] log10Likelihoods = g.getLikelihoods().getAsVector();
    final double[] log10Posteriors = new IndexRange(0, glCalc.genotypeCount()).mapToDouble(genotypeIndex -> {
        final GenotypeAlleleCounts gac = glCalc.genotypeAlleleCountsAt(genotypeIndex);
        // unnormalized log10 posterior = log10 combination count + log10 likelihood
        // + sum over alleles of (count in genotype) * log10 allele frequency
        return gac.log10CombinationCount() + log10Likelihoods[genotypeIndex] + gac.sumOverAlleleIndicesAndCounts((index, count) -> count * log10AlleleFrequencies[index]);
    });
    return MathUtils.normalizeLog10(log10Posteriors);
}
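MathUtils.normalizeLog10 turns the unnormalized log10 posteriors into log10 probabilities that sum to 1 in real space. A rough stand-alone sketch of that operation, as an assumption about its behavior rather than GATK's implementation:
import java.util.Arrays;

// Normalize log10 values so that sum(10^result[i]) == 1, using the usual max-factoring trick for numerical stability.
static double[] normalizeLog10Sketch(final double[] log10Values) {
    final double max = Arrays.stream(log10Values).max().getAsDouble();
    final double log10Sum = max + Math.log10(Arrays.stream(log10Values).map(x -> Math.pow(10, x - max)).sum());
    return Arrays.stream(log10Values).map(x -> x - log10Sum).toArray();
}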
Use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
The class GenotypeConcordanceTest, method testGenotypeConcordanceDetermineState.
@Test(dataProvider = "genotypeConcordanceDetermineStateDataProvider")
public void testGenotypeConcordanceDetermineState(final Allele truthAllele1, final Allele truthAllele2, final TruthState expectedTruthState, final Allele callAllele1, final Allele callAllele2, final CallState expectedCallState) throws Exception {
    final List<Allele> truthAlleles = makeUniqueListOfAlleles(truthAllele1, truthAllele2);
    final Genotype truthGt = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(truthAllele1, truthAllele2));
    final VariantContext truthVariantContext = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, truthAlleles).genotypes(truthGt).make();
    final List<Allele> callAlleles = makeUniqueListOfAlleles(callAllele1, callAllele2);
    final Genotype callGt = GenotypeBuilder.create(CALL_SAMPLE_NAME, Arrays.asList(callAllele1, callAllele2));
    final VariantContext callVariantContext = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, callAlleles).genotypes(callGt).make();
    testGenotypeConcordanceDetermineState(truthVariantContext, expectedTruthState, callVariantContext, expectedCallState, 0, 0);
}
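For orientation, one hypothetical row of the data provider could exercise this test as follows; Aref and C are the reusable test alleles seen in the filter test below, and the expected states are an illustrative guess consistent with the HET_REF_VAR1 usage there.
// Hypothetical invocation: truth genotype A/C het compared with call genotype A/C het.
testGenotypeConcordanceDetermineState(Aref, C, TruthState.HET_REF_VAR1, Aref, C, CallState.HET_REF_VAR1);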
Use of htsjdk.variant.variantcontext.Genotype in project gatk by broadinstitute.
The class GenotypeConcordanceTest, method testGenotypeConcordanceDetermineStateFilter.
@Test
public void testGenotypeConcordanceDetermineStateFilter() throws Exception {
    final Set<String> filters = new HashSet<>(Arrays.asList("BAD!"));
    // Filtering on the variant context
    final List<Allele> alleles1 = makeUniqueListOfAlleles(Aref, C);
    final Genotype gt1 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C));
    final VariantContext vcFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles1).genotypes(gt1).filters(filters).make();
    final List<Allele> alleles2 = makeUniqueListOfAlleles(Aref, T);
    final Genotype gt2 = GenotypeBuilder.create(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, T));
    final VariantContext vcNotFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles2).genotypes(gt2).make();
    testGenotypeConcordanceDetermineState(vcFiltered, TruthState.VC_FILTERED, vcNotFiltered, CallState.HET_REF_VAR1, 0, 0);
    testGenotypeConcordanceDetermineState(vcNotFiltered, TruthState.HET_REF_VAR1, vcFiltered, CallState.VC_FILTERED, 0, 0);
    testGenotypeConcordanceDetermineState(vcFiltered, TruthState.VC_FILTERED, vcFiltered, CallState.VC_FILTERED, 0, 0);
    // Filtering on the genotype
    final List<String> gtFilters = new ArrayList<>(Arrays.asList("WICKED"));
    final List<Allele> alleles3 = makeUniqueListOfAlleles(Aref, C);
    final Genotype gt3 = new GenotypeBuilder(TRUTH_SAMPLE_NAME, Arrays.asList(Aref, C)).filters(gtFilters).make();
    final VariantContext vcGtFiltered = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles3).genotypes(gt3).make();
    testGenotypeConcordanceDetermineState(vcGtFiltered, TruthState.GT_FILTERED, vcNotFiltered, CallState.HET_REF_VAR1, 0, 0);
    testGenotypeConcordanceDetermineState(vcNotFiltered, TruthState.HET_REF_VAR1, vcGtFiltered, CallState.GT_FILTERED, 0, 0);
    testGenotypeConcordanceDetermineState(vcGtFiltered, TruthState.GT_FILTERED, vcGtFiltered, CallState.GT_FILTERED, 0, 0);
}
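This test contrasts site-level filters, set on the VariantContext (the VCF FILTER column), with genotype-level filters, set on the individual Genotype (the per-sample FT field). A minimal sketch of the two builder patterns, with a made-up sample name and coordinates:
final Allele ref = Allele.create("A", true);
final Allele alt = Allele.create("C", false);
// Site-level filter: the whole record is marked as filtered.
final VariantContext siteFiltered = new VariantContextBuilder("test", "1", 100, 100, Arrays.asList(ref, alt))
        .genotypes(GenotypeBuilder.create("sample", Arrays.asList(ref, alt)))
        .filters(new HashSet<>(Arrays.asList("BAD!")))
        .make();
// Genotype-level filter: only this sample's genotype carries the filter.
final Genotype gtFiltered = new GenotypeBuilder("sample", Arrays.asList(ref, alt)).filters("WICKED").make();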