use of org.broadinstitute.hellbender.utils.IndexRange in project gatk by broadinstitute.
the class XHMMSegmentCallerIntegrationTest method assertSampleSegmentsCoordinates.
private void assertSampleSegmentsCoordinates(List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> sampleRecords, TargetCollection<Target> targets) {
    for (final HiddenStateSegmentRecord<CopyNumberTriState, Target> record : sampleRecords) {
        final IndexRange range = targets.indexRange(record.getSegment());
        Assert.assertTrue(range.size() > 0);
        Assert.assertEquals(record.getSegment().getContig(), targets.location(range.from).getContig());
        Assert.assertEquals(record.getSegment().getStart(), targets.location(range.from).getStart());
        Assert.assertEquals(record.getSegment().getEnd(), targets.location(range.to - 1).getEnd());
    }
}
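These assertions lean on IndexRange being half-open: from is inclusive and to is exclusive, which is why the last target covered by a segment is read at range.to - 1. Here is a minimal standalone sketch of that convention, using only the constructor and the from, to, and size() members exercised above (the demo class name is an invented placeholder):

import org.broadinstitute.hellbender.utils.IndexRange;

public class IndexRangeSemanticsDemo {
    public static void main(final String[] args) {
        // half-open range: 'from' is inclusive, 'to' is exclusive
        final IndexRange range = new IndexRange(3, 7); // covers indices 3, 4, 5, 6
        System.out.println(range.size());  // 4
        System.out.println(range.from);    // 3, the first covered index
        System.out.println(range.to - 1);  // 6, the last covered index
    }
}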
use of org.broadinstitute.hellbender.utils.IndexRange in project gatk-protected by broadinstitute.
the class SomaticLikelihoodsEngineUnitTest method testEvidence.
@Test
public void testEvidence() {
    // one exact limit for the evidence is when the likelihoods of each read are so peaked (i.e. the most likely allele
    // of each read is much likelier than all other alleles) that the sum over latent read-to-allele assignments
    // (that is, over the indicator z in the notes) is dominated by the max-likelihood allele configuration
    // and thus the evidence reduces to exactly integrating out the Dirichlet allele fractions
    final double[] prior = new double[] { 1, 2 };
    final RealMatrix log10Likelihoods = new Array2DRowRealMatrix(2, 4);
    log10Likelihoods.setRow(0, new double[] { 0.1, 4.0, 3.0, -10 });
    log10Likelihoods.setRow(1, new double[] { -12, -9, -5.0, 0.5 });
    final double calculatedLog10Evidence = SomaticLikelihoodsEngine.log10Evidence(log10Likelihoods, prior);
    // allele 0 is the likelier allele for reads 0-2 and allele 1 for read 3, hence counts of 3 and 1
    final double[] maxLikelihoodCounts = new double[] { 3, 1 };
    final double expectedLog10Evidence = SomaticLikelihoodsEngine.log10DirichletNormalization(prior)
            - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(prior, maxLikelihoodCounts))
            + new IndexRange(0, log10Likelihoods.getColumnDimension()).sum(read -> log10Likelihoods.getColumnVector(read).getMaxValue());
    Assert.assertEquals(calculatedLog10Evidence, expectedLog10Evidence, 1e-5);

    // when there's just one read we can calculate the likelihood exactly
    final double[] prior2 = new double[] { 1, 2 };
    final RealMatrix log10Likelihoods2 = new Array2DRowRealMatrix(2, 1);
    log10Likelihoods2.setRow(0, new double[] { 0.1 });
    log10Likelihoods2.setRow(1, new double[] { 0.5 });
    final double calculatedLog10Evidence2 = SomaticLikelihoodsEngine.log10Evidence(log10Likelihoods2, prior2);
    final double[] delta0 = new double[] { 1, 0 };
    final double[] delta1 = new double[] { 0, 1 };
    final double expectedLog10Evidence2 = MathUtils.log10SumLog10(
            log10Likelihoods2.getEntry(0, 0) + SomaticLikelihoodsEngine.log10DirichletNormalization(prior2)
                    - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(prior2, delta0)),
            log10Likelihoods2.getEntry(1, 0) + SomaticLikelihoodsEngine.log10DirichletNormalization(prior2)
                    - SomaticLikelihoodsEngine.log10DirichletNormalization(MathArrays.ebeAdd(prior2, delta1)));
    Assert.assertEquals(calculatedLog10Evidence2, expectedLog10Evidence2, 0.05);
}
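The expected-evidence expression above relies on IndexRange.sum, which applies an index-to-double function to every index in the range and adds up the results. A standalone sketch of just that accumulation, reusing the 2x4 log10 likelihood matrix from the test (the demo class name is an invented placeholder):

import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.RealMatrix;
import org.broadinstitute.hellbender.utils.IndexRange;

public class IndexRangeSumDemo {
    public static void main(final String[] args) {
        // rows are alleles, columns are reads, entries are log10 likelihoods
        final RealMatrix log10Likelihoods = new Array2DRowRealMatrix(new double[][] {
                { 0.1, 4.0, 3.0, -10 },
                { -12, -9, -5.0, 0.5 }
        });
        // sum, over reads, of each read's best log10 likelihood:
        // 0.1 + 4.0 + 3.0 + 0.5 = 7.6
        final double sumOfBest = new IndexRange(0, log10Likelihoods.getColumnDimension())
                .sum(read -> log10Likelihoods.getColumnVector(read).getMaxValue());
        System.out.println(sumOfBest); // 7.6
    }
}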
use of org.broadinstitute.hellbender.utils.IndexRange in project gatk-protected by broadinstitute.
the class GermlineProbabilityCalculator method calculateAnnotations.
public static Map<String, Object> calculateAnnotations(List<VariantContext> germlineResourceVariants, final List<Allele> altAlleles, final double[] tumorLog10Odds, final Optional<double[]> normalLog10Odds, final double afOfAllelesNotInGermlineResource, final double log10PriorProbOfSomaticEvent) {
    final double[] normalLog10OddsOrFlat = normalLog10Odds.orElseGet(() -> MathUtils.applyToArray(tumorLog10Odds, x -> 0));
    // assume only one VC per site
    final Optional<VariantContext> germlineVC = germlineResourceVariants.isEmpty() ? Optional.empty() : Optional.of(germlineResourceVariants.get(0));
    final double[] populationAlleleFrequencies = getGermlineAltAlleleFrequencies(altAlleles, germlineVC, afOfAllelesNotInGermlineResource);
    // note the minus sign required because Mutect has the convention that this is log odds of allele *NOT* being in the normal
    final double[] germlineLog10Posteriors = new IndexRange(0, altAlleles.size()).mapToDouble(n -> log10PosteriorProbabilityOfGermlineVariant(-normalLog10OddsOrFlat[n], tumorLog10Odds[n], populationAlleleFrequencies[n], log10PriorProbOfSomaticEvent));
    return ImmutableMap.of(POPULATION_AF_VCF_ATTRIBUTE, populationAlleleFrequencies, GERMLINE_POSTERIORS_VCF_ATTRIBUTE, germlineLog10Posteriors);
}
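The IndexRange.mapToDouble call is the workhorse here: it turns the range of alt-allele indices into a freshly allocated double[] by applying the lambda at each index, avoiding an explicit loop over parallel arrays. A hedged sketch of the same idiom, with a made-up subtraction standing in for log10PosteriorProbabilityOfGermlineVariant (class name and values are illustrative only):

import java.util.Arrays;
import org.broadinstitute.hellbender.utils.IndexRange;

public class MapToDoubleDemo {
    public static void main(final String[] args) {
        final double[] tumorLog10Odds = { 5.2, 1.3, 7.8 };
        final double[] normalLog10Odds = { -0.4, 2.1, -3.0 };
        // one output per alt-allele index, combining parallel arrays;
        // the subtraction is an arbitrary stand-in for the real posterior calculation
        final double[] combined = new IndexRange(0, tumorLog10Odds.length)
                .mapToDouble(n -> tumorLog10Odds[n] - normalLog10Odds[n]);
        System.out.println(Arrays.toString(combined)); // [5.6, -0.8, 10.8]
    }
}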
use of org.broadinstitute.hellbender.utils.IndexRange in project gatk-protected by broadinstitute.
the class XHMMSegmentCallerIntegrationTest method assertSampleSegmentsCoverAllTargets.
private void assertSampleSegmentsCoverAllTargets(final List<HiddenStateSegmentRecord<CopyNumberTriState, Target>> sampleRecords, final TargetCollection<Target> targets) {
    int next = 0;
    for (final HiddenStateSegmentRecord<CopyNumberTriState, Target> record : sampleRecords) {
        final IndexRange range = targets.indexRange(record.getSegment());
        Assert.assertEquals(range.from, next);
        next = range.to;
    }
}
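Because ranges are half-open, each segment's range.from must equal the previous segment's range.to for the segments to tile the targets with no gap or overlap, which is exactly what the running next counter asserts. A standalone sketch of that invariant over plain ranges (the ranges and class name are invented for illustration):

import java.util.Arrays;
import java.util.List;
import org.broadinstitute.hellbender.utils.IndexRange;

public class ContiguityCheckDemo {
    public static void main(final String[] args) {
        // three half-open ranges that tile indices 0..9 without gap or overlap
        final List<IndexRange> ranges = Arrays.asList(
                new IndexRange(0, 4), new IndexRange(4, 7), new IndexRange(7, 10));
        int next = 0;
        for (final IndexRange range : ranges) {
            if (range.from != next) {
                throw new AssertionError("gap or overlap before index " + range.from);
            }
            next = range.to; // the exclusive end is the next expected start
        }
        System.out.println("ranges tile indices 0.." + (next - 1) + " contiguously");
    }
}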
use of org.broadinstitute.hellbender.utils.IndexRange in project gatk by broadinstitute.
the class ReadLikelihoods method filterPoorlyModeledReads.
/**
 * Removes those reads whose best possible likelihood given any allele is just too low.
 *
 * <p>
 * This is determined by a maximum error per read-base against the best likelihood possible.
 * </p>
 *
 * @param maximumErrorPerBase the maximum acceptable error rate per read base; must be
 * a positive number.
 *
 * @throws IllegalStateException if this read-likelihoods collection contains no alleles.
 *
 * @throws IllegalArgumentException if {@code maximumErrorPerBase} is not a positive number.
 */
public void filterPoorlyModeledReads(final double maximumErrorPerBase) {
    Utils.validateArg(alleles.numberOfAlleles() > 0, "unsupported for read-likelihood collections with no alleles");
    Utils.validateArg(!Double.isNaN(maximumErrorPerBase) && maximumErrorPerBase > 0.0, "the maximum error per base must be a positive number");
    new IndexRange(0, samples.numberOfSamples()).forEach(s -> {
        final GATKRead[] sampleReads = readsBySampleIndex[s];
        final List<Integer> removeIndices = new IndexRange(0, sampleReads.length).filter(r -> readIsPoorlyModelled(s, r, sampleReads[r], maximumErrorPerBase));
        removeSampleReads(s, removeIndices, alleles.numberOfAlleles());
    });
}
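Two IndexRange idioms appear here: forEach iterates the sample indices, and filter collects those indices in a range that satisfy a predicate, so the removal step receives a list of read indices rather than a copied read array. A minimal sketch of the filter idiom in isolation (scores, threshold, and class name are invented):

import java.util.List;
import org.broadinstitute.hellbender.utils.IndexRange;

public class IndexRangeFilterDemo {
    public static void main(final String[] args) {
        final double[] readScores = { 0.9, 0.2, 0.7, 0.1, 0.5 };
        final double threshold = 0.4;
        // gather the indices of poorly scoring entries, mirroring how
        // filterPoorlyModeledReads gathers removeIndices for each sample
        final List<Integer> removeIndices = new IndexRange(0, readScores.length)
                .filter(r -> readScores[r] < threshold);
        System.out.println(removeIndices); // [1, 3]
    }
}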