use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk by broadinstitute.
the class AlleleFractionLikelihoodsUnitTest method testHetLogLikelihoodMinorFractionNearOne.
// if f is very close to 1 we have an analytic result for comparison
@Test
public void testHetLogLikelihoodMinorFractionNearOne() {
//pi is just a prefactor so we don't need to test it thoroughly here
final double pi = 0.01;
for (final double f : Arrays.asList(1 - 1e-6, 1 - 1e-7, 1 - 1e-8)) {
for (final double mean : Arrays.asList(0.9, 1.0, 1.1)) {
for (final double variance : Arrays.asList(0.01, 0.005, 0.001)) {
final double alpha = mean * mean / variance;
final double beta = mean / variance;
final AlleleFractionGlobalParameters parameters = new AlleleFractionGlobalParameters(mean, variance, pi);
for (final int a : Arrays.asList(1, 10, 20)) {
//alt count
for (final int r : Arrays.asList(1, 10, 20)) {
//ref count
final AllelicCount count = new AllelicCount(DUMMY, r, a);
final double actual = AlleleFractionLikelihoods.hetLogLikelihood(parameters, f, count, AlleleFractionIndicator.ALT_MINOR);
final double expected = -r * log(beta) + Gamma.logGamma(alpha + r) - Gamma.logGamma(alpha) + log((1 - pi) / 2) - r * log(f / (1 - f));
Assert.assertEquals(actual, expected, 1e-4);
}
}
}
}
}
}
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.
the class AllelicPanelOfNormalsCreator method create.
/**
* Creates an {@link AllelicPanelOfNormals} given a site-frequency threshold;
* sites appearing in strictly less than this fraction of samples will not be included in the panel of normals.
* @param siteFrequencyThreshold site-frequency threshold
* @return an {@link AllelicPanelOfNormals} containing sites
* above the site-frequency threshold
*/
public AllelicPanelOfNormals create(final double siteFrequencyThreshold) {
logger.info("Creating allelic panel of normals...");
//used to filter on frequency
final Map<SimpleInterval, MutableInt> numberOfSamplesMap = new HashMap<>();
//store only the total counts (smaller memory footprint)
final Map<SimpleInterval, AllelicCount> totalCountsMap = new HashMap<>();
int pulldownFileCounter = 1;
final int totalNumberOfSamples = pulldownFiles.size();
for (final File pulldownFile : pulldownFiles) {
logger.info("Processing pulldown file " + pulldownFileCounter++ + "/" + totalNumberOfSamples + " (" + pulldownFile + ")...");
final AllelicCountCollection pulldownCounts = new AllelicCountCollection(pulldownFile);
for (final AllelicCount count : pulldownCounts.getCounts()) {
//update the sum of ref and alt counts at each site
final SimpleInterval site = count.getInterval();
final AllelicCount currentCountAtSite = totalCountsMap.getOrDefault(site, new AllelicCount(site, 0, 0));
final AllelicCount updatedCountAtSite = new AllelicCount(site, currentCountAtSite.getRefReadCount() + count.getRefReadCount(), currentCountAtSite.getAltReadCount() + count.getAltReadCount());
totalCountsMap.put(site, updatedCountAtSite);
//update the number of samples seen possessing each site
final MutableInt numberOfSamplesAtSite = numberOfSamplesMap.get(site);
if (numberOfSamplesAtSite == null) {
numberOfSamplesMap.put(site, new MutableInt(1));
} else {
numberOfSamplesAtSite.increment();
}
}
}
logger.info("Total number of unique sites present in samples: " + totalCountsMap.size());
//filter out sites that appear at a frequency strictly less than the provided threshold
final AllelicCountCollection totalCounts = new AllelicCountCollection();
numberOfSamplesMap.entrySet().stream().filter(e -> e.getValue().doubleValue() / totalNumberOfSamples >= siteFrequencyThreshold).map(e -> totalCountsMap.get(e.getKey())).forEach(totalCounts::add);
logger.info(String.format("Number of unique sites present in samples above site frequency = %4.3f: %d", siteFrequencyThreshold, totalCounts.getCounts().size()));
return new AllelicPanelOfNormals(totalCounts);
}
Aggregations