use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.
the class PlotACNVResults method validateContigs.
private void validateContigs(final Map<String, Integer> contigLengthMap) {
    final Set<String> contigNames = contigLengthMap.keySet();

    //validate contig names and lengths in SNP counts file
    final AllelicCountCollection snpCounts = new AllelicCountCollection(snpCountsFile);
    final Set<String> snpCountsContigNames = snpCounts.getCounts().stream()
            .map(AllelicCount::getContig)
            .collect(Collectors.toSet());
    if (!contigNames.containsAll(snpCountsContigNames)) {
        logger.warn("Contigs present in the SNP counts file are missing from the sequence dictionary and will not be plotted.");
    }
    final Map<String, Integer> snpCountsContigMaxPositionMap = snpCounts.getCounts().stream()
            .filter(c -> contigNames.contains(c.getContig()))
            .collect(Collectors.toMap(AllelicCount::getContig, AllelicCount::getEnd, Integer::max));
    snpCountsContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(snpCountsContigMaxPositionMap.get(c) <= contigLengthMap.get(c),
            "Position present in the SNP-counts file exceeds contig length in the sequence dictionary."));

    //validate contig names and lengths in tangent file
    final ReadCountCollection tangent;
    try {
        tangent = ReadCountCollectionUtils.parse(tangentFile);
    } catch (final IOException e) {
        throw new UserException.CouldNotReadInputFile(tangentFile, e);
    }
    final Set<String> tangentContigNames = tangent.targets().stream()
            .map(Target::getContig)
            .collect(Collectors.toSet());
    if (!contigNames.containsAll(tangentContigNames)) {
        logger.warn("Contigs present in the tangent-normalized coverage file are missing from the sequence dictionary and will not be plotted.");
    }
    final Map<String, Integer> tangentContigMaxPositionMap = tangent.targets().stream()
            .filter(t -> contigNames.contains(t.getContig()))
            .collect(Collectors.toMap(Target::getContig, Target::getEnd, Integer::max));
    tangentContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(tangentContigMaxPositionMap.get(c) <= contigLengthMap.get(c),
            "Position present in the tangent-normalized coverage file exceeds contig length in the sequence dictionary."));

    //validate contig names and lengths in segments file
    final List<ACNVModeledSegment> segments = SegmentUtils.readACNVModeledSegmentFile(segmentsFile);
    final Set<String> segmentsContigNames = segments.stream()
            .map(ACNVModeledSegment::getContig)
            .collect(Collectors.toSet());
    if (!contigNames.containsAll(segmentsContigNames)) {
        logger.warn("Contigs present in the segments file are missing from the sequence dictionary and will not be plotted.");
    }
    final Map<String, Integer> segmentsContigMaxPositionMap = segments.stream()
            .filter(s -> contigNames.contains(s.getContig()))
            .collect(Collectors.toMap(ACNVModeledSegment::getContig, ACNVModeledSegment::getEnd, Integer::max));
    segmentsContigMaxPositionMap.keySet().forEach(c -> Utils.validateArg(segmentsContigMaxPositionMap.get(c) <= contigLengthMap.get(c),
            "Position present in the segments file exceeds contig length in the sequence dictionary."));
}
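All three checks above follow the same pattern: collapse the records to a single maximum end position per contig via Collectors.toMap with an Integer::max merge function, then compare each maximum against the corresponding sequence-dictionary length. A minimal, self-contained sketch of that pattern, with a hypothetical Locus record (recent JDK assumed) standing in for AllelicCount, Target, and ACNVModeledSegment:

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public final class ContigValidationSketch {
    // hypothetical stand-in for AllelicCount / Target / ACNVModeledSegment
    record Locus(String contig, int end) {}

    public static void main(String[] args) {
        final Map<String, Integer> contigLengthMap = Map.of("1", 1000, "2", 500);
        final List<Locus> loci = List.of(new Locus("1", 900), new Locus("1", 950), new Locus("2", 450));

        // collapse records to one maximum end position per contig, keeping only contigs in the dictionary
        final Map<String, Integer> maxEndPerContig = loci.stream()
                .filter(l -> contigLengthMap.containsKey(l.contig()))
                .collect(Collectors.toMap(Locus::contig, Locus::end, Integer::max));

        // every maximum end position must fit within the contig length from the sequence dictionary
        maxEndPerContig.forEach((contig, maxEnd) -> {
            if (maxEnd > contigLengthMap.get(contig)) {
                throw new IllegalArgumentException("Position exceeds contig length for contig " + contig);
            }
        });
        System.out.println("max end per contig: " + maxEndPerContig);
    }
}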
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.
the class AllelicPanelOfNormals method initializeSiteToHyperparameterPairMap.
//transforms ref/alt counts at each site to hyperparameters, see docs/CNVs/CNV-methods.pdf for details
private void initializeSiteToHyperparameterPairMap(final AllelicCountCollection counts) {
    logger.info("Initializing allelic panel of normals...");
    for (final AllelicCount count : counts.getCounts()) {
        final SimpleInterval site = count.getInterval();
        final HyperparameterValues hyperparameterValues = new HyperparameterValues(
                globalHyperparameterValues.alpha, globalHyperparameterValues.beta,
                count.getAltReadCount(), count.getRefReadCount());
        if (siteToHyperparameterValuesMap.containsKey(site)) {
            throw new UserException.BadInput("Input AllelicCountCollection for allelic panel of normals contains duplicate sites.");
        } else {
            siteToHyperparameterValuesMap.put(site, hyperparameterValues);
        }
    }
    logger.info("Allelic panel of normals initialized.");
}
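A note on the duplicate-site guard above: Map#putIfAbsent can express the same containsKey-then-put logic in one call, since it returns the previously mapped value when the key is already present. A minimal sketch with hypothetical Site and Hyper records standing in for SimpleInterval and HyperparameterValues:

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public final class DuplicateSiteCheckSketch {
    record Site(String contig, int position) {}   // stand-in for SimpleInterval
    record Hyper(double alpha, double beta) {}    // stand-in for HyperparameterValues

    public static void main(String[] args) {
        final List<Site> sites = List.of(new Site("1", 100), new Site("1", 200), new Site("1", 100));
        final Map<Site, Hyper> siteToHyper = new HashMap<>();
        for (final Site site : sites) {
            // putIfAbsent returns the existing value (non-null) when the key was already present
            if (siteToHyper.putIfAbsent(site, new Hyper(1.0, 1.0)) != null) {
                // the original code throws UserException.BadInput here; this sketch just reports the duplicate
                System.out.println("duplicate site detected: " + site);
            }
        }
    }
}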
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.
the class AlleleFractionLikelihoodsUnitTest method testRefMinor.
//ALT_MINOR <--> REF_MINOR is equivalent to f <--> 1 - f
@Test
public void testRefMinor() {
    //pi is just a prefactor so we don't need to test it thoroughly here
    final double pi = 0.01;
    for (final double f : Arrays.asList(0.1, 0.2, 0.3)) {
        for (final double mean : Arrays.asList(0.9, 1.0, 1.1)) {
            for (final double variance : Arrays.asList(0.02, 0.01)) {
                final AlleleFractionGlobalParameters parameters = new AlleleFractionGlobalParameters(mean, variance, pi);
                for (final int a : Arrays.asList(1, 10, 20)) {      //alt count
                    for (final int r : Arrays.asList(1, 10, 20)) {  //ref count
                        final AllelicCount count = new AllelicCount(DUMMY, r, a);
                        final double altMinorLk = AlleleFractionLikelihoods.hetLogLikelihood(parameters, f, count, AlleleFractionIndicator.ALT_MINOR);
                        final double refMinorLk = AlleleFractionLikelihoods.hetLogLikelihood(parameters, 1 - f, count, AlleleFractionIndicator.REF_MINOR);
                        Assert.assertEquals(altMinorLk, refMinorLk, 1e-10);
                    }
                }
            }
        }
    }
}
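The symmetry stated in the comment is exactly what the assertion checks: roughly speaking, declaring the ref allele minor with fraction 1 - f describes the same expected alt-read fraction as declaring the alt allele minor with fraction f, so for the same (ref, alt) counts

    hetLogLikelihood(parameters, f, count, ALT_MINOR) == hetLogLikelihood(parameters, 1 - f, count, REF_MINOR)

to within the 1e-10 tolerance used above.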
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.
the class AlleleFractionLikelihoodsUnitTest method testHetLogLikelihoodMinorFractionNearOne.
// if f is very close to 1 we have an analytic result for comparison
@Test
public void testHetLogLikelihoodMinorFractionNearOne() {
    //pi is just a prefactor so we don't need to test it thoroughly here
    final double pi = 0.01;
    for (final double f : Arrays.asList(1 - 1e-6, 1 - 1e-7, 1 - 1e-8)) {
        for (final double mean : Arrays.asList(0.9, 1.0, 1.1)) {
            for (final double variance : Arrays.asList(0.01, 0.005, 0.001)) {
                final double alpha = mean * mean / variance;
                final double beta = mean / variance;
                final AlleleFractionGlobalParameters parameters = new AlleleFractionGlobalParameters(mean, variance, pi);
                for (final int a : Arrays.asList(1, 10, 20)) {      //alt count
                    for (final int r : Arrays.asList(1, 10, 20)) {  //ref count
                        final AllelicCount count = new AllelicCount(DUMMY, r, a);
                        final double actual = AlleleFractionLikelihoods.hetLogLikelihood(parameters, f, count, AlleleFractionIndicator.ALT_MINOR);
                        final double expected = -r * log(beta) + Gamma.logGamma(alpha + r) - Gamma.logGamma(alpha)
                                + log((1 - pi) / 2) - r * log(f / (1 - f));
                        Assert.assertEquals(actual, expected, 1e-4);
                    }
                }
            }
        }
    }
}
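For reference, the expected value in the innermost loop is the analytic limit of the het log-likelihood as the minor-allele fraction approaches 1, spelled out here exactly as the test computes it, with alpha = mean^2 / variance and beta = mean / variance as above:

    hetLogLikelihood(parameters, f, count, ALT_MINOR)
        ≈ log((1 - pi) / 2) - r * log(beta) + logGamma(alpha + r) - logGamma(alpha) - r * log(f / (1 - f))    as f -> 1

Note that the alt count a does not appear in this limiting expression, and the comparison uses a looser tolerance (1e-4) because f is close to, but not exactly, 1.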
use of org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount in project gatk-protected by broadinstitute.
the class AlleleFractionLikelihoodsUnitTest method testHetLogLikelihoodOutlierProbabilityDependence.
@Test
public void testHetLogLikelihoodOutlierProbabilityDependence() {
    final AllelicCount count = new AllelicCount(DUMMY, 11, 37);
    final double f = 0.25;
    final double mean = 1.0;
    final double variance = 0.01;
    final double pi1 = 0.1;
    final double pi2 = 0.2;
    final double pi3 = 0.3;
    final AlleleFractionGlobalParameters parameters1 = new AlleleFractionGlobalParameters(mean, variance, pi1);
    final AlleleFractionGlobalParameters parameters2 = new AlleleFractionGlobalParameters(mean, variance, pi2);
    final AlleleFractionGlobalParameters parameters3 = new AlleleFractionGlobalParameters(mean, variance, pi3);
    final double lk1 = AlleleFractionLikelihoods.hetLogLikelihood(parameters1, f, count, AlleleFractionIndicator.ALT_MINOR);
    final double lk2 = AlleleFractionLikelihoods.hetLogLikelihood(parameters2, f, count, AlleleFractionIndicator.ALT_MINOR);
    final double lk3 = AlleleFractionLikelihoods.hetLogLikelihood(parameters3, f, count, AlleleFractionIndicator.ALT_MINOR);
    Assert.assertEquals(lk2 - lk1, log(1 - pi2) - log(1 - pi1), EPSILON);
    Assert.assertEquals(lk3 - lk2, log(1 - pi3) - log(1 - pi2), EPSILON);
}
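Consistent with the "pi is just a prefactor" comments in the tests above, the outlier probability enters the het log-likelihood only through an additive log((1 - pi) / 2) term, so differencing two evaluations that share f, mean, variance, and counts cancels everything else:

    lk2 - lk1 = log((1 - pi2) / 2) - log((1 - pi1) / 2) = log(1 - pi2) - log(1 - pi1)

which is exactly what the two assertions check, to within EPSILON.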