use of org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary in project gatk-protected by broadinstitute.
the class AlleleFractionModellerUnitTest method testMCMC.
private void testMCMC(final double meanBiasSimulated, final double biasVarianceSimulated, final double meanBiasExpected, final double biasVarianceExpected, final AllelicPanelOfNormals allelicPoN) {
LoggingUtils.setLoggingLevel(Log.LogLevel.INFO);
final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
final int numSamples = 150;
final int numBurnIn = 50;
final double averageHetsPerSegment = 50;
final int numSegments = 100;
final int averageDepth = 50;
final double outlierProbability = 0.02;
// note: the following tolerances could actually be made much smaller if we used more segments and/or
// more hets -- most of the error is the sampling error of a finite simulated data set, not numerical error of MCMC
final double minorFractionTolerance = 0.02;
final double meanBiasTolerance = 0.02;
final double biasVarianceTolerance = 0.01;
final double outlierProbabilityTolerance = 0.02;
final AlleleFractionSimulatedData simulatedData = new AlleleFractionSimulatedData(averageHetsPerSegment, numSegments, averageDepth, meanBiasSimulated, biasVarianceSimulated, outlierProbability);
final AlleleFractionModeller modeller = new AlleleFractionModeller(simulatedData.getSegmentedGenome(), allelicPoN);
modeller.fitMCMC(numSamples, numBurnIn);
final List<Double> meanBiasSamples = modeller.getmeanBiasSamples();
Assert.assertEquals(meanBiasSamples.size(), numSamples - numBurnIn);
final List<Double> biasVarianceSamples = modeller.getBiasVarianceSamples();
Assert.assertEquals(biasVarianceSamples.size(), numSamples - numBurnIn);
final List<Double> outlierProbabilitySamples = modeller.getOutlierProbabilitySamples();
Assert.assertEquals(outlierProbabilitySamples.size(), numSamples - numBurnIn);
final List<AlleleFractionState.MinorFractions> minorFractionsSamples = modeller.getMinorFractionsSamples();
Assert.assertEquals(minorFractionsSamples.size(), numSamples - numBurnIn);
for (final AlleleFractionState.MinorFractions sample : minorFractionsSamples) {
Assert.assertEquals(sample.size(), numSegments);
}
final List<List<Double>> minorFractionsSamplesBySegment = modeller.getMinorFractionSamplesBySegment();
final double mcmcMeanBias = meanBiasSamples.stream().mapToDouble(x -> x).average().getAsDouble();
final double mcmcBiasVariance = biasVarianceSamples.stream().mapToDouble(x -> x).average().getAsDouble();
final double mcmcOutlierProbability = outlierProbabilitySamples.stream().mapToDouble(x -> x).average().getAsDouble();
final List<Double> mcmcMinorFractions = minorFractionsSamplesBySegment.stream().map(list -> list.stream().mapToDouble(x -> x).average().getAsDouble()).collect(Collectors.toList());
double totalSegmentError = 0.0;
for (int segment = 0; segment < numSegments; segment++) {
totalSegmentError += Math.abs(mcmcMinorFractions.get(segment) - simulatedData.getTrueState().segmentMinorFraction(segment));
}
Assert.assertEquals(mcmcMeanBias, meanBiasExpected, meanBiasTolerance);
Assert.assertEquals(mcmcBiasVariance, biasVarianceExpected, biasVarianceTolerance);
Assert.assertEquals(mcmcOutlierProbability, outlierProbability, outlierProbabilityTolerance);
Assert.assertEquals(totalSegmentError / numSegments, 0.0, minorFractionTolerance);
//test posterior summaries
final Map<AlleleFractionParameter, PosteriorSummary> globalParameterPosteriorSummaries = modeller.getGlobalParameterPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
final PosteriorSummary meanBiasPosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.MEAN_BIAS);
final double meanBiasPosteriorCenter = meanBiasPosteriorSummary.getCenter();
Assert.assertEquals(meanBiasPosteriorCenter, meanBiasExpected, meanBiasTolerance);
final PosteriorSummary biasVariancePosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.BIAS_VARIANCE);
final double biasVariancePosteriorCenter = biasVariancePosteriorSummary.getCenter();
Assert.assertEquals(biasVariancePosteriorCenter, biasVarianceExpected, biasVarianceTolerance);
final PosteriorSummary outlierProbabilityPosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.OUTLIER_PROBABILITY);
final double outlierProbabilityPosteriorCenter = outlierProbabilityPosteriorSummary.getCenter();
Assert.assertEquals(outlierProbabilityPosteriorCenter, outlierProbability, outlierProbabilityTolerance);
final List<PosteriorSummary> minorAlleleFractionPosteriorSummaries = modeller.getMinorAlleleFractionsPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
final List<Double> minorFractionsPosteriorCenters = minorAlleleFractionPosteriorSummaries.stream().map(PosteriorSummary::getCenter).collect(Collectors.toList());
double totalPosteriorCentersSegmentError = 0.0;
for (int segment = 0; segment < numSegments; segment++) {
totalPosteriorCentersSegmentError += Math.abs(minorFractionsPosteriorCenters.get(segment) - simulatedData.getTrueState().segmentMinorFraction(segment));
}
Assert.assertEquals(totalPosteriorCentersSegmentError / numSegments, 0.0, minorFractionTolerance);
}
use of org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary in project gatk-protected by broadinstitute.
the class AllelicSplitCallerModelStateUnitTest method testBasicInit.
@Test
public void testBasicInit() {
final ACNVModeledSegment acnvModeledSegment = new ACNVModeledSegment(new SimpleInterval("1", 1000, 1500), new PosteriorSummary(-4000, -4001, -4002), new PosteriorSummary(-4000, -4001, -4002));
final List<ACNVModeledSegment> tempList = new ArrayList<>();
tempList.add(acnvModeledSegment);
final AllelicBalanceCallerModelState state = AllelicBalanceCallerModelState.createInitialCNLOHCallerModelState(0.2, tempList, HomoSapiensConstants.DEFAULT_PLOIDY, CNLOHCaller.NUM_RHOS);
Assert.assertNotNull(state);
Assert.assertNotNull(state.getEffectivePis());
Assert.assertTrue(state.getEffectivePis().length > 0);
Assert.assertTrue(state.getmVals().length > 0);
Assert.assertTrue(state.getnVals().length > 0);
Assert.assertEquals(MathUtils.sum(state.getEffectivePis()), 1.0, 1e-10);
}
use of org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary in project gatk-protected by broadinstitute.
the class ACNVModeledSegmentConversionUtilsUnitTest method testSimpleConversionCannotYieldSegmentMeanOfZero.
@Test
public void testSimpleConversionCannotYieldSegmentMeanOfZero() {
final ACNVModeledSegment acnvModeledSegment = new ACNVModeledSegment(new SimpleInterval("1", 1000, 1500), new PosteriorSummary(-4000, -4001, -4002), new PosteriorSummary(-4000, -4001, -4002));
final List<Target> targets = new ArrayList<>();
targets.add(new Target("test", new SimpleInterval("1", 1300, 1302)));
final double[] targetDummyValues = new double[targets.size()];
final TargetCollection<ReadCountRecord.SingleSampleRecord> targetCollection = new HashedListTargetCollection<>(Collections.singletonList(new ReadCountRecord.SingleSampleRecord(targets.get(0), 0.0)));
final ModeledSegment guess = ACNVModeledSegmentConversionUtils.convertACNVModeledSegmentToModeledSegment(acnvModeledSegment, targetCollection);
Assert.assertTrue(guess.getSegmentMeanInCRSpace() > 0);
Assert.assertEquals(guess.getSegmentMean(), ParamUtils.log2(PCATangentNormalizationUtils.EPSILON), 1e-10);
}
use of org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary in project gatk-protected by broadinstitute.
the class ACNVModeller method fitModel.
/**
* Performs Markov-Chain Monte Carlo model fitting using the
* number of total samples and number of burn-in samples pecified at construction.
*/
public void fitModel() {
//perform MCMC to generate posterior samples
logger.info("Fitting copy-ratio model...");
copyRatioModeller = new CopyRatioModeller(segmentedGenome);
copyRatioModeller.fitMCMC(numSamplesCopyRatio, numBurnInCopyRatio);
logger.info("Fitting allele-fraction model...");
alleleFractionModeller = new AlleleFractionModeller(segmentedGenome, allelicPoN);
alleleFractionModeller.fitMCMC(numSamplesAlleleFraction, numBurnInAlleleFraction);
//update list of ACNVModeledSegment with new PosteriorSummaries
segments.clear();
final List<SimpleInterval> unmodeledSegments = segmentedGenome.getSegments();
final List<PosteriorSummary> segmentMeansPosteriorSummaries = copyRatioModeller.getSegmentMeansPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
final List<PosteriorSummary> minorAlleleFractionsPosteriorSummaries = alleleFractionModeller.getMinorAlleleFractionsPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
for (int segment = 0; segment < unmodeledSegments.size(); segment++) {
segments.add(new ACNVModeledSegment(unmodeledSegments.get(segment), segmentMeansPosteriorSummaries.get(segment), minorAlleleFractionsPosteriorSummaries.get(segment)));
}
isModelFit = true;
}
use of org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary in project gatk by broadinstitute.
the class JointAFCRSegmenterUnitTest method testSegmentation.
@Test
public void testSegmentation() {
final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
// probability that a datum is a het i.e. #hets / (#hets + #targets)
final double hetProportion = 0.25;
final List<Double> trueWeights = Arrays.asList(0.2, 0.5, 0.3);
final double[] trueMinorAlleleFractions = new double[] { 0.12, 0.32, 0.5 };
final double[] trueLog2CopyRatios = new double[] { -2.0, 0.0, 1.7 };
final List<AFCRHiddenState> trueJointStates = IntStream.range(0, trueLog2CopyRatios.length).mapToObj(n -> new AFCRHiddenState(trueMinorAlleleFractions[n], trueLog2CopyRatios[n])).collect(Collectors.toList());
final double trueMemoryLength = 1e5;
final double trueCauchyWidth = 0.2;
final int initialNumCRStates = 20;
final int initialNumAFStates = 20;
final AlleleFractionGlobalParameters trueAFParams = new AlleleFractionGlobalParameters(1.0, 0.01, 0.01);
final JointAFCRHMM trueJointModel = new JointAFCRHMM(trueJointStates, trueWeights, trueMemoryLength, trueAFParams, AllelicPanelOfNormals.EMPTY_PON, trueCauchyWidth);
// generate joint truth
final int chainLength = 10000;
final List<SimpleInterval> positions = CopyRatioSegmenterUnitTest.randomPositions("chr1", chainLength, rng, trueMemoryLength / 4);
final List<Integer> trueHiddenStates = trueJointModel.generateHiddenStateChain(positions);
final List<AFCRHiddenState> trueAFCRSequence = trueHiddenStates.stream().map(trueJointModel::getHiddenStateValue).collect(Collectors.toList());
final double[] trueCopyRatioSequence = trueAFCRSequence.stream().mapToDouble(AFCRHiddenState::getLog2CopyRatio).toArray();
final double[] trueAlleleFractionSequence = trueAFCRSequence.stream().mapToDouble(AFCRHiddenState::getMinorAlleleFraction).toArray();
// generate separate af and cr data
final GammaDistribution biasGenerator = AlleleFractionSegmenterUnitTest.getGammaDistribution(trueAFParams, rng);
final double outlierProbability = trueAFParams.getOutlierProbability();
final AllelicCountCollection afData = new AllelicCountCollection();
final List<Double> crData = new ArrayList<>();
final List<Target> crTargets = new ArrayList<>();
for (int n = 0; n < positions.size(); n++) {
final SimpleInterval position = positions.get(n);
final AFCRHiddenState jointState = trueAFCRSequence.get(n);
final double minorFraction = jointState.getMinorAlleleFraction();
final double log2CopyRatio = jointState.getLog2CopyRatio();
if (rng.nextDouble() < hetProportion) {
// het datum
afData.add(AlleleFractionSegmenterUnitTest.generateAllelicCount(minorFraction, position, rng, biasGenerator, outlierProbability));
} else {
//target datum
crTargets.add(new Target(position));
crData.add(CopyRatioSegmenterUnitTest.generateData(trueCauchyWidth, log2CopyRatio, rng));
}
}
final ReadCountCollection rcc = new ReadCountCollection(crTargets, Arrays.asList("SAMPLE"), new Array2DRowRealMatrix(crData.stream().mapToDouble(x -> x).toArray()));
final JointAFCRSegmenter segmenter = JointAFCRSegmenter.createJointSegmenter(initialNumCRStates, rcc, initialNumAFStates, afData, AllelicPanelOfNormals.EMPTY_PON);
final TargetCollection<SimpleInterval> tc = new HashedListTargetCollection<>(positions);
final List<Pair<SimpleInterval, AFCRHiddenState>> segmentation = segmenter.findSegments();
final List<ACNVModeledSegment> jointSegments = segmentation.stream().map(pair -> {
final SimpleInterval position = pair.getLeft();
final AFCRHiddenState jointState = pair.getRight();
final PosteriorSummary crSummary = PerformJointSegmentation.errorlessPosterior(jointState.getLog2CopyRatio());
final PosteriorSummary afSummary = PerformJointSegmentation.errorlessPosterior(jointState.getMinorAlleleFraction());
return new ACNVModeledSegment(position, crSummary, afSummary);
}).collect(Collectors.toList());
final double[] segmentCopyRatios = jointSegments.stream().flatMap(s -> Collections.nCopies(tc.targetCount(s.getInterval()), s.getSegmentMeanPosteriorSummary().getCenter()).stream()).mapToDouble(x -> x).toArray();
final double[] segmentMinorFractions = jointSegments.stream().flatMap(s -> Collections.nCopies(tc.targetCount(s.getInterval()), s.getMinorAlleleFractionPosteriorSummary().getCenter()).stream()).mapToDouble(x -> x).toArray();
final double averageMinorFractionError = Arrays.stream(MathArrays.ebeSubtract(trueAlleleFractionSequence, segmentMinorFractions)).map(Math::abs).average().getAsDouble();
final double averageCopyRatioError = Arrays.stream(MathArrays.ebeSubtract(trueCopyRatioSequence, segmentCopyRatios)).map(Math::abs).average().getAsDouble();
Assert.assertEquals(averageMinorFractionError, 0, 0.04);
Assert.assertEquals(averageCopyRatioError, 0, 0.04);
}
Aggregations