Example 1 with PosteriorSummary

use of org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary in project gatk-protected by broadinstitute.

the class ACNVModeller method fitModel.

/**
     * Performs Markov-Chain Monte Carlo model fitting using the
     * number of total samples and number of burn-in samples specified at construction.
     */
public void fitModel() {
    //perform MCMC to generate posterior samples
    logger.info("Fitting copy-ratio model...");
    copyRatioModeller = new CopyRatioModeller(segmentedGenome);
    copyRatioModeller.fitMCMC(numSamplesCopyRatio, numBurnInCopyRatio);
    logger.info("Fitting allele-fraction model...");
    alleleFractionModeller = new AlleleFractionModeller(segmentedGenome, allelicPoN);
    alleleFractionModeller.fitMCMC(numSamplesAlleleFraction, numBurnInAlleleFraction);
    //update list of ACNVModeledSegment with new PosteriorSummaries
    segments.clear();
    final List<SimpleInterval> unmodeledSegments = segmentedGenome.getSegments();
    final List<PosteriorSummary> segmentMeansPosteriorSummaries = copyRatioModeller.getSegmentMeansPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    final List<PosteriorSummary> minorAlleleFractionsPosteriorSummaries = alleleFractionModeller.getMinorAlleleFractionsPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    for (int segment = 0; segment < unmodeledSegments.size(); segment++) {
        segments.add(new ACNVModeledSegment(unmodeledSegments.get(segment), segmentMeansPosteriorSummaries.get(segment), minorAlleleFractionsPosteriorSummaries.get(segment)));
    }
    isModelFit = true;
}
Also used : PosteriorSummary(org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) CopyRatioModeller(org.broadinstitute.hellbender.tools.exome.copyratio.CopyRatioModeller) AlleleFractionModeller(org.broadinstitute.hellbender.tools.exome.allelefraction.AlleleFractionModeller) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)
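
The loop at the end of fitModel pairs each unmodeled interval with its per-segment copy-ratio and minor-allele-fraction posterior summaries purely by index. Below is a minimal sketch of that pairing pulled out as a standalone helper, using only constructors that already appear in these examples; the class and method names are hypothetical and not part of GATK.

import java.util.ArrayList;
import java.util.List;
import org.broadinstitute.hellbender.tools.exome.ACNVModeledSegment;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary;

/** Hypothetical helper mirroring the index-wise pairing done in fitModel. */
public final class ModeledSegmentZipper {
    private ModeledSegmentZipper() {}

    public static List<ACNVModeledSegment> zipIntoModeledSegments(final List<SimpleInterval> unmodeledSegments,
                                                                  final List<PosteriorSummary> segmentMeansSummaries,
                                                                  final List<PosteriorSummary> minorAlleleFractionSummaries) {
        if (unmodeledSegments.size() != segmentMeansSummaries.size()
                || unmodeledSegments.size() != minorAlleleFractionSummaries.size()) {
            throw new IllegalArgumentException("Per-segment lists must all have the same length.");
        }
        final List<ACNVModeledSegment> segments = new ArrayList<>(unmodeledSegments.size());
        for (int i = 0; i < unmodeledSegments.size(); i++) {
            // Same constructor as in fitModel and Example 5: interval plus two posterior summaries.
            segments.add(new ACNVModeledSegment(unmodeledSegments.get(i),
                    segmentMeansSummaries.get(i), minorAlleleFractionSummaries.get(i)));
        }
        return segments;
    }
}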

Example 2 with PosteriorSummary

use of org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary in project gatk by broadinstitute.

the class PerformJointSegmentation method errorlessPosterior.

@VisibleForTesting
protected static PosteriorSummary errorlessPosterior(final double value) {
    final PosteriorSummary result = new PosteriorSummary(value, value, value);
    result.setDeciles(new DecileCollection(Arrays.asList(value)));
    return result;
}
Also used : PosteriorSummary(org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) DecileCollection(org.broadinstitute.hellbender.utils.mcmc.DecileCollection) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
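
For context, here is a short usage sketch of the degenerate summary errorlessPosterior builds: center, lower, and upper are all the input value, and the single-element decile list collapses onto that same value. The test class name below is hypothetical; the constructors and the getCenter() accessor are the ones already used in these examples.

import java.util.Arrays;
import org.broadinstitute.hellbender.utils.mcmc.DecileCollection;
import org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary;
import org.testng.Assert;
import org.testng.annotations.Test;

/** Hypothetical test illustrating the zero-width summary built by errorlessPosterior. */
public final class ErrorlessPosteriorSketchTest {
    @Test
    public void testDegenerateSummary() {
        final double value = 1.5;
        // Same construction as errorlessPosterior: center, lower, and upper all equal the value.
        final PosteriorSummary summary = new PosteriorSummary(value, value, value);
        summary.setDeciles(new DecileCollection(Arrays.asList(value)));
        // A zero-width credible interval simply recovers the input value as its center.
        Assert.assertEquals(summary.getCenter(), value, 0.0);
    }
}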

Example 3 with PosteriorSummary

use of org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary in project gatk by broadinstitute.

the class AlleleFractionModellerUnitTest method testMCMC.

private void testMCMC(final double meanBiasSimulated, final double biasVarianceSimulated, final double meanBiasExpected, final double biasVarianceExpected, final AllelicPanelOfNormals allelicPoN) {
    LoggingUtils.setLoggingLevel(Log.LogLevel.INFO);
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final int numSamples = 150;
    final int numBurnIn = 50;
    final double averageHetsPerSegment = 50;
    final int numSegments = 100;
    final int averageDepth = 50;
    final double outlierProbability = 0.02;
    // note: the following tolerances could actually be made much smaller if we used more segments and/or
    // more hets -- most of the error is the sampling error of a finite simulated data set, not numerical error of MCMC
    final double minorFractionTolerance = 0.02;
    final double meanBiasTolerance = 0.02;
    final double biasVarianceTolerance = 0.01;
    final double outlierProbabilityTolerance = 0.02;
    final AlleleFractionSimulatedData simulatedData = new AlleleFractionSimulatedData(averageHetsPerSegment, numSegments, averageDepth, meanBiasSimulated, biasVarianceSimulated, outlierProbability);
    final AlleleFractionModeller modeller = new AlleleFractionModeller(simulatedData.getSegmentedGenome(), allelicPoN);
    modeller.fitMCMC(numSamples, numBurnIn);
    final List<Double> meanBiasSamples = modeller.getmeanBiasSamples();
    Assert.assertEquals(meanBiasSamples.size(), numSamples - numBurnIn);
    final List<Double> biasVarianceSamples = modeller.getBiasVarianceSamples();
    Assert.assertEquals(biasVarianceSamples.size(), numSamples - numBurnIn);
    final List<Double> outlierProbabilitySamples = modeller.getOutlierProbabilitySamples();
    Assert.assertEquals(outlierProbabilitySamples.size(), numSamples - numBurnIn);
    final List<AlleleFractionState.MinorFractions> minorFractionsSamples = modeller.getMinorFractionsSamples();
    Assert.assertEquals(minorFractionsSamples.size(), numSamples - numBurnIn);
    for (final AlleleFractionState.MinorFractions sample : minorFractionsSamples) {
        Assert.assertEquals(sample.size(), numSegments);
    }
    final List<List<Double>> minorFractionsSamplesBySegment = modeller.getMinorFractionSamplesBySegment();
    final double mcmcMeanBias = meanBiasSamples.stream().mapToDouble(x -> x).average().getAsDouble();
    final double mcmcBiasVariance = biasVarianceSamples.stream().mapToDouble(x -> x).average().getAsDouble();
    final double mcmcOutlierProbability = outlierProbabilitySamples.stream().mapToDouble(x -> x).average().getAsDouble();
    final List<Double> mcmcMinorFractions = minorFractionsSamplesBySegment.stream().map(list -> list.stream().mapToDouble(x -> x).average().getAsDouble()).collect(Collectors.toList());
    double totalSegmentError = 0.0;
    for (int segment = 0; segment < numSegments; segment++) {
        totalSegmentError += Math.abs(mcmcMinorFractions.get(segment) - simulatedData.getTrueState().segmentMinorFraction(segment));
    }
    Assert.assertEquals(mcmcMeanBias, meanBiasExpected, meanBiasTolerance);
    Assert.assertEquals(mcmcBiasVariance, biasVarianceExpected, biasVarianceTolerance);
    Assert.assertEquals(mcmcOutlierProbability, outlierProbability, outlierProbabilityTolerance);
    Assert.assertEquals(totalSegmentError / numSegments, 0.0, minorFractionTolerance);
    //test posterior summaries
    final Map<AlleleFractionParameter, PosteriorSummary> globalParameterPosteriorSummaries = modeller.getGlobalParameterPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    final PosteriorSummary meanBiasPosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.MEAN_BIAS);
    final double meanBiasPosteriorCenter = meanBiasPosteriorSummary.getCenter();
    Assert.assertEquals(meanBiasPosteriorCenter, meanBiasExpected, meanBiasTolerance);
    final PosteriorSummary biasVariancePosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.BIAS_VARIANCE);
    final double biasVariancePosteriorCenter = biasVariancePosteriorSummary.getCenter();
    Assert.assertEquals(biasVariancePosteriorCenter, biasVarianceExpected, biasVarianceTolerance);
    final PosteriorSummary outlierProbabilityPosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.OUTLIER_PROBABILITY);
    final double outlierProbabilityPosteriorCenter = outlierProbabilityPosteriorSummary.getCenter();
    Assert.assertEquals(outlierProbabilityPosteriorCenter, outlierProbability, outlierProbabilityTolerance);
    final List<PosteriorSummary> minorAlleleFractionPosteriorSummaries = modeller.getMinorAlleleFractionsPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    final List<Double> minorFractionsPosteriorCenters = minorAlleleFractionPosteriorSummaries.stream().map(PosteriorSummary::getCenter).collect(Collectors.toList());
    double totalPosteriorCentersSegmentError = 0.0;
    for (int segment = 0; segment < numSegments; segment++) {
        totalPosteriorCentersSegmentError += Math.abs(minorFractionsPosteriorCenters.get(segment) - simulatedData.getTrueState().segmentMinorFraction(segment));
    }
    Assert.assertEquals(totalPosteriorCentersSegmentError / numSegments, 0.0, minorFractionTolerance);
}
Also used : Arrays(java.util.Arrays) DataProvider(org.testng.annotations.DataProvider) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Genome(org.broadinstitute.hellbender.tools.exome.Genome) Test(org.testng.annotations.Test) SegmentUtils(org.broadinstitute.hellbender.tools.exome.SegmentUtils) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) Log(htsjdk.samtools.util.Log) Assert(org.testng.Assert) PosteriorSummary(org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) Map(java.util.Map) AllelicPanelOfNormals(org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) SparkContextFactory(org.broadinstitute.hellbender.engine.spark.SparkContextFactory) SegmentedGenome(org.broadinstitute.hellbender.tools.exome.SegmentedGenome) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) LoggingUtils(org.broadinstitute.hellbender.utils.LoggingUtils)
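
The test summarizes each post-burn-in chain by a simple average before comparing against the simulated truth and against the PosteriorSummary centers. Below is a minimal sketch of that stream-averaging pattern extracted as a standalone helper; the class and method names are hypothetical.

import java.util.List;
import java.util.stream.Collectors;

/** Hypothetical helper extracting the averaging pattern used in testMCMC. */
public final class SampleAverager {
    private SampleAverager() {}

    /** Posterior mean of a single chain of scalar samples; assumes the list is non-empty. */
    public static double posteriorMean(final List<Double> samples) {
        return samples.stream().mapToDouble(x -> x).average().getAsDouble();
    }

    /** Per-segment posterior means from per-segment sample lists, as computed for mcmcMinorFractions above. */
    public static List<Double> posteriorMeansBySegment(final List<List<Double>> samplesBySegment) {
        return samplesBySegment.stream().map(SampleAverager::posteriorMean).collect(Collectors.toList());
    }
}

With these helpers, mcmcMeanBias would be posteriorMean(meanBiasSamples) and mcmcMinorFractions would be posteriorMeansBySegment(minorFractionsSamplesBySegment).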

Example 4 with PosteriorSummary

use of org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary in project gatk by broadinstitute.

the class AlleleFractionModellerUnitTest method testBiasCorrection.

/**
     * Tests that the allelic PoN is appropriately used to correct reference bias.  The basic setup for the test data is
     * simulated hets at 1000 sites (1:1-1000) across 3 segments.  The outer two segments are balanced with
     * minor-allele fraction = 0.5; however, in the middle segment consisting of 100 sites (1:451-550), all of the sites:
     *
     * <p>
     *     1) are balanced and have biases identical to the sites in the other two segments,
     *     which are drawn from a gamma distribution with alpha = 65, beta = 60 -> mean bias = 1.083 ("SAMPLE_NORMAL")
     * </p>
     *
     * <p>
     *     2) are balanced and have relatively high biases,
     *     which are drawn from a gamma distribution with alpha = 9, beta = 6 -> mean bias = 1.5 ("SAMPLE_WITH_BAD_SNPS")
     * </p>
     *
     * <p>
     *     3) have minor-allele fraction = 0.33, copy ratio = 1.5, and biases identical to the sites in the other two segments,
     *     which are drawn from a gamma distribution with alpha = 65, beta = 60 -> mean bias = 1.083 ("SAMPLE_EVENT").
     * </p>
     *
     * In this segment, using a PoN that doesn't know about the high reference bias of these sites ("ALLELIC_PON_NORMAL"),
     * we should infer a minor-allele fraction of 6 / (6 + 9) = 0.40 in scenario 2; however, with a PoN that does know
     * about the high bias at these sites ("ALLELIC_PON_WITH_BAD_SNPS"), we correctly infer that all of the segments are balanced.
     *
     * <p>
     *     Note that alpha and beta are not actually correctly recovered in this PoN via MLE because the biases are
     *     drawn from a mixture of gamma distributions (as opposed to a single gamma distribution as assumed in the model).
     *     TODO https://github.com/broadinstitute/gatk-protected/issues/421
     * </p>
     */
@Test(dataProvider = "biasCorrection")
public void testBiasCorrection(final AllelicCountCollection sample, final AllelicPanelOfNormals allelicPoN, final double minorFractionExpectedInMiddleSegment) {
    LoggingUtils.setLoggingLevel(Log.LogLevel.INFO);
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final double minorFractionTolerance = 0.025;
    final Genome genome = new Genome(AlleleFractionSimulatedData.TRIVIAL_TARGETS, sample.getCounts());
    final List<SimpleInterval> segments = SegmentUtils.readIntervalsFromSegmentFile(SEGMENTS_FILE);
    final SegmentedGenome segmentedGenome = new SegmentedGenome(segments, genome);
    final int numSamples = 150;
    final int numBurnIn = 50;
    final AlleleFractionModeller modeller = new AlleleFractionModeller(segmentedGenome, allelicPoN);
    modeller.fitMCMC(numSamples, numBurnIn);
    final List<PosteriorSummary> minorAlleleFractionPosteriorSummaries = modeller.getMinorAlleleFractionsPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    final List<Double> minorFractionsResult = minorAlleleFractionPosteriorSummaries.stream().map(PosteriorSummary::getCenter).collect(Collectors.toList());
    final double minorFractionBalanced = 0.5;
    final List<Double> minorFractionsExpected = Arrays.asList(minorFractionBalanced, minorFractionExpectedInMiddleSegment, minorFractionBalanced);
    for (int segment = 0; segment < 3; segment++) {
        Assert.assertEquals(minorFractionsResult.get(segment), minorFractionsExpected.get(segment), minorFractionTolerance);
    }
}
Also used : PosteriorSummary(org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) SegmentedGenome(org.broadinstitute.hellbender.tools.exome.SegmentedGenome) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Genome(org.broadinstitute.hellbender.tools.exome.Genome) SegmentedGenome(org.broadinstitute.hellbender.tools.exome.SegmentedGenome) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)
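
The 6 / (6 + 9) = 0.40 figure quoted in the javadoc follows from the bias model: if reference reads at a het are inflated by a bias factor lambda, the apparent alt (here minor) allele fraction at a site with true alt fraction f is roughly f / (f + (1 - f) * lambda), which reduces to 1 / (1 + lambda) at a balanced site. A small illustrative sketch of that arithmetic follows; the helper is not GATK code, and the formula is the simplified expected-value version of the model.

/** Illustrative arithmetic for the reference-bias effect described in the javadoc above. */
public final class ReferenceBiasSketch {
    private ReferenceBiasSketch() {}

    /** Apparent alt-allele fraction at a het with true alt fraction f and reference bias lambda. */
    public static double apparentAltFraction(final double f, final double lambda) {
        return f / (f + (1 - f) * lambda);
    }

    public static void main(final String[] args) {
        // Balanced site (f = 0.5) with mean bias alpha / beta = 9 / 6 = 1.5 ("SAMPLE_WITH_BAD_SNPS"):
        // 0.5 / (0.5 + 0.5 * 1.5) = 6 / (6 + 9) = 0.40, the value quoted in the javadoc.
        System.out.println(apparentAltFraction(0.5, 9.0 / 6.0)); // 0.4
        // A PoN that models the bias at these sites lets the model recover the true balanced fraction of 0.5.
    }
}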

Example 5 with PosteriorSummary

use of org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary in project gatk by broadinstitute.

the class AllelicSplitCallerModelStateUnitTest method testSerializationRoundTrip.

@Test
public void testSerializationRoundTrip() {
    final ACNVModeledSegment acnvModeledSegment = new ACNVModeledSegment(new SimpleInterval("1", 1000, 1500), new PosteriorSummary(-4000, -4001, -4002), new PosteriorSummary(-4000, -4001, -4002));
    final List<ACNVModeledSegment> tempList = new ArrayList<>();
    tempList.add(acnvModeledSegment);
    final AllelicBalanceCallerModelState state = AllelicBalanceCallerModelState.createInitialCNLOHCallerModelState(0.2, tempList, HomoSapiensConstants.DEFAULT_PLOIDY, CNLOHCaller.NUM_RHOS);
    SparkTestUtils.roundTripInKryo(state, AllelicBalanceCallerModelState.class, SparkContextFactory.getTestSparkContext().getConf());
}
Also used : PosteriorSummary(org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) ArrayList(java.util.ArrayList) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) ACNVModeledSegment(org.broadinstitute.hellbender.tools.exome.ACNVModeledSegment) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)
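
roundTripInKryo is GATK's own test utility, driven by the Spark Kryo configuration obtained from getTestSparkContext().getConf(). As a rough, hedged illustration of what such a round trip involves in general, here is a minimal generic Kryo sketch; the class below is hypothetical and omits the Spark-specific serializer configuration that the real helper presumably applies.

import java.io.ByteArrayOutputStream;
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;

/** Hypothetical sketch of a Kryo serialize/deserialize round trip; not GATK's SparkTestUtils. */
public final class RoundTripSketch {
    private RoundTripSketch() {}

    @SuppressWarnings("unchecked")
    public static <T> T roundTrip(final T object) {
        final Kryo kryo = new Kryo();
        // Allow arbitrary classes for this sketch; Spark-based tests would instead rely on the
        // registrator from the Spark configuration. Classes without no-arg constructors may
        // additionally need an Objenesis-based instantiator strategy.
        kryo.setRegistrationRequired(false);
        final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (final Output output = new Output(bytes)) {
            kryo.writeClassAndObject(output, object);
        }
        try (final Input input = new Input(bytes.toByteArray())) {
            return (T) kryo.readClassAndObject(input);
        }
    }
}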

Aggregations

PosteriorSummary (org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) 20
SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval) 14
Test (org.testng.annotations.Test) 14
BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest) 12
ArrayList (java.util.ArrayList) 6
Collectors (java.util.stream.Collectors) 6
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext) 6
Genome (org.broadinstitute.hellbender.tools.exome.Genome) 6
SegmentedGenome (org.broadinstitute.hellbender.tools.exome.SegmentedGenome) 6
Assert (org.testng.Assert) 6
Log (htsjdk.samtools.util.Log) 4
File (java.io.File) 4
List (java.util.List) 4
Map (java.util.Map) 4
SparkContextFactory (org.broadinstitute.hellbender.engine.spark.SparkContextFactory) 4
ACNVModeledSegment (org.broadinstitute.hellbender.tools.exome.ACNVModeledSegment) 4
AllelicCountCollection (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) 4
AllelicPanelOfNormals (org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) 4
LoggingUtils (org.broadinstitute.hellbender.utils.LoggingUtils) 4
DecileCollection (org.broadinstitute.hellbender.utils.mcmc.DecileCollection) 4