Search in sources :

Example 11 with PosteriorSummary

use of org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary in project gatk-protected by broadinstitute.

the class SegmentUtils method toACNVModeledSegment.

/**
 * Builds an {@link ACNVModeledSegment} from one data line of a segment table.
 * <p>
 *     The mode/lower/upper columns of the segment-mean and minor-allele-fraction posteriors are read into
 *     two {@link PosteriorSummary} objects, the corresponding decile columns are attached to each summary,
 *     and the result is combined with the interval returned by {@code toInterval(dataLine)}.
 * </p>
 *
 * @param dataLine data line to read the posterior-summary and decile columns from
 * @return modeled segment holding the interval and both posterior summaries (with deciles set)
 */
private static ACNVModeledSegment toACNVModeledSegment(final DataLine dataLine) {
    //segment-mean posterior: mode, lower bound, upper bound
    final double segmentMeanMode = dataLine.getDouble(SegmentTableColumn.SEGMENT_MEAN_POSTERIOR_MODE.toString());
    final double segmentMeanLower = dataLine.getDouble(SegmentTableColumn.SEGMENT_MEAN_POSTERIOR_LOWER.toString());
    final double segmentMeanUpper = dataLine.getDouble(SegmentTableColumn.SEGMENT_MEAN_POSTERIOR_UPPER.toString());
    final PosteriorSummary segmentMeanSummary = new PosteriorSummary(segmentMeanMode, segmentMeanLower, segmentMeanUpper);

    //minor-allele-fraction posterior: mode, lower bound, upper bound
    final double mafMode = dataLine.getDouble(SegmentTableColumn.MINOR_ALLELE_FRACTION_POSTERIOR_MODE.toString());
    final double mafLower = dataLine.getDouble(SegmentTableColumn.MINOR_ALLELE_FRACTION_POSTERIOR_LOWER.toString());
    final double mafUpper = dataLine.getDouble(SegmentTableColumn.MINOR_ALLELE_FRACTION_POSTERIOR_UPPER.toString());
    final PosteriorSummary mafSummary = new PosteriorSummary(mafMode, mafLower, mafUpper);

    //deciles are read column-by-column, in the order given by the column collections
    final DecileCollection mafDeciles = new DecileCollection(
            SegmentTableColumn.ACNV_MODELED_SEGMENT_MAF_DECILES_SUMMARY_COLUMNS.names().stream()
                    .map(dataLine::getDouble)
                    .collect(Collectors.toList()));
    final DecileCollection segmentMeanDeciles = new DecileCollection(
            SegmentTableColumn.ACNV_MODELED_SEGMENT_MEAN_DECILES_SUMMARY_COLUMNS.names().stream()
                    .map(dataLine::getDouble)
                    .collect(Collectors.toList()));

    mafSummary.setDeciles(mafDeciles);
    segmentMeanSummary.setDeciles(segmentMeanDeciles);

    return new ACNVModeledSegment(toInterval(dataLine), segmentMeanSummary, mafSummary);
}
Also used : PosteriorSummary(org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) DecileCollection(org.broadinstitute.hellbender.utils.mcmc.DecileCollection)

Example 12 with PosteriorSummary

use of org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary in project gatk by broadinstitute.

the class SegmentUtils method toACNVModeledSegment.

/**
 * Converts a single segment-table data line into an {@link ACNVModeledSegment}.
 * <p>
 *     Two {@link PosteriorSummary} objects (segment mean and minor-allele fraction) are created from the
 *     posterior mode/lower/upper columns, each is given a {@link DecileCollection} read from the matching
 *     decile columns, and both are paired with the interval obtained from {@code toInterval(dataLine)}.
 * </p>
 *
 * @param dataLine data line supplying all column values
 * @return the modeled segment described by {@code dataLine}
 */
private static ACNVModeledSegment toACNVModeledSegment(final DataLine dataLine) {
    final PosteriorSummary meanPosterior = new PosteriorSummary(
            dataLine.getDouble(SegmentTableColumn.SEGMENT_MEAN_POSTERIOR_MODE.toString()),
            dataLine.getDouble(SegmentTableColumn.SEGMENT_MEAN_POSTERIOR_LOWER.toString()),
            dataLine.getDouble(SegmentTableColumn.SEGMENT_MEAN_POSTERIOR_UPPER.toString()));
    final PosteriorSummary minorAlleleFractionPosterior = new PosteriorSummary(
            dataLine.getDouble(SegmentTableColumn.MINOR_ALLELE_FRACTION_POSTERIOR_MODE.toString()),
            dataLine.getDouble(SegmentTableColumn.MINOR_ALLELE_FRACTION_POSTERIOR_LOWER.toString()),
            dataLine.getDouble(SegmentTableColumn.MINOR_ALLELE_FRACTION_POSTERIOR_UPPER.toString()));

    //read the decile columns (minor-allele fraction first, then segment mean)
    final DecileCollection minorAlleleFractionDeciles = new DecileCollection(
            SegmentTableColumn.ACNV_MODELED_SEGMENT_MAF_DECILES_SUMMARY_COLUMNS.names()
                    .stream()
                    .map(dataLine::getDouble)
                    .collect(Collectors.toList()));
    final DecileCollection meanDeciles = new DecileCollection(
            SegmentTableColumn.ACNV_MODELED_SEGMENT_MEAN_DECILES_SUMMARY_COLUMNS.names()
                    .stream()
                    .map(dataLine::getDouble)
                    .collect(Collectors.toList()));

    //attach the deciles to their respective summaries before assembling the segment
    minorAlleleFractionPosterior.setDeciles(minorAlleleFractionDeciles);
    meanPosterior.setDeciles(meanDeciles);
    return new ACNVModeledSegment(toInterval(dataLine), meanPosterior, minorAlleleFractionPosterior);
}
Also used : PosteriorSummary(org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) DecileCollection(org.broadinstitute.hellbender.utils.mcmc.DecileCollection)

Example 13 with PosteriorSummary

use of org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary in project gatk by broadinstitute.

the class ACNVModeller method fitModel.

/**
 * Fits the copy-ratio and allele-fraction models by Markov-Chain Monte Carlo, using the
 * numbers of total samples and burn-in samples specified at construction, and then replaces
 * the current list of modeled segments with one built from the new posterior summaries.
 */
public void fitModel() {
    //sample the copy-ratio posterior
    logger.info("Fitting copy-ratio model...");
    copyRatioModeller = new CopyRatioModeller(segmentedGenome);
    copyRatioModeller.fitMCMC(numSamplesCopyRatio, numBurnInCopyRatio);

    //sample the allele-fraction posterior
    logger.info("Fitting allele-fraction model...");
    alleleFractionModeller = new AlleleFractionModeller(segmentedGenome, allelicPoN);
    alleleFractionModeller.fitMCMC(numSamplesAlleleFraction, numBurnInAlleleFraction);

    //drop the previous modeled segments and rebuild them, one per segment of the segmented genome
    segments.clear();
    final List<SimpleInterval> intervals = segmentedGenome.getSegments();
    final List<PosteriorSummary> segmentMeanSummaries =
            copyRatioModeller.getSegmentMeansPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    final List<PosteriorSummary> minorAlleleFractionSummaries =
            alleleFractionModeller.getMinorAlleleFractionsPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    final int numSegments = intervals.size();
    for (int index = 0; index < numSegments; index++) {
        final ACNVModeledSegment modeledSegment = new ACNVModeledSegment(
                intervals.get(index),
                segmentMeanSummaries.get(index),
                minorAlleleFractionSummaries.get(index));
        segments.add(modeledSegment);
    }

    isModelFit = true;
}
Also used : PosteriorSummary(org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) CopyRatioModeller(org.broadinstitute.hellbender.tools.exome.copyratio.CopyRatioModeller) AlleleFractionModeller(org.broadinstitute.hellbender.tools.exome.allelefraction.AlleleFractionModeller) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 14 with PosteriorSummary

use of org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary in project gatk by broadinstitute.

the class CopyRatioModellerUnitTest method testRunMCMCOnCopyRatioSegmentedGenome.

/**
     * Tests Bayesian inference of the copy-ratio model via MCMC.
     * <p>
     *     Recovery of input values for the variance and outlier-probability global parameters is checked.
     *     In particular, the true input value of the variance must fall within
     *     {@link CopyRatioModellerUnitTest#MULTIPLES_OF_SD_THRESHOLD}
     *     standard deviations of the posterior mean and the standard deviation of the posterior must agree
     *     with the analytic value to within a relative error of
     *     {@link CopyRatioModellerUnitTest#RELATIVE_ERROR_THRESHOLD} for 250 samples
     *     (after 250 burn-in samples have been discarded).  Similar criteria are applied
     *     to the recovery of the true input value for the outlier probability.
     * </p>
     * <p>
     *     Furthermore, the number of truth values for the segment-level means falling outside confidence intervals of
     *     1-sigma, 2-sigma, and 3-sigma given by the posteriors in each segment should be roughly consistent with
     *     a normal distribution (i.e., ~32, ~5, and ~0, respectively; we allow for errors of
     *     {@link CopyRatioModellerUnitTest#DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_1_SIGMA},
     *     {@link CopyRatioModellerUnitTest#DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_2_SIGMA}, and
     *     {@link CopyRatioModellerUnitTest#DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_3_SIGMA}, respectively).
     *     The mean of the standard deviations of the posteriors for the segment-level means should also be
     *     recovered to within a relative error of {@link CopyRatioModellerUnitTest#RELATIVE_ERROR_THRESHOLD}.
     * </p>
     * <p>
     *     Finally, the recovered values for the latent outlier-indicator parameters should agree with those used to
     *     generate the data.  For each indicator, the recovered value (i.e., outlier or non-outlier) is taken to be
     *     that given by the majority of posterior samples.  We require that at least
     *     {@link CopyRatioModellerUnitTest#FRACTION_OF_OUTLIER_INDICATORS_CORRECT_THRESHOLD}
     *     of the 10000 indicators are recovered correctly.
     * </p>
     * <p>
     *     With these specifications, this unit test is not overly brittle (i.e., it should pass for a large majority
     *     of randomly generated data sets), but it is still brittle enough to check for correctness of the sampling
     *     (for example, specifying a sufficiently incorrect likelihood will cause the test to fail).
     * </p>
     *
     * @throws IOException declared by the test signature; presumably raised while reading the input files
     *                     (TODO confirm which helper throws it)
     */
@Test
public void testRunMCMCOnCopyRatioSegmentedGenome() throws IOException {
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    LoggingUtils.setLoggingLevel(Log.LogLevel.INFO);
    //load data (coverages and number of targets in each segment)
    final ReadCountCollection coverage = ReadCountCollectionUtils.parse(COVERAGES_FILE);
    //Genome with no SNPs
    final Genome genome = new Genome(coverage, Collections.emptyList());
    final SegmentedGenome segmentedGenome = new SegmentedGenome(SEGMENT_FILE, genome);
    //run MCMC
    final CopyRatioModeller modeller = new CopyRatioModeller(segmentedGenome);
    modeller.fitMCMC(NUM_SAMPLES, NUM_BURN_IN);
    //check statistics of global-parameter posterior samples (i.e., posterior mode and standard deviation)
    final Map<CopyRatioParameter, PosteriorSummary> globalParameterPosteriorSummaries = modeller.getGlobalParameterPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    final PosteriorSummary variancePosteriorSummary = globalParameterPosteriorSummaries.get(CopyRatioParameter.VARIANCE);
    final double variancePosteriorCenter = variancePosteriorSummary.getCenter();
    //half the width of the credible interval is used as the estimate of the posterior standard deviation
    final double variancePosteriorStandardDeviation = (variancePosteriorSummary.getUpper() - variancePosteriorSummary.getLower()) / 2;
    //the center must lie within MULTIPLES_OF_SD_THRESHOLD truth standard deviations of the truth value
    Assert.assertEquals(Math.abs(variancePosteriorCenter - VARIANCE_TRUTH), 0., MULTIPLES_OF_SD_THRESHOLD * VARIANCE_POSTERIOR_STANDARD_DEVIATION_TRUTH);
    Assert.assertEquals(relativeError(variancePosteriorStandardDeviation, VARIANCE_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD);
    //same two checks for the outlier-probability global parameter
    final PosteriorSummary outlierProbabilityPosteriorSummary = globalParameterPosteriorSummaries.get(CopyRatioParameter.OUTLIER_PROBABILITY);
    final double outlierProbabilityPosteriorCenter = outlierProbabilityPosteriorSummary.getCenter();
    final double outlierProbabilityPosteriorStandardDeviation = (outlierProbabilityPosteriorSummary.getUpper() - outlierProbabilityPosteriorSummary.getLower()) / 2;
    Assert.assertEquals(Math.abs(outlierProbabilityPosteriorCenter - OUTLIER_PROBABILITY_TRUTH), 0., MULTIPLES_OF_SD_THRESHOLD * OUTLIER_PROBABILITY_POSTERIOR_STANDARD_DEVIATION_TRUTH);
    Assert.assertEquals(relativeError(outlierProbabilityPosteriorStandardDeviation, OUTLIER_PROBABILITY_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD);
    //check statistics of segment-mean posterior samples (i.e., posterior means and standard deviations)
    final List<Double> meansTruth = loadList(MEANS_TRUTH_FILE, Double::parseDouble);
    int numMeansOutsideOneSigma = 0;
    int numMeansOutsideTwoSigma = 0;
    int numMeansOutsideThreeSigma = 0;
    //the number of segments is taken from the number of truth values
    final int numSegments = meansTruth.size();
    //segment-mean posteriors are expected to be Gaussian, so PosteriorSummary for
    // {@link CopyRatioModellerUnitTest#CREDIBLE_INTERVAL_ALPHA}=0.32 is
    //(posterior mean, posterior mean - posterior standard deviation, posterior mean + posterior standard deviation)
    final List<PosteriorSummary> meanPosteriorSummaries = modeller.getSegmentMeansPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    final double[] meanPosteriorStandardDeviations = new double[numSegments];
    for (int segment = 0; segment < numSegments; segment++) {
        final double meanPosteriorCenter = meanPosteriorSummaries.get(segment).getCenter();
        final double meanPosteriorStandardDeviation = (meanPosteriorSummaries.get(segment).getUpper() - meanPosteriorSummaries.get(segment).getLower()) / 2.;
        meanPosteriorStandardDeviations[segment] = meanPosteriorStandardDeviation;
        final double absoluteDifferenceFromTruth = Math.abs(meanPosteriorCenter - meansTruth.get(segment));
        //the three counters are cumulative: a mean outside 3 sigma is also counted as outside 2 and 1 sigma
        if (absoluteDifferenceFromTruth > meanPosteriorStandardDeviation) {
            numMeansOutsideOneSigma++;
        }
        if (absoluteDifferenceFromTruth > 2 * meanPosteriorStandardDeviation) {
            numMeansOutsideTwoSigma++;
        }
        if (absoluteDifferenceFromTruth > 3 * meanPosteriorStandardDeviation) {
            numMeansOutsideThreeSigma++;
        }
    }
    final double meanPosteriorStandardDeviationsMean = new Mean().evaluate(meanPosteriorStandardDeviations);
    //NOTE(review): the expected counts 100 - 68 and 100 - 95 presumably assume 100 segments in MEANS_TRUTH_FILE -- confirm
    Assert.assertEquals(numMeansOutsideOneSigma, 100 - 68, DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_1_SIGMA);
    Assert.assertEquals(numMeansOutsideTwoSigma, 100 - 95, DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_2_SIGMA);
    Assert.assertTrue(numMeansOutsideThreeSigma <= DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_3_SIGMA);
    Assert.assertEquals(relativeError(meanPosteriorStandardDeviationsMean, MEAN_POSTERIOR_STANDARD_DEVIATION_MEAN_TRUTH), 0., RELATIVE_ERROR_THRESHOLD);
    //check accuracy of latent outlier-indicator posterior samples
    final List<CopyRatioState.OutlierIndicators> outlierIndicatorSamples = modeller.getOutlierIndicatorsSamples();
    int numIndicatorsCorrect = 0;
    final int numIndicatorSamples = outlierIndicatorSamples.size();
    //truth indicators are loaded as integers; a value of 1 is treated as "outlier", anything else as "non-outlier"
    final List<Integer> outlierIndicatorsTruthAsInt = loadList(OUTLIER_INDICATORS_TRUTH_FILE, Integer::parseInt);
    final List<Boolean> outlierIndicatorsTruth = outlierIndicatorsTruthAsInt.stream().map(i -> i == 1).collect(Collectors.toList());
    for (int target = 0; target < coverage.targets().size(); target++) {
        //count the posterior samples in which this target is flagged as an outlier
        int numSamplesOutliers = 0;
        for (final CopyRatioState.OutlierIndicators sample : outlierIndicatorSamples) {
            if (sample.get(target)) {
                numSamplesOutliers++;
            }
        }
        //take predicted state of indicator to be given by the majority of samples
        //(because of >=, an exact tie is predicted as "outlier")
        if ((numSamplesOutliers >= numIndicatorSamples / 2.) == outlierIndicatorsTruth.get(target)) {
            numIndicatorsCorrect++;
        }
    }
    //cast to double so that the fraction is not truncated by integer division
    final double fractionOfOutlierIndicatorsCorrect = (double) numIndicatorsCorrect / coverage.targets().size();
    Assert.assertTrue(fractionOfOutlierIndicatorsCorrect >= FRACTION_OF_OUTLIER_INDICATORS_CORRECT_THRESHOLD);
}
Also used : BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Genome(org.broadinstitute.hellbender.tools.exome.Genome) FileUtils(org.apache.commons.io.FileUtils) Test(org.testng.annotations.Test) IOException(java.io.IOException) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) File(java.io.File) Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) List(java.util.List) Log(htsjdk.samtools.util.Log) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) UserException(org.broadinstitute.hellbender.exceptions.UserException) Assert(org.testng.Assert) PosteriorSummary(org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) ReadCountCollectionUtils(org.broadinstitute.hellbender.tools.exome.ReadCountCollectionUtils) Map(java.util.Map) SparkContextFactory(org.broadinstitute.hellbender.engine.spark.SparkContextFactory) SegmentedGenome(org.broadinstitute.hellbender.tools.exome.SegmentedGenome) LoggingUtils(org.broadinstitute.hellbender.utils.LoggingUtils) Collections(java.util.Collections) Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) PosteriorSummary(org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) SegmentedGenome(org.broadinstitute.hellbender.tools.exome.SegmentedGenome) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Genome(org.broadinstitute.hellbender.tools.exome.Genome) SegmentedGenome(org.broadinstitute.hellbender.tools.exome.SegmentedGenome) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 15 with PosteriorSummary

use of org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary in project gatk by broadinstitute.

the class ACNVModeledSegmentConversionUtilsUnitTest method testSimpleConversionCannotYieldSegmentMeanOfZero.

/**
 * Checks that converting an {@link ACNVModeledSegment} with a very negative segment-mean posterior
 * (center of -4000) never produces a segment mean of zero in copy-ratio space.
 * <p>
 *     The converted segment must report a strictly positive mean in copy-ratio space, and its segment mean
 *     must equal {@code ParamUtils.log2(PCATangentNormalizationUtils.EPSILON)} to within 1e-10.
 * </p>
 */
@Test
public void testSimpleConversionCannotYieldSegmentMeanOfZero() {
    final ACNVModeledSegment acnvModeledSegment = new ACNVModeledSegment(new SimpleInterval("1", 1000, 1500), new PosteriorSummary(-4000, -4001, -4002), new PosteriorSummary(-4000, -4001, -4002));
    //a single target lying inside the segment's interval, with a read-count value of 0.0
    final Target target = new Target("test", new SimpleInterval("1", 1300, 1302));
    final TargetCollection<ReadCountRecord.SingleSampleRecord> targetCollection = new HashedListTargetCollection<>(Collections.singletonList(new ReadCountRecord.SingleSampleRecord(target, 0.0)));
    final ModeledSegment guess = ACNVModeledSegmentConversionUtils.convertACNVModeledSegmentToModeledSegment(acnvModeledSegment, targetCollection);
    Assert.assertTrue(guess.getSegmentMeanInCRSpace() > 0);
    Assert.assertEquals(guess.getSegmentMean(), ParamUtils.log2(PCATangentNormalizationUtils.EPSILON), 1e-10);
}
Also used : PosteriorSummary(org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) ArrayList(java.util.ArrayList) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

PosteriorSummary (org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary)20 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)14 Test (org.testng.annotations.Test)14 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)12 ArrayList (java.util.ArrayList)6 Collectors (java.util.stream.Collectors)6 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)6 Genome (org.broadinstitute.hellbender.tools.exome.Genome)6 SegmentedGenome (org.broadinstitute.hellbender.tools.exome.SegmentedGenome)6 Assert (org.testng.Assert)6 Log (htsjdk.samtools.util.Log)4 File (java.io.File)4 List (java.util.List)4 Map (java.util.Map)4 SparkContextFactory (org.broadinstitute.hellbender.engine.spark.SparkContextFactory)4 ACNVModeledSegment (org.broadinstitute.hellbender.tools.exome.ACNVModeledSegment)4 AllelicCountCollection (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection)4 AllelicPanelOfNormals (org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals)4 LoggingUtils (org.broadinstitute.hellbender.utils.LoggingUtils)4 DecileCollection (org.broadinstitute.hellbender.utils.mcmc.DecileCollection)4