Search in sources :

Example 6 with AllelicPanelOfNormals

use of org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals in project gatk by broadinstitute.

the class AlleleFractionModellerUnitTest method testMCMC.

private void testMCMC(final double meanBiasSimulated, final double biasVarianceSimulated, final double meanBiasExpected, final double biasVarianceExpected, final AllelicPanelOfNormals allelicPoN) {
    LoggingUtils.setLoggingLevel(Log.LogLevel.INFO);
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final int numSamples = 150;
    final int numBurnIn = 50;
    final double averageHetsPerSegment = 50;
    final int numSegments = 100;
    final int averageDepth = 50;
    final double outlierProbability = 0.02;
    // note: the following tolerances could actually be made much smaller if we used more segments and/or
    // more hets -- most of the error is the sampling error of a finite simulated data set, not numerical error of MCMC
    final double minorFractionTolerance = 0.02;
    final double meanBiasTolerance = 0.02;
    final double biasVarianceTolerance = 0.01;
    final double outlierProbabilityTolerance = 0.02;
    final AlleleFractionSimulatedData simulatedData = new AlleleFractionSimulatedData(averageHetsPerSegment, numSegments, averageDepth, meanBiasSimulated, biasVarianceSimulated, outlierProbability);
    final AlleleFractionModeller modeller = new AlleleFractionModeller(simulatedData.getSegmentedGenome(), allelicPoN);
    modeller.fitMCMC(numSamples, numBurnIn);
    final List<Double> meanBiasSamples = modeller.getmeanBiasSamples();
    Assert.assertEquals(meanBiasSamples.size(), numSamples - numBurnIn);
    final List<Double> biasVarianceSamples = modeller.getBiasVarianceSamples();
    Assert.assertEquals(biasVarianceSamples.size(), numSamples - numBurnIn);
    final List<Double> outlierProbabilitySamples = modeller.getOutlierProbabilitySamples();
    Assert.assertEquals(outlierProbabilitySamples.size(), numSamples - numBurnIn);
    final List<AlleleFractionState.MinorFractions> minorFractionsSamples = modeller.getMinorFractionsSamples();
    Assert.assertEquals(minorFractionsSamples.size(), numSamples - numBurnIn);
    for (final AlleleFractionState.MinorFractions sample : minorFractionsSamples) {
        Assert.assertEquals(sample.size(), numSegments);
    }
    final List<List<Double>> minorFractionsSamplesBySegment = modeller.getMinorFractionSamplesBySegment();
    final double mcmcMeanBias = meanBiasSamples.stream().mapToDouble(x -> x).average().getAsDouble();
    final double mcmcBiasVariance = biasVarianceSamples.stream().mapToDouble(x -> x).average().getAsDouble();
    final double mcmcOutlierProbability = outlierProbabilitySamples.stream().mapToDouble(x -> x).average().getAsDouble();
    final List<Double> mcmcMinorFractions = minorFractionsSamplesBySegment.stream().map(list -> list.stream().mapToDouble(x -> x).average().getAsDouble()).collect(Collectors.toList());
    double totalSegmentError = 0.0;
    for (int segment = 0; segment < numSegments; segment++) {
        totalSegmentError += Math.abs(mcmcMinorFractions.get(segment) - simulatedData.getTrueState().segmentMinorFraction(segment));
    }
    Assert.assertEquals(mcmcMeanBias, meanBiasExpected, meanBiasTolerance);
    Assert.assertEquals(mcmcBiasVariance, biasVarianceExpected, biasVarianceTolerance);
    Assert.assertEquals(mcmcOutlierProbability, outlierProbability, outlierProbabilityTolerance);
    Assert.assertEquals(totalSegmentError / numSegments, 0.0, minorFractionTolerance);
    //test posterior summaries
    final Map<AlleleFractionParameter, PosteriorSummary> globalParameterPosteriorSummaries = modeller.getGlobalParameterPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    final PosteriorSummary meanBiasPosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.MEAN_BIAS);
    final double meanBiasPosteriorCenter = meanBiasPosteriorSummary.getCenter();
    Assert.assertEquals(meanBiasPosteriorCenter, meanBiasExpected, meanBiasTolerance);
    final PosteriorSummary biasVariancePosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.BIAS_VARIANCE);
    final double biasVariancePosteriorCenter = biasVariancePosteriorSummary.getCenter();
    Assert.assertEquals(biasVariancePosteriorCenter, biasVarianceExpected, biasVarianceTolerance);
    final PosteriorSummary outlierProbabilityPosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.OUTLIER_PROBABILITY);
    final double outlierProbabilityPosteriorCenter = outlierProbabilityPosteriorSummary.getCenter();
    Assert.assertEquals(outlierProbabilityPosteriorCenter, outlierProbability, outlierProbabilityTolerance);
    final List<PosteriorSummary> minorAlleleFractionPosteriorSummaries = modeller.getMinorAlleleFractionsPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    final List<Double> minorFractionsPosteriorCenters = minorAlleleFractionPosteriorSummaries.stream().map(PosteriorSummary::getCenter).collect(Collectors.toList());
    double totalPosteriorCentersSegmentError = 0.0;
    for (int segment = 0; segment < numSegments; segment++) {
        totalPosteriorCentersSegmentError += Math.abs(minorFractionsPosteriorCenters.get(segment) - simulatedData.getTrueState().segmentMinorFraction(segment));
    }
    Assert.assertEquals(totalPosteriorCentersSegmentError / numSegments, 0.0, minorFractionTolerance);
}
Also used : Arrays(java.util.Arrays) DataProvider(org.testng.annotations.DataProvider) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Genome(org.broadinstitute.hellbender.tools.exome.Genome) Test(org.testng.annotations.Test) SegmentUtils(org.broadinstitute.hellbender.tools.exome.SegmentUtils) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) Log(htsjdk.samtools.util.Log) Assert(org.testng.Assert) PosteriorSummary(org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) Map(java.util.Map) AllelicPanelOfNormals(org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) SparkContextFactory(org.broadinstitute.hellbender.engine.spark.SparkContextFactory) SegmentedGenome(org.broadinstitute.hellbender.tools.exome.SegmentedGenome) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) LoggingUtils(org.broadinstitute.hellbender.utils.LoggingUtils) PosteriorSummary(org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) List(java.util.List) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext)

Example 7 with AllelicPanelOfNormals

use of org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals in project gatk-protected by broadinstitute.

the class PerformAlleleFractionSegmentation method doWork.

@Override
public Object doWork() {
    final String sampleName = FilenameUtils.getBaseName(snpCountsFile.getAbsolutePath());
    final AllelicPanelOfNormals allelicPoN = allelicPoNFile != null ? AllelicPanelOfNormals.read(allelicPoNFile) : AllelicPanelOfNormals.EMPTY_PON;
    final AllelicCountCollection acc = new AllelicCountCollection(snpCountsFile);
    final AlleleFractionSegmenter segmenter = new AlleleFractionSegmenter(initialNumStates, acc, allelicPoN);
    final List<ModeledSegment> segments = segmenter.getModeledSegments();
    SegmentUtils.writeModeledSegmentFile(outputSegmentsFile, segments, sampleName, true);
    return "SUCCESS";
}
Also used : AllelicPanelOfNormals(org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) ModeledSegment(org.broadinstitute.hellbender.tools.exome.ModeledSegment)

Example 8 with AllelicPanelOfNormals

use of org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals in project gatk-protected by broadinstitute.

the class AlleleFractionModellerUnitTest method dataBiasCorrection.

@DataProvider(name = "biasCorrection")
public Object[][] dataBiasCorrection() {
    LoggingUtils.setLoggingLevel(Log.LogLevel.INFO);
    final AllelicCountCollection sampleNormal = new AllelicCountCollection(SAMPLE_NORMAL_FILE);
    final AllelicCountCollection sampleWithBadSNPs = new AllelicCountCollection(SAMPLE_WITH_BAD_SNPS_FILE);
    final AllelicCountCollection sampleWithEvent = new AllelicCountCollection(SAMPLE_WITH_EVENT_FILE);
    final AllelicPanelOfNormals allelicPoNNormal = new AllelicPanelOfNormals(new AllelicCountCollection(ALLELIC_PON_NORMAL_COUNTS_FILE));
    final AllelicPanelOfNormals allelicPoNWithBadSNPs = new AllelicPanelOfNormals(new AllelicCountCollection(ALLELIC_PON_WITH_BAD_SNPS_COUNTS_FILE));
    final double minorFractionExpectedInMiddleSegmentNormal = 0.5;
    final double minorFractionExpectedInMiddleSegmentWithBadSNPsAndNormalPoN = 0.4;
    final double minorFractionExpectedInMiddleSegmentWithEvent = 0.33;
    return new Object[][] { { sampleNormal, allelicPoNNormal, minorFractionExpectedInMiddleSegmentNormal }, { sampleWithBadSNPs, allelicPoNNormal, minorFractionExpectedInMiddleSegmentWithBadSNPsAndNormalPoN }, { sampleWithEvent, allelicPoNNormal, minorFractionExpectedInMiddleSegmentWithEvent }, { sampleWithBadSNPs, allelicPoNWithBadSNPs, minorFractionExpectedInMiddleSegmentNormal } };
}
Also used : AllelicPanelOfNormals(org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) DataProvider(org.testng.annotations.DataProvider)

Example 9 with AllelicPanelOfNormals

use of org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals in project gatk-protected by broadinstitute.

the class AlleleFractionModellerUnitTest method testMCMC.

private void testMCMC(final double meanBiasSimulated, final double biasVarianceSimulated, final double meanBiasExpected, final double biasVarianceExpected, final AllelicPanelOfNormals allelicPoN) {
    LoggingUtils.setLoggingLevel(Log.LogLevel.INFO);
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final int numSamples = 150;
    final int numBurnIn = 50;
    final double averageHetsPerSegment = 50;
    final int numSegments = 100;
    final int averageDepth = 50;
    final double outlierProbability = 0.02;
    // note: the following tolerances could actually be made much smaller if we used more segments and/or
    // more hets -- most of the error is the sampling error of a finite simulated data set, not numerical error of MCMC
    final double minorFractionTolerance = 0.02;
    final double meanBiasTolerance = 0.02;
    final double biasVarianceTolerance = 0.01;
    final double outlierProbabilityTolerance = 0.02;
    final AlleleFractionSimulatedData simulatedData = new AlleleFractionSimulatedData(averageHetsPerSegment, numSegments, averageDepth, meanBiasSimulated, biasVarianceSimulated, outlierProbability);
    final AlleleFractionModeller modeller = new AlleleFractionModeller(simulatedData.getSegmentedGenome(), allelicPoN);
    modeller.fitMCMC(numSamples, numBurnIn);
    final List<Double> meanBiasSamples = modeller.getmeanBiasSamples();
    Assert.assertEquals(meanBiasSamples.size(), numSamples - numBurnIn);
    final List<Double> biasVarianceSamples = modeller.getBiasVarianceSamples();
    Assert.assertEquals(biasVarianceSamples.size(), numSamples - numBurnIn);
    final List<Double> outlierProbabilitySamples = modeller.getOutlierProbabilitySamples();
    Assert.assertEquals(outlierProbabilitySamples.size(), numSamples - numBurnIn);
    final List<AlleleFractionState.MinorFractions> minorFractionsSamples = modeller.getMinorFractionsSamples();
    Assert.assertEquals(minorFractionsSamples.size(), numSamples - numBurnIn);
    for (final AlleleFractionState.MinorFractions sample : minorFractionsSamples) {
        Assert.assertEquals(sample.size(), numSegments);
    }
    final List<List<Double>> minorFractionsSamplesBySegment = modeller.getMinorFractionSamplesBySegment();
    final double mcmcMeanBias = meanBiasSamples.stream().mapToDouble(x -> x).average().getAsDouble();
    final double mcmcBiasVariance = biasVarianceSamples.stream().mapToDouble(x -> x).average().getAsDouble();
    final double mcmcOutlierProbability = outlierProbabilitySamples.stream().mapToDouble(x -> x).average().getAsDouble();
    final List<Double> mcmcMinorFractions = minorFractionsSamplesBySegment.stream().map(list -> list.stream().mapToDouble(x -> x).average().getAsDouble()).collect(Collectors.toList());
    double totalSegmentError = 0.0;
    for (int segment = 0; segment < numSegments; segment++) {
        totalSegmentError += Math.abs(mcmcMinorFractions.get(segment) - simulatedData.getTrueState().segmentMinorFraction(segment));
    }
    Assert.assertEquals(mcmcMeanBias, meanBiasExpected, meanBiasTolerance);
    Assert.assertEquals(mcmcBiasVariance, biasVarianceExpected, biasVarianceTolerance);
    Assert.assertEquals(mcmcOutlierProbability, outlierProbability, outlierProbabilityTolerance);
    Assert.assertEquals(totalSegmentError / numSegments, 0.0, minorFractionTolerance);
    //test posterior summaries
    final Map<AlleleFractionParameter, PosteriorSummary> globalParameterPosteriorSummaries = modeller.getGlobalParameterPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    final PosteriorSummary meanBiasPosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.MEAN_BIAS);
    final double meanBiasPosteriorCenter = meanBiasPosteriorSummary.getCenter();
    Assert.assertEquals(meanBiasPosteriorCenter, meanBiasExpected, meanBiasTolerance);
    final PosteriorSummary biasVariancePosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.BIAS_VARIANCE);
    final double biasVariancePosteriorCenter = biasVariancePosteriorSummary.getCenter();
    Assert.assertEquals(biasVariancePosteriorCenter, biasVarianceExpected, biasVarianceTolerance);
    final PosteriorSummary outlierProbabilityPosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.OUTLIER_PROBABILITY);
    final double outlierProbabilityPosteriorCenter = outlierProbabilityPosteriorSummary.getCenter();
    Assert.assertEquals(outlierProbabilityPosteriorCenter, outlierProbability, outlierProbabilityTolerance);
    final List<PosteriorSummary> minorAlleleFractionPosteriorSummaries = modeller.getMinorAlleleFractionsPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    final List<Double> minorFractionsPosteriorCenters = minorAlleleFractionPosteriorSummaries.stream().map(PosteriorSummary::getCenter).collect(Collectors.toList());
    double totalPosteriorCentersSegmentError = 0.0;
    for (int segment = 0; segment < numSegments; segment++) {
        totalPosteriorCentersSegmentError += Math.abs(minorFractionsPosteriorCenters.get(segment) - simulatedData.getTrueState().segmentMinorFraction(segment));
    }
    Assert.assertEquals(totalPosteriorCentersSegmentError / numSegments, 0.0, minorFractionTolerance);
}
Also used : Arrays(java.util.Arrays) DataProvider(org.testng.annotations.DataProvider) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Genome(org.broadinstitute.hellbender.tools.exome.Genome) Test(org.testng.annotations.Test) SegmentUtils(org.broadinstitute.hellbender.tools.exome.SegmentUtils) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) Log(htsjdk.samtools.util.Log) Assert(org.testng.Assert) PosteriorSummary(org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) Map(java.util.Map) AllelicPanelOfNormals(org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) SparkContextFactory(org.broadinstitute.hellbender.engine.spark.SparkContextFactory) SegmentedGenome(org.broadinstitute.hellbender.tools.exome.SegmentedGenome) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) LoggingUtils(org.broadinstitute.hellbender.utils.LoggingUtils) PosteriorSummary(org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary) List(java.util.List) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext)

Example 10 with AllelicPanelOfNormals

use of org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals in project gatk by broadinstitute.

the class CreateAllelicPanelOfNormalsIntegrationTest method testHDF5OutputOnly.

@Test(dataProvider = "dataSiteFrequency")
public void testHDF5OutputOnly(final String[] testArguments, final File expectedHDF5File, final File expectedTSVFile) {
    final File allelicPoNHDF5File = createTempFile("create-allelic-pon-test", ".pon");
    allelicPoNHDF5File.delete();
    final String[] commonArguments = ArrayUtils.addAll(PULLDOWN_FILE_ARGUMENTS, "--" + StandardArgumentDefinitions.OUTPUT_LONG_NAME, allelicPoNHDF5File.getAbsolutePath());
    final String[] arguments = ArrayUtils.addAll(commonArguments, testArguments);
    runCommandLine(arguments);
    final AllelicPanelOfNormals resultHDF5 = AllelicPanelOfNormals.read(allelicPoNHDF5File);
    final AllelicPanelOfNormals expectedHDF5 = AllelicPanelOfNormals.read(expectedHDF5File);
    AllelicPoNTestUtils.assertAllelicPoNsEqual(resultHDF5, expectedHDF5);
    //check overwrite
    runCommandLine(arguments);
    final AllelicPanelOfNormals resultHDF5Overwrite = AllelicPanelOfNormals.read(allelicPoNHDF5File);
    AllelicPoNTestUtils.assertAllelicPoNsEqual(resultHDF5Overwrite, expectedHDF5);
}
Also used : AllelicPanelOfNormals(org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) File(java.io.File) Test(org.testng.annotations.Test) CommandLineProgramTest(org.broadinstitute.hellbender.CommandLineProgramTest)

Aggregations

AllelicPanelOfNormals (org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals)20 AllelicCountCollection (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection)12 File (java.io.File)10 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)8 Test (org.testng.annotations.Test)8 HDF5Library (org.broadinstitute.hdf5.HDF5Library)6 Arrays (java.util.Arrays)4 List (java.util.List)4 Collectors (java.util.stream.Collectors)4 CommandLineProgramTest (org.broadinstitute.hellbender.CommandLineProgramTest)4 PosteriorSummary (org.broadinstitute.hellbender.utils.mcmc.PosteriorSummary)4 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)4 DataProvider (org.testng.annotations.DataProvider)4 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 Log (htsjdk.samtools.util.Log)2 IOException (java.io.IOException)2 Map (java.util.Map)2 Pair (org.apache.commons.lang3.tuple.Pair)2 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)2 Argument (org.broadinstitute.barclay.argparser.Argument)2