use of org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals in project gatk by broadinstitute.
the class AlleleFractionModellerUnitTest method testMCMC.
private void testMCMC(final double meanBiasSimulated, final double biasVarianceSimulated, final double meanBiasExpected, final double biasVarianceExpected, final AllelicPanelOfNormals allelicPoN) {
LoggingUtils.setLoggingLevel(Log.LogLevel.INFO);
final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
final int numSamples = 150;
final int numBurnIn = 50;
final double averageHetsPerSegment = 50;
final int numSegments = 100;
final int averageDepth = 50;
final double outlierProbability = 0.02;
// note: the following tolerances could actually be made much smaller if we used more segments and/or
// more hets -- most of the error is the sampling error of a finite simulated data set, not numerical error of MCMC
final double minorFractionTolerance = 0.02;
final double meanBiasTolerance = 0.02;
final double biasVarianceTolerance = 0.01;
final double outlierProbabilityTolerance = 0.02;
final AlleleFractionSimulatedData simulatedData = new AlleleFractionSimulatedData(averageHetsPerSegment, numSegments, averageDepth, meanBiasSimulated, biasVarianceSimulated, outlierProbability);
final AlleleFractionModeller modeller = new AlleleFractionModeller(simulatedData.getSegmentedGenome(), allelicPoN);
modeller.fitMCMC(numSamples, numBurnIn);
final List<Double> meanBiasSamples = modeller.getmeanBiasSamples();
Assert.assertEquals(meanBiasSamples.size(), numSamples - numBurnIn);
final List<Double> biasVarianceSamples = modeller.getBiasVarianceSamples();
Assert.assertEquals(biasVarianceSamples.size(), numSamples - numBurnIn);
final List<Double> outlierProbabilitySamples = modeller.getOutlierProbabilitySamples();
Assert.assertEquals(outlierProbabilitySamples.size(), numSamples - numBurnIn);
final List<AlleleFractionState.MinorFractions> minorFractionsSamples = modeller.getMinorFractionsSamples();
Assert.assertEquals(minorFractionsSamples.size(), numSamples - numBurnIn);
for (final AlleleFractionState.MinorFractions sample : minorFractionsSamples) {
Assert.assertEquals(sample.size(), numSegments);
}
final List<List<Double>> minorFractionsSamplesBySegment = modeller.getMinorFractionSamplesBySegment();
final double mcmcMeanBias = meanBiasSamples.stream().mapToDouble(x -> x).average().getAsDouble();
final double mcmcBiasVariance = biasVarianceSamples.stream().mapToDouble(x -> x).average().getAsDouble();
final double mcmcOutlierProbability = outlierProbabilitySamples.stream().mapToDouble(x -> x).average().getAsDouble();
final List<Double> mcmcMinorFractions = minorFractionsSamplesBySegment.stream().map(list -> list.stream().mapToDouble(x -> x).average().getAsDouble()).collect(Collectors.toList());
double totalSegmentError = 0.0;
for (int segment = 0; segment < numSegments; segment++) {
totalSegmentError += Math.abs(mcmcMinorFractions.get(segment) - simulatedData.getTrueState().segmentMinorFraction(segment));
}
Assert.assertEquals(mcmcMeanBias, meanBiasExpected, meanBiasTolerance);
Assert.assertEquals(mcmcBiasVariance, biasVarianceExpected, biasVarianceTolerance);
Assert.assertEquals(mcmcOutlierProbability, outlierProbability, outlierProbabilityTolerance);
Assert.assertEquals(totalSegmentError / numSegments, 0.0, minorFractionTolerance);
//test posterior summaries
final Map<AlleleFractionParameter, PosteriorSummary> globalParameterPosteriorSummaries = modeller.getGlobalParameterPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
final PosteriorSummary meanBiasPosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.MEAN_BIAS);
final double meanBiasPosteriorCenter = meanBiasPosteriorSummary.getCenter();
Assert.assertEquals(meanBiasPosteriorCenter, meanBiasExpected, meanBiasTolerance);
final PosteriorSummary biasVariancePosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.BIAS_VARIANCE);
final double biasVariancePosteriorCenter = biasVariancePosteriorSummary.getCenter();
Assert.assertEquals(biasVariancePosteriorCenter, biasVarianceExpected, biasVarianceTolerance);
final PosteriorSummary outlierProbabilityPosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.OUTLIER_PROBABILITY);
final double outlierProbabilityPosteriorCenter = outlierProbabilityPosteriorSummary.getCenter();
Assert.assertEquals(outlierProbabilityPosteriorCenter, outlierProbability, outlierProbabilityTolerance);
final List<PosteriorSummary> minorAlleleFractionPosteriorSummaries = modeller.getMinorAlleleFractionsPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
final List<Double> minorFractionsPosteriorCenters = minorAlleleFractionPosteriorSummaries.stream().map(PosteriorSummary::getCenter).collect(Collectors.toList());
double totalPosteriorCentersSegmentError = 0.0;
for (int segment = 0; segment < numSegments; segment++) {
totalPosteriorCentersSegmentError += Math.abs(minorFractionsPosteriorCenters.get(segment) - simulatedData.getTrueState().segmentMinorFraction(segment));
}
Assert.assertEquals(totalPosteriorCentersSegmentError / numSegments, 0.0, minorFractionTolerance);
}
use of org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals in project gatk-protected by broadinstitute.
the class PerformAlleleFractionSegmentation method doWork.
@Override
public Object doWork() {
final String sampleName = FilenameUtils.getBaseName(snpCountsFile.getAbsolutePath());
final AllelicPanelOfNormals allelicPoN = allelicPoNFile != null ? AllelicPanelOfNormals.read(allelicPoNFile) : AllelicPanelOfNormals.EMPTY_PON;
final AllelicCountCollection acc = new AllelicCountCollection(snpCountsFile);
final AlleleFractionSegmenter segmenter = new AlleleFractionSegmenter(initialNumStates, acc, allelicPoN);
final List<ModeledSegment> segments = segmenter.getModeledSegments();
SegmentUtils.writeModeledSegmentFile(outputSegmentsFile, segments, sampleName, true);
return "SUCCESS";
}
use of org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals in project gatk-protected by broadinstitute.
the class AlleleFractionModellerUnitTest method dataBiasCorrection.
@DataProvider(name = "biasCorrection")
public Object[][] dataBiasCorrection() {
LoggingUtils.setLoggingLevel(Log.LogLevel.INFO);
final AllelicCountCollection sampleNormal = new AllelicCountCollection(SAMPLE_NORMAL_FILE);
final AllelicCountCollection sampleWithBadSNPs = new AllelicCountCollection(SAMPLE_WITH_BAD_SNPS_FILE);
final AllelicCountCollection sampleWithEvent = new AllelicCountCollection(SAMPLE_WITH_EVENT_FILE);
final AllelicPanelOfNormals allelicPoNNormal = new AllelicPanelOfNormals(new AllelicCountCollection(ALLELIC_PON_NORMAL_COUNTS_FILE));
final AllelicPanelOfNormals allelicPoNWithBadSNPs = new AllelicPanelOfNormals(new AllelicCountCollection(ALLELIC_PON_WITH_BAD_SNPS_COUNTS_FILE));
final double minorFractionExpectedInMiddleSegmentNormal = 0.5;
final double minorFractionExpectedInMiddleSegmentWithBadSNPsAndNormalPoN = 0.4;
final double minorFractionExpectedInMiddleSegmentWithEvent = 0.33;
return new Object[][] { { sampleNormal, allelicPoNNormal, minorFractionExpectedInMiddleSegmentNormal }, { sampleWithBadSNPs, allelicPoNNormal, minorFractionExpectedInMiddleSegmentWithBadSNPsAndNormalPoN }, { sampleWithEvent, allelicPoNNormal, minorFractionExpectedInMiddleSegmentWithEvent }, { sampleWithBadSNPs, allelicPoNWithBadSNPs, minorFractionExpectedInMiddleSegmentNormal } };
}
use of org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals in project gatk-protected by broadinstitute.
the class AlleleFractionModellerUnitTest method testMCMC.
private void testMCMC(final double meanBiasSimulated, final double biasVarianceSimulated, final double meanBiasExpected, final double biasVarianceExpected, final AllelicPanelOfNormals allelicPoN) {
LoggingUtils.setLoggingLevel(Log.LogLevel.INFO);
final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
final int numSamples = 150;
final int numBurnIn = 50;
final double averageHetsPerSegment = 50;
final int numSegments = 100;
final int averageDepth = 50;
final double outlierProbability = 0.02;
// note: the following tolerances could actually be made much smaller if we used more segments and/or
// more hets -- most of the error is the sampling error of a finite simulated data set, not numerical error of MCMC
final double minorFractionTolerance = 0.02;
final double meanBiasTolerance = 0.02;
final double biasVarianceTolerance = 0.01;
final double outlierProbabilityTolerance = 0.02;
final AlleleFractionSimulatedData simulatedData = new AlleleFractionSimulatedData(averageHetsPerSegment, numSegments, averageDepth, meanBiasSimulated, biasVarianceSimulated, outlierProbability);
final AlleleFractionModeller modeller = new AlleleFractionModeller(simulatedData.getSegmentedGenome(), allelicPoN);
modeller.fitMCMC(numSamples, numBurnIn);
final List<Double> meanBiasSamples = modeller.getmeanBiasSamples();
Assert.assertEquals(meanBiasSamples.size(), numSamples - numBurnIn);
final List<Double> biasVarianceSamples = modeller.getBiasVarianceSamples();
Assert.assertEquals(biasVarianceSamples.size(), numSamples - numBurnIn);
final List<Double> outlierProbabilitySamples = modeller.getOutlierProbabilitySamples();
Assert.assertEquals(outlierProbabilitySamples.size(), numSamples - numBurnIn);
final List<AlleleFractionState.MinorFractions> minorFractionsSamples = modeller.getMinorFractionsSamples();
Assert.assertEquals(minorFractionsSamples.size(), numSamples - numBurnIn);
for (final AlleleFractionState.MinorFractions sample : minorFractionsSamples) {
Assert.assertEquals(sample.size(), numSegments);
}
final List<List<Double>> minorFractionsSamplesBySegment = modeller.getMinorFractionSamplesBySegment();
final double mcmcMeanBias = meanBiasSamples.stream().mapToDouble(x -> x).average().getAsDouble();
final double mcmcBiasVariance = biasVarianceSamples.stream().mapToDouble(x -> x).average().getAsDouble();
final double mcmcOutlierProbability = outlierProbabilitySamples.stream().mapToDouble(x -> x).average().getAsDouble();
final List<Double> mcmcMinorFractions = minorFractionsSamplesBySegment.stream().map(list -> list.stream().mapToDouble(x -> x).average().getAsDouble()).collect(Collectors.toList());
double totalSegmentError = 0.0;
for (int segment = 0; segment < numSegments; segment++) {
totalSegmentError += Math.abs(mcmcMinorFractions.get(segment) - simulatedData.getTrueState().segmentMinorFraction(segment));
}
Assert.assertEquals(mcmcMeanBias, meanBiasExpected, meanBiasTolerance);
Assert.assertEquals(mcmcBiasVariance, biasVarianceExpected, biasVarianceTolerance);
Assert.assertEquals(mcmcOutlierProbability, outlierProbability, outlierProbabilityTolerance);
Assert.assertEquals(totalSegmentError / numSegments, 0.0, minorFractionTolerance);
//test posterior summaries
final Map<AlleleFractionParameter, PosteriorSummary> globalParameterPosteriorSummaries = modeller.getGlobalParameterPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
final PosteriorSummary meanBiasPosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.MEAN_BIAS);
final double meanBiasPosteriorCenter = meanBiasPosteriorSummary.getCenter();
Assert.assertEquals(meanBiasPosteriorCenter, meanBiasExpected, meanBiasTolerance);
final PosteriorSummary biasVariancePosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.BIAS_VARIANCE);
final double biasVariancePosteriorCenter = biasVariancePosteriorSummary.getCenter();
Assert.assertEquals(biasVariancePosteriorCenter, biasVarianceExpected, biasVarianceTolerance);
final PosteriorSummary outlierProbabilityPosteriorSummary = globalParameterPosteriorSummaries.get(AlleleFractionParameter.OUTLIER_PROBABILITY);
final double outlierProbabilityPosteriorCenter = outlierProbabilityPosteriorSummary.getCenter();
Assert.assertEquals(outlierProbabilityPosteriorCenter, outlierProbability, outlierProbabilityTolerance);
final List<PosteriorSummary> minorAlleleFractionPosteriorSummaries = modeller.getMinorAlleleFractionsPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
final List<Double> minorFractionsPosteriorCenters = minorAlleleFractionPosteriorSummaries.stream().map(PosteriorSummary::getCenter).collect(Collectors.toList());
double totalPosteriorCentersSegmentError = 0.0;
for (int segment = 0; segment < numSegments; segment++) {
totalPosteriorCentersSegmentError += Math.abs(minorFractionsPosteriorCenters.get(segment) - simulatedData.getTrueState().segmentMinorFraction(segment));
}
Assert.assertEquals(totalPosteriorCentersSegmentError / numSegments, 0.0, minorFractionTolerance);
}
use of org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals in project gatk by broadinstitute.
the class CreateAllelicPanelOfNormalsIntegrationTest method testHDF5OutputOnly.
@Test(dataProvider = "dataSiteFrequency")
public void testHDF5OutputOnly(final String[] testArguments, final File expectedHDF5File, final File expectedTSVFile) {
final File allelicPoNHDF5File = createTempFile("create-allelic-pon-test", ".pon");
allelicPoNHDF5File.delete();
final String[] commonArguments = ArrayUtils.addAll(PULLDOWN_FILE_ARGUMENTS, "--" + StandardArgumentDefinitions.OUTPUT_LONG_NAME, allelicPoNHDF5File.getAbsolutePath());
final String[] arguments = ArrayUtils.addAll(commonArguments, testArguments);
runCommandLine(arguments);
final AllelicPanelOfNormals resultHDF5 = AllelicPanelOfNormals.read(allelicPoNHDF5File);
final AllelicPanelOfNormals expectedHDF5 = AllelicPanelOfNormals.read(expectedHDF5File);
AllelicPoNTestUtils.assertAllelicPoNsEqual(resultHDF5, expectedHDF5);
//check overwrite
runCommandLine(arguments);
final AllelicPanelOfNormals resultHDF5Overwrite = AllelicPanelOfNormals.read(allelicPoNHDF5File);
AllelicPoNTestUtils.assertAllelicPoNsEqual(resultHDF5Overwrite, expectedHDF5);
}
Aggregations