use of org.broadinstitute.hellbender.tools.exome.SegmentedGenome in projects gatk and gatk-protected by broadinstitute.
the class AlleleFractionModellerUnitTest method testBiasCorrection.
/**
* Tests that the allelic PoN is appropriately used to correct reference bias. The basic setup for the test data is
* simulated hets at 1000 sites (1:1-1000) across 3 segments. The outer two segments are balanced with
* minor-allele fraction = 0.5; however, in the middle segment consisting of 100 sites (1:451-550), all of the sites:
*
* <p>
* 1) are balanced and have biases identical to the sites in the other two segments,
* which are drawn from a gamma distribution with alpha = 65, beta = 60 -> mean bias = 1.083 ("SAMPLE_NORMAL")
* </p>
*
* <p>
* 2) are balanced and have relatively high biases,
* which are drawn from a gamma distribution with alpha = 9, beta = 6 -> mean bias = 1.5 ("SAMPLE_WITH_BAD_SNPS")
* </p>
*
* <p>
* 3) have minor-allele fraction = 0.33, copy ratio = 1.5, and biases identical to the sites in the other two segments,
* which are drawn from a gamma distribution with alpha = 65, beta = 60 -> mean bias = 1.083 ("SAMPLE_EVENT").
* </p>
*
* In this segment, using a PoN that doesn't know about the high reference bias of these sites ("ALLELIC_PON_NORMAL"),
* we should infer a minor-allele fraction of 6 / (6 + 9) = 0.40 in scenario 2; however, with a PoN that does know
* about the high bias at these sites ("ALLELIC_PON_WITH_BAD_SNPS"), we correctly infer that all of the segments are balanced.
*
* <p>
* Note that alpha and beta are not actually correctly recovered in this PoN via MLE because the biases are
* drawn from a mixture of gamma distributions (as opposed to a single gamma distribution as assumed in the model).
* TODO https://github.com/broadinstitute/gatk-protected/issues/421
* </p>
*/
@Test(dataProvider = "biasCorrection")
public void testBiasCorrection(final AllelicCountCollection sample, final AllelicPanelOfNormals allelicPoN, final double minorFractionExpectedInMiddleSegment) {
    LoggingUtils.setLoggingLevel(Log.LogLevel.INFO);
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final double minorFractionTolerance = 0.025;
    final Genome genome = new Genome(AlleleFractionSimulatedData.TRIVIAL_TARGETS, sample.getCounts());
    final List<SimpleInterval> segments = SegmentUtils.readIntervalsFromSegmentFile(SEGMENTS_FILE);
    final SegmentedGenome segmentedGenome = new SegmentedGenome(segments, genome);
    final int numSamples = 150;
    final int numBurnIn = 50;
    final AlleleFractionModeller modeller = new AlleleFractionModeller(segmentedGenome, allelicPoN);
    modeller.fitMCMC(numSamples, numBurnIn);
    final List<PosteriorSummary> minorAlleleFractionPosteriorSummaries = modeller.getMinorAlleleFractionsPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    final List<Double> minorFractionsResult = minorAlleleFractionPosteriorSummaries.stream().map(PosteriorSummary::getCenter).collect(Collectors.toList());
    final double minorFractionBalanced = 0.5;
    final List<Double> minorFractionsExpected = Arrays.asList(minorFractionBalanced, minorFractionExpectedInMiddleSegment, minorFractionBalanced);
    for (int segment = 0; segment < 3; segment++) {
        Assert.assertEquals(minorFractionsResult.get(segment), minorFractionsExpected.get(segment), minorFractionTolerance);
    }
}
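The arithmetic behind the Javadoc's numbers can be checked in isolation: a gamma distribution with shape alpha and rate beta has mean alpha / beta, so alpha = 65, beta = 60 gives a mean bias of ~1.083 and alpha = 9, beta = 6 gives 1.5, and weighting reference reads by a bias of 1.5 = 9/6 turns a balanced het into an apparent alt fraction of 6 / (6 + 9) = 0.40. The sketch below is illustrative only and is not part of the test or of AlleleFractionModeller: it assumes Apache Commons Math 3 on the classpath (which parameterizes the gamma by shape and scale, so scale = 1 / beta), and the class name and the formula f / (f + (1 - f) * bias) are my own restatement of the Javadoc's arithmetic.

import org.apache.commons.math3.distribution.GammaDistribution;
import org.apache.commons.math3.random.RandomGeneratorFactory;

import java.util.Random;

public final class ReferenceBiasSketch {
    public static void main(final String[] args) {
        // "SAMPLE_WITH_BAD_SNPS" parameters from the Javadoc above: alpha = 9 (shape), beta = 6 (rate).
        final double alpha = 9.;
        final double beta = 6.;
        // Commons Math uses (shape, scale), so pass scale = 1 / beta to get mean bias = alpha / beta.
        final GammaDistribution biasDistribution = new GammaDistribution(
                RandomGeneratorFactory.createRandomGenerator(new Random(13)), alpha, 1. / beta);
        final double meanBias = alpha / beta;   // = 1.5
        // A balanced het has minor-allele fraction 0.5; weighting ref reads by the bias gives
        // an apparent alt fraction of f / (f + (1 - f) * bias) = 6 / (6 + 9) = 0.40.
        final double trueMinorFraction = 0.5;
        final double apparentAltFraction =
                trueMinorFraction / (trueMinorFraction + (1. - trueMinorFraction) * meanBias);
        System.out.printf("mean bias = %.3f, apparent alt fraction = %.2f, one sampled bias = %.3f%n",
                meanBias, apparentAltFraction, biasDistribution.sample());
    }
}

Without a PoN that encodes the elevated bias, this 0.40 is exactly the minor-allele fraction the test expects the modeller to report for the middle segment in scenario 2.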
use of org.broadinstitute.hellbender.tools.exome.SegmentedGenome in projects gatk and gatk-protected by broadinstitute.
the class CopyRatioModellerUnitTest method testRunMCMCOnCopyRatioSegmentedGenome.
/**
* Tests Bayesian inference of the copy-ratio model via MCMC.
* <p>
* Recovery of input values for the variance and outlier-probability global parameters is checked.
* In particular, the true input value of the variance must fall within
* {@link CopyRatioModellerUnitTest#MULTIPLES_OF_SD_THRESHOLD}
* standard deviations of the posterior mean and the standard deviation of the posterior must agree
* with the analytic value to within a relative error of
* {@link CopyRatioModellerUnitTest#RELATIVE_ERROR_THRESHOLD} for 250 samples
* (after 250 burn-in samples have been discarded). Similar criteria are applied
* to the recovery of the true input value for the outlier probability.
* </p>
* <p>
* Furthermore, the number of truth values for the segment-level means falling outside confidence intervals of
* 1-sigma, 2-sigma, and 3-sigma given by the posteriors in each segment should be roughly consistent with
* a normal distribution (i.e., ~32, ~5, and ~0 out of the 100 segments, respectively; we allow for errors of
* {@link CopyRatioModellerUnitTest#DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_1_SIGMA},
* {@link CopyRatioModellerUnitTest#DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_2_SIGMA}, and
* {@link CopyRatioModellerUnitTest#DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_3_SIGMA}, respectively).
* The mean of the standard deviations of the posteriors for the segment-level means should also be
* recovered to within a relative error of {@link CopyRatioModellerUnitTest#RELATIVE_ERROR_THRESHOLD}.
* </p>
* <p>
* Finally, the recovered values for the latent outlier-indicator parameters should agree with those used to
* generate the data. For each indicator, the recovered value (i.e., outlier or non-outlier) is taken to be
* that given by the majority of posterior samples. We require that at least
* {@link CopyRatioModellerUnitTest#FRACTION_OF_OUTLIER_INDICATORS_CORRECT_THRESHOLD}
* of the 10000 indicators are recovered correctly.
* </p>
* <p>
* With these specifications, this unit test is not overly brittle (i.e., it should pass for a large majority
* of randomly generated data sets), but it is still brittle enough to check for correctness of the sampling
* (for example, specifying a sufficiently incorrect likelihood will cause the test to fail).
* </p>
*/
@Test
public void testRunMCMCOnCopyRatioSegmentedGenome() throws IOException {
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    LoggingUtils.setLoggingLevel(Log.LogLevel.INFO);
    //load data (coverages and number of targets in each segment)
    final ReadCountCollection coverage = ReadCountCollectionUtils.parse(COVERAGES_FILE);
    //Genome with no SNPs
    final Genome genome = new Genome(coverage, Collections.emptyList());
    final SegmentedGenome segmentedGenome = new SegmentedGenome(SEGMENT_FILE, genome);
    //run MCMC
    final CopyRatioModeller modeller = new CopyRatioModeller(segmentedGenome);
    modeller.fitMCMC(NUM_SAMPLES, NUM_BURN_IN);
    //check statistics of global-parameter posterior samples (i.e., posterior mode and standard deviation)
    final Map<CopyRatioParameter, PosteriorSummary> globalParameterPosteriorSummaries = modeller.getGlobalParameterPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    final PosteriorSummary variancePosteriorSummary = globalParameterPosteriorSummaries.get(CopyRatioParameter.VARIANCE);
    final double variancePosteriorCenter = variancePosteriorSummary.getCenter();
    final double variancePosteriorStandardDeviation = (variancePosteriorSummary.getUpper() - variancePosteriorSummary.getLower()) / 2;
    Assert.assertEquals(Math.abs(variancePosteriorCenter - VARIANCE_TRUTH), 0., MULTIPLES_OF_SD_THRESHOLD * VARIANCE_POSTERIOR_STANDARD_DEVIATION_TRUTH);
    Assert.assertEquals(relativeError(variancePosteriorStandardDeviation, VARIANCE_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD);
    final PosteriorSummary outlierProbabilityPosteriorSummary = globalParameterPosteriorSummaries.get(CopyRatioParameter.OUTLIER_PROBABILITY);
    final double outlierProbabilityPosteriorCenter = outlierProbabilityPosteriorSummary.getCenter();
    final double outlierProbabilityPosteriorStandardDeviation = (outlierProbabilityPosteriorSummary.getUpper() - outlierProbabilityPosteriorSummary.getLower()) / 2;
    Assert.assertEquals(Math.abs(outlierProbabilityPosteriorCenter - OUTLIER_PROBABILITY_TRUTH), 0., MULTIPLES_OF_SD_THRESHOLD * OUTLIER_PROBABILITY_POSTERIOR_STANDARD_DEVIATION_TRUTH);
    Assert.assertEquals(relativeError(outlierProbabilityPosteriorStandardDeviation, OUTLIER_PROBABILITY_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD);
    //check statistics of segment-mean posterior samples (i.e., posterior means and standard deviations)
    final List<Double> meansTruth = loadList(MEANS_TRUTH_FILE, Double::parseDouble);
    int numMeansOutsideOneSigma = 0;
    int numMeansOutsideTwoSigma = 0;
    int numMeansOutsideThreeSigma = 0;
    final int numSegments = meansTruth.size();
    //segment-mean posteriors are expected to be Gaussian, so the PosteriorSummary for
    //{@link CopyRatioModellerUnitTest#CREDIBLE_INTERVAL_ALPHA} = 0.32 is
    //(posterior mean, posterior mean - posterior standard deviation, posterior mean + posterior standard deviation)
    final List<PosteriorSummary> meanPosteriorSummaries = modeller.getSegmentMeansPosteriorSummaries(CREDIBLE_INTERVAL_ALPHA, ctx);
    final double[] meanPosteriorStandardDeviations = new double[numSegments];
    for (int segment = 0; segment < numSegments; segment++) {
        final double meanPosteriorCenter = meanPosteriorSummaries.get(segment).getCenter();
        final double meanPosteriorStandardDeviation = (meanPosteriorSummaries.get(segment).getUpper() - meanPosteriorSummaries.get(segment).getLower()) / 2.;
        meanPosteriorStandardDeviations[segment] = meanPosteriorStandardDeviation;
        final double absoluteDifferenceFromTruth = Math.abs(meanPosteriorCenter - meansTruth.get(segment));
        if (absoluteDifferenceFromTruth > meanPosteriorStandardDeviation) {
            numMeansOutsideOneSigma++;
        }
        if (absoluteDifferenceFromTruth > 2 * meanPosteriorStandardDeviation) {
            numMeansOutsideTwoSigma++;
        }
        if (absoluteDifferenceFromTruth > 3 * meanPosteriorStandardDeviation) {
            numMeansOutsideThreeSigma++;
        }
    }
    final double meanPosteriorStandardDeviationsMean = new Mean().evaluate(meanPosteriorStandardDeviations);
    Assert.assertEquals(numMeansOutsideOneSigma, 100 - 68, DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_1_SIGMA);
    Assert.assertEquals(numMeansOutsideTwoSigma, 100 - 95, DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_2_SIGMA);
    Assert.assertTrue(numMeansOutsideThreeSigma <= DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_3_SIGMA);
    Assert.assertEquals(relativeError(meanPosteriorStandardDeviationsMean, MEAN_POSTERIOR_STANDARD_DEVIATION_MEAN_TRUTH), 0., RELATIVE_ERROR_THRESHOLD);
    //check accuracy of latent outlier-indicator posterior samples
    final List<CopyRatioState.OutlierIndicators> outlierIndicatorSamples = modeller.getOutlierIndicatorsSamples();
    int numIndicatorsCorrect = 0;
    final int numIndicatorSamples = outlierIndicatorSamples.size();
    final List<Integer> outlierIndicatorsTruthAsInt = loadList(OUTLIER_INDICATORS_TRUTH_FILE, Integer::parseInt);
    final List<Boolean> outlierIndicatorsTruth = outlierIndicatorsTruthAsInt.stream().map(i -> i == 1).collect(Collectors.toList());
    for (int target = 0; target < coverage.targets().size(); target++) {
        int numSamplesOutliers = 0;
        for (final CopyRatioState.OutlierIndicators sample : outlierIndicatorSamples) {
            if (sample.get(target)) {
                numSamplesOutliers++;
            }
        }
        //take the predicted state of each indicator to be that given by the majority of samples
        if ((numSamplesOutliers >= numIndicatorSamples / 2.) == outlierIndicatorsTruth.get(target)) {
            numIndicatorsCorrect++;
        }
    }
    final double fractionOfOutlierIndicatorsCorrect = (double) numIndicatorsCorrect / coverage.targets().size();
    Assert.assertTrue(fractionOfOutlierIndicatorsCorrect >= FRACTION_OF_OUTLIER_INDICATORS_CORRECT_THRESHOLD);
}
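As a reminder of the arithmetic the assertions above rely on: for an approximately Gaussian posterior, the central credible interval at alpha = 0.32 spans roughly one standard deviation on either side of the mean, so (upper - lower) / 2 estimates the posterior standard deviation, and of 100 true segment means one expects about 32, 5, and 0 to fall outside 1, 2, and 3 sigma. The sketch below only illustrates that arithmetic and the majority-vote rule used for the outlier indicators; it assumes Apache Commons Math 3 on the classpath, and the class and helper names are mine, not GATK's.

import org.apache.commons.math3.distribution.NormalDistribution;

import java.util.Arrays;
import java.util.List;

public final class PosteriorCheckSketch {
    // Half-width of the central (1 - alpha) interval of a standard Gaussian, in units of sigma.
    static double credibleIntervalHalfWidthInSigmas(final double alpha) {
        return new NormalDistribution().inverseCumulativeProbability(1. - alpha / 2.);
    }

    // Majority vote over posterior samples of a single boolean outlier indicator,
    // mirroring the numSamplesOutliers >= numIndicatorSamples / 2. rule in the test above.
    static boolean majorityVote(final List<Boolean> indicatorSamples) {
        final long outlierVotes = indicatorSamples.stream().filter(b -> b).count();
        return outlierVotes >= indicatorSamples.size() / 2.;
    }

    public static void main(final String[] args) {
        // For alpha = 0.32 the half-width is ~0.994 sigma, which justifies (upper - lower) / 2
        // as an estimate of the posterior standard deviation.
        System.out.printf("half-width at alpha = 0.32: %.3f sigma%n", credibleIntervalHalfWidthInSigmas(0.32));
        // Expected numbers of 100 true segment means outside k-sigma of a Gaussian posterior: ~32, ~5, ~0.
        final NormalDistribution standardNormal = new NormalDistribution();
        for (int k = 1; k <= 3; k++) {
            final double probabilityOutside = 2. * (1. - standardNormal.cumulativeProbability(k));
            System.out.printf("expected outside %d sigma: %.1f of 100%n", k, 100. * probabilityOutside);
        }
        System.out.println("majority vote over [true, true, false]: " + majorityVote(Arrays.asList(true, true, false)));
    }
}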
use of org.broadinstitute.hellbender.tools.exome.SegmentedGenome in project gatk-protected by broadinstitute.
the class AlleleFractionDataUnitTest method testData.
@Test
public void testData() {
    final List<AllelicCount> ac = new ArrayList<>();
    final List<SimpleInterval> segments = new ArrayList<>();
    // segment 0: hets 0-2
    segments.add(new SimpleInterval("chr", 1, 5));
    ac.add(new AllelicCount(new SimpleInterval("chr", 1, 1), 0, 5));
    ac.add(new AllelicCount(new SimpleInterval("chr", 2, 2), 5, 0));
    ac.add(new AllelicCount(new SimpleInterval("chr", 3, 3), 5, 5));
    // segment 1: hets 3-4
    segments.add(new SimpleInterval("chr", 10, 15));
    ac.add(new AllelicCount(new SimpleInterval("chr", 10, 10), 1, 1));
    ac.add(new AllelicCount(new SimpleInterval("chr", 11, 11), 2, 2));
    final Genome genome = new Genome(AlleleFractionSimulatedData.TRIVIAL_TARGETS, ac);
    final SegmentedGenome segmentedGenome = new SegmentedGenome(segments, genome);
    final AlleleFractionData dc = new AlleleFractionData(segmentedGenome);
    Assert.assertEquals(dc.getNumSegments(), 2);
    Assert.assertEquals(dc.getRefCount(0), 0);
    Assert.assertEquals(dc.getAltCount(0), 5);
    Assert.assertEquals(dc.getReadCount(2), 10);
    Assert.assertEquals(dc.getReadCount(3), 2);
    Assert.assertEquals(dc.getRefCount(4), 2);
    Assert.assertEquals(dc.getAltCount(4), 2);
    Assert.assertEquals(dc.getAllelicCount(0).getAltReadCount(), 5);
    Assert.assertEquals(dc.getAllelicCount(1).getAltReadCount(), 0);
    Assert.assertEquals(dc.getAllelicCount(3).getRefReadCount(), 1);
    Assert.assertEquals(dc.getAllelicCount(4).getRefReadCount(), 2);
    Assert.assertEquals(dc.getCountsInSegment(0).get(1).getRefReadCount(), 5);
    Assert.assertEquals(dc.getCountsInSegment(0).get(1).getAltReadCount(), 0);
    final List<Integer> hetsInSegment0 = dc.getHetsInSegment(0);
    Assert.assertEquals(hetsInSegment0.size(), 3);
    Assert.assertEquals((int) hetsInSegment0.get(0), 0);
    Assert.assertEquals((int) hetsInSegment0.get(2), 2);
    final List<Integer> hetsInSegment1 = dc.getHetsInSegment(1);
    Assert.assertEquals(hetsInSegment1.size(), 2);
    Assert.assertEquals((int) hetsInSegment1.get(0), 3);
    Assert.assertEquals((int) hetsInSegment1.get(1), 4);
}
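For the toy data above, the per-segment counts can be reduced to a naive minor-allele-fraction estimate directly from the getters exercised in the test. The helper below is an illustrative sketch (the method name is mine, not part of the AlleleFractionData API); for both segments of the test data it would return 0.5, since segment 0 holds 10 ref and 10 alt reads and segment 1 holds 3 of each.

// Illustrative only: aggregate the counts that AlleleFractionData exposes into a naive
// per-segment minor-allele-fraction estimate (helper name is hypothetical, not GATK API).
static double naiveMinorAlleleFraction(final AlleleFractionData data, final int segment) {
    int refReads = 0;
    int altReads = 0;
    for (final int het : data.getHetsInSegment(segment)) {
        refReads += data.getAllelicCount(het).getRefReadCount();
        altReads += data.getAllelicCount(het).getAltReadCount();
    }
    final int totalReads = refReads + altReads;
    return totalReads == 0 ? Double.NaN : (double) Math.min(refReads, altReads) / totalReads;
}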