Search in sources :

Example 11 with Segment

use of org.apache.commons.math3.geometry.euclidean.twod.Segment in project gatk by broadinstitute.

the class AlleleFractionSegmenterUnitTest method testChromosomesOnDifferentSegments.

/**
 * Simulates allelic counts at random positions on three contigs, with every site sharing one
 * minor-allele-fraction state, and checks that the modeled segments span three distinct contigs.
 */
@Test
public void testChromosomesOnDifferentSegments() {
    // A fixed seed keeps the simulated data reproducible from run to run.
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
    final double[] minorAlleleFractionStates = { 0.12, 0.32, 0.5 };
    final double memoryLength = 1e5;
    final double quarterMemoryLength = memoryLength / 4;
    final AlleleFractionGlobalParameters simulationParameters = new AlleleFractionGlobalParameters(1.0, 0.01, 0.01);

    // Draw the same number of random positions per contig. All draws share one generator,
    // so the chr1 -> chr2 -> chr3 order is part of the reproducible setup.
    final int sitesPerContig = 100;
    final List<SimpleInterval> sites = CopyRatioSegmenterUnitTest.randomPositions("chr1", sitesPerContig, rng, quarterMemoryLength);
    sites.addAll(CopyRatioSegmenterUnitTest.randomPositions("chr2", sitesPerContig, rng, quarterMemoryLength));
    sites.addAll(CopyRatioSegmenterUnitTest.randomPositions("chr3", sitesPerContig, rng, quarterMemoryLength));

    // Every site gets the same state (index 2), so the data themselves carry no change in minor allele fraction.
    final int sharedState = 2;
    final double sharedMinorAlleleFraction = minorAlleleFractionStates[sharedState];
    final List<Double> fractionPerSite = Collections.nCopies(sites.size(), sharedMinorAlleleFraction);
    final AllelicCountCollection simulatedCounts = generateCounts(fractionPerSite, sites, rng, simulationParameters);

    final List<ModeledSegment> modeledSegments =
            new AlleleFractionSegmenter(10, simulatedCounts, AllelicPanelOfNormals.EMPTY_PON).getModeledSegments();

    // Each of the three contigs must be represented by at least one segment.
    final long distinctContigs = modeledSegments.stream()
            .map(ModeledSegment::getContig)
            .distinct()
            .count();
    Assert.assertEquals((int) distinctContigs, 3);
}
Also used : AlleleFractionGlobalParameters(org.broadinstitute.hellbender.tools.exome.allelefraction.AlleleFractionGlobalParameters) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) Random(java.util.Random) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) ModeledSegment(org.broadinstitute.hellbender.tools.exome.ModeledSegment) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Test(org.testng.annotations.Test)

Example 12 with Segment

use of org.apache.commons.math3.geometry.euclidean.twod.Segment in project gatk by broadinstitute.

the class CopyRatioSegmenterUnitTest method testChromosomesOnDifferentSegments.

/**
 * Simulates log2 copy-ratio data at random positions on three contigs, with every target sharing one
 * copy-ratio state, and checks that the modeled segments span three distinct contigs.
 */
@Test
public void testChromosomesOnDifferentSegments() {
    // A fixed seed keeps the simulated data reproducible from run to run.
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
    final double[] log2CopyRatioStates = { -2.0, 0.0, 1.7 };
    final double memoryLength = 1e5;
    final double quarterMemoryLength = memoryLength / 4;
    final double noiseStandardDeviation = 0.2;

    // Draw the same number of random positions per contig. All draws share one generator,
    // so the chr1 -> chr2 -> chr3 order is part of the reproducible setup.
    final int sitesPerContig = 100;
    final List<SimpleInterval> sites = randomPositions("chr1", sitesPerContig, rng, quarterMemoryLength);
    sites.addAll(randomPositions("chr2", sitesPerContig, rng, quarterMemoryLength));
    sites.addAll(randomPositions("chr3", sitesPerContig, rng, quarterMemoryLength));

    // Every target is generated from the same state (index 2), one observation per position.
    final int sharedState = 2;
    final double sharedLog2CopyRatio = log2CopyRatioStates[sharedState];
    final double[] observations = new double[sites.size()];
    for (int i = 0; i < observations.length; i++) {
        observations[i] = generateData(noiseStandardDeviation, sharedLog2CopyRatio, rng);
    }

    // Wrap the observations as a single-sample read-count collection (one column, one row per target).
    final List<Target> targets = sites.stream()
            .map(Target::new)
            .collect(Collectors.toList());
    final ReadCountCollection readCounts =
            new ReadCountCollection(targets, Arrays.asList("SAMPLE"), new Array2DRowRealMatrix(observations));

    final List<ModeledSegment> modeledSegments = new CopyRatioSegmenter(10, readCounts).getModeledSegments();

    // Each of the three contigs must be represented by at least one segment.
    final long distinctContigs = modeledSegments.stream()
            .map(ModeledSegment::getContig)
            .distinct()
            .count();
    Assert.assertEquals((int) distinctContigs, 3);
}
Also used : IntStream(java.util.stream.IntStream) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) java.util(java.util) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) ModeledSegment(org.broadinstitute.hellbender.tools.exome.ModeledSegment) Assert(org.testng.Assert) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) RandomGeneratorFactory(org.apache.commons.math3.random.RandomGeneratorFactory) Target(org.broadinstitute.hellbender.tools.exome.Target) Test(org.testng.annotations.Test) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) ReadCountCollection(org.broadinstitute.hellbender.tools.exome.ReadCountCollection) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) Target(org.broadinstitute.hellbender.tools.exome.Target) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) ModeledSegment(org.broadinstitute.hellbender.tools.exome.ModeledSegment) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Test(org.testng.annotations.Test)

Example 13 with Segment

use of org.apache.commons.math3.geometry.euclidean.twod.Segment in project gatk-protected by broadinstitute.

the class AlleleFractionSegmenterUnitTest method testChromosomesOnDifferentSegments.

/**
 * Simulates allelic counts at random positions on three contigs, with every site sharing one
 * minor-allele-fraction state, and checks that the modeled segments span three distinct contigs.
 */
@Test
public void testChromosomesOnDifferentSegments() {
    // A fixed seed keeps the simulated data reproducible from run to run.
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(563));
    final double[] minorAlleleFractionStates = { 0.12, 0.32, 0.5 };
    final double memoryLength = 1e5;
    final double quarterMemoryLength = memoryLength / 4;
    final AlleleFractionGlobalParameters simulationParameters = new AlleleFractionGlobalParameters(1.0, 0.01, 0.01);

    // Draw the same number of random positions per contig. All draws share one generator,
    // so the chr1 -> chr2 -> chr3 order is part of the reproducible setup.
    final int sitesPerContig = 100;
    final List<SimpleInterval> sites = CopyRatioSegmenterUnitTest.randomPositions("chr1", sitesPerContig, rng, quarterMemoryLength);
    sites.addAll(CopyRatioSegmenterUnitTest.randomPositions("chr2", sitesPerContig, rng, quarterMemoryLength));
    sites.addAll(CopyRatioSegmenterUnitTest.randomPositions("chr3", sitesPerContig, rng, quarterMemoryLength));

    // Every site gets the same state (index 2), so the data themselves carry no change in minor allele fraction.
    final int sharedState = 2;
    final double sharedMinorAlleleFraction = minorAlleleFractionStates[sharedState];
    final List<Double> fractionPerSite = Collections.nCopies(sites.size(), sharedMinorAlleleFraction);
    final AllelicCountCollection simulatedCounts = generateCounts(fractionPerSite, sites, rng, simulationParameters);

    final List<ModeledSegment> modeledSegments =
            new AlleleFractionSegmenter(10, simulatedCounts, AllelicPanelOfNormals.EMPTY_PON).getModeledSegments();

    // Each of the three contigs must be represented by at least one segment.
    final long distinctContigs = modeledSegments.stream()
            .map(ModeledSegment::getContig)
            .distinct()
            .count();
    Assert.assertEquals((int) distinctContigs, 3);
}
Also used : AlleleFractionGlobalParameters(org.broadinstitute.hellbender.tools.exome.allelefraction.AlleleFractionGlobalParameters) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) Random(java.util.Random) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) ModeledSegment(org.broadinstitute.hellbender.tools.exome.ModeledSegment) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Test(org.testng.annotations.Test)

Example 14 with Segment

use of org.apache.commons.math3.geometry.euclidean.twod.Segment in project gatk-protected by broadinstitute.

the class CoverageDropoutDetector method retrieveGaussianMixtureModelForFilteredTargets.

/**
 * <p>Produces a Gaussian mixture model based on the difference between targets and segment means.</p>
 * <p>Filters targets to populations where more than the minProportion lie in a single segment.</p>
 * <p>If fewer targets pass filtering than there are components, no fit is attempted: the returned result
 * wraps a {@code null} model and is flagged as neither converged nor attempted.  Please note that in these
 * cases, in the rest of this class, we use this to assume that a GMM is not a good model.</p>
 * <p>If the EM fit throws because it did not converge, the returned result wraps the model as it was when
 * the exception was thrown and is flagged as attempted but not converged.</p>
 *
 * @param segments  -- segments with segment mean in log2 copy ratio space
 * @param targets -- targets with a log2 copy ratio estimate
 * @param minProportion -- minimum proportion of all targets that a given segment must have in order to be used
 *                      in the evaluation
 * @param numComponents -- number of components to use in the GMM.  Usually, this is 2.
 * @return  never {@code null}.  Fitting result with indications whether it converged or was even attempted.
 *          The model held by the result is {@code null} when no fit was attempted.
 */
private MixtureMultivariateNormalFitResult retrieveGaussianMixtureModelForFilteredTargets(final List<ModeledSegment> segments, final TargetCollection<ReadCountRecord.SingleSampleRecord> targets, double minProportion, int numComponents) {
    // For each target in a segment that contains enough targets, normalize the difference against the segment mean
    //  and collapse the filtered targets into the copy ratio estimates.
    final List<Double> filteredTargetsSegDiff = getNumProbeFilteredTargetList(segments, targets, minProportion);
    final int numFilteredTargets = filteredTargetsSegDiff.size();
    if (numFilteredTargets < numComponents) {
        // Too few data points for the requested number of components: report "not attempted" with no model.
        return new MixtureMultivariateNormalFitResult(null, false, false);
    }
    // Assume that Apache Commons wants data points in the first dimension.
    // Note that the second dimension has length 2 (instead of 1) to work around the Apache Commons API, which
    //  will throw an exception if the length of the second dimension is < 2.
    final double[][] filteredTargetsSegDiff2d = new double[numFilteredTargets][2];
    // Column 0 holds the real data; column 1 is filled with independent Gaussian noise (mean 0, sd 0.1) drawn
    //  from a generator seeded with RANDOM_SEED, so the padding column is the same on every call.
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
    final NormalDistribution nd = new NormalDistribution(rng, 0, .1);
    for (int i = 0; i < numFilteredTargets; i++) {
        filteredTargetsSegDiff2d[i][0] = filteredTargetsSegDiff.get(i);
        filteredTargetsSegDiff2d[i][1] = nd.sample();
    }
    // Build an initial mixture estimate from the data, then refine it with expectation maximization.
    final MixtureMultivariateNormalDistribution estimateEM0 = MultivariateNormalMixtureExpectationMaximization.estimate(filteredTargetsSegDiff2d, numComponents);
    final MultivariateNormalMixtureExpectationMaximization emFitter = new MultivariateNormalMixtureExpectationMaximization(filteredTargetsSegDiff2d);
    try {
        emFitter.fit(estimateEM0);
    } catch (final MaxCountExceededException | ConvergenceException | SingularMatrixException e) {
        //  did not converge.  Include the model as it was when the exception was thrown.
        return new MixtureMultivariateNormalFitResult(emFitter.getFittedModel(), false, true);
    }
    return new MixtureMultivariateNormalFitResult(emFitter.getFittedModel(), true, true);
}
Also used : RandomGenerator(org.apache.commons.math3.random.RandomGenerator) MaxCountExceededException(org.apache.commons.math3.exception.MaxCountExceededException) MixtureMultivariateNormalDistribution(org.apache.commons.math3.distribution.MixtureMultivariateNormalDistribution) Random(java.util.Random) NormalDistribution(org.apache.commons.math3.distribution.NormalDistribution) MixtureMultivariateNormalDistribution(org.apache.commons.math3.distribution.MixtureMultivariateNormalDistribution) ConvergenceException(org.apache.commons.math3.exception.ConvergenceException) SingularMatrixException(org.apache.commons.math3.linear.SingularMatrixException) MultivariateNormalMixtureExpectationMaximization(org.apache.commons.math3.distribution.fitting.MultivariateNormalMixtureExpectationMaximization)

Example 15 with Segment

use of org.apache.commons.math3.geometry.euclidean.twod.Segment in project gatk-protected by broadinstitute.

the class GibbsSamplerCopyRatioUnitTest method testRunMCMCOnCopyRatioSegmentedGenome.

/**
 * Checks Bayesian inference of a toy copy-ratio model performed with MCMC.
 * <p>
 *     The test verifies that the input values of the global variance parameter and of the segment-level mean
 *     parameters are recovered.  Specifically, the posterior mean and posterior standard deviation of the variance
 *     must match their truth values to within a relative error of 1% and 5%, respectively, in 500 samples
 *     (after 250 burn-in samples have been discarded).
 * </p>
 * <p>
 *     In addition, the number of segments whose true mean lies outside the 1-sigma, 2-sigma, and 3-sigma
 *     confidence intervals of that segment's posterior should be roughly what a normal distribution predicts
 *     (i.e., ~32, ~5, and ~0, respectively; errors of 10, 5, and 2 are allowed).  Lastly, the mean of the
 *     posterior standard deviations of the segment-level means must be recovered to within a relative error of 5%.
 * </p>
 * <p>
 *     These tolerances keep the test from being overly brittle (it should pass for a large majority of randomly
 *     generated data sets) while leaving it strict enough to detect incorrect sampling (for example, a
 *     sufficiently incorrect likelihood will make it fail).
 * </p>
 */
@Test
public void testRunMCMCOnCopyRatioSegmentedGenome() {
    //The Modeller helper receives everything needed to initialize the state and the data.
    final CopyRatioModeller modeller = new CopyRatioModeller(VARIANCE_INITIAL, MEAN_INITIAL, COVERAGES_FILE, NUM_TARGETS_PER_SEGMENT_FILE);
    //The sampler is given the total number of samples (burn-in included) and the Modeller's model.
    final GibbsSampler<CopyRatioParameter, CopyRatioState, CopyRatioDataCollection> sampler = new GibbsSampler<>(NUM_SAMPLES, modeller.model);
    sampler.runMCMC();

    //Variance posterior: its mean and standard deviation must agree with the values found by
    //emcee/analytically to a relative error of 1% and 5%, respectively.
    final double[] varianceDraws = Doubles.toArray(sampler.getSamples(CopyRatioParameter.VARIANCE, Double.class, NUM_BURN_IN));
    final double varianceCenter = new Mean().evaluate(varianceDraws);
    final double varianceSpread = new StandardDeviation().evaluate(varianceDraws);
    Assert.assertEquals(relativeError(varianceCenter, VARIANCE_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS);
    Assert.assertEquals(relativeError(varianceSpread, VARIANCE_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);

    //Segment-level mean posteriors: the number of segments whose true mean falls outside the
    //1-, 2-, and 3-sigma intervals should be roughly consistent with a normal distribution.
    final List<Double> truthMeans = loadList(MEANS_TRUTH_FILE, Double::parseDouble);
    final int numSegments = truthMeans.size();
    final List<SegmentMeans> segmentMeansDraws = sampler.getSamples(CopyRatioParameter.SEGMENT_MEANS, SegmentMeans.class, NUM_BURN_IN);
    //numMeansOutsideSigma[k - 1] counts the segments whose truth lies more than k posterior standard deviations
    //away from the posterior center, for k = 1, 2, 3.
    final int[] numMeansOutsideSigma = new int[3];
    final double[] posteriorSpreads = new double[numSegments];
    for (int segment = 0; segment < numSegments; segment++) {
        //the lambda below needs an effectively final copy of the loop index
        final int segmentIndex = segment;
        final List<Double> drawsForSegment = segmentMeansDraws.stream()
                .map(draw -> draw.get(segmentIndex))
                .collect(Collectors.toList());
        final double[] drawsForSegmentArray = Doubles.toArray(drawsForSegment);
        final double posteriorCenter = new Mean().evaluate(drawsForSegmentArray);
        final double posteriorSpread = new StandardDeviation().evaluate(drawsForSegmentArray);
        posteriorSpreads[segment] = posteriorSpread;
        final double distanceFromTruth = Math.abs(posteriorCenter - truthMeans.get(segment));
        for (int k = 1; k <= numMeansOutsideSigma.length; k++) {
            if (distanceFromTruth > k * posteriorSpread) {
                numMeansOutsideSigma[k - 1]++;
            }
        }
    }
    final double meanOfPosteriorSpreads = new Mean().evaluate(posteriorSpreads);
    Assert.assertEquals(numMeansOutsideSigma[0], 100 - 68, DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_1_SIGMA);
    Assert.assertEquals(numMeansOutsideSigma[1], 100 - 95, DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_2_SIGMA);
    Assert.assertTrue(numMeansOutsideSigma[2] <= DELTA_NUMBER_OF_MEANS_ALLOWED_OUTSIDE_3_SIGMA);
    Assert.assertEquals(relativeError(meanOfPosteriorSpreads, MEAN_POSTERIOR_STANDARD_DEVIATION_MEAN_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
}
Also used : Mean(org.apache.commons.math3.stat.descriptive.moment.Mean) ArrayList(java.util.ArrayList) StandardDeviation(org.apache.commons.math3.stat.descriptive.moment.StandardDeviation) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

Test (org.testng.annotations.Test)12 Collectors (java.util.stream.Collectors)8 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)8 RealMatrix (org.apache.commons.math3.linear.RealMatrix)8 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)8 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)8 ArrayList (java.util.ArrayList)6 List (java.util.List)6 RandomGenerator (org.apache.commons.math3.random.RandomGenerator)6 AllelicCount (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)5 File (java.io.File)4 IOException (java.io.IOException)4 Collections (java.util.Collections)4 Random (java.util.Random)4 Function (java.util.function.Function)4 IntStream (java.util.stream.IntStream)4 NormalDistribution (org.apache.commons.math3.distribution.NormalDistribution)4 MaxCountExceededException (org.apache.commons.math3.exception.MaxCountExceededException)4 Mean (org.apache.commons.math3.stat.descriptive.moment.Mean)4 Utils (org.broadinstitute.hellbender.utils.Utils)4