use of org.apache.commons.math3.random.RandomGenerator in project gatk-protected by broadinstitute.
the class AdaptiveMetropolisSamplerUnitTest method testGaussian.
@Test
public void testGaussian() {
final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
for (final double theoreticalMean : Arrays.asList(0)) {
for (final double precision : Arrays.asList(1.0)) {
final double variance = 1 / precision;
//Note: this is the theoretical standard deviation of the sample mean given uncorrelated
//samples. The sample mean will have a greater variance here because samples are correlated.
final double standardDeviationOfMean = Math.sqrt(variance / NUM_SAMPLES);
final Function<Double, Double> logPDF = x -> -(precision / 2) * (x - theoreticalMean) * (x - theoreticalMean);
final AdaptiveMetropolisSampler sampler = new AdaptiveMetropolisSampler(INITIAL_GAUSSIAN_GUESS, INITIAL_STEP_SIZE, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY);
final List<Double> samples = sampler.sample(rng, logPDF, NUM_SAMPLES, NUM_BURN_IN_STEPS);
final double sampleMean = samples.stream().mapToDouble(x -> x).average().getAsDouble();
final double sampleMeanSquare = samples.stream().mapToDouble(x -> x * x).average().getAsDouble();
final double sampleVariance = (sampleMeanSquare - sampleMean * sampleMean) * NUM_SAMPLES / (NUM_SAMPLES - 1);
Assert.assertEquals(sampleMean, theoreticalMean, 6 * standardDeviationOfMean);
Assert.assertEquals(sampleVariance, variance, variance / 10);
}
}
}
use of org.apache.commons.math3.random.RandomGenerator in project gatk-protected by broadinstitute.
the class GATKProtectedMathUtilsTest method testRandomSelectFlatProbability.
@Test
public void testRandomSelectFlatProbability() {
final RandomGenerator rg = RandomGeneratorFactory.createRandomGenerator(new Random(13));
final int NUM_SAMPLES = 1000;
final List<Integer> choices = Arrays.asList(0, 1, 2);
final List<Integer> result = IntStream.range(0, NUM_SAMPLES).map(n -> GATKProtectedMathUtils.randomSelect(choices, j -> 1.0 / choices.size(), rg)).boxed().collect(Collectors.toList());
Assert.assertEquals(result.stream().filter(n -> n == 0).count(), NUM_SAMPLES / choices.size(), 50);
}
use of org.apache.commons.math3.random.RandomGenerator in project gatk-protected by broadinstitute.
the class GATKProtectedMathUtilsTest method testRandomSelect.
@Test
public void testRandomSelect() {
final RandomGenerator rg = RandomGeneratorFactory.createRandomGenerator(new Random(13));
final int NUM_SAMPLES = 1000;
final List<Integer> choices = Arrays.asList(-1, 0, 1);
final List<Integer> result = IntStream.range(0, NUM_SAMPLES).map(n -> GATKProtectedMathUtils.randomSelect(choices, j -> j * j / 2.0, rg)).boxed().collect(Collectors.toList());
Assert.assertEquals(result.stream().filter(n -> n == 0).count(), 0);
Assert.assertEquals(result.stream().filter(n -> n == 1).count(), NUM_SAMPLES / 2, 50);
}
use of org.apache.commons.math3.random.RandomGenerator in project gatk by broadinstitute.
the class CoverageDropoutDetector method retrieveGaussianMixtureModelForFilteredTargets.
/** <p>Produces a Gaussian mixture model based on the difference between targets and segment means.</p>
* <p>Filters targets to populations where more than the minProportion lie in a single segment.</p>
* <p>Returns null if no pass filtering. Please note that in these cases,
* in the rest of this class, we use this to assume that a GMM is not a good model.</p>
*
* @param segments -- segments with segment mean in log2 copy ratio space
* @param targets -- targets with a log2 copy ratio estimate
* @param minProportion -- minimum proportion of all targets that a given segment must have in order to be used
* in the evaluation
* @param numComponents -- number of components to use in the GMM. Usually, this is 2.
* @return never {@code null}. Fitting result with indications whether it converged or was even attempted.
*/
private MixtureMultivariateNormalFitResult retrieveGaussianMixtureModelForFilteredTargets(final List<ModeledSegment> segments, final TargetCollection<ReadCountRecord.SingleSampleRecord> targets, double minProportion, int numComponents) {
// For each target in a segment that contains enough targets, normalize the difference against the segment mean
// and collapse the filtered targets into the copy ratio estimates.
final List<Double> filteredTargetsSegDiff = getNumProbeFilteredTargetList(segments, targets, minProportion);
if (filteredTargetsSegDiff.size() < numComponents) {
return new MixtureMultivariateNormalFitResult(null, false, false);
}
// Assume that Apache Commons wants data points in the first dimension.
// Note that second dimension of length 2 (instead of 1) is to wrok around funny Apache commons API.
final double[][] filteredTargetsSegDiff2d = new double[filteredTargetsSegDiff.size()][2];
// Convert the filtered targets into 2d array (even if second dimension is length 1). The second dimension is
// uncorrelated Gaussian. This is only to get around funny API in Apache Commons, which will throw an
// exception if the length of the second dimension is < 2
final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
final NormalDistribution nd = new NormalDistribution(rng, 0, .1);
for (int i = 0; i < filteredTargetsSegDiff.size(); i++) {
filteredTargetsSegDiff2d[i][0] = filteredTargetsSegDiff.get(i);
filteredTargetsSegDiff2d[i][1] = nd.sample();
}
final MixtureMultivariateNormalDistribution estimateEM0 = MultivariateNormalMixtureExpectationMaximization.estimate(filteredTargetsSegDiff2d, numComponents);
final MultivariateNormalMixtureExpectationMaximization multivariateNormalMixtureExpectationMaximization = new MultivariateNormalMixtureExpectationMaximization(filteredTargetsSegDiff2d);
try {
multivariateNormalMixtureExpectationMaximization.fit(estimateEM0);
} catch (final MaxCountExceededException | ConvergenceException | SingularMatrixException e) {
// did not converge. Include the model as it was when the exception was thrown.
return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), false, true);
}
return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), true, true);
}
use of org.apache.commons.math3.random.RandomGenerator in project gatk by broadinstitute.
the class GATKProtectedMathUtilsTest method testRandomSelectFlatProbability.
@Test
public void testRandomSelectFlatProbability() {
final RandomGenerator rg = RandomGeneratorFactory.createRandomGenerator(new Random(13));
final int NUM_SAMPLES = 1000;
final List<Integer> choices = Arrays.asList(0, 1, 2);
final List<Integer> result = IntStream.range(0, NUM_SAMPLES).map(n -> GATKProtectedMathUtils.randomSelect(choices, j -> 1.0 / choices.size(), rg)).boxed().collect(Collectors.toList());
Assert.assertEquals(result.stream().filter(n -> n == 0).count(), NUM_SAMPLES / choices.size(), 50);
}
Aggregations