use of org.apache.commons.math3.analysis.function.Gaussian in project GDSC-SMLM by aherbert.
the class FIRE method fitGaussian.
/**
* Fit gaussian.
*
* @param x
* the x
* @param y
* the y
* @return new double[] { norm, mean, sigma }
*/
private double[] fitGaussian(float[] x, float[] y) {
WeightedObservedPoints obs = new WeightedObservedPoints();
for (int i = 0; i < x.length; i++) obs.add(x[i], y[i]);
Collection<WeightedObservedPoint> observations = obs.toList();
GaussianCurveFitter fitter = GaussianCurveFitter.create().withMaxIterations(2000);
GaussianCurveFitter.ParameterGuesser guess = new GaussianCurveFitter.ParameterGuesser(observations);
double[] initialGuess = null;
try {
initialGuess = guess.guess();
return fitter.withStartPoint(initialGuess).fit(observations);
} catch (TooManyEvaluationsException e) {
// Use the initial estimate
return initialGuess;
} catch (Exception e) {
// Just in case there is another exception type, or the initial estimate failed
return null;
}
}
use of org.apache.commons.math3.analysis.function.Gaussian in project gatk-protected by broadinstitute.
the class CoverageDropoutDetector method retrieveGaussianMixtureModelForFilteredTargets.
/** <p>Produces a Gaussian mixture model based on the difference between targets and segment means.</p>
* <p>Filters targets to populations where more than the minProportion lie in a single segment.</p>
* <p>Returns null if no pass filtering. Please note that in these cases,
* in the rest of this class, we use this to assume that a GMM is not a good model.</p>
*
* @param segments -- segments with segment mean in log2 copy ratio space
* @param targets -- targets with a log2 copy ratio estimate
* @param minProportion -- minimum proportion of all targets that a given segment must have in order to be used
* in the evaluation
* @param numComponents -- number of components to use in the GMM. Usually, this is 2.
* @return never {@code null}. Fitting result with indications whether it converged or was even attempted.
*/
private MixtureMultivariateNormalFitResult retrieveGaussianMixtureModelForFilteredTargets(final List<ModeledSegment> segments, final TargetCollection<ReadCountRecord.SingleSampleRecord> targets, double minProportion, int numComponents) {
// For each target in a segment that contains enough targets, normalize the difference against the segment mean
// and collapse the filtered targets into the copy ratio estimates.
final List<Double> filteredTargetsSegDiff = getNumProbeFilteredTargetList(segments, targets, minProportion);
if (filteredTargetsSegDiff.size() < numComponents) {
return new MixtureMultivariateNormalFitResult(null, false, false);
}
// Assume that Apache Commons wants data points in the first dimension.
// Note that second dimension of length 2 (instead of 1) is to wrok around funny Apache commons API.
final double[][] filteredTargetsSegDiff2d = new double[filteredTargetsSegDiff.size()][2];
// Convert the filtered targets into 2d array (even if second dimension is length 1). The second dimension is
// uncorrelated Gaussian. This is only to get around funny API in Apache Commons, which will throw an
// exception if the length of the second dimension is < 2
final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
final NormalDistribution nd = new NormalDistribution(rng, 0, .1);
for (int i = 0; i < filteredTargetsSegDiff.size(); i++) {
filteredTargetsSegDiff2d[i][0] = filteredTargetsSegDiff.get(i);
filteredTargetsSegDiff2d[i][1] = nd.sample();
}
final MixtureMultivariateNormalDistribution estimateEM0 = MultivariateNormalMixtureExpectationMaximization.estimate(filteredTargetsSegDiff2d, numComponents);
final MultivariateNormalMixtureExpectationMaximization multivariateNormalMixtureExpectationMaximization = new MultivariateNormalMixtureExpectationMaximization(filteredTargetsSegDiff2d);
try {
multivariateNormalMixtureExpectationMaximization.fit(estimateEM0);
} catch (final MaxCountExceededException | ConvergenceException | SingularMatrixException e) {
// did not converge. Include the model as it was when the exception was thrown.
return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), false, true);
}
return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), true, true);
}
use of org.apache.commons.math3.analysis.function.Gaussian in project gatk-protected by broadinstitute.
the class CNLOHCaller method calculateDoubleGaussian.
/**
* This function takes two half-Gaussian distributions and uses one for the values below the mode and the other for values above the
* mode.
*
* If the mode is outside the credible interval, the calculation is done as if the interval boundary is
* {@link CNLOHCaller#MIN_DIST_FROM_MODE} above/below the mode.
*
*
* @param val value to calculate the "pdf"
* @param credibleMode mode of the presumed distribution
* @param credibleLow 95% confidence interval on the low tail
* @param credibleHigh 95% confidence interval on the high tail
* @return pdf with a minimum value of {@link CNLOHCaller#MIN_L}
*/
@VisibleForTesting
static double calculateDoubleGaussian(final double val, final double credibleMode, final double credibleLow, final double credibleHigh) {
final double hiDist = Math.max(credibleHigh - credibleMode, MIN_DIST_FROM_MODE);
final double loDist = Math.max(credibleMode - credibleLow, MIN_DIST_FROM_MODE);
return new NormalDistribution(null, credibleMode, (val >= credibleMode ? hiDist : loDist) / NUM_STD_95_CONF_INTERVAL_GAUSSIAN).density(val);
}
use of org.apache.commons.math3.analysis.function.Gaussian in project gatk-protected by broadinstitute.
the class GibbsSamplerSingleGaussianUnitTest method testRunMCMCOnSingleGaussianModel.
/**
* Tests Bayesian inference of a Gaussian model via MCMC. Recovery of input values for the variance and mean
* global parameters is checked. In particular, the mean and standard deviation of the posteriors for
* both parameters must be recovered to within a relative error of 1% and 10%, respectively, in 250 samples
* (after 250 burn-in samples have been discarded).
*/
@Test
public void testRunMCMCOnSingleGaussianModel() {
//Create new instance of the Modeller helper class, passing all quantities needed to initialize state and data.
final GaussianModeller modeller = new GaussianModeller(VARIANCE_INITIAL, MEAN_INITIAL, datapointsList);
//Create a GibbsSampler, passing the total number of samples (including burn-in samples)
//and the model held by the Modeller.
final GibbsSampler<GaussianParameter, ParameterizedState<GaussianParameter>, GaussianDataCollection> gibbsSampler = new GibbsSampler<>(NUM_SAMPLES, modeller.model);
//Run the MCMC.
gibbsSampler.runMCMC();
//Get the samples of each of the parameter posteriors (discarding burn-in samples) by passing the
//parameter name, type, and burn-in number to the getSamples method.
final double[] varianceSamples = Doubles.toArray(gibbsSampler.getSamples(GaussianParameter.VARIANCE, Double.class, NUM_BURN_IN));
final double[] meanSamples = Doubles.toArray(gibbsSampler.getSamples(GaussianParameter.MEAN, Double.class, NUM_BURN_IN));
//Check that the statistics---i.e., the means and standard deviations---of the posteriors
//agree with those found by emcee/analytically to a relative error of 1% and 10%, respectively.
final double variancePosteriorCenter = new Mean().evaluate(varianceSamples);
final double variancePosteriorStandardDeviation = new StandardDeviation().evaluate(varianceSamples);
Assert.assertEquals(relativeError(variancePosteriorCenter, VARIANCE_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS);
Assert.assertEquals(relativeError(variancePosteriorStandardDeviation, VARIANCE_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
final double meanPosteriorCenter = new Mean().evaluate(meanSamples);
final double meanPosteriorStandardDeviation = new StandardDeviation().evaluate(meanSamples);
Assert.assertEquals(relativeError(meanPosteriorCenter, MEAN_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS);
Assert.assertEquals(relativeError(meanPosteriorStandardDeviation, MEAN_POSTERIOR_STANDARD_DEVIATION_TRUTH), 0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
}
use of org.apache.commons.math3.analysis.function.Gaussian in project gatk by broadinstitute.
the class CoverageDropoutDetector method retrieveGaussianMixtureModelForFilteredTargets.
/** <p>Produces a Gaussian mixture model based on the difference between targets and segment means.</p>
* <p>Filters targets to populations where more than the minProportion lie in a single segment.</p>
* <p>Returns null if no pass filtering. Please note that in these cases,
* in the rest of this class, we use this to assume that a GMM is not a good model.</p>
*
* @param segments -- segments with segment mean in log2 copy ratio space
* @param targets -- targets with a log2 copy ratio estimate
* @param minProportion -- minimum proportion of all targets that a given segment must have in order to be used
* in the evaluation
* @param numComponents -- number of components to use in the GMM. Usually, this is 2.
* @return never {@code null}. Fitting result with indications whether it converged or was even attempted.
*/
private MixtureMultivariateNormalFitResult retrieveGaussianMixtureModelForFilteredTargets(final List<ModeledSegment> segments, final TargetCollection<ReadCountRecord.SingleSampleRecord> targets, double minProportion, int numComponents) {
// For each target in a segment that contains enough targets, normalize the difference against the segment mean
// and collapse the filtered targets into the copy ratio estimates.
final List<Double> filteredTargetsSegDiff = getNumProbeFilteredTargetList(segments, targets, minProportion);
if (filteredTargetsSegDiff.size() < numComponents) {
return new MixtureMultivariateNormalFitResult(null, false, false);
}
// Assume that Apache Commons wants data points in the first dimension.
// Note that second dimension of length 2 (instead of 1) is to wrok around funny Apache commons API.
final double[][] filteredTargetsSegDiff2d = new double[filteredTargetsSegDiff.size()][2];
// Convert the filtered targets into 2d array (even if second dimension is length 1). The second dimension is
// uncorrelated Gaussian. This is only to get around funny API in Apache Commons, which will throw an
// exception if the length of the second dimension is < 2
final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
final NormalDistribution nd = new NormalDistribution(rng, 0, .1);
for (int i = 0; i < filteredTargetsSegDiff.size(); i++) {
filteredTargetsSegDiff2d[i][0] = filteredTargetsSegDiff.get(i);
filteredTargetsSegDiff2d[i][1] = nd.sample();
}
final MixtureMultivariateNormalDistribution estimateEM0 = MultivariateNormalMixtureExpectationMaximization.estimate(filteredTargetsSegDiff2d, numComponents);
final MultivariateNormalMixtureExpectationMaximization multivariateNormalMixtureExpectationMaximization = new MultivariateNormalMixtureExpectationMaximization(filteredTargetsSegDiff2d);
try {
multivariateNormalMixtureExpectationMaximization.fit(estimateEM0);
} catch (final MaxCountExceededException | ConvergenceException | SingularMatrixException e) {
// did not converge. Include the model as it was when the exception was thrown.
return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), false, true);
}
return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), true, true);
}
Aggregations