use of org.apache.commons.math3.exception.ConvergenceException in project GDSC-SMLM by aherbert.
the class PCPALMFitting method fitClusteredModel.
/**
* Fits the correlation curve with r>0 to the clustered model using the estimated density and precision. Parameters
* must be fit within a tolerance of the starting values.
*
* @param gr
* @param sigmaS
* The estimated precision
* @param proteinDensity
* The estimated protein density
* @return The fitted parameters [precision, density, clusterRadius, clusterDensity]
*/
private double[] fitClusteredModel(double[][] gr, double sigmaS, double proteinDensity, String resultColour) {
final ClusteredModelFunctionGradient function = new ClusteredModelFunctionGradient();
clusteredModel = function;
log("Fitting %s: Estimated precision = %f nm, estimated protein density = %g um^-2", clusteredModel.getName(), sigmaS, proteinDensity * 1e6);
clusteredModel.setLogging(true);
for (int i = offset; i < gr[0].length; i++) {
// Only fit the curve above the estimated resolution (points below it will be subject to error)
if (gr[0][i] > sigmaS * fitAboveEstimatedPrecision)
clusteredModel.addPoint(gr[0][i], gr[1][i]);
}
double[] parameters;
// The model is: sigma, density, range, amplitude
double[] initialSolution = new double[] { sigmaS, proteinDensity, sigmaS * 5, 1 };
int evaluations = 0;
// Constrain the fitting to be close to the estimated precision (sigmaS) and protein density.
// LVM fitting does not support constrained fitting so use a bounded optimiser.
SumOfSquaresModelFunction clusteredModelMulti = new SumOfSquaresModelFunction(clusteredModel);
double[] x = clusteredModelMulti.x;
// Put some bounds around the initial guess. Use the fitting tolerance (in %) if provided.
double limit = (fittingTolerance > 0) ? 1 + fittingTolerance / 100 : 2;
double[] lB = new double[] { initialSolution[0] / limit, initialSolution[1] / limit, 0, 0 };
// The amplitude and range should not extend beyond the limits of the g(r) curve.
double[] uB = new double[] { initialSolution[0] * limit, initialSolution[1] * limit, Maths.max(x), Maths.max(gr[1]) };
log("Fitting %s using a bounded search: %s < precision < %s & %s < density < %s", clusteredModel.getName(), Utils.rounded(lB[0], 4), Utils.rounded(uB[0], 4), Utils.rounded(lB[1] * 1e6, 4), Utils.rounded(uB[1] * 1e6, 4));
PointValuePair constrainedSolution = runBoundedOptimiser(gr, initialSolution, lB, uB, clusteredModelMulti);
if (constrainedSolution == null)
return null;
parameters = constrainedSolution.getPointRef();
evaluations = boundedEvaluations;
// Refit using a LVM
if (useLSE) {
log("Re-fitting %s using a gradient optimisation", clusteredModel.getName());
LevenbergMarquardtOptimizer optimizer = new LevenbergMarquardtOptimizer();
Optimum lvmSolution;
try {
//@formatter:off
LeastSquaresProblem problem = new LeastSquaresBuilder().maxEvaluations(Integer.MAX_VALUE).maxIterations(3000).start(parameters).target(function.getY()).weight(new DiagonalMatrix(function.getWeights())).model(function, new MultivariateMatrixFunction() {
public double[][] value(double[] point) throws IllegalArgumentException {
return function.jacobian(point);
}
}).build();
//@formatter:on
lvmSolution = optimizer.optimize(problem);
evaluations += lvmSolution.getEvaluations();
double ss = lvmSolution.getResiduals().dotProduct(lvmSolution.getResiduals());
if (ss < constrainedSolution.getValue()) {
log("Re-fitting %s improved the SS from %s to %s (-%s%%)", clusteredModel.getName(), Utils.rounded(constrainedSolution.getValue(), 4), Utils.rounded(ss, 4), Utils.rounded(100 * (constrainedSolution.getValue() - ss) / constrainedSolution.getValue(), 4));
parameters = lvmSolution.getPoint().toArray();
}
} catch (TooManyIterationsException e) {
log("Failed to re-fit %s: Too many iterations (%s)", clusteredModel.getName(), e.getMessage());
} catch (ConvergenceException e) {
log("Failed to re-fit %s: %s", clusteredModel.getName(), e.getMessage());
}
}
clusteredModel.setLogging(false);
// Ensure the width is positive
parameters[0] = Math.abs(parameters[0]);
//parameters[2] = Math.abs(parameters[2]);
double ss = 0;
double[] obs = clusteredModel.getY();
double[] exp = clusteredModel.value(parameters);
for (int i = 0; i < obs.length; i++) ss += (obs[i] - exp[i]) * (obs[i] - exp[i]);
ic2 = Maths.getAkaikeInformationCriterionFromResiduals(ss, clusteredModel.size(), parameters.length);
final double fitSigmaS = parameters[0];
final double fitProteinDensity = parameters[1];
//The radius of the cluster domain
final double domainRadius = parameters[2];
//The density of the cluster domain
final double domainDensity = parameters[3];
// This is from the PC-PALM paper. However that paper fits the g(r)protein exponential convolved in 2D
// with the g(r)PSF. In this method we have just fit the exponential
final double nCluster = 2 * domainDensity * Math.PI * domainRadius * domainRadius * fitProteinDensity;
double e1 = parameterDrift(sigmaS, fitSigmaS);
double e2 = parameterDrift(proteinDensity, fitProteinDensity);
log(" %s fit: SS = %f. cAIC = %f. %d evaluations", clusteredModel.getName(), ss, ic2, evaluations);
log(" %s parameters:", clusteredModel.getName());
log(" Average precision = %s nm (%s%%)", Utils.rounded(fitSigmaS, 4), Utils.rounded(e1, 4));
log(" Average protein density = %s um^-2 (%s%%)", Utils.rounded(fitProteinDensity * 1e6, 4), Utils.rounded(e2, 4));
log(" Domain radius = %s nm", Utils.rounded(domainRadius, 4));
log(" Domain density = %s", Utils.rounded(domainDensity, 4));
log(" nCluster = %s", Utils.rounded(nCluster, 4));
// Check the fitted parameters are within tolerance of the initial estimates
valid2 = true;
if (fittingTolerance > 0 && (Math.abs(e1) > fittingTolerance || Math.abs(e2) > fittingTolerance)) {
log(" Failed to fit %s within tolerance (%s%%): Average precision = %f nm (%s%%), average protein density = %g um^-2 (%s%%)", clusteredModel.getName(), Utils.rounded(fittingTolerance, 4), fitSigmaS, Utils.rounded(e1, 4), fitProteinDensity * 1e6, Utils.rounded(e2, 4));
valid2 = false;
}
// Check extra parameters. Domain radius should be higher than the precision. Density should be positive
if (domainRadius < fitSigmaS) {
log(" Failed to fit %s: Domain radius is smaller than the average precision (%s < %s)", clusteredModel.getName(), Utils.rounded(domainRadius, 4), Utils.rounded(fitSigmaS, 4));
valid2 = false;
}
if (domainDensity < 0) {
log(" Failed to fit %s: Domain density is negative (%s)", clusteredModel.getName(), Utils.rounded(domainDensity, 4));
valid2 = false;
}
if (ic2 > ic1) {
log(" Failed to fit %s - Information Criterion has increased %s%%", clusteredModel.getName(), Utils.rounded((100 * (ic2 - ic1) / ic1), 4));
valid2 = false;
}
addResult(clusteredModel.getName(), resultColour, valid2, fitSigmaS, fitProteinDensity, domainRadius, domainDensity, nCluster, 0, ic2);
return parameters;
}
use of org.apache.commons.math3.exception.ConvergenceException in project gatk-protected by broadinstitute.
the class CoverageDropoutDetector method retrieveGaussianMixtureModelForFilteredTargets.
/** <p>Produces a Gaussian mixture model based on the difference between targets and segment means.</p>
* <p>Filters targets to populations where more than the minProportion lie in a single segment.</p>
* <p>Returns null if no pass filtering. Please note that in these cases,
* in the rest of this class, we use this to assume that a GMM is not a good model.</p>
*
* @param segments -- segments with segment mean in log2 copy ratio space
* @param targets -- targets with a log2 copy ratio estimate
* @param minProportion -- minimum proportion of all targets that a given segment must have in order to be used
* in the evaluation
* @param numComponents -- number of components to use in the GMM. Usually, this is 2.
* @return never {@code null}. Fitting result with indications whether it converged or was even attempted.
*/
private MixtureMultivariateNormalFitResult retrieveGaussianMixtureModelForFilteredTargets(final List<ModeledSegment> segments, final TargetCollection<ReadCountRecord.SingleSampleRecord> targets, double minProportion, int numComponents) {
// For each target in a segment that contains enough targets, normalize the difference against the segment mean
// and collapse the filtered targets into the copy ratio estimates.
final List<Double> filteredTargetsSegDiff = getNumProbeFilteredTargetList(segments, targets, minProportion);
if (filteredTargetsSegDiff.size() < numComponents) {
return new MixtureMultivariateNormalFitResult(null, false, false);
}
// Assume that Apache Commons wants data points in the first dimension.
// Note that second dimension of length 2 (instead of 1) is to wrok around funny Apache commons API.
final double[][] filteredTargetsSegDiff2d = new double[filteredTargetsSegDiff.size()][2];
// Convert the filtered targets into 2d array (even if second dimension is length 1). The second dimension is
// uncorrelated Gaussian. This is only to get around funny API in Apache Commons, which will throw an
// exception if the length of the second dimension is < 2
final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
final NormalDistribution nd = new NormalDistribution(rng, 0, .1);
for (int i = 0; i < filteredTargetsSegDiff.size(); i++) {
filteredTargetsSegDiff2d[i][0] = filteredTargetsSegDiff.get(i);
filteredTargetsSegDiff2d[i][1] = nd.sample();
}
final MixtureMultivariateNormalDistribution estimateEM0 = MultivariateNormalMixtureExpectationMaximization.estimate(filteredTargetsSegDiff2d, numComponents);
final MultivariateNormalMixtureExpectationMaximization multivariateNormalMixtureExpectationMaximization = new MultivariateNormalMixtureExpectationMaximization(filteredTargetsSegDiff2d);
try {
multivariateNormalMixtureExpectationMaximization.fit(estimateEM0);
} catch (final MaxCountExceededException | ConvergenceException | SingularMatrixException e) {
// did not converge. Include the model as it was when the exception was thrown.
return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), false, true);
}
return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), true, true);
}
use of org.apache.commons.math3.exception.ConvergenceException in project gatk by broadinstitute.
the class CoverageDropoutDetector method retrieveGaussianMixtureModelForFilteredTargets.
/** <p>Produces a Gaussian mixture model based on the difference between targets and segment means.</p>
* <p>Filters targets to populations where more than the minProportion lie in a single segment.</p>
* <p>Returns null if no pass filtering. Please note that in these cases,
* in the rest of this class, we use this to assume that a GMM is not a good model.</p>
*
* @param segments -- segments with segment mean in log2 copy ratio space
* @param targets -- targets with a log2 copy ratio estimate
* @param minProportion -- minimum proportion of all targets that a given segment must have in order to be used
* in the evaluation
* @param numComponents -- number of components to use in the GMM. Usually, this is 2.
* @return never {@code null}. Fitting result with indications whether it converged or was even attempted.
*/
private MixtureMultivariateNormalFitResult retrieveGaussianMixtureModelForFilteredTargets(final List<ModeledSegment> segments, final TargetCollection<ReadCountRecord.SingleSampleRecord> targets, double minProportion, int numComponents) {
// For each target in a segment that contains enough targets, normalize the difference against the segment mean
// and collapse the filtered targets into the copy ratio estimates.
final List<Double> filteredTargetsSegDiff = getNumProbeFilteredTargetList(segments, targets, minProportion);
if (filteredTargetsSegDiff.size() < numComponents) {
return new MixtureMultivariateNormalFitResult(null, false, false);
}
// Assume that Apache Commons wants data points in the first dimension.
// Note that second dimension of length 2 (instead of 1) is to wrok around funny Apache commons API.
final double[][] filteredTargetsSegDiff2d = new double[filteredTargetsSegDiff.size()][2];
// Convert the filtered targets into 2d array (even if second dimension is length 1). The second dimension is
// uncorrelated Gaussian. This is only to get around funny API in Apache Commons, which will throw an
// exception if the length of the second dimension is < 2
final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
final NormalDistribution nd = new NormalDistribution(rng, 0, .1);
for (int i = 0; i < filteredTargetsSegDiff.size(); i++) {
filteredTargetsSegDiff2d[i][0] = filteredTargetsSegDiff.get(i);
filteredTargetsSegDiff2d[i][1] = nd.sample();
}
final MixtureMultivariateNormalDistribution estimateEM0 = MultivariateNormalMixtureExpectationMaximization.estimate(filteredTargetsSegDiff2d, numComponents);
final MultivariateNormalMixtureExpectationMaximization multivariateNormalMixtureExpectationMaximization = new MultivariateNormalMixtureExpectationMaximization(filteredTargetsSegDiff2d);
try {
multivariateNormalMixtureExpectationMaximization.fit(estimateEM0);
} catch (final MaxCountExceededException | ConvergenceException | SingularMatrixException e) {
// did not converge. Include the model as it was when the exception was thrown.
return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), false, true);
}
return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), true, true);
}
Aggregations