Search in sources :

Example 11 with ConvergenceException

use of org.apache.commons.math3.exception.ConvergenceException in project GDSC-SMLM by aherbert.

the class PCPALMFitting method fitClusteredModel.

/**
	 * Fits the correlation curve with r>0 to the clustered model using the estimated density and precision. Parameters
	 * must be fit within a tolerance of the starting values.
	 * 
	 * @param gr
	 * @param sigmaS
	 *            The estimated precision
	 * @param proteinDensity
	 *            The estimated protein density
	 * @return The fitted parameters [precision, density, clusterRadius, clusterDensity]
	 */
private double[] fitClusteredModel(double[][] gr, double sigmaS, double proteinDensity, String resultColour) {
    final ClusteredModelFunctionGradient function = new ClusteredModelFunctionGradient();
    clusteredModel = function;
    log("Fitting %s: Estimated precision = %f nm, estimated protein density = %g um^-2", clusteredModel.getName(), sigmaS, proteinDensity * 1e6);
    clusteredModel.setLogging(true);
    for (int i = offset; i < gr[0].length; i++) {
        // Only fit the curve above the estimated resolution (points below it will be subject to error)
        if (gr[0][i] > sigmaS * fitAboveEstimatedPrecision)
            clusteredModel.addPoint(gr[0][i], gr[1][i]);
    }
    double[] parameters;
    // The model is: sigma, density, range, amplitude
    double[] initialSolution = new double[] { sigmaS, proteinDensity, sigmaS * 5, 1 };
    int evaluations = 0;
    // Constrain the fitting to be close to the estimated precision (sigmaS) and protein density.
    // LVM fitting does not support constrained fitting so use a bounded optimiser.
    SumOfSquaresModelFunction clusteredModelMulti = new SumOfSquaresModelFunction(clusteredModel);
    double[] x = clusteredModelMulti.x;
    // Put some bounds around the initial guess. Use the fitting tolerance (in %) if provided.
    double limit = (fittingTolerance > 0) ? 1 + fittingTolerance / 100 : 2;
    double[] lB = new double[] { initialSolution[0] / limit, initialSolution[1] / limit, 0, 0 };
    // The amplitude and range should not extend beyond the limits of the g(r) curve.
    double[] uB = new double[] { initialSolution[0] * limit, initialSolution[1] * limit, Maths.max(x), Maths.max(gr[1]) };
    log("Fitting %s using a bounded search: %s < precision < %s & %s < density < %s", clusteredModel.getName(), Utils.rounded(lB[0], 4), Utils.rounded(uB[0], 4), Utils.rounded(lB[1] * 1e6, 4), Utils.rounded(uB[1] * 1e6, 4));
    PointValuePair constrainedSolution = runBoundedOptimiser(gr, initialSolution, lB, uB, clusteredModelMulti);
    if (constrainedSolution == null)
        return null;
    parameters = constrainedSolution.getPointRef();
    evaluations = boundedEvaluations;
    // Refit using a LVM  
    if (useLSE) {
        log("Re-fitting %s using a gradient optimisation", clusteredModel.getName());
        LevenbergMarquardtOptimizer optimizer = new LevenbergMarquardtOptimizer();
        Optimum lvmSolution;
        try {
            //@formatter:off
            LeastSquaresProblem problem = new LeastSquaresBuilder().maxEvaluations(Integer.MAX_VALUE).maxIterations(3000).start(parameters).target(function.getY()).weight(new DiagonalMatrix(function.getWeights())).model(function, new MultivariateMatrixFunction() {

                public double[][] value(double[] point) throws IllegalArgumentException {
                    return function.jacobian(point);
                }
            }).build();
            //@formatter:on
            lvmSolution = optimizer.optimize(problem);
            evaluations += lvmSolution.getEvaluations();
            double ss = lvmSolution.getResiduals().dotProduct(lvmSolution.getResiduals());
            if (ss < constrainedSolution.getValue()) {
                log("Re-fitting %s improved the SS from %s to %s (-%s%%)", clusteredModel.getName(), Utils.rounded(constrainedSolution.getValue(), 4), Utils.rounded(ss, 4), Utils.rounded(100 * (constrainedSolution.getValue() - ss) / constrainedSolution.getValue(), 4));
                parameters = lvmSolution.getPoint().toArray();
            }
        } catch (TooManyIterationsException e) {
            log("Failed to re-fit %s: Too many iterations (%s)", clusteredModel.getName(), e.getMessage());
        } catch (ConvergenceException e) {
            log("Failed to re-fit %s: %s", clusteredModel.getName(), e.getMessage());
        }
    }
    clusteredModel.setLogging(false);
    // Ensure the width is positive
    parameters[0] = Math.abs(parameters[0]);
    //parameters[2] = Math.abs(parameters[2]);
    double ss = 0;
    double[] obs = clusteredModel.getY();
    double[] exp = clusteredModel.value(parameters);
    for (int i = 0; i < obs.length; i++) ss += (obs[i] - exp[i]) * (obs[i] - exp[i]);
    ic2 = Maths.getAkaikeInformationCriterionFromResiduals(ss, clusteredModel.size(), parameters.length);
    final double fitSigmaS = parameters[0];
    final double fitProteinDensity = parameters[1];
    //The radius of the cluster domain
    final double domainRadius = parameters[2];
    //The density of the cluster domain
    final double domainDensity = parameters[3];
    // This is from the PC-PALM paper. However that paper fits the g(r)protein exponential convolved in 2D
    // with the g(r)PSF. In this method we have just fit the exponential
    final double nCluster = 2 * domainDensity * Math.PI * domainRadius * domainRadius * fitProteinDensity;
    double e1 = parameterDrift(sigmaS, fitSigmaS);
    double e2 = parameterDrift(proteinDensity, fitProteinDensity);
    log("  %s fit: SS = %f. cAIC = %f. %d evaluations", clusteredModel.getName(), ss, ic2, evaluations);
    log("  %s parameters:", clusteredModel.getName());
    log("    Average precision = %s nm (%s%%)", Utils.rounded(fitSigmaS, 4), Utils.rounded(e1, 4));
    log("    Average protein density = %s um^-2 (%s%%)", Utils.rounded(fitProteinDensity * 1e6, 4), Utils.rounded(e2, 4));
    log("    Domain radius = %s nm", Utils.rounded(domainRadius, 4));
    log("    Domain density = %s", Utils.rounded(domainDensity, 4));
    log("    nCluster = %s", Utils.rounded(nCluster, 4));
    // Check the fitted parameters are within tolerance of the initial estimates
    valid2 = true;
    if (fittingTolerance > 0 && (Math.abs(e1) > fittingTolerance || Math.abs(e2) > fittingTolerance)) {
        log("  Failed to fit %s within tolerance (%s%%): Average precision = %f nm (%s%%), average protein density = %g um^-2 (%s%%)", clusteredModel.getName(), Utils.rounded(fittingTolerance, 4), fitSigmaS, Utils.rounded(e1, 4), fitProteinDensity * 1e6, Utils.rounded(e2, 4));
        valid2 = false;
    }
    // Check extra parameters. Domain radius should be higher than the precision. Density should be positive
    if (domainRadius < fitSigmaS) {
        log("  Failed to fit %s: Domain radius is smaller than the average precision (%s < %s)", clusteredModel.getName(), Utils.rounded(domainRadius, 4), Utils.rounded(fitSigmaS, 4));
        valid2 = false;
    }
    if (domainDensity < 0) {
        log("  Failed to fit %s: Domain density is negative (%s)", clusteredModel.getName(), Utils.rounded(domainDensity, 4));
        valid2 = false;
    }
    if (ic2 > ic1) {
        log("  Failed to fit %s - Information Criterion has increased %s%%", clusteredModel.getName(), Utils.rounded((100 * (ic2 - ic1) / ic1), 4));
        valid2 = false;
    }
    addResult(clusteredModel.getName(), resultColour, valid2, fitSigmaS, fitProteinDensity, domainRadius, domainDensity, nCluster, 0, ic2);
    return parameters;
}
Also used : PointValuePair(org.apache.commons.math3.optim.PointValuePair) LeastSquaresBuilder(org.apache.commons.math3.fitting.leastsquares.LeastSquaresBuilder) Optimum(org.apache.commons.math3.fitting.leastsquares.LeastSquaresOptimizer.Optimum) LevenbergMarquardtOptimizer(org.apache.commons.math3.fitting.leastsquares.LevenbergMarquardtOptimizer) DiagonalMatrix(org.apache.commons.math3.linear.DiagonalMatrix) ConvergenceException(org.apache.commons.math3.exception.ConvergenceException) TooManyIterationsException(org.apache.commons.math3.exception.TooManyIterationsException) LeastSquaresProblem(org.apache.commons.math3.fitting.leastsquares.LeastSquaresProblem) MultivariateMatrixFunction(org.apache.commons.math3.analysis.MultivariateMatrixFunction)

Example 12 with ConvergenceException

use of org.apache.commons.math3.exception.ConvergenceException in project gatk-protected by broadinstitute.

the class CoverageDropoutDetector method retrieveGaussianMixtureModelForFilteredTargets.

/** <p>Produces a Gaussian mixture model based on the difference between targets and segment means.</p>
     * <p>Filters targets to populations where more than the minProportion lie in a single segment.</p>
     * <p>Returns null if no pass filtering.  Please note that in these cases,
     * in the rest of this class, we use this to assume that a GMM is not a good model.</p>
     *
     * @param segments  -- segments with segment mean in log2 copy ratio space
     * @param targets -- targets with a log2 copy ratio estimate
     * @param minProportion -- minimum proportion of all targets that a given segment must have in order to be used
     *                      in the evaluation
     * @param numComponents -- number of components to use in the GMM.  Usually, this is 2.
     * @return  never {@code null}.  Fitting result with indications whether it converged or was even attempted.
     */
private MixtureMultivariateNormalFitResult retrieveGaussianMixtureModelForFilteredTargets(final List<ModeledSegment> segments, final TargetCollection<ReadCountRecord.SingleSampleRecord> targets, double minProportion, int numComponents) {
    // For each target in a segment that contains enough targets, normalize the difference against the segment mean
    //  and collapse the filtered targets into the copy ratio estimates.
    final List<Double> filteredTargetsSegDiff = getNumProbeFilteredTargetList(segments, targets, minProportion);
    if (filteredTargetsSegDiff.size() < numComponents) {
        return new MixtureMultivariateNormalFitResult(null, false, false);
    }
    // Assume that Apache Commons wants data points in the first dimension.
    // Note that second dimension of length 2 (instead of 1) is to wrok around funny Apache commons API.
    final double[][] filteredTargetsSegDiff2d = new double[filteredTargetsSegDiff.size()][2];
    // Convert the filtered targets into 2d array (even if second dimension is length 1).  The second dimension is
    //  uncorrelated Gaussian.  This is only to get around funny API in Apache Commons, which will throw an
    //  exception if the length of the second dimension is < 2
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
    final NormalDistribution nd = new NormalDistribution(rng, 0, .1);
    for (int i = 0; i < filteredTargetsSegDiff.size(); i++) {
        filteredTargetsSegDiff2d[i][0] = filteredTargetsSegDiff.get(i);
        filteredTargetsSegDiff2d[i][1] = nd.sample();
    }
    final MixtureMultivariateNormalDistribution estimateEM0 = MultivariateNormalMixtureExpectationMaximization.estimate(filteredTargetsSegDiff2d, numComponents);
    final MultivariateNormalMixtureExpectationMaximization multivariateNormalMixtureExpectationMaximization = new MultivariateNormalMixtureExpectationMaximization(filteredTargetsSegDiff2d);
    try {
        multivariateNormalMixtureExpectationMaximization.fit(estimateEM0);
    } catch (final MaxCountExceededException | ConvergenceException | SingularMatrixException e) {
        //  did not converge.  Include the model as it was when the exception was thrown.
        return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), false, true);
    }
    return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), true, true);
}
Also used : RandomGenerator(org.apache.commons.math3.random.RandomGenerator) MaxCountExceededException(org.apache.commons.math3.exception.MaxCountExceededException) MixtureMultivariateNormalDistribution(org.apache.commons.math3.distribution.MixtureMultivariateNormalDistribution) Random(java.util.Random) NormalDistribution(org.apache.commons.math3.distribution.NormalDistribution) MixtureMultivariateNormalDistribution(org.apache.commons.math3.distribution.MixtureMultivariateNormalDistribution) ConvergenceException(org.apache.commons.math3.exception.ConvergenceException) SingularMatrixException(org.apache.commons.math3.linear.SingularMatrixException) MultivariateNormalMixtureExpectationMaximization(org.apache.commons.math3.distribution.fitting.MultivariateNormalMixtureExpectationMaximization)

Example 13 with ConvergenceException

use of org.apache.commons.math3.exception.ConvergenceException in project gatk by broadinstitute.

the class CoverageDropoutDetector method retrieveGaussianMixtureModelForFilteredTargets.

/** <p>Produces a Gaussian mixture model based on the difference between targets and segment means.</p>
     * <p>Filters targets to populations where more than the minProportion lie in a single segment.</p>
     * <p>Returns null if no pass filtering.  Please note that in these cases,
     * in the rest of this class, we use this to assume that a GMM is not a good model.</p>
     *
     * @param segments  -- segments with segment mean in log2 copy ratio space
     * @param targets -- targets with a log2 copy ratio estimate
     * @param minProportion -- minimum proportion of all targets that a given segment must have in order to be used
     *                      in the evaluation
     * @param numComponents -- number of components to use in the GMM.  Usually, this is 2.
     * @return  never {@code null}.  Fitting result with indications whether it converged or was even attempted.
     */
private MixtureMultivariateNormalFitResult retrieveGaussianMixtureModelForFilteredTargets(final List<ModeledSegment> segments, final TargetCollection<ReadCountRecord.SingleSampleRecord> targets, double minProportion, int numComponents) {
    // For each target in a segment that contains enough targets, normalize the difference against the segment mean
    //  and collapse the filtered targets into the copy ratio estimates.
    final List<Double> filteredTargetsSegDiff = getNumProbeFilteredTargetList(segments, targets, minProportion);
    if (filteredTargetsSegDiff.size() < numComponents) {
        return new MixtureMultivariateNormalFitResult(null, false, false);
    }
    // Assume that Apache Commons wants data points in the first dimension.
    // Note that second dimension of length 2 (instead of 1) is to wrok around funny Apache commons API.
    final double[][] filteredTargetsSegDiff2d = new double[filteredTargetsSegDiff.size()][2];
    // Convert the filtered targets into 2d array (even if second dimension is length 1).  The second dimension is
    //  uncorrelated Gaussian.  This is only to get around funny API in Apache Commons, which will throw an
    //  exception if the length of the second dimension is < 2
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
    final NormalDistribution nd = new NormalDistribution(rng, 0, .1);
    for (int i = 0; i < filteredTargetsSegDiff.size(); i++) {
        filteredTargetsSegDiff2d[i][0] = filteredTargetsSegDiff.get(i);
        filteredTargetsSegDiff2d[i][1] = nd.sample();
    }
    final MixtureMultivariateNormalDistribution estimateEM0 = MultivariateNormalMixtureExpectationMaximization.estimate(filteredTargetsSegDiff2d, numComponents);
    final MultivariateNormalMixtureExpectationMaximization multivariateNormalMixtureExpectationMaximization = new MultivariateNormalMixtureExpectationMaximization(filteredTargetsSegDiff2d);
    try {
        multivariateNormalMixtureExpectationMaximization.fit(estimateEM0);
    } catch (final MaxCountExceededException | ConvergenceException | SingularMatrixException e) {
        //  did not converge.  Include the model as it was when the exception was thrown.
        return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), false, true);
    }
    return new MixtureMultivariateNormalFitResult(multivariateNormalMixtureExpectationMaximization.getFittedModel(), true, true);
}
Also used : RandomGenerator(org.apache.commons.math3.random.RandomGenerator) MaxCountExceededException(org.apache.commons.math3.exception.MaxCountExceededException) MixtureMultivariateNormalDistribution(org.apache.commons.math3.distribution.MixtureMultivariateNormalDistribution) Random(java.util.Random) NormalDistribution(org.apache.commons.math3.distribution.NormalDistribution) MixtureMultivariateNormalDistribution(org.apache.commons.math3.distribution.MixtureMultivariateNormalDistribution) ConvergenceException(org.apache.commons.math3.exception.ConvergenceException) SingularMatrixException(org.apache.commons.math3.linear.SingularMatrixException) MultivariateNormalMixtureExpectationMaximization(org.apache.commons.math3.distribution.fitting.MultivariateNormalMixtureExpectationMaximization)

Aggregations

ConvergenceException (org.apache.commons.math3.exception.ConvergenceException)12 TooManyIterationsException (org.apache.commons.math3.exception.TooManyIterationsException)10 LeastSquaresBuilder (org.apache.commons.math3.fitting.leastsquares.LeastSquaresBuilder)8 Optimum (org.apache.commons.math3.fitting.leastsquares.LeastSquaresOptimizer.Optimum)8 LeastSquaresProblem (org.apache.commons.math3.fitting.leastsquares.LeastSquaresProblem)8 LevenbergMarquardtOptimizer (org.apache.commons.math3.fitting.leastsquares.LevenbergMarquardtOptimizer)8 DiagonalMatrix (org.apache.commons.math3.linear.DiagonalMatrix)8 MultivariateMatrixFunction (org.apache.commons.math3.analysis.MultivariateMatrixFunction)7 PointValuePair (org.apache.commons.math3.optim.PointValuePair)6 TooManyEvaluationsException (org.apache.commons.math3.exception.TooManyEvaluationsException)5 InitialGuess (org.apache.commons.math3.optim.InitialGuess)4 MaxEval (org.apache.commons.math3.optim.MaxEval)4 OptimizationData (org.apache.commons.math3.optim.OptimizationData)4 SimpleBounds (org.apache.commons.math3.optim.SimpleBounds)4 ObjectiveFunction (org.apache.commons.math3.optim.nonlinear.scalar.ObjectiveFunction)4 CMAESOptimizer (org.apache.commons.math3.optim.nonlinear.scalar.noderiv.CMAESOptimizer)4 RandomGenerator (org.apache.commons.math3.random.RandomGenerator)4 NormalDistribution (org.apache.commons.math3.distribution.NormalDistribution)3 SingularMatrixException (org.apache.commons.math3.linear.SingularMatrixException)3 CustomPowellOptimizer (org.apache.commons.math3.optim.nonlinear.scalar.noderiv.CustomPowellOptimizer)3