Search in sources :

Example 11 with Precision

use of org.apache.commons.math3.util.Precision in project GDSC-SMLM by aherbert.

the class PCPALMMolecules method runSimulation.

private void runSimulation(boolean resultsAvailable) {
    if (resultsAvailable && !showSimulationDialog())
        return;
    startLog();
    log("Simulation parameters");
    if (blinkingDistribution == 3) {
        log("  - Clusters = %d", nMolecules);
        log("  - Simulation size = %s um", Utils.rounded(simulationSize, 4));
        log("  - Molecules/cluster = %s", Utils.rounded(blinkingRate, 4));
        log("  - Blinking distribution = %s", BLINKING_DISTRIBUTION[blinkingDistribution]);
        log("  - p-Value = %s", Utils.rounded(p, 4));
    } else {
        log("  - Molecules = %d", nMolecules);
        log("  - Simulation size = %s um", Utils.rounded(simulationSize, 4));
        log("  - Blinking rate = %s", Utils.rounded(blinkingRate, 4));
        log("  - Blinking distribution = %s", BLINKING_DISTRIBUTION[blinkingDistribution]);
    }
    log("  - Average precision = %s nm", Utils.rounded(sigmaS, 4));
    log("  - Clusters simulation = " + CLUSTER_SIMULATION[clusterSimulation]);
    if (clusterSimulation > 0) {
        log("  - Cluster number = %s +/- %s", Utils.rounded(clusterNumber, 4), Utils.rounded(clusterNumberSD, 4));
        log("  - Cluster radius = %s nm", Utils.rounded(clusterRadius, 4));
    }
    final double nmPerPixel = 100;
    double width = simulationSize * 1000.0;
    // Allow a border of 3 x sigma for +/- precision
    //if (blinkingRate > 1)
    width -= 3 * sigmaS;
    RandomGenerator randomGenerator = new Well19937c(System.currentTimeMillis() + System.identityHashCode(this));
    RandomDataGenerator dataGenerator = new RandomDataGenerator(randomGenerator);
    UniformDistribution dist = new UniformDistribution(null, new double[] { width, width, 0 }, randomGenerator.nextInt());
    molecules = new ArrayList<Molecule>(nMolecules);
    // Create some dummy results since the calibration is required for later analysis
    results = new MemoryPeakResults();
    results.setCalibration(new gdsc.smlm.results.Calibration(nmPerPixel, 1, 100));
    results.setSource(new NullSource("Molecule Simulation"));
    results.begin();
    int count = 0;
    // Generate a sequence of coordinates
    ArrayList<double[]> xyz = new ArrayList<double[]>((int) (nMolecules * 1.1));
    Statistics statsRadius = new Statistics();
    Statistics statsSize = new Statistics();
    String maskTitle = TITLE + " Cluster Mask";
    ByteProcessor bp = null;
    double maskScale = 0;
    if (clusterSimulation > 0) {
        // Simulate clusters.
        // Note: In the Veatch et al. paper (Plos 1, e31457) correlation functions are built using circles
        // with small radii of 4-8 Arbitrary Units (AU) or large radii of 10-30 AU. A fluctuations model is
        // created at T = 1.075 Tc. It is not clear exactly how the particles are distributed.
        // It may be that a mask is created first using the model. The particles are placed on the mask using
        // a specified density. This simulation produces a figure to show either a damped cosine function
        // (circles) or an exponential (fluctuations). The number of particles in each circle may be randomly
        // determined just by density. The figure does not discuss the derivation of the cluster size 
        // statistic.
        // 
        // If this plugin simulation is run with a uniform distribution and blinking rate of 1 then the damped
        // cosine function is reproduced. The curve crosses g(r)=1 at a value equivalent to the average
        // distance to the centre-of-mass of each drawn cluster, not the input cluster radius parameter (which 
        // is a hard upper limit on the distance to centre).
        final int maskSize = lowResolutionImageSize;
        int[] mask = null;
        // scale is in nm/pixel
        maskScale = width / maskSize;
        ArrayList<double[]> clusterCentres = new ArrayList<double[]>();
        int totalSteps = 1 + (int) Math.ceil(nMolecules / clusterNumber);
        if (clusterSimulation == 2 || clusterSimulation == 3) {
            // Clusters are non-overlapping circles
            // Ensure the circles do not overlap by using an exclusion mask that accumulates 
            // out-of-bounds pixels by drawing the last cluster (plus some border) on an image. When no
            // more pixels are available then stop generating molecules.
            // This is done by cumulatively filling a mask and using the MaskDistribution to select 
            // a new point. This may be slow but it works.
            // TODO - Allow clusters of different sizes...
            mask = new int[maskSize * maskSize];
            Arrays.fill(mask, 255);
            MaskDistribution maskDistribution = new MaskDistribution(mask, maskSize, maskSize, 0, maskScale, maskScale, randomGenerator);
            double[] centre;
            IJ.showStatus("Computing clusters mask");
            int roiRadius = (int) Math.round((clusterRadius * 2) / maskScale);
            if (clusterSimulation == 3) {
                // Generate a mask of circles then sample from that.
                // If we want to fill the mask completely then adjust the total steps to be the number of 
                // circles that can fit inside the mask.
                totalSteps = (int) (maskSize * maskSize / (Math.PI * Math.pow(clusterRadius / maskScale, 2)));
            }
            while ((centre = maskDistribution.next()) != null && clusterCentres.size() < totalSteps) {
                IJ.showProgress(clusterCentres.size(), totalSteps);
                // The mask returns the coordinates with the centre of the image at 0,0
                centre[0] += width / 2;
                centre[1] += width / 2;
                clusterCentres.add(centre);
                // Fill in the mask around the centre to exclude any more circles that could overlap
                double cx = centre[0] / maskScale;
                double cy = centre[1] / maskScale;
                fillMask(mask, maskSize, (int) cx, (int) cy, roiRadius, 0);
                //Utils.display("Mask", new ColorProcessor(maskSize, maskSize, mask));
                try {
                    maskDistribution = new MaskDistribution(mask, maskSize, maskSize, 0, maskScale, maskScale, randomGenerator);
                } catch (IllegalArgumentException e) {
                    // This can happen when there are no more non-zero pixels
                    log("WARNING: No more room for clusters on the mask area (created %d of estimated %d)", clusterCentres.size(), totalSteps);
                    break;
                }
            }
            IJ.showProgress(1);
            IJ.showStatus("");
        } else {
            // Pick centres randomly from the distribution 
            while (clusterCentres.size() < totalSteps) clusterCentres.add(dist.next());
        }
        if (showClusterMask || clusterSimulation == 3) {
            // Show the mask for the clusters
            if (mask == null)
                mask = new int[maskSize * maskSize];
            else
                Arrays.fill(mask, 0);
            int roiRadius = (int) Math.round((clusterRadius) / maskScale);
            for (double[] c : clusterCentres) {
                double cx = c[0] / maskScale;
                double cy = c[1] / maskScale;
                fillMask(mask, maskSize, (int) cx, (int) cy, roiRadius, 1);
            }
            if (clusterSimulation == 3) {
                // We have the mask. Now pick points at random from the mask.
                MaskDistribution maskDistribution = new MaskDistribution(mask, maskSize, maskSize, 0, maskScale, maskScale, randomGenerator);
                // Allocate each molecule position to a parent circle so defining clusters.
                int[][] clusters = new int[clusterCentres.size()][];
                int[] clusterSize = new int[clusters.length];
                for (int i = 0; i < nMolecules; i++) {
                    double[] centre = maskDistribution.next();
                    // The mask returns the coordinates with the centre of the image at 0,0
                    centre[0] += width / 2;
                    centre[1] += width / 2;
                    xyz.add(centre);
                    // Output statistics on cluster size and number.
                    // TODO - Finding the closest cluster could be done better than an all-vs-all comparison
                    double max = distance2(centre, clusterCentres.get(0));
                    int cluster = 0;
                    for (int j = 1; j < clusterCentres.size(); j++) {
                        double d2 = distance2(centre, clusterCentres.get(j));
                        if (d2 < max) {
                            max = d2;
                            cluster = j;
                        }
                    }
                    // Assign point i to cluster
                    centre[2] = cluster;
                    if (clusterSize[cluster] == 0) {
                        clusters[cluster] = new int[10];
                    }
                    if (clusters[cluster].length <= clusterSize[cluster]) {
                        clusters[cluster] = Arrays.copyOf(clusters[cluster], (int) (clusters[cluster].length * 1.5));
                    }
                    clusters[cluster][clusterSize[cluster]++] = i;
                }
                // Generate real cluster size statistics
                for (int j = 0; j < clusterSize.length; j++) {
                    final int size = clusterSize[j];
                    if (size == 0)
                        continue;
                    statsSize.add(size);
                    if (size == 1) {
                        statsRadius.add(0);
                        continue;
                    }
                    // Find centre of cluster and add the distance to each point
                    double[] com = new double[2];
                    for (int n = 0; n < size; n++) {
                        double[] xy = xyz.get(clusters[j][n]);
                        for (int k = 0; k < 2; k++) com[k] += xy[k];
                    }
                    for (int k = 0; k < 2; k++) com[k] /= size;
                    for (int n = 0; n < size; n++) {
                        double dx = xyz.get(clusters[j][n])[0] - com[0];
                        double dy = xyz.get(clusters[j][n])[1] - com[1];
                        statsRadius.add(Math.sqrt(dx * dx + dy * dy));
                    }
                }
            }
            if (showClusterMask) {
                bp = new ByteProcessor(maskSize, maskSize);
                for (int i = 0; i < mask.length; i++) if (mask[i] != 0)
                    bp.set(i, 128);
                Utils.display(maskTitle, bp);
            }
        }
        // Use the simulated cluster centres to create clusters of the desired size
        if (clusterSimulation == 1 || clusterSimulation == 2) {
            for (double[] clusterCentre : clusterCentres) {
                int clusterN = (int) Math.round((clusterNumberSD > 0) ? dataGenerator.nextGaussian(clusterNumber, clusterNumberSD) : clusterNumber);
                if (clusterN < 1)
                    continue;
                //double[] clusterCentre = dist.next();
                if (clusterN == 1) {
                    // No need for a cluster around a point
                    xyz.add(clusterCentre);
                    statsRadius.add(0);
                    statsSize.add(1);
                } else {
                    // Generate N random points within a circle of the chosen cluster radius.
                    // Locate the centre-of-mass and the average distance to the centre.
                    double[] com = new double[3];
                    int j = 0;
                    while (j < clusterN) {
                        // Generate a random point within a circle uniformly
                        // http://stackoverflow.com/questions/5837572/generate-a-random-point-within-a-circle-uniformly
                        double t = 2.0 * Math.PI * randomGenerator.nextDouble();
                        double u = randomGenerator.nextDouble() + randomGenerator.nextDouble();
                        double r = clusterRadius * ((u > 1) ? 2 - u : u);
                        double x = r * Math.cos(t);
                        double y = r * Math.sin(t);
                        double[] xy = new double[] { clusterCentre[0] + x, clusterCentre[1] + y };
                        xyz.add(xy);
                        for (int k = 0; k < 2; k++) com[k] += xy[k];
                        j++;
                    }
                    // Add the distance of the points from the centre of the cluster.
                    // Note this does not account for the movement due to precision.
                    statsSize.add(j);
                    if (j == 1) {
                        statsRadius.add(0);
                    } else {
                        for (int k = 0; k < 2; k++) com[k] /= j;
                        while (j > 0) {
                            double dx = xyz.get(xyz.size() - j)[0] - com[0];
                            double dy = xyz.get(xyz.size() - j)[1] - com[1];
                            statsRadius.add(Math.sqrt(dx * dx + dy * dy));
                            j--;
                        }
                    }
                }
            }
        }
    } else {
        // Random distribution
        for (int i = 0; i < nMolecules; i++) xyz.add(dist.next());
    }
    // The Gaussian sigma should be applied so the overall distance from the centre
    // ( sqrt(x^2+y^2) ) has a standard deviation of sigmaS?
    final double sigma1D = sigmaS / Math.sqrt(2);
    // Show optional histograms
    StoredDataStatistics intraDistances = null;
    StoredData blinks = null;
    if (showHistograms) {
        int capacity = (int) (xyz.size() * blinkingRate);
        intraDistances = new StoredDataStatistics(capacity);
        blinks = new StoredData(capacity);
    }
    Statistics statsSigma = new Statistics();
    for (int i = 0; i < xyz.size(); i++) {
        int nOccurrences = getBlinks(dataGenerator, blinkingRate);
        if (showHistograms)
            blinks.add(nOccurrences);
        final int size = molecules.size();
        // Get coordinates in nm
        final double[] moleculeXyz = xyz.get(i);
        if (bp != null && nOccurrences > 0) {
            bp.putPixel((int) Math.round(moleculeXyz[0] / maskScale), (int) Math.round(moleculeXyz[1] / maskScale), 255);
        }
        while (nOccurrences-- > 0) {
            final double[] localisationXy = Arrays.copyOf(moleculeXyz, 2);
            // Add random precision
            if (sigma1D > 0) {
                final double dx = dataGenerator.nextGaussian(0, sigma1D);
                final double dy = dataGenerator.nextGaussian(0, sigma1D);
                localisationXy[0] += dx;
                localisationXy[1] += dy;
                if (!dist.isWithinXY(localisationXy))
                    continue;
                // Calculate mean-squared displacement
                statsSigma.add(dx * dx + dy * dy);
            }
            final double x = localisationXy[0];
            final double y = localisationXy[1];
            molecules.add(new Molecule(x, y, i, 1));
            // Store in pixels
            float[] params = new float[7];
            params[Gaussian2DFunction.X_POSITION] = (float) (x / nmPerPixel);
            params[Gaussian2DFunction.Y_POSITION] = (float) (y / nmPerPixel);
            results.addf(i + 1, (int) x, (int) y, 0, 0, 0, params, null);
        }
        if (molecules.size() > size) {
            count++;
            if (showHistograms) {
                int newCount = molecules.size() - size;
                if (newCount == 1) {
                    //intraDistances.add(0);
                    continue;
                }
                // Get the distance matrix between these molecules
                double[][] matrix = new double[newCount][newCount];
                for (int ii = size, x = 0; ii < molecules.size(); ii++, x++) {
                    for (int jj = size + 1, y = 1; jj < molecules.size(); jj++, y++) {
                        final double d2 = molecules.get(ii).distance2(molecules.get(jj));
                        matrix[x][y] = matrix[y][x] = d2;
                    }
                }
                // Get the maximum distance for particle linkage clustering of this molecule
                double max = 0;
                for (int x = 0; x < newCount; x++) {
                    // Compare to all-other molecules and get the minimum distance 
                    // needed to join at least one
                    double linkDistance = Double.POSITIVE_INFINITY;
                    for (int y = 0; y < newCount; y++) {
                        if (x == y)
                            continue;
                        if (matrix[x][y] < linkDistance)
                            linkDistance = matrix[x][y];
                    }
                    // Check if this is larger 
                    if (max < linkDistance)
                        max = linkDistance;
                }
                intraDistances.add(Math.sqrt(max));
            }
        }
    }
    results.end();
    if (bp != null)
        Utils.display(maskTitle, bp);
    // Used for debugging
    //System.out.printf("  * Molecules = %d (%d activated)\n", xyz.size(), count);
    //if (clusterSimulation > 0)
    //	System.out.printf("  * Cluster number = %s +/- %s. Radius = %s +/- %s\n",
    //			Utils.rounded(statsSize.getMean(), 4), Utils.rounded(statsSize.getStandardDeviation(), 4),
    //			Utils.rounded(statsRadius.getMean(), 4), Utils.rounded(statsRadius.getStandardDeviation(), 4));
    log("Simulation results");
    log("  * Molecules = %d (%d activated)", xyz.size(), count);
    log("  * Blinking rate = %s", Utils.rounded((double) molecules.size() / xyz.size(), 4));
    log("  * Precision (Mean-displacement) = %s nm", (statsSigma.getN() > 0) ? Utils.rounded(Math.sqrt(statsSigma.getMean()), 4) : "0");
    if (showHistograms) {
        if (intraDistances.getN() == 0) {
            log("  * Mean Intra-Molecule particle linkage distance = 0 nm");
            log("  * Fraction of inter-molecule particle linkage @ 0 nm = 0 %%");
        } else {
            plot(blinks, "Blinks/Molecule", true);
            double[][] intraHist = plot(intraDistances, "Intra-molecule particle linkage distance", false);
            // Determine 95th and 99th percentile
            int p99 = intraHist[0].length - 1;
            double limit1 = 0.99 * intraHist[1][p99];
            double limit2 = 0.95 * intraHist[1][p99];
            while (intraHist[1][p99] > limit1 && p99 > 0) p99--;
            int p95 = p99;
            while (intraHist[1][p95] > limit2 && p95 > 0) p95--;
            log("  * Mean Intra-Molecule particle linkage distance = %s nm (95%% = %s, 99%% = %s, 100%% = %s)", Utils.rounded(intraDistances.getMean(), 4), Utils.rounded(intraHist[0][p95], 4), Utils.rounded(intraHist[0][p99], 4), Utils.rounded(intraHist[0][intraHist[0].length - 1], 4));
            if (distanceAnalysis) {
                performDistanceAnalysis(intraHist, p99);
            }
        }
    }
    if (clusterSimulation > 0) {
        log("  * Cluster number = %s +/- %s", Utils.rounded(statsSize.getMean(), 4), Utils.rounded(statsSize.getStandardDeviation(), 4));
        log("  * Cluster radius = %s +/- %s nm (mean distance to centre-of-mass)", Utils.rounded(statsRadius.getMean(), 4), Utils.rounded(statsRadius.getStandardDeviation(), 4));
    }
}
Also used : ByteProcessor(ij.process.ByteProcessor) TDoubleArrayList(gnu.trove.list.array.TDoubleArrayList) ArrayList(java.util.ArrayList) MaskDistribution(gdsc.smlm.model.MaskDistribution) Well19937c(org.apache.commons.math3.random.Well19937c) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) MemoryPeakResults(gdsc.smlm.results.MemoryPeakResults) NullSource(gdsc.smlm.results.NullSource) RandomDataGenerator(org.apache.commons.math3.random.RandomDataGenerator) UniformDistribution(gdsc.smlm.model.UniformDistribution) StoredDataStatistics(gdsc.core.utils.StoredDataStatistics) Statistics(gdsc.core.utils.Statistics) StoredDataStatistics(gdsc.core.utils.StoredDataStatistics) DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) WeightedObservedPoint(org.apache.commons.math3.fitting.WeightedObservedPoint) ClusterPoint(gdsc.core.clustering.ClusterPoint) StoredData(gdsc.core.utils.StoredData)

Example 12 with Precision

use of org.apache.commons.math3.util.Precision in project GDSC-SMLM by aherbert.

the class PCPALMFitting method fitEmulsionModel.

/**
	 * Fits the correlation curve with r>0 to the clustered model using the estimated density and precision. Parameters
	 * must be fit within a tolerance of the starting values.
	 * 
	 * @param gr
	 * @param sigmaS
	 *            The estimated precision
	 * @param proteinDensity
	 *            The estimated protein density
	 * @return The fitted parameters [precision, density, clusterRadius, clusterDensity]
	 */
private double[] fitEmulsionModel(double[][] gr, double sigmaS, double proteinDensity, String resultColour) {
    final EmulsionModelFunctionGradient function = new EmulsionModelFunctionGradient();
    emulsionModel = function;
    log("Fitting %s: Estimated precision = %f nm, estimated protein density = %g um^-2", emulsionModel.getName(), sigmaS, proteinDensity * 1e6);
    emulsionModel.setLogging(true);
    for (int i = offset; i < gr[0].length; i++) {
        // Only fit the curve above the estimated resolution (points below it will be subject to error)
        if (gr[0][i] > sigmaS * fitAboveEstimatedPrecision)
            emulsionModel.addPoint(gr[0][i], gr[1][i]);
    }
    double[] parameters;
    // The model is: sigma, density, range, amplitude, alpha
    double[] initialSolution = new double[] { sigmaS, proteinDensity, sigmaS * 5, 1, sigmaS * 5 };
    int evaluations = 0;
    // Constrain the fitting to be close to the estimated precision (sigmaS) and protein density.
    // LVM fitting does not support constrained fitting so use a bounded optimiser.
    SumOfSquaresModelFunction emulsionModelMulti = new SumOfSquaresModelFunction(emulsionModel);
    double[] x = emulsionModelMulti.x;
    double[] y = emulsionModelMulti.y;
    // Range should be equal to the first time the g(r) curve crosses 1
    for (int i = 0; i < x.length; i++) if (y[i] < 1) {
        initialSolution[4] = initialSolution[2] = (i > 0) ? (x[i - 1] + x[i]) * 0.5 : x[i];
        break;
    }
    // Put some bounds around the initial guess. Use the fitting tolerance (in %) if provided.
    double limit = (fittingTolerance > 0) ? 1 + fittingTolerance / 100 : 2;
    double[] lB = new double[] { initialSolution[0] / limit, initialSolution[1] / limit, 0, 0, 0 };
    // The amplitude and range should not extend beyond the limits of the g(r) curve.
    // TODO - Find out the expected range for the alpha parameter.  
    double[] uB = new double[] { initialSolution[0] * limit, initialSolution[1] * limit, Maths.max(x), Maths.max(gr[1]), Maths.max(x) * 2 };
    log("Fitting %s using a bounded search: %s < precision < %s & %s < density < %s", emulsionModel.getName(), Utils.rounded(lB[0], 4), Utils.rounded(uB[0], 4), Utils.rounded(lB[1] * 1e6, 4), Utils.rounded(uB[1] * 1e6, 4));
    PointValuePair constrainedSolution = runBoundedOptimiser(gr, initialSolution, lB, uB, emulsionModelMulti);
    if (constrainedSolution == null)
        return null;
    parameters = constrainedSolution.getPointRef();
    evaluations = boundedEvaluations;
    // Refit using a LVM  
    if (useLSE) {
        log("Re-fitting %s using a gradient optimisation", emulsionModel.getName());
        LevenbergMarquardtOptimizer optimizer = new LevenbergMarquardtOptimizer();
        Optimum lvmSolution;
        try {
            //@formatter:off
            LeastSquaresProblem problem = new LeastSquaresBuilder().maxEvaluations(Integer.MAX_VALUE).maxIterations(3000).start(parameters).target(function.getY()).weight(new DiagonalMatrix(function.getWeights())).model(function, new MultivariateMatrixFunction() {

                public double[][] value(double[] point) throws IllegalArgumentException {
                    return function.jacobian(point);
                }
            }).build();
            //@formatter:on
            lvmSolution = optimizer.optimize(problem);
            evaluations += lvmSolution.getEvaluations();
            double ss = lvmSolution.getResiduals().dotProduct(lvmSolution.getResiduals());
            if (ss < constrainedSolution.getValue()) {
                log("Re-fitting %s improved the SS from %s to %s (-%s%%)", emulsionModel.getName(), Utils.rounded(constrainedSolution.getValue(), 4), Utils.rounded(ss, 4), Utils.rounded(100 * (constrainedSolution.getValue() - ss) / constrainedSolution.getValue(), 4));
                parameters = lvmSolution.getPoint().toArray();
            }
        } catch (TooManyIterationsException e) {
            log("Failed to re-fit %s: Too many iterations (%s)", emulsionModel.getName(), e.getMessage());
        } catch (ConvergenceException e) {
            log("Failed to re-fit %s: %s", emulsionModel.getName(), e.getMessage());
        }
    }
    emulsionModel.setLogging(false);
    // Ensure the width is positive
    parameters[0] = Math.abs(parameters[0]);
    //parameters[2] = Math.abs(parameters[2]);
    double ss = 0;
    double[] obs = emulsionModel.getY();
    double[] exp = emulsionModel.value(parameters);
    for (int i = 0; i < obs.length; i++) ss += (obs[i] - exp[i]) * (obs[i] - exp[i]);
    ic3 = Maths.getAkaikeInformationCriterionFromResiduals(ss, emulsionModel.size(), parameters.length);
    final double fitSigmaS = parameters[0];
    final double fitProteinDensity = parameters[1];
    //The radius of the cluster domain
    final double domainRadius = parameters[2];
    //The density of the cluster domain
    final double domainDensity = parameters[3];
    //The coherence length between circles
    final double coherence = parameters[4];
    // This is from the PC-PALM paper. It may not be correct for the emulsion model.
    final double nCluster = 2 * domainDensity * Math.PI * domainRadius * domainRadius * fitProteinDensity;
    double e1 = parameterDrift(sigmaS, fitSigmaS);
    double e2 = parameterDrift(proteinDensity, fitProteinDensity);
    log("  %s fit: SS = %f. cAIC = %f. %d evaluations", emulsionModel.getName(), ss, ic3, evaluations);
    log("  %s parameters:", emulsionModel.getName());
    log("    Average precision = %s nm (%s%%)", Utils.rounded(fitSigmaS, 4), Utils.rounded(e1, 4));
    log("    Average protein density = %s um^-2 (%s%%)", Utils.rounded(fitProteinDensity * 1e6, 4), Utils.rounded(e2, 4));
    log("    Domain radius = %s nm", Utils.rounded(domainRadius, 4));
    log("    Domain density = %s", Utils.rounded(domainDensity, 4));
    log("    Domain coherence = %s", Utils.rounded(coherence, 4));
    log("    nCluster = %s", Utils.rounded(nCluster, 4));
    // Check the fitted parameters are within tolerance of the initial estimates
    valid2 = true;
    if (fittingTolerance > 0 && (Math.abs(e1) > fittingTolerance || Math.abs(e2) > fittingTolerance)) {
        log("  Failed to fit %s within tolerance (%s%%): Average precision = %f nm (%s%%), average protein density = %g um^-2 (%s%%)", emulsionModel.getName(), Utils.rounded(fittingTolerance, 4), fitSigmaS, Utils.rounded(e1, 4), fitProteinDensity * 1e6, Utils.rounded(e2, 4));
        valid2 = false;
    }
    // Check extra parameters. Domain radius should be higher than the precision. Density should be positive
    if (domainRadius < fitSigmaS) {
        log("  Failed to fit %s: Domain radius is smaller than the average precision (%s < %s)", emulsionModel.getName(), Utils.rounded(domainRadius, 4), Utils.rounded(fitSigmaS, 4));
        valid2 = false;
    }
    if (domainDensity < 0) {
        log("  Failed to fit %s: Domain density is negative (%s)", emulsionModel.getName(), Utils.rounded(domainDensity, 4));
        valid2 = false;
    }
    if (ic3 > ic1) {
        log("  Failed to fit %s - Information Criterion has increased %s%%", emulsionModel.getName(), Utils.rounded((100 * (ic3 - ic1) / ic1), 4));
        valid2 = false;
    }
    addResult(emulsionModel.getName(), resultColour, valid2, fitSigmaS, fitProteinDensity, domainRadius, domainDensity, nCluster, coherence, ic3);
    return parameters;
}
Also used : PointValuePair(org.apache.commons.math3.optim.PointValuePair) LeastSquaresBuilder(org.apache.commons.math3.fitting.leastsquares.LeastSquaresBuilder) Optimum(org.apache.commons.math3.fitting.leastsquares.LeastSquaresOptimizer.Optimum) LevenbergMarquardtOptimizer(org.apache.commons.math3.fitting.leastsquares.LevenbergMarquardtOptimizer) DiagonalMatrix(org.apache.commons.math3.linear.DiagonalMatrix) ConvergenceException(org.apache.commons.math3.exception.ConvergenceException) TooManyIterationsException(org.apache.commons.math3.exception.TooManyIterationsException) LeastSquaresProblem(org.apache.commons.math3.fitting.leastsquares.LeastSquaresProblem) MultivariateMatrixFunction(org.apache.commons.math3.analysis.MultivariateMatrixFunction)

Example 13 with Precision

use of org.apache.commons.math3.util.Precision in project gatk by broadinstitute.

the class AlleleFrequencyCalculator method getLog10PNonRef.

//TODO: this should be a class of static methods once the old AFCalculator is gone.
/**
     * Compute the probability of the alleles segregating given the genotype likelihoods of the samples in vc
     *
     * @param vc the VariantContext holding the alleles and sample information.  The VariantContext
     *           must have at least 1 alternative allele
     * @param refSnpIndelPseudocounts a total hack.  A length-3 vector containing Dirichlet prior pseudocounts to
     *                                be given to ref, alt SNP, and alt indel alleles.  Hack won't be necessary when we destroy the old AF calculators
     * @return result (for programming convenience)
     */
@Override
public AFCalculationResult getLog10PNonRef(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles, final double[] refSnpIndelPseudocounts) {
    Utils.nonNull(vc, "VariantContext cannot be null");
    final int numAlleles = vc.getNAlleles();
    final List<Allele> alleles = vc.getAlleles();
    Utils.validateArg(numAlleles > 1, () -> "VariantContext has only a single reference allele, but getLog10PNonRef requires at least one at all " + vc);
    final double[] priorPseudocounts = alleles.stream().mapToDouble(a -> a.isReference() ? refPseudocount : (a.length() > 1 ? snpPseudocount : indelPseudocount)).toArray();
    double[] alleleCounts = new double[numAlleles];
    // log10(1/numAlleles)
    final double flatLog10AlleleFrequency = -MathUtils.log10(numAlleles);
    double[] log10AlleleFrequencies = new IndexRange(0, numAlleles).mapToDouble(n -> flatLog10AlleleFrequency);
    double alleleCountsMaximumDifference = Double.POSITIVE_INFINITY;
    while (alleleCountsMaximumDifference > THRESHOLD_FOR_ALLELE_COUNT_CONVERGENCE) {
        final double[] newAlleleCounts = effectiveAlleleCounts(vc, log10AlleleFrequencies);
        alleleCountsMaximumDifference = Arrays.stream(MathArrays.ebeSubtract(alleleCounts, newAlleleCounts)).map(Math::abs).max().getAsDouble();
        alleleCounts = newAlleleCounts;
        final double[] posteriorPseudocounts = MathArrays.ebeAdd(priorPseudocounts, alleleCounts);
        // first iteration uses flat prior in order to avoid local minimum where the prior + no pseudocounts gives such a low
        // effective allele frequency that it overwhelms the genotype likelihood of a real variant
        // basically, we want a chance to get non-zero pseudocounts before using a prior that's biased against a variant
        log10AlleleFrequencies = new Dirichlet(posteriorPseudocounts).log10MeanWeights();
    }
    double[] log10POfZeroCountsByAllele = new double[numAlleles];
    double log10PNoVariant = 0;
    for (final Genotype g : vc.getGenotypes()) {
        if (!g.hasLikelihoods()) {
            continue;
        }
        final int ploidy = g.getPloidy() == 0 ? defaultPloidy : g.getPloidy();
        final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, numAlleles);
        final double[] log10GenotypePosteriors = log10NormalizedGenotypePosteriors(g, glCalc, log10AlleleFrequencies);
        //the total probability
        log10PNoVariant += log10GenotypePosteriors[HOM_REF_GENOTYPE_INDEX];
        // per allele non-log space probabilities of zero counts for this sample
        // for each allele calculate the total probability of genotypes containing at least one copy of the allele
        final double[] log10ProbabilityOfNonZeroAltAlleles = new double[numAlleles];
        Arrays.fill(log10ProbabilityOfNonZeroAltAlleles, Double.NEGATIVE_INFINITY);
        for (int genotype = 0; genotype < glCalc.genotypeCount(); genotype++) {
            final double log10GenotypePosterior = log10GenotypePosteriors[genotype];
            glCalc.genotypeAlleleCountsAt(genotype).forEachAlleleIndexAndCount((alleleIndex, count) -> log10ProbabilityOfNonZeroAltAlleles[alleleIndex] = MathUtils.log10SumLog10(log10ProbabilityOfNonZeroAltAlleles[alleleIndex], log10GenotypePosterior));
        }
        for (int allele = 0; allele < numAlleles; allele++) {
            // if prob of non hom ref == 1 up to numerical precision, short-circuit to avoid NaN
            if (log10ProbabilityOfNonZeroAltAlleles[allele] >= 0) {
                log10POfZeroCountsByAllele[allele] = Double.NEGATIVE_INFINITY;
            } else {
                log10POfZeroCountsByAllele[allele] += MathUtils.log10OneMinusPow10(log10ProbabilityOfNonZeroAltAlleles[allele]);
            }
        }
    }
    // unfortunately AFCalculationResult expects integers for the MLE.  We really should emit the EM no-integer values
    // which are valuable (eg in CombineGVCFs) as the sufficient statistics of the Dirichlet posterior on allele frequencies
    final int[] integerAlleleCounts = Arrays.stream(alleleCounts).mapToInt(x -> (int) Math.round(x)).toArray();
    final int[] integerAltAlleleCounts = Arrays.copyOfRange(integerAlleleCounts, 1, numAlleles);
    //skip the ref allele (index 0)
    final Map<Allele, Double> log10PRefByAllele = IntStream.range(1, numAlleles).boxed().collect(Collectors.toMap(alleles::get, a -> log10POfZeroCountsByAllele[a]));
    // we compute posteriors here and don't have the same prior that AFCalculationResult expects.  Therefore, we
    // give it our posterior as its "likelihood" along with a flat dummy prior
    //TODO: HACK must be negative for AFCalcResult
    final double[] dummyFlatPrior = { -1e-10, -1e-10 };
    final double[] log10PosteriorOfNoVariantYesVariant = { log10PNoVariant, MathUtils.log10OneMinusPow10(log10PNoVariant) };
    return new AFCalculationResult(integerAltAlleleCounts, alleles, log10PosteriorOfNoVariantYesVariant, dummyFlatPrior, log10PRefByAllele);
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) IntStream(java.util.stream.IntStream) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) MathArrays(org.apache.commons.math3.util.MathArrays) Dirichlet(org.broadinstitute.hellbender.utils.Dirichlet) GenotypeAlleleCounts(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts) Collectors(java.util.stream.Collectors) GenotypeLikelihoodCalculator(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator) GenotypeLikelihoodCalculators(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators) List(java.util.List) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) Map(java.util.Map) VariantContext(htsjdk.variant.variantcontext.VariantContext) Utils(org.broadinstitute.hellbender.utils.Utils) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) Genotype(htsjdk.variant.variantcontext.Genotype) IndexRange(org.broadinstitute.hellbender.utils.IndexRange) Allele(htsjdk.variant.variantcontext.Allele) Dirichlet(org.broadinstitute.hellbender.utils.Dirichlet) GenotypeLikelihoodCalculator(org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator)

Example 14 with Precision

use of org.apache.commons.math3.util.Precision in project gatk by broadinstitute.

the class AdaptiveMetropolisSamplerUnitTest method testGaussian.

@Test
public void testGaussian() {
    final RandomGenerator rng = RandomGeneratorFactory.createRandomGenerator(new Random(RANDOM_SEED));
    for (final double theoreticalMean : Arrays.asList(0)) {
        for (final double precision : Arrays.asList(1.0)) {
            final double variance = 1 / precision;
            //Note: this is the theoretical standard deviation of the sample mean given uncorrelated
            //samples.  The sample mean will have a greater variance here because samples are correlated.
            final double standardDeviationOfMean = Math.sqrt(variance / NUM_SAMPLES);
            final Function<Double, Double> logPDF = x -> -(precision / 2) * (x - theoreticalMean) * (x - theoreticalMean);
            final AdaptiveMetropolisSampler sampler = new AdaptiveMetropolisSampler(INITIAL_GAUSSIAN_GUESS, INITIAL_STEP_SIZE, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY);
            final List<Double> samples = sampler.sample(rng, logPDF, NUM_SAMPLES, NUM_BURN_IN_STEPS);
            final double sampleMean = samples.stream().mapToDouble(x -> x).average().getAsDouble();
            final double sampleMeanSquare = samples.stream().mapToDouble(x -> x * x).average().getAsDouble();
            final double sampleVariance = (sampleMeanSquare - sampleMean * sampleMean) * NUM_SAMPLES / (NUM_SAMPLES - 1);
            Assert.assertEquals(sampleMean, theoreticalMean, 6 * standardDeviationOfMean);
            Assert.assertEquals(sampleVariance, variance, variance / 10);
        }
    }
}
Also used : Arrays(java.util.Arrays) List(java.util.List) Assert(org.testng.Assert) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) RandomGeneratorFactory(org.apache.commons.math3.random.RandomGeneratorFactory) Test(org.testng.annotations.Test) Random(java.util.Random) Function(java.util.function.Function) Random(java.util.Random) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) Test(org.testng.annotations.Test)

Example 15 with Precision

use of org.apache.commons.math3.util.Precision in project gatk by broadinstitute.

the class FisherExactTest method twoSidedPValue.

/**
     * Computes the 2-sided pvalue of the Fisher's exact test on a normalized table that ensures that the sum of
     * all four entries is less than 2 * 200.
     */
public static double twoSidedPValue(final int[][] normalizedTable) {
    Utils.nonNull(normalizedTable);
    Utils.validateArg(normalizedTable.length == 2, () -> "input must be 2x2 " + Arrays.deepToString(normalizedTable));
    Utils.validateArg(normalizedTable[0] != null && normalizedTable[0].length == 2, () -> "input must be 2x2 " + Arrays.deepToString(normalizedTable));
    Utils.validateArg(normalizedTable[1] != null && normalizedTable[1].length == 2, () -> "input must be 2x2 " + Arrays.deepToString(normalizedTable));
    //Note: this implementation follows the one in R base package
    final int[][] x = normalizedTable;
    final int m = x[0][0] + x[0][1];
    final int n = x[1][0] + x[1][1];
    final int k = x[0][0] + x[1][0];
    final int lo = Math.max(0, k - n);
    final int hi = Math.min(k, m);
    final IndexRange support = new IndexRange(lo, hi + 1);
    if (support.size() <= 1) {
        //special case, support has only one value
        return 1.0;
    }
    final AbstractIntegerDistribution dist = new HypergeometricDistribution(null, m + n, m, k);
    final double[] logds = support.mapToDouble(dist::logProbability);
    final double threshold = logds[x[0][0] - lo] * REL_ERR;
    final double[] log10ds = DoubleStream.of(logds).filter(d -> d <= threshold).map(MathUtils::logToLog10).toArray();
    final double pValue = MathUtils.sumLog10(log10ds);
    // min is necessary as numerical precision can result in pValue being slightly greater than 1.0
    return Math.min(pValue, 1.0);
}
Also used : HypergeometricDistribution(org.apache.commons.math3.distribution.HypergeometricDistribution) AbstractIntegerDistribution(org.apache.commons.math3.distribution.AbstractIntegerDistribution)

Aggregations

WeightedObservedPoint (org.apache.commons.math3.fitting.WeightedObservedPoint)8 StoredDataStatistics (gdsc.core.utils.StoredDataStatistics)7 PeakResult (gdsc.smlm.results.PeakResult)6 ArrayList (java.util.ArrayList)6 RandomGenerator (org.apache.commons.math3.random.RandomGenerator)6 ConvergenceException (org.apache.commons.math3.exception.ConvergenceException)5 TooManyIterationsException (org.apache.commons.math3.exception.TooManyIterationsException)5 DescriptiveStatistics (org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)5 ClusterPoint (gdsc.core.clustering.ClusterPoint)4 MemoryPeakResults (gdsc.smlm.results.MemoryPeakResults)4 MultivariateMatrixFunction (org.apache.commons.math3.analysis.MultivariateMatrixFunction)4 LeastSquaresBuilder (org.apache.commons.math3.fitting.leastsquares.LeastSquaresBuilder)4 Optimum (org.apache.commons.math3.fitting.leastsquares.LeastSquaresOptimizer.Optimum)4 LeastSquaresProblem (org.apache.commons.math3.fitting.leastsquares.LeastSquaresProblem)4 LevenbergMarquardtOptimizer (org.apache.commons.math3.fitting.leastsquares.LevenbergMarquardtOptimizer)4 DiagonalMatrix (org.apache.commons.math3.linear.DiagonalMatrix)4 Statistics (gdsc.core.utils.Statistics)3 Plot2 (ij.gui.Plot2)3 WindowOrganiser (ij.plugin.WindowOrganiser)3 BasePoint (gdsc.core.match.BasePoint)2