Search in sources :

Example 11 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project GDSC-SMLM by aherbert.

the class BaseFunctionSolverTest method canFitSingleGaussianBetter.

void canFitSingleGaussianBetter(FunctionSolver solver, boolean applyBounds, FunctionSolver solver2, boolean applyBounds2, String name, String name2, NoiseModel noiseModel) {
    double[] noise = getNoise(noiseModel);
    if (solver.isWeighted())
        solver.setWeights(getWeights(noiseModel));
    int LOOPS = 5;
    randomGenerator.setSeed(seed);
    StoredDataStatistics[] stats = new StoredDataStatistics[6];
    String[] statName = { "Signal", "X", "Y" };
    int[] betterPrecision = new int[3];
    int[] totalPrecision = new int[3];
    int[] betterAccuracy = new int[3];
    int[] totalAccuracy = new int[3];
    int i1 = 0, i2 = 0;
    for (double s : signal) {
        double[] expected = createParams(1, s, 0, 0, 1);
        double[] lower = null, upper = null;
        if (applyBounds || applyBounds2) {
            lower = createParams(0, s * 0.5, -0.2, -0.2, 0.8);
            upper = createParams(3, s * 2, 0.2, 0.2, 1.2);
        }
        if (applyBounds)
            solver.setBounds(lower, upper);
        if (applyBounds2)
            solver2.setBounds(lower, upper);
        for (int loop = LOOPS; loop-- > 0; ) {
            double[] data = drawGaussian(expected, noise, noiseModel);
            for (int i = 0; i < stats.length; i++) stats[i] = new StoredDataStatistics();
            for (double db : base) for (double dx : shift) for (double dy : shift) for (double dsx : factor) {
                double[] p = createParams(db, s, dx, dy, dsx);
                double[] fp = fitGaussian(solver, data, p, expected);
                i1 += solver.getEvaluations();
                double[] fp2 = fitGaussian(solver2, data, p, expected);
                i2 += solver2.getEvaluations();
                // Get the mean and sd (the fit precision)
                compare(fp, expected, fp2, expected, Gaussian2DFunction.SIGNAL, stats[0], stats[1]);
                compare(fp, expected, fp2, expected, Gaussian2DFunction.X_POSITION, stats[2], stats[3]);
                compare(fp, expected, fp2, expected, Gaussian2DFunction.Y_POSITION, stats[4], stats[5]);
            // Use the distance
            //stats[2].add(distance(fp, expected));
            //stats[3].add(distance(fp2, expected2));
            }
            // two sided
            double alpha = 0.05;
            for (int i = 0; i < stats.length; i += 2) {
                double u1 = stats[i].getMean();
                double u2 = stats[i + 1].getMean();
                double sd1 = stats[i].getStandardDeviation();
                double sd2 = stats[i + 1].getStandardDeviation();
                TTest tt = new TTest();
                boolean diff = tt.tTest(stats[i].getValues(), stats[i + 1].getValues(), alpha);
                int index = i / 2;
                String msg = String.format("%s vs %s : %.1f (%s) %s %f +/- %f vs %f +/- %f  (N=%d) %b", name2, name, s, noiseModel, statName[index], u2, sd2, u1, sd1, stats[i].getN(), diff);
                if (diff) {
                    // Different means. Check they are roughly the same
                    if (DoubleEquality.almostEqualRelativeOrAbsolute(u1, u2, 0.1, 0)) {
                        // Basically the same. Check which is more precise
                        if (!DoubleEquality.almostEqualRelativeOrAbsolute(sd1, sd2, 0.05, 0)) {
                            if (sd2 < sd1) {
                                betterPrecision[index]++;
                                println(msg + " P*");
                            } else
                                println(msg + " P");
                            totalPrecision[index]++;
                        }
                    } else {
                        // Check which is more accurate (closer to zero)
                        u1 = Math.abs(u1);
                        u2 = Math.abs(u2);
                        if (u2 < u1) {
                            betterAccuracy[index]++;
                            println(msg + " A*");
                        } else
                            println(msg + " A");
                        totalAccuracy[index]++;
                    }
                } else {
                    // The same means. Check that it is more precise
                    if (!DoubleEquality.almostEqualRelativeOrAbsolute(sd1, sd2, 0.05, 0)) {
                        if (sd2 < sd1) {
                            betterPrecision[index]++;
                            println(msg + " P*");
                        } else
                            println(msg + " P");
                        totalPrecision[index]++;
                    }
                }
            }
        }
    }
    int better = 0, total = 0;
    for (int index = 0; index < statName.length; index++) {
        better += betterPrecision[index] + betterAccuracy[index];
        total += totalPrecision[index] + totalAccuracy[index];
        test(name2, name, statName[index] + " P", betterPrecision[index], totalPrecision[index], printBetterDetails);
        test(name2, name, statName[index] + " A", betterAccuracy[index], totalAccuracy[index], printBetterDetails);
    }
    test(name2, name, String.format("All (eval [%d] [%d]) : ", i2, i1), better, total, true);
}
Also used : TTest(org.apache.commons.math3.stat.inference.TTest) StoredDataStatistics(gdsc.core.utils.StoredDataStatistics)

Example 12 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project lucene-solr by apache.

the class DescribeEvaluator method evaluate.

public Tuple evaluate(Tuple tuple) throws IOException {
    if (subEvaluators.size() != 1) {
        throw new IOException("describe expects 1 column as a parameters");
    }
    StreamEvaluator colEval = subEvaluators.get(0);
    List<Number> numbers = (List<Number>) colEval.evaluate(tuple);
    DescriptiveStatistics descriptiveStatistics = new DescriptiveStatistics();
    for (Number n : numbers) {
        descriptiveStatistics.addValue(n.doubleValue());
    }
    Map map = new HashMap();
    map.put("max", descriptiveStatistics.getMax());
    map.put("mean", descriptiveStatistics.getMean());
    map.put("min", descriptiveStatistics.getMin());
    map.put("stdev", descriptiveStatistics.getStandardDeviation());
    map.put("sum", descriptiveStatistics.getSum());
    map.put("N", descriptiveStatistics.getN());
    map.put("var", descriptiveStatistics.getVariance());
    map.put("kurtosis", descriptiveStatistics.getKurtosis());
    map.put("skewness", descriptiveStatistics.getSkewness());
    map.put("popVar", descriptiveStatistics.getPopulationVariance());
    map.put("geometricMean", descriptiveStatistics.getGeometricMean());
    map.put("sumsq", descriptiveStatistics.getSumsq());
    return new Tuple(map);
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) HashMap(java.util.HashMap) List(java.util.List) IOException(java.io.IOException) Map(java.util.Map) HashMap(java.util.HashMap) Tuple(org.apache.solr.client.solrj.io.Tuple)

Example 13 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project GDSC-SMLM by aherbert.

the class PCPALMMolecules method runSimulation.

private void runSimulation(boolean resultsAvailable) {
    if (resultsAvailable && !showSimulationDialog())
        return;
    startLog();
    log("Simulation parameters");
    if (blinkingDistribution == 3) {
        log("  - Clusters = %d", nMolecules);
        log("  - Simulation size = %s um", Utils.rounded(simulationSize, 4));
        log("  - Molecules/cluster = %s", Utils.rounded(blinkingRate, 4));
        log("  - Blinking distribution = %s", BLINKING_DISTRIBUTION[blinkingDistribution]);
        log("  - p-Value = %s", Utils.rounded(p, 4));
    } else {
        log("  - Molecules = %d", nMolecules);
        log("  - Simulation size = %s um", Utils.rounded(simulationSize, 4));
        log("  - Blinking rate = %s", Utils.rounded(blinkingRate, 4));
        log("  - Blinking distribution = %s", BLINKING_DISTRIBUTION[blinkingDistribution]);
    }
    log("  - Average precision = %s nm", Utils.rounded(sigmaS, 4));
    log("  - Clusters simulation = " + CLUSTER_SIMULATION[clusterSimulation]);
    if (clusterSimulation > 0) {
        log("  - Cluster number = %s +/- %s", Utils.rounded(clusterNumber, 4), Utils.rounded(clusterNumberSD, 4));
        log("  - Cluster radius = %s nm", Utils.rounded(clusterRadius, 4));
    }
    final double nmPerPixel = 100;
    double width = simulationSize * 1000.0;
    // Allow a border of 3 x sigma for +/- precision
    //if (blinkingRate > 1)
    width -= 3 * sigmaS;
    RandomGenerator randomGenerator = new Well19937c(System.currentTimeMillis() + System.identityHashCode(this));
    RandomDataGenerator dataGenerator = new RandomDataGenerator(randomGenerator);
    UniformDistribution dist = new UniformDistribution(null, new double[] { width, width, 0 }, randomGenerator.nextInt());
    molecules = new ArrayList<Molecule>(nMolecules);
    // Create some dummy results since the calibration is required for later analysis
    results = new MemoryPeakResults();
    results.setCalibration(new gdsc.smlm.results.Calibration(nmPerPixel, 1, 100));
    results.setSource(new NullSource("Molecule Simulation"));
    results.begin();
    int count = 0;
    // Generate a sequence of coordinates
    ArrayList<double[]> xyz = new ArrayList<double[]>((int) (nMolecules * 1.1));
    Statistics statsRadius = new Statistics();
    Statistics statsSize = new Statistics();
    String maskTitle = TITLE + " Cluster Mask";
    ByteProcessor bp = null;
    double maskScale = 0;
    if (clusterSimulation > 0) {
        // Simulate clusters.
        // Note: In the Veatch et al. paper (Plos 1, e31457) correlation functions are built using circles
        // with small radii of 4-8 Arbitrary Units (AU) or large radii of 10-30 AU. A fluctuations model is
        // created at T = 1.075 Tc. It is not clear exactly how the particles are distributed.
        // It may be that a mask is created first using the model. The particles are placed on the mask using
        // a specified density. This simulation produces a figure to show either a damped cosine function
        // (circles) or an exponential (fluctuations). The number of particles in each circle may be randomly
        // determined just by density. The figure does not discuss the derivation of the cluster size 
        // statistic.
        // 
        // If this plugin simulation is run with a uniform distribution and blinking rate of 1 then the damped
        // cosine function is reproduced. The curve crosses g(r)=1 at a value equivalent to the average
        // distance to the centre-of-mass of each drawn cluster, not the input cluster radius parameter (which 
        // is a hard upper limit on the distance to centre).
        final int maskSize = lowResolutionImageSize;
        int[] mask = null;
        // scale is in nm/pixel
        maskScale = width / maskSize;
        ArrayList<double[]> clusterCentres = new ArrayList<double[]>();
        int totalSteps = 1 + (int) Math.ceil(nMolecules / clusterNumber);
        if (clusterSimulation == 2 || clusterSimulation == 3) {
            // Clusters are non-overlapping circles
            // Ensure the circles do not overlap by using an exclusion mask that accumulates 
            // out-of-bounds pixels by drawing the last cluster (plus some border) on an image. When no
            // more pixels are available then stop generating molecules.
            // This is done by cumulatively filling a mask and using the MaskDistribution to select 
            // a new point. This may be slow but it works.
            // TODO - Allow clusters of different sizes...
            mask = new int[maskSize * maskSize];
            Arrays.fill(mask, 255);
            MaskDistribution maskDistribution = new MaskDistribution(mask, maskSize, maskSize, 0, maskScale, maskScale, randomGenerator);
            double[] centre;
            IJ.showStatus("Computing clusters mask");
            int roiRadius = (int) Math.round((clusterRadius * 2) / maskScale);
            if (clusterSimulation == 3) {
                // Generate a mask of circles then sample from that.
                // If we want to fill the mask completely then adjust the total steps to be the number of 
                // circles that can fit inside the mask.
                totalSteps = (int) (maskSize * maskSize / (Math.PI * Math.pow(clusterRadius / maskScale, 2)));
            }
            while ((centre = maskDistribution.next()) != null && clusterCentres.size() < totalSteps) {
                IJ.showProgress(clusterCentres.size(), totalSteps);
                // The mask returns the coordinates with the centre of the image at 0,0
                centre[0] += width / 2;
                centre[1] += width / 2;
                clusterCentres.add(centre);
                // Fill in the mask around the centre to exclude any more circles that could overlap
                double cx = centre[0] / maskScale;
                double cy = centre[1] / maskScale;
                fillMask(mask, maskSize, (int) cx, (int) cy, roiRadius, 0);
                //Utils.display("Mask", new ColorProcessor(maskSize, maskSize, mask));
                try {
                    maskDistribution = new MaskDistribution(mask, maskSize, maskSize, 0, maskScale, maskScale, randomGenerator);
                } catch (IllegalArgumentException e) {
                    // This can happen when there are no more non-zero pixels
                    log("WARNING: No more room for clusters on the mask area (created %d of estimated %d)", clusterCentres.size(), totalSteps);
                    break;
                }
            }
            IJ.showProgress(1);
            IJ.showStatus("");
        } else {
            // Pick centres randomly from the distribution 
            while (clusterCentres.size() < totalSteps) clusterCentres.add(dist.next());
        }
        if (showClusterMask || clusterSimulation == 3) {
            // Show the mask for the clusters
            if (mask == null)
                mask = new int[maskSize * maskSize];
            else
                Arrays.fill(mask, 0);
            int roiRadius = (int) Math.round((clusterRadius) / maskScale);
            for (double[] c : clusterCentres) {
                double cx = c[0] / maskScale;
                double cy = c[1] / maskScale;
                fillMask(mask, maskSize, (int) cx, (int) cy, roiRadius, 1);
            }
            if (clusterSimulation == 3) {
                // We have the mask. Now pick points at random from the mask.
                MaskDistribution maskDistribution = new MaskDistribution(mask, maskSize, maskSize, 0, maskScale, maskScale, randomGenerator);
                // Allocate each molecule position to a parent circle so defining clusters.
                int[][] clusters = new int[clusterCentres.size()][];
                int[] clusterSize = new int[clusters.length];
                for (int i = 0; i < nMolecules; i++) {
                    double[] centre = maskDistribution.next();
                    // The mask returns the coordinates with the centre of the image at 0,0
                    centre[0] += width / 2;
                    centre[1] += width / 2;
                    xyz.add(centre);
                    // Output statistics on cluster size and number.
                    // TODO - Finding the closest cluster could be done better than an all-vs-all comparison
                    double max = distance2(centre, clusterCentres.get(0));
                    int cluster = 0;
                    for (int j = 1; j < clusterCentres.size(); j++) {
                        double d2 = distance2(centre, clusterCentres.get(j));
                        if (d2 < max) {
                            max = d2;
                            cluster = j;
                        }
                    }
                    // Assign point i to cluster
                    centre[2] = cluster;
                    if (clusterSize[cluster] == 0) {
                        clusters[cluster] = new int[10];
                    }
                    if (clusters[cluster].length <= clusterSize[cluster]) {
                        clusters[cluster] = Arrays.copyOf(clusters[cluster], (int) (clusters[cluster].length * 1.5));
                    }
                    clusters[cluster][clusterSize[cluster]++] = i;
                }
                // Generate real cluster size statistics
                for (int j = 0; j < clusterSize.length; j++) {
                    final int size = clusterSize[j];
                    if (size == 0)
                        continue;
                    statsSize.add(size);
                    if (size == 1) {
                        statsRadius.add(0);
                        continue;
                    }
                    // Find centre of cluster and add the distance to each point
                    double[] com = new double[2];
                    for (int n = 0; n < size; n++) {
                        double[] xy = xyz.get(clusters[j][n]);
                        for (int k = 0; k < 2; k++) com[k] += xy[k];
                    }
                    for (int k = 0; k < 2; k++) com[k] /= size;
                    for (int n = 0; n < size; n++) {
                        double dx = xyz.get(clusters[j][n])[0] - com[0];
                        double dy = xyz.get(clusters[j][n])[1] - com[1];
                        statsRadius.add(Math.sqrt(dx * dx + dy * dy));
                    }
                }
            }
            if (showClusterMask) {
                bp = new ByteProcessor(maskSize, maskSize);
                for (int i = 0; i < mask.length; i++) if (mask[i] != 0)
                    bp.set(i, 128);
                Utils.display(maskTitle, bp);
            }
        }
        // Use the simulated cluster centres to create clusters of the desired size
        if (clusterSimulation == 1 || clusterSimulation == 2) {
            for (double[] clusterCentre : clusterCentres) {
                int clusterN = (int) Math.round((clusterNumberSD > 0) ? dataGenerator.nextGaussian(clusterNumber, clusterNumberSD) : clusterNumber);
                if (clusterN < 1)
                    continue;
                //double[] clusterCentre = dist.next();
                if (clusterN == 1) {
                    // No need for a cluster around a point
                    xyz.add(clusterCentre);
                    statsRadius.add(0);
                    statsSize.add(1);
                } else {
                    // Generate N random points within a circle of the chosen cluster radius.
                    // Locate the centre-of-mass and the average distance to the centre.
                    double[] com = new double[3];
                    int j = 0;
                    while (j < clusterN) {
                        // Generate a random point within a circle uniformly
                        // http://stackoverflow.com/questions/5837572/generate-a-random-point-within-a-circle-uniformly
                        double t = 2.0 * Math.PI * randomGenerator.nextDouble();
                        double u = randomGenerator.nextDouble() + randomGenerator.nextDouble();
                        double r = clusterRadius * ((u > 1) ? 2 - u : u);
                        double x = r * Math.cos(t);
                        double y = r * Math.sin(t);
                        double[] xy = new double[] { clusterCentre[0] + x, clusterCentre[1] + y };
                        xyz.add(xy);
                        for (int k = 0; k < 2; k++) com[k] += xy[k];
                        j++;
                    }
                    // Add the distance of the points from the centre of the cluster.
                    // Note this does not account for the movement due to precision.
                    statsSize.add(j);
                    if (j == 1) {
                        statsRadius.add(0);
                    } else {
                        for (int k = 0; k < 2; k++) com[k] /= j;
                        while (j > 0) {
                            double dx = xyz.get(xyz.size() - j)[0] - com[0];
                            double dy = xyz.get(xyz.size() - j)[1] - com[1];
                            statsRadius.add(Math.sqrt(dx * dx + dy * dy));
                            j--;
                        }
                    }
                }
            }
        }
    } else {
        // Random distribution
        for (int i = 0; i < nMolecules; i++) xyz.add(dist.next());
    }
    // The Gaussian sigma should be applied so the overall distance from the centre
    // ( sqrt(x^2+y^2) ) has a standard deviation of sigmaS?
    final double sigma1D = sigmaS / Math.sqrt(2);
    // Show optional histograms
    StoredDataStatistics intraDistances = null;
    StoredData blinks = null;
    if (showHistograms) {
        int capacity = (int) (xyz.size() * blinkingRate);
        intraDistances = new StoredDataStatistics(capacity);
        blinks = new StoredData(capacity);
    }
    Statistics statsSigma = new Statistics();
    for (int i = 0; i < xyz.size(); i++) {
        int nOccurrences = getBlinks(dataGenerator, blinkingRate);
        if (showHistograms)
            blinks.add(nOccurrences);
        final int size = molecules.size();
        // Get coordinates in nm
        final double[] moleculeXyz = xyz.get(i);
        if (bp != null && nOccurrences > 0) {
            bp.putPixel((int) Math.round(moleculeXyz[0] / maskScale), (int) Math.round(moleculeXyz[1] / maskScale), 255);
        }
        while (nOccurrences-- > 0) {
            final double[] localisationXy = Arrays.copyOf(moleculeXyz, 2);
            // Add random precision
            if (sigma1D > 0) {
                final double dx = dataGenerator.nextGaussian(0, sigma1D);
                final double dy = dataGenerator.nextGaussian(0, sigma1D);
                localisationXy[0] += dx;
                localisationXy[1] += dy;
                if (!dist.isWithinXY(localisationXy))
                    continue;
                // Calculate mean-squared displacement
                statsSigma.add(dx * dx + dy * dy);
            }
            final double x = localisationXy[0];
            final double y = localisationXy[1];
            molecules.add(new Molecule(x, y, i, 1));
            // Store in pixels
            float[] params = new float[7];
            params[Gaussian2DFunction.X_POSITION] = (float) (x / nmPerPixel);
            params[Gaussian2DFunction.Y_POSITION] = (float) (y / nmPerPixel);
            results.addf(i + 1, (int) x, (int) y, 0, 0, 0, params, null);
        }
        if (molecules.size() > size) {
            count++;
            if (showHistograms) {
                int newCount = molecules.size() - size;
                if (newCount == 1) {
                    //intraDistances.add(0);
                    continue;
                }
                // Get the distance matrix between these molecules
                double[][] matrix = new double[newCount][newCount];
                for (int ii = size, x = 0; ii < molecules.size(); ii++, x++) {
                    for (int jj = size + 1, y = 1; jj < molecules.size(); jj++, y++) {
                        final double d2 = molecules.get(ii).distance2(molecules.get(jj));
                        matrix[x][y] = matrix[y][x] = d2;
                    }
                }
                // Get the maximum distance for particle linkage clustering of this molecule
                double max = 0;
                for (int x = 0; x < newCount; x++) {
                    // Compare to all-other molecules and get the minimum distance 
                    // needed to join at least one
                    double linkDistance = Double.POSITIVE_INFINITY;
                    for (int y = 0; y < newCount; y++) {
                        if (x == y)
                            continue;
                        if (matrix[x][y] < linkDistance)
                            linkDistance = matrix[x][y];
                    }
                    // Check if this is larger 
                    if (max < linkDistance)
                        max = linkDistance;
                }
                intraDistances.add(Math.sqrt(max));
            }
        }
    }
    results.end();
    if (bp != null)
        Utils.display(maskTitle, bp);
    // Used for debugging
    //System.out.printf("  * Molecules = %d (%d activated)\n", xyz.size(), count);
    //if (clusterSimulation > 0)
    //	System.out.printf("  * Cluster number = %s +/- %s. Radius = %s +/- %s\n",
    //			Utils.rounded(statsSize.getMean(), 4), Utils.rounded(statsSize.getStandardDeviation(), 4),
    //			Utils.rounded(statsRadius.getMean(), 4), Utils.rounded(statsRadius.getStandardDeviation(), 4));
    log("Simulation results");
    log("  * Molecules = %d (%d activated)", xyz.size(), count);
    log("  * Blinking rate = %s", Utils.rounded((double) molecules.size() / xyz.size(), 4));
    log("  * Precision (Mean-displacement) = %s nm", (statsSigma.getN() > 0) ? Utils.rounded(Math.sqrt(statsSigma.getMean()), 4) : "0");
    if (showHistograms) {
        if (intraDistances.getN() == 0) {
            log("  * Mean Intra-Molecule particle linkage distance = 0 nm");
            log("  * Fraction of inter-molecule particle linkage @ 0 nm = 0 %%");
        } else {
            plot(blinks, "Blinks/Molecule", true);
            double[][] intraHist = plot(intraDistances, "Intra-molecule particle linkage distance", false);
            // Determine 95th and 99th percentile
            int p99 = intraHist[0].length - 1;
            double limit1 = 0.99 * intraHist[1][p99];
            double limit2 = 0.95 * intraHist[1][p99];
            while (intraHist[1][p99] > limit1 && p99 > 0) p99--;
            int p95 = p99;
            while (intraHist[1][p95] > limit2 && p95 > 0) p95--;
            log("  * Mean Intra-Molecule particle linkage distance = %s nm (95%% = %s, 99%% = %s, 100%% = %s)", Utils.rounded(intraDistances.getMean(), 4), Utils.rounded(intraHist[0][p95], 4), Utils.rounded(intraHist[0][p99], 4), Utils.rounded(intraHist[0][intraHist[0].length - 1], 4));
            if (distanceAnalysis) {
                performDistanceAnalysis(intraHist, p99);
            }
        }
    }
    if (clusterSimulation > 0) {
        log("  * Cluster number = %s +/- %s", Utils.rounded(statsSize.getMean(), 4), Utils.rounded(statsSize.getStandardDeviation(), 4));
        log("  * Cluster radius = %s +/- %s nm (mean distance to centre-of-mass)", Utils.rounded(statsRadius.getMean(), 4), Utils.rounded(statsRadius.getStandardDeviation(), 4));
    }
}
Also used : ByteProcessor(ij.process.ByteProcessor) TDoubleArrayList(gnu.trove.list.array.TDoubleArrayList) ArrayList(java.util.ArrayList) MaskDistribution(gdsc.smlm.model.MaskDistribution) Well19937c(org.apache.commons.math3.random.Well19937c) RandomGenerator(org.apache.commons.math3.random.RandomGenerator) MemoryPeakResults(gdsc.smlm.results.MemoryPeakResults) NullSource(gdsc.smlm.results.NullSource) RandomDataGenerator(org.apache.commons.math3.random.RandomDataGenerator) UniformDistribution(gdsc.smlm.model.UniformDistribution) StoredDataStatistics(gdsc.core.utils.StoredDataStatistics) Statistics(gdsc.core.utils.Statistics) StoredDataStatistics(gdsc.core.utils.StoredDataStatistics) DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) WeightedObservedPoint(org.apache.commons.math3.fitting.WeightedObservedPoint) ClusterPoint(gdsc.core.clustering.ClusterPoint) StoredData(gdsc.core.utils.StoredData)

Example 14 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project GDSC-SMLM by aherbert.

the class PCPALMClusters method fitBinomial.

/**
	 * Fit a zero-truncated Binomial to the cumulative histogram
	 * 
	 * @param histogramData
	 * @return
	 */
private double[] fitBinomial(HistogramData histogramData) {
    // Get the mean and sum of the input histogram
    double mean;
    double sum = 0;
    count = 0;
    for (int i = 0; i < histogramData.histogram[1].length; i++) {
        count += histogramData.histogram[1][i];
        sum += histogramData.histogram[1][i] * i;
    }
    mean = sum / count;
    String name = "Zero-truncated Binomial distribution";
    Utils.log("Mean cluster size = %s", Utils.rounded(mean));
    Utils.log("Fitting cumulative " + name);
    // Convert to a normalised double array for the binomial fitter
    double[] histogram = new double[histogramData.histogram[1].length];
    for (int i = 0; i < histogramData.histogram[1].length; i++) histogram[i] = histogramData.histogram[1][i] / count;
    // Plot the cumulative histogram
    String title = TITLE + " Cumulative Distribution";
    Plot2 plot = null;
    if (showCumulativeHistogram) {
        // Create a cumulative histogram for fitting
        double[] cumulativeHistogram = new double[histogram.length];
        sum = 0;
        for (int i = 0; i < histogram.length; i++) {
            sum += histogram[i];
            cumulativeHistogram[i] = sum;
        }
        double[] values = Utils.newArray(histogram.length, 0.0, 1.0);
        plot = new Plot2(title, "N", "Cumulative Probability", values, cumulativeHistogram);
        plot.setLimits(0, histogram.length - 1, 0, 1.05);
        plot.addPoints(values, cumulativeHistogram, Plot2.CIRCLE);
        Utils.display(title, plot);
    }
    // Do fitting for different N
    double bestSS = Double.POSITIVE_INFINITY;
    double[] parameters = null;
    int worse = 0;
    int N = histogram.length - 1;
    int min = minN;
    final boolean customRange = (minN > 1) || (maxN > 0);
    if (min > N)
        min = N;
    if (maxN > 0 && N > maxN)
        N = maxN;
    Utils.log("Fitting N from %d to %d%s", min, N, (customRange) ? " (custom-range)" : "");
    // Since varying the N should be done in integer steps do this
    // for n=1,2,3,... until the SS peaks then falls off (is worse then the best 
    // score several times in succession)
    BinomialFitter bf = new BinomialFitter(new IJLogger());
    bf.setMaximumLikelihood(maximumLikelihood);
    for (int n = min; n <= N; n++) {
        PointValuePair solution = bf.fitBinomial(histogram, mean, n, true);
        if (solution == null)
            continue;
        double p = solution.getPointRef()[0];
        Utils.log("Fitted %s : N=%d, p=%s. SS=%g", name, n, Utils.rounded(p), solution.getValue());
        if (bestSS > solution.getValue()) {
            bestSS = solution.getValue();
            parameters = new double[] { n, p };
            worse = 0;
        } else if (bestSS < Double.POSITIVE_INFINITY) {
            if (++worse >= 3)
                break;
        }
        if (showCumulativeHistogram)
            addToPlot(n, p, title, plot, new Color((float) n / N, 0, 1f - (float) n / N));
    }
    // Add best it in magenta
    if (showCumulativeHistogram && parameters != null)
        addToPlot((int) parameters[0], parameters[1], title, plot, Color.magenta);
    return parameters;
}
Also used : Color(java.awt.Color) BinomialFitter(gdsc.smlm.fitting.BinomialFitter) Plot2(ij.gui.Plot2) ClusterPoint(gdsc.core.clustering.ClusterPoint) IJLogger(gdsc.core.ij.IJLogger) PointValuePair(org.apache.commons.math3.optim.PointValuePair)

Example 15 with Mean

use of org.apache.commons.math3.stat.descriptive.moment.Mean in project gatk by broadinstitute.

the class ReCapSegCallerUnitTest method testMakeCalls.

@Test
public void testMakeCalls() {
    final List<Target> targets = new ArrayList<>();
    final List<String> columnNames = Arrays.asList("Sample");
    final List<Double> coverage = new ArrayList<>();
    //add amplification targets
    for (int i = 0; i < 10; i++) {
        final SimpleInterval interval = new SimpleInterval("chr", 100 + 2 * i, 101 + 2 * i);
        targets.add(new Target(interval));
        coverage.add(ParamUtils.log2(2.0));
    }
    //add deletion targets
    for (int i = 0; i < 10; i++) {
        final SimpleInterval interval = new SimpleInterval("chr", 300 + 2 * i, 301 + 2 * i);
        targets.add(new Target(interval));
        coverage.add(ParamUtils.log2(0.5));
    }
    //add targets that don't belong to a segment
    for (int i = 1; i < 10; i++) {
        final SimpleInterval interval = new SimpleInterval("chr", 400 + 2 * i, 401 + 2 * i);
        targets.add(new Target(interval));
        coverage.add(ParamUtils.log2(1.0));
    }
    //add obviously neutral targets with some small spread
    for (int i = -5; i < 6; i++) {
        final SimpleInterval interval = new SimpleInterval("chr", 500 + 2 * i, 501 + 2 * i);
        targets.add(new Target(interval));
        coverage.add(ParamUtils.log2(0.01 * i + 1));
    }
    //add spread-out targets to a neutral segment (mean near zero)
    for (int i = -5; i < 6; i++) {
        final SimpleInterval interval = new SimpleInterval("chr", 700 + 2 * i, 701 + 2 * i);
        targets.add(new Target(interval));
        coverage.add(ParamUtils.log2(0.1 * i + 1));
    }
    final RealMatrix coverageMatrix = new Array2DRowRealMatrix(targets.size(), 1);
    coverageMatrix.setColumn(0, coverage.stream().mapToDouble(x -> x).toArray());
    final int n = targets.size();
    final int m = coverageMatrix.getRowDimension();
    final ReadCountCollection counts = new ReadCountCollection(targets, columnNames, coverageMatrix);
    List<ModeledSegment> segments = new ArrayList<>();
    //amplification
    segments.add(new ModeledSegment(new SimpleInterval("chr", 100, 200), 100, ParamUtils.log2(2.0)));
    //deletion
    segments.add(new ModeledSegment(new SimpleInterval("chr", 300, 400), 100, ParamUtils.log2(0.5)));
    //neutral
    segments.add(new ModeledSegment(new SimpleInterval("chr", 450, 550), 100, ParamUtils.log2(1)));
    //neutral
    segments.add(new ModeledSegment(new SimpleInterval("chr", 650, 750), 100, ParamUtils.log2(1)));
    List<ModeledSegment> calls = ReCapSegCaller.makeCalls(counts, segments);
    Assert.assertEquals(calls.get(0).getCall(), ReCapSegCaller.AMPLIFICATION_CALL);
    Assert.assertEquals(calls.get(1).getCall(), ReCapSegCaller.DELETION_CALL);
    Assert.assertEquals(calls.get(2).getCall(), ReCapSegCaller.NEUTRAL_CALL);
    Assert.assertEquals(calls.get(3).getCall(), ReCapSegCaller.NEUTRAL_CALL);
}
Also used : ArrayList(java.util.ArrayList) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

Test (org.testng.annotations.Test)27 Mean (org.apache.commons.math3.stat.descriptive.moment.Mean)23 List (java.util.List)17 RandomGenerator (org.apache.commons.math3.random.RandomGenerator)16 RealMatrix (org.apache.commons.math3.linear.RealMatrix)14 ArrayList (java.util.ArrayList)12 Collectors (java.util.stream.Collectors)12 StandardDeviation (org.apache.commons.math3.stat.descriptive.moment.StandardDeviation)12 Utils (org.broadinstitute.hellbender.utils.Utils)12 StoredDataStatistics (gdsc.core.utils.StoredDataStatistics)10 Arrays (java.util.Arrays)10 IntStream (java.util.stream.IntStream)10 NormalDistribution (org.apache.commons.math3.distribution.NormalDistribution)10 WeightedObservedPoint (org.apache.commons.math3.fitting.WeightedObservedPoint)10 Logger (org.apache.logging.log4j.Logger)10 ReadCountCollection (org.broadinstitute.hellbender.tools.exome.ReadCountCollection)10 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)10 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)10 Function (java.util.function.Function)9 DescriptiveStatistics (org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)9