
Example 21 with Percentile

use of org.apache.commons.math3.stat.descriptive.rank.Percentile in project GDSC-SMLM by aherbert.

the class DoubletAnalysis method summariseResults.

/**
 * Summarise results.
 *
 * @param results the results
 * @param density the density
 * @param runTime the run time
 */
private void summariseResults(ArrayList<DoubletResult> results, double density, long runTime) {
    // If we are only assessing results with no neighbour candidates
    // TODO - Count the number of actual results that have no neighbours
    final int numberOfMolecules = this.results.size() - ignored.get();
    final FitConfiguration fitConfig = config.getFitConfiguration();
    // Store details we want in the analysis table
    final StringBuilder sb = new StringBuilder();
    sb.append(MathUtils.rounded(density)).append('\t');
    sb.append(MathUtils.rounded(getSa())).append('\t');
    sb.append(config.getFittingWidth()).append('\t');
    sb.append(PsfProtosHelper.getName(fitConfig.getPsfType()));
    sb.append(":").append(PeakFit.getSolverName(fitConfig));
    if (fitConfig.isModelCameraMle()) {
        sb.append(":Camera\t");
        // Add details of the noise model for the MLE
        final CalibrationReader r = new CalibrationReader(fitConfig.getCalibration());
        sb.append("EM=").append(r.isEmCcd());
        sb.append(":A=").append(MathUtils.rounded(r.getCountPerElectron()));
        sb.append(":N=").append(MathUtils.rounded(r.getReadNoise()));
        sb.append('\t');
    } else {
        sb.append("\t\t");
    }
    final String analysisPrefix = sb.toString();
    // -=-=-=-=-
    showResults(results, settings.showResults);
    sb.setLength(0);
    final int n = countN(results);
    // Create the benchmark settings and the fitting settings
    sb.append(numberOfMolecules).append('\t');
    sb.append(n).append('\t');
    sb.append(MathUtils.rounded(density)).append('\t');
    sb.append(MathUtils.rounded(simulationParameters.minSignal)).append('\t');
    sb.append(MathUtils.rounded(simulationParameters.maxSignal)).append('\t');
    sb.append(MathUtils.rounded(simulationParameters.averageSignal)).append('\t');
    sb.append(MathUtils.rounded(simulationParameters.sd)).append('\t');
    sb.append(MathUtils.rounded(simulationParameters.pixelPitch)).append('\t');
    sb.append(MathUtils.rounded(getSa() * simulationParameters.pixelPitch)).append('\t');
    sb.append(MathUtils.rounded(simulationParameters.gain)).append('\t');
    sb.append(MathUtils.rounded(simulationParameters.readNoise)).append('\t');
    sb.append(MathUtils.rounded(simulationParameters.background)).append('\t');
    sb.append(MathUtils.rounded(simulationParameters.noise)).append('\t');
    sb.append(MathUtils.rounded(simulationParameters.averageSignal / simulationParameters.noise)).append('\t');
    sb.append(config.getFittingWidth()).append('\t');
    sb.append(PsfProtosHelper.getName(fitConfig.getPsfType()));
    sb.append(":").append(PeakFit.getSolverName(fitConfig));
    if (fitConfig.isModelCameraMle()) {
        sb.append(":Camera\t");
        // Add details of the noise model for the MLE
        final CalibrationReader r = new CalibrationReader(fitConfig.getCalibration());
        sb.append("EM=").append(r.isEmCcd());
        sb.append(":A=").append(MathUtils.rounded(r.getCountPerElectron()));
        sb.append(":N=").append(MathUtils.rounded(r.getReadNoise()));
        sb.append('\t');
    } else {
        sb.append("\t\t");
    }
    // Now output the actual results ...
    // Show histograms as cumulative to avoid problems with bin width
    // Residuals scores
    // Iterations and evaluations where fit was OK
    final StoredDataStatistics[] stats = new StoredDataStatistics[Settings.NAMES2.length];
    for (int i = 0; i < stats.length; i++) {
        stats[i] = new StoredDataStatistics();
    }
    // For Jaccard scoring we need to count the score with no residuals threshold,
    // i.e. Accumulate the score accepting all doublets that were fit
    double tp = 0;
    double fp = 0;
    double bestTp = 0;
    double bestFp = 0;
    final ArrayList<DoubletBonus> data = new ArrayList<>(results.size());
    for (final DoubletResult result : results) {
        final double score = result.getMaxScore();
        // Filter the singles that would be accepted
        if (result.good1) {
            // Filter the doublets that would be accepted
            if (result.good2) {
                final double tp2 = result.tp2a + result.tp2b;
                final double fp2 = result.fp2a + result.fp2b;
                tp += tp2;
                fp += fp2;
                if (result.tp2a > 0.5) {
                    bestTp += result.tp2a;
                    bestFp += result.fp2a;
                }
                if (result.tp2b > 0.5) {
                    bestTp += result.tp2b;
                    bestFp += result.fp2b;
                }
                // Store this as a doublet bonus
                data.add(new DoubletBonus(score, result.getAvScore(), tp2 - result.tp1, fp2 - result.fp1));
            } else {
                // No doublet fit so this will always be the single fit result
                tp += result.tp1;
                fp += result.fp1;
                if (result.tp1 > 0.5) {
                    bestTp += result.tp1;
                    bestFp += result.fp1;
                }
            }
        }
        // Build statistics
        final int c = result.matchClass;
        // True results, i.e. where there was a choice between single or doublet
        if (result.valid) {
            stats[c].add(score);
        }
        // Of those where the fit was good, summarise the iterations and evaluations
        if (result.good1) {
            stats[3].add(result.iter1);
            stats[4].add(result.eval1);
            // Summarise the doublet iterations only for genuine doublet candidates; we do not care
            // about the iteration increase for singles that are not doublets.
            if (c != 0 && result.good2) {
                stats[5].add(result.iter2);
                stats[6].add(result.eval2);
            }
        }
    }
    // Debug the counts
    // double tpSingle = 0;
    // double fpSingle = 0;
    // double tpDoublet = 0;
    // double fpDoublet = 0;
    // int nSingle = 0, nDoublet = 0;
    // for (DoubletResult result : results) {
    // if (result.good1) {
    // if (result.good2) {
    // tpDoublet += result.tp2a + result.tp2b;
    // fpDoublet += result.fp2a + result.fp2b;
    // nDoublet++;
    // }
    // tpSingle += result.tp1;
    // fpSingle += result.fp1;
    // nSingle++;
    // }
    // }
    // System.out.printf("Single %.1f,%.1f (%d) : Doublet %.1f,%.1f (%d)\n", tpSingle, fpSingle,
    // nSingle, tpDoublet, fpDoublet, nDoublet * 2);
    // Summarise score for true results
    final Percentile p = new Percentile(99);
    for (int c = 0; c < stats.length; c++) {
        final double[] values = stats[c].getValues();
        // Sorting is needed for the percentile and the cumulative histogram, so do it once
        Arrays.sort(values);
        sb.append(MathUtils.rounded(stats[c].getMean())).append("+/-")
            .append(MathUtils.rounded(stats[c].getStandardDeviation()))
            .append(" (").append(stats[c].getN()).append(") ")
            .append(MathUtils.rounded(p.evaluate(values))).append('\t');
        if (settings.showHistograms && settings.displayHistograms[c + Settings.NAMES.length]) {
            showCumulativeHistogram(values, Settings.NAMES2[c]);
        }
    }
    sb.append(Settings.MATCHING_METHODS[settings.matchingMethod]).append('\t');
    // Plot a graph of the additional results we would fit at all score thresholds.
    // This assumes we just pick the doublet if we fit it (NO FILTERING at all!)
    // Initialise the score for a residuals threshold of 0
    // Store this as it serves as a baseline for the filtering analysis
    final ResidualsScore residualsScoreMax = computeScores(data, tp, fp, numberOfMolecules, true);
    final ResidualsScore residualsScoreAv = computeScores(data, tp, fp, numberOfMolecules, false);
    residualsScore = (settings.useMaxResiduals) ? residualsScoreMax : residualsScoreAv;
    if (settings.showJaccardPlot) {
        plotJaccard(residualsScore, null);
    }
    final String bestJaccard = MathUtils.rounded(bestTp / (bestFp + numberOfMolecules)) + '\t';
    final String analysisPrefix2 = analysisPrefix + bestJaccard;
    sb.append(bestJaccard);
    addJaccardScores(sb);
    sb.append('\t').append(TextUtils.nanosToString(runTime));
    createSummaryTable().append(sb.toString());
    // Store results in memory for later analysis
    referenceResults.set(new ReferenceResults(results, residualsScoreMax, residualsScoreAv, numberOfMolecules, analysisPrefix2));
}
Also used : Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) StoredDataStatistics(uk.ac.sussex.gdsc.core.utils.StoredDataStatistics) ArrayList(java.util.ArrayList) CalibrationReader(uk.ac.sussex.gdsc.smlm.data.config.CalibrationReader) PeakResultPoint(uk.ac.sussex.gdsc.smlm.results.PeakResultPoint) BasePoint(uk.ac.sussex.gdsc.core.match.BasePoint) FitConfiguration(uk.ac.sussex.gdsc.smlm.engine.FitConfiguration)
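
The core Percentile usage in summariseResults reduces to a small standalone sketch: one Percentile instance configured for the 99th percentile, reused across several pre-sorted arrays. This assumes only commons-math3 on the classpath; the class name and metric values below are illustrative, not taken from GDSC-SMLM.

import java.util.Arrays;

import org.apache.commons.math3.stat.descriptive.rank.Percentile;

public class PercentileSummaryDemo {
    public static void main(String[] args) {
        // One instance configured for the 99th percentile, reused for every statistic.
        final Percentile p99 = new Percentile(99);
        final double[][] metrics = { { 12.1, 9.4, 15.0, 11.2, 30.7 }, { 2.0, 2.5, 2.2, 40.0, 2.1 } };
        for (final double[] values : metrics) {
            // Percentile.evaluate works on an internal copy, so sorting is not required by it;
            // the example above sorts once because the sorted array also feeds a cumulative histogram.
            Arrays.sort(values);
            System.out.printf("n=%d, 99th percentile=%.3f%n", values.length, p99.evaluate(values));
        }
    }
}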

Example 22 with Percentile

use of org.apache.commons.math3.stat.descriptive.rank.Percentile in project GDSC-SMLM by aherbert.

the class DataEstimator method getPercentile.

/**
 * Get the percentile value of the data.
 *
 * @param percentile The percentile
 * @return the percentile value
 */
public float getPercentile(double percentile) {
    // Check the input
    if (percentile <= 0) {
        percentile = Double.MIN_NORMAL;
    }
    if (percentile > 100) {
        percentile = 100;
    }
    // The data should not have NaN so we ignore them for speed.
    final Percentile p = new Percentile(percentile).withNaNStrategy(NaNStrategy.FIXED);
    final int size = width * height;
    final double[] values = new double[size];
    for (int i = 0; i < size; i++) {
        values[i] = data[i];
    }
    return (float) p.evaluate(values);
}
Also used : Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile)
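
The NaN-strategy configuration in getPercentile can be shown in isolation. This sketch assumes commons-math3 3.3 or later, where Percentile.withNaNStrategy is available; the pixel values are illustrative.

import org.apache.commons.math3.stat.descriptive.rank.Percentile;
import org.apache.commons.math3.stat.ranking.NaNStrategy;

public class PercentileNaNDemo {
    public static void main(String[] args) {
        // NaNStrategy.FIXED leaves NaNs in place instead of scanning for and removing
        // them, which is the speed shortcut getPercentile relies on for NaN-free data.
        final Percentile p = new Percentile(25).withNaNStrategy(NaNStrategy.FIXED);
        final double[] pixels = { 3.0, 7.5, 1.2, 9.9, 4.4, 6.1 };
        System.out.println("25th percentile: " + p.evaluate(pixels));
    }
}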

Example 23 with Percentile

use of org.apache.commons.math3.stat.descriptive.rank.Percentile in project metron by apache.

the class HLLPMeasurement method main.

public static void main(String[] args) {
    Options options = new Options();
    try {
        CommandLineParser parser = new PosixParser();
        CommandLine cmd = null;
        try {
            cmd = ParserOptions.parse(parser, args);
        } catch (ParseException pe) {
            pe.printStackTrace();
            final HelpFormatter usageFormatter = new HelpFormatter();
            usageFormatter.printHelp("HLLPMeasurement", null, options, null, true);
            System.exit(-1);
        }
        if (cmd.hasOption("h")) {
            final HelpFormatter usageFormatter = new HelpFormatter();
            usageFormatter.printHelp("HLLPMeasurement", null, options, null, true);
            System.exit(0);
        }
        final String chartDelim = ParserOptions.CHART_DELIM.get(cmd, "|");
        final int numTrials = Integer.parseInt(ParserOptions.NUM_TRIALS.get(cmd, "5000"));
        final int cardMin = Integer.parseInt(ParserOptions.CARD_MIN.get(cmd, "200"));
        final int cardMax = Integer.parseInt(ParserOptions.CARD_MAX.get(cmd, "1000"));
        final int cardStep = Integer.parseInt(ParserOptions.CARD_STEP.get(cmd, "200"));
        final int cardStart = (((cardMin - 1) / cardStep) * cardStep) + cardStep;
        final int spMin = Integer.parseInt(ParserOptions.SP_MIN.get(cmd, "4"));
        final int spMax = Integer.parseInt(ParserOptions.SP_MAX.get(cmd, "32"));
        final int spStep = Integer.parseInt(ParserOptions.SP_STEP.get(cmd, "4"));
        final int pMin = Integer.parseInt(ParserOptions.P_MIN.get(cmd, "4"));
        final int pMax = Integer.parseInt(ParserOptions.P_MAX.get(cmd, "32"));
        final int pStep = Integer.parseInt(ParserOptions.P_STEP.get(cmd, "4"));
        final double errorPercentile = Double.parseDouble(ParserOptions.ERR_PERCENTILE.get(cmd, "50"));
        final double timePercentile = Double.parseDouble(ParserOptions.TIME_PERCENTILE.get(cmd, "50"));
        final double sizePercentile = Double.parseDouble(ParserOptions.SIZE_PERCENTILE.get(cmd, "50"));
        final boolean formatErrPercent = Boolean.parseBoolean(ParserOptions.ERR_FORMAT_PERCENT.get(cmd, "true"));
        final int errMultiplier = formatErrPercent ? 100 : 1;
        final Function<Double, String> errorFormatter = (v -> ERR_FORMAT.format(v * errMultiplier));
        final Function<Double, String> timeFormatter = (v -> TIME_FORMAT.format(v / NANO_TO_MILLIS));
        final Function<Double, String> sizeFormatter = (v -> SIZE_FORMAT.format(v));
        final String[] chartKey = new String[] { "card: cardinality", "sp: sparse precision value", "p: normal precision value", "err: error as a percent of the expected cardinality; ", "time: total time to add all values to the hllp estimator and calculate a cardinality estimate", "size: size of the hllp set in bytes once all values have been added for the specified cardinality", "l=low, m=mid(based on percentile chosen), h=high, std=standard deviation" };
        final String[] chartHeader = new String[] { "card", "sp", "p", "err l/m/h/std (% of actual)", "time l/m/h/std (ms)", "size l/m/h/std (b)" };
        final int[] chartPadding = new int[] { 10, 10, 10, 40, 40, 30 };
        if (spMin < pMin) {
            throw new IllegalArgumentException("p must be <= sp");
        }
        if (spMax < pMax) {
            throw new IllegalArgumentException("p must be <= sp");
        }
        println("Options Used");
        println("------------");
        println("num trials: " + numTrials);
        println("card min: " + cardMin);
        println("card max: " + cardMax);
        println("card step: " + cardStep);
        println("card start: " + cardStart);
        println("sp min: " + spMin);
        println("sp max: " + spMax);
        println("sp step: " + spStep);
        println("p min: " + pMin);
        println("p max: " + pMax);
        println("p step: " + pStep);
        println("error percentile: " + errorPercentile);
        println("time percentile: " + timePercentile);
        println("size percentile: " + sizePercentile);
        println("format err as %: " + formatErrPercent);
        println("");
        printHeading(chartKey, chartHeader, chartPadding, chartDelim);
        for (int c = cardStart; c <= cardMax; c += cardStep) {
            for (int sp = spMin; sp <= spMax; sp += spStep) {
                for (int p = pMin; p <= pMax; p += pStep) {
                    DescriptiveStatistics errorStats = new DescriptiveStatistics();
                    DescriptiveStatistics timeStats = new DescriptiveStatistics();
                    DescriptiveStatistics sizeStats = new DescriptiveStatistics();
                    for (int i = 0; i < numTrials; i++) {
                        List<Object> trialSet = buildTrialSet(c);
                        Set<Object> unique = new HashSet<>(trialSet);
                        long distinctVals = unique.size();
                        HyperLogLogPlus hllp = new HyperLogLogPlus(p, sp);
                        long timeStart = System.nanoTime();
                        hllp.addAll(trialSet);
                        long dvEstimate = hllp.cardinality();
                        long timeEnd = System.nanoTime();
                        long timeElapsed = timeEnd - timeStart;
                        double rawError = Math.abs(dvEstimate - distinctVals) / (double) distinctVals;
                        errorStats.addValue(rawError);
                        timeStats.addValue(timeElapsed);
                        sizeStats.addValue(SerDeUtils.toBytes(hllp).length);
                    }
                    MeasureResultFormatter errorRF = new MeasureResultFormatter(errorStats, errorFormatter, errorPercentile);
                    MeasureResultFormatter timeRF = new MeasureResultFormatter(timeStats, timeFormatter, timePercentile);
                    MeasureResultFormatter sizeRF = new MeasureResultFormatter(sizeStats, sizeFormatter, sizePercentile);
                    println(formatWithPadding(new String[] { "" + c, "" + sp, "" + p, errorRF.getFormattedResults(), timeRF.getFormattedResults(), sizeRF.getFormattedResults() }, chartPadding, chartDelim));
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
    }
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)
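
How DescriptiveStatistics reduces the per-trial measurements to the low/mid/high/std figures printed in the chart can be sketched on its own, assuming commons-math3 on the classpath; the class name and timing values are illustrative.

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

public class TrialStatsDemo {
    public static void main(String[] args) {
        final DescriptiveStatistics timeStats = new DescriptiveStatistics();
        final long[] elapsedNanos = { 120_000, 95_000, 110_000, 400_000, 105_000 };
        for (final long t : elapsedNanos) {
            timeStats.addValue(t);
        }
        // Low / mid (the chosen percentile) / high / standard deviation,
        // matching the l/m/h/std columns of the chart.
        System.out.printf("l=%.0f m=%.0f h=%.0f std=%.0f%n",
                timeStats.getMin(),
                timeStats.getPercentile(50),
                timeStats.getMax(),
                timeStats.getStandardDeviation());
    }
}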

Example 24 with Percentile

use of org.apache.commons.math3.stat.descriptive.rank.Percentile in project GDSC-SMLM by aherbert.

the class BenchmarkSpotFit method showDoubleHistogram.

private double[] showDoubleHistogram(StoredDataStatistics[][] stats, final int i, WindowOrganiser wo, double[][] matchScores, double nPredicted) {
    String xLabel = filterCriteria[i].name;
    LowerLimit lower = filterCriteria[i].lower;
    UpperLimit upper = filterCriteria[i].upper;
    double[] j = null;
    double[] metric = null;
    double maxJ = 0;
    if (i <= FILTER_PRECISION && (showFilterScoreHistograms || upper.requiresJaccard || lower.requiresJaccard)) {
        // Jaccard score versus the range of the metric
        Arrays.sort(matchScores, new Comparator<double[]>() {

            public int compare(double[] o1, double[] o2) {
                if (o1[i] < o2[i])
                    return -1;
                if (o1[i] > o2[i])
                    return 1;
                return 0;
            }
        });
        final int scoreIndex = FILTER_PRECISION + 1;
        int n = results.size();
        double tp = 0;
        double fp = 0;
        j = new double[matchScores.length + 1];
        metric = new double[j.length];
        for (int k = 0; k < matchScores.length; k++) {
            final double score = matchScores[k][scoreIndex];
            tp += score;
            fp += (1 - score);
            j[k + 1] = tp / (fp + n);
            metric[k + 1] = matchScores[k][i];
        }
        metric[0] = metric[1];
        maxJ = Maths.max(j);
        if (showFilterScoreHistograms) {
            String title = TITLE + " Jaccard " + xLabel;
            Plot plot = new Plot(title, xLabel, "Jaccard", metric, j);
            // Remove outliers
            double[] limitsx = Maths.limits(metric);
            Percentile p = new Percentile();
            double l = p.evaluate(metric, 25);
            double u = p.evaluate(metric, 75);
            double iqr = 1.5 * (u - l);
            limitsx[1] = Math.min(limitsx[1], u + iqr);
            plot.setLimits(limitsx[0], limitsx[1], 0, Maths.max(j));
            PlotWindow pw = Utils.display(title, plot);
            if (Utils.isNewWindow())
                wo.add(pw);
        }
    }
    // [0] is all
    // [1] is matches
    // [2] is no match
    StoredDataStatistics s1 = stats[0][i];
    StoredDataStatistics s2 = stats[1][i];
    StoredDataStatistics s3 = stats[2][i];
    if (s1.getN() == 0)
        return new double[4];
    DescriptiveStatistics d = s1.getStatistics();
    double median = 0;
    Plot2 plot = null;
    String title = null;
    if (showFilterScoreHistograms) {
        median = d.getPercentile(50);
        String label = String.format("n = %d. Median = %s nm", s1.getN(), Utils.rounded(median));
        int id = Utils.showHistogram(TITLE, s1, xLabel, filterCriteria[i].minBinWidth, (filterCriteria[i].restrictRange) ? 1 : 0, 0, label);
        if (id == 0) {
            IJ.log("Failed to show the histogram: " + xLabel);
            return new double[4];
        }
        if (Utils.isNewWindow())
            wo.add(id);
        title = WindowManager.getImage(id).getTitle();
        // Reverse engineer the histogram settings
        plot = Utils.plot;
        double[] xValues = Utils.xValues;
        int bins = xValues.length;
        double yMin = xValues[0];
        double binSize = xValues[1] - xValues[0];
        double yMax = xValues[0] + (bins - 1) * binSize;
        if (s2.getN() > 0) {
            double[] values = s2.getValues();
            double[][] hist = Utils.calcHistogram(values, yMin, yMax, bins);
            if (hist[0].length > 0) {
                plot.setColor(Color.red);
                plot.addPoints(hist[0], hist[1], Plot2.BAR);
                Utils.display(title, plot);
            }
        }
        if (s3.getN() > 0) {
            double[] values = s3.getValues();
            double[][] hist = Utils.calcHistogram(values, yMin, yMax, bins);
            if (hist[0].length > 0) {
                plot.setColor(Color.blue);
                plot.addPoints(hist[0], hist[1], Plot2.BAR);
                Utils.display(title, plot);
            }
        }
    }
    // Do cumulative histogram
    double[][] h1 = Maths.cumulativeHistogram(s1.getValues(), true);
    double[][] h2 = Maths.cumulativeHistogram(s2.getValues(), true);
    double[][] h3 = Maths.cumulativeHistogram(s3.getValues(), true);
    if (showFilterScoreHistograms) {
        title = TITLE + " Cumul " + xLabel;
        plot = new Plot2(title, xLabel, "Frequency");
        // Find limits
        double[] xlimit = Maths.limits(h1[0]);
        xlimit = Maths.limits(xlimit, h2[0]);
        xlimit = Maths.limits(xlimit, h3[0]);
        // Restrict using the inter-quartile range 
        if (filterCriteria[i].restrictRange) {
            double q1 = d.getPercentile(25);
            double q2 = d.getPercentile(75);
            double iqr = (q2 - q1) * 2.5;
            xlimit[0] = Maths.max(xlimit[0], median - iqr);
            xlimit[1] = Maths.min(xlimit[1], median + iqr);
        }
        plot.setLimits(xlimit[0], xlimit[1], 0, 1.05);
        plot.addPoints(h1[0], h1[1], Plot.LINE);
        plot.setColor(Color.red);
        plot.addPoints(h2[0], h2[1], Plot.LINE);
        plot.setColor(Color.blue);
        plot.addPoints(h3[0], h3[1], Plot.LINE);
    }
    // Determine the maximum difference between the TP and FP
    double maxx1 = 0;
    double maxx2 = 0;
    double max1 = 0;
    double max2 = 0;
    // We cannot compute the delta histogram, or use percentiles
    if (s2.getN() == 0) {
        upper = UpperLimit.ZERO;
        lower = LowerLimit.ZERO;
    }
    final boolean requireLabel = (showFilterScoreHistograms && filterCriteria[i].requireLabel);
    if (requireLabel || upper.requiresDeltaHistogram() || lower.requiresDeltaHistogram()) {
        if (s2.getN() != 0 && s3.getN() != 0) {
            LinearInterpolator li = new LinearInterpolator();
            PolynomialSplineFunction f1 = li.interpolate(h2[0], h2[1]);
            PolynomialSplineFunction f2 = li.interpolate(h3[0], h3[1]);
            for (double x : h1[0]) {
                if (x < h2[0][0] || x < h3[0][0])
                    continue;
                try {
                    double v1 = f1.value(x);
                    double v2 = f2.value(x);
                    double diff = v2 - v1;
                    if (diff > 0) {
                        if (max1 < diff) {
                            max1 = diff;
                            maxx1 = x;
                        }
                    } else {
                        if (max2 > diff) {
                            max2 = diff;
                            maxx2 = x;
                        }
                    }
                } catch (OutOfRangeException e) {
                    // Because we reached the end
                    break;
                }
            }
        } else {
            // Switch to percentiles if we have no delta histogram
            if (upper.requiresDeltaHistogram())
                upper = UpperLimit.NINETY_NINE_PERCENT;
            if (lower.requiresDeltaHistogram())
                lower = LowerLimit.ONE_PERCENT;
        }
    //			System.out.printf("Bounds %s : %s, pos %s, neg %s, %s\n", xLabel, Utils.rounded(getPercentile(h2, 0.01)),
    //					Utils.rounded(maxx1), Utils.rounded(maxx2), Utils.rounded(getPercentile(h1, 0.99)));
    }
    if (showFilterScoreHistograms) {
        // We use bins=1 on charts where we do not need a label
        if (requireLabel) {
            String label = String.format("Max+ %s @ %s, Max- %s @ %s", Utils.rounded(max1), Utils.rounded(maxx1), Utils.rounded(max2), Utils.rounded(maxx2));
            plot.setColor(Color.black);
            plot.addLabel(0, 0, label);
        }
        PlotWindow pw = Utils.display(title, plot);
        if (Utils.isNewWindow())
            wo.add(pw.getImagePlus().getID());
    }
    // Now compute the bounds using the desired limit
    double l, u;
    switch(lower) {
        case ONE_PERCENT:
            l = getPercentile(h2, 0.01);
            break;
        case MAX_NEGATIVE_CUMUL_DELTA:
            l = maxx2;
            break;
        case ZERO:
            l = 0;
            break;
        case HALF_MAX_JACCARD_VALUE:
            l = getValue(metric, j, maxJ * 0.5);
            break;
        default:
            throw new RuntimeException("Missing lower limit method");
    }
    switch(upper) {
        case MAX_POSITIVE_CUMUL_DELTA:
            u = maxx1;
            break;
        case NINETY_NINE_PERCENT:
            u = getPercentile(h2, 0.99);
            break;
        case NINETY_NINE_NINE_PERCENT:
            u = getPercentile(h2, 0.999);
            break;
        case ZERO:
            u = 0;
            break;
        case MAX_JACCARD2:
            u = getValue(metric, j, maxJ) * 2;
            //System.out.printf("MaxJ = %.4f @ %.3f\n", maxJ, u / 2);
            break;
        default:
            throw new RuntimeException("Missing upper limit method");
    }
    double min = getPercentile(h1, 0);
    double max = getPercentile(h1, 1);
    return new double[] { l, u, min, max };
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Plot(ij.gui.Plot) StoredDataStatistics(gdsc.core.utils.StoredDataStatistics) PlotWindow(ij.gui.PlotWindow) Plot2(ij.gui.Plot2) PolynomialSplineFunction(org.apache.commons.math3.analysis.polynomials.PolynomialSplineFunction) PeakResultPoint(gdsc.smlm.ij.plugins.ResultsMatchCalculator.PeakResultPoint) BasePoint(gdsc.core.match.BasePoint) LinearInterpolator(org.apache.commons.math3.analysis.interpolation.LinearInterpolator) OutOfRangeException(org.apache.commons.math3.exception.OutOfRangeException)
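
The interquartile-range clipping used above to keep a single outlier from stretching the plot axis can be reduced to a short sketch, assuming only commons-math3; the metric values and class name are illustrative.

import java.util.Arrays;

import org.apache.commons.math3.stat.descriptive.rank.Percentile;

public class IqrLimitDemo {
    public static void main(String[] args) {
        final double[] metric = { 1.1, 1.3, 1.2, 1.4, 1.5, 1.6, 1.2, 9.0 };
        final Percentile p = new Percentile();
        final double lowerQuartile = p.evaluate(metric, 25);
        final double upperQuartile = p.evaluate(metric, 75);
        final double iqr = 1.5 * (upperQuartile - lowerQuartile);
        // Clip the upper plot limit so a single outlier does not stretch the x-axis.
        final double max = Arrays.stream(metric).max().getAsDouble();
        final double upperLimit = Math.min(max, upperQuartile + iqr);
        System.out.println("x-axis upper limit = " + upperLimit);
    }
}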

Example 25 with Percentile

use of org.apache.commons.math3.stat.descriptive.rank.Percentile in project gatk by broadinstitute.

the class ReadCountCollectionUtils method removeColumnsWithExtremeMedianCounts.

/**
     * Creates a new read-count collection that is a copy of the input but dropping columns with extreme median coverage.
     *
     * @param readCounts the input read-counts.
     * @param extremeColumnMedianCountPercentileThreshold bottom percentile to use as an exclusion threshold.
     * @return never {@code null}. It might be a reference to the input read-counts if
     * there are no columns to be dropped.
     */
public static ReadCountCollection removeColumnsWithExtremeMedianCounts(final ReadCountCollection readCounts, final double extremeColumnMedianCountPercentileThreshold, final Logger logger) {
    final List<String> columnNames = readCounts.columnNames();
    final RealMatrix counts = readCounts.counts();
    final double[] columnMedians = MatrixSummaryUtils.getColumnMedians(counts);
    // Calculate thresholds:
    final double bottomExtremeThreshold = new Percentile(extremeColumnMedianCountPercentileThreshold).evaluate(columnMedians);
    final double topExtremeThreshold = new Percentile(100 - extremeColumnMedianCountPercentileThreshold).evaluate(columnMedians);
    // Determine kept and dropped column sets.
    final Set<String> columnsToKeep = new LinkedHashSet<>(readCounts.columnNames().size());
    final int initialMapSize = ((int) (2.0 * extremeColumnMedianCountPercentileThreshold / 100.0)) * readCounts.columnNames().size();
    final Map<String, Double> columnsToDrop = new LinkedHashMap<>(initialMapSize);
    for (int i = 0; i < columnMedians.length; i++) {
        if (columnMedians[i] >= bottomExtremeThreshold && columnMedians[i] <= topExtremeThreshold) {
            columnsToKeep.add(columnNames.get(i));
        } else {
            columnsToDrop.put(columnNames.get(i), columnMedians[i]);
        }
    }
    // log and drop columns if it applies
    if (columnsToKeep.isEmpty()) {
        throw new UserException.BadInput("No column count left after applying the extreme counts outlier filter");
    } else if (columnsToKeep.size() == columnNames.size()) {
        logger.info(String.format("No column dropped due to extreme counts outside [%.10f, %.10f]", bottomExtremeThreshold, topExtremeThreshold));
        return readCounts;
    } else {
        final double droppedPercentage = ((double) (columnsToDrop.size()) / columnNames.size()) * 100;
        logger.info(String.format("Some columns dropped (%d out of %d, %.2f%%) as they are classified as having extreme " + "median counts across targets outside [%.10f, %.10f]: e.g. %s", columnsToDrop.size(), columnNames.size(), droppedPercentage, bottomExtremeThreshold, topExtremeThreshold, columnsToDrop.entrySet().stream().limit(10).map(kv -> kv.getKey() + " (" + kv.getValue() + ")").collect(Collectors.joining(", "))));
        return readCounts.subsetColumns(columnsToKeep);
    }
}
Also used : Percentile(org.apache.commons.math3.stat.descriptive.rank.Percentile) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix)
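
The two-sided percentile thresholding in removeColumnsWithExtremeMedianCounts follows a simple pattern: compute symmetric bottom and top percentiles of the column medians, then keep only the columns between them. A minimal sketch, assuming commons-math3; the median values and threshold are illustrative.

import org.apache.commons.math3.stat.descriptive.rank.Percentile;

public class ExtremeMedianFilterDemo {
    public static void main(String[] args) {
        final double[] columnMedians = { 10, 12, 11, 13, 200, 9, 1, 12 };
        final double thresholdPercentile = 10.0;
        // Bottom and top thresholds are symmetric percentiles of the column medians.
        final double bottom = new Percentile(thresholdPercentile).evaluate(columnMedians);
        final double top = new Percentile(100 - thresholdPercentile).evaluate(columnMedians);
        for (final double median : columnMedians) {
            final boolean keep = median >= bottom && median <= top;
            System.out.printf("median %.1f -> %s%n", median, keep ? "keep" : "drop");
        }
    }
}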

Aggregations

Percentile (org.apache.commons.math3.stat.descriptive.rank.Percentile)31 ArrayList (java.util.ArrayList)16 RealMatrix (org.apache.commons.math3.linear.RealMatrix)16 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)14 List (java.util.List)11 Collectors (java.util.stream.Collectors)11 IntStream (java.util.stream.IntStream)11 File (java.io.File)10 DoubleStream (java.util.stream.DoubleStream)10 Median (org.apache.commons.math3.stat.descriptive.rank.Median)10 Logger (org.apache.logging.log4j.Logger)10 Test (org.testng.annotations.Test)10 Random (java.util.Random)9 Stream (java.util.stream.Stream)9 DescriptiveStatistics (org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)9 Level (org.apache.logging.log4j.Level)8 Marker (org.apache.logging.log4j.Marker)8 Message (org.apache.logging.log4j.message.Message)8 AbstractLogger (org.apache.logging.log4j.spi.AbstractLogger)8 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)8