Example 16 with SimpleRegression

use of org.apache.commons.math3.stat.regression.SimpleRegression in project GDSC-SMLM by aherbert.

the class BenchmarkSpotFilter method summariseResults.

private BenchmarkFilterResult summariseResults(TIntObjectHashMap<FilterResult> filterResults, FitEngineConfiguration config, MaximaSpotFilter spotFilter, boolean relativeDistances, boolean batchSummary) {
    BenchmarkFilterResult filterResult = new BenchmarkFilterResult(filterResults, config, spotFilter);
    // Note:
    // Although we can compute the TP/FP score as each additional spot is added
    // using the RankedScoreCalculator, this is not applicable to the PeakFit method.
    // That method relies on all spot candidates being present in order to decide
    // whether to fit a candidate as a multiple. So scoring only the filtered candidates
    // (for example the top 10) may produce a better score than scoring all candidates
    // and accumulating the scores for the top 10, but that is not how the algorithm
    // uses the candidate set, i.e. it does not use the top 10, then the top 20 to
    // refine the fit, etc. (the method is not iterative).
    // We require an assessment of how a subset of the scored candidates, taken in
    // ranked order, contributes to the overall score, i.e. whether the candidates are
    // ranked correctly: those contributing most to the match with the underlying data
    // should be near the top and those contributing least should be at the end.
    // TODO We could add some smart filtering of candidates before ranking. This would
    // allow assessment of the candidate set handed to PeakFit. E.g. Threshold the image
    // and only use candidates that are in the foreground region.
    double[][] cumul = histogramFailures(filterResult);
    // Create the overall match score
    final double[] total = new double[3];
    final ArrayList<ScoredSpot> allSpots = new ArrayList<BenchmarkSpotFilter.ScoredSpot>();
    filterResults.forEachValue(new TObjectProcedure<FilterResult>() {

        public boolean execute(FilterResult result) {
            total[0] += result.result.getTP();
            total[1] += result.result.getFP();
            total[2] += result.result.getFN();
            allSpots.addAll(Arrays.asList(result.spots));
            return true;
        }
    });
    double tp = total[0], fp = total[1], fn = total[2];
    FractionClassificationResult allResult = new FractionClassificationResult(tp, fp, 0, fn);
    // The number of actual results
    final double n = (tp + fn);
    StringBuilder sb = new StringBuilder();
    double signal = (simulationParameters.minSignal + simulationParameters.maxSignal) * 0.5;
    // Create the benchmark settings and the fitting settings
    sb.append(imp.getStackSize()).append("\t");
    final int w = lastAnalysisBorder.width;
    final int h = lastAnalysisBorder.height;
    sb.append(w).append("\t");
    sb.append(h).append("\t");
    sb.append(Utils.rounded(n)).append("\t");
    double density = (n / imp.getStackSize()) / (w * h) / (simulationParameters.a * simulationParameters.a / 1e6);
    sb.append(Utils.rounded(density)).append("\t");
    sb.append(Utils.rounded(signal)).append("\t");
    sb.append(Utils.rounded(simulationParameters.s)).append("\t");
    sb.append(Utils.rounded(simulationParameters.a)).append("\t");
    sb.append(Utils.rounded(simulationParameters.depth)).append("\t");
    sb.append(simulationParameters.fixedDepth).append("\t");
    sb.append(Utils.rounded(simulationParameters.gain)).append("\t");
    sb.append(Utils.rounded(simulationParameters.readNoise)).append("\t");
    sb.append(Utils.rounded(simulationParameters.b)).append("\t");
    sb.append(Utils.rounded(simulationParameters.b2)).append("\t");
    // Compute the noise
    double noise = simulationParameters.b2;
    if (simulationParameters.emCCD) {
        // The b2 parameter was computed without application of the EM-CCD noise factor of 2.
        //final double b2 = backgroundVariance + readVariance
        //                = simulationParameters.b + readVariance
        // This should be applied only to the background variance.
        final double readVariance = noise - simulationParameters.b;
        noise = simulationParameters.b * 2 + readVariance;
    }
    sb.append(Utils.rounded(signal / Math.sqrt(noise))).append("\t");
    sb.append(Utils.rounded(simulationParameters.s / simulationParameters.a)).append("\t");
    sb.append(config.getDataFilterType()).append("\t");
    //sb.append(spotFilter.getName()).append("\t");
    sb.append(spotFilter.getSearch()).append("\t");
    sb.append(spotFilter.getBorder()).append("\t");
    sb.append(Utils.rounded(spotFilter.getSpread())).append("\t");
    sb.append(config.getDataFilter(0)).append("\t");
    final double param = config.getSmooth(0);
    final double hwhmMin = config.getHWHMMin();
    if (relativeDistances) {
        sb.append(Utils.rounded(param * hwhmMin)).append("\t");
        sb.append(Utils.rounded(param)).append("\t");
    } else {
        sb.append(Utils.rounded(param)).append("\t");
        sb.append(Utils.rounded(param / hwhmMin)).append("\t");
    }
    sb.append(spotFilter.getDescription()).append("\t");
    sb.append(lastAnalysisBorder.x).append("\t");
    sb.append(MATCHING_METHOD[matchingMethod]).append("\t");
    sb.append(Utils.rounded(lowerMatchDistance)).append("\t");
    sb.append(Utils.rounded(matchDistance)).append("\t");
    sb.append(Utils.rounded(lowerSignalFactor)).append("\t");
    sb.append(Utils.rounded(upperSignalFactor));
    resultPrefix = sb.toString();
    // Add the results
    sb.append("\t");
    // Rank the scored spots by intensity
    Collections.sort(allSpots);
    // Produce Recall, Precision, Jaccard for each cut of the spot candidates
    double[] r = new double[allSpots.size() + 1];
    double[] p = new double[r.length];
    double[] j = new double[r.length];
    double[] c = new double[r.length];
    double[] truePositives = new double[r.length];
    double[] falsePositives = new double[r.length];
    double[] intensity = new double[r.length];
    // Note: fn = n - tp
    tp = fp = 0;
    int i = 1;
    p[0] = 1;
    FastCorrelator corr = new FastCorrelator();
    double lastC = 0;
    double[] i1 = new double[r.length];
    double[] i2 = new double[r.length];
    int ci = 0;
    SimpleRegression regression = new SimpleRegression(false);
    for (ScoredSpot s : allSpots) {
        if (s.match) {
            // Score partial matches as part true-positive and part false-positive.
            // TP can be above 1 if we are allowing multiple matches.
            tp += s.getScore();
            fp += s.antiScore();
            // Just use a rounded intensity for now
            final double spotIntensity = s.getIntensity();
            final long v1 = (long) Math.round(spotIntensity);
            final long v2 = (long) Math.round(s.intensity);
            regression.addData(spotIntensity, s.intensity);
            i1[ci] = spotIntensity;
            i2[ci] = s.intensity;
            ci++;
            corr.add(v1, v2);
            lastC = corr.getCorrelation();
        } else
            fp++;
        r[i] = (double) tp / n;
        p[i] = (double) tp / (tp + fp);
        // (tp+fp+fn) == (fp+n) since tp+fn=n;
        j[i] = (double) tp / (fp + n);
        c[i] = lastC;
        truePositives[i] = tp;
        falsePositives[i] = fp;
        intensity[i] = s.getIntensity();
        i++;
    }
    i1 = Arrays.copyOf(i1, ci);
    i2 = Arrays.copyOf(i2, ci);
    final double slope = regression.getSlope();
    sb.append(Utils.rounded(slope)).append("\t");
    addResult(sb, allResult, c[c.length - 1]);
    // Output the match results when the recall achieves the fraction of the maximum.
    double target = r[r.length - 1];
    if (recallFraction < 100)
        target *= recallFraction / 100.0;
    int fractionIndex = 0;
    while (fractionIndex < r.length && r[fractionIndex] < target) {
        fractionIndex++;
    }
    if (fractionIndex == r.length)
        fractionIndex--;
    addResult(sb, new FractionClassificationResult(truePositives[fractionIndex], falsePositives[fractionIndex], 0, n - truePositives[fractionIndex]), c[fractionIndex]);
    // Output the match results at the maximum jaccard score
    int maxIndex = 0;
    for (int ii = 1; ii < r.length; ii++) {
        if (j[maxIndex] < j[ii])
            maxIndex = ii;
    }
    addResult(sb, new FractionClassificationResult(truePositives[maxIndex], falsePositives[maxIndex], 0, n - truePositives[maxIndex]), c[maxIndex]);
    sb.append(Utils.rounded(time / 1e6));
    // Calculate AUC (Average precision == Area Under Precision-Recall curve)
    final double auc = AUCCalculator.auc(p, r);
    // Compute the AUC using the adjusted precision curve
    // which uses the maximum precision for recall >= r
    final double[] maxp = new double[p.length];
    double max = 0;
    for (int k = maxp.length; k-- > 0; ) {
        if (max < p[k])
            max = p[k];
        maxp[k] = max;
    }
    final double auc2 = AUCCalculator.auc(maxp, r);
    sb.append("\t").append(Utils.rounded(auc));
    sb.append("\t").append(Utils.rounded(auc2));
    // Output the number of fit failures that must be processed to capture fractions of the true positives
    if (cumul[0].length != 0) {
        sb.append("\t").append(Utils.rounded(getFailures(cumul, 0.80)));
        sb.append("\t").append(Utils.rounded(getFailures(cumul, 0.90)));
        sb.append("\t").append(Utils.rounded(getFailures(cumul, 0.95)));
        sb.append("\t").append(Utils.rounded(getFailures(cumul, 0.99)));
        sb.append("\t").append(Utils.rounded(cumul[0][cumul[0].length - 1]));
    } else
        sb.append("\t\t\t\t\t");
    BufferedTextWindow resultsTable = getTable(batchSummary);
    resultsTable.append(sb.toString());
    // Store results
    filterResult.auc = auc;
    filterResult.auc2 = auc2;
    filterResult.r = r;
    filterResult.p = p;
    filterResult.j = j;
    filterResult.c = c;
    filterResult.maxIndex = maxIndex;
    filterResult.fractionIndex = fractionIndex;
    filterResult.cumul = cumul;
    filterResult.slope = slope;
    filterResult.i1 = i1;
    filterResult.i2 = i2;
    filterResult.intensity = intensity;
    filterResult.relativeDistances = relativeDistances;
    filterResult.time = time;
    return filterResult;
}
Also used : BufferedTextWindow(gdsc.core.ij.BufferedTextWindow) FastCorrelator(gdsc.core.utils.FastCorrelator) ArrayList(java.util.ArrayList) PeakResultPoint(gdsc.smlm.ij.plugins.ResultsMatchCalculator.PeakResultPoint) BasePoint(gdsc.core.match.BasePoint) SimpleRegression(org.apache.commons.math3.stat.regression.SimpleRegression) FractionClassificationResult(gdsc.core.match.FractionClassificationResult)
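
This example fits a regression through the origin: new SimpleRegression(false) disables the intercept, the (candidate intensity, true intensity) pairs of matched spots are added one by one, and only getSlope() is reported alongside the correlation. Below is a minimal, self-contained sketch of that pattern; the intensity values are hypothetical and only commons-math3 is assumed on the classpath.

import org.apache.commons.math3.stat.regression.SimpleRegression;

public class NoInterceptSlopeSketch {
    public static void main(String[] args) {
        // false = no intercept: the fitted line is forced through the origin,
        // mirroring new SimpleRegression(false) in summariseResults.
        SimpleRegression regression = new SimpleRegression(false);
        // Hypothetical (candidate intensity, true intensity) pairs.
        double[][] pairs = { { 120.0, 115.0 }, { 300.0, 290.0 }, { 75.0, 80.0 } };
        for (double[] pair : pairs) {
            regression.addData(pair[0], pair[1]);
        }
        // With no intercept only the slope is meaningful; a slope near 1 suggests
        // the candidate intensity tracks the true intensity well.
        System.out.println("slope = " + regression.getSlope());
    }
}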

Example 17 with SimpleRegression

use of org.apache.commons.math3.stat.regression.SimpleRegression in project GDSC-SMLM by aherbert.

the class BenchmarkFilterAnalysis method reportResults.

private ComplexFilterScore reportResults(boolean newResults, List<ComplexFilterScore> filters) {
    if (filters.isEmpty()) {
        IJ.log("Warning: No filters pass the criteria");
        return null;
    }
    getCoordinateStore();
    Collections.sort(filters);
    FractionClassificationResult topFilterClassificationResult = null;
    ArrayList<FractionalAssignment[]> topFilterResults = null;
    String topFilterSummary = null;
    if (showSummaryTable || saveTemplate) {
        createSummaryWindow();
        int n = 0;
        final double range = (summaryDepth / simulationParameters.a) * 0.5;
        int np = 0;
        for (double depth : depthStats) {
            if (Math.abs(depth) < range)
                np++;
        }
        for (ComplexFilterScore fs : filters) {
            final ArrayList<FractionalAssignment[]> list = new ArrayList<FractionalAssignment[]>(resultsList.length);
            final FractionClassificationResult r = scoreFilter(fs.getFilter(), minimalFilter, resultsList, list, coordinateStore);
            final StringBuilder sb = createResult(fs.getFilter(), r);
            if (topFilterResults == null) {
                topFilterResults = list;
                topFilterClassificationResult = r;
            }
            // Show the recall at the specified depth. Sum the distance and signal factor of all scored spots.
            int scored = 0;
            double tp = 0, d = 0, sf = 0, rmsd = 0;
            SimpleRegression regression = new SimpleRegression(false);
            for (FractionalAssignment[] assignments : list) {
                if (assignments == null)
                    continue;
                for (int i = 0; i < assignments.length; i++) {
                    final CustomFractionalAssignment c = (CustomFractionalAssignment) assignments[i];
                    if (Math.abs(c.peak.error) <= range)
                        tp += c.getScore();
                    d += c.d;
                    sf += c.getSignalFactor();
                    rmsd += c.d * c.d;
                    regression.addData(c.peakResult.getSignal(), c.peak.getSignal());
                }
                scored += assignments.length;
            }
            final double slope = regression.getSlope();
            sb.append('\t');
            sb.append(Utils.rounded((double) tp / np)).append('\t');
            sb.append(Utils.rounded(d / scored)).append('\t');
            sb.append(Utils.rounded(sf / scored)).append('\t');
            sb.append(Utils.rounded(Math.sqrt(rmsd / scored))).append('\t');
            sb.append(Utils.rounded(slope)).append('\t');
            if (fs.atLimit() != null)
                sb.append(fs.atLimit());
            String text = sb.toString();
            if (topFilterSummary == null) {
                topFilterSummary = text;
                if (!showSummaryTable)
                    break;
            }
            if (fs.time != 0) {
                sb.append('\t');
                sb.append(fs.algorithm);
                sb.append('\t');
                sb.append(org.apache.commons.lang3.time.DurationFormatUtils.formatDurationHMS(fs.time));
            } else
                sb.append("\t\t");
            if (fs.paramTime != 0) {
                sb.append('\t');
                sb.append(fs.getParamAlgorithm());
                sb.append('\t');
                sb.append(org.apache.commons.lang3.time.DurationFormatUtils.formatDurationHMS(fs.paramTime));
            } else
                sb.append("\t\t");
            text = sb.toString();
            if (isHeadless)
                IJ.log(text);
            else
                summaryWindow.append(text);
            n++;
            if (summaryTopN > 0 && n >= summaryTopN)
                break;
        }
        // Add a spacer to the summary table if we have multiple results
        if (n > 1 && showSummaryTable) {
            if (isHeadless)
                IJ.log("");
            else
                summaryWindow.append("");
        }
    }
    DirectFilter bestFilter = filters.get(0).getFilter();
    if (saveBestFilter)
        saveFilter(bestFilter);
    if (topFilterClassificationResult == null) {
        topFilterResults = new ArrayList<FractionalAssignment[]>(resultsList.length);
        topFilterClassificationResult = scoreFilter(bestFilter, minimalFilter, resultsList, topFilterResults, coordinateStore);
    }
    if (newResults || scores.isEmpty()) {
        scores.add(new FilterResult(failCount, residualsThreshold, duplicateDistance, filters.get(0)));
    }
    if (saveTemplate)
        saveTemplate(topFilterSummary);
    showPlots();
    calculateSensitivity();
    topFilterResults = depthAnalysis(topFilterResults, bestFilter);
    topFilterResults = scoreAnalysis(topFilterResults, bestFilter);
    componentAnalysis(topFilterClassificationResult, filters.get(0));
    PreprocessedPeakResult[] filterResults = null;
    if (isShowOverlay())
        filterResults = showOverlay(topFilterResults, bestFilter);
    saveResults(filterResults, bestFilter);
    wo.tile();
    return filters.get(0);
}
Also used : IDirectFilter(gdsc.smlm.results.filter.IDirectFilter) DirectFilter(gdsc.smlm.results.filter.DirectFilter) ArrayList(java.util.ArrayList) SimpleRegression(org.apache.commons.math3.stat.regression.SimpleRegression) FractionalAssignment(gdsc.core.match.FractionalAssignment) PeakFractionalAssignment(gdsc.smlm.results.filter.PeakFractionalAssignment) FractionClassificationResult(gdsc.core.match.FractionClassificationResult) BasePreprocessedPeakResult(gdsc.smlm.results.filter.BasePreprocessedPeakResult) PreprocessedPeakResult(gdsc.smlm.results.filter.PreprocessedPeakResult)
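
As in the previous example, the regression here is forced through the origin, this time relating the fitted signal of each scored assignment to the true signal (regression.addData(c.peakResult.getSignal(), c.peak.getSignal())) and reporting the slope per filter. One detail worth keeping in mind, shown in the hypothetical sketch below, is that getSlope() returns NaN until at least two observations have been added, so a filter that passes very few assignments yields a NaN slope rather than an error.

import org.apache.commons.math3.stat.regression.SimpleRegression;

public class SignalSlopeSketch {
    public static void main(String[] args) {
        // Regression through the origin, as in reportResults.
        SimpleRegression regression = new SimpleRegression(false);
        // getSlope() is NaN while fewer than two points have been added.
        System.out.println("slope before data = " + regression.getSlope());
        // Hypothetical (fitted signal, true signal) pairs.
        regression.addData(950.0, 1000.0);
        regression.addData(480.0, 500.0);
        regression.addData(2100.0, 2000.0);
        System.out.println("n = " + regression.getN());
        System.out.println("slope = " + regression.getSlope());
    }
}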

Example 18 with SimpleRegression

use of org.apache.commons.math3.stat.regression.SimpleRegression in project dhis2-core by dhis2.

the class DefaultChartService method getCategoryDataSet.

private CategoryDataset[] getCategoryDataSet(BaseChart chart) {
    Map<String, Object> valueMap = new HashMap<>();
    if (chart.isAnalyticsType(AnalyticsType.AGGREGATE)) {
        valueMap = analyticsService.getAggregatedDataValueMapping(chart);
    } else if (chart.isAnalyticsType(AnalyticsType.EVENT)) {
        Grid grid = eventAnalyticsService.getAggregatedEventData(chart);
        chart.setDataItemGrid(grid);
        valueMap = GridUtils.getMetaValueMapping(grid, (grid.getWidth() - 1));
    }
    DefaultCategoryDataset regularDataSet = new DefaultCategoryDataset();
    DefaultCategoryDataset regressionDataSet = new DefaultCategoryDataset();
    SimpleRegression regression = new SimpleRegression();
    BaseAnalyticalObject.sortKeys(valueMap);
    List<NameableObject> seriez = new ArrayList<>(chart.series());
    List<NameableObject> categories = new ArrayList<>(chart.category());
    if (chart.hasSortOrder()) {
        categories = getSortedCategories(categories, chart, valueMap);
    }
    for (NameableObject series : seriez) {
        double categoryIndex = 0;
        for (NameableObject category : categories) {
            categoryIndex++;
            String key = getKey(series, category, chart.getAnalyticsType());
            Object object = valueMap.get(key);
            Number value = object != null && object instanceof Number ? (Number) object : null;
            regularDataSet.addValue(value, series.getShortName(), category.getShortName());
            if (chart.isRegression() && value != null && value instanceof Double && !MathUtils.isEqual((Double) value, MathUtils.ZERO)) {
                regression.addData(categoryIndex, (Double) value);
            }
        }
        // Period must be category
        if (chart.isRegression()) {
            categoryIndex = 0;
            for (NameableObject category : chart.category()) {
                final double value = regression.predict(categoryIndex++);
                if (!Double.isNaN(value)) {
                    regressionDataSet.addValue(value, TREND_PREFIX + series.getShortName(), category.getShortName());
                }
            }
        }
    }
    return new CategoryDataset[] { regularDataSet, regressionDataSet };
}
Also used : SimpleRegression(org.apache.commons.math3.stat.regression.SimpleRegression) DefaultCategoryDataset(org.jfree.data.category.DefaultCategoryDataset) CategoryDataset(org.jfree.data.category.CategoryDataset) DefaultCategoryDataset(org.jfree.data.category.DefaultCategoryDataset)
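
Here SimpleRegression is used with its default constructor, so an intercept is fitted: the running category index is x, the aggregated data value is y, and regression.predict(index) supplies the trend value that goes into the regression data set, skipping NaN results. A minimal sketch with hypothetical per-category values (no DHIS2 or JFreeChart types required):

import org.apache.commons.math3.stat.regression.SimpleRegression;

public class TrendLineSketch {
    public static void main(String[] args) {
        SimpleRegression regression = new SimpleRegression(); // intercept included
        // Hypothetical value per category (e.g. one per period).
        double[] values = { 10.0, 12.5, 11.8, 14.2, 15.0 };
        for (int i = 0; i < values.length; i++) {
            regression.addData(i + 1, values[i]);
        }
        // Predict the trend value at each category index, as the regression data set does.
        for (int i = 0; i < values.length; i++) {
            double trend = regression.predict(i + 1);
            if (!Double.isNaN(trend)) {
                System.out.println("category " + (i + 1) + " trend = " + trend);
            }
        }
    }
}

For simplicity the sketch predicts at the same indices that were added; note that getCategoryDataSet above increments categoryIndex before addData (so x starts at 1) but resets it to 0 before calling predict, so its trend line is evaluated one index earlier than the data it was fitted on.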

Example 19 with SimpleRegression

use of org.apache.commons.math3.stat.regression.SimpleRegression in project incubator-heron by apache.

the class ComponentMetricsHelper method computeBufferSizeTrend.

public void computeBufferSizeTrend() {
    for (InstanceMetrics instanceMetrics : componentMetrics.getMetrics().values()) {
        Map<Instant, Double> bufferMetrics = instanceMetrics.getMetrics().get(METRIC_BUFFER_SIZE.text());
        if (bufferMetrics == null || bufferMetrics.size() < 3) {
            // missing or insufficient data for creating a trend line
            continue;
        }
        SimpleRegression simpleRegression = new SimpleRegression(true);
        for (Instant timestamp : bufferMetrics.keySet()) {
            simpleRegression.addData(timestamp.getEpochSecond(), bufferMetrics.get(timestamp));
        }
        double slope = simpleRegression.getSlope();
        instanceMetrics.addMetric(METRIC_WAIT_Q_GROWTH_RATE.text(), slope);
        if (maxBufferChangeRate < slope) {
            maxBufferChangeRate = slope;
        }
    }
}
Also used : InstanceMetrics(com.microsoft.dhalion.metrics.InstanceMetrics) SimpleRegression(org.apache.commons.math3.stat.regression.SimpleRegression) Instant(java.time.Instant)
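
The pattern here is a growth-rate estimate: each sample's epoch second is x, the buffer size is y, an intercept is fitted (new SimpleRegression(true)), and the resulting slope is stored as the wait-queue growth rate, with at least three samples required before a trend is attempted. A self-contained sketch using hypothetical timestamps and buffer sizes:

import java.time.Instant;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.commons.math3.stat.regression.SimpleRegression;

public class BufferGrowthSketch {
    public static void main(String[] args) {
        // Hypothetical buffer-size samples keyed by timestamp.
        Map<Instant, Double> bufferMetrics = new LinkedHashMap<>();
        Instant start = Instant.parse("2024-01-01T00:00:00Z");
        bufferMetrics.put(start, 100.0);
        bufferMetrics.put(start.plusSeconds(60), 160.0);
        bufferMetrics.put(start.plusSeconds(120), 230.0);

        // true = fit an intercept; the slope is then the growth rate in units per second.
        SimpleRegression simpleRegression = new SimpleRegression(true);
        for (Map.Entry<Instant, Double> sample : bufferMetrics.entrySet()) {
            simpleRegression.addData(sample.getKey().getEpochSecond(), sample.getValue());
        }
        System.out.println("growth rate = " + simpleRegression.getSlope() + " per second");
    }
}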

Example 20 with SimpleRegression

use of org.apache.commons.math3.stat.regression.SimpleRegression in project SeqMonk by s-andrews.

the class GeneSetIntensityDifferenceFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
protected void generateProbeList() {
    try {
        applyMultipleTestingCorrection = optionsPanel.multipleTestingBox.isSelected();
        calculateCustomRegression = optionsPanel.calculateCustomRegressionBox.isSelected();
        if (calculateCustomRegression == false) {
            customRegressionValues = null;
        }
        if (calculateLinearRegression) {
            simpleRegression = new SimpleRegression();
        }
        Probe[] allProbes = startingList.getAllProbes();
        // We're not allowing multiple comparisons - this is a bit of a messy workaround so it's compatible with other methods.
        fromStore = fromStores[0];
        toStore = toStores[0];
        ArrayList<Probe> probeArrayList = new ArrayList<Probe>();
        int NaNcount = 0;
        // remove the invalid probes - the ones without a value
        for (int i = 0; i < allProbes.length; i++) {
            if ((Float.isNaN(fromStore.getValueForProbe(allProbes[i]))) || (Float.isNaN(toStore.getValueForProbe(allProbes[i])))) {
                NaNcount++;
            } else {
                probeArrayList.add(allProbes[i]);
            }
        }
        System.err.println("Found " + NaNcount + " probes that were invalid.");
        probes = probeArrayList.toArray(new Probe[0]);
        if (calculateCustomRegression == true) {
            customRegressionValues = new float[2][probes.length];
        }
        // We'll pull the number of probes to sample from the preferences if they've changed it
        Integer updatedProbesPerSet = optionsPanel.probesPerSet();
        if (updatedProbesPerSet != null)
            probesPerSet = updatedProbesPerSet;
        // we want a set of z-scores using the local distribution.
        probeZScoreLookupTable = new Hashtable<Probe, Double>();
        // Put something in the progress whilst we're ordering the probe values to make the comparison.
        progressUpdated("Generating background model", 0, 1);
        Comparator<Integer> comp = new AverageIntensityComparator(fromStore, toStore, probes);
        // We need to generate a set of probe indices that can be ordered by their average intensity
        Integer[] indices = new Integer[probes.length];
        for (int i = 0; i < probes.length; i++) {
            indices[i] = i;
            /* add the data to the linear regression object */
            if (calculateLinearRegression) {
                simpleRegression.addData((double) fromStore.getValueForProbe(probes[i]), (double) toStore.getValueForProbe(probes[i]));
            }
        }
        if (calculateLinearRegression) {
            System.out.println("intercept = " + simpleRegression.getIntercept() + ", slope = " + simpleRegression.getSlope());
        }
        Arrays.sort(indices, comp);
        /* This holds the indices to get the deduplicated probe from the original list of probes */
        deduplicatedIndices = new Integer[probes.length];
        /* The number of probes with different values */
        int dedupProbeCounter = 0;
        // the probes deduplicated by value
        ArrayList<Probe> deduplicatedProbes = new ArrayList<Probe>();
        // populate the first one so that we have something to compare to in the loop
        deduplicatedIndices[0] = 0;
        deduplicatedProbes.add(probes[indices[0]]);
        progressUpdated("Made 0 of 1 comparisons", 0, 1);
        for (int i = 1; i < indices.length; i++) {
            /* indices have been sorted, now we need to check whether adjacent pair values are identical */
            if ((fromStore.getValueForProbe(probes[indices[i]]) == fromStore.getValueForProbe(probes[indices[i - 1]])) && (toStore.getValueForProbe(probes[indices[i]]) == toStore.getValueForProbe(probes[indices[i - 1]]))) {
                /* If they are identical, do not add the probe to the deduplicatedProbes object, but have a reference for it so we can look up which deduplicated probe and 
					 * therefore which distribution slice to use for the duplicated probe. */
                deduplicatedIndices[i] = dedupProbeCounter;
            } else {
                deduplicatedProbes.add(probes[indices[i]]);
                dedupProbeCounter++;
                deduplicatedIndices[i] = dedupProbeCounter;
            }
        }
        Probe[] dedupProbes = deduplicatedProbes.toArray(new Probe[0]);
        // make sure we're not trying to use more probes than we've got in the analysis
        if (probesPerSet > dedupProbes.length) {
            probesPerSet = dedupProbes.length;
        }
        System.out.println("total number of probe values = " + probes.length);
        System.out.println("number of deduplicated probe values = " + dedupProbes.length);
        System.out.println("probesPerSet = " + probesPerSet);
        // I want this to contain all the differences, then from that I'm going to get the z-scores.
        double[] currentDiffSet = new double[probesPerSet];
        for (int i = 0; i < indices.length; i++) {
            if (cancel) {
                cancel = false;
                progressCancelled();
                return;
            }
            if (i % 1000 == 0) {
                int progress = (i * 100) / indices.length;
                // progress += 100*comparisonIndex;
                progressUpdated("Made 0 out of 1 comparisons", progress, 100);
            }
            /**
             * There are +1s in here because we skip over j when j == startingIndex.
             */
            // We need to make up the set of differences to represent this probe
            int startingIndex = deduplicatedIndices[i] - (probesPerSet / 2);
            if (startingIndex < 0)
                startingIndex = 0;
            if (startingIndex + (probesPerSet + 1) >= dedupProbes.length)
                startingIndex = dedupProbes.length - (probesPerSet + 1);
            try {
                for (int j = startingIndex; j < startingIndex + (probesPerSet + 1); j++) {
                    if (j == startingIndex) {
                        // Don't include the point being tested in the background model
                        continue;
                    }
                    double diff;
                    if (calculateLinearRegression == true) {
                        if (j > dedupProbes.length) {
                            System.err.println(" j is too big, it's " + j + " and dedupProbes.length = " + dedupProbes.length + ", starting index = " + startingIndex);
                        }
                        double x = fromStore.getValueForProbe(dedupProbes[j]);
                        double expectedY = (simpleRegression.getSlope() * x) + simpleRegression.getIntercept();
                        diff = toStore.getValueForProbe(dedupProbes[j]) - expectedY;
                    } else {
                        diff = toStore.getValueForProbe(dedupProbes[j]) - fromStore.getValueForProbe(dedupProbes[j]);
                    }
                    if (j < startingIndex) {
                        currentDiffSet[j - startingIndex] = diff;
                    } else {
                        currentDiffSet[(j - startingIndex) - 1] = diff;
                    }
                }
                if (calculateCustomRegression == true) {
                    // the average/ kind of centre line
                    float z = ((fromStore.getValueForProbe(probes[indices[i]]) + toStore.getValueForProbe(probes[indices[i]])) / 2);
                    customRegressionValues[0][indices[i]] = z - ((float) SimpleStats.mean(currentDiffSet) / 2);
                    customRegressionValues[1][indices[i]] = z + ((float) SimpleStats.mean(currentDiffSet) / 2);
                }
                double mean = 0;
                // Get the difference for this point
                double diff;
                if (calculateLinearRegression == true) {
                    double x = fromStore.getValueForProbe(probes[indices[i]]);
                    double expectedY = (simpleRegression.getSlope() * x) + simpleRegression.getIntercept();
                    diff = toStore.getValueForProbe(probes[indices[i]]) - expectedY;
                } else if (calculateCustomRegression == true) {
                    diff = toStore.getValueForProbe(probes[indices[i]]) - fromStore.getValueForProbe(probes[indices[i]]);
                    mean = SimpleStats.mean(currentDiffSet);
                } else {
                    diff = toStore.getValueForProbe(probes[indices[i]]) - fromStore.getValueForProbe(probes[indices[i]]);
                }
                double stdev;
                stdev = SimpleStats.stdev(currentDiffSet, mean);
                // if there are no reads in the probe for either of the datasets, should we set the zscore to 0??
                double zScore = (diff - mean) / stdev;
                // modified z score
                // median absolute deviation
                /*		double[] madArray = new double[currentDiffSet.length];
						double median = SimpleStats.median(currentDiffSet);					
						
						for(int d=0; d<currentDiffSet.length; d++){							
							madArray[d] = Math.abs(currentDiffSet[d] - median);							
						}
						
						double mad = SimpleStats.median(madArray);							
						zScore = (0.6745 * (diff - median))/mad;
					}
				*/
                probeZScoreLookupTable.put(probes[indices[i]], zScore);
            } catch (SeqMonkException sme) {
                progressExceptionReceived(sme);
                return;
            }
        }
        // make this an array list as we're kicking out the mapped gene sets that have zscores with variance of 0.
        ArrayList<MappedGeneSetTTestValue> pValueArrayList = new ArrayList<MappedGeneSetTTestValue>();
        MappedGeneSet[] mappedGeneSets = null;
        /* if we're using the gene set from a file, map the gene sets to the probes */
        if (optionsPanel.geneSetsFileRadioButton.isSelected()) {
            GeneSetCollectionParser geneSetCollectionParser = new GeneSetCollectionParser(minGenesInSet, maxGenesInSet);
            GeneSetCollection geneSetCollection = geneSetCollectionParser.parseGeneSetInformation(validGeneSetFilepath);
            MappedGeneSet[] allMappedGeneSets = geneSetCollection.getMappedGeneSets(probes);
            if (allMappedGeneSets == null) {
                JOptionPane.showMessageDialog(SeqMonkApplication.getInstance(), "No sets of genes could be matched to probes.\nCheck that the gmt file is for the right species. \nTo use gene sets from a file, probe names must contain the gene name. \nTry defining new probes over genes (e.g. using the RNA-seq quantitation pipeline) or use existing probes lists instead of a gene set file.", "No gene sets identified", JOptionPane.ERROR_MESSAGE);
                throw new SeqMonkException("No sets of genes could be matched to probes.\nTo use gene sets from a file, probe names must contain the gene name.\nTry defining new probes over genes or use existing probes lists instead of a gene set file.");
            } else {
                ArrayList<MappedGeneSet> mgsArrayList = new ArrayList<MappedGeneSet>();
                /* get rid of those that have fewer probes in the set than minGenesInSet. We shouldn't exceed maxGenesInSet unless probes have been made over something other than genes */
                for (int i = 0; i < allMappedGeneSets.length; i++) {
                    if (allMappedGeneSets[i].getProbes().length >= minGenesInSet) {
                        mgsArrayList.add(allMappedGeneSets[i]);
                    }
                }
                mappedGeneSets = mgsArrayList.toArray(new MappedGeneSet[0]);
            }
        } else /* or if we're using existing probelists, create mappedGeneSets from them */
        if (optionsPanel.probeListRadioButton.isSelected() && selectedProbeLists != null) {
            mappedGeneSets = new MappedGeneSet[selectedProbeLists.length];
            for (int i = 0; i < selectedProbeLists.length; i++) {
                mappedGeneSets[i] = new MappedGeneSet(selectedProbeLists[i]);
            }
        } else {
            throw new SeqMonkException("Haven't got any genesets to use, shouldn't have got here without having any selected.");
        }
        if (mappedGeneSets == null || mappedGeneSets.length == 0) {
            throw new SeqMonkException("Couldn't map gene sets to the probes, try again with a different probe set");
        } else {
            System.err.println("there are " + mappedGeneSets.length + " mappedGeneSets");
            System.err.println("size of zScore lookup table = " + probeZScoreLookupTable.size());
        }
        System.err.println("total number of mapped gene sets = " + mappedGeneSets.length);
        // deduplicate our mappedGeneSets
        if (optionsPanel.deduplicateGeneSetBox.isSelected()) {
            mappedGeneSets = deduplicateMappedGeneSets(mappedGeneSets);
        }
        System.err.println("deduplicated mapped gene sets = " + mappedGeneSets.length);
        /* 
			 * we need to go through the mapped gene set and get all the values for the matched probes 
			 * 
			 */
        for (int i = 0; i < mappedGeneSets.length; i++) {
            Probe[] geneSetProbes = mappedGeneSets[i].getProbes();
            // to contain all the z-scores for the gene set
            double[] geneSetZscores = new double[geneSetProbes.length];
            // Find the z-scores for each of the probes in the mappedGeneSet
            for (int gsp = 0; gsp < geneSetProbes.length; gsp++) {
                if (probeZScoreLookupTable.containsKey(geneSetProbes[gsp])) {
                    geneSetZscores[gsp] = probeZScoreLookupTable.get(geneSetProbes[gsp]);
                }
            }
            // this is just temporary to check the stats - there's some duplication with this.... is there??
            mappedGeneSets[i].zScores = geneSetZscores;
            if (geneSetZscores.length > 1) {
                // the number of probes in the mappedGeneSet should always be > 1 anyway as the mappedGeneSet shouldn't be created if there are < 2 matched probes.
                double pVal;
                if (optionsPanel.statisticalTestBox.getSelectedItem().toString().equals("t-test")) {
                    pVal = TTest.calculatePValue(geneSetZscores, 0);
                } else if (optionsPanel.statisticalTestBox.getSelectedItem().toString().equals("Kolmorogov-Smirnov test")) {
                    double[] allZscores = getAllZScores(probeZScoreLookupTable);
                    KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
                    pVal = ksTest.kolmogorovSmirnovTest(geneSetZscores, allZscores);
                } else if (optionsPanel.statisticalTestBox.getSelectedItem().toString().equals("Kolmorogov-Smirnov non-directional test")) {
                    double[] allZscores = getAllZScores(probeZScoreLookupTable);
                    KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
                    pVal = ksTest.kolmogorovSmirnovTest(convertToAbsoluteValues(geneSetZscores), convertToAbsoluteValues(allZscores));
                } else {
                    throw new IllegalArgumentException("Didn't recognise statistical test " + optionsPanel.statisticalTestBox.getSelectedItem().toString());
                }
                mappedGeneSets[i].meanZScore = SimpleStats.mean(geneSetZscores);
                // check the variance - we don't want variances of 0.
                double stdev = SimpleStats.stdev(geneSetZscores);
                if ((stdev * stdev) < 0.00000000000001) {
                    continue;
                } else // if all the differences between the datasets are 0 then just get rid of them
                if (Double.isNaN(pVal)) {
                    continue;
                } else {
                    pValueArrayList.add(new MappedGeneSetTTestValue(mappedGeneSets[i], pVal));
                }
            } else {
                System.err.println("Fell through the net somewhere, why does the set of zscores contain fewer than 2 values?");
                continue;
            }
        }
        MappedGeneSetTTestValue[] filterResultpValues = pValueArrayList.toArray(new MappedGeneSetTTestValue[0]);
        ArrayList<MappedGeneSetTTestValue> filteredPValueArrayList = new ArrayList<MappedGeneSetTTestValue>();
        // Now we've got all the p values they need to be corrected.
        if (applyMultipleTestingCorrection) {
            BenjHochFDR.calculateQValues(filterResultpValues);
        }
        System.err.println(filterResultpValues.length + " p-values calculated, multtest = " + applyMultipleTestingCorrection + ", pval limit = " + pValueLimit);
        for (int i = 0; i < filterResultpValues.length; i++) {
            double pOrQvalue;
            if (applyMultipleTestingCorrection) {
                pOrQvalue = filterResultpValues[i].q;
            } else {
                pOrQvalue = filterResultpValues[i].p;
            }
            // check whether it passes the p/q-value and z-score cut-offs
            if (optionsPanel.reportAllResults.isSelected()) {
                filteredPValueArrayList.add(filterResultpValues[i]);
            } else {
                if ((pOrQvalue < pValueLimit) && (Math.abs(filterResultpValues[i].mappedGeneSet.meanZScore) > zScoreThreshold)) {
                    filteredPValueArrayList.add(filterResultpValues[i]);
                }
            }
        }
        // convert the ArrayList to MappedGeneSetTTestValue
        filterResultpValues = filteredPValueArrayList.toArray(new MappedGeneSetTTestValue[0]);
        if (filterResultpValues.length == 0) {
            JOptionPane.showMessageDialog(SeqMonkApplication.getInstance(), "No sets of genes were identified using the selected parameters, \ntry changing the gene sets or relaxing the p-value/z-score thresholds.", "No gene sets identified", JOptionPane.INFORMATION_MESSAGE);
        } else {
            geneSetDisplay = new GeneSetDisplay(dataCollection, listDescription(), fromStore, toStore, probes, probeZScoreLookupTable, filterResultpValues, startingList, customRegressionValues, simpleRegression,
                    // optionsPanel.bidirectionalRadioButton.isSelected());
                    false);
            geneSetDisplay.addWindowListener(this);
        }
        // We don't want to save the probe list here, we're bringing up the intermediate display from which probe lists can be saved.
        progressCancelled();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
Also used : GeneSetCollection(uk.ac.babraham.SeqMonk.GeneSets.GeneSetCollection) ArrayList(java.util.ArrayList) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) MappedGeneSetTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.MappedGeneSetTTestValue) KolmogorovSmirnovTest(org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) GeneSetCollectionParser(uk.ac.babraham.SeqMonk.GeneSets.GeneSetCollectionParser) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) SimpleRegression(org.apache.commons.math3.stat.regression.SimpleRegression) MappedGeneSet(uk.ac.babraham.SeqMonk.GeneSets.MappedGeneSet)
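
In this filter the regression (optionally enabled via calculateLinearRegression) is fitted with an intercept over the (fromStore, toStore) value of every valid probe, and the fitted line is then used explicitly as expectedY = slope * x + intercept to turn each probe's toStore value into a residual. A compact sketch of that residual-from-fit idea, using hypothetical quantitation values and only commons-math3:

import org.apache.commons.math3.stat.regression.SimpleRegression;

public class ResidualFromFitSketch {
    public static void main(String[] args) {
        SimpleRegression simpleRegression = new SimpleRegression(); // intercept included
        // Hypothetical (fromStore, toStore) quantitation values, one row per probe.
        double[][] probeValues = { { 2.0, 2.4 }, { 4.0, 4.1 }, { 6.0, 6.6 }, { 8.0, 8.3 } };
        simpleRegression.addData(probeValues);
        System.out.println("intercept = " + simpleRegression.getIntercept()
                + ", slope = " + simpleRegression.getSlope());
        // Residual of one observation from the fitted line, mirroring the
        // expectedY / diff calculation inside generateProbeList.
        double x = 5.0;
        double observedY = 5.6;
        double expectedY = (simpleRegression.getSlope() * x) + simpleRegression.getIntercept();
        double diff = observedY - expectedY;
        System.out.println("expected = " + expectedY + ", residual = " + diff);
    }
}

Since the intercept is fitted, simpleRegression.predict(x) would return the same expected value as the explicit slope * x + intercept used in the original code.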

Aggregations

SimpleRegression (org.apache.commons.math3.stat.regression.SimpleRegression) 33
ArrayList (java.util.ArrayList) 13
Test (org.junit.Test) 4
FractionClassificationResult (gdsc.core.match.FractionClassificationResult) 3
List (java.util.List) 3
Sample (function.genotype.base.Sample) 2
BasePoint (gdsc.core.match.BasePoint) 2
FractionalAssignment (gdsc.core.match.FractionalAssignment) 2
FastCorrelator (gdsc.core.utils.FastCorrelator) 2
StoredDataStatistics (gdsc.core.utils.StoredDataStatistics) 2
PeakResultPoint (gdsc.smlm.ij.plugins.ResultsMatchCalculator.PeakResultPoint) 2
BasePreprocessedPeakResult (gdsc.smlm.results.filter.BasePreprocessedPeakResult) 2
DirectFilter (gdsc.smlm.results.filter.DirectFilter) 2
PeakFractionalAssignment (gdsc.smlm.results.filter.PeakFractionalAssignment) 2
PreprocessedPeakResult (gdsc.smlm.results.filter.PreprocessedPeakResult) 2
TIntHashSet (gnu.trove.set.hash.TIntHashSet) 2
Plot (ij.gui.Plot) 2
PlotWindow (ij.gui.PlotWindow) 2
HashMap (java.util.HashMap) 2
Map (java.util.Map) 2