use of org.apache.commons.math3.stat.regression.SimpleRegression in project GDSC-SMLM by aherbert.
the class BenchmarkSpotFilter method summariseResults.
private BenchmarkFilterResult summariseResults(TIntObjectHashMap<FilterResult> filterResults, FitEngineConfiguration config, MaximaSpotFilter spotFilter, boolean relativeDistances, boolean batchSummary) {
BenchmarkFilterResult filterResult = new BenchmarkFilterResult(filterResults, config, spotFilter);
// Note:
// Although we can compute the TP/FP score as each additional spot is added
// using the RankedScoreCalculator this is not applicable to the PeakFit method.
// The method relies on all spot candidates being present in order to make a
// decision to fit the candidate as a multiple. So scoring the filter candidates using
// for example the top 10 may get a better score than if all candidates were scored
// and the scores accumulated for the top 10, it is not how the algorithm will use the
// candidate set. I.e. It does not use the top 10, then top 20 to refine the fit, etc.
// (the method is not iterative) .
// We require an assessment of how a subset of the scored candidates
// in ranked order contributes to the overall score, i.e. are the candidates ranked
// in the correct order, those most contributing to the match to the underlying data
// should be higher up and those least contributing will be at the end.
// TODO We could add some smart filtering of candidates before ranking. This would
// allow assessment of the candidate set handed to PeakFit. E.g. Threshold the image
// and only use candidates that are in the foreground region.
double[][] cumul = histogramFailures(filterResult);
// Create the overall match score
final double[] total = new double[3];
final ArrayList<ScoredSpot> allSpots = new ArrayList<BenchmarkSpotFilter.ScoredSpot>();
filterResults.forEachValue(new TObjectProcedure<FilterResult>() {
public boolean execute(FilterResult result) {
total[0] += result.result.getTP();
total[1] += result.result.getFP();
total[2] += result.result.getFN();
allSpots.addAll(Arrays.asList(result.spots));
return true;
}
});
double tp = total[0], fp = total[1], fn = total[2];
FractionClassificationResult allResult = new FractionClassificationResult(tp, fp, 0, fn);
// The number of actual results
final double n = (tp + fn);
StringBuilder sb = new StringBuilder();
double signal = (simulationParameters.minSignal + simulationParameters.maxSignal) * 0.5;
// Create the benchmark settings and the fitting settings
sb.append(imp.getStackSize()).append("\t");
final int w = lastAnalysisBorder.width;
final int h = lastAnalysisBorder.height;
sb.append(w).append("\t");
sb.append(h).append("\t");
sb.append(Utils.rounded(n)).append("\t");
double density = (n / imp.getStackSize()) / (w * h) / (simulationParameters.a * simulationParameters.a / 1e6);
sb.append(Utils.rounded(density)).append("\t");
sb.append(Utils.rounded(signal)).append("\t");
sb.append(Utils.rounded(simulationParameters.s)).append("\t");
sb.append(Utils.rounded(simulationParameters.a)).append("\t");
sb.append(Utils.rounded(simulationParameters.depth)).append("\t");
sb.append(simulationParameters.fixedDepth).append("\t");
sb.append(Utils.rounded(simulationParameters.gain)).append("\t");
sb.append(Utils.rounded(simulationParameters.readNoise)).append("\t");
sb.append(Utils.rounded(simulationParameters.b)).append("\t");
sb.append(Utils.rounded(simulationParameters.b2)).append("\t");
// Compute the noise
double noise = simulationParameters.b2;
if (simulationParameters.emCCD) {
// The b2 parameter was computed without application of the EM-CCD noise factor of 2.
//final double b2 = backgroundVariance + readVariance
// = simulationParameters.b + readVariance
// This should be applied only to the background variance.
final double readVariance = noise - simulationParameters.b;
noise = simulationParameters.b * 2 + readVariance;
}
sb.append(Utils.rounded(signal / Math.sqrt(noise))).append("\t");
sb.append(Utils.rounded(simulationParameters.s / simulationParameters.a)).append("\t");
sb.append(config.getDataFilterType()).append("\t");
//sb.append(spotFilter.getName()).append("\t");
sb.append(spotFilter.getSearch()).append("\t");
sb.append(spotFilter.getBorder()).append("\t");
sb.append(Utils.rounded(spotFilter.getSpread())).append("\t");
sb.append(config.getDataFilter(0)).append("\t");
final double param = config.getSmooth(0);
final double hwhmMin = config.getHWHMMin();
if (relativeDistances) {
sb.append(Utils.rounded(param * hwhmMin)).append("\t");
sb.append(Utils.rounded(param)).append("\t");
} else {
sb.append(Utils.rounded(param)).append("\t");
sb.append(Utils.rounded(param / hwhmMin)).append("\t");
}
sb.append(spotFilter.getDescription()).append("\t");
sb.append(lastAnalysisBorder.x).append("\t");
sb.append(MATCHING_METHOD[matchingMethod]).append("\t");
sb.append(Utils.rounded(lowerMatchDistance)).append("\t");
sb.append(Utils.rounded(matchDistance)).append("\t");
sb.append(Utils.rounded(lowerSignalFactor)).append("\t");
sb.append(Utils.rounded(upperSignalFactor));
resultPrefix = sb.toString();
// Add the results
sb.append("\t");
// Rank the scored spots by intensity
Collections.sort(allSpots);
// Produce Recall, Precision, Jaccard for each cut of the spot candidates
double[] r = new double[allSpots.size() + 1];
double[] p = new double[r.length];
double[] j = new double[r.length];
double[] c = new double[r.length];
double[] truePositives = new double[r.length];
double[] falsePositives = new double[r.length];
double[] intensity = new double[r.length];
// Note: fn = n - tp
tp = fp = 0;
int i = 1;
p[0] = 1;
FastCorrelator corr = new FastCorrelator();
double lastC = 0;
double[] i1 = new double[r.length];
double[] i2 = new double[r.length];
int ci = 0;
SimpleRegression regression = new SimpleRegression(false);
for (ScoredSpot s : allSpots) {
if (s.match) {
// Score partial matches as part true-positive and part false-positive.
// TP can be above 1 if we are allowing multiple matches.
tp += s.getScore();
fp += s.antiScore();
// Just use a rounded intensity for now
final double spotIntensity = s.getIntensity();
final long v1 = (long) Math.round(spotIntensity);
final long v2 = (long) Math.round(s.intensity);
regression.addData(spotIntensity, s.intensity);
i1[ci] = spotIntensity;
i2[ci] = s.intensity;
ci++;
corr.add(v1, v2);
lastC = corr.getCorrelation();
} else
fp++;
r[i] = (double) tp / n;
p[i] = (double) tp / (tp + fp);
// (tp+fp+fn) == (fp+n) since tp+fn=n;
j[i] = (double) tp / (fp + n);
c[i] = lastC;
truePositives[i] = tp;
falsePositives[i] = fp;
intensity[i] = s.getIntensity();
i++;
}
i1 = Arrays.copyOf(i1, ci);
i2 = Arrays.copyOf(i2, ci);
final double slope = regression.getSlope();
sb.append(Utils.rounded(slope)).append("\t");
addResult(sb, allResult, c[c.length - 1]);
// Output the match results when the recall achieves the fraction of the maximum.
double target = r[r.length - 1];
if (recallFraction < 100)
target *= recallFraction / 100.0;
int fractionIndex = 0;
while (fractionIndex < r.length && r[fractionIndex] < target) {
fractionIndex++;
}
if (fractionIndex == r.length)
fractionIndex--;
addResult(sb, new FractionClassificationResult(truePositives[fractionIndex], falsePositives[fractionIndex], 0, n - truePositives[fractionIndex]), c[fractionIndex]);
// Output the match results at the maximum jaccard score
int maxIndex = 0;
for (int ii = 1; ii < r.length; ii++) {
if (j[maxIndex] < j[ii])
maxIndex = ii;
}
addResult(sb, new FractionClassificationResult(truePositives[maxIndex], falsePositives[maxIndex], 0, n - truePositives[maxIndex]), c[maxIndex]);
sb.append(Utils.rounded(time / 1e6));
// Calculate AUC (Average precision == Area Under Precision-Recall curve)
final double auc = AUCCalculator.auc(p, r);
// Compute the AUC using the adjusted precision curve
// which uses the maximum precision for recall >= r
final double[] maxp = new double[p.length];
double max = 0;
for (int k = maxp.length; k-- > 0; ) {
if (max < p[k])
max = p[k];
maxp[k] = max;
}
final double auc2 = AUCCalculator.auc(maxp, r);
sb.append("\t").append(Utils.rounded(auc));
sb.append("\t").append(Utils.rounded(auc2));
// Output the number of fit failures that must be processed to capture fractions of the true positives
if (cumul[0].length != 0) {
sb.append("\t").append(Utils.rounded(getFailures(cumul, 0.80)));
sb.append("\t").append(Utils.rounded(getFailures(cumul, 0.90)));
sb.append("\t").append(Utils.rounded(getFailures(cumul, 0.95)));
sb.append("\t").append(Utils.rounded(getFailures(cumul, 0.99)));
sb.append("\t").append(Utils.rounded(cumul[0][cumul[0].length - 1]));
} else
sb.append("\t\t\t\t\t");
BufferedTextWindow resultsTable = getTable(batchSummary);
resultsTable.append(sb.toString());
// Store results
filterResult.auc = auc;
filterResult.auc2 = auc2;
filterResult.r = r;
filterResult.p = p;
filterResult.j = j;
filterResult.c = c;
filterResult.maxIndex = maxIndex;
filterResult.fractionIndex = fractionIndex;
filterResult.cumul = cumul;
filterResult.slope = slope;
filterResult.i1 = i1;
filterResult.i2 = i2;
filterResult.intensity = intensity;
filterResult.relativeDistances = relativeDistances;
filterResult.time = time;
return filterResult;
}
use of org.apache.commons.math3.stat.regression.SimpleRegression in project GDSC-SMLM by aherbert.
the class BenchmarkFilterAnalysis method reportResults.
private ComplexFilterScore reportResults(boolean newResults, List<ComplexFilterScore> filters) {
if (filters.isEmpty()) {
IJ.log("Warning: No filters pass the criteria");
return null;
}
getCoordinateStore();
Collections.sort(filters);
FractionClassificationResult topFilterClassificationResult = null;
ArrayList<FractionalAssignment[]> topFilterResults = null;
String topFilterSummary = null;
if (showSummaryTable || saveTemplate) {
createSummaryWindow();
int n = 0;
final double range = (summaryDepth / simulationParameters.a) * 0.5;
int np = 0;
for (double depth : depthStats) {
if (Math.abs(depth) < range)
np++;
}
for (ComplexFilterScore fs : filters) {
final ArrayList<FractionalAssignment[]> list = new ArrayList<FractionalAssignment[]>(resultsList.length);
final FractionClassificationResult r = scoreFilter(fs.getFilter(), minimalFilter, resultsList, list, coordinateStore);
final StringBuilder sb = createResult(fs.getFilter(), r);
if (topFilterResults == null) {
topFilterResults = list;
topFilterClassificationResult = r;
}
// Show the recall at the specified depth. Sum the distance and signal factor of all scored spots.
int scored = 0;
double tp = 0, d = 0, sf = 0, rmsd = 0;
SimpleRegression regression = new SimpleRegression(false);
for (FractionalAssignment[] assignments : list) {
if (assignments == null)
continue;
for (int i = 0; i < assignments.length; i++) {
final CustomFractionalAssignment c = (CustomFractionalAssignment) assignments[i];
if (Math.abs(c.peak.error) <= range)
tp += c.getScore();
d += c.d;
sf += c.getSignalFactor();
rmsd += c.d * c.d;
regression.addData(c.peakResult.getSignal(), c.peak.getSignal());
}
scored += assignments.length;
}
final double slope = regression.getSlope();
sb.append('\t');
sb.append(Utils.rounded((double) tp / np)).append('\t');
sb.append(Utils.rounded(d / scored)).append('\t');
sb.append(Utils.rounded(sf / scored)).append('\t');
sb.append(Utils.rounded(Math.sqrt(rmsd / scored))).append('\t');
sb.append(Utils.rounded(slope)).append('\t');
if (fs.atLimit() != null)
sb.append(fs.atLimit());
String text = sb.toString();
if (topFilterSummary == null) {
topFilterSummary = text;
if (!showSummaryTable)
break;
}
if (fs.time != 0) {
sb.append('\t');
sb.append(fs.algorithm);
sb.append('\t');
sb.append(org.apache.commons.lang3.time.DurationFormatUtils.formatDurationHMS(fs.time));
} else
sb.append("\t\t");
if (fs.paramTime != 0) {
sb.append('\t');
sb.append(fs.getParamAlgorithm());
sb.append('\t');
sb.append(org.apache.commons.lang3.time.DurationFormatUtils.formatDurationHMS(fs.paramTime));
} else
sb.append("\t\t");
text = sb.toString();
if (isHeadless)
IJ.log(text);
else
summaryWindow.append(text);
n++;
if (summaryTopN > 0 && n >= summaryTopN)
break;
}
// Add a spacer to the summary table if we have multiple results
if (n > 1 && showSummaryTable) {
if (isHeadless)
IJ.log("");
else
summaryWindow.append("");
}
}
DirectFilter bestFilter = filters.get(0).getFilter();
if (saveBestFilter)
saveFilter(bestFilter);
if (topFilterClassificationResult == null) {
topFilterResults = new ArrayList<FractionalAssignment[]>(resultsList.length);
topFilterClassificationResult = scoreFilter(bestFilter, minimalFilter, resultsList, topFilterResults, coordinateStore);
}
if (newResults || scores.isEmpty()) {
scores.add(new FilterResult(failCount, residualsThreshold, duplicateDistance, filters.get(0)));
}
if (saveTemplate)
saveTemplate(topFilterSummary);
showPlots();
calculateSensitivity();
topFilterResults = depthAnalysis(topFilterResults, bestFilter);
topFilterResults = scoreAnalysis(topFilterResults, bestFilter);
componentAnalysis(topFilterClassificationResult, filters.get(0));
PreprocessedPeakResult[] filterResults = null;
if (isShowOverlay())
filterResults = showOverlay(topFilterResults, bestFilter);
saveResults(filterResults, bestFilter);
wo.tile();
return filters.get(0);
}
use of org.apache.commons.math3.stat.regression.SimpleRegression in project dhis2-core by dhis2.
the class DefaultChartService method getCategoryDataSet.
private CategoryDataset[] getCategoryDataSet(BaseChart chart) {
Map<String, Object> valueMap = new HashMap<>();
if (chart.isAnalyticsType(AnalyticsType.AGGREGATE)) {
valueMap = analyticsService.getAggregatedDataValueMapping(chart);
} else if (chart.isAnalyticsType(AnalyticsType.EVENT)) {
Grid grid = eventAnalyticsService.getAggregatedEventData(chart);
chart.setDataItemGrid(grid);
valueMap = GridUtils.getMetaValueMapping(grid, (grid.getWidth() - 1));
}
DefaultCategoryDataset regularDataSet = new DefaultCategoryDataset();
DefaultCategoryDataset regressionDataSet = new DefaultCategoryDataset();
SimpleRegression regression = new SimpleRegression();
BaseAnalyticalObject.sortKeys(valueMap);
List<NameableObject> seriez = new ArrayList<>(chart.series());
List<NameableObject> categories = new ArrayList<>(chart.category());
if (chart.hasSortOrder()) {
categories = getSortedCategories(categories, chart, valueMap);
}
for (NameableObject series : seriez) {
double categoryIndex = 0;
for (NameableObject category : categories) {
categoryIndex++;
String key = getKey(series, category, chart.getAnalyticsType());
Object object = valueMap.get(key);
Number value = object != null && object instanceof Number ? (Number) object : null;
regularDataSet.addValue(value, series.getShortName(), category.getShortName());
if (chart.isRegression() && value != null && value instanceof Double && !MathUtils.isEqual((Double) value, MathUtils.ZERO)) {
regression.addData(categoryIndex, (Double) value);
}
}
if (// Period must be category
chart.isRegression()) {
categoryIndex = 0;
for (NameableObject category : chart.category()) {
final double value = regression.predict(categoryIndex++);
if (!Double.isNaN(value)) {
regressionDataSet.addValue(value, TREND_PREFIX + series.getShortName(), category.getShortName());
}
}
}
}
return new CategoryDataset[] { regularDataSet, regressionDataSet };
}
use of org.apache.commons.math3.stat.regression.SimpleRegression in project incubator-heron by apache.
the class ComponentMetricsHelper method computeBufferSizeTrend.
public void computeBufferSizeTrend() {
for (InstanceMetrics instanceMetrics : componentMetrics.getMetrics().values()) {
Map<Instant, Double> bufferMetrics = instanceMetrics.getMetrics().get(METRIC_BUFFER_SIZE.text());
if (bufferMetrics == null || bufferMetrics.size() < 3) {
// missing of insufficient data for creating a trend line
continue;
}
SimpleRegression simpleRegression = new SimpleRegression(true);
for (Instant timestamp : bufferMetrics.keySet()) {
simpleRegression.addData(timestamp.getEpochSecond(), bufferMetrics.get(timestamp));
}
double slope = simpleRegression.getSlope();
instanceMetrics.addMetric(METRIC_WAIT_Q_GROWTH_RATE.text(), slope);
if (maxBufferChangeRate < slope) {
maxBufferChangeRate = slope;
}
}
}
use of org.apache.commons.math3.stat.regression.SimpleRegression in project SeqMonk by s-andrews.
the class GeneSetIntensityDifferenceFilter method generateProbeList.
/* (non-Javadoc)
* @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
*/
protected void generateProbeList() {
try {
applyMultipleTestingCorrection = optionsPanel.multipleTestingBox.isSelected();
calculateCustomRegression = optionsPanel.calculateCustomRegressionBox.isSelected();
if (calculateCustomRegression == false) {
customRegressionValues = null;
}
if (calculateLinearRegression) {
simpleRegression = new SimpleRegression();
}
Probe[] allProbes = startingList.getAllProbes();
// We're not allowing multiple comparisons - this is a bit of a messy workaround so it's compatible with other methods.
fromStore = fromStores[0];
toStore = toStores[0];
ArrayList<Probe> probeArrayList = new ArrayList<Probe>();
int NaNcount = 0;
// remove the invalid probes - the ones without a value
for (int i = 0; i < allProbes.length; i++) {
if ((Float.isNaN(fromStore.getValueForProbe(allProbes[i]))) || (Float.isNaN(toStore.getValueForProbe(allProbes[i])))) {
NaNcount++;
} else {
probeArrayList.add(allProbes[i]);
}
}
System.err.println("Found " + NaNcount + " probes that were invalid.");
probes = probeArrayList.toArray(new Probe[0]);
if (calculateCustomRegression == true) {
customRegressionValues = new float[2][probes.length];
}
// We'll pull the number of probes to sample from the preferences if they've changed it
Integer updatedProbesPerSet = optionsPanel.probesPerSet();
if (updatedProbesPerSet != null)
probesPerSet = updatedProbesPerSet;
// we want a set of z-scores using the local distribution.
probeZScoreLookupTable = new Hashtable<Probe, Double>();
// Put something in the progress whilst we're ordering the probe values to make the comparison.
progressUpdated("Generating background model", 0, 1);
Comparator<Integer> comp = new AverageIntensityComparator(fromStore, toStore, probes);
// We need to generate a set of probe indices that can be ordered by their average intensity
Integer[] indices = new Integer[probes.length];
for (int i = 0; i < probes.length; i++) {
indices[i] = i;
/* add the data to the linear regression object */
if (calculateLinearRegression) {
simpleRegression.addData((double) fromStore.getValueForProbe(probes[i]), (double) toStore.getValueForProbe(probes[i]));
}
}
if (calculateLinearRegression) {
System.out.println("intercept = " + simpleRegression.getIntercept() + ", slope = " + simpleRegression.getSlope());
}
Arrays.sort(indices, comp);
/* This holds the indices to get the deduplicated probe from the original list of probes */
deduplicatedIndices = new Integer[probes.length];
/* The number of probes with different values */
int dedupProbeCounter = 0;
// the probes deduplicated by value
ArrayList<Probe> deduplicatedProbes = new ArrayList<Probe>();
// populate the first one so that we have something to compare to in the loop
deduplicatedIndices[0] = 0;
deduplicatedProbes.add(probes[indices[0]]);
progressUpdated("Made 0 of 1 comparisons", 0, 1);
for (int i = 1; i < indices.length; i++) {
/* indices have been sorted, now we need to check whether adjacent pair values are identical */
if ((fromStore.getValueForProbe(probes[indices[i]]) == fromStore.getValueForProbe(probes[indices[i - 1]])) && (toStore.getValueForProbe(probes[indices[i]]) == toStore.getValueForProbe(probes[indices[i - 1]]))) {
/* If they are identical, do not add the probe to the deduplicatedProbes object, but have a reference for it so we can look up which deduplicated probe and
* therefore which distribution slice to use for the duplicated probe. */
deduplicatedIndices[i] = dedupProbeCounter;
} else {
deduplicatedProbes.add(probes[indices[i]]);
dedupProbeCounter++;
deduplicatedIndices[i] = dedupProbeCounter;
}
}
Probe[] dedupProbes = deduplicatedProbes.toArray(new Probe[0]);
// make sure we're not trying to use more probes than we've got in the analysis
if (probesPerSet > dedupProbes.length) {
probesPerSet = dedupProbes.length;
}
System.out.println("total number of probe values = " + probes.length);
System.out.println("number of deduplicated probe values = " + dedupProbes.length);
System.out.println("probesPerSet = " + probesPerSet);
// I want this to contain all the differences, then from that I'm going to get the z-scores.
double[] currentDiffSet = new double[probesPerSet];
for (int i = 0; i < indices.length; i++) {
if (cancel) {
cancel = false;
progressCancelled();
return;
}
if (i % 1000 == 0) {
int progress = (i * 100) / indices.length;
// progress += 100*comparisonIndex;
progressUpdated("Made 0 out of 1 comparisons", progress, 100);
}
/**
* There are +1s in here because we skip over j when j == startingIndex.
*/
// We need to make up the set of differences to represent this probe
int startingIndex = deduplicatedIndices[i] - (probesPerSet / 2);
if (startingIndex < 0)
startingIndex = 0;
if (startingIndex + (probesPerSet + 1) >= dedupProbes.length)
startingIndex = dedupProbes.length - (probesPerSet + 1);
try {
for (int j = startingIndex; j < startingIndex + (probesPerSet + 1); j++) {
if (j == startingIndex) {
// Don't include the point being tested in the background model
continue;
}
double diff;
if (calculateLinearRegression == true) {
if (j > dedupProbes.length) {
System.err.println(" j is too big, it's " + j + " and dedupProbes.length = " + dedupProbes.length + ", starting index = " + startingIndex);
}
double x = fromStore.getValueForProbe(dedupProbes[j]);
double expectedY = (simpleRegression.getSlope() * x) + simpleRegression.getIntercept();
diff = toStore.getValueForProbe(dedupProbes[j]) - expectedY;
} else {
diff = toStore.getValueForProbe(dedupProbes[j]) - fromStore.getValueForProbe(dedupProbes[j]);
}
if (j < startingIndex) {
currentDiffSet[j - startingIndex] = diff;
} else {
currentDiffSet[(j - startingIndex) - 1] = diff;
}
}
if (calculateCustomRegression == true) {
// the average/ kind of centre line
float z = ((fromStore.getValueForProbe(probes[indices[i]]) + toStore.getValueForProbe(probes[indices[i]])) / 2);
customRegressionValues[0][indices[i]] = z - ((float) SimpleStats.mean(currentDiffSet) / 2);
customRegressionValues[1][indices[i]] = z + ((float) SimpleStats.mean(currentDiffSet) / 2);
}
double mean = 0;
// Get the difference for this point
double diff;
if (calculateLinearRegression == true) {
double x = fromStore.getValueForProbe(probes[indices[i]]);
double expectedY = (simpleRegression.getSlope() * x) + simpleRegression.getIntercept();
diff = toStore.getValueForProbe(probes[indices[i]]) - expectedY;
} else if (calculateCustomRegression == true) {
diff = toStore.getValueForProbe(probes[indices[i]]) - fromStore.getValueForProbe(probes[indices[i]]);
mean = SimpleStats.mean(currentDiffSet);
} else {
diff = toStore.getValueForProbe(probes[indices[i]]) - fromStore.getValueForProbe(probes[indices[i]]);
}
double stdev;
stdev = SimpleStats.stdev(currentDiffSet, mean);
// if there are no reads in the probe for either of the datasets, should we set the zscore to 0??
double zScore = (diff - mean) / stdev;
// modified z score
// median absolute deviation
/* double[] madArray = new double[currentDiffSet.length];
double median = SimpleStats.median(currentDiffSet);
for(int d=0; d<currentDiffSet.length; d++){
madArray[d] = Math.abs(currentDiffSet[d] - median);
}
double mad = SimpleStats.median(madArray);
zScore = (0.6745 * (diff - median))/mad;
}
*/
probeZScoreLookupTable.put(probes[indices[i]], zScore);
} catch (SeqMonkException sme) {
progressExceptionReceived(sme);
return;
}
}
// make this an array list as we're kicking out the mapped gene sets that have zscores with variance of 0.
ArrayList<MappedGeneSetTTestValue> pValueArrayList = new ArrayList<MappedGeneSetTTestValue>();
MappedGeneSet[] mappedGeneSets = null;
/* if we're using the gene set from a file, map the gene sets to the probes */
if (optionsPanel.geneSetsFileRadioButton.isSelected()) {
GeneSetCollectionParser geneSetCollectionParser = new GeneSetCollectionParser(minGenesInSet, maxGenesInSet);
GeneSetCollection geneSetCollection = geneSetCollectionParser.parseGeneSetInformation(validGeneSetFilepath);
MappedGeneSet[] allMappedGeneSets = geneSetCollection.getMappedGeneSets(probes);
if (allMappedGeneSets == null) {
JOptionPane.showMessageDialog(SeqMonkApplication.getInstance(), "No sets of genes could be matched to probes.\nCheck that the gmt file is for the right species. \nTo use gene sets from a file, probe names must contain the gene name. \nTry defining new probes over genes (e.g. using the RNA-seq quantitation pipeline) or use existing probes lists instead of a gene set file.", "No gene sets identified", JOptionPane.ERROR_MESSAGE);
throw new SeqMonkException("No sets of genes could be matched to probes.\nTo use gene sets from a file, probe names must contain the gene name.\nTry defining new probes over genes or use existing probes lists instead of a gene set file.");
} else {
ArrayList<MappedGeneSet> mgsArrayList = new ArrayList<MappedGeneSet>();
/* get rid of those that have fewer probes in the set than minGenesInSet. We shouldn't exceed maxGenesInSet unless probes have been made over something other than genes */
for (int i = 0; i < allMappedGeneSets.length; i++) {
if (allMappedGeneSets[i].getProbes().length >= minGenesInSet) {
mgsArrayList.add(allMappedGeneSets[i]);
}
}
mappedGeneSets = mgsArrayList.toArray(new MappedGeneSet[0]);
}
} else /* or if we're using existing probelists, create mappedGeneSets from them */
if (optionsPanel.probeListRadioButton.isSelected() && selectedProbeLists != null) {
mappedGeneSets = new MappedGeneSet[selectedProbeLists.length];
for (int i = 0; i < selectedProbeLists.length; i++) {
mappedGeneSets[i] = new MappedGeneSet(selectedProbeLists[i]);
}
} else {
throw new SeqMonkException("Haven't got any genesets to use, shouldn't have got here without having any selected.");
}
if (mappedGeneSets == null || mappedGeneSets.length == 0) {
throw new SeqMonkException("Couldn't map gene sets to the probes, try again with a different probe set");
} else {
System.err.println("there are " + mappedGeneSets.length + " mappedGeneSets");
System.err.println("size of zScore lookup table = " + probeZScoreLookupTable.size());
}
System.err.println("total number of mapped gene sets = " + mappedGeneSets.length);
// deduplicate our mappedGeneSets
if (optionsPanel.deduplicateGeneSetBox.isSelected()) {
mappedGeneSets = deduplicateMappedGeneSets(mappedGeneSets);
}
System.err.println("deduplicated mapped gene sets = " + mappedGeneSets.length);
/*
* we need to go through the mapped gene set and get all the values for the matched probes
*
*/
for (int i = 0; i < mappedGeneSets.length; i++) {
Probe[] geneSetProbes = mappedGeneSets[i].getProbes();
// to contain all the z-scores for the gene set
double[] geneSetZscores = new double[geneSetProbes.length];
// Find the z-scores for each of the probes in the mappedGeneSet
for (int gsp = 0; gsp < geneSetProbes.length; gsp++) {
if (probeZScoreLookupTable.containsKey(geneSetProbes[gsp])) {
geneSetZscores[gsp] = probeZScoreLookupTable.get(geneSetProbes[gsp]);
}
}
// this is just temporary to check the stats - there's some duplication with this.... is there??
mappedGeneSets[i].zScores = geneSetZscores;
if (geneSetZscores.length > 1) {
// the number of probes in the mappedGeneSet should always be > 1 anyway as the mappedGeneSet shouldn't be created if there are < 2 matched probes.
double pVal;
if (optionsPanel.statisticalTestBox.getSelectedItem().toString().equals("t-test")) {
pVal = TTest.calculatePValue(geneSetZscores, 0);
} else if (optionsPanel.statisticalTestBox.getSelectedItem().toString().equals("Kolmorogov-Smirnov test")) {
double[] allZscores = getAllZScores(probeZScoreLookupTable);
KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
pVal = ksTest.kolmogorovSmirnovTest(geneSetZscores, allZscores);
} else if (optionsPanel.statisticalTestBox.getSelectedItem().toString().equals("Kolmorogov-Smirnov non-directional test")) {
double[] allZscores = getAllZScores(probeZScoreLookupTable);
KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
pVal = ksTest.kolmogorovSmirnovTest(convertToAbsoluteValues(geneSetZscores), convertToAbsoluteValues(allZscores));
} else {
throw new IllegalArgumentException("Didn't recognise statistical test " + optionsPanel.statisticalTestBox.getSelectedItem().toString());
}
mappedGeneSets[i].meanZScore = SimpleStats.mean(geneSetZscores);
// check the variance - we don't want variances of 0.
double stdev = SimpleStats.stdev(geneSetZscores);
if ((stdev * stdev) < 0.00000000000001) {
continue;
} else // if all the differences between the datasets are 0 then just get rid of them
if (Double.isNaN(pVal)) {
continue;
} else {
pValueArrayList.add(new MappedGeneSetTTestValue(mappedGeneSets[i], pVal));
}
} else {
System.err.println("Fell through the net somewhere, why does the set of zscores contain fewer than 2 values?");
continue;
}
}
MappedGeneSetTTestValue[] filterResultpValues = pValueArrayList.toArray(new MappedGeneSetTTestValue[0]);
ArrayList<MappedGeneSetTTestValue> filteredPValueArrayList = new ArrayList<MappedGeneSetTTestValue>();
// Now we've got all the p values they need to be corrected.
if (applyMultipleTestingCorrection) {
BenjHochFDR.calculateQValues(filterResultpValues);
}
System.err.println(filterResultpValues.length + " p-values calculated, multtest = " + applyMultipleTestingCorrection + ", pval limit = " + pValueLimit);
for (int i = 0; i < filterResultpValues.length; i++) {
double pOrQvalue;
if (applyMultipleTestingCorrection) {
pOrQvalue = filterResultpValues[i].q;
} else {
pOrQvalue = filterResultpValues[i].p;
}
// check whether it passes the p/q-value and z-score cut-offs
if (optionsPanel.reportAllResults.isSelected()) {
filteredPValueArrayList.add(filterResultpValues[i]);
} else {
if ((pOrQvalue < pValueLimit) && (Math.abs(filterResultpValues[i].mappedGeneSet.meanZScore) > zScoreThreshold)) {
filteredPValueArrayList.add(filterResultpValues[i]);
}
}
}
// convert the ArrayList to MappedGeneSetTTestValue
filterResultpValues = filteredPValueArrayList.toArray(new MappedGeneSetTTestValue[0]);
if (filterResultpValues.length == 0) {
JOptionPane.showMessageDialog(SeqMonkApplication.getInstance(), "No sets of genes were identified using the selected parameters, \ntry changing the gene sets or relaxing the p-value/z-score thresholds.", "No gene sets identified", JOptionPane.INFORMATION_MESSAGE);
} else {
geneSetDisplay = new GeneSetDisplay(dataCollection, listDescription(), fromStore, toStore, probes, probeZScoreLookupTable, filterResultpValues, startingList, customRegressionValues, simpleRegression, // optionsPanel.bidirectionalRadioButton.isSelected());
false);
geneSetDisplay.addWindowListener(this);
}
// We don't want to save the probe list here, we're bringing up the intermediate display from which probe lists can be saved.
progressCancelled();
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
Aggregations