Use of org.apache.commons.math3.stat.descriptive.SummaryStatistics in project GDSC-SMLM by aherbert: class TraceMolecules, method runOptimiser.
/**
 * Optimises the tracing distance and time thresholds.
 * <p>
 * Seeds the search with a distance estimate derived from the mean localisation precision,
 * traces over a grid of (distance, time) thresholds, scores each grid point by its
 * fractional difference from a reference value (either the configured blinking rate or the
 * estimated number of molecules), fits the zero contour to locate the optimum, and saves
 * the chosen thresholds back into the settings.
 *
 * @param manager the trace manager holding the localisation results to trace
 */
private void runOptimiser(TraceManager manager) {
// Get an estimate of the number of molecules without blinking
SummaryStatistics stats = new SummaryStatistics();
final double nmPerPixel = this.results.getNmPerPixel();
final double gain = this.results.getGain();
final boolean emCCD = this.results.isEMCCD();
// Accumulate the localisation precision (in nm) of every result
for (PeakResult result : this.results.getResults()) stats.addValue(result.getPrecision(nmPerPixel, gain, emCCD));
// Initial distance threshold (in pixels) = 2.5x the mean precision, as per the PC-PALM
// protocol in Sengupta, et al (2013) Nature Protocols 8, 345.
// (The factor actually applied is 2.5, not "twice" as a previous comment stated.)
double dEstimate = stats.getMean() * 2.5 / nmPerPixel;
// Single-frame trace to estimate the molecule count at this distance
int n = manager.traceMolecules(dEstimate, 1);
// Show the parameter dialog; abort if the user cancels
if (!getParameters(n, dEstimate))
return;
// TODO - Convert the distance threshold to use nm instead of pixels?
// Run tracing over the configured grid of distance/time thresholds. Each element is a
// double[] whose indices 0 and 1 are the distance and time thresholds; index 2 is
// overwritten below with the score. Indices 2/3 initially hold the statistics selected
// via 'statistic' - see runTracing for the exact layout (TODO confirm).
List<double[]> results = runTracing(manager, settings.minDistanceThreshold, settings.maxDistanceThreshold, settings.minTimeThreshold, settings.maxTimeThreshold, settings.optimiserSteps);
// Compute fractional difference from the true value:
// Use blinking rate directly or the estimated number of molecules
double nReference;
int statistic;
if (optimiseBlinkingRate) {
nReference = settings.blinkingRate;
statistic = 3;
IJ.log(String.format("Estimating blinking rate: %.2f", nReference));
} else {
// The traced count n over-counts molecules by the blinking rate, so divide it out
nReference = n / settings.blinkingRate;
statistic = 2;
IJ.log(String.format("Estimating number of molecules: %d / %.2f = %.2f", n, settings.blinkingRate, nReference));
}
// Overwrite result[2] with the signed fractional difference from the reference.
// Note the sign is flipped between the two modes; the zero contour fitted below is
// unaffected by sign but the orientation of the surface differs.
for (double[] result : results) {
//System.out.printf("%g %g = %g\n", result[0], result[1], result[2]);
if (optimiseBlinkingRate)
result[2] = (nReference - result[statistic]) / nReference;
else
result[2] = (result[statistic] - nReference) / nReference;
}
// Locate the optimal parameters with a fit of the zero contour
boolean found = findOptimalParameters(results);
// Plot even when the contour fit fails so the user can inspect the surface
createPlotResults(results);
if (!found)
return;
// Make fractional difference absolute so that lowest is best
for (double[] result : results) result[2] = Math.abs(result[2]);
// Set the optimal thresholds using the lowest value
double[] best = new double[] { 0, 0, Double.MAX_VALUE };
for (double[] result : results) if (best[2] > result[2])
best = result;
settings.distanceThreshold = best[0];
// NOTE(review): best[1] (the optimal time threshold from the grid) is never written
// back to settings here; the conversion below scales whatever time threshold is already
// stored in settings - confirm this is intentional and not a missing assignment.
// The optimiser works using frames so convert back to the correct units
double convert = (settings.getTimeUnit() == TimeUnit.SECOND) ? exposureTime : 1;
settings.setTimeThreshold(settings.getTimeThreshold() * convert);
IJ.log(String.format("Optimal fractional difference @ D-threshold=%g, T-threshold=%f (%d frames)", settings.distanceThreshold, timeInSeconds(settings), timeInFrames(settings)));
SettingsManager.saveSettings(globalSettings);
}
Use of org.apache.commons.math3.stat.descriptive.SummaryStatistics in project lucene-solr by apache: class HistogramEvaluator, method evaluate.
/**
 * Builds a histogram of the values produced by the first sub-evaluator.
 * <p>
 * The first sub-evaluator must yield a list of numbers; an optional second sub-evaluator
 * yields the bin count (default 10). Returns one map of summary statistics
 * (max/mean/min/stdev/sum/N/var) per histogram bin, in bin order.
 *
 * @param tuple the tuple passed to the sub-evaluators
 * @return a list with one statistics map per bin
 * @throws IOException if a sub-evaluator fails
 */
public List<Map> evaluate(Tuple tuple) throws IOException {
  // First sub-evaluator supplies the data column.
  StreamEvaluator colEval1 = subEvaluators.get(0);
  List<Number> numbers1 = (List<Number>) colEval1.evaluate(tuple);
  double[] column1 = new double[numbers1.size()];
  for (int i = 0; i < numbers1.size(); i++) {
    column1[i] = numbers1.get(i).doubleValue();
  }
  // Optional second sub-evaluator overrides the default bin count.
  int bins = 10;
  if (subEvaluators.size() == 2) {
    StreamEvaluator binsEval = subEvaluators.get(1);
    Number binsNum = (Number) binsEval.evaluate(tuple);
    bins = binsNum.intValue();
  }
  EmpiricalDistribution empiricalDistribution = new EmpiricalDistribution(bins);
  empiricalDistribution.load(column1);
  List<SummaryStatistics> summaries = empiricalDistribution.getBinStats();
  // Presize: exactly one map per bin. (Raw List<Map> kept in the signature for callers;
  // the maps themselves are typed String -> Object.)
  List<Map> binList = new ArrayList<>(summaries.size());
  for (SummaryStatistics statisticalSummary : summaries) {
    Map<String, Object> map = new HashMap<>();
    map.put("max", statisticalSummary.getMax());
    map.put("mean", statisticalSummary.getMean());
    map.put("min", statisticalSummary.getMin());
    map.put("stdev", statisticalSummary.getStandardDeviation());
    map.put("sum", statisticalSummary.getSum());
    map.put("N", statisticalSummary.getN());
    map.put("var", statisticalSummary.getVariance());
    binList.add(map);
  }
  return binList;
}
Use of org.apache.commons.math3.stat.descriptive.SummaryStatistics in project tika by apache: class TokenCounter, method _add.
/**
 * Tokenises {@code content} with the given analyzer and updates the per-field token
 * counts and statistics: total/unique token counts, top-N tokens by frequency, Shannon
 * entropy (base 2) of the token distribution, and summary statistics of token lengths
 * (in code points, weighted by frequency).
 * <p>
 * Fix: the original called {@code ts.close()} before {@code ts.end()}, violating the
 * Lucene TokenStream consumption contract (reset, incrementToken*, end, close), and
 * leaked the stream if {@code incrementToken()} threw. The stream is now ended after
 * the loop and closed in a finally block.
 *
 * @param field    the field whose statistics are updated
 * @param analyzer the analyzer used to tokenise the content
 * @param content  the text to tokenise
 * @throws IOException if tokenisation fails
 */
private void _add(String field, Analyzer analyzer, String content) throws IOException {
  int totalTokens = 0;
  // Per-field token -> count map, created on first use.
  Map<String, MutableInt> tokenMap = map.computeIfAbsent(field, k -> new HashMap<>());
  TokenStream ts = analyzer.tokenStream(field, content);
  try {
    CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      String token = termAtt.toString();
      MutableInt cnt = tokenMap.get(token);
      if (cnt == null) {
        tokenMap.put(token, new MutableInt(1));
      } else {
        cnt.increment();
      }
      totalTokens++;
    }
    // Contract: end() must follow the last incrementToken() and precede close().
    ts.end();
  } finally {
    ts.close();
  }
  int totalUniqueTokens = tokenMap.size();
  // NOTE(review): if _add is called more than once for the same field, tokenMap
  // accumulates across calls while totalTokens is per-call, so p below may exceed 1
  // (and divides by zero if this call produced no tokens) - confirm single-call usage.
  double ent = 0.0d;
  double base = 2.0;
  TokenCountPriorityQueue queue = new TokenCountPriorityQueue(topN);
  SummaryStatistics summaryStatistics = new SummaryStatistics();
  for (Map.Entry<String, MutableInt> e : tokenMap.entrySet()) {
    String token = e.getKey();
    int termFreq = e.getValue().intValue();
    double p = (double) termFreq / (double) totalTokens;
    ent += p * FastMath.log(base, p);
    // Token length in Unicode code points, added once per occurrence.
    int len = token.codePointCount(0, token.length());
    for (int i = 0; i < termFreq; i++) {
      summaryStatistics.addValue(len);
    }
    // Keep only the topN most frequent tokens.
    if (queue.top() == null || queue.size() < topN || termFreq >= queue.top().getValue()) {
      queue.insertWithOverflow(new TokenIntPair(token, termFreq));
    }
  }
  if (totalTokens > 0) {
    // Negate and normalise the accumulated sum to get the entropy.
    ent = (-1.0d / (double) totalTokens) * ent;
  }
  tokenStatistics.put(field, new TokenStatistics(totalUniqueTokens, totalTokens, queue.getArray(), ent, summaryStatistics));
}
Aggregations