use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project pinot by linkedin.
the class ForwardIndexReaderBenchmark method multiValuedReadBenchMarkV1.
public static void multiValuedReadBenchMarkV1(File file, int numDocs, int totalNumValues, int maxEntriesPerDoc, int columnSizeInBits) throws Exception {
System.out.println("******************************************************************");
System.out.println("Analyzing " + file.getName() + " numDocs:" + numDocs + ", totalNumValues:" + totalNumValues + ", maxEntriesPerDoc:" + maxEntriesPerDoc + ", numBits:" + columnSizeInBits);
long start, end;
boolean readFile = true;
boolean randomRead = true;
boolean contextualRead = true;
boolean signed = false;
boolean isMmap = false;
PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "benchmarking");
BaseSingleColumnMultiValueReader reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader(heapBuffer, numDocs, totalNumValues, columnSizeInBits, signed);
int[] intArray = new int[maxEntriesPerDoc];
File outfile = new File("/tmp/" + file.getName() + ".raw");
FileWriter fw = new FileWriter(outfile);
for (int i = 0; i < numDocs; i++) {
int length = reader.getIntArray(i, intArray);
StringBuilder sb = new StringBuilder();
String delim = "";
for (int j = 0; j < length; j++) {
sb.append(delim);
sb.append(intArray[j]);
delim = ",";
}
fw.write(sb.toString());
fw.write("\n");
}
fw.close();
// sequential read
if (readFile) {
DescriptiveStatistics stats = new DescriptiveStatistics();
RandomAccessFile raf = new RandomAccessFile(file, "rw");
ByteBuffer buffer = ByteBuffer.allocateDirect((int) file.length());
raf.getChannel().read(buffer);
for (int run = 0; run < MAX_RUNS; run++) {
long length = file.length();
start = System.currentTimeMillis();
for (int i = 0; i < length; i++) {
byte b = buffer.get(i);
}
end = System.currentTimeMillis();
stats.addValue((end - start));
}
System.out.println("v1 multi value read bytes stats for " + file.getName());
System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
raf.close();
}
if (randomRead) {
DescriptiveStatistics stats = new DescriptiveStatistics();
for (int run = 0; run < MAX_RUNS; run++) {
start = System.currentTimeMillis();
for (int i = 0; i < numDocs; i++) {
int length = reader.getIntArray(i, intArray);
}
end = System.currentTimeMillis();
stats.addValue((end - start));
}
System.out.println("v1 multi value sequential read one stats for " + file.getName());
System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
}
if (contextualRead) {
DescriptiveStatistics stats = new DescriptiveStatistics();
for (int run = 0; run < MAX_RUNS; run++) {
MultiValueReaderContext context = (MultiValueReaderContext) reader.createContext();
start = System.currentTimeMillis();
for (int i = 0; i < numDocs; i++) {
int length = reader.getIntArray(i, intArray, context);
}
end = System.currentTimeMillis();
// System.out.println("RUN:" + run + "Time:" + (end-start));
stats.addValue((end - start));
}
System.out.println("v1 multi value sequential read one with context stats for " + file.getName());
System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
}
reader.close();
heapBuffer.close();
System.out.println("******************************************************************");
}
use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project pinot by linkedin.
the class ForwardIndexReaderBenchmark method singleValuedReadBenchMarkV2.
public static void singleValuedReadBenchMarkV2(File file, int numDocs, int numBits) throws Exception {
boolean signed = false;
boolean isMmap = false;
long start, end;
boolean fullScan = true;
boolean batchRead = true;
boolean singleRead = true;
PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.heap, FileChannel.MapMode.READ_ONLY, "benchmarking");
com.linkedin.pinot.core.io.reader.impl.v2.FixedBitSingleValueReader reader = new com.linkedin.pinot.core.io.reader.impl.v2.FixedBitSingleValueReader(heapBuffer, numDocs, numBits, signed);
if (fullScan) {
DescriptiveStatistics stats = new DescriptiveStatistics();
ByteBuffer buffer = ByteBuffer.allocateDirect((int) file.length());
RandomAccessFile raf = new RandomAccessFile(file, "r");
raf.getChannel().read(buffer);
raf.close();
int[] input = new int[numBits];
int[] output = new int[32];
int numBatches = (numDocs + 31) / 32;
for (int run = 0; run < MAX_RUNS; run++) {
start = System.currentTimeMillis();
for (int i = 0; i < numBatches; i++) {
for (int j = 0; j < numBits; j++) {
input[j] = buffer.getInt(i * numBits * 4 + j * 4);
}
BitPacking.fastunpack(input, 0, output, 0, numBits);
}
end = System.currentTimeMillis();
stats.addValue((end - start));
}
System.out.println(" v2 full scan stats for " + file.getName());
System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
}
if (singleRead) {
DescriptiveStatistics stats = new DescriptiveStatistics();
// sequential read
for (int run = 0; run < MAX_RUNS; run++) {
start = System.currentTimeMillis();
for (int i = 0; i < numDocs; i++) {
int value = reader.getInt(i);
}
end = System.currentTimeMillis();
stats.addValue((end - start));
}
System.out.println(" v2 sequential single read for " + file.getName());
System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
}
if (batchRead) {
DescriptiveStatistics stats = new DescriptiveStatistics();
int batchSize = Math.min(5000, numDocs);
int[] output = new int[batchSize];
int[] rowIds = new int[batchSize];
// sequential read
for (int run = 0; run < MAX_RUNS; run++) {
start = System.currentTimeMillis();
int rowId = 0;
while (rowId < numDocs) {
int length = Math.min(batchSize, numDocs - rowId);
for (int i = 0; i < length; i++) {
rowIds[i] = rowId + i;
}
reader.getIntBatch(rowIds, output, length);
rowId = rowId + length;
}
end = System.currentTimeMillis();
stats.addValue((end - start));
}
System.out.println("v2 sequential batch read stats for " + file.getName());
System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
}
reader.close();
}
use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project GDSC-SMLM by aherbert.
the class PCPALMMolecules method calculateAveragePrecision.
/**
* Calculate the average precision by fitting a skewed Gaussian to the histogram of the precision distribution.
* <p>
* A simple mean and SD of the histogram is computed. If the mean of the Skewed Gaussian does not fit within 3 SDs
* of the simple mean then the simple mean is returned.
*
* @param molecules
* @param title
* the plot title (null if no plot should be displayed)
* @param histogramBins
* @param logFitParameters
* Record the fit parameters to the ImageJ log
* @param removeOutliers
* The distribution is created using all values within 1.5x the inter-quartile range (IQR) of the data
* @return The average precision
*/
public double calculateAveragePrecision(ArrayList<Molecule> molecules, String title, int histogramBins, boolean logFitParameters, boolean removeOutliers) {
// Plot histogram of the precision
float[] data = new float[molecules.size()];
DescriptiveStatistics stats = new DescriptiveStatistics();
double yMin = Double.NEGATIVE_INFINITY, yMax = 0;
for (int i = 0; i < data.length; i++) {
data[i] = (float) molecules.get(i).precision;
stats.addValue(data[i]);
}
// Set the min and max y-values using 1.5 x IQR
if (removeOutliers) {
double lower = stats.getPercentile(25);
double upper = stats.getPercentile(75);
if (Double.isNaN(lower) || Double.isNaN(upper)) {
if (logFitParameters)
Utils.log("Error computing IQR: %f - %f", lower, upper);
} else {
double iqr = upper - lower;
yMin = FastMath.max(lower - iqr, stats.getMin());
yMax = FastMath.min(upper + iqr, stats.getMax());
if (logFitParameters)
Utils.log(" Data range: %f - %f. Plotting 1.5x IQR: %f - %f", stats.getMin(), stats.getMax(), yMin, yMax);
}
}
if (yMin == Double.NEGATIVE_INFINITY) {
yMin = stats.getMin();
yMax = stats.getMax();
if (logFitParameters)
Utils.log(" Data range: %f - %f", yMin, yMax);
}
float[][] hist = Utils.calcHistogram(data, yMin, yMax, histogramBins);
Plot2 plot = null;
if (title != null) {
plot = new Plot2(title, "Precision", "Frequency");
float[] xValues = hist[0];
float[] yValues = hist[1];
if (xValues.length > 0) {
double xPadding = 0.05 * (xValues[xValues.length - 1] - xValues[0]);
plot.setLimits(xValues[0] - xPadding, xValues[xValues.length - 1] + xPadding, 0, Maths.max(yValues) * 1.05);
}
plot.addPoints(xValues, yValues, Plot2.BAR);
Utils.display(title, plot);
}
// Extract non-zero data
float[] x = Arrays.copyOf(hist[0], hist[0].length);
float[] y = hist[1];
int count = 0;
float dx = (x[1] - x[0]) * 0.5f;
for (int i = 0; i < y.length; i++) if (y[i] > 0) {
x[count] = x[i] + dx;
y[count] = y[i];
count++;
}
x = Arrays.copyOf(x, count);
y = Arrays.copyOf(y, count);
// Sense check to fitted data. Get mean and SD of histogram
double[] stats2 = Utils.getHistogramStatistics(x, y);
double mean = stats2[0];
if (logFitParameters)
log(" Initial Statistics: %f +/- %f", stats2[0], stats2[1]);
// Standard Gaussian fit
double[] parameters = fitGaussian(x, y);
if (parameters == null) {
log(" Failed to fit initial Gaussian");
return mean;
}
double newMean = parameters[1];
double error = Math.abs(stats2[0] - newMean) / stats2[1];
if (error > 3) {
log(" Failed to fit Gaussian: %f standard deviations from histogram mean", error);
return mean;
}
if (newMean < yMin || newMean > yMax) {
log(" Failed to fit Gaussian: %f outside data range %f - %f", newMean, yMin, yMax);
return mean;
}
mean = newMean;
if (logFitParameters)
log(" Initial Gaussian: %f @ %f +/- %f", parameters[0], parameters[1], parameters[2]);
double[] initialSolution = new double[] { parameters[0], parameters[1], parameters[2], -1 };
// Fit to a skewed Gaussian (or appropriate function)
double[] skewParameters = fitSkewGaussian(x, y, initialSolution);
if (skewParameters == null) {
log(" Failed to fit Skewed Gaussian");
return mean;
}
SkewNormalFunction sn = new SkewNormalFunction(skewParameters);
if (logFitParameters)
log(" Skewed Gaussian: %f @ %f +/- %f (a = %f) => %f +/- %f", skewParameters[0], skewParameters[1], skewParameters[2], skewParameters[3], sn.getMean(), Math.sqrt(sn.getVariance()));
newMean = sn.getMean();
error = Math.abs(stats2[0] - newMean) / stats2[1];
if (error > 3) {
log(" Failed to fit Skewed Gaussian: %f standard deviations from histogram mean", error);
return mean;
}
if (newMean < yMin || newMean > yMax) {
log(" Failed to fit Skewed Gaussian: %f outside data range %f - %f", newMean, yMin, yMax);
return mean;
}
// Use original histogram x-axis to maintain all the bins
if (plot != null) {
x = hist[0];
for (int i = 0; i < y.length; i++) x[i] += dx;
plot.setColor(Color.red);
addToPlot(plot, x, skewParameters, Plot2.LINE);
plot.setColor(Color.black);
Utils.display(title, plot);
}
// Return the average precision from the fitted curve
return newMean;
}
use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project GDSC-SMLM by aherbert.
the class SummariseResults method addSummary.
private void addSummary(StringBuilder sb, MemoryPeakResults result) {
DescriptiveStatistics[] stats = new DescriptiveStatistics[2];
char[] suffix = new char[2];
for (int i = 0; i < stats.length; i++) {
stats[i] = new DescriptiveStatistics();
suffix[i] = 0;
}
if (result.hasNullResults()) {
IJ.log("Null results in dataset: " + result.getName());
if (removeNullResults == UNKNOWN) {
GenericDialog gd = new GenericDialog(TITLE);
gd.addMessage("There are invalid results in memory.\n \nClean these results?");
gd.enableYesNoCancel();
gd.hideCancelButton();
gd.showDialog();
removeNullResults = (gd.wasOKed()) ? YES : NO;
}
if (removeNullResults == NO)
result = result.copy();
result.removeNullResults();
}
final int size = result.size();
if (size > 0) {
// Check if we can use the stored precision
if (result.hasStoredPrecision()) {
suffix[0] = '*';
for (PeakResult peakResult : result.getResults()) {
stats[0].addValue(peakResult.getPrecision());
}
} else if (result.isCalibratedForPrecision()) {
final double nmPerPixel = result.getNmPerPixel();
final double gain = result.getGain();
final boolean emCCD = result.isEMCCD();
for (PeakResult peakResult : result.getResults()) {
stats[0].addValue(peakResult.getPrecision(nmPerPixel, gain, emCCD));
}
}
// SNR requires noise
if (result.getHead().noise > 0) {
for (PeakResult peakResult : result.getResults()) {
stats[1].addValue(peakResult.getSignal() / peakResult.noise);
}
}
}
sb.append(result.getName());
sb.append("\t").append(result.size());
int maxT = getMaxT(result);
sb.append("\t").append(maxT);
final double exposureTime = (result.getCalibration() != null) ? result.getCalibration().getExposureTime() : 0;
sb.append("\t").append(Utils.timeToString(maxT * exposureTime));
if (size > 0) {
boolean includeDeviations = result.getHead().paramsStdDev != null;
long memorySize = MemoryPeakResults.estimateMemorySize(size, includeDeviations);
String memory = MemoryPeakResults.memorySizeString(memorySize);
sb.append("\t").append(memory);
} else {
sb.append("\t-");
}
Rectangle bounds = result.getBounds(true);
sb.append(String.format("\t%d,%d,%d,%d\t%s\t%s\t%s", bounds.x, bounds.y, bounds.x + bounds.width, bounds.y + bounds.height, Utils.rounded(result.getNmPerPixel(), 4), Utils.rounded(result.getGain(), 4), Utils.rounded(exposureTime, 4)));
for (int i = 0; i < stats.length; i++) {
if (Double.isNaN(stats[i].getMean())) {
sb.append("\t-\t-\t-\t-");
} else {
sb.append("\t").append(IJ.d2s(stats[i].getMean(), 3));
if (suffix[i] != 0)
sb.append(suffix[i]);
sb.append("\t").append(IJ.d2s(stats[i].getPercentile(50), 3));
sb.append("\t").append(IJ.d2s(stats[i].getMin(), 3));
sb.append("\t").append(IJ.d2s(stats[i].getMax(), 3));
}
}
sb.append("\n");
}
use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project GDSC-SMLM by aherbert.
the class FIRE method calculatePrecisionHistogram.
/**
* Calculate a histogram of the precision. The precision can be either stored in the results or calculated using the
* Mortensen formula. If the precision method for Q estimation is not fixed then the histogram is fitted with a
* Gaussian to create an initial estimate.
*
* @param precisionMethod
* the precision method
* @return The precision histogram
*/
private PrecisionHistogram calculatePrecisionHistogram() {
boolean logFitParameters = false;
String title = results.getName() + " Precision Histogram";
// Check if the results has the precision already or if it can be computed.
boolean canUseStored = canUseStoredPrecision(results);
boolean canCalculatePrecision = canCalculatePrecision(results);
// Set the method to compute a histogram. Default to the user selected option.
PrecisionMethod m = null;
if (canUseStored && precisionMethod == PrecisionMethod.STORED)
m = precisionMethod;
else if (canCalculatePrecision && precisionMethod == PrecisionMethod.CALCULATE)
m = precisionMethod;
if (m == null) {
// We only have two choices so if one is available then select it.
if (canUseStored)
m = PrecisionMethod.STORED;
else if (canCalculatePrecision)
m = PrecisionMethod.CALCULATE;
// If the user selected a method not available then log a warning
if (m != null && precisionMethod != PrecisionMethod.FIXED) {
IJ.log(String.format("%s : Selected precision method '%s' not available, switching to '%s'", TITLE, precisionMethod, m.getName()));
}
if (m == null) {
// This does not matter if the user has provide a fixed input.
if (precisionMethod == PrecisionMethod.FIXED) {
PrecisionHistogram histogram = new PrecisionHistogram(title);
histogram.mean = mean;
histogram.sigma = sigma;
return histogram;
}
// No precision
return null;
}
}
// We get here if we can compute precision.
// Build the histogram
StoredDataStatistics precision = new StoredDataStatistics(results.size());
if (m == PrecisionMethod.STORED) {
for (PeakResult r : results.getResults()) {
precision.add(r.getPrecision());
}
} else {
final double nmPerPixel = results.getNmPerPixel();
final double gain = results.getGain();
final boolean emCCD = results.isEMCCD();
for (PeakResult r : results.getResults()) {
precision.add(r.getPrecision(nmPerPixel, gain, emCCD));
}
}
//System.out.printf("Raw p = %f\n", precision.getMean());
double yMin = Double.NEGATIVE_INFINITY, yMax = 0;
// Set the min and max y-values using 1.5 x IQR
DescriptiveStatistics stats = precision.getStatistics();
double lower = stats.getPercentile(25);
double upper = stats.getPercentile(75);
if (Double.isNaN(lower) || Double.isNaN(upper)) {
if (logFitParameters)
Utils.log("Error computing IQR: %f - %f", lower, upper);
} else {
double iqr = upper - lower;
yMin = FastMath.max(lower - iqr, stats.getMin());
yMax = FastMath.min(upper + iqr, stats.getMax());
if (logFitParameters)
Utils.log(" Data range: %f - %f. Plotting 1.5x IQR: %f - %f", stats.getMin(), stats.getMax(), yMin, yMax);
}
if (yMin == Double.NEGATIVE_INFINITY) {
int n = 5;
yMin = Math.max(stats.getMin(), stats.getMean() - n * stats.getStandardDeviation());
yMax = Math.min(stats.getMax(), stats.getMean() + n * stats.getStandardDeviation());
if (logFitParameters)
Utils.log(" Data range: %f - %f. Plotting mean +/- %dxSD: %f - %f", stats.getMin(), stats.getMax(), n, yMin, yMax);
}
// Get the data within the range
double[] data = precision.getValues();
precision = new StoredDataStatistics(data.length);
for (double d : data) {
if (d < yMin || d > yMax)
continue;
precision.add(d);
}
int histogramBins = Utils.getBins(precision, Utils.BinMethod.SCOTT);
float[][] hist = Utils.calcHistogram(precision.getFloatValues(), yMin, yMax, histogramBins);
PrecisionHistogram histogram = new PrecisionHistogram(hist, precision, title);
if (precisionMethod == PrecisionMethod.FIXED) {
histogram.mean = mean;
histogram.sigma = sigma;
return histogram;
}
// Fitting of the histogram to produce the initial estimate
// Extract non-zero data
float[] x = Arrays.copyOf(hist[0], hist[0].length);
float[] y = hist[1];
int count = 0;
float dx = (x[1] - x[0]) * 0.5f;
for (int i = 0; i < y.length; i++) if (y[i] > 0) {
x[count] = x[i] + dx;
y[count] = y[i];
count++;
}
x = Arrays.copyOf(x, count);
y = Arrays.copyOf(y, count);
// Sense check to fitted data. Get mean and SD of histogram
double[] stats2 = Utils.getHistogramStatistics(x, y);
if (logFitParameters)
Utils.log(" Initial Statistics: %f +/- %f", stats2[0], stats2[1]);
histogram.mean = stats2[0];
histogram.sigma = stats2[1];
// Standard Gaussian fit
double[] parameters = fitGaussian(x, y);
if (parameters == null) {
Utils.log(" Failed to fit initial Gaussian");
return histogram;
}
double newMean = parameters[1];
double error = Math.abs(stats2[0] - newMean) / stats2[1];
if (error > 3) {
Utils.log(" Failed to fit Gaussian: %f standard deviations from histogram mean", error);
return histogram;
}
if (newMean < yMin || newMean > yMax) {
Utils.log(" Failed to fit Gaussian: %f outside data range %f - %f", newMean, yMin, yMax);
return histogram;
}
if (logFitParameters)
Utils.log(" Initial Gaussian: %f @ %f +/- %f", parameters[0], parameters[1], parameters[2]);
histogram.mean = parameters[1];
histogram.sigma = parameters[2];
return histogram;
}
Aggregations