use of org.apache.commons.math3.stat.descriptive.rank.Min in project metron by apache.
the class StatisticalBinningPerformanceDriver method main.
/**
 * Performance driver: times repeated {@code StellarStatisticsFunctions.StatsBin} lookups
 * against an {@code OnlineStatisticsProvider} and prints a summary of the per-run timings.
 *
 * <p>{@code NUM_DATA_POINTS} Gaussian samples (scaled by 1000) are loaded into the provider,
 * then {@code NUM_RUNS} runs of {@code TRIALS_PER_RUN} bin lookups are timed. The minimum,
 * 25th/50th/75th percentile and maximum run time are printed in milliseconds.
 *
 * @param argv command-line arguments (unused)
 */
public static void main(String... argv) {
    DescriptiveStatistics perfStats = new DescriptiveStatistics();
    OnlineStatisticsProvider statsProvider = new OnlineStatisticsProvider();
    List<Double> values = new ArrayList<>();
    GaussianRandomGenerator gaussian = new GaussianRandomGenerator(new MersenneTwister(0L));
    for (int i = 0; i < NUM_DATA_POINTS; ++i) {
        // get the data point out of the [0,1] range
        double d = 1000 * gaussian.nextNormalizedDouble();
        values.add(d);
        statsProvider.addValue(d);
    }
    final double nanosPerMilli = 1e6;
    for (int perfRun = 0; perfRun < NUM_RUNS; ++perfRun) {
        StellarStatisticsFunctions.StatsBin bin = new StellarStatisticsFunctions.StatsBin();
        // Re-seeded on every run so each run looks up the same sequence of values;
        // created before the clock starts so only the lookups are timed.
        Random r = new Random(0);
        // nanoTime is monotonic, unlike the wall clock, so a clock adjustment during a
        // run cannot skew (or make negative) the measured duration.
        long startNanos = System.nanoTime();
        for (int i = 0; i < TRIALS_PER_RUN; ++i) {
            // grab a random value and fuzz it a bit so we make sure there's no cheating via caching in t-digest.
            bin.apply(ImmutableList.of(statsProvider, values.get(r.nextInt(values.size())) - 3.5, PERCENTILES));
        }
        // Recorded in (fractional) milliseconds to match the label printed below.
        perfStats.addValue((System.nanoTime() - startNanos) / nanosPerMilli);
    }
    System.out.println("Min/25th/50th/75th/Max Milliseconds: " + perfStats.getMin() + " / " + perfStats.getPercentile(25) + " / " + perfStats.getPercentile(50) + " / " + perfStats.getPercentile(75) + " / " + perfStats.getMax());
}
use of org.apache.commons.math3.stat.descriptive.rank.Min in project metron by apache.
the class StellarStatisticsFunctionsTest method run.
/**
 * Evaluates a Stellar expression and returns its result.
 *
 * <p>The result is always pushed through {@code SerDeUtils.toBytes} / {@code SerDeUtils.fromBytes}.
 * When the result is a {@code StatisticsProvider}, each summary statistic of the original is
 * compared with the deserialized copy via {@code tolerantAssertEquals}.
 *
 * @param expr The expression to run.
 * @param variables The variables available to the expression, looked up by name.
 * @return The object produced by the expression (the original, not the deserialized copy).
 */
private static Object run(String expr, Map<String, Object> variables) {
    StellarProcessor processor = new StellarProcessor();
    DefaultVariableResolver resolver = new DefaultVariableResolver(name -> variables.get(name), name -> variables.containsKey(name));
    Object result = processor.parse(expr, resolver, StellarFunctions.FUNCTION_RESOLVER(), Context.EMPTY_CONTEXT());

    // Round-trip the result through serialization regardless of its type.
    byte[] serialized = SerDeUtils.toBytes(result);
    Object roundTripped = SerDeUtils.fromBytes(serialized, Object.class);

    if (!(result instanceof StatisticsProvider)) {
        return result;
    }

    StatisticsProvider original = (StatisticsProvider) result;
    StatisticsProvider restored = (StatisticsProvider) roundTripped;
    final double statTolerance = 1e-3;
    // Percentiles come from a sketch, so we're a bit more forgiving in our choice of epsilon.
    final double sketchTolerance = 1e-2;

    // N (compared with the overload that takes no explicit tolerance)
    tolerantAssertEquals(p -> p.getCount(), original, restored);
    // sum
    tolerantAssertEquals(p -> p.getSum(), original, restored, statTolerance);
    // sum of squares
    tolerantAssertEquals(p -> p.getSumSquares(), original, restored, statTolerance);
    // sum of logs
    tolerantAssertEquals(p -> p.getSumLogs(), original, restored, statTolerance);
    // mean
    tolerantAssertEquals(p -> p.getMean(), original, restored, statTolerance);
    // quadratic mean
    tolerantAssertEquals(p -> p.getQuadraticMean(), original, restored, statTolerance);
    // standard deviation
    tolerantAssertEquals(p -> p.getStandardDeviation(), original, restored, statTolerance);
    // variance
    tolerantAssertEquals(p -> p.getVariance(), original, restored, statTolerance);
    // min
    tolerantAssertEquals(p -> p.getMin(), original, restored, statTolerance);
    // max
    tolerantAssertEquals(p -> p.getMax(), original, restored, statTolerance);
    // kurtosis
    tolerantAssertEquals(p -> p.getKurtosis(), original, restored, statTolerance);
    // skewness
    tolerantAssertEquals(p -> p.getSkewness(), original, restored, statTolerance);

    // 10th, 20th, ..., 90th percentiles
    for (int step = 10; step < 100; step += 10) {
        final double pctile = step;
        tolerantAssertEquals(p -> p.getPercentile(pctile), original, restored, sketchTolerance);
    }
    return result;
}
use of org.apache.commons.math3.stat.descriptive.rank.Min in project bayou by capergroup.
the class MetricCalculator method execute.
/**
 * Computes the metric selected on the command line over a data file and prints its
 * average and standard deviation, plus a paired t-test p-value when a second file is given.
 *
 * <p>Options read from {@code cmdLine}:
 * <ul>
 *   <li>{@code t} - number of top predicted ASTs to consider per data point (default 10)</li>
 *   <li>{@code m} - metric name; an unknown or missing name is reported on stderr and the method returns</li>
 *   <li>{@code c} - corpus filter: 2 keeps in-corpus points, 3 keeps out-of-corpus points, anything else keeps all (default 1)</li>
 *   <li>{@code a} - aggregate passed to the metric (default "min")</li>
 *   <li>{@code f} - data file</li>
 *   <li>{@code p} - optional second data file used for the paired t-test</li>
 * </ul>
 *
 * <p>Does nothing when {@code cmdLine} is null.
 *
 * @throws IOException if reading a data file fails
 * @throws Error if the two data files yield a different number of data points after filtering
 */
public void execute() throws IOException {
    if (cmdLine == null)
        return;
    int topk = cmdLine.hasOption("t") ? Integer.parseInt(cmdLine.getOptionValue("t")) : 10;
    String m = cmdLine.getOptionValue("m");
    if (m == null) {
        // Switching on a null String throws NullPointerException; report it like any other bad metric.
        System.err.println("invalid metric: " + m);
        return;
    }
    Metric metric;
    switch (m) {
        case "equality-ast":
            metric = new EqualityASTMetric();
            break;
        case "jaccard-sequences":
            metric = new JaccardSequencesMetric();
            break;
        case "jaccard-api-calls":
            metric = new JaccardAPICallsMetric();
            break;
        case "num-control-structures":
            metric = new NumControlStructuresMetric();
            break;
        case "num-statements":
            metric = new NumStatementsMetric();
            break;
        default:
            System.err.println("invalid metric: " + m);
            return;
    }
    int inCorpus = cmdLine.hasOption("c") ? Integer.parseInt(cmdLine.getOptionValue("c")) : 1;
    String aggregate = cmdLine.hasOption("a") ? cmdLine.getOptionValue("a") : "min";

    List<JSONInputFormat.DataPoint> data = readFilteredData(cmdLine.getOptionValue("f"), inCorpus);
    List<Float> values = computeMetricValues(data, metric, topk, aggregate);

    boolean paired = cmdLine.hasOption("p");
    List<Float> values2 = new ArrayList<>();
    if (paired) {
        List<JSONInputFormat.DataPoint> data2 = readFilteredData(cmdLine.getOptionValue("p"), inCorpus);
        values2 = computeMetricValues(data2, metric, topk, aggregate);
        // The paired t-test needs one observation from each file per data point.
        if (values.size() != values2.size())
            throw new Error("DATA files do not match in size. Cannot compute p-value.");
    }

    float average = Metric.mean(values);
    float stdv = Metric.standardDeviation(values);
    if (paired) {
        double[] dValues = values.stream().mapToDouble(Float::doubleValue).toArray();
        double[] dValues2 = values2.stream().mapToDouble(Float::doubleValue).toArray();
        double pValue = new TTest().pairedTTest(dValues, dValues2);
        System.out.println(String.format("%s (%d data points, each aggregated with %s): average=%f, stdv=%f, pvalue=%e", m, data.size(), aggregate, average, stdv, pValue));
    } else {
        System.out.println(String.format("%s (%d data points, each aggregated with %s): average=%f, stdv=%f", m, data.size(), aggregate, average, stdv));
    }
}

/** Reads the data points from {@code file} and applies the corpus filter (2 = in-corpus only, 3 = out-of-corpus only). */
private static List<JSONInputFormat.DataPoint> readFilteredData(String file, int inCorpus) throws IOException {
    List<JSONInputFormat.DataPoint> data = JSONInputFormat.readData(file);
    if (inCorpus == 2)
        return data.stream().filter(datapoint -> datapoint.in_corpus).collect(Collectors.toList());
    if (inCorpus == 3)
        return data.stream().filter(datapoint -> !datapoint.in_corpus).collect(Collectors.toList());
    return data;
}

/** Computes {@code metric} for each data point against its first {@code topk} predicted ASTs (fewer if not that many exist). */
private static List<Float> computeMetricValues(List<JSONInputFormat.DataPoint> data, Metric metric, int topk, String aggregate) {
    List<Float> values = new ArrayList<>();
    for (JSONInputFormat.DataPoint datapoint : data) {
        DSubTree originalAST = datapoint.ast;
        List<DSubTree> predictedASTs = datapoint.out_asts.subList(0, Math.min(topk, datapoint.out_asts.size()));
        values.add(metric.compute(originalAST, predictedASTs, aggregate));
    }
    return values;
}
use of org.apache.commons.math3.stat.descriptive.rank.Min in project vcell by virtualcell.
the class TimeSeriesMultitrialData method chiSquaredTest.
/**
 * Compares two samples by histogramming both over their shared [min, max] range and
 * running {@code ChiSquareTest.chiSquareTestDataSetsComparison} on the bin counts.
 *
 * <p>The number of bins is {@code 1 + ceil(sqrt(rawData1.length))}. Bins that are empty
 * in both histograms are dropped before the test is run.
 *
 * @param rawData1 first sample
 * @param rawData2 second sample
 * @return the result of the data-set comparison; {@code 0.0} when exactly one bin remains
 *         after dropping empty bins; {@code -1} if any exception is thrown (the stack
 *         trace is printed to standard output)
 */
public static double chiSquaredTest(double[] rawData1, double[] rawData2) {
    try {
        int numBins = 1 + (int) Math.ceil(Math.sqrt(rawData1.length));

        // Range spanning both samples, so the two histograms share bin edges.
        Max max = new Max();
        max.incrementAll(rawData1);
        max.incrementAll(rawData2);
        Min min = new Min();
        min.incrementAll(rawData1);
        min.incrementAll(rawData2);
        double lower = min.getResult();
        double upper = max.getResult();

        long[] counts1 = calcHistogram(rawData1, lower, upper, numBins);
        long[] counts2 = calcHistogram(rawData2, lower, upper, numBins);

        // First pass: count the bins that are occupied in at least one histogram.
        int occupied = 0;
        for (int bin = 0; bin < counts1.length; bin++) {
            if (counts1[bin] != 0 || counts2[bin] != 0) {
                occupied++;
            }
        }

        // Second pass: copy only the occupied bins, preserving their order.
        long[] kept1 = new long[occupied];
        long[] kept2 = new long[occupied];
        int next = 0;
        for (int bin = 0; bin < counts1.length; bin++) {
            if (counts1[bin] == 0 && counts2[bin] == 0) {
                continue;
            }
            kept1[next] = counts1[bin];
            kept2[next] = counts2[bin];
            next++;
        }

        if (occupied == 1) {
            return 0.0;
        }
        return new ChiSquareTest().chiSquareTestDataSetsComparison(kept1, kept2);
    } catch (Exception e) {
        e.printStackTrace(System.out);
        return -1;
    }
}
use of org.apache.commons.math3.stat.descriptive.rank.Min in project vcell by virtualcell.
the class TimeSeriesMultitrialData method kolmogorovSmirnovTest.
/**
 * Computes the two-sample Kolmogorov-Smirnov statistic for the given samples.
 *
 * <p>The raw samples are handed directly to
 * {@code KolmogorovSmirnovTest.kolmogorovSmirnovStatistic(double[], double[])}. That method
 * takes the observations themselves and derives the empirical distributions internally.
 * Passing pre-binned CDF arrays (as this method previously did) makes the library treat the
 * CDF values as if they were observations, which does not yield the KS statistic of the
 * original samples.
 *
 * @param rawData1 first sample
 * @param rawData2 second sample
 * @return the statistic returned by the library, or {@code -1} if any exception is thrown
 *         (the stack trace is printed to standard output); per the Commons Math contract
 *         this includes null input or a sample with fewer than two values
 */
public static double kolmogorovSmirnovTest(double[] rawData1, double[] rawData2) {
    try {
        KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
        return test.kolmogorovSmirnovStatistic(rawData1, rawData2);
    } catch (Exception e) {
        e.printStackTrace(System.out);
        return -1;
    }
}
Aggregations