Search in sources:

Example 46 with Min

use of org.apache.commons.math3.stat.descriptive.rank.Min in project metron by apache.

the class StatisticalBinningPerformanceDriver method main.

/**
 * Times repeated {@code StatsBin} lookups against a statistics provider that has been
 * pre-loaded with {@code NUM_DATA_POINTS} scaled Gaussian samples, then prints the
 * min / 25th / 50th / 75th / max of the per-run elapsed milliseconds.
 *
 * @param argv command-line arguments; not read by this method
 */
public static void main(String... argv) {
    // Fixed seeds (0) make the generated samples and the lookup sequence repeatable.
    GaussianRandomGenerator normalSource = new GaussianRandomGenerator(new MersenneTwister(0L));
    OnlineStatisticsProvider provider = new OnlineStatisticsProvider();
    List<Double> samples = new ArrayList<>();
    int generated = 0;
    while (generated < NUM_DATA_POINTS) {
        // Scale by 1000 so the data point is moved out of the [0,1] range.
        double sample = 1000 * normalSource.nextNormalizedDouble();
        samples.add(sample);
        provider.addValue(sample);
        generated++;
    }

    DescriptiveStatistics runTimes = new DescriptiveStatistics();
    for (int run = 0; run < NUM_RUNS; run++) {
        StellarStatisticsFunctions.StatsBin binner = new StellarStatisticsFunctions.StatsBin();
        long startedAt = System.currentTimeMillis();
        Random picker = new Random(0);
        int trial = 0;
        while (trial < TRIALS_PER_RUN) {
            // Pick a random stored value and shift it slightly so the lookup cannot be
            // served from any caching inside t-digest.
            double picked = samples.get(picker.nextInt(samples.size()));
            binner.apply(ImmutableList.of(provider, picked - 3.5, PERCENTILES));
            trial++;
        }
        long elapsedMillis = System.currentTimeMillis() - startedAt;
        runTimes.addValue(elapsedMillis);
    }

    String summary = String.join(" / ",
            String.valueOf(runTimes.getMin()),
            String.valueOf(runTimes.getPercentile(25)),
            String.valueOf(runTimes.getPercentile(50)),
            String.valueOf(runTimes.getPercentile(75)),
            String.valueOf(runTimes.getMax()));
    System.out.println("Min/25th/50th/75th/Max Milliseconds: " + summary);
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) GaussianRandomGenerator(org.apache.commons.math3.random.GaussianRandomGenerator) ArrayList(java.util.ArrayList) Random(java.util.Random) MersenneTwister(org.apache.commons.math3.random.MersenneTwister)

Example 47 with Min

use of org.apache.commons.math3.stat.descriptive.rank.Min in project metron by apache.

the class StellarStatisticsFunctionsTest method run.

/**
 * Runs a Stellar expression and round-trips its result through {@code SerDeUtils}.
 * When the result is a {@code StatisticsProvider}, the deserialized copy is compared
 * against the original statistic by statistic via {@code tolerantAssertEquals}.
 *
 * @param expr The expression to run.
 * @param variables The variables available to the expression.
 * @return The object produced by the expression (the original, not the deserialized copy).
 */
private static Object run(String expr, Map<String, Object> variables) {
    StellarProcessor stellar = new StellarProcessor();
    DefaultVariableResolver resolver =
            new DefaultVariableResolver(name -> variables.get(name), name -> variables.containsKey(name));
    Object ret = stellar.parse(expr, resolver, StellarFunctions.FUNCTION_RESOLVER(), Context.EMPTY_CONTEXT());

    // The serialization round trip is performed for every result, provider or not.
    Object roundTripped = SerDeUtils.fromBytes(SerDeUtils.toBytes(ret), Object.class);
    if (!(ret instanceof StatisticsProvider)) {
        return ret;
    }

    StatisticsProvider original = (StatisticsProvider) ret;
    StatisticsProvider restored = (StatisticsProvider) roundTripped;

    // Count (N) is compared through the overload that takes no explicit epsilon.
    tolerantAssertEquals(p -> p.getCount(), original, restored);
    // Sum
    tolerantAssertEquals(p -> p.getSum(), original, restored, 1e-3);
    // Sum of squares
    tolerantAssertEquals(p -> p.getSumSquares(), original, restored, 1e-3);
    // Sum of logs
    tolerantAssertEquals(p -> p.getSumLogs(), original, restored, 1e-3);
    // Mean
    tolerantAssertEquals(p -> p.getMean(), original, restored, 1e-3);
    // Quadratic mean
    tolerantAssertEquals(p -> p.getQuadraticMean(), original, restored, 1e-3);
    // Standard deviation
    tolerantAssertEquals(p -> p.getStandardDeviation(), original, restored, 1e-3);
    // Variance
    tolerantAssertEquals(p -> p.getVariance(), original, restored, 1e-3);
    // Min
    tolerantAssertEquals(p -> p.getMin(), original, restored, 1e-3);
    // Max
    tolerantAssertEquals(p -> p.getMax(), original, restored, 1e-3);
    // Kurtosis
    tolerantAssertEquals(p -> p.getKurtosis(), original, restored, 1e-3);
    // Skewness
    tolerantAssertEquals(p -> p.getSkewness(), original, restored, 1e-3);

    // Percentiles 10, 20, ..., 90. These come from a sketch, so the epsilon is looser.
    for (int decile = 1; decile < 10; decile++) {
        final double pctile = 10.0 * decile;
        tolerantAssertEquals(p -> p.getPercentile(pctile), original, restored, 1e-2);
    }
    return ret;
}
Also used : StellarProcessor(org.apache.metron.stellar.common.StellarProcessor) java.util(java.util) Assert.assertNotNull(org.junit.Assert.assertNotNull) SerDeUtils(org.apache.metron.common.utils.SerDeUtils) StellarProcessor(org.apache.metron.stellar.common.StellarProcessor) RunWith(org.junit.runner.RunWith) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) GaussianRandomGenerator(org.apache.commons.math3.random.GaussianRandomGenerator) DefaultVariableResolver(org.apache.metron.stellar.dsl.DefaultVariableResolver) Function(java.util.function.Function) String.format(java.lang.String.format) SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) ImmutableList(com.google.common.collect.ImmutableList) MersenneTwister(org.apache.commons.math3.random.MersenneTwister) DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) StellarFunctions(org.apache.metron.stellar.dsl.StellarFunctions) Assert(org.junit.Assert) Parameterized(org.junit.runners.Parameterized) Assert.assertEquals(org.junit.Assert.assertEquals) Joiner(com.google.common.base.Joiner) Context(org.apache.metron.stellar.dsl.Context) Before(org.junit.Before) DefaultVariableResolver(org.apache.metron.stellar.dsl.DefaultVariableResolver)

Example 48 with Min

use of org.apache.commons.math3.stat.descriptive.rank.Min in project bayou by capergroup.

the class MetricCalculator method execute.

/**
 * Computes the metric selected by the "m" option over the data file given by the "f"
 * option and prints its average and standard deviation. When the "p" option names a
 * second data file, the same metric is computed over it and a paired t-test p-value
 * between the two value series is printed as well.
 *
 * <p>Options read from {@code cmdLine}: "t" (top-k, default 10), "m" (metric name),
 * "c" (corpus filter, default 1), "a" (aggregate, default "min"), "f" (data file),
 * "p" (optional second data file).
 *
 * <p>Does nothing when {@code cmdLine} is null. Prints "invalid metric" to stderr and
 * returns when the metric name is missing or not recognized.
 *
 * @throws IOException declared for the data-loading calls
 * @throws Error if both data files are given but yield a different number of values
 */
public void execute() throws IOException {
    if (cmdLine == null)
        return;
    int topk = cmdLine.hasOption("t") ? Integer.parseInt(cmdLine.getOptionValue("t")) : 10;
    String m = cmdLine.getOptionValue("m");
    if (m == null) {
        // switch on a null String throws NullPointerException; report it like any other bad name.
        System.err.println("invalid metric: " + m);
        return;
    }
    Metric metric;
    switch(m) {
        case "equality-ast":
            metric = new EqualityASTMetric();
            break;
        case "jaccard-sequences":
            metric = new JaccardSequencesMetric();
            break;
        case "jaccard-api-calls":
            metric = new JaccardAPICallsMetric();
            break;
        case "num-control-structures":
            metric = new NumControlStructuresMetric();
            break;
        case "num-statements":
            metric = new NumStatementsMetric();
            break;
        default:
            System.err.println("invalid metric: " + m);
            return;
    }
    int inCorpus = cmdLine.hasOption("c") ? Integer.parseInt(cmdLine.getOptionValue("c")) : 1;
    String aggregate = cmdLine.hasOption("a") ? cmdLine.getOptionValue("a") : "min";

    List<JSONInputFormat.DataPoint> data = loadDataPoints(cmdLine.getOptionValue("f"), inCorpus);
    List<Float> values = computeMetricValues(metric, data, topk, aggregate);

    boolean paired = cmdLine.hasOption("p");
    List<Float> values2 = new ArrayList<>();
    if (paired) {
        List<JSONInputFormat.DataPoint> data2 = loadDataPoints(cmdLine.getOptionValue("p"), inCorpus);
        values2 = computeMetricValues(metric, data2, topk, aggregate);
        // A paired test needs one value from each file per data point.
        if (values.size() != values2.size())
            throw new Error("DATA files do not match in size. Cannot compute p-value.");
    }

    float average = Metric.mean(values);
    float stdv = Metric.standardDeviation(values);
    if (paired) {
        double[] dValues = values.stream().mapToDouble(Float::doubleValue).toArray();
        double[] dValues2 = values2.stream().mapToDouble(Float::doubleValue).toArray();
        double pValue = new TTest().pairedTTest(dValues, dValues2);
        System.out.println(String.format("%s (%d data points, each aggregated with %s): average=%f, stdv=%f, pvalue=%e", m, data.size(), aggregate, average, stdv, pValue));
    } else {
        System.out.println(String.format("%s (%d data points, each aggregated with %s): average=%f, stdv=%f", m, data.size(), aggregate, average, stdv));
    }
}

/**
 * Reads the data points from {@code file} and applies the corpus filter: 2 keeps only
 * points flagged {@code in_corpus}, 3 keeps only points not flagged, any other value
 * keeps everything.
 */
private List<JSONInputFormat.DataPoint> loadDataPoints(String file, int inCorpus) throws IOException {
    List<JSONInputFormat.DataPoint> points = JSONInputFormat.readData(file);
    if (inCorpus == 2)
        return points.stream().filter(datapoint -> datapoint.in_corpus).collect(Collectors.toList());
    if (inCorpus == 3)
        return points.stream().filter(datapoint -> !datapoint.in_corpus).collect(Collectors.toList());
    return points;
}

/**
 * Computes {@code metric} for each data point, comparing its AST against at most the
 * first {@code topk} predicted ASTs, and returns the values in data-point order.
 */
private List<Float> computeMetricValues(Metric metric, List<JSONInputFormat.DataPoint> points, int topk, String aggregate) {
    List<Float> results = new ArrayList<>();
    for (JSONInputFormat.DataPoint datapoint : points) {
        DSubTree originalAST = datapoint.ast;
        List<DSubTree> predictedASTs = datapoint.out_asts.subList(0, Math.min(topk, datapoint.out_asts.size()));
        results.add(metric.compute(originalAST, predictedASTs, aggregate));
    }
    return results;
}
Also used : List(java.util.List) edu.rice.cs.caper.bayou.core.dsl(edu.rice.cs.caper.bayou.core.dsl) Metric(edu.rice.cs.caper.bayou.core.sketch_metric.Metric) org.apache.commons.cli(org.apache.commons.cli) TTest(org.apache.commons.math3.stat.inference.TTest) IOException(java.io.IOException) EqualityASTMetric(edu.rice.cs.caper.bayou.core.sketch_metric.EqualityASTMetric) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) TTest(org.apache.commons.math3.stat.inference.TTest) ArrayList(java.util.ArrayList) EqualityASTMetric(edu.rice.cs.caper.bayou.core.sketch_metric.EqualityASTMetric) Metric(edu.rice.cs.caper.bayou.core.sketch_metric.Metric) EqualityASTMetric(edu.rice.cs.caper.bayou.core.sketch_metric.EqualityASTMetric)

Example 49 with Min

use of org.apache.commons.math3.stat.descriptive.rank.Min in project vcell by virtualcell.

the class TimeSeriesMultitrialData method chiSquaredTest.

/**
 * Bins both samples into histograms over their combined [min, max] range, drops every
 * bin that is empty in both histograms, and passes the remaining counts to
 * {@code ChiSquareTest.chiSquareTestDataSetsComparison}.
 *
 * @param rawData1 first sample; its length determines the number of bins
 * @param rawData2 second sample
 * @return the value returned by {@code chiSquareTestDataSetsComparison}; 0.0 when
 *         exactly one bin remains after dropping empty ones; -1 when any exception
 *         is thrown (its stack trace is printed to standard output)
 */
public static double chiSquaredTest(double[] rawData1, double[] rawData2) {
    try {
        final int binCount = 1 + (int) Math.ceil(Math.sqrt(rawData1.length));

        // Both histograms share one range: the extremes across the two samples.
        Max upper = new Max();
        upper.incrementAll(rawData1);
        upper.incrementAll(rawData2);
        Min lower = new Min();
        lower.incrementAll(rawData1);
        lower.incrementAll(rawData2);
        final double lo = lower.getResult();
        final double hi = upper.getResult();

        long[] counts1 = calcHistogram(rawData1, lo, hi, binCount);
        long[] counts2 = calcHistogram(rawData2, lo, hi, binCount);

        // First pass: count the bins that are non-empty in at least one histogram.
        int occupied = 0;
        for (int bin = 0; bin < counts1.length; bin++) {
            if (counts1[bin] != 0 || counts2[bin] != 0) {
                occupied++;
            }
        }
        if (occupied == 1) {
            return 0.0;
        }

        // Second pass: copy those bins, in order, into compact arrays.
        long[] kept1 = new long[occupied];
        long[] kept2 = new long[occupied];
        int next = 0;
        for (int bin = 0; bin < counts1.length; bin++) {
            if (counts1[bin] == 0 && counts2[bin] == 0) {
                continue;
            }
            kept1[next] = counts1[bin];
            kept2[next] = counts2[bin];
            next++;
        }
        return new ChiSquareTest().chiSquareTestDataSetsComparison(kept1, kept2);
    } catch (Exception e) {
        e.printStackTrace(System.out);
        return -1;
    }
}
Also used : Min(org.apache.commons.math3.stat.descriptive.rank.Min) Max(org.apache.commons.math3.stat.descriptive.rank.Max) ArrayList(java.util.ArrayList) ChiSquareTest(org.apache.commons.math3.stat.inference.ChiSquareTest) ExpressionException(cbit.vcell.parser.ExpressionException)

Example 50 with Min

use of org.apache.commons.math3.stat.descriptive.rank.Min in project vcell by virtualcell.

the class TimeSeriesMultitrialData method kolmogorovSmirnovTest.

/**
 * Builds a binned CDF for each sample over their combined [min, max] range and returns
 * {@code KolmogorovSmirnovTest.kolmogorovSmirnovStatistic} applied to the two CDF arrays.
 *
 * @param rawData1 first sample; its length determines the number of bins
 * @param rawData2 second sample
 * @return the statistic computed from the two CDF arrays, or -1 when any exception is
 *         thrown (its stack trace is printed to standard output)
 */
public static double kolmogorovSmirnovTest(double[] rawData1, double[] rawData2) {
    try {
        final int binCount = 1 + (int) Math.ceil(Math.sqrt(rawData1.length));

        // Use one shared range so both CDFs are evaluated on the same bins.
        final TimeSeriesMultitrialData.MinMaxHelp range1 = new TimeSeriesMultitrialData.MinMaxHelp(rawData1);
        final TimeSeriesMultitrialData.MinMaxHelp range2 = new TimeSeriesMultitrialData.MinMaxHelp(rawData2);
        final double lo = Math.min(range1.min, range2.min);
        final double hi = Math.max(range1.max, range2.max);

        final double[] firstCdf = calculateCDF(rawData1, lo, hi, binCount);
        final double[] secondCdf = calculateCDF(rawData2, lo, hi, binCount);
        return new KolmogorovSmirnovTest().kolmogorovSmirnovStatistic(firstCdf, secondCdf);
    } catch (Exception e) {
        e.printStackTrace(System.out);
        return -1;
    }
}
Also used : KolmogorovSmirnovTest(org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest) ExpressionException(cbit.vcell.parser.ExpressionException)

Aggregations

ArrayList (java.util.ArrayList)16 List (java.util.List)10 DescriptiveStatistics (org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)8 SummaryStatistics (org.apache.commons.math3.stat.descriptive.SummaryStatistics)7 Map (java.util.Map)6 UnivariateFunction (org.apache.commons.math3.analysis.UnivariateFunction)6 MaxEval (org.apache.commons.math3.optim.MaxEval)6 Collectors (java.util.stream.Collectors)5 ExpressionException (cbit.vcell.parser.ExpressionException)4 Plot2 (ij.gui.Plot2)4 TooManyEvaluationsException (org.apache.commons.math3.exception.TooManyEvaluationsException)4 InitialGuess (org.apache.commons.math3.optim.InitialGuess)4 PointValuePair (org.apache.commons.math3.optim.PointValuePair)4 RandomDataGenerator (org.apache.commons.math3.random.RandomDataGenerator)4 VisibleForTesting (com.google.common.annotations.VisibleForTesting)3 StoredDataStatistics (gdsc.core.utils.StoredDataStatistics)3 HashMap (java.util.HashMap)3 SimpsonIntegrator (org.apache.commons.math3.analysis.integration.SimpsonIntegrator)3 BrentOptimizer (org.apache.commons.math3.optim.univariate.BrentOptimizer)3 SearchInterval (org.apache.commons.math3.optim.univariate.SearchInterval)3