Search in sources :

Example 1 with TTest

use of org.apache.commons.math3.stat.inference.TTest in project GDSC-SMLM by aherbert.

the class BaseFunctionSolverTest method canFitSingleGaussianBetter.

/**
 * Fits simulated single-Gaussian data with two solvers and reports how often the second solver
 * ({@code solver2}) is better than the first ({@code solver}).
 *
 * <p>For each value in {@code signal}, several noisy data sets are drawn. Each data set is fitted
 * by both solvers from every combination of start parameters built from {@code base},
 * {@code shift} (X and Y) and {@code factor}. Statistics for Signal, X and Y are collected per
 * solver via {@code compare(...)} and the two samples are compared with a two-sided t-test:
 *
 * <ul>
 * <li>If the means differ significantly and are not within 10% of each other, the solvers are
 * scored on accuracy (the smaller absolute mean wins).</li>
 * <li>Otherwise, if the standard deviations are not within 5% of each other, the solvers are
 * scored on precision (the smaller standard deviation wins).</li>
 * </ul>
 *
 * <p>The per-parameter and overall tallies are passed to {@code test(...)}.
 *
 * @param solver the reference solver
 * @param applyBounds whether to set bounds on {@code solver}
 * @param solver2 the solver under evaluation
 * @param applyBounds2 whether to set bounds on {@code solver2}
 * @param name the display name of {@code solver}
 * @param name2 the display name of {@code solver2}
 * @param noiseModel the noise model used to obtain noise/weights and to draw the data
 */
void canFitSingleGaussianBetter(FunctionSolver solver, boolean applyBounds, FunctionSolver solver2, boolean applyBounds2, String name, String name2, NoiseModel noiseModel) {
    double[] noise = getNoise(noiseModel);
    // NOTE(review): only 'solver' receives weights here; confirm whether a weighted
    // 'solver2' is expected to be configured by the caller.
    if (solver.isWeighted()) {
        solver.setWeights(getWeights(noiseModel));
    }
    final int loops = 5;
    // Significance level of the two-sided t-test
    final double alpha = 0.05;
    // The test object is reused for every comparison
    final TTest tt = new TTest();
    randomGenerator.setSeed(seed);
    // Paired statistics: [0,1] = Signal, [2,3] = X, [4,5] = Y (even = solver, odd = solver2)
    StoredDataStatistics[] stats = new StoredDataStatistics[6];
    String[] statName = { "Signal", "X", "Y" };
    int[] betterPrecision = new int[3];
    int[] totalPrecision = new int[3];
    int[] betterAccuracy = new int[3];
    int[] totalAccuracy = new int[3];
    // Accumulated evaluation counts for solver (i1) and solver2 (i2)
    int i1 = 0, i2 = 0;
    for (double s : signal) {
        double[] expected = createParams(1, s, 0, 0, 1);
        double[] lower = null, upper = null;
        if (applyBounds || applyBounds2) {
            lower = createParams(0, s * 0.5, -0.2, -0.2, 0.8);
            upper = createParams(3, s * 2, 0.2, 0.2, 1.2);
        }
        if (applyBounds) {
            solver.setBounds(lower, upper);
        }
        if (applyBounds2) {
            solver2.setBounds(lower, upper);
        }
        for (int loop = 0; loop < loops; loop++) {
            double[] data = drawGaussian(expected, noise, noiseModel);
            // Fresh statistics for each simulated data set
            for (int i = 0; i < stats.length; i++) {
                stats[i] = new StoredDataStatistics();
            }
            for (double db : base) {
                for (double dx : shift) {
                    for (double dy : shift) {
                        for (double dsx : factor) {
                            double[] p = createParams(db, s, dx, dy, dsx);
                            double[] fp = fitGaussian(solver, data, p, expected);
                            i1 += solver.getEvaluations();
                            double[] fp2 = fitGaussian(solver2, data, p, expected);
                            i2 += solver2.getEvaluations();
                            // Get the mean and sd (the fit precision)
                            compare(fp, expected, fp2, expected, Gaussian2DFunction.SIGNAL, stats[0], stats[1]);
                            compare(fp, expected, fp2, expected, Gaussian2DFunction.X_POSITION, stats[2], stats[3]);
                            compare(fp, expected, fp2, expected, Gaussian2DFunction.Y_POSITION, stats[4], stats[5]);
                        }
                    }
                }
            }
            for (int i = 0; i < stats.length; i += 2) {
                double u1 = stats[i].getMean();
                double u2 = stats[i + 1].getMean();
                double sd1 = stats[i].getStandardDeviation();
                double sd2 = stats[i + 1].getStandardDeviation();
                // two sided
                boolean diff = tt.tTest(stats[i].getValues(), stats[i + 1].getValues(), alpha);
                int index = i / 2;
                String msg = String.format("%s vs %s : %.1f (%s) %s %f +/- %f vs %f +/- %f  (N=%d) %b", name2, name, s, noiseModel, statName[index], u2, sd2, u1, sd1, stats[i].getN(), diff);
                if (diff && !DoubleEquality.almostEqualRelativeOrAbsolute(u1, u2, 0.1, 0)) {
                    // Significantly different means that are not roughly the same.
                    // Check which is more accurate (closer to zero)
                    boolean improved = Math.abs(u2) < Math.abs(u1);
                    if (improved) {
                        betterAccuracy[index]++;
                    }
                    println(msg + (improved ? " A*" : " A"));
                    totalAccuracy[index]++;
                } else if (!DoubleEquality.almostEqualRelativeOrAbsolute(sd1, sd2, 0.05, 0)) {
                    // The means are the same (or basically the same). Check which is more precise
                    boolean improved = sd2 < sd1;
                    if (improved) {
                        betterPrecision[index]++;
                    }
                    println(msg + (improved ? " P*" : " P"));
                    totalPrecision[index]++;
                }
            }
        }
    }
    int better = 0, total = 0;
    for (int index = 0; index < statName.length; index++) {
        better += betterPrecision[index] + betterAccuracy[index];
        total += totalPrecision[index] + totalAccuracy[index];
        test(name2, name, statName[index] + " P", betterPrecision[index], totalPrecision[index], printBetterDetails);
        test(name2, name, statName[index] + " A", betterAccuracy[index], totalAccuracy[index], printBetterDetails);
    }
    test(name2, name, String.format("All (eval [%d] [%d]) : ", i2, i1), better, total, true);
}
Also used : TTest(org.apache.commons.math3.stat.inference.TTest) StoredDataStatistics(gdsc.core.utils.StoredDataStatistics)

Example 2 with TTest

use of org.apache.commons.math3.stat.inference.TTest in project sketches-pig by DataSketches.

the class ArrayOfDoublesSketchesToPValueEstimatesTest method largeDataSet.

/**
 * Compares the p-value returned by the UDF for two large sketches against the p-value that
 * {@link TTest} computes directly from the same raw values.
 *
 * @throws Exception if any step of the test fails unexpectedly
 */
@Test
public void largeDataSet() throws Exception {
    final EvalFunc<Tuple> udf = new ArrayOfDoublesSketchesToPValueEstimates();
    // One sketch per sample; both are configured identically
    final ArrayOfDoublesUpdatableSketch first = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(1).setNominalEntries(16000).build();
    final ArrayOfDoublesUpdatableSketch second = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(1).setNominalEntries(16000).build();
    // Sample size, and the offset added to every value of the second sample
    final int n = 100000;
    final int bShift = 1000;
    final double[] rawFirst = new double[n];
    final double[] rawSecond = new double[n];
    // Fixed seed keeps the test deterministic
    final Random generator = new Random(41L);
    // Populate the first sample: raw value and sketch entry together
    for (int key = 0; key < n; key++) {
        rawFirst[key] = generator.nextGaussian();
        first.update(key, new double[] { rawFirst[key] });
    }
    // Populate the second (shifted) sample in the same way
    for (int key = 0; key < n; key++) {
        rawSecond[key] = generator.nextGaussian() + bShift;
        second.update(key, new double[] { rawSecond[key] });
    }
    // Reference p-value from the raw data
    final double expectedPValue = new TTest().tTest(rawFirst, rawSecond);
    // Serialize both sketches into the UDF's input tuple and run it
    final DataByteArray firstBytes = new DataByteArray(first.compact().toByteArray());
    final DataByteArray secondBytes = new DataByteArray(second.compact().toByteArray());
    final Tuple result = udf.exec(PigUtil.objectsToTuple(firstBytes, secondBytes));
    // Exactly one p-value is expected
    Assert.assertNotNull(result);
    Assert.assertEquals(result.size(), 1);
    // The estimate must agree with the reference within the tolerance
    Assert.assertEquals((double) result.get(0), expectedPValue, 0.01);
}
Also used : TTest(org.apache.commons.math3.stat.inference.TTest) ArrayOfDoublesUpdatableSketchBuilder(com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder) ArrayOfDoublesUpdatableSketch(com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketch) Random(java.util.Random) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 3 with TTest

use of org.apache.commons.math3.stat.inference.TTest in project sketches-pig by DataSketches.

the class ArrayOfDoublesSketchesToPValueEstimates method exec.

/**
 * Computes one t-test p-value per metric from two serialized sketches.
 *
 * @param input a tuple expected to hold exactly two {@link DataByteArray} sketches
 * @return a tuple with one p-value per metric, or {@code null} when the input is missing, does
 *         not have exactly two fields, or either sketch retains fewer than two entries
 * @throws IOException declared by the UDF contract
 * @throws IllegalArgumentException if the sketches hold a different number of values
 */
@Override
public Tuple exec(final Tuple input) throws IOException {
    final boolean hasTwoFields = (input != null) && (input.size() == 2);
    if (!hasTwoFields) {
        return null;
    }
    // Unpack and wrap both serialized sketches
    final DataByteArray bytesA = (DataByteArray) input.get(0);
    final DataByteArray bytesB = (DataByteArray) input.get(1);
    final ArrayOfDoublesSketch first = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(bytesA.get()));
    final ArrayOfDoublesSketch second = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(bytesB.get()));
    // Both sketches must track the same number of metrics
    final int numMetrics = first.getNumValues();
    if (numMetrics != second.getNumValues()) {
        throw new IllegalArgumentException("Both sketches must have the same number of values");
    }
    // A p-value can't be calculated unless each sketch retains at least 2 entries
    final boolean enoughEntries = first.getRetainedEntries() >= 2 && second.getRetainedEntries() >= 2;
    if (!enoughEntries) {
        return null;
    }
    // Per-metric summary statistics for each sketch
    final SummaryStatistics[] statsA = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(first);
    final SummaryStatistics[] statsB = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(second);
    // One p-value per metric
    final TTest tTest = new TTest();
    final Tuple result = TupleFactory.getInstance().newTuple(numMetrics);
    for (int metric = 0; metric < numMetrics; metric++) {
        final double pValue = tTest.tTest(statsA[metric], statsB[metric]);
        result.set(metric, pValue);
    }
    return result;
}
Also used : TTest(org.apache.commons.math3.stat.inference.TTest) SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) DataByteArray(org.apache.pig.data.DataByteArray) ArrayOfDoublesSketch(com.yahoo.sketches.tuple.ArrayOfDoublesSketch) Tuple(org.apache.pig.data.Tuple)

Example 4 with TTest

use of org.apache.commons.math3.stat.inference.TTest in project druid by druid-io.

the class ArrayOfDoublesSketchTTestPostAggregator method compute.

/**
 * Computes a t-test p-value for each value column of the two sketches produced by the first two
 * fields.
 *
 * @param combinedAggregators the aggregator values passed through to each field's computation
 * @return one p-value per sketch value column
 * @throws IAE if the two sketches hold a different number of values
 */
@Override
public double[] compute(final Map<String, Object> combinedAggregators) {
    final ArrayOfDoublesSketch first = (ArrayOfDoublesSketch) getFields().get(0).compute(combinedAggregators);
    final ArrayOfDoublesSketch second = (ArrayOfDoublesSketch) getFields().get(1).compute(combinedAggregators);
    // The sketches are only comparable column by column
    final int numberOfValues = first.getNumValues();
    if (numberOfValues != second.getNumValues()) {
        throw new IAE("Sketches have different number of values: %d and %d", numberOfValues, second.getNumValues());
    }
    final SummaryStatistics[] firstStats = getStats(first);
    final SummaryStatistics[] secondStats = getStats(second);
    final TTest tTest = new TTest();
    final double[] result = new double[numberOfValues];
    for (int column = 0; column < numberOfValues; column++) {
        result[column] = tTest.tTest(firstStats[column], secondStats[column]);
    }
    return result;
}
Also used : TTest(org.apache.commons.math3.stat.inference.TTest) SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) IAE(org.apache.druid.java.util.common.IAE) ArrayOfDoublesSketch(org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch)

Example 5 with TTest

use of org.apache.commons.math3.stat.inference.TTest in project vcell by virtualcell.

the class TimeSeriesMultitrialData method statisticsSummary.

/**
 * Runs a t-test between the two data sets for every variable at every time point and summarizes
 * the outcome.
 *
 * <p>The summary records the trial counts of both data sets, the number of tests run, how many
 * tests had a p-value below 0.05, 0.01 and 0.001, and the smallest p-value together with the
 * variable and time at which it occurred. Variable names and times are taken from {@code data1}.
 *
 * @param data1 the first multi-trial data set
 * @param data2 the second multi-trial data set
 * @return the populated summary
 */
public static SummaryStatistics statisticsSummary(TimeSeriesMultitrialData data1, TimeSeriesMultitrialData data2) {
    SummaryStatistics summary = new SummaryStatistics();
    summary.size1 = data1.numTrials;
    summary.size2 = data2.numTrials;
    // Start values, replaced by the first p-value below 1.0
    summary.smallestPValue = 1.0;
    summary.timeSmallestPValue = -1.0;
    TTest tTest = new TTest();
    for (String variable : data1.varNames) {
        for (int t = 0; t < data1.times.length; t++) {
            double[] sample1 = data1.getVarTimeData(variable, t);
            double[] sample2 = data2.getVarTimeData(variable, t);
            summary.numExperiments++;
            double p = tTest.tTest(sample1, sample2);
            // The thresholds are nested: a stricter level can only fail if the looser one did
            if (p < 0.05) {
                summary.numFail_95++;
                if (p < 0.01) {
                    summary.numFail_99++;
                    if (p < 0.001) {
                        summary.numFail_999++;
                    }
                }
            }
            // Track the most significant difference seen so far
            if (p < summary.smallestPValue) {
                summary.smallestPValue = p;
                summary.varSmallestPValue = variable;
                summary.timeSmallestPValue = data1.times[t];
            }
        }
    }
    return summary;
}
Also used : TTest(org.apache.commons.math3.stat.inference.TTest)

Aggregations

TTest (org.apache.commons.math3.stat.inference.TTest): 7; SummaryStatistics (org.apache.commons.math3.stat.descriptive.SummaryStatistics): 2; DataByteArray (org.apache.pig.data.DataByteArray): 2; Tuple (org.apache.pig.data.Tuple): 2; ArrayOfDoublesSketch (com.yahoo.sketches.tuple.ArrayOfDoublesSketch): 1; ArrayOfDoublesUpdatableSketch (com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketch): 1; ArrayOfDoublesUpdatableSketchBuilder (com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder): 1; edu.rice.cs.caper.bayou.core.dsl (edu.rice.cs.caper.bayou.core.dsl): 1; EqualityASTMetric (edu.rice.cs.caper.bayou.core.sketch_metric.EqualityASTMetric): 1; Metric (edu.rice.cs.caper.bayou.core.sketch_metric.Metric): 1; StoredDataStatistics (gdsc.core.utils.StoredDataStatistics): 1; IOException (java.io.IOException): 1; ArrayList (java.util.ArrayList): 1; List (java.util.List): 1; Random (java.util.Random): 1; Collectors (java.util.stream.Collectors): 1; org.apache.commons.cli (org.apache.commons.cli): 1; UniformRandomProvider (org.apache.commons.rng.UniformRandomProvider): 1; ArrayOfDoublesSketch (org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch): 1; IAE (org.apache.druid.java.util.common.IAE): 1