Search in sources :

Example 26 with Max

use of org.apache.commons.math3.stat.descriptive.rank.Max in project metron by apache.

the class HLLPMeasurement method main.

/**
 * Command-line entry point for the HLLP measurement run.
 *
 * <p>Reads its settings through {@code ParserOptions}, prints the options in use and a chart
 * heading, then iterates over every (cardinality, sp, p) combination. For each combination it
 * runs {@code numTrials} trials and prints one formatted row of error / time / size results.
 *
 * <p>NOTE(review): this listing appears to have lost lines (closing braces and several
 * statements); the notes below mark the gaps that are visible from the code itself.
 *
 * @param args raw command-line arguments handed to {@code ParserOptions.parse}
 */
public static void main(String[] args) {
    Options options = new Options();
    try {
        CommandLineParser parser = new PosixParser();
        CommandLine cmd = null;
        try {
            cmd = ParserOptions.parse(parser, args);
        } catch (ParseException pe) {
            // Parsing failed: show the usage text.
            final HelpFormatter usageFormatter = new HelpFormatter();
            usageFormatter.printHelp("HLLPMeasurement", null, options, null, true);
        // NOTE(review): the catch block is not closed here, and if parsing failed cmd is still
        // null when it is dereferenced below - confirm against the upstream source.
        if (cmd.hasOption("h")) {
            // Help requested: show the usage text.
            final HelpFormatter usageFormatter = new HelpFormatter();
            usageFormatter.printHelp("HLLPMeasurement", null, options, null, true);
        // Each option is read as a string; the second argument is presumably the default used
        // when the option is absent - TODO confirm against ParserOptions.
        final String chartDelim = ParserOptions.CHART_DELIM.get(cmd, "|");
        final int numTrials = Integer.parseInt(ParserOptions.NUM_TRIALS.get(cmd, "5000"));
        final int cardMin = Integer.parseInt(ParserOptions.CARD_MIN.get(cmd, "200"));
        final int cardMax = Integer.parseInt(ParserOptions.CARD_MAX.get(cmd, "1000"));
        final int cardStep = Integer.parseInt(ParserOptions.CARD_STEP.get(cmd, "200"));
        // First cardinality tried: cardMin rounded up to a multiple of cardStep
        // (integer division), e.g. cardMin=200, cardStep=200 -> 200; cardMin=201 -> 400.
        final int cardStart = (((cardMin - 1) / cardStep) * cardStep) + cardStep;
        final int spMin = Integer.parseInt(ParserOptions.SP_MIN.get(cmd, "4"));
        final int spMax = Integer.parseInt(ParserOptions.SP_MAX.get(cmd, "32"));
        final int spStep = Integer.parseInt(ParserOptions.SP_STEP.get(cmd, "4"));
        final int pMin = Integer.parseInt(ParserOptions.P_MIN.get(cmd, "4"));
        final int pMax = Integer.parseInt(ParserOptions.P_MAX.get(cmd, "32"));
        final int pStep = Integer.parseInt(ParserOptions.P_STEP.get(cmd, "4"));
        final double errorPercentile = Double.parseDouble(ParserOptions.ERR_PERCENTILE.get(cmd, "50"));
        final double timePercentile = Double.parseDouble(ParserOptions.TIME_PERCENTILE.get(cmd, "50"));
        final double sizePercentile = Double.parseDouble(ParserOptions.SIZE_PERCENTILE.get(cmd, "50"));
        final boolean formatErrPercent = Boolean.parseBoolean(ParserOptions.ERR_FORMAT_PERCENT.get(cmd, "true"));
        // Errors are scaled by 100 when they are to be shown as percentages.
        final int errMultiplier = formatErrPercent ? 100 : 1;
        final Function<Double, String> errorFormatter = (v -> ERR_FORMAT.format(v * errMultiplier));
        // Times are measured with System.nanoTime() below and divided by NANO_TO_MILLIS for display.
        final Function<Double, String> timeFormatter = (v -> TIME_FORMAT.format(v / NANO_TO_MILLIS));
        final Function<Double, String> sizeFormatter = (v -> SIZE_FORMAT.format(v));
        // Legend lines, column titles and column widths passed to printHeading.
        final String[] chartKey = new String[] { "card: cardinality", "sp: sparse precision value", "p: normal precision value", "err: error as a percent of the expected cardinality; ", "time: total time to add all values to the hllp estimator and calculate a cardinality estimate", "size: size of the hllp set in bytes once all values have been added for the specified cardinality", "l=low, m=mid(based on percentile chosen), h=high, std=standard deviation" };
        final String[] chartHeader = new String[] { "card", "sp", "p", "err l/m/h/std (% of actual)", "time l/m/h/std (ms)", "size l/m/h/std (b)" };
        final int[] chartPadding = new int[] { 10, 10, 10, 40, 40, 30 };
        // Reject ranges where the sparse precision bounds fall below the normal precision bounds.
        if (spMin < pMin) {
            throw new IllegalArgumentException("p must be <= sp");
        if (spMax < pMax) {
            throw new IllegalArgumentException("p must be <= sp");
        // Echo the effective settings before the chart.
        println("Options Used");
        println("num trials: " + numTrials);
        println("card min: " + cardMin);
        println("card max: " + cardMax);
        println("card step: " + cardStep);
        println("card start: " + cardStart);
        println("sp min: " + spMin);
        println("sp max: " + spMax);
        println("sp step: " + spStep);
        println("p min: " + pMin);
        println("p max: " + pMax);
        println("p step: " + pStep);
        println("error percentile: " + errorPercentile);
        println("time percentile: " + timePercentile);
        println("size percentile: " + sizePercentile);
        println("format err as %: " + formatErrPercent);
        printHeading(chartKey, chartHeader, chartPadding, chartDelim);
        // One output row per (cardinality, sp, p) combination.
        for (int c = cardStart; c <= cardMax; c += cardStep) {
            for (int sp = spMin; sp <= spMax; sp += spStep) {
                for (int p = pMin; p <= pMax; p += pStep) {
                    DescriptiveStatistics errorStats = new DescriptiveStatistics();
                    DescriptiveStatistics timeStats = new DescriptiveStatistics();
                    DescriptiveStatistics sizeStats = new DescriptiveStatistics();
                    for (int i = 0; i < numTrials; i++) {
                        List<Object> trialSet = buildTrialSet(c);
                        // NOTE(review): as shown, nothing is added to 'unique' before size() is
                        // read, so distinctVals is always 0 - a line populating it from
                        // trialSet looks to be missing.
                        Set unique = new HashSet();
                        long distinctVals = unique.size();
                        HyperLogLogPlus hllp = new HyperLogLogPlus(p, sp);
                        long timeStart = System.nanoTime();
                        // NOTE(review): as shown, trialSet is never offered to hllp, so only
                        // cardinality() is timed; chartKey describes the time as covering the
                        // adds as well - confirm.
                        long dvEstimate = hllp.cardinality();
                        long timeEnd = System.nanoTime();
                        long timeElapsed = timeEnd - timeStart;
                        // Relative error: |estimate - actual| / actual.
                        double rawError = Math.abs(dvEstimate - distinctVals) / (double) distinctVals;
                    // NOTE(review): rawError and timeElapsed are never recorded, no size sample is
                    // taken, and the trial loop is not closed - the three statistics objects are
                    // empty when formatted below.
                    MeasureResultFormatter errorRF = new MeasureResultFormatter(errorStats, errorFormatter, errorPercentile);
                    MeasureResultFormatter timeRF = new MeasureResultFormatter(timeStats, timeFormatter, timePercentile);
                    MeasureResultFormatter sizeRF = new MeasureResultFormatter(sizeStats, sizeFormatter, sizePercentile);
                    println(formatWithPadding(new String[] { "" + c, "" + sp, "" + p, errorRF.getFormattedResults(), timeRF.getFormattedResults(), sizeRF.getFormattedResults() }, chartPadding, chartDelim));
    // NOTE(review): the handler body and all remaining closing braces are absent from this listing.
    } catch (Exception e) {
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)

Example 27 with Max

use of org.apache.commons.math3.stat.descriptive.rank.Max in project hive by apache.

the class TestHostAffinitySplitLocationProvider method testHashDistribution.

/**
 * Checks how evenly {@code HostAffinitySplitLocationProvider.determineLocation} spreads the
 * given splits over {@code locs} locations and returns the coefficient of variation
 * (standard deviation divided by mean) of the per-location statistics.
 *
 * <p>NOTE(review): this listing appears to have lost lines; the notes below mark the gaps
 * that are visible from the code itself.
 *
 * @param locs number of locations; sizes the hit-count array
 * @param missCount the mocked list answers "not-null" only on the lookup whose running
 *        counter equals this value, and null on every other lookup
 * @param splits the file splits to place
 * @param errorCount not referenced in the visible code - presumably bumped when the
 *        distribution check fails; TODO confirm
 * @return the coefficient of variation {@code stdev / avg}
 */
private double testHashDistribution(int locs, final int missCount, FileSplit[] splits, AtomicInteger errorCount) {
    // This relies heavily on what method determineSplits ... calls and doesn't.
    // We could do a wrapper with only size() and get() methods instead of List, to be sure.
    @SuppressWarnings("unchecked") List<String> partLocs = (List<String>) Mockito.mock(List.class);
    // Counts get() calls on the mock so the answer can depend on the call number.
    final AtomicInteger state = new AtomicInteger(0);
    Mockito.when(partLocs.get(Mockito.anyInt())).thenAnswer(new Answer<String>() {

        public String answer(InvocationOnMock invocation) throws Throwable {
            return (state.getAndIncrement() == missCount) ? "not-null" : null;
    // NOTE(review): the anonymous class and the thenAnswer(...) call are not closed here.
    int[] hitCounts = new int[locs];
    for (int splitIx = 0; splitIx < splits.length; ++splitIx) {
        // Location index chosen for this split from its path string and start offset.
        int index = HostAffinitySplitLocationProvider.determineLocation(partLocs, splits[splitIx].getPath().toString(), splits[splitIx].getStart(), null);
    // NOTE(review): 'index' is never used and hitCounts is never written as shown - an
    // increment of hitCounts[index] looks to be missing, as does the loop's closing brace.
    SummaryStatistics ss = new SummaryStatistics();
    // NOTE(review): the loop below has no body as shown - presumably each hitCount is added to ss.
    for (int hitCount : hitCounts) {
    // All of this is completely bogus and mostly captures the following function:
    // f(output) = I-eyeballed-the(output) == they-look-ok.
    // It's pretty much a golden file...
    // The fact that stdev doesn't increase with increasing missCount is captured outside.
    double avg = ss.getSum() / ss.getN(), stdev = ss.getStandardDeviation(), cv = stdev / avg;
    // Accept values within 2.5 standard deviations of the mean, and a cv of at most 0.22.
    double allowedMin = avg - 2.5 * stdev, allowedMax = avg + 2.5 * stdev;
    // NOTE(review): the message on the next line has lost the call that consumed it
    // (the closing parenthesis has no opener) - confirm against the upstream source.
    if (allowedMin > ss.getMin() || allowedMax < ss.getMax() || cv > 0.22) {"The distribution for " + locs + " locations, " + missCount + " misses isn't to " + "our liking: avg " + avg + ", stdev " + stdev + ", cv " + cv + ", min " + ss.getMin() + ", max " + ss.getMax());
    return cv;
Also used : SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ArrayList(java.util.ArrayList) List(java.util.List)

Example 28 with Max

use of org.apache.commons.math3.stat.descriptive.rank.Max in project vcell by virtualcell.

the class TimeSeriesMultitrialData method kolmogorovSmirnovTest.

public static double kolmogorovSmirnovTest(double[] rawData1, double[] rawData2) {
    try {
        int numBins = 1 + (int) Math.ceil(Math.sqrt(rawData1.length));
        // rawData2 = ramp(0,10,rawData2.length);
        Max max = new Max();
        Min min = new Min();
        double[] cdf1 = calculateCDF(rawData1, min.getResult(), max.getResult(), numBins);
        double[] cdf2 = calculateCDF(rawData2, min.getResult(), max.getResult(), numBins);
        KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
        return test.kolmogorovSmirnovStatistic(cdf1, cdf2);
    } catch (Exception e) {
        return -1;
Also used : Min(org.apache.commons.math3.stat.descriptive.rank.Min) Max(org.apache.commons.math3.stat.descriptive.rank.Max) KolmogorovSmirnovTest(org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest) ExpressionException(cbit.vcell.parser.ExpressionException)

Example 29 with Max

use of org.apache.commons.math3.stat.descriptive.rank.Max in project vcell by virtualcell.

the class TimeSeriesMultitrialData method chiSquaredTest.

public static double chiSquaredTest(double[] rawData1, double[] rawData2) {
    try {
        int numBins = 1 + (int) Math.ceil(Math.sqrt(rawData1.length));
        // rawData2 = ramp(0,10,rawData2.length);
        TimeSeriesMultitrialData.MinMaxHelp minMaxHelp1 = new TimeSeriesMultitrialData.MinMaxHelp(rawData1);
        TimeSeriesMultitrialData.MinMaxHelp minMaxHelp2 = new TimeSeriesMultitrialData.MinMaxHelp(rawData2);
        double min = Math.min(minMaxHelp1.min, minMaxHelp2.min);
        double max = Math.max(minMaxHelp1.max, minMaxHelp2.max);
        long[] histogram1 = calcHistogram(rawData1, min, max, numBins);
        long[] histogram2 = calcHistogram(rawData2, min, max, numBins);
        // remove histogram indices where both bins are zero
        ArrayList<Long> histogram1List = new ArrayList<Long>();
        ArrayList<Long> histogram2List = new ArrayList<Long>();
        for (int i = 0; i < histogram1.length; i++) {
            if (histogram1[i] != 0 || histogram2[i] != 0) {
            // }else{
            // histogram1List.add(new Long(1));
            // histogram2List.add(new Long(1));
        histogram1 = new long[histogram1List.size()];
        histogram2 = new long[histogram2List.size()];
        for (int i = 0; i < histogram1List.size(); i++) {
            histogram1[i] = histogram1List.get(i);
            histogram2[i] = histogram2List.get(i);
        if (histogram1.length == 1) {
            return 0.0;
        ChiSquareTest chiSquareTest = new ChiSquareTest();
        return chiSquareTest.chiSquareTestDataSetsComparison(histogram1, histogram2);
    } catch (Exception e) {
        return -1;
Also used : ArrayList(java.util.ArrayList) ChiSquareTest(org.apache.commons.math3.stat.inference.ChiSquareTest) ExpressionException(cbit.vcell.parser.ExpressionException)

Example 30 with Max

use of org.apache.commons.math3.stat.descriptive.rank.Max in project FSensor by KalebKE.

the class CalibrationUtil method getCalibration.

/**
 * Transforms the ellipsoid into a sphere with the offset vector = [0,0,0]
 * and the radii vector = [1,1,1].
 *
 * @param fitPoints the representation of the calibration ellipsoid
 * @return a {@code Calibration} wrapping the computed scaling matrix
 */
public static Calibration getCalibration(FitPoints fitPoints) {
    // The scalar values to transform the radii vector into [1,1,1]
    RealMatrix scalar = new Array2DRowRealMatrix(3, 3);
    // RIV determines the magnitude of the radii. We have to know the
    // magnitudes because the eigenvalues, and thus the radii, are returned
    // in ascending order. Without knowing the magnitudes, we wouldn't know
    // what radii to apply to what axis.
    // Find the max and minimum magnitudes.
    double max = fitPoints.riv.getEntry(0);
    double min = fitPoints.riv.getEntry(0);
    // The indexes of the maximum, median, and minimum radii.
    // Note that these are the opposite of the max and min
    // because a smaller riv value means a greater magnitude.
    int maxi = 0, midi = 0, mini = 0;
    // Find max and min radii
    for (int i = 0; i < fitPoints.riv.getDimension(); i++) {
        if (fitPoints.riv.getEntry(i) > max) {
            max = fitPoints.riv.getEntry(i);
            mini = i;
        if (fitPoints.riv.getEntry(i) < min) {
            min = fitPoints.riv.getEntry(i);
            maxi = i;
    // Find median radii
    for (int i = 0; i < fitPoints.riv.getDimension(); i++) {
        if (fitPoints.riv.getEntry(i) < max && fitPoints.riv.getEntry(i) > min) {
            midi = i;
    // Create the scalar vector in the correct orientation.
    scalar.setEntry(0, 0, 1 / fitPoints.radii.getEntry(mini));
    scalar.setEntry(1, 1, 1 / fitPoints.radii.getEntry(midi));
    scalar.setEntry(2, 2, 1 / fitPoints.radii.getEntry(maxi));
    return new Calibration(scalar,;
Also used : Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix)


ArrayList (java.util.ArrayList)26 List (java.util.List)19 Collectors (java.util.stream.Collectors) DescriptiveStatistics (org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)13 Arrays (java.util.Arrays)11 Map (java.util.Map)11 IntStream (java.util.stream.IntStream) RandomGenerator (org.apache.commons.math3.random.RandomGenerator)10 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)9 RealMatrix (org.apache.commons.math3.linear.RealMatrix)9 Plot2 (ij.gui.Plot2)8 File (java.io.File) IOException (java.io.IOException) TooManyEvaluationsException (org.apache.commons.math3.exception.TooManyEvaluationsException)7 Test (org.testng.annotations.Test)7 StoredDataStatistics (gdsc.core.utils.StoredDataStatistics)6 Collections (java.util.Collections)6 HashMap (java.util.HashMap)6 Random (java.util.Random)6 UnivariateFunction (org.apache.commons.math3.analysis.UnivariateFunction)6