Search in sources :

Example 11 with Min

use of org.apache.commons.math3.stat.descriptive.rank.Min in project gatk-protected by broadinstitute.

the class RobustBrentSolver method doSolve.

/**
 * Searches for roots of the objective function between {@code getMin()} and {@code getMax()}.
 *
 * <p>The objective is evaluated on a hybrid search grid, every bracketing interval found on that
 * grid is refined with a {@link BrentSolver}, and one of the resulting roots is returned. When a
 * merit function is set and more than one root is found, the root with the highest merit wins
 * (the earliest such root on ties); otherwise the first root is returned.
 *
 * @return the selected root
 * @throws TooManyEvaluationsException declared by the solver contract
 * @throws NoBracketingException if no bracketing interval is detected on the search grid
 */
@Override
protected double doSolve() throws TooManyEvaluationsException, NoBracketingException {
    final double min = getMin();
    final double max = getMax();
    final double[] xSearchGrid = createHybridSearchGrid(min, max, numBisections, depth);
    final double[] fSearchGrid = Arrays.stream(xSearchGrid).map(this::computeObjectiveValue).toArray();
    /* find bracketing intervals on the search grid */
    final List<Bracket> bracketsList = detectBrackets(xSearchGrid, fSearchGrid);
    if (bracketsList.isEmpty()) {
        throw new NoBracketingException(min, max, fSearchGrid[0], fSearchGrid[fSearchGrid.length - 1]);
    }
    final BrentSolver solver = new BrentSolver(getRelativeAccuracy(), getAbsoluteAccuracy(), getFunctionValueAccuracy());
    /* refine each bracket, starting from its midpoint */
    final List<Double> roots = bracketsList.stream()
            .map(b -> solver.solve(getMaxEvaluations(), this::computeObjectiveValue, b.min, b.max, 0.5 * (b.min + b.max)))
            .collect(Collectors.toList());
    if (roots.size() == 1 || meritFunc == null) {
        return roots.get(0);
    }
    final double[] merits = roots.stream().mapToDouble(meritFunc::value).toArray();
    /*
     * Compare merits with Double.compare: casting the difference to int would truncate any gap
     * smaller than 1 to zero and report distinct merits as equal.
     */
    final int bestRootIndex = IntStream.range(0, roots.size())
            .boxed()
            .max((i, j) -> Double.compare(merits[i], merits[j]))
            .get();
    return roots.get(bestRootIndex);
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) FastMath(org.apache.commons.math3.util.FastMath) Collectors(java.util.stream.Collectors) BrentSolver(org.apache.commons.math3.analysis.solvers.BrentSolver) AbstractUnivariateSolver(org.apache.commons.math3.analysis.solvers.AbstractUnivariateSolver) ArrayList(java.util.ArrayList) List(java.util.List) UnivariateFunction(org.apache.commons.math3.analysis.UnivariateFunction) TooManyEvaluationsException(org.apache.commons.math3.exception.TooManyEvaluationsException) Utils(org.broadinstitute.hellbender.utils.Utils) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Nullable(javax.annotation.Nullable) NoBracketingException(org.apache.commons.math3.exception.NoBracketingException) NoBracketingException(org.apache.commons.math3.exception.NoBracketingException) BrentSolver(org.apache.commons.math3.analysis.solvers.BrentSolver)

Example 12 with Min

use of org.apache.commons.math3.stat.descriptive.rank.Min in project gatk by broadinstitute.

the class RobustBrentSolver method doSolve.

/**
 * Searches for roots of the objective function between {@code getMin()} and {@code getMax()}.
 *
 * <p>The objective is evaluated on a hybrid search grid, every bracketing interval found on that
 * grid is refined with a {@link BrentSolver}, and one of the resulting roots is returned. When a
 * merit function is set and more than one root is found, the root with the highest merit wins
 * (the earliest such root on ties); otherwise the first root is returned.
 *
 * @return the selected root
 * @throws TooManyEvaluationsException declared by the solver contract
 * @throws NoBracketingException if no bracketing interval is detected on the search grid
 */
@Override
protected double doSolve() throws TooManyEvaluationsException, NoBracketingException {
    final double min = getMin();
    final double max = getMax();
    final double[] xSearchGrid = createHybridSearchGrid(min, max, numBisections, depth);
    final double[] fSearchGrid = Arrays.stream(xSearchGrid).map(this::computeObjectiveValue).toArray();
    /* find bracketing intervals on the search grid */
    final List<Bracket> bracketsList = detectBrackets(xSearchGrid, fSearchGrid);
    if (bracketsList.isEmpty()) {
        throw new NoBracketingException(min, max, fSearchGrid[0], fSearchGrid[fSearchGrid.length - 1]);
    }
    final BrentSolver solver = new BrentSolver(getRelativeAccuracy(), getAbsoluteAccuracy(), getFunctionValueAccuracy());
    /* refine each bracket, starting from its midpoint */
    final List<Double> roots = bracketsList.stream()
            .map(b -> solver.solve(getMaxEvaluations(), this::computeObjectiveValue, b.min, b.max, 0.5 * (b.min + b.max)))
            .collect(Collectors.toList());
    if (roots.size() == 1 || meritFunc == null) {
        return roots.get(0);
    }
    final double[] merits = roots.stream().mapToDouble(meritFunc::value).toArray();
    /*
     * Compare merits with Double.compare: casting the difference to int would truncate any gap
     * smaller than 1 to zero and report distinct merits as equal.
     */
    final int bestRootIndex = IntStream.range(0, roots.size())
            .boxed()
            .max((i, j) -> Double.compare(merits[i], merits[j]))
            .get();
    return roots.get(bestRootIndex);
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) FastMath(org.apache.commons.math3.util.FastMath) Collectors(java.util.stream.Collectors) BrentSolver(org.apache.commons.math3.analysis.solvers.BrentSolver) AbstractUnivariateSolver(org.apache.commons.math3.analysis.solvers.AbstractUnivariateSolver) ArrayList(java.util.ArrayList) List(java.util.List) UnivariateFunction(org.apache.commons.math3.analysis.UnivariateFunction) TooManyEvaluationsException(org.apache.commons.math3.exception.TooManyEvaluationsException) Utils(org.broadinstitute.hellbender.utils.Utils) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Nullable(javax.annotation.Nullable) NoBracketingException(org.apache.commons.math3.exception.NoBracketingException) NoBracketingException(org.apache.commons.math3.exception.NoBracketingException) BrentSolver(org.apache.commons.math3.analysis.solvers.BrentSolver)

Example 13 with Min

use of org.apache.commons.math3.stat.descriptive.rank.Min in project gatk by broadinstitute.

the class FisherExactTest method twoSidedPValue.

/**
 * Computes the 2-sided pvalue of the Fisher's exact test on a normalized table that ensures that the sum of
 * all four entries is less than 2 * 200.
 *
 * @param normalizedTable a non-null 2x2 table of counts; any other shape is rejected
 * @return the two-sided p-value, capped at 1.0
 */
public static double twoSidedPValue(final int[][] normalizedTable) {
    Utils.nonNull(normalizedTable);
    Utils.validateArg(normalizedTable.length == 2, () -> "input must be 2x2 " + Arrays.deepToString(normalizedTable));
    // Both rows must be present and hold exactly two entries.
    for (int rowIndex = 0; rowIndex < 2; rowIndex++) {
        final int[] row = normalizedTable[rowIndex];
        Utils.validateArg(row != null && row.length == 2, () -> "input must be 2x2 " + Arrays.deepToString(normalizedTable));
    }

    //Note: this implementation follows the one in R base package
    final int observed = normalizedTable[0][0];
    final int firstRowTotal = observed + normalizedTable[0][1];
    final int secondRowTotal = normalizedTable[1][0] + normalizedTable[1][1];
    final int firstColumnTotal = observed + normalizedTable[1][0];

    // Range of values the top-left cell can take given the row and column totals.
    final int lowest = Math.max(0, firstColumnTotal - secondRowTotal);
    final int highest = Math.min(firstColumnTotal, firstRowTotal);
    final IndexRange support = new IndexRange(lowest, highest + 1);
    if (support.size() <= 1) {
        //special case, support has only one value
        return 1.0;
    }

    final AbstractIntegerDistribution dist =
            new HypergeometricDistribution(null, firstRowTotal + secondRowTotal, firstRowTotal, firstColumnTotal);
    final double[] logProbabilities = support.mapToDouble(dist::logProbability);
    // Only outcomes whose log-probability does not exceed the (scaled) observed one contribute.
    final double cutoff = logProbabilities[observed - lowest] * REL_ERR;
    final double[] log10Probabilities = DoubleStream.of(logProbabilities)
            .filter(logProbability -> logProbability <= cutoff)
            .map(MathUtils::logToLog10)
            .toArray();
    final double summed = MathUtils.sumLog10(log10Probabilities);
    // min is necessary as numerical precision can result in pValue being slightly greater than 1.0
    return Math.min(summed, 1.0);
}
Also used : HypergeometricDistribution(org.apache.commons.math3.distribution.HypergeometricDistribution) AbstractIntegerDistribution(org.apache.commons.math3.distribution.AbstractIntegerDistribution)

Example 14 with Min

use of org.apache.commons.math3.stat.descriptive.rank.Min in project metron by apache.

the class HLLPMeasurement method main.

/**
 * Command-line entry point. Parses the options, prints the settings in use, then for every
 * combination of cardinality, sparse precision (sp) and precision (p) runs {@code numTrials}
 * trials against a {@code HyperLogLogPlus} estimator and prints one chart row summarizing the
 * error, elapsed time and serialized size statistics.
 *
 * <p>Any exception is printed and the process exits with status -1.
 *
 * @param args command-line arguments handed to {@code ParserOptions.parse}
 */
public static void main(String[] args) {
    // NOTE(review): this Options instance is never populated here, so the usage text printed
    // from it may list no options - confirm whether ParserOptions should supply it instead.
    Options options = new Options();
    try {
        CommandLineParser parser = new PosixParser();
        CommandLine cmd = null;
        try {
            cmd = ParserOptions.parse(parser, args);
        } catch (ParseException pe) {
            pe.printStackTrace();
            final HelpFormatter usageFormatter = new HelpFormatter();
            usageFormatter.printHelp("HLLPMeasurement", null, options, null, true);
            System.exit(-1);
        }
        if (cmd.hasOption("h")) {
            final HelpFormatter usageFormatter = new HelpFormatter();
            usageFormatter.printHelp("HLLPMeasurement", null, options, null, true);
            System.exit(0);
        }
        // Read every setting, falling back to the literal default when the option is absent.
        final String chartDelim = ParserOptions.CHART_DELIM.get(cmd, "|");
        final int numTrials = Integer.parseInt(ParserOptions.NUM_TRIALS.get(cmd, "5000"));
        final int cardMin = Integer.parseInt(ParserOptions.CARD_MIN.get(cmd, "200"));
        final int cardMax = Integer.parseInt(ParserOptions.CARD_MAX.get(cmd, "1000"));
        final int cardStep = Integer.parseInt(ParserOptions.CARD_STEP.get(cmd, "200"));
        // First multiple of cardStep that is >= cardMin (for positive values).
        final int cardStart = (((cardMin - 1) / cardStep) * cardStep) + cardStep;
        final int spMin = Integer.parseInt(ParserOptions.SP_MIN.get(cmd, "4"));
        final int spMax = Integer.parseInt(ParserOptions.SP_MAX.get(cmd, "32"));
        final int spStep = Integer.parseInt(ParserOptions.SP_STEP.get(cmd, "4"));
        final int pMin = Integer.parseInt(ParserOptions.P_MIN.get(cmd, "4"));
        final int pMax = Integer.parseInt(ParserOptions.P_MAX.get(cmd, "32"));
        final int pStep = Integer.parseInt(ParserOptions.P_STEP.get(cmd, "4"));
        final double errorPercentile = Double.parseDouble(ParserOptions.ERR_PERCENTILE.get(cmd, "50"));
        final double timePercentile = Double.parseDouble(ParserOptions.TIME_PERCENTILE.get(cmd, "50"));
        final double sizePercentile = Double.parseDouble(ParserOptions.SIZE_PERCENTILE.get(cmd, "50"));
        final boolean formatErrPercent = Boolean.parseBoolean(ParserOptions.ERR_FORMAT_PERCENT.get(cmd, "true"));
        final int errMultiplier = formatErrPercent ? 100 : 1;
        final Function<Double, String> errorFormatter = (v -> ERR_FORMAT.format(v * errMultiplier));
        final Function<Double, String> timeFormatter = (v -> TIME_FORMAT.format(v / NANO_TO_MILLIS));
        final Function<Double, String> sizeFormatter = (v -> SIZE_FORMAT.format(v));
        final String[] chartKey = new String[] { "card: cardinality", "sp: sparse precision value", "p: normal precision value", "err: error as a percent of the expected cardinality; ", "time: total time to add all values to the hllp estimator and calculate a cardinality estimate", "size: size of the hllp set in bytes once all values have been added for the specified cardinality", "l=low, m=mid(based on percentile chosen), h=high, std=standard deviation" };
        final String[] chartHeader = new String[] { "card", "sp", "p", "err l/m/h/std (% of actual)", "time l/m/h/std (ms)", "size l/m/h/std (b)" };
        final int[] chartPadding = new int[] { 10, 10, 10, 40, 40, 30 };
        // The p range must not extend above the sp range at either end.
        if (spMin < pMin) {
            throw new IllegalArgumentException("p must be <= sp");
        }
        if (spMax < pMax) {
            throw new IllegalArgumentException("p must be <= sp");
        }
        println("Options Used");
        println("------------");
        println("num trials: " + numTrials);
        println("card min: " + cardMin);
        println("card max: " + cardMax);
        println("card step: " + cardStep);
        println("card start: " + cardStart);
        println("sp min: " + spMin);
        println("sp max: " + spMax);
        println("sp step: " + spStep);
        println("p min: " + pMin);
        println("p max: " + pMax);
        println("p step: " + pStep);
        println("error percentile: " + errorPercentile);
        println("time percentile: " + timePercentile);
        println("size percentile: " + sizePercentile);
        println("format err as %: " + formatErrPercent);
        println("");
        printHeading(chartKey, chartHeader, chartPadding, chartDelim);
        // NOTE(review): the step values are not validated; a step of 0 would divide by zero
        // above or never advance the loops below - confirm whether callers can pass that.
        for (int c = cardStart; c <= cardMax; c += cardStep) {
            for (int sp = spMin; sp <= spMax; sp += spStep) {
                for (int p = pMin; p <= pMax; p += pStep) {
                    DescriptiveStatistics errorStats = new DescriptiveStatistics();
                    DescriptiveStatistics timeStats = new DescriptiveStatistics();
                    DescriptiveStatistics sizeStats = new DescriptiveStatistics();
                    for (int i = 0; i < numTrials; i++) {
                        List<Object> trialSet = buildTrialSet(c);
                        // Exact distinct count of the trial data, used as the reference value.
                        Set<Object> unique = new HashSet<>(trialSet);
                        long distinctVals = unique.size();
                        HyperLogLogPlus hllp = new HyperLogLogPlus(p, sp);
                        // Time only the add-all plus cardinality-estimate work.
                        long timeStart = System.nanoTime();
                        hllp.addAll(trialSet);
                        long dvEstimate = hllp.cardinality();
                        long timeEnd = System.nanoTime();
                        long timeElapsed = timeEnd - timeStart;
                        // Relative error of the estimate against the exact distinct count.
                        double rawError = Math.abs(dvEstimate - distinctVals) / (double) distinctVals;
                        errorStats.addValue(rawError);
                        timeStats.addValue(timeElapsed);
                        sizeStats.addValue(SerDeUtils.toBytes(hllp).length);
                    }
                    MeasureResultFormatter errorRF = new MeasureResultFormatter(errorStats, errorFormatter, errorPercentile);
                    MeasureResultFormatter timeRF = new MeasureResultFormatter(timeStats, timeFormatter, timePercentile);
                    MeasureResultFormatter sizeRF = new MeasureResultFormatter(sizeStats, sizeFormatter, sizePercentile);
                    println(formatWithPadding(new String[] { "" + c, "" + sp, "" + p, errorRF.getFormattedResults(), timeRF.getFormattedResults(), sizeRF.getFormattedResults() }, chartPadding, chartDelim));
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
    }
}
Also used : DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)

Example 15 with Min

use of org.apache.commons.math3.stat.descriptive.rank.Min in project hive by apache.

the class TestHostAffinitySplitLocationProvider method testHashDistribution.

/**
 * Assigns every split to a location through
 * {@code HostAffinitySplitLocationProvider.determineLocation} using a mocked location list,
 * then checks how evenly the hits are spread across the locations.
 *
 * @param locs number of locations reported by the mocked list
 * @param missCount number of {@code get()} calls per split that return null before one returns a value
 * @param splits the splits to place
 * @param errorCount incremented when the distribution fails the checks below
 * @return the coefficient of variation (standard deviation / average) of the hit counts
 */
private double testHashDistribution(int locs, final int missCount, FileSplit[] splits, AtomicInteger errorCount) {
    // This relies heavily on what method determineSplits ... calls and doesn't.
    // We could do a wrapper with only size() and get() methods instead of List, to be sure.
    @SuppressWarnings("unchecked") List<String> partLocs = (List<String>) Mockito.mock(List.class);
    Mockito.when(partLocs.size()).thenReturn(locs);
    // Counts get() calls made for the current split; reset before each split.
    final AtomicInteger state = new AtomicInteger(0);
    Mockito.when(partLocs.get(Mockito.anyInt())).thenAnswer(new Answer<String>() {

        @Override
        public String answer(InvocationOnMock invocation) throws Throwable {
            // Only the call numbered missCount (0-based) yields a non-null location.
            final boolean isHit = state.getAndIncrement() == missCount;
            return isHit ? "not-null" : null;
        }
    });

    int[] hitCounts = new int[locs];
    for (FileSplit split : splits) {
        state.set(0);
        int index = HostAffinitySplitLocationProvider.determineLocation(partLocs, split.getPath().toString(), split.getStart(), null);
        ++hitCounts[index];
    }

    SummaryStatistics ss = new SummaryStatistics();
    for (int hitCount : hitCounts) {
        ss.addValue(hitCount);
    }

    // All of this is completely bogus and mostly captures the following function:
    // f(output) = I-eyeballed-the(output) == they-look-ok.
    // It's pretty much a golden file...
    // The fact that stdev doesn't increase with increasing missCount is captured outside.
    double avg = ss.getSum() / ss.getN();
    double stdev = ss.getStandardDeviation();
    double cv = stdev / avg;
    double allowedMin = avg - 2.5 * stdev;
    double allowedMax = avg + 2.5 * stdev;
    boolean outsideTolerance = allowedMin > ss.getMin() || allowedMax < ss.getMax() || cv > 0.22;
    if (outsideTolerance) {
        LOG.info("The distribution for " + locs + " locations, " + missCount + " misses isn't to " + "our liking: avg " + avg + ", stdev " + stdev + ", cv " + cv + ", min " + ss.getMin() + ", max " + ss.getMax());
        errorCount.incrementAndGet();
    }
    return cv;
}
Also used : SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ArrayList(java.util.ArrayList) List(java.util.List)

Aggregations

ArrayList (java.util.ArrayList)16 List (java.util.List)10 DescriptiveStatistics (org.apache.commons.math3.stat.descriptive.DescriptiveStatistics)8 SummaryStatistics (org.apache.commons.math3.stat.descriptive.SummaryStatistics)7 Map (java.util.Map)6 UnivariateFunction (org.apache.commons.math3.analysis.UnivariateFunction)6 MaxEval (org.apache.commons.math3.optim.MaxEval)6 Collectors (java.util.stream.Collectors)5 ExpressionException (cbit.vcell.parser.ExpressionException)4 Plot2 (ij.gui.Plot2)4 TooManyEvaluationsException (org.apache.commons.math3.exception.TooManyEvaluationsException)4 InitialGuess (org.apache.commons.math3.optim.InitialGuess)4 PointValuePair (org.apache.commons.math3.optim.PointValuePair)4 RandomDataGenerator (org.apache.commons.math3.random.RandomDataGenerator)4 VisibleForTesting (com.google.common.annotations.VisibleForTesting)3 StoredDataStatistics (gdsc.core.utils.StoredDataStatistics)3 HashMap (java.util.HashMap)3 SimpsonIntegrator (org.apache.commons.math3.analysis.integration.SimpsonIntegrator)3 BrentOptimizer (org.apache.commons.math3.optim.univariate.BrentOptimizer)3 SearchInterval (org.apache.commons.math3.optim.univariate.SearchInterval)3