Use of org.apache.commons.math3.stat.descriptive.rank.Min in project gatk-protected by broadinstitute.
Class RobustBrentSolver, method doSolve:
@Override
protected double doSolve() throws TooManyEvaluationsException, NoBracketingException {
    final double min = getMin();
    final double max = getMax();
    final double[] xSearchGrid = createHybridSearchGrid(min, max, numBisections, depth);
    final double[] fSearchGrid = Arrays.stream(xSearchGrid).map(this::computeObjectiveValue).toArray();
    /* find bracketing intervals on the search grid */
    final List<Bracket> bracketsList = detectBrackets(xSearchGrid, fSearchGrid);
    if (bracketsList.isEmpty()) {
        throw new NoBracketingException(min, max, fSearchGrid[0], fSearchGrid[fSearchGrid.length - 1]);
    }
    /* refine each bracketed root with a plain Brent solver, seeded at the bracket midpoint */
    final BrentSolver solver = new BrentSolver(getRelativeAccuracy(), getAbsoluteAccuracy(), getFunctionValueAccuracy());
    final List<Double> roots = bracketsList.stream()
            .map(b -> solver.solve(getMaxEvaluations(), this::computeObjectiveValue, b.min, b.max, 0.5 * (b.min + b.max)))
            .collect(Collectors.toList());
    if (roots.size() == 1 || meritFunc == null) {
        return roots.get(0);
    }
    /* multiple roots: return the one with the highest merit (Double.compare avoids the
     * precision loss of casting a double difference to int) */
    final double[] merits = roots.stream().mapToDouble(meritFunc::value).toArray();
    final int bestRootIndex = IntStream.range(0, roots.size())
            .boxed()
            .max((i, j) -> Double.compare(merits[i], merits[j]))
            .get();
    return roots.get(bestRootIndex);
}
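The bracket detection that doSolve relies on is not shown in the snippet. A minimal sketch of what it has to do, under the assumption that Bracket simply holds the interval endpoints (both names are internal to RobustBrentSolver, so the actual implementation may differ): scan adjacent grid nodes for a sign change in the objective.

import java.util.ArrayList;
import java.util.List;

/* Hypothetical stand-ins for the solver's internals. */
final class Bracket {
    final double min;
    final double max;
    Bracket(final double min, final double max) {
        this.min = min;
        this.max = max;
    }
}

final class BracketScan {
    /* A root is bracketed wherever the objective changes sign between two
     * consecutive nodes of the search grid. */
    static List<Bracket> detectBrackets(final double[] x, final double[] f) {
        final List<Bracket> brackets = new ArrayList<>();
        for (int i = 0; i < x.length - 1; i++) {
            if (f[i] * f[i + 1] < 0.0) {
                brackets.add(new Bracket(x[i], x[i + 1]));
            }
        }
        return brackets;
    }
}

A strict sign change guarantees the bracketing precondition of BrentSolver.solve, which is why doSolve can seed each solve at the bracket midpoint without further checks; the real implementation also has to handle nodes where the objective is exactly zero.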
Use of org.apache.commons.math3.stat.descriptive.rank.Min in project gatk by broadinstitute.
Class RobustBrentSolver, method doSolve: the code is identical, line for line, to the gatk-protected version above.
Use of org.apache.commons.math3.stat.descriptive.rank.Min in project gatk by broadinstitute.
Class FisherExactTest, method twoSidedPValue:
/**
 * Computes the two-sided p-value of Fisher's exact test on a normalized table, i.e. one whose
 * four entries are guaranteed to sum to less than 2 * 200.
 */
public static double twoSidedPValue(final int[][] normalizedTable) {
    Utils.nonNull(normalizedTable);
    Utils.validateArg(normalizedTable.length == 2, () -> "input must be 2x2 " + Arrays.deepToString(normalizedTable));
    Utils.validateArg(normalizedTable[0] != null && normalizedTable[0].length == 2, () -> "input must be 2x2 " + Arrays.deepToString(normalizedTable));
    Utils.validateArg(normalizedTable[1] != null && normalizedTable[1].length == 2, () -> "input must be 2x2 " + Arrays.deepToString(normalizedTable));
    // Note: this implementation follows the one in the R base package
    final int[][] x = normalizedTable;
    final int m = x[0][0] + x[0][1];    // first row margin
    final int n = x[1][0] + x[1][1];    // second row margin
    final int k = x[0][0] + x[1][0];    // first column margin
    final int lo = Math.max(0, k - n);
    final int hi = Math.min(k, m);
    final IndexRange support = new IndexRange(lo, hi + 1);
    if (support.size() <= 1) {
        // special case: the support contains a single value, so the p-value is trivially 1
        return 1.0;
    }
    // a null RandomGenerator is fine here: only probabilities are queried, never samples
    final AbstractIntegerDistribution dist = new HypergeometricDistribution(null, m + n, m, k);
    final double[] logds = support.mapToDouble(dist::logProbability);
    // two-sided test: sum the probabilities of every table no more likely than the
    // observed one (REL_ERR absorbs floating-point noise in the comparison)
    final double threshold = logds[x[0][0] - lo] * REL_ERR;
    final double[] log10ds = DoubleStream.of(logds).filter(d -> d <= threshold).map(MathUtils::logToLog10).toArray();
    final double pValue = MathUtils.sumLog10(log10ds);
    // min is necessary as numerical precision can result in pValue being slightly greater than 1.0
    return Math.min(pValue, 1.0);
}
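To see the computation concretely, here is a self-contained sketch that reproduces the same two-sided p-value directly against commons-math3, in linear rather than log space and without the GATK utility classes. The class name FisherExample and the table {{3, 1}, {1, 3}} are made up for illustration:

import org.apache.commons.math3.distribution.HypergeometricDistribution;

public class FisherExample {
    public static void main(String[] args) {
        // illustrative 2x2 table {{3, 1}, {1, 3}}; margins m = 4, n = 4, k = 4
        final int m = 3 + 1;
        final int n = 1 + 3;
        final int k = 3 + 1;
        final HypergeometricDistribution dist = new HypergeometricDistribution(m + n, m, k);
        final double observed = dist.probability(3);   // probability of the observed table
        double pValue = 0.0;
        for (int i = Math.max(0, k - n); i <= Math.min(k, m); i++) {
            // accumulate every table at most as likely as the observed one
            if (dist.probability(i) <= observed * (1 + 1e-7)) {
                pValue += dist.probability(i);
            }
        }
        System.out.println(Math.min(pValue, 1.0));     // prints ~0.4857
    }
}

R's fisher.test(matrix(c(3, 1, 1, 3), 2, 2)) reports the same p-value, 0.4857, which is the 34/70 mass of the four tables no more likely than the observed one.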
Use of org.apache.commons.math3.stat.descriptive.rank.Min in project metron by apache.
Class HLLPMeasurement, method main:
public static void main(String[] args) {
    Options options = new Options();
    try {
        CommandLineParser parser = new PosixParser();
        CommandLine cmd = null;
        try {
            cmd = ParserOptions.parse(parser, args);
        } catch (ParseException pe) {
            pe.printStackTrace();
            final HelpFormatter usageFormatter = new HelpFormatter();
            usageFormatter.printHelp("HLLPMeasurement", null, options, null, true);
            System.exit(-1);
        }
        if (cmd.hasOption("h")) {
            final HelpFormatter usageFormatter = new HelpFormatter();
            usageFormatter.printHelp("HLLPMeasurement", null, options, null, true);
            System.exit(0);
        }
        final String chartDelim = ParserOptions.CHART_DELIM.get(cmd, "|");
        final int numTrials = Integer.parseInt(ParserOptions.NUM_TRIALS.get(cmd, "5000"));
        final int cardMin = Integer.parseInt(ParserOptions.CARD_MIN.get(cmd, "200"));
        final int cardMax = Integer.parseInt(ParserOptions.CARD_MAX.get(cmd, "1000"));
        final int cardStep = Integer.parseInt(ParserOptions.CARD_STEP.get(cmd, "200"));
        final int cardStart = (((cardMin - 1) / cardStep) * cardStep) + cardStep;
        final int spMin = Integer.parseInt(ParserOptions.SP_MIN.get(cmd, "4"));
        final int spMax = Integer.parseInt(ParserOptions.SP_MAX.get(cmd, "32"));
        final int spStep = Integer.parseInt(ParserOptions.SP_STEP.get(cmd, "4"));
        final int pMin = Integer.parseInt(ParserOptions.P_MIN.get(cmd, "4"));
        final int pMax = Integer.parseInt(ParserOptions.P_MAX.get(cmd, "32"));
        final int pStep = Integer.parseInt(ParserOptions.P_STEP.get(cmd, "4"));
        final double errorPercentile = Double.parseDouble(ParserOptions.ERR_PERCENTILE.get(cmd, "50"));
        final double timePercentile = Double.parseDouble(ParserOptions.TIME_PERCENTILE.get(cmd, "50"));
        final double sizePercentile = Double.parseDouble(ParserOptions.SIZE_PERCENTILE.get(cmd, "50"));
        final boolean formatErrPercent = Boolean.parseBoolean(ParserOptions.ERR_FORMAT_PERCENT.get(cmd, "true"));
        final int errMultiplier = formatErrPercent ? 100 : 1;
        final Function<Double, String> errorFormatter = (v -> ERR_FORMAT.format(v * errMultiplier));
        final Function<Double, String> timeFormatter = (v -> TIME_FORMAT.format(v / NANO_TO_MILLIS));
        final Function<Double, String> sizeFormatter = (v -> SIZE_FORMAT.format(v));
        final String[] chartKey = new String[] {
                "card: cardinality",
                "sp: sparse precision value",
                "p: normal precision value",
                "err: error as a percent of the expected cardinality; ",
                "time: total time to add all values to the hllp estimator and calculate a cardinality estimate",
                "size: size of the hllp set in bytes once all values have been added for the specified cardinality",
                "l=low, m=mid(based on percentile chosen), h=high, std=standard deviation" };
        final String[] chartHeader = new String[] { "card", "sp", "p", "err l/m/h/std (% of actual)", "time l/m/h/std (ms)", "size l/m/h/std (b)" };
        final int[] chartPadding = new int[] { 10, 10, 10, 40, 40, 30 };
        if (spMin < pMin) {
            throw new IllegalArgumentException("p must be <= sp");
        }
        if (spMax < pMax) {
            throw new IllegalArgumentException("p must be <= sp");
        }
        println("Options Used");
        println("------------");
        println("num trials: " + numTrials);
        println("card min: " + cardMin);
        println("card max: " + cardMax);
        println("card step: " + cardStep);
        println("card start: " + cardStart);
        println("sp min: " + spMin);
        println("sp max: " + spMax);
        println("sp step: " + spStep);
        println("p min: " + pMin);
        println("p max: " + pMax);
        println("p step: " + pStep);
        println("error percentile: " + errorPercentile);
        println("time percentile: " + timePercentile);
        println("size percentile: " + sizePercentile);
        println("format err as %: " + formatErrPercent);
        println("");
        printHeading(chartKey, chartHeader, chartPadding, chartDelim);
        for (int c = cardStart; c <= cardMax; c += cardStep) {
            for (int sp = spMin; sp <= spMax; sp += spStep) {
                for (int p = pMin; p <= pMax; p += pStep) {
                    DescriptiveStatistics errorStats = new DescriptiveStatistics();
                    DescriptiveStatistics timeStats = new DescriptiveStatistics();
                    DescriptiveStatistics sizeStats = new DescriptiveStatistics();
                    for (int i = 0; i < numTrials; i++) {
                        List<Object> trialSet = buildTrialSet(c);
                        Set<Object> unique = new HashSet<>(trialSet);
                        long distinctVals = unique.size();
                        HyperLogLogPlus hllp = new HyperLogLogPlus(p, sp);
                        long timeStart = System.nanoTime();
                        hllp.addAll(trialSet);
                        long dvEstimate = hllp.cardinality();
                        long timeEnd = System.nanoTime();
                        long timeElapsed = timeEnd - timeStart;
                        double rawError = Math.abs(dvEstimate - distinctVals) / (double) distinctVals;
                        errorStats.addValue(rawError);
                        timeStats.addValue(timeElapsed);
                        sizeStats.addValue(SerDeUtils.toBytes(hllp).length);
                    }
                    MeasureResultFormatter errorRF = new MeasureResultFormatter(errorStats, errorFormatter, errorPercentile);
                    MeasureResultFormatter timeRF = new MeasureResultFormatter(timeStats, timeFormatter, timePercentile);
                    MeasureResultFormatter sizeRF = new MeasureResultFormatter(sizeStats, sizeFormatter, sizePercentile);
                    println(formatWithPadding(new String[] { "" + c, "" + sp, "" + p, errorRF.getFormattedResults(), timeRF.getFormattedResults(), sizeRF.getFormattedResults() }, chartPadding, chartDelim));
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
    }
}
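Stripped of the option parsing and chart output, the quantity each trial measures is just the relative error of one estimator. A minimal sketch, assuming the same HyperLogLogPlus class used above is on the classpath (the value strings and the precisions 14/25 are made up for illustration):

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class HllpSketch {
    public static void main(String[] args) {
        // 1000 distinct values, so the true cardinality is known by construction
        final List<Object> values = IntStream.range(0, 1000)
                .mapToObj(i -> (Object) ("value-" + i))
                .collect(Collectors.toList());
        // p = 14 (normal precision), sp = 25 (sparse precision); p <= sp, as the
        // benchmark's argument validation requires
        final HyperLogLogPlus hllp = new HyperLogLogPlus(14, 25);
        hllp.addAll(values);
        final long estimate = hllp.cardinality();
        // same relative-error formula the benchmark feeds into errorStats
        final double relError = Math.abs(estimate - 1000) / 1000.0;
        System.out.println("estimate = " + estimate + ", relative error = " + relError);
    }
}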
Use of org.apache.commons.math3.stat.descriptive.rank.Min in project hive by apache.
Class TestHostAffinitySplitLocationProvider, method testHashDistribution:
private double testHashDistribution(int locs, final int missCount, FileSplit[] splits, AtomicInteger errorCount) {
    // This relies heavily on what method determineSplits ... calls and doesn't.
    // We could do a wrapper with only size() and get() methods instead of List, to be sure.
    @SuppressWarnings("unchecked")
    List<String> partLocs = (List<String>) Mockito.mock(List.class);
    Mockito.when(partLocs.size()).thenReturn(locs);
    final AtomicInteger state = new AtomicInteger(0);
    Mockito.when(partLocs.get(Mockito.anyInt())).thenAnswer(new Answer<String>() {
        @Override
        public String answer(InvocationOnMock invocation) throws Throwable {
            return (state.getAndIncrement() == missCount) ? "not-null" : null;
        }
    });
    int[] hitCounts = new int[locs];
    for (int splitIx = 0; splitIx < splits.length; ++splitIx) {
        state.set(0);
        int index = HostAffinitySplitLocationProvider.determineLocation(partLocs, splits[splitIx].getPath().toString(), splits[splitIx].getStart(), null);
        ++hitCounts[index];
    }
    SummaryStatistics ss = new SummaryStatistics();
    for (int hitCount : hitCounts) {
        ss.addValue(hitCount);
    }
    // All of this is completely bogus and mostly captures the following function:
    // f(output) = I-eyeballed-the(output) == they-look-ok.
    // It's pretty much a golden file...
    // The fact that stdev doesn't increase with increasing missCount is captured outside.
    double avg = ss.getSum() / ss.getN();
    double stdev = ss.getStandardDeviation();
    double cv = stdev / avg;
    double allowedMin = avg - 2.5 * stdev;
    double allowedMax = avg + 2.5 * stdev;
    if (allowedMin > ss.getMin() || allowedMax < ss.getMax() || cv > 0.22) {
        LOG.info("The distribution for " + locs + " locations, " + missCount + " misses isn't to "
                + "our liking: avg " + avg + ", stdev " + stdev + ", cv " + cv
                + ", min " + ss.getMin() + ", max " + ss.getMax());
        errorCount.incrementAndGet();
    }
    return cv;
}
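The pass/fail criterion reduces to the coefficient of variation of the per-location hit counts. A standalone sketch of just that statistic, with made-up counts for four locations:

import org.apache.commons.math3.stat.descriptive.SummaryStatistics;

public class CvSketch {
    public static void main(String[] args) {
        // hypothetical per-location hit counts from hashing splits to 4 locations
        final int[] hitCounts = { 240, 255, 251, 254 };
        final SummaryStatistics ss = new SummaryStatistics();
        for (int hitCount : hitCounts) {
            ss.addValue(hitCount);
        }
        final double avg = ss.getSum() / ss.getN();
        final double cv = ss.getStandardDeviation() / avg;  // coefficient of variation
        // the test above flags the distribution when cv > 0.22, or when the min or max
        // hit count falls outside avg +/- 2.5 * stdev
        System.out.printf("avg=%.2f stdev=%.2f cv=%.4f%n", avg, ss.getStandardDeviation(), cv);
    }
}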