Use of org.apache.commons.math3.stat.descriptive.rank.Max in project metron by apache.
The class HLLPMeasurement, method main.
public static void main(String[] args) {
    Options options = new Options();
    try {
        CommandLineParser parser = new PosixParser();
        CommandLine cmd = null;
        try {
            cmd = ParserOptions.parse(parser, args);
        } catch (ParseException pe) {
            pe.printStackTrace();
            final HelpFormatter usageFormatter = new HelpFormatter();
            usageFormatter.printHelp("HLLPMeasurement", null, options, null, true);
            System.exit(-1);
        }
        if (cmd.hasOption("h")) {
            final HelpFormatter usageFormatter = new HelpFormatter();
            usageFormatter.printHelp("HLLPMeasurement", null, options, null, true);
            System.exit(0);
        }
        final String chartDelim = ParserOptions.CHART_DELIM.get(cmd, "|");
        final int numTrials = Integer.parseInt(ParserOptions.NUM_TRIALS.get(cmd, "5000"));
        final int cardMin = Integer.parseInt(ParserOptions.CARD_MIN.get(cmd, "200"));
        final int cardMax = Integer.parseInt(ParserOptions.CARD_MAX.get(cmd, "1000"));
        final int cardStep = Integer.parseInt(ParserOptions.CARD_STEP.get(cmd, "200"));
        final int cardStart = (((cardMin - 1) / cardStep) * cardStep) + cardStep;
        final int spMin = Integer.parseInt(ParserOptions.SP_MIN.get(cmd, "4"));
        final int spMax = Integer.parseInt(ParserOptions.SP_MAX.get(cmd, "32"));
        final int spStep = Integer.parseInt(ParserOptions.SP_STEP.get(cmd, "4"));
        final int pMin = Integer.parseInt(ParserOptions.P_MIN.get(cmd, "4"));
        final int pMax = Integer.parseInt(ParserOptions.P_MAX.get(cmd, "32"));
        final int pStep = Integer.parseInt(ParserOptions.P_STEP.get(cmd, "4"));
        final double errorPercentile = Double.parseDouble(ParserOptions.ERR_PERCENTILE.get(cmd, "50"));
        final double timePercentile = Double.parseDouble(ParserOptions.TIME_PERCENTILE.get(cmd, "50"));
        final double sizePercentile = Double.parseDouble(ParserOptions.SIZE_PERCENTILE.get(cmd, "50"));
        final boolean formatErrPercent = Boolean.parseBoolean(ParserOptions.ERR_FORMAT_PERCENT.get(cmd, "true"));
        final int errMultiplier = formatErrPercent ? 100 : 1;
        final Function<Double, String> errorFormatter = (v -> ERR_FORMAT.format(v * errMultiplier));
        final Function<Double, String> timeFormatter = (v -> TIME_FORMAT.format(v / NANO_TO_MILLIS));
        final Function<Double, String> sizeFormatter = (v -> SIZE_FORMAT.format(v));
        final String[] chartKey = new String[] {
            "card: cardinality",
            "sp: sparse precision value",
            "p: normal precision value",
            "err: error as a percent of the expected cardinality; ",
            "time: total time to add all values to the hllp estimator and calculate a cardinality estimate",
            "size: size of the hllp set in bytes once all values have been added for the specified cardinality",
            "l=low, m=mid(based on percentile chosen), h=high, std=standard deviation"
        };
        final String[] chartHeader = new String[] { "card", "sp", "p", "err l/m/h/std (% of actual)", "time l/m/h/std (ms)", "size l/m/h/std (b)" };
        final int[] chartPadding = new int[] { 10, 10, 10, 40, 40, 30 };
        if (spMin < pMin) {
            throw new IllegalArgumentException("p must be <= sp");
        }
        if (spMax < pMax) {
            throw new IllegalArgumentException("p must be <= sp");
        }
        println("Options Used");
        println("------------");
        println("num trials: " + numTrials);
        println("card min: " + cardMin);
        println("card max: " + cardMax);
        println("card step: " + cardStep);
        println("card start: " + cardStart);
        println("sp min: " + spMin);
        println("sp max: " + spMax);
        println("sp step: " + spStep);
        println("p min: " + pMin);
        println("p max: " + pMax);
        println("p step: " + pStep);
        println("error percentile: " + errorPercentile);
        println("time percentile: " + timePercentile);
        println("size percentile: " + sizePercentile);
        println("format err as %: " + formatErrPercent);
        println("");
        printHeading(chartKey, chartHeader, chartPadding, chartDelim);
        for (int c = cardStart; c <= cardMax; c += cardStep) {
            for (int sp = spMin; sp <= spMax; sp += spStep) {
                for (int p = pMin; p <= pMax; p += pStep) {
                    DescriptiveStatistics errorStats = new DescriptiveStatistics();
                    DescriptiveStatistics timeStats = new DescriptiveStatistics();
                    DescriptiveStatistics sizeStats = new DescriptiveStatistics();
                    for (int i = 0; i < numTrials; i++) {
                        List<Object> trialSet = buildTrialSet(c);
                        Set<Object> unique = new HashSet<>();
                        unique.addAll(trialSet);
                        long distinctVals = unique.size();
                        HyperLogLogPlus hllp = new HyperLogLogPlus(p, sp);
                        long timeStart = System.nanoTime();
                        hllp.addAll(trialSet);
                        long dvEstimate = hllp.cardinality();
                        long timeEnd = System.nanoTime();
                        long timeElapsed = timeEnd - timeStart;
                        double rawError = Math.abs(dvEstimate - distinctVals) / (double) distinctVals;
                        errorStats.addValue(rawError);
                        timeStats.addValue(timeElapsed);
                        sizeStats.addValue(SerDeUtils.toBytes(hllp).length);
                    }
                    MeasureResultFormatter errorRF = new MeasureResultFormatter(errorStats, errorFormatter, errorPercentile);
                    MeasureResultFormatter timeRF = new MeasureResultFormatter(timeStats, timeFormatter, timePercentile);
                    MeasureResultFormatter sizeRF = new MeasureResultFormatter(sizeStats, sizeFormatter, sizePercentile);
                    println(formatWithPadding(new String[] { "" + c, "" + sp, "" + p, errorRF.getFormattedResults(), timeRF.getFormattedResults(), sizeRF.getFormattedResults() }, chartPadding, chartDelim));
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
    }
}
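MeasureResultFormatter and the other Metron helpers (ParserOptions, buildTrialSet, printHeading, formatWithPadding) are not included in this snippet. As a rough, hedged sketch of what the l/m/h/std chart columns correspond to, the values map directly onto commons-math3 DescriptiveStatistics accessors; the class and method names below are hypothetical, not Metron's:

import java.util.function.Function;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

public class LmhsSketch {
    // Formats one chart cell as "low/mid/high/std", where mid is the chosen
    // percentile (50 = the median with the defaults used above).
    public static String formatLmhs(DescriptiveStatistics stats, Function<Double, String> fmt, double percentile) {
        return fmt.apply(stats.getMin()) + "/"
             + fmt.apply(stats.getPercentile(percentile)) + "/"
             + fmt.apply(stats.getMax()) + "/"
             + fmt.apply(stats.getStandardDeviation());
    }
}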
Use of org.apache.commons.math3.stat.descriptive.rank.Max in project hive by apache.
The class TestHostAffinitySplitLocationProvider, method testHashDistribution.
private double testHashDistribution(int locs, final int missCount, FileSplit[] splits, AtomicInteger errorCount) {
    // This relies heavily on what method determineSplits ... calls and doesn't.
    // We could do a wrapper with only size() and get() methods instead of List, to be sure.
    @SuppressWarnings("unchecked")
    List<String> partLocs = (List<String>) Mockito.mock(List.class);
    Mockito.when(partLocs.size()).thenReturn(locs);
    final AtomicInteger state = new AtomicInteger(0);
    Mockito.when(partLocs.get(Mockito.anyInt())).thenAnswer(new Answer<String>() {
        @Override
        public String answer(InvocationOnMock invocation) throws Throwable {
            return (state.getAndIncrement() == missCount) ? "not-null" : null;
        }
    });
    int[] hitCounts = new int[locs];
    for (int splitIx = 0; splitIx < splits.length; ++splitIx) {
        state.set(0);
        int index = HostAffinitySplitLocationProvider.determineLocation(partLocs, splits[splitIx].getPath().toString(), splits[splitIx].getStart(), null);
        ++hitCounts[index];
    }
    SummaryStatistics ss = new SummaryStatistics();
    for (int hitCount : hitCounts) {
        ss.addValue(hitCount);
    }
    // All of this is completely bogus and mostly captures the following function:
    // f(output) = I-eyeballed-the(output) == they-look-ok.
    // It's pretty much a golden file...
    // The fact that stdev doesn't increase with increasing missCount is captured outside.
    double avg = ss.getSum() / ss.getN(), stdev = ss.getStandardDeviation(), cv = stdev / avg;
    double allowedMin = avg - 2.5 * stdev, allowedMax = avg + 2.5 * stdev;
    if (allowedMin > ss.getMin() || allowedMax < ss.getMax() || cv > 0.22) {
        LOG.info("The distribution for " + locs + " locations, " + missCount + " misses isn't to "
            + "our liking: avg " + avg + ", stdev " + stdev + ", cv " + cv
            + ", min " + ss.getMin() + ", max " + ss.getMax());
        errorCount.incrementAndGet();
    }
    return cv;
}
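The acceptance check above is easy to lift out on its own. Here is a self-contained sketch of the same coefficient-of-variation test against commons-math3's SummaryStatistics; the 2.5-stdev bounds and the 0.22 threshold are simply copied from the Hive test, not general-purpose constants:

import org.apache.commons.math3.stat.descriptive.SummaryStatistics;

public class CvCheckSketch {
    // Returns true when the hit counts look roughly uniform by the same
    // criteria the test uses: every count within 2.5 standard deviations of
    // the mean, and a coefficient of variation of at most 0.22.
    public static boolean looksUniform(int[] hitCounts) {
        SummaryStatistics ss = new SummaryStatistics();
        for (int hitCount : hitCounts) {
            ss.addValue(hitCount);
        }
        double avg = ss.getMean();
        double stdev = ss.getStandardDeviation();
        double cv = stdev / avg;
        return ss.getMin() >= avg - 2.5 * stdev
            && ss.getMax() <= avg + 2.5 * stdev
            && cv <= 0.22;
    }
}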
Use of org.apache.commons.math3.stat.descriptive.rank.Max in project vcell by virtualcell.
The class TimeSeriesMultitrialData, method kolmogorovSmirnovTest.
public static double kolmogorovSmirnovTest(double[] rawData1, double[] rawData2) {
    try {
        int numBins = 1 + (int) Math.ceil(Math.sqrt(rawData1.length));
        // rawData2 = ramp(0,10,rawData2.length);
        Max max = new Max();
        max.incrementAll(rawData1);
        max.incrementAll(rawData2);
        Min min = new Min();
        min.incrementAll(rawData1);
        min.incrementAll(rawData2);
        double[] cdf1 = calculateCDF(rawData1, min.getResult(), max.getResult(), numBins);
        double[] cdf2 = calculateCDF(rawData2, min.getResult(), max.getResult(), numBins);
        KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
        return test.kolmogorovSmirnovStatistic(cdf1, cdf2);
    } catch (Exception e) {
        e.printStackTrace(System.out);
        return -1;
    }
}
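The Max and Min accumulators above are the org.apache.commons.math3.stat.descriptive.rank classes this page aggregates. A self-contained sketch of the same shared-range computation follows; note that commons-math3's KolmogorovSmirnovTest can also compare the raw samples directly (test.kolmogorovSmirnovStatistic(rawData1, rawData2)), whereas the method above compares binned CDFs produced by the project's calculateCDF helper.

import org.apache.commons.math3.stat.descriptive.rank.Max;
import org.apache.commons.math3.stat.descriptive.rank.Min;

public class RangeSketch {
    // Computes the shared [min, max] range of two samples, as done above
    // before binning both data sets onto a common grid.
    public static double[] sharedRange(double[] a, double[] b) {
        Max max = new Max();
        max.incrementAll(a);
        max.incrementAll(b);
        Min min = new Min();
        min.incrementAll(a);
        min.incrementAll(b);
        return new double[] { min.getResult(), max.getResult() };
    }
}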
Use of org.apache.commons.math3.stat.descriptive.rank.Max in project vcell by virtualcell.
The class TimeSeriesMultitrialData, method chiSquaredTest.
public static double chiSquaredTest(double[] rawData1, double[] rawData2) {
    try {
        int numBins = 1 + (int) Math.ceil(Math.sqrt(rawData1.length));
        // rawData2 = ramp(0,10,rawData2.length);
        TimeSeriesMultitrialData.MinMaxHelp minMaxHelp1 = new TimeSeriesMultitrialData.MinMaxHelp(rawData1);
        TimeSeriesMultitrialData.MinMaxHelp minMaxHelp2 = new TimeSeriesMultitrialData.MinMaxHelp(rawData2);
        double min = Math.min(minMaxHelp1.min, minMaxHelp2.min);
        double max = Math.max(minMaxHelp1.max, minMaxHelp2.max);
        long[] histogram1 = calcHistogram(rawData1, min, max, numBins);
        long[] histogram2 = calcHistogram(rawData2, min, max, numBins);
        //
        // remove histogram indices where both bins are zero
        //
        ArrayList<Long> histogram1List = new ArrayList<Long>();
        ArrayList<Long> histogram2List = new ArrayList<Long>();
        for (int i = 0; i < histogram1.length; i++) {
            if (histogram1[i] != 0 || histogram2[i] != 0) {
                histogram1List.add(histogram1[i]);
                histogram2List.add(histogram2[i]);
            // } else {
            //     histogram1List.add(new Long(1));
            //     histogram2List.add(new Long(1));
            }
        }
        histogram1 = new long[histogram1List.size()];
        histogram2 = new long[histogram2List.size()];
        for (int i = 0; i < histogram1List.size(); i++) {
            histogram1[i] = histogram1List.get(i);
            histogram2[i] = histogram2List.get(i);
        }
        if (histogram1.length == 1) {
            return 0.0;
        }
        ChiSquareTest chiSquareTest = new ChiSquareTest();
        return chiSquareTest.chiSquareTestDataSetsComparison(histogram1, histogram2);
    } catch (Exception e) {
        e.printStackTrace(System.out);
        return -1;
    }
}
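For callers, chiSquareTestDataSetsComparison returns a p-value, so the method above yields a value in [0, 1], or -1 if an exception was swallowed. A hedged usage sketch with purely illustrative sample arrays:

public class ChiSquaredUsageSketch {
    public static void main(String[] args) {
        // Illustrative sample arrays only; real callers pass per-trial data arrays.
        double[] trialA = { 1.0, 2.0, 2.0, 3.0, 5.0, 8.0 };
        double[] trialB = { 1.0, 1.0, 2.0, 3.0, 5.0, 9.0 };
        double pValue = TimeSeriesMultitrialData.chiSquaredTest(trialA, trialB);
        // A small p-value (and not the -1 error sentinel) suggests the two
        // binned distributions are unlikely to be the same.
        if (pValue >= 0 && pValue < 0.05) {
            System.out.println("Distributions differ significantly: p = " + pValue);
        }
    }
}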
Use of org.apache.commons.math3.stat.descriptive.rank.Max in project FSensor by KalebKE.
The class CalibrationUtil, method getCalibration.
/**
 * Transforms the ellipsoid into a sphere with the offset vector = [0,0,0]
 * and the radii vector = [1,1,1].
 *
 * @param fitPoints the representation of the calibration ellipsoid
 */
public static Calibration getCalibration(FitPoints fitPoints) {
    // The scalar values to transform the radii vector into [1,1,1]
    RealMatrix scalar = new Array2DRowRealMatrix(3, 3);
    // RIV determines the magnitude of the radii. We have to know the
    // magnitudes because the eigenvalues, and thus the radii, are returned
    // in ascending order. Without knowing the magnitudes, we wouldn't know
    // what radii to apply to what axis.
    // Find the max and minimum magnitudes.
    double max = fitPoints.riv.getEntry(0);
    double min = fitPoints.riv.getEntry(0);
    // The indexes of the maximum, median, and minimum radii.
    // Note that these are the opposite of the max and min
    // because a smaller riv value means a greater magnitude.
    int maxi = 0, midi = 0, mini = 0;
    // Find max and min radii
    for (int i = 0; i < fitPoints.riv.getDimension(); i++) {
        if (fitPoints.riv.getEntry(i) > max) {
            max = fitPoints.riv.getEntry(i);
            mini = i;
        }
        if (fitPoints.riv.getEntry(i) < min) {
            min = fitPoints.riv.getEntry(i);
            maxi = i;
        }
    }
    // Find median radii
    for (int i = 0; i < fitPoints.riv.getDimension(); i++) {
        if (fitPoints.riv.getEntry(i) < max && fitPoints.riv.getEntry(i) > min) {
            midi = i;
        }
    }
    // Create the scalar vector in the correct orientation.
    scalar.setEntry(0, 0, 1 / fitPoints.radii.getEntry(mini));
    scalar.setEntry(1, 1, 1 / fitPoints.radii.getEntry(midi));
    scalar.setEntry(2, 2, 1 / fitPoints.radii.getEntry(maxi));
    return new Calibration(scalar, fitPoints.center);
}