use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project metron by apache.
the class LazzyLoggerImplPerfTest method runTrial.
/**
 * Executes {@code operation} {@code reps} times, timing every repetition
 * individually, and prints the duration of the whole trial to stdout.
 *
 * @param reps      how many times the operation is executed
 * @param operation the operation being measured
 * @return statistics over the per-repetition timings, each recorded as the
 *         measured time divided by {@code NANO_TO_MILLIS}
 */
private StatisticalSummary runTrial(int reps, Operation operation) {
  final DescriptiveStatistics timings = new DescriptiveStatistics();
  // The outer timeOperation call wraps the complete loop, so the total it
  // reports covers all repetitions plus the per-repetition bookkeeping.
  final long totalElapsed = timeOperation(() -> {
    int remaining = reps;
    while (remaining-- > 0) {
      final long elapsed = timeOperation(operation);
      timings.addValue(elapsed / NANO_TO_MILLIS);
    }
  });
  System.out.println("Total trial time (ms): " + (totalElapsed / NANO_TO_MILLIS));
  return timings;
}
use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project metron by apache.
the class StellarStatisticsFunctionsTest method testMergeProviders.
/**
 * Checks that merging several statistics providers with {@code STATS_MERGE}
 * yields a provider that agrees with commons-math reference statistics
 * computed over every value that was fed into the individual providers.
 */
@Test
public void testMergeProviders() {
  // Fixed seed keeps the generated samples reproducible between runs.
  GaussianRandomGenerator generator = new GaussianRandomGenerator(new MersenneTwister(1L));
  SummaryStatistics expectedSummary = new SummaryStatistics();
  DescriptiveStatistics expectedDescriptive = new DescriptiveStatistics();

  /*
   Build 10 providers, each populated with 100 values drawn from the gaussian
   generator. Every drawn value is also added to the commons-math reference
   statistics so the merged provider can be compared against them.
   */
  List<StatisticsProvider> providers = new ArrayList<>();
  for (int providerIndex = 0; providerIndex < 10; ++providerIndex) {
    List<Double> draws = new ArrayList<>();
    for (int drawIndex = 0; drawIndex < 100; ++drawIndex) {
      double value = generator.nextNormalizedDouble();
      draws.add(value);
      expectedSummary.addValue(value);
      expectedDescriptive.addValue(value);
    }
    String addExpression = "STATS_ADD(STATS_INIT(), " + Joiner.on(",").join(draws) + ")";
    providers.add((StatisticsProvider) run(addExpression, new HashMap<>()));
  }

  /*
   Expose each provider as a variable named provider_<index>, merge them all
   in a single expression and validate the result.
   */
  Map<String, Object> variables = new HashMap<>();
  int position = 0;
  for (StatisticsProvider provider : providers) {
    variables.put("provider_" + position, provider);
    position++;
  }
  String mergeExpression = "STATS_MERGE([" + Joiner.on(",").join(variables.keySet()) + "])";
  StatisticsProvider merged = (StatisticsProvider) run(mergeExpression, variables);
  OnlineStatisticsProviderTest.validateStatisticsProvider(merged, expectedSummary, expectedDescriptive);
}
use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project metron by apache.
the class MedianAbsoluteDeviationTest method testLongTailed.
/**
 * Feeds 10000 samples from a seeded t-distribution into the MAD outlier
 * functions and checks that the extremes (minimum, maximum, mean +/- 4
 * standard deviations) score above 3.5 while the mean itself does not.
 */
@Test
public void testLongTailed() {
  final String scoreExpression = "OUTLIER_MAD_SCORE(currentState, value)";
  TDistribution distribution = new TDistribution(new MersenneTwister(0L), 100);
  DescriptiveStatistics reference = new DescriptiveStatistics();
  List<MedianAbsoluteDeviationFunctions.State> states = new ArrayList<>();

  // Initialise the running state by merging the state list with NULL.
  MedianAbsoluteDeviationFunctions.State currentState =
      (MedianAbsoluteDeviationFunctions.State) run("OUTLIER_MAD_STATE_MERGE(states, NULL)", ImmutableMap.of("states", states));

  // Counts samples since the last merge; the first merge happens once it reaches 1000.
  int sinceMerge = 0;
  for (int drawn = 0; drawn < 10000; ++drawn) {
    Double sample = distribution.sample();
    reference.addValue(sample);
    run("OUTLIER_MAD_ADD(currentState, data)", ImmutableMap.of("currentState", currentState, "data", sample));
    if (sinceMerge >= 1000) {
      sinceMerge = 0;
      // Merge against (at most) the five most recent entries of `states`.
      // NOTE(review): nothing visible in this method ever adds to `states`,
      // so this window appears to always be empty - confirm this is intended.
      int windowStart = Math.max(0, states.size() - 5);
      List<MedianAbsoluteDeviationFunctions.State> window = new ArrayList<>(states.subList(windowStart, states.size()));
      currentState = (MedianAbsoluteDeviationFunctions.State) run("OUTLIER_MAD_STATE_MERGE(states, currentState)", ImmutableMap.of("states", window, "currentState", currentState));
    }
    ++sinceMerge;
  }

  // The smallest observed value must be flagged.
  Double minScore = (Double) run(scoreExpression, ImmutableMap.of("currentState", currentState, "value", reference.getMin()));
  assertTrue(minScore > 3.5, "Score: " + minScore + " is not an outlier despite being a minimum.");

  // The largest observed value must be flagged.
  Double maxScore = (Double) run(scoreExpression, ImmutableMap.of("currentState", currentState, "value", reference.getMax()));
  assertTrue(maxScore > 3.5, "Score: " + maxScore + " is not an outlier despite being a maximum");

  // Four standard deviations above the mean must be flagged.
  Double upperScore = (Double) run(scoreExpression, ImmutableMap.of("currentState", currentState, "value", reference.getMean() + 4 * reference.getStandardDeviation()));
  assertTrue(upperScore > 3.5, "Score: " + upperScore + " is not an outlier despite being 4 std deviations away from the mean");

  // Four standard deviations below the mean must be flagged.
  Double lowerScore = (Double) run(scoreExpression, ImmutableMap.of("currentState", currentState, "value", reference.getMean() - 4 * reference.getStandardDeviation()));
  assertTrue(lowerScore > 3.5, "Score: " + lowerScore + " is not an outlier despite being 4 std deviations away from the mean");

  // The mean itself must not be flagged.
  Double meanScore = (Double) run(scoreExpression, ImmutableMap.of("currentState", currentState, "value", reference.getMean()));
  assertFalse(meanScore > 3.5, "Score: " + meanScore + " is an outlier despite being the mean");
}
use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project metron by apache.
the class UniformSamplerTest method validateDistribution.
/**
 * Asserts that the values produced by {@code sample} have approximately the
 * same mean and standard deviation (each within 0.1) as the reference
 * {@code distribution}.
 *
 * @param sample       sampler whose {@code get()} result is iterated and measured
 * @param distribution reference statistics the sampled values are compared against
 */
public void validateDistribution(Sampler sample, DescriptiveStatistics distribution) {
  DescriptiveStatistics observed = new DescriptiveStatistics();
  for (Object value : sample.get()) {
    // Elements are cast to Double; any other element type fails with a ClassCastException.
    observed.addValue((Double) value);
  }
  // Reference value goes first (expected), sampled value second (actual), so a
  // failure message reports the two the right way round.
  assertEquals(distribution.getMean(), observed.getMean(), .1);
  assertEquals(distribution.getStandardDeviation(), observed.getStandardDeviation(), .1);
}
use of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics in project pinot by linkedin.
the class ForwardIndexReaderBenchmark method multiValuedReadBenchMarkV1.
/**
 * Benchmarks reading a v1 fixed-bit multi-value forward index file and prints
 * timing statistics (milliseconds per run, {@code MAX_RUNS} runs per phase) to stdout.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>dump every document's values as one comma-separated line into
 *       {@code /tmp/<file name>.raw};</li>
 *   <li>time a byte-by-byte scan of the file contents held in a direct buffer;</li>
 *   <li>time reading every document through the reader;</li>
 *   <li>time reading every document through the reader with a reader context.</li>
 * </ol>
 *
 * <p>The raw dump writer, the random access file, the reader and the data
 * buffer are all released even when a step throws.
 *
 * @param file             the index file to benchmark
 * @param numDocs          number of documents; passed to the reader and used as the read loop bound
 * @param totalNumValues   total number of values, passed to the reader
 * @param maxEntriesPerDoc size of the scratch array that receives one document's values
 * @param columnSizeInBits bit width per value, passed to the reader
 * @throws Exception if opening, reading, writing or closing any of the resources fails
 */
public static void multiValuedReadBenchMarkV1(File file, int numDocs, int totalNumValues, int maxEntriesPerDoc, int columnSizeInBits) throws Exception {
  System.out.println("******************************************************************");
  System.out.println("Analyzing " + file.getName() + " numDocs:" + numDocs + ", totalNumValues:" + totalNumValues + ", maxEntriesPerDoc:" + maxEntriesPerDoc + ", numBits:" + columnSizeInBits);
  // Toggles for the individual benchmark phases.
  boolean readFile = true;
  boolean randomRead = true;
  boolean contextualRead = true;
  boolean signed = false;
  // Despite its name, this buffer is memory-mapped (ReadMode.mmap).
  PinotDataBuffer heapBuffer = PinotDataBuffer.fromFile(file, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "benchmarking");
  try {
    BaseSingleColumnMultiValueReader reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader(heapBuffer, numDocs, totalNumValues, columnSizeInBits, signed);
    try {
      int[] intArray = new int[maxEntriesPerDoc];

      // Dump the decoded values, one document per line.
      File outfile = new File("/tmp/" + file.getName() + ".raw");
      try (FileWriter fw = new FileWriter(outfile)) {
        for (int i = 0; i < numDocs; i++) {
          int length = reader.getIntArray(i, intArray);
          StringBuilder sb = new StringBuilder();
          String delim = "";
          for (int j = 0; j < length; j++) {
            sb.append(delim);
            sb.append(intArray[j]);
            delim = ",";
          }
          fw.write(sb.toString());
          fw.write("\n");
        }
      }

      // Raw byte scan of the file contents.
      if (readFile) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        final long fileLength = file.length();
        // The file is only read here, so it is opened read-only.
        try (RandomAccessFile raf = new RandomAccessFile(file, "r")) {
          // NOTE(review): the int cast limits this phase to files below 2 GB.
          ByteBuffer buffer = ByteBuffer.allocateDirect((int) fileLength);
          raf.getChannel().read(buffer);
          for (int run = 0; run < MAX_RUNS; run++) {
            long start = System.currentTimeMillis();
            for (int i = 0; i < fileLength; i++) {
              buffer.get(i);
            }
            long end = System.currentTimeMillis();
            stats.addValue((end - start));
          }
          System.out.println("v1 multi value read bytes stats for " + file.getName());
          System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
        }
      }

      // Reads every document in index order without a context (the flag is
      // called randomRead, but the access pattern and the label are sequential).
      if (randomRead) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (int run = 0; run < MAX_RUNS; run++) {
          long start = System.currentTimeMillis();
          for (int i = 0; i < numDocs; i++) {
            reader.getIntArray(i, intArray);
          }
          long end = System.currentTimeMillis();
          stats.addValue((end - start));
        }
        System.out.println("v1 multi value sequential read one stats for " + file.getName());
        System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
      }

      // Same document scan, but reusing a reader context created once per run.
      if (contextualRead) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (int run = 0; run < MAX_RUNS; run++) {
          // Context creation is deliberately outside the timed region.
          MultiValueReaderContext context = (MultiValueReaderContext) reader.createContext();
          long start = System.currentTimeMillis();
          for (int i = 0; i < numDocs; i++) {
            reader.getIntArray(i, intArray, context);
          }
          long end = System.currentTimeMillis();
          stats.addValue((end - start));
        }
        System.out.println("v1 multi value sequential read one with context stats for " + file.getName());
        System.out.println(stats.toString().replaceAll("\n", ", ") + " raw:" + Arrays.toString(stats.getValues()));
      }
    } finally {
      reader.close();
    }
  } finally {
    heapBuffer.close();
  }
  System.out.println("******************************************************************");
}
Aggregations