Use of com.hazelcast.internal.util.collection.IntHashSet in project hazelcast by hazelcast.
From the class HyperLogLogImplTest, method testEstimateErrorRateForBigCardinalities:
/**
 * <ul>
 * <li>Adds up to {@link #DEFAULT_RUN_LENGTH} random numbers to both a Set and a HyperLogLog encoder.</li>
 * <li>Samples the actual count and the estimate every 100 operations.</li>
 * <li>Computes the error rate of each measurement and stores it in a histogram.</li>
 * <li>Asserts that the 99th percentile of the histogram is less than the expected max error,
 * which is the standard error (1.04 / sqrt(m)) plus an error range of [2.0, 6.5]% (2% is the typical accuracy,
 * but tests with lower precision need a higher error range).</li>
 * </ul>
 */
@Test
public void testEstimateErrorRateForBigCardinalities() {
    double stdError = (1.04f / Math.sqrt(1 << precision)) * 100;
    double maxError = Math.ceil(stdError + errorRange);

    IntHashSet actualCount = new IntHashSet(DEFAULT_RUN_LENGTH, -1);
    Random random = new Random();
    Histogram histogram = new Histogram(5);
    ByteBuffer bb = ByteBuffer.allocate(4);
    int sampleStep = 100;
    long expected;
    long actual;

    for (int i = 1; i <= DEFAULT_RUN_LENGTH; i++) {
        int toCount = random.nextInt();
        actualCount.add(toCount);

        // Pack the int into a 4-byte buffer and feed its 64-bit Murmur hash to the HLL.
        upcast(bb).clear();
        bb.putInt(toCount);
        hyperLogLog.add(HashUtil.MurmurHash3_x64_64(bb.array(), 0, bb.array().length));

        if (i % sampleStep == 0) {
            expected = actualCount.size();
            actual = hyperLogLog.estimate();
            double errorPct = ((actual * 100.0) / expected) - 100;
            // The histogram records longs, so scale by 100 to keep two decimal places of the percentage.
            histogram.recordValue(Math.abs((long) (errorPct * 100)));
        }
    }

    double errorPerc99 = histogram.getValueAtPercentile(99) / 100.0;
    if (errorPerc99 > maxError) {
        fail("For P=" + precision + ": Expected max error=" + maxError + "%. Actual error=" + errorPerc99 + "%.");
    }
}
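For intuition, here is a minimal standalone sketch of how the max-error threshold above is derived, assuming a hypothetical precision of 14 and a 3% error range (the real values come from the parameterized test fields and are not shown in this snippet):

// Sketch with assumed values: how the tolerated 99th-percentile error is computed.
public class ErrorBoundSketch {
    public static void main(String[] args) {
        int precision = 14;                                  // assumed; the test is parameterized over precisions
        int m = 1 << precision;                              // 16384 HLL registers
        double stdError = (1.04 / Math.sqrt(m)) * 100;       // theoretical HLL standard error, ~0.8125%
        double errorRange = 3.0;                             // assumed margin on top of the standard error
        double maxError = Math.ceil(stdError + errorRange);  // ceil(3.8125) = 4.0
        System.out.println("99th-percentile error must stay below " + maxError + "%");
    }
}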
From the class HyperLogLogEncoderAbstractTest, method testEstimateErrorRateForBigCardinalities:
/**
 * - Adds up to runLength() random numbers to both a Set and a HyperLogLog encoder.
 * - Samples the actual count and the estimate every 100 operations.
 * - Computes the error rate of each measurement and stores it in a histogram.
 * - Asserts that the 99th percentile of the histogram is less than the expected max error,
 *   which is the standard error (1.04 / sqrt(m)) plus 3%
 *   (2% is the typical accuracy, but tests on the implementation occasionally showed errors close to 3%).
 */
@Test
public void testEstimateErrorRateForBigCardinalities() {
    double stdError = (1.04 / Math.sqrt(1 << precision())) * 100;
    double maxError = Math.ceil(stdError + 3.0);

    IntHashSet actualCount = new IntHashSet(runLength(), -1);
    Random random = new Random();
    Histogram histogram = new Histogram(5);
    ByteBuffer bb = ByteBuffer.allocate(4);
    int sampleStep = 100;
    long expected;
    long actual;

    for (int i = 1; i <= runLength(); i++) {
        int toCount = random.nextInt();
        actualCount.add(toCount);

        upcast(bb).clear();
        bb.putInt(toCount);
        encoder.add(HashUtil.MurmurHash3_x64_64(bb.array(), 0, bb.array().length));

        if (i % sampleStep == 0) {
            expected = actualCount.size();
            actual = encoder.estimate();
            double errorPct = ((actual * 100.0) / expected) - 100;
            histogram.recordValue(Math.abs((long) (errorPct * 100)));
        }
    }

    double errorPerc99 = histogram.getValueAtPercentile(99) / 100.0;
    if (errorPerc99 > maxError) {
        fail("For P=" + precision() + ", max error=" + maxError + "% expected." + " Error: " + errorPerc99 + "%.");
    }
}
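Both tests record fractional error percentages in a histogram that only accepts long values, so they multiply by 100 before recording and divide by 100 after querying to preserve two decimal places. A minimal round-trip sketch of that scaling, assuming the Histogram is org.HdrHistogram.Histogram (which matches the recordValue/getValueAtPercentile calls above) and using a made-up 0.87% sample:

import org.HdrHistogram.Histogram;

public class FixedPointHistogramSketch {
    public static void main(String[] args) {
        Histogram histogram = new Histogram(5);                      // 5 significant decimal digits, as in the tests
        double errorPct = 0.87;                                      // assumed sample: estimate was 0.87% off
        histogram.recordValue(Math.abs((long) (errorPct * 100)));    // stored as 87
        double p99 = histogram.getValueAtPercentile(99) / 100.0;     // read back as 0.87
        System.out.println("99th percentile error: " + p99 + "%");
    }
}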