Search in sources:

Example 1 with IntHashSet

Use of com.hazelcast.internal.util.collection.IntHashSet in project hazelcast by hazelcast.

From the class HyperLogLogImplTest, method testEstimateErrorRateForBigCardinalities.

/**
 * Verifies that the HyperLogLog estimate stays within the expected error bound
 * for large cardinalities.
 * <ul>
 * <li>Adds up to {@link #DEFAULT_RUN_LENGTH} random numbers on both a Set and a HyperLogLog encoder.</li>
 * <li>Samples the actual count and the estimate respectively every 100 operations.</li>
 * <li>Computes the error rate of the measurements and stores it in a histogram.</li>
 * <li>Asserts that the 99th percentile of the histogram is less than the expected max error,
 * which is the result of std error (1.04 / sqrt(m)) + [2.0, 6.5]% (2% is the typical accuracy,
 * but tests with a lower precision need a higher error range).</li>
 * </ul>
 */
@Test
public void testEstimateErrorRateForBigCardinalities() {
    // Theoretical HyperLogLog standard error: 1.04 / sqrt(m), with m = 2^precision registers.
    // Use a double literal (not 1.04f) to avoid float truncation and stay consistent with
    // HyperLogLogEncoderAbstractTest, which computes the same bound with 1.04.
    double stdError = (1.04 / Math.sqrt(1 << precision)) * 100;
    double maxError = Math.ceil(stdError + errorRange);
    // Exact distinct-count baseline; -1 is the set's "missing value" sentinel.
    IntHashSet actualCount = new IntHashSet(DEFAULT_RUN_LENGTH, -1);
    Random random = new Random();
    // Histogram with 5 significant digits; error percentages are recorded scaled by 100.
    Histogram histogram = new Histogram(5);
    ByteBuffer bb = ByteBuffer.allocate(4);
    int sampleStep = 100;
    long expected;
    long actual;
    for (int i = 1; i <= DEFAULT_RUN_LENGTH; i++) {
        int toCount = random.nextInt();
        actualCount.add(toCount);
        // Reuse the 4-byte buffer: rewind, write the int, then hash its backing array.
        upcast(bb).clear();
        bb.putInt(toCount);
        hyperLogLog.add(HashUtil.MurmurHash3_x64_64(bb.array(), 0, bb.array().length));
        if (i % sampleStep == 0) {
            expected = actualCount.size();
            actual = hyperLogLog.estimate();
            // Relative error in percent; stored as abs(value * 100) to keep two decimals in the histogram.
            double errorPct = ((actual * 100.0) / expected) - 100;
            histogram.recordValue(Math.abs((long) (errorPct * 100)));
        }
    }
    double errorPerc99 = histogram.getValueAtPercentile(99) / 100.0;
    if (errorPerc99 > maxError) {
        fail("For P=" + precision + ": Expected max error=" + maxError + "%. Actual error=" + errorPerc99 + "%.");
    }
}
Also used: Histogram(org.HdrHistogram.Histogram) Random(java.util.Random) IntHashSet(com.hazelcast.internal.util.collection.IntHashSet) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test) SlowTest(com.hazelcast.test.annotation.SlowTest)

Example 2 with IntHashSet

Use of com.hazelcast.internal.util.collection.IntHashSet in project hazelcast by hazelcast.

From the class HyperLogLogEncoderAbstractTest, method testEstimateErrorRateForBigCardinalities.

/**
 * Checks that the HyperLogLog encoder's estimate stays within the expected
 * error bound for large cardinalities.
 * - Adds up to runLength() random numbers to both an exact Set and the encoder.
 * - Every 100 operations, samples the exact count and the estimate.
 * - Records the relative error of each sample in a histogram.
 * - Asserts that the 99th percentile of the recorded errors does not exceed
 *   the theoretical std error (1.04 / sqrt(m)) + 3%.
 *   (2% is the typical accuracy, but tests on the implementation showed rare occurrences of 3%.)
 */
@Test
public void testEstimateErrorRateForBigCardinalities() {
    final int sampleInterval = 100;
    // Theoretical standard error of HyperLogLog with m = 2^precision registers, in percent.
    final double stdError = (1.04 / Math.sqrt(1 << precision())) * 100;
    final double maxError = Math.ceil(stdError + 3.0);
    // Exact distinct-count baseline; -1 acts as the set's missing-value sentinel.
    final IntHashSet uniqueInts = new IntHashSet(runLength(), -1);
    final Random rng = new Random();
    // 5 significant digits; errors are recorded scaled by 100 to keep two decimals.
    final Histogram errorHistogram = new Histogram(5);
    final ByteBuffer buffer = ByteBuffer.allocate(4);
    for (int op = 1; op <= runLength(); op++) {
        final int value = rng.nextInt();
        uniqueInts.add(value);
        // Rewind the shared 4-byte buffer, serialize the int, and hash its backing array.
        upcast(buffer).clear();
        buffer.putInt(value);
        encoder.add(HashUtil.MurmurHash3_x64_64(buffer.array(), 0, buffer.array().length));
        if (op % sampleInterval == 0) {
            final long trueCount = uniqueInts.size();
            final long estimated = encoder.estimate();
            // Relative error in percent, recorded as abs(value * 100).
            final double errorPct = ((estimated * 100.0) / trueCount) - 100;
            errorHistogram.recordValue(Math.abs((long) (errorPct * 100)));
        }
    }
    final double errorPerc99 = errorHistogram.getValueAtPercentile(99) / 100.0;
    if (errorPerc99 > maxError) {
        fail("For P=" + precision() + ", max error=" + maxError + "% expected." + " Error: " + errorPerc99 + "%.");
    }
}
Also used: Histogram(org.HdrHistogram.Histogram) Random(java.util.Random) IntHashSet(com.hazelcast.internal.util.collection.IntHashSet) ByteBuffer(java.nio.ByteBuffer) ParallelJVMTest(com.hazelcast.test.annotation.ParallelJVMTest) QuickTest(com.hazelcast.test.annotation.QuickTest) Test(org.junit.Test)

Aggregations

IntHashSet (com.hazelcast.internal.util.collection.IntHashSet)2 ByteBuffer (java.nio.ByteBuffer)2 Random (java.util.Random)2 Histogram (org.HdrHistogram.Histogram)2 Test (org.junit.Test)2 ParallelJVMTest (com.hazelcast.test.annotation.ParallelJVMTest)1 QuickTest (com.hazelcast.test.annotation.QuickTest)1 SlowTest (com.hazelcast.test.annotation.SlowTest)1