Search in sources :

Example 1 with IntHashSet

use of com.hazelcast.util.collection.IntHashSet in project hazelcast by hazelcast.

In class HyperLogLogEncoderAbstractTest, method testEstimateErrorRateForBigCardinalities:

/**
 * Checks the estimation error of the encoder against an exact distinct count.
 *
 * <ul>
 * <li>Feeds up to {@code runLength()} random ints to both an {@code IntHashSet} and the encoder
 * (the encoder receives the 64-bit Murmur3 hash of the int's 4 bytes).</li>
 * <li>Every 100 insertions, compares the encoder's estimate with the set's size and records the
 * absolute error, in hundredths of a percent, in a histogram.</li>
 * <li>Fails when the 99th percentile of the recorded errors exceeds the tolerated maximum, which is
 * the standard error {@code 1.04 / sqrt(2^precision)} expressed as a percentage, plus 3 percentage
 * points, rounded up.</li>
 * </ul>
 *
 * The 3-point margin is empirical: 2% is the typical accuracy, but runs of the implementation
 * occasionally showed 3%.
 */
@Test
public void testEstimateErrorRateForBigCardinalities() {
    // Tolerated error in percent: theoretical std error plus the empirical 3-point margin.
    double stdErrorPct = (1.04 / Math.sqrt(1 << precision())) * 100;
    double tolerancePct = Math.ceil(stdErrorPct + 3.0);

    IntHashSet exactSet = new IntHashSet(runLength(), -1);
    Random rng = new Random();
    Histogram errorHistogram = new Histogram(5);

    // Scratch buffer reused to turn each int into the 4 bytes that get hashed.
    ByteBuffer scratch = ByteBuffer.allocate(4);
    byte[] scratchBytes = scratch.array();
    final int samplingInterval = 100;

    for (int op = 1; op <= runLength(); op++) {
        int value = rng.nextInt();
        exactSet.add(value);

        scratch.clear();
        scratch.putInt(value);
        encoder.add(HashUtil.MurmurHash3_x64_64(scratchBytes, 0, scratchBytes.length));

        if (op % samplingInterval != 0) {
            continue;
        }

        long exactCardinality = exactSet.size();
        long estimatedCardinality = encoder.estimate();
        double deviationPct = ((estimatedCardinality * 100.0) / exactCardinality) - 100;
        // Stored scaled by 100 so two decimal places survive the cast to long.
        errorHistogram.recordValue(Math.abs((long) (deviationPct * 100)));
    }

    // Undo the x100 scaling applied when recording.
    double p99ErrorPct = errorHistogram.getValueAtPercentile(99) / 100.0;
    if (p99ErrorPct > tolerancePct) {
        fail("For P=" + precision() + ", max error=" + tolerancePct + "% expected. Error: " + p99ErrorPct + "%.");
    }
}
Also used : Histogram(org.HdrHistogram.Histogram) Random(java.util.Random) IntHashSet(com.hazelcast.util.collection.IntHashSet) ByteBuffer(java.nio.ByteBuffer) QuickTest(com.hazelcast.test.annotation.QuickTest) Test(org.junit.Test) ParallelTest(com.hazelcast.test.annotation.ParallelTest)

Example 2 with IntHashSet

use of com.hazelcast.util.collection.IntHashSet in project hazelcast by hazelcast.

In class HyperLogLogImplTest, method testEstimateErrorRateForBigCardinalities:

/**
 * Checks the estimation error of the HyperLogLog instance against an exact distinct count.
 *
 * <ul>
 * <li>Feeds up to {@code runLength} random ints to both an {@code IntHashSet} and the HyperLogLog
 * (which receives the 64-bit Murmur3 hash of the int's 4 bytes).</li>
 * <li>Every 100 insertions, compares the estimate with the set's size and records the absolute
 * error, in hundredths of a percent, in a histogram.</li>
 * <li>Fails when the 99th percentile of the recorded errors exceeds the tolerated maximum, which is
 * the standard error {@code 1.04 / sqrt(2^precision)} expressed as a percentage, plus 3 percentage
 * points, rounded up.</li>
 * </ul>
 *
 * The 3-point margin is empirical: 2% is the typical accuracy, but runs of the implementation
 * occasionally showed 3%.
 */
@Test
public void testEstimateErrorRateForBigCardinalities() {
    // Tolerated error in percent: theoretical std error plus the empirical 3-point margin.
    double stdErrorPct = (1.04 / Math.sqrt(1 << precision)) * 100;
    double tolerancePct = Math.ceil(stdErrorPct + 3.0);

    IntHashSet exactSet = new IntHashSet(runLength, -1);
    Random rng = new Random();
    Histogram errorHistogram = new Histogram(5);

    // Scratch buffer reused to turn each int into the 4 bytes that get hashed.
    ByteBuffer scratch = ByteBuffer.allocate(4);
    byte[] scratchBytes = scratch.array();
    final int samplingInterval = 100;

    for (int op = 1; op <= runLength; op++) {
        int value = rng.nextInt();
        exactSet.add(value);

        scratch.clear();
        scratch.putInt(value);
        hyperLogLog.add(HashUtil.MurmurHash3_x64_64(scratchBytes, 0, scratchBytes.length));

        if (op % samplingInterval != 0) {
            continue;
        }

        long exactCardinality = exactSet.size();
        long estimatedCardinality = hyperLogLog.estimate();
        double deviationPct = ((estimatedCardinality * 100.0) / exactCardinality) - 100;
        // Stored scaled by 100 so two decimal places survive the cast to long.
        errorHistogram.recordValue(Math.abs((long) (deviationPct * 100)));
    }

    // Undo the x100 scaling applied when recording.
    double p99ErrorPct = errorHistogram.getValueAtPercentile(99) / 100.0;
    if (p99ErrorPct > tolerancePct) {
        fail("For P=" + precision + ", Expected max error=" + tolerancePct + "%. Actual error: " + p99ErrorPct + "%.");
    }
}
Also used : Histogram(org.HdrHistogram.Histogram) Random(java.util.Random) IntHashSet(com.hazelcast.util.collection.IntHashSet) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test)

Aggregations

IntHashSet (com.hazelcast.util.collection.IntHashSet)2 ByteBuffer (java.nio.ByteBuffer)2 Random (java.util.Random)2 Histogram (org.HdrHistogram.Histogram)2 Test (org.junit.Test)2 ParallelTest (com.hazelcast.test.annotation.ParallelTest)1 QuickTest (com.hazelcast.test.annotation.QuickTest)1