Search in sources :

Example 21 with HyperLogLog

use of com.facebook.airlift.stats.cardinality.HyperLogLog in project presto by prestodb.

the class KHyperLogLog method uniquenessDistribution.

/**
 * Builds a histogram over the cardinalities of the HyperLogLogs stored in {@code minhash}.
 *
 * <p>Every bucket from 1 to {@code histogramSize} (inclusive) is present in the result,
 * starting at zero. Each HyperLogLog then adds {@code 1 / minhash.size()} to the bucket
 * equal to its cardinality, with cardinalities above {@code histogramSize} folded into
 * the {@code histogramSize} bucket.
 *
 * @param histogramSize the highest bucket; larger cardinalities are clamped to it
 * @return a map from bucket to the accumulated share of minhash entries in that bucket
 */
public Long2DoubleMap uniquenessDistribution(long histogramSize) {
    Long2DoubleMap histogram = new Long2DoubleOpenHashMap();
    // Seed every bucket in [1, histogramSize] so that empty buckets still appear in the output.
    LongStream.rangeClosed(1, histogramSize).forEach(key -> histogram.put(key, 0D));

    // Every entry carries the same weight. This is floating-point division, so an empty
    // minhash yields an unused infinity rather than an exception.
    double share = (double) 1 / minhash.size();
    for (HyperLogLog sketch : minhash.values()) {
        long bucket = Math.min(sketch.cardinality(), histogramSize);
        histogram.merge(bucket, share, Double::sum);
    }
    return histogram;
}
Also used : PrimitiveIterator(java.util.PrimitiveIterator) Long2DoubleMap(it.unimi.dsi.fastutil.longs.Long2DoubleMap) Long2DoubleOpenHashMap(it.unimi.dsi.fastutil.longs.Long2DoubleOpenHashMap) HyperLogLog(com.facebook.airlift.stats.cardinality.HyperLogLog)

Example 22 with HyperLogLog

use of com.facebook.airlift.stats.cardinality.HyperLogLog in project presto by prestodb.

the class KHyperLogLog method removeOverflowEntries.

/**
 * Shrinks {@code minhash} until it holds at most {@code maxSize} entries.
 *
 * <p>Each iteration removes the entry under the map's last key and passes the removed
 * HyperLogLog to {@code decreaseTotalHllSize}.
 */
private void removeOverflowEntries() {
    while (maxSize < minhash.size()) {
        // The last key in the map's ordering is the one evicted.
        long lastKey = minhash.lastLongKey();
        HyperLogLog evicted = minhash.remove(lastKey);
        decreaseTotalHllSize(evicted);
    }
}
Also used : HyperLogLog(com.facebook.airlift.stats.cardinality.HyperLogLog)

Example 23 with HyperLogLog

use of com.facebook.airlift.stats.cardinality.HyperLogLog in project presto by prestodb.

the class KHyperLogLog method update.

/**
 * Adds {@code uii} to the HyperLogLog stored under {@code hash}, creating that
 * HyperLogLog first if the hash is not yet present.
 *
 * <p>The call is a no-op when the hash is not already tracked, {@code isExact()} is
 * false, and the hash is not smaller than the map's current last key. After the add,
 * {@code removeOverflowEntries()} is invoked.
 *
 * @param hash the key under which the HyperLogLog is stored in {@code minhash}
 * @param uii the value added to that HyperLogLog
 */
private void update(long hash, long uii) {
    // Conditions are checked in this order so lastLongKey() is only consulted
    // once the two earlier checks have both failed.
    if (!minhash.containsKey(hash) && !isExact() && hash >= minhash.lastLongKey()) {
        return;
    }
    HyperLogLog sketch = minhash.computeIfAbsent(hash, ignoredKey -> {
        HyperLogLog created = HyperLogLog.newInstance(hllBuckets);
        increaseTotalHllSize(created);
        return created;
    });
    // Size accounting brackets the mutation: decrease before the add, increase after it.
    decreaseTotalHllSize(sketch);
    sketch.add(uii);
    increaseTotalHllSize(sketch);
    removeOverflowEntries();
}
Also used : HyperLogLog(com.facebook.airlift.stats.cardinality.HyperLogLog)

Example 24 with HyperLogLog

use of com.facebook.airlift.stats.cardinality.HyperLogLog in project presto by prestodb.

the class SetDigest method newInstance.

/**
 * Deserializes a {@code SetDigest} from its serialized form.
 *
 * <p>Fields are read in this order: a format byte (must equal {@code UNCOMPRESSED_FORMAT}),
 * the HyperLogLog length, the HyperLogLog bytes, the maximum number of hashes, the number
 * of minhash entries, then all minhash keys (longs) followed by all minhash values (shorts).
 *
 * @param serialized the serialized digest; must not be null
 * @return the reconstructed digest
 * @throws NullPointerException if {@code serialized} is null
 * @throws IllegalArgumentException if the format byte is not {@code UNCOMPRESSED_FORMAT}
 */
public static SetDigest newInstance(Slice serialized) {
    requireNonNull(serialized, "serialized is null");
    SliceInput input = serialized.getInput();
    checkArgument(input.readByte() == UNCOMPRESSED_FORMAT, "Unexpected version");

    int hllLength = input.readInt();
    Slice serializedHll = Slices.allocate(hllLength);
    input.readBytes(serializedHll, hllLength);
    HyperLogLog hll = HyperLogLog.newInstance(serializedHll);

    Long2ShortRBTreeMap minhash = new Long2ShortRBTreeMap();
    int maxHashes = input.readInt();
    int minhashLength = input.readInt();

    // The values are stored after the keys, so a second cursor is positioned just past
    // the key section. Widen to long BEFORE multiplying: an int product would wrap
    // silently for a corrupt length and point the cursor at an arbitrary offset.
    SliceInput valuesInput = serialized.getInput();
    long keysSectionBytes = (long) minhashLength * SIZE_OF_LONG;
    valuesInput.setPosition(input.position() + keysSectionBytes);

    for (int i = 0; i < minhashLength; i++) {
        minhash.put(input.readLong(), valuesInput.readShort());
    }
    return new SetDigest(maxHashes, hll, minhash);
}
Also used : Slice(io.airlift.slice.Slice) Long2ShortRBTreeMap(it.unimi.dsi.fastutil.longs.Long2ShortRBTreeMap) SliceInput(io.airlift.slice.SliceInput) HyperLogLog(com.facebook.airlift.stats.cardinality.HyperLogLog)

Example 25 with HyperLogLog

use of com.facebook.airlift.stats.cardinality.HyperLogLog in project presto by prestodb.

the class CreateHll method createHll.

/**
 * Creates a HyperLogLog containing the single value {@code value} and returns it serialized.
 *
 * <p>The HyperLogLog is created with the bucket count that
 * {@code standardErrorToBuckets(maxStandardError)} returns.
 *
 * @param value the value added to the new HyperLogLog
 * @param maxStandardError passed to {@code standardErrorToBuckets} to pick the bucket count
 * @return the serialized HyperLogLog
 */
@ScalarFunction
@SqlType(StandardTypes.HYPER_LOG_LOG)
public static Slice createHll(@SqlType(StandardTypes.BIGINT) long value, @SqlType(StandardTypes.DOUBLE) double maxStandardError) {
    int bucketCount = standardErrorToBuckets(maxStandardError);
    HyperLogLog sketch = HyperLogLog.newInstance(bucketCount);
    sketch.add(value);
    return sketch.serialize();
}
Also used : HyperLogLog(com.facebook.airlift.stats.cardinality.HyperLogLog) ScalarFunction(com.facebook.presto.spi.function.ScalarFunction) SqlType(com.facebook.presto.spi.function.SqlType)

Aggregations

HyperLogLog (com.facebook.airlift.stats.cardinality.HyperLogLog): 25, InputFunction (com.facebook.presto.spi.function.InputFunction): 6, Slice (io.airlift.slice.Slice): 6, SqlType (com.facebook.presto.spi.function.SqlType): 4, ScalarFunction (com.facebook.presto.spi.function.ScalarFunction): 3, TypeParameter (com.facebook.presto.spi.function.TypeParameter): 3, ImmutableList (com.google.common.collect.ImmutableList): 3, SliceInput (io.airlift.slice.SliceInput): 2, Block (com.facebook.presto.common.block.Block): 1, BlockBuilder (com.facebook.presto.common.block.BlockBuilder): 1, SqlVarbinary (com.facebook.presto.common.type.SqlVarbinary): 1, PrestoException (com.facebook.presto.spi.PrestoException): 1, Description (com.facebook.presto.spi.function.Description): 1, LiteralParameters (com.facebook.presto.spi.function.LiteralParameters): 1, ScalarOperator (com.facebook.presto.spi.function.ScalarOperator): 1, SqlNullable (com.facebook.presto.spi.function.SqlNullable): 1, DynamicSliceOutput (io.airlift.slice.DynamicSliceOutput): 1, SliceOutput (io.airlift.slice.SliceOutput): 1, IntArrayList (it.unimi.dsi.fastutil.ints.IntArrayList): 1, IntList (it.unimi.dsi.fastutil.ints.IntList): 1