Use of com.facebook.airlift.stats.cardinality.HyperLogLog in project presto by prestodb.
Class KHyperLogLog, method uniquenessDistribution.
/**
 * Builds a histogram of how the tracked entries are distributed by the
 * estimated cardinality of their per-entry HyperLogLog.
 *
 * <p>Every bucket from 1 to {@code histogramSize} (inclusive) is present in
 * the result, starting at zero. Each entry of {@code minhash} then adds
 * {@code 1 / minhash.size()} to the bucket matching its HLL cardinality,
 * with cardinalities above {@code histogramSize} counted in the last bucket.
 *
 * @param histogramSize the largest bucket key; larger cardinalities are capped to it
 * @return a map from bucket key to the fraction of entries that fell in that bucket
 */
public Long2DoubleMap uniquenessDistribution(long histogramSize) {
    Long2DoubleMap histogram = new Long2DoubleOpenHashMap();

    // Pre-populate every bucket so callers see an explicit zero for empty ones.
    LongStream.rangeClosed(1, histogramSize).forEach(bucket -> histogram.put(bucket, 0D));

    // Each entry carries the same weight, so compute it once up front.
    int entryCount = minhash.size();
    double weight = (double) 1 / entryCount;
    for (HyperLogLog entryHll : minhash.values()) {
        long cappedCardinality = Math.min(entryHll.cardinality(), histogramSize);
        histogram.merge(cappedCardinality, weight, Double::sum);
    }
    return histogram;
}
Use of com.facebook.airlift.stats.cardinality.HyperLogLog in project presto by prestodb.
Class KHyperLogLog, method removeOverflowEntries.
/**
 * Shrinks {@code minhash} until it holds at most {@code maxSize} entries.
 *
 * <p>Entries are evicted from the high end of the key order (largest key
 * first), and every evicted HyperLogLog is handed to
 * {@code decreaseTotalHllSize} so the size accounting follows the removal.
 */
private void removeOverflowEntries() {
    while (minhash.size() > maxSize) {
        long largestKey = minhash.lastLongKey();
        decreaseTotalHllSize(minhash.remove(largestKey));
    }
}
Use of com.facebook.airlift.stats.cardinality.HyperLogLog in project presto by prestodb.
Class KHyperLogLog, method update.
/**
 * Records {@code uii} in the HyperLogLog associated with {@code hash}.
 *
 * <p>The update is applied only when {@code hash} is already a key of
 * {@code minhash}, when {@link #isExact()} holds, or when {@code hash} is
 * smaller than the largest key currently held; otherwise the call is a no-op.
 * A missing HLL is created on demand, and any entries beyond the size limit
 * are evicted afterwards.
 *
 * @param hash the key under which the value is tracked
 * @param uii the value added to that key's HyperLogLog
 */
private void update(long hash, long uii) {
    // The checks short-circuit in this order on purpose: lastLongKey() is only
    // consulted once the first two conditions have failed.
    if (!minhash.containsKey(hash) && !isExact() && hash >= minhash.lastLongKey()) {
        return;
    }

    HyperLogLog target = minhash.computeIfAbsent(hash, key -> {
        HyperLogLog created = HyperLogLog.newInstance(hllBuckets);
        increaseTotalHllSize(created);
        return created;
    });

    // Take the HLL out of the size accounting before mutating it and put it
    // back afterwards, so the accounting sees its size after the add.
    decreaseTotalHllSize(target);
    target.add(uii);
    increaseTotalHllSize(target);

    removeOverflowEntries();
}
Use of com.facebook.airlift.stats.cardinality.HyperLogLog in project presto by prestodb.
Class SetDigest, method newInstance.
/**
 * Reconstructs a {@code SetDigest} from its serialized form.
 *
 * <p>The layout read here is: one format byte (must equal
 * {@code UNCOMPRESSED_FORMAT}), an int HLL length followed by that many bytes
 * of serialized HyperLogLog, an int {@code maxHashes}, an int
 * {@code minhashLength}, then {@code minhashLength} long keys followed by
 * {@code minhashLength} short values.
 *
 * @param serialized the serialized digest; must not be null
 * @return the deserialized digest
 * @throws NullPointerException if {@code serialized} is null
 * @throws IllegalArgumentException if the format byte is unexpected or the
 *         minhash length is negative
 */
public static SetDigest newInstance(Slice serialized) {
    requireNonNull(serialized, "serialized is null");
    SliceInput input = serialized.getInput();
    checkArgument(input.readByte() == UNCOMPRESSED_FORMAT, "Unexpected version");

    int hllLength = input.readInt();
    Slice serializedHll = Slices.allocate(hllLength);
    input.readBytes(serializedHll, hllLength);
    HyperLogLog hll = HyperLogLog.newInstance(serializedHll);

    Long2ShortRBTreeMap minhash = new Long2ShortRBTreeMap();
    int maxHashes = input.readInt();
    int minhashLength = input.readInt();
    // A negative length can only come from corrupt data; reject it instead of
    // silently producing a digest with an empty minhash.
    checkArgument(minhashLength >= 0, "minhashLength is negative");

    // The values are stored after the keys, so a second cursor is positioned
    // just past the key section. The offset is computed in long arithmetic so
    // a corrupt, very large length cannot wrap around to a bogus position.
    SliceInput valuesInput = serialized.getInput();
    valuesInput.setPosition(input.position() + (long) minhashLength * SIZE_OF_LONG);
    for (int i = 0; i < minhashLength; i++) {
        minhash.put(input.readLong(), valuesInput.readShort());
    }
    return new SetDigest(maxHashes, hll, minhash);
}
Use of com.facebook.airlift.stats.cardinality.HyperLogLog in project presto by prestodb.
Class CreateHll, method createHll.
/**
 * Creates a serialized HyperLogLog containing a single value.
 *
 * <p>The number of buckets is obtained from
 * {@code standardErrorToBuckets(maxStandardError)}.
 *
 * @param value the value added to the new HyperLogLog
 * @param maxStandardError the standard error used to choose the bucket count
 * @return the serialized HyperLogLog
 */
@ScalarFunction
@SqlType(StandardTypes.HYPER_LOG_LOG)
public static Slice createHll(@SqlType(StandardTypes.BIGINT) long value, @SqlType(StandardTypes.DOUBLE) double maxStandardError) {
    int bucketCount = standardErrorToBuckets(maxStandardError);
    HyperLogLog sketch = HyperLogLog.newInstance(bucketCount);
    sketch.add(value);
    return sketch.serialize();
}
Aggregations