Search in sources :

Example 16 with HyperLogLog

use of com.clearspring.analytics.stream.cardinality.HyperLogLog in project pinot by linkedin.

the class HllUtil method singleValueHllAsString.

/**
 * Builds a new HyperLogLog containing exactly one offered value and returns it as a string.
 * It is used as the default value of a derived field.
 *
 * @param log2m the log2m setting handed to the {@code HyperLogLog} constructor
 * @param value the single value offered to the new HyperLogLog
 * @return whatever {@code convertHllToString} produces for the populated HyperLogLog
 */
public static String singleValueHllAsString(int log2m, Object value) {
    final HyperLogLog singleton = new HyperLogLog(log2m);
    singleton.offer(value);
    final String serialized = convertHllToString(singleton);
    return serialized;
}
Also used : HyperLogLog(com.clearspring.analytics.stream.cardinality.HyperLogLog)

Example 17 with HyperLogLog

use of com.clearspring.analytics.stream.cardinality.HyperLogLog in project drill by apache.

the class HLLMergedStatistic method merge.

/**
 * Folds the serialized HyperLogLog carried by each column vector of {@code input} into the
 * per-column entry of {@code hllHolder}, keyed by the column's field name.
 *
 * <p>Only index 0 of each vector is read; a null value at that index leaves the column's
 * entry untouched.
 *
 * <p>NOTE(review): every exception raised while decoding or merging is silently dropped
 * (see the TODOs in the catch block), so a failing column simply keeps its previous entry.
 *
 * @param input the map vector whose child vectors are cast to {@code NullableVarBinaryVector}
 */
@Override
public void merge(MapVector input) {
    // The incoming vector is expected to be of MAP type.
    assert (input.getField().getType().getMinorType() == TypeProtos.MinorType.MAP);
    for (ValueVector columnVector : input) {
        final String columnName = columnVector.getField().getName();
        // null here means nothing has been stored for this column yet.
        final HyperLogLog accumulated = hllHolder.get(columnName);
        final NullableVarBinaryVector.Accessor values =
                ((NullableVarBinaryVector) columnVector).getAccessor();
        try {
            if (values.isNull(0)) {
                continue;
            }
            final DataInputStream serialized = new DataInputStream(
                    new ByteArrayInputStream(values.get(0), 0, columnVector.getBufferSize()));
            final HyperLogLog incoming = HyperLogLog.Builder.build(serialized);
            HyperLogLog merged = incoming;
            if (accumulated != null) {
                // Merge into the stored sketch and keep that instance in the map.
                accumulated.addAll(incoming);
                merged = accumulated;
            }
            hllHolder.put(columnName, merged);
        } catch (Exception ignored) {
        // TODO: Catch IOException/CardinalityMergeException
        // TODO: logger
        }
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) ByteArrayInputStream(java.io.ByteArrayInputStream) NullableVarBinaryVector(org.apache.drill.exec.vector.NullableVarBinaryVector) DataInputStream(java.io.DataInputStream) HyperLogLog(com.clearspring.analytics.stream.cardinality.HyperLogLog) IOException(java.io.IOException)

Example 18 with HyperLogLog

use of com.clearspring.analytics.stream.cardinality.HyperLogLog in project drill by apache.

the class NDVMergedStatistic method merge.

/**
 * Merges the serialized HyperLogLog carried by each column vector of {@code input} into the
 * per-column entry of {@code hllHolder}, keyed by the column's field name.
 *
 * <p>Only index 0 of each vector is read; a null value at that index leaves the column's
 * entry untouched. The method asserts that {@code state} is {@code State.MERGE}.
 *
 * @param input the map vector whose child vectors are cast to {@code NullableVarBinaryVector}
 * @throws IllegalStateException if decoding or merging a column's HyperLogLog fails; the
 *         underlying exception is attached as the cause
 */
@Override
public void merge(MapVector input) {
    // Check the input is a Map Vector
    assert (input.getField().getType().getMinorType() == TypeProtos.MinorType.MAP);
    // Dependencies have been configured correctly
    assert (state == State.MERGE);
    for (ValueVector vv : input) {
        String colName = vv.getField().getName();
        // null means no HyperLogLog has been stored for this column yet.
        HyperLogLog colHLLHolder = hllHolder.get(colName);
        NullableVarBinaryVector hllVector = (NullableVarBinaryVector) vv;
        NullableVarBinaryVector.Accessor accessor = hllVector.getAccessor();
        try {
            if (!accessor.isNull(0)) {
                ByteArrayInputStream bais = new ByteArrayInputStream(accessor.get(0), 0, vv.getBufferSize());
                HyperLogLog other = HyperLogLog.Builder.build(new DataInputStream(bais));
                if (colHLLHolder != null) {
                    colHLLHolder.addAll(other);
                    hllHolder.put(colName, colHLLHolder);
                } else {
                    hllHolder.put(colName, other);
                }
            }
        } catch (CardinalityMergeException ex) {
            // Chain the cause so the original merge failure is not lost from the stack trace.
            throw new IllegalStateException("Failed to merge the NDV statistics", ex);
        } catch (Exception ex) {
            throw new IllegalStateException(ex);
        }
    }
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) ByteArrayInputStream(java.io.ByteArrayInputStream) NullableVarBinaryVector(org.apache.drill.exec.vector.NullableVarBinaryVector) DataInputStream(java.io.DataInputStream) HyperLogLog(com.clearspring.analytics.stream.cardinality.HyperLogLog) CardinalityMergeException(com.clearspring.analytics.stream.cardinality.CardinalityMergeException) IOException(java.io.IOException)

Aggregations

HyperLogLog (com.clearspring.analytics.stream.cardinality.HyperLogLog)18 CardinalityMergeException (com.clearspring.analytics.stream.cardinality.CardinalityMergeException)8 ValueVector (org.apache.drill.exec.vector.ValueVector)4 IOException (java.io.IOException)3 NullableVarBinaryVector (org.apache.drill.exec.vector.NullableVarBinaryVector)3 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)2 ByteArrayInputStream (java.io.ByteArrayInputStream)2 DataInputStream (java.io.DataInputStream)2 Dictionary (com.linkedin.pinot.core.segment.index.readers.Dictionary)1 NullableBigIntVector (org.apache.drill.exec.vector.NullableBigIntVector)1 Test (org.testng.annotations.Test)1