Search in sources:

Example 11 with HyperLogLog

use of com.clearspring.analytics.stream.cardinality.HyperLogLog in project pinot by linkedin.

the class DistinctCountHLLMVAggregationFunction method aggregate.

/**
 * Feeds the multi-value entries of the first {@code length} rows of the first block value set
 * into the HyperLogLog held by the result holder. If the holder has no HyperLogLog yet, one is
 * created with {@code HllConstants.DEFAULT_LOG2M} and stored in the holder before any value is
 * offered.
 *
 * <p>INT entries are offered as-is; LONG, FLOAT, DOUBLE and STRING entries are offered as the
 * {@code hashCode()} of the (boxed) value.
 *
 * @param length number of rows to read from the value set
 * @param aggregationResultHolder holder from which the HyperLogLog is fetched (and set if absent)
 * @param blockValSets block value sets; only the first element is read
 * @throws IllegalArgumentException if the value type is none of INT, LONG, FLOAT, DOUBLE, STRING
 */
@Override
public void aggregate(int length, @Nonnull AggregationResultHolder aggregationResultHolder, @Nonnull BlockValSet... blockValSets) {
    HyperLogLog hll = aggregationResultHolder.getResult();
    if (hll == null) {
        hll = new HyperLogLog(HllConstants.DEFAULT_LOG2M);
        aggregationResultHolder.setValue(hll);
    }

    final BlockValSet valueSet = blockValSets[0];
    final FieldSpec.DataType valueType = valueSet.getValueType();
    switch (valueType) {
        case INT: {
            final int[][] rows = valueSet.getIntValuesMV();
            for (int row = 0; row < length; row++) {
                final int[] entries = rows[row];
                for (int k = 0; k < entries.length; k++) {
                    hll.offer(entries[k]);
                }
            }
            break;
        }
        case LONG: {
            final long[][] rows = valueSet.getLongValuesMV();
            for (int row = 0; row < length; row++) {
                final long[] entries = rows[row];
                for (int k = 0; k < entries.length; k++) {
                    hll.offer(Long.valueOf(entries[k]).hashCode());
                }
            }
            break;
        }
        case FLOAT: {
            final float[][] rows = valueSet.getFloatValuesMV();
            for (int row = 0; row < length; row++) {
                final float[] entries = rows[row];
                for (int k = 0; k < entries.length; k++) {
                    hll.offer(Float.valueOf(entries[k]).hashCode());
                }
            }
            break;
        }
        case DOUBLE: {
            final double[][] rows = valueSet.getDoubleValuesMV();
            for (int row = 0; row < length; row++) {
                final double[] entries = rows[row];
                for (int k = 0; k < entries.length; k++) {
                    hll.offer(Double.valueOf(entries[k]).hashCode());
                }
            }
            break;
        }
        case STRING: {
            final String[][] rows = valueSet.getStringValuesMV();
            for (int row = 0; row < length; row++) {
                final String[] entries = rows[row];
                for (int k = 0; k < entries.length; k++) {
                    hll.offer(entries[k].hashCode());
                }
            }
            break;
        }
        default:
            throw new IllegalArgumentException("Illegal data type for distinct count aggregation function: " + valueType);
    }
}
Also used : FieldSpec(com.linkedin.pinot.common.data.FieldSpec) HyperLogLog(com.clearspring.analytics.stream.cardinality.HyperLogLog)

Example 12 with HyperLogLog

use of com.clearspring.analytics.stream.cardinality.HyperLogLog in project pinot by linkedin.

the class FastHLLAggregationFunction method aggregateGroupBySV.

/**
 * For each of the first {@code length} rows, converts the row's string value to a HyperLogLog
 * via {@code HllUtil.convertStringToHll} and merges it into the HyperLogLog stored for that
 * row's group key. A group key with no HyperLogLog yet gets a new one built with {@code _log2m}.
 *
 * @param length number of rows to process
 * @param groupKeyArray group key for each row
 * @param groupByResultHolder holder of the per-group HyperLogLog results
 * @param blockValSets block value sets; only the first element is read (single-value strings)
 * @throws RuntimeException wrapping the {@link CardinalityMergeException} if a merge fails
 */
@Override
public void aggregateGroupBySV(int length, @Nonnull int[] groupKeyArray, @Nonnull GroupByResultHolder groupByResultHolder, @Nonnull BlockValSet... blockValSets) {
    final String[] serializedHlls = blockValSets[0].getStringValuesSV();
    try {
        for (int row = 0; row < length; row++) {
            final int key = groupKeyArray[row];
            HyperLogLog groupHll = groupByResultHolder.getResult(key);
            if (groupHll == null) {
                groupHll = new HyperLogLog(_log2m);
                groupByResultHolder.setValueForKey(key, groupHll);
            }
            groupHll.addAll(HllUtil.convertStringToHll(serializedHlls[row]));
        }
    } catch (CardinalityMergeException e) {
        throw new RuntimeException("Caught exception while aggregating HyperLogLog.", e);
    }
}
Also used : HyperLogLog(com.clearspring.analytics.stream.cardinality.HyperLogLog) CardinalityMergeException(com.clearspring.analytics.stream.cardinality.CardinalityMergeException)

Example 13 with HyperLogLog

use of com.clearspring.analytics.stream.cardinality.HyperLogLog in project pinot by linkedin.

the class FastHLLMVAggregationFunction method aggregateGroupBySV.

/**
 * For each of the first {@code length} rows, converts every string entry of the row to a
 * HyperLogLog via {@code HllUtil.convertStringToHll} and merges it into the HyperLogLog stored
 * for that row's group key. A group key with no HyperLogLog yet gets a new one built with
 * {@code _log2m}.
 *
 * @param length number of rows to process
 * @param groupKeyArray group key for each row
 * @param groupByResultHolder holder of the per-group HyperLogLog results
 * @param blockValSets block value sets; only the first element is read (multi-value strings)
 * @throws RuntimeException wrapping the {@link CardinalityMergeException} if a merge fails
 */
@Override
public void aggregateGroupBySV(int length, @Nonnull int[] groupKeyArray, @Nonnull GroupByResultHolder groupByResultHolder, @Nonnull BlockValSet... blockValSets) {
    final String[][] rows = blockValSets[0].getStringValuesMV();
    try {
        for (int row = 0; row < length; row++) {
            final int key = groupKeyArray[row];
            HyperLogLog groupHll = groupByResultHolder.getResult(key);
            if (groupHll == null) {
                groupHll = new HyperLogLog(_log2m);
                groupByResultHolder.setValueForKey(key, groupHll);
            }
            final String[] entries = rows[row];
            for (int k = 0; k < entries.length; k++) {
                groupHll.addAll(HllUtil.convertStringToHll(entries[k]));
            }
        }
    } catch (CardinalityMergeException e) {
        throw new RuntimeException("Caught exception while aggregating HyperLogLog.", e);
    }
}
Also used : HyperLogLog(com.clearspring.analytics.stream.cardinality.HyperLogLog) CardinalityMergeException(com.clearspring.analytics.stream.cardinality.CardinalityMergeException)

Example 14 with HyperLogLog

use of com.clearspring.analytics.stream.cardinality.HyperLogLog in project pinot by linkedin.

the class FastHLLMVAggregationFunction method aggregate.

/**
 * Converts every string entry of the first {@code length} rows to a HyperLogLog via
 * {@code HllUtil.convertStringToHll} and merges each one into the HyperLogLog held by the
 * result holder. If the holder has no HyperLogLog yet, one is created with {@code _log2m} and
 * stored in the holder first.
 *
 * @param length number of rows to process
 * @param aggregationResultHolder holder from which the HyperLogLog is fetched (and set if absent)
 * @param blockValSets block value sets; only the first element is read (multi-value strings)
 * @throws RuntimeException wrapping the {@link CardinalityMergeException} if a merge fails
 */
@Override
public void aggregate(int length, @Nonnull AggregationResultHolder aggregationResultHolder, @Nonnull BlockValSet... blockValSets) {
    final String[][] rows = blockValSets[0].getStringValuesMV();
    HyperLogLog merged = aggregationResultHolder.getResult();
    if (merged == null) {
        merged = new HyperLogLog(_log2m);
        aggregationResultHolder.setValue(merged);
    }
    try {
        for (int row = 0; row < length; row++) {
            final String[] entries = rows[row];
            for (int k = 0; k < entries.length; k++) {
                merged.addAll(HllUtil.convertStringToHll(entries[k]));
            }
        }
    } catch (CardinalityMergeException e) {
        throw new RuntimeException("Caught exception while aggregating HyperLogLog.", e);
    }
}
Also used : HyperLogLog(com.clearspring.analytics.stream.cardinality.HyperLogLog) CardinalityMergeException(com.clearspring.analytics.stream.cardinality.CardinalityMergeException)

Example 15 with HyperLogLog

use of com.clearspring.analytics.stream.cardinality.HyperLogLog in project pinot by linkedin.

the class DistinctCountHLLAggregationFunction method setValueForGroupKey.

/**
 * Offers {@code value} to the HyperLogLog stored in the result holder for {@code groupKey}.
 * When the holder has no HyperLogLog for that key, a new one is created with
 * {@code HllConstants.DEFAULT_LOG2M} and registered in the holder before the value is offered.
 *
 * @param groupByResultHolder Result holder
 * @param groupKey Group-key for which to set the value
 * @param value Value for the group key
 */
private void setValueForGroupKey(@Nonnull GroupByResultHolder groupByResultHolder, int groupKey, int value) {
    final HyperLogLog existing = groupByResultHolder.getResult(groupKey);
    if (existing != null) {
        existing.offer(value);
        return;
    }
    // First value seen for this group key: create and register the HyperLogLog, then offer.
    final HyperLogLog created = new HyperLogLog(HllConstants.DEFAULT_LOG2M);
    groupByResultHolder.setValueForKey(groupKey, created);
    created.offer(value);
}
Also used : HyperLogLog(com.clearspring.analytics.stream.cardinality.HyperLogLog)

Aggregations

HyperLogLog (com.clearspring.analytics.stream.cardinality.HyperLogLog) 18, CardinalityMergeException (com.clearspring.analytics.stream.cardinality.CardinalityMergeException) 8, ValueVector (org.apache.drill.exec.vector.ValueVector) 4, IOException (java.io.IOException) 3, NullableVarBinaryVector (org.apache.drill.exec.vector.NullableVarBinaryVector) 3, FieldSpec (com.linkedin.pinot.common.data.FieldSpec) 2, ByteArrayInputStream (java.io.ByteArrayInputStream) 2, DataInputStream (java.io.DataInputStream) 2, Dictionary (com.linkedin.pinot.core.segment.index.readers.Dictionary) 1, NullableBigIntVector (org.apache.drill.exec.vector.NullableBigIntVector) 1, Test (org.testng.annotations.Test) 1