Search in sources:

Example 26 with HyperLogLogPlus

use of com.clearspring.analytics.stream.cardinality.HyperLogLogPlus in project Gaffer by gchq.

From class GetDataFrameOfElementsHandlerTest, method getElementsForUserDefinedConversion:

/**
 * Builds the test fixture: one entity and one directed edge, each carrying a
 * FreqMap, a HyperLogLogPlus sketch and a custom MyProperty value.
 */
private static List<Element> getElementsForUserDefinedConversion() {
    final List<Element> elements = new ArrayList<>();

    // Entity "A" with all three user-defined-conversion property types.
    final FreqMap entityFreqMap = new FreqMap();
    entityFreqMap.put("W", 10L);
    entityFreqMap.put("X", 100L);
    final HyperLogLogPlus entitySketch = new HyperLogLogPlus(5, 5);
    entitySketch.offer("AAA");
    elements.add(new Entity.Builder()
            .group(TestGroups.ENTITY)
            .vertex("A")
            .property("freqMap", entityFreqMap)
            .property("hllpp", entitySketch)
            .property("myProperty", new MyProperty(10))
            .build());

    // Directed edge B -> C carrying the same three property types.
    final Edge edge = new Edge.Builder()
            .group(TestGroups.EDGE)
            .source("B")
            .dest("C")
            .directed(true)
            .build();
    final FreqMap edgeFreqMap = new FreqMap();
    edgeFreqMap.put("Y", 1000L);
    edgeFreqMap.put("Z", 10000L);
    edge.putProperty("freqMap", edgeFreqMap);
    final HyperLogLogPlus edgeSketch = new HyperLogLogPlus(5, 5);
    edgeSketch.offer("AAA");
    edgeSketch.offer("BBB");
    edge.putProperty("hllpp", edgeSketch);
    edge.putProperty("myProperty", new MyProperty(50));
    elements.add(edge);

    return elements;
}
Also used : Entity(uk.gov.gchq.gaffer.data.element.Entity) FreqMap(uk.gov.gchq.gaffer.types.FreqMap) HyperLogLogPlus(com.clearspring.analytics.stream.cardinality.HyperLogLogPlus) Element(uk.gov.gchq.gaffer.data.element.Element) ArrayList(java.util.ArrayList) Edge(uk.gov.gchq.gaffer.data.element.Edge)

Example 27 with HyperLogLogPlus

use of com.clearspring.analytics.stream.cardinality.HyperLogLogPlus in project Gaffer by gchq.

From class GetDataFrameOfElementsHandlerTest, method checkCanDealWithUserDefinedConversion:

@Test
public void checkCanDealWithUserDefinedConversion() throws OperationException {
    // Graph populated with the entity/edge fixture from getElementsForUserDefinedConversion().
    final Graph graph = getGraph("/schema-DataFrame/elementsUserDefinedConversion.json", getElementsForUserDefinedConversion());
    // Edges group - check get correct edges
    final List<Converter> converters = new ArrayList<>();
    converters.add(new MyPropertyConverter());
    GetDataFrameOfElements dfOperation = new GetDataFrameOfElements.Builder().view(new View.Builder().edge(EDGE_GROUP).build()).converters(converters).build();
    Dataset<Row> dataFrame = graph.execute(dfOperation, new User());
    Set<Row> results = new HashSet<>(dataFrame.collectAsList());
    // Build the single expected edge row field-by-field, in schema column order.
    final Set<Row> expectedRows = new HashSet<>();
    final MutableList<Object> fields1 = new MutableList<>();
    // NOTE(review): Map$.MODULE$.empty() is the Scala companion-object factory; put() is
    // then called on it — presumably scala.collection.mutable.Map is in scope. Confirm.
    Map<String, Long> freqMap = Map$.MODULE$.empty();
    freqMap.put("Y", 1000L);
    freqMap.put("Z", 10000L);
    fields1.appendElem(EDGE_GROUP);
    fields1.appendElem("B");
    fields1.appendElem("C");
    fields1.appendElem(true);
    // null column — presumably a schema property the edge does not set; verify against schema.
    fields1.appendElem(null);
    fields1.appendElem(freqMap);
    // The expected hllpp column is the estimated cardinality (a long), not the sketch itself.
    final HyperLogLogPlus hllpp = new HyperLogLogPlus(5, 5);
    hllpp.offer("AAA");
    hllpp.offer("BBB");
    fields1.appendElem(hllpp.cardinality());
    // 50 is MyProperty's value after user-defined conversion.
    fields1.appendElem(50);
    expectedRows.add(Row$.MODULE$.fromSeq(fields1));
    assertEquals(expectedRows, results);
    // Entities group - check get correct entities
    dfOperation = new GetDataFrameOfElements.Builder().view(new View.Builder().entity(ENTITY_GROUP).build()).converters(converters).build();
    dataFrame = graph.execute(dfOperation, new User());
    // The mutable collections are reused: clear everything before rebuilding for entities.
    results.clear();
    results.addAll(dataFrame.collectAsList());
    expectedRows.clear();
    fields1.clear();
    freqMap.clear();
    freqMap.put("W", 10L);
    freqMap.put("X", 100L);
    fields1.appendElem(ENTITY_GROUP);
    fields1.appendElem("A");
    fields1.appendElem(freqMap);
    final HyperLogLogPlus hllpp2 = new HyperLogLogPlus(5, 5);
    hllpp2.offer("AAA");
    fields1.appendElem(hllpp2.cardinality());
    fields1.appendElem(10);
    expectedRows.add(Row$.MODULE$.fromSeq(fields1));
    assertEquals(expectedRows, results);
}
Also used : GetDataFrameOfElements(uk.gov.gchq.gaffer.spark.operation.dataframe.GetDataFrameOfElements) User(uk.gov.gchq.gaffer.user.User) ArrayList(java.util.ArrayList) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) Graph(uk.gov.gchq.gaffer.graph.Graph) MutableList(scala.collection.mutable.MutableList) HyperLogLogPlus(com.clearspring.analytics.stream.cardinality.HyperLogLogPlus) Converter(uk.gov.gchq.gaffer.spark.operation.dataframe.converter.property.Converter) Row(org.apache.spark.sql.Row) HashSet(java.util.HashSet) Test(org.junit.jupiter.api.Test)

Example 28 with HyperLogLogPlus

use of com.clearspring.analytics.stream.cardinality.HyperLogLogPlus in project shifu by ShifuML.

From class UpdateBinningInfoReducer, method reduce:

@Override
protected void reduce(IntWritable key, Iterable<BinningInfoWritable> values, Context context) throws IOException, InterruptedException {
    long start = System.currentTimeMillis();
    // Running moment accumulators for mean/stddev/skewness/kurtosis.
    double sum = 0d;
    double squaredSum = 0d;
    double tripleSum = 0d;
    double quarticSum = 0d;
    // Percentile estimates; only computed for numerical columns, else left 0.
    double p25th = 0d;
    double median = 0d;
    double p75th = 0d;
    long count = 0L, missingCount = 0L;
    // NOTE(review): max is initialised to Double.MIN_VALUE (smallest POSITIVE double),
    // not -Double.MAX_VALUE — if every observed value is negative, max stays wrong. Confirm intended.
    double min = Double.MAX_VALUE, max = Double.MIN_VALUE;
    // Bin structures are lazily initialised from the first non-empty mapper record.
    List<Double> binBoundaryList = null;
    List<String> binCategories = null;
    long[] binCountPos = null;
    long[] binCountNeg = null;
    double[] binWeightPos = null;
    double[] binWeightNeg = null;
    long[] binCountTotal = null;
    // Keys may exceed the column-config size (presumably when stats are sharded); wrap with modulo.
    int columnConfigIndex = key.get() >= this.columnConfigList.size() ? key.get() % this.columnConfigList.size() : key.get();
    ColumnConfig columnConfig = this.columnConfigList.get(columnConfigIndex);
    HyperLogLogPlus hyperLogLogPlus = null;
    Set<String> fis = new HashSet<String>();
    long totalCount = 0, invalidCount = 0, validNumCount = 0;
    int binSize = 0;
    // Merge per-mapper partial stats for this column.
    for (BinningInfoWritable info : values) {
        if (info.isEmpty()) {
            // mapper has no stats, skip it
            continue;
        }
        CountAndFrequentItemsWritable cfiw = info.getCfiw();
        totalCount += cfiw.getCount();
        invalidCount += cfiw.getInvalidCount();
        validNumCount += cfiw.getValidNumCount();
        fis.addAll(cfiw.getFrequetItems());
        // Deserialize and merge each mapper's HyperLogLogPlus sketch for distinct-count estimation.
        if (hyperLogLogPlus == null) {
            hyperLogLogPlus = HyperLogLogPlus.Builder.build(cfiw.getHyperBytes());
        } else {
            try {
                hyperLogLogPlus = (HyperLogLogPlus) hyperLogLogPlus.merge(HyperLogLogPlus.Builder.build(cfiw.getHyperBytes()));
            } catch (CardinalityMergeException e) {
                throw new RuntimeException(e);
            }
        }
        // First non-empty record fixes the bin layout; arrays get one extra slot
        // (binSize + 1), presumably for the missing/invalid bucket — confirm.
        if (columnConfig.isHybrid() && binBoundaryList == null && binCategories == null) {
            binBoundaryList = info.getBinBoundaries();
            binCategories = info.getBinCategories();
            binSize = binBoundaryList.size() + binCategories.size();
            binCountPos = new long[binSize + 1];
            binCountNeg = new long[binSize + 1];
            binWeightPos = new double[binSize + 1];
            binWeightNeg = new double[binSize + 1];
            binCountTotal = new long[binSize + 1];
        } else if (columnConfig.isNumerical() && binBoundaryList == null) {
            binBoundaryList = info.getBinBoundaries();
            binSize = binBoundaryList.size();
            binCountPos = new long[binSize + 1];
            binCountNeg = new long[binSize + 1];
            binWeightPos = new double[binSize + 1];
            binWeightNeg = new double[binSize + 1];
            binCountTotal = new long[binSize + 1];
        } else if (columnConfig.isCategorical() && binCategories == null) {
            binCategories = info.getBinCategories();
            binSize = binCategories.size();
            binCountPos = new long[binSize + 1];
            binCountNeg = new long[binSize + 1];
            binWeightPos = new double[binSize + 1];
            binWeightNeg = new double[binSize + 1];
            binCountTotal = new long[binSize + 1];
        }
        count += info.getTotalCount();
        missingCount += info.getMissingCount();
        // for numeric, such sums are OK, for categorical, such values are all 0, should be updated by using
        // binCountPos and binCountNeg
        sum += info.getSum();
        squaredSum += info.getSquaredSum();
        tripleSum += info.getTripleSum();
        quarticSum += info.getQuarticSum();
        if (Double.compare(max, info.getMax()) < 0) {
            max = info.getMax();
        }
        if (Double.compare(min, info.getMin()) > 0) {
            min = info.getMin();
        }
        // Accumulate per-bin pos/neg counts and weights across mappers.
        for (int i = 0; i < (binSize + 1); i++) {
            binCountPos[i] += info.getBinCountPos()[i];
            binCountNeg[i] += info.getBinCountNeg()[i];
            binWeightPos[i] += info.getBinWeightPos()[i];
            binWeightNeg[i] += info.getBinWeightNeg()[i];
            binCountTotal[i] += info.getBinCountPos()[i];
            binCountTotal[i] += info.getBinCountNeg()[i];
        }
    }
    // Estimate p25/median/p75 by linear interpolation within the bin that
    // contains the target rank.
    if (columnConfig.isNumerical()) {
        long p25Count = count / 4;
        long medianCount = p25Count * 2;
        long p75Count = p25Count * 3;
        p25th = min;
        median = min;
        p75th = min;
        // NOTE(review): currentCount is an int while counts are long — could overflow
        // for very large datasets; confirm expected scale.
        int currentCount = 0;
        for (int i = 0; i < binBoundaryList.size(); i++) {
            double left = getCutoffBoundary(binBoundaryList.get(i), max, min);
            double right = ((i == binBoundaryList.size() - 1) ? max : getCutoffBoundary(binBoundaryList.get(i + 1), max, min));
            if (p25Count >= currentCount && p25Count < currentCount + binCountTotal[i]) {
                p25th = ((p25Count - currentCount) / (double) binCountTotal[i]) * (right - left) + left;
            }
            if (medianCount >= currentCount && medianCount < currentCount + binCountTotal[i]) {
                median = ((medianCount - currentCount) / (double) binCountTotal[i]) * (right - left) + left;
            }
            if (p75Count >= currentCount && p75Count < currentCount + binCountTotal[i]) {
                p75th = ((p75Count - currentCount) / (double) binCountTotal[i]) * (right - left) + left;
                // when get 75 percentile stop it
                break;
            }
            currentCount += binCountTotal[i];
        }
        LOG.info("Coloumn num is {}, p25 value is {}, median value is {}, p75 value is {}", columnConfig.getColumnNum(), p25th, median, p75th);
    }
    LOG.info("Coloumn num is {}, columnType value is {}, cateMaxNumBin is {}, binCategory size is {}", columnConfig.getColumnNum(), columnConfig.getColumnType(), modelConfig.getStats().getCateMaxNumBin(), (CollectionUtils.isNotEmpty(columnConfig.getBinCategory()) ? columnConfig.getBinCategory().size() : 0));
    // To merge categorical binning
    if (columnConfig.isCategorical() && modelConfig.getStats().getCateMaxNumBin() > 0 && CollectionUtils.isNotEmpty(binCategories) && binCategories.size() > modelConfig.getStats().getCateMaxNumBin()) {
        // only category size large then expected max bin number
        CateBinningStats cateBinningStats = rebinCategoricalValues(new CateBinningStats(binCategories, binCountPos, binCountNeg, binWeightPos, binWeightNeg));
        LOG.info("For variable - {}, {} bins is rebined to {} bins", columnConfig.getColumnName(), binCategories.size(), cateBinningStats.binCategories.size());
        binCategories = cateBinningStats.binCategories;
        binCountPos = cateBinningStats.binCountPos;
        binCountNeg = cateBinningStats.binCountNeg;
        binWeightPos = cateBinningStats.binWeightPos;
        binWeightNeg = cateBinningStats.binWeightNeg;
    }
    double[] binPosRate;
    if (modelConfig.isRegression()) {
        binPosRate = computePosRate(binCountPos, binCountNeg);
    } else {
        // for multiple classfication, use rate of categories to compute a value
        binPosRate = computeRateForMultiClassfication(binCountPos);
    }
    String binBounString = null;
    if (columnConfig.isHybrid()) {
        if (binCategories.size() > this.maxCateSize) {
            // NOTE(review): message has 2 placeholders but 3 arguments — the size is dropped by SLF4J.
            LOG.warn("Column {} {} with invalid bin category size.", key.get(), columnConfig.getColumnName(), binCategories.size());
            return;
        }
        // Hybrid columns serialize numeric boundaries plus base64-encoded categories.
        binBounString = binBoundaryList.toString();
        binBounString += Constants.HYBRID_BIN_STR_DILIMETER + Base64Utils.base64Encode("[" + StringUtils.join(binCategories, CalculateStatsUDF.CATEGORY_VAL_SEPARATOR) + "]");
    } else if (columnConfig.isCategorical()) {
        if (binCategories.size() > this.maxCateSize) {
            // NOTE(review): same placeholder/argument mismatch as above.
            LOG.warn("Column {} {} with invalid bin category size.", key.get(), columnConfig.getColumnName(), binCategories.size());
            return;
        }
        binBounString = Base64Utils.base64Encode("[" + StringUtils.join(binCategories, CalculateStatsUDF.CATEGORY_VAL_SEPARATOR) + "]");
        // recompute such value for categorical variables
        min = Double.MAX_VALUE;
        max = Double.MIN_VALUE;
        sum = 0d;
        squaredSum = 0d;
        // For categoricals, moments are rebuilt from per-bin positive rates weighted by bin counts.
        for (int i = 0; i < binPosRate.length; i++) {
            if (!Double.isNaN(binPosRate[i])) {
                if (Double.compare(max, binPosRate[i]) < 0) {
                    max = binPosRate[i];
                }
                if (Double.compare(min, binPosRate[i]) > 0) {
                    min = binPosRate[i];
                }
                long binCount = binCountPos[i] + binCountNeg[i];
                sum += binPosRate[i] * binCount;
                double squaredVal = binPosRate[i] * binPosRate[i];
                squaredSum += squaredVal * binCount;
                tripleSum += squaredVal * binPosRate[i] * binCount;
                quarticSum += squaredVal * squaredVal * binCount;
            }
        }
    } else {
        if (binBoundaryList.size() == 0) {
            LOG.warn("Column {} {} with invalid bin boundary size.", key.get(), columnConfig.getColumnName(), binBoundaryList.size());
            return;
        }
        binBounString = binBoundaryList.toString();
    }
    // KS/IV/WOE metrics only make sense for binary (regression) targets.
    ColumnMetrics columnCountMetrics = null;
    ColumnMetrics columnWeightMetrics = null;
    if (modelConfig.isRegression()) {
        columnCountMetrics = ColumnStatsCalculator.calculateColumnMetrics(binCountNeg, binCountPos);
        columnWeightMetrics = ColumnStatsCalculator.calculateColumnMetrics(binWeightNeg, binWeightPos);
    }
    // To make it be consistent with SPDT, missingCount is excluded to compute mean, stddev ...
    long realCount = this.statsExcludeMissingValue ? (count - missingCount) : count;
    double mean = sum / realCount;
    // EPS guards against a zero variance; abs() guards against small negative values from FP error.
    double stdDev = Math.sqrt(Math.abs((squaredSum - (sum * sum) / realCount + EPS) / (realCount - 1)));
    double aStdDev = Math.sqrt(Math.abs((squaredSum - (sum * sum) / realCount + EPS) / realCount));
    double skewness = ColumnStatsCalculator.computeSkewness(realCount, mean, aStdDev, sum, squaredSum, tripleSum);
    double kurtosis = ColumnStatsCalculator.computeKurtosis(realCount, mean, aStdDev, sum, squaredSum, tripleSum, quarticSum);
    // Emit one delimiter-joined record with all column stats.
    // NOTE(review): hyperLogLogPlus.cardinality() below NPEs if every BinningInfoWritable was empty — confirm that cannot happen.
    sb.append(key.get()).append(Constants.DEFAULT_DELIMITER).append(binBounString).append(Constants.DEFAULT_DELIMITER).append(Arrays.toString(binCountNeg)).append(Constants.DEFAULT_DELIMITER).append(Arrays.toString(binCountPos)).append(Constants.DEFAULT_DELIMITER).append(Arrays.toString(new double[0])).append(Constants.DEFAULT_DELIMITER).append(Arrays.toString(binPosRate)).append(Constants.DEFAULT_DELIMITER).append(columnCountMetrics == null ? "" : df.format(columnCountMetrics.getKs())).append(Constants.DEFAULT_DELIMITER).append(columnCountMetrics == null ? "" : df.format(columnCountMetrics.getIv())).append(Constants.DEFAULT_DELIMITER).append(df.format(max)).append(Constants.DEFAULT_DELIMITER).append(df.format(min)).append(Constants.DEFAULT_DELIMITER).append(df.format(mean)).append(Constants.DEFAULT_DELIMITER).append(df.format(stdDev)).append(Constants.DEFAULT_DELIMITER).append(columnConfig.getColumnType().toString()).append(Constants.DEFAULT_DELIMITER).append(median).append(Constants.DEFAULT_DELIMITER).append(missingCount).append(Constants.DEFAULT_DELIMITER).append(count).append(Constants.DEFAULT_DELIMITER).append(missingCount * 1.0d / count).append(Constants.DEFAULT_DELIMITER).append(Arrays.toString(binWeightNeg)).append(Constants.DEFAULT_DELIMITER).append(Arrays.toString(binWeightPos)).append(Constants.DEFAULT_DELIMITER).append(columnCountMetrics == null ? "" : columnCountMetrics.getWoe()).append(Constants.DEFAULT_DELIMITER).append(columnWeightMetrics == null ? "" : columnWeightMetrics.getWoe()).append(Constants.DEFAULT_DELIMITER).append(columnWeightMetrics == null ? "" : columnWeightMetrics.getKs()).append(Constants.DEFAULT_DELIMITER).append(columnWeightMetrics == null ? "" : columnWeightMetrics.getIv()).append(Constants.DEFAULT_DELIMITER).append(columnCountMetrics == null ? Arrays.toString(new double[binSize + 1]) : columnCountMetrics.getBinningWoe().toString()).append(Constants.DEFAULT_DELIMITER).append(columnWeightMetrics == null ? 
Arrays.toString(new double[binSize + 1]) : // bin weighted WOE
    columnWeightMetrics.getBinningWoe().toString()).append(Constants.DEFAULT_DELIMITER).append(// skewness
    skewness).append(Constants.DEFAULT_DELIMITER).append(// kurtosis
    kurtosis).append(Constants.DEFAULT_DELIMITER).append(// total count
    totalCount).append(Constants.DEFAULT_DELIMITER).append(// invalid count
    invalidCount).append(Constants.DEFAULT_DELIMITER).append(// valid num count
    validNumCount).append(Constants.DEFAULT_DELIMITER).append(// cardinality
    hyperLogLogPlus.cardinality()).append(Constants.DEFAULT_DELIMITER).append(// frequent items
    Base64Utils.base64Encode(limitedFrequentItems(fis))).append(Constants.DEFAULT_DELIMITER).append(// the 25 percentile value
    p25th).append(Constants.DEFAULT_DELIMITER).append(p75th);
    outputValue.set(sb.toString());
    context.write(NullWritable.get(), outputValue);
    // sb is a reused field; reset it for the next key.
    sb.delete(0, sb.length());
    LOG.debug("Time:{}", (System.currentTimeMillis() - start));
}
Also used : CountAndFrequentItemsWritable(ml.shifu.shifu.core.autotype.CountAndFrequentItemsWritable) ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig) CardinalityMergeException(com.clearspring.analytics.stream.cardinality.CardinalityMergeException) HyperLogLogPlus(com.clearspring.analytics.stream.cardinality.HyperLogLogPlus) ColumnMetrics(ml.shifu.shifu.core.ColumnStatsCalculator.ColumnMetrics)

Example 29 with HyperLogLogPlus

use of com.clearspring.analytics.stream.cardinality.HyperLogLogPlus in project Gaffer by gchq.

From class LoadAndQuery9, method run:

/**
 * Loads the example data into a graph, then demonstrates querying HyperLogLogPlus
 * cardinality entities: all cardinalities, summarised cardinalities (grouped over
 * all edges), and the cardinality at vertex 1 restricted to "red" edges.
 *
 * @return the summarised cardinality entities
 * @throws OperationException if any graph operation fails
 */
public Iterable<Entity> run() throws OperationException {
    // [user] Create a user
    // ---------------------------------------------------------
    final User user = new User("user01");
    // ---------------------------------------------------------
    // [graph] create a graph using our schema and store properties
    // ---------------------------------------------------------
    final Graph graph = new Graph.Builder().addSchemas(getSchemas()).storeProperties(getStoreProperties()).build();
    // ---------------------------------------------------------
    // [add] add the edges to the graph
    // ---------------------------------------------------------
    final OperationChain addOpChain = new OperationChain.Builder().first(new GenerateElements.Builder<String>().generator(new DataGenerator9()).objects(DataUtils.loadData(getData())).build()).then(new AddElements()).build();
    graph.execute(addOpChain, user);
    // ---------------------------------------------------------
    // [get] Get all edges
    // ---------------------------------------------------------
    final Iterable<Edge> edges = graph.execute(new GetAllEdges(), user);
    // ---------------------------------------------------------
    log("\nAll edges:");
    for (final Edge edge : edges) {
        log("GET_ALL_EDGES_RESULT", edge.toString());
    }
    // [get all cardinalities] Get all cardinalities
    // ---------------------------------------------------------
    final GetAllEntities getAllCardinalities = new GetAllEntities.Builder().view(new View.Builder().entity("Cardinality").build()).build();
    // ---------------------------------------------------------
    final CloseableIterable<Entity> allCardinalities = graph.execute(getAllCardinalities, user);
    log("\nAll cardinalities");
    for (final Entity cardinality : allCardinalities) {
        logCardinality("ALL_CARDINALITIES_RESULT", cardinality);
    }
    // [get all summarised cardinalities] Get all summarised cardinalities over all edges
    // ---------------------------------------------------------
    final GetAllEntities getAllSummarisedCardinalities = new GetAllEntities.Builder().view(new View.Builder().entity("Cardinality", new ViewElementDefinition.Builder().groupBy().build()).build()).build();
    // ---------------------------------------------------------
    final CloseableIterable<Entity> allSummarisedCardinalities = graph.execute(getAllSummarisedCardinalities, user);
    log("\nAll summarised cardinalities");
    for (final Entity cardinality : allSummarisedCardinalities) {
        logCardinality("ALL_SUMMARISED_CARDINALITIES_RESULT", cardinality);
    }
    // [get red edge cardinality 1] Get the cardinality value at vertex 1 for red edges
    // ---------------------------------------------------------
    final GetEntities<EntitySeed> getCardinalities = new GetEntities.Builder<EntitySeed>().addSeed(new EntitySeed("1")).view(new View.Builder().entity("Cardinality", new ViewElementDefinition.Builder().preAggregationFilter(new ElementFilter.Builder().select("edgeGroup").execute(new IsEqual(CollectionUtil.treeSet("red"))).build()).build()).build()).build();
    // ---------------------------------------------------------
    final Entity redCardinality = graph.execute(getCardinalities, user).iterator().next();
    // ---------------------------------------------------------
    log("\nRed edge cardinality at vertex 1:");
    logCardinality("CARDINALITY_OF_1_RESULT", redCardinality);
    return allSummarisedCardinalities;
}

/**
 * Logs one cardinality entity as "Vertex <v> <edgeGroup>: <estimated cardinality>".
 * Extracted to remove the three copy-pasted logging blocks in {@link #run()}.
 *
 * @param resultKey   the log key identifying which query produced the entity
 * @param cardinality a "Cardinality" entity carrying "edgeGroup" and "hllp" properties
 */
private void logCardinality(final String resultKey, final Entity cardinality) {
    final String edgeGroup = (cardinality.getProperty("edgeGroup")).toString();
    log(resultKey, "Vertex " + cardinality.getVertex() + " " + edgeGroup + ": " + ((HyperLogLogPlus) cardinality.getProperty("hllp")).cardinality());
}
Also used : AddElements(uk.gov.gchq.gaffer.operation.impl.add.AddElements) Entity(uk.gov.gchq.gaffer.data.element.Entity) GetAllEntities(uk.gov.gchq.gaffer.operation.impl.get.GetAllEntities) User(uk.gov.gchq.gaffer.user.User) GetEntities(uk.gov.gchq.gaffer.operation.impl.get.GetEntities) IsEqual(uk.gov.gchq.gaffer.function.filter.IsEqual) DataGenerator9(uk.gov.gchq.gaffer.example.gettingstarted.generator.DataGenerator9) Graph(uk.gov.gchq.gaffer.graph.Graph) HyperLogLogPlus(com.clearspring.analytics.stream.cardinality.HyperLogLogPlus) GetAllEdges(uk.gov.gchq.gaffer.operation.impl.get.GetAllEdges) OperationChain(uk.gov.gchq.gaffer.operation.OperationChain) ElementFilter(uk.gov.gchq.gaffer.data.element.function.ElementFilter) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) Edge(uk.gov.gchq.gaffer.data.element.Edge)

Example 30 with HyperLogLogPlus

use of com.clearspring.analytics.stream.cardinality.HyperLogLogPlus in project Gaffer by gchq.

From class DataGenerator9, method createCardinalityEntity:

/**
 * Creates a "Cardinality" entity at the source vertex whose HyperLogLogPlus
 * sketch has been offered the destination vertex, tagged with the edge's group.
 */
private Entity createCardinalityEntity(final Object source, final Object destination, final Edge edge) {
    final HyperLogLogPlus sketch = new HyperLogLogPlus(5, 5);
    sketch.offer(destination);
    return new Entity.Builder()
            .vertex(source)
            .group("Cardinality")
            .property("edgeGroup", CollectionUtil.treeSet(edge.getGroup()))
            .property("hllp", sketch)
            .property("count", 1)
            .build();
}
Also used : HyperLogLogPlus(com.clearspring.analytics.stream.cardinality.HyperLogLogPlus)

Aggregations

HyperLogLogPlus (com.clearspring.analytics.stream.cardinality.HyperLogLogPlus)58 Test (org.junit.jupiter.api.Test)19 Test (org.junit.Test)14 Entity (uk.gov.gchq.gaffer.data.element.Entity)8 AggregateFunctionTest (uk.gov.gchq.gaffer.function.AggregateFunctionTest)6 User (uk.gov.gchq.gaffer.user.User)6 Edge (uk.gov.gchq.gaffer.data.element.Edge)5 Element (uk.gov.gchq.gaffer.data.element.Element)5 Graph (uk.gov.gchq.gaffer.graph.Graph)5 FunctionTest (uk.gov.gchq.koryphe.function.FunctionTest)5 ArrayList (java.util.ArrayList)4 AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements)4 HashSet (java.util.HashSet)3 View (uk.gov.gchq.gaffer.data.elementdefinition.view.View)3 SerialisationException (uk.gov.gchq.gaffer.exception.SerialisationException)3 OperationChain (uk.gov.gchq.gaffer.operation.OperationChain)3 EntitySeed (uk.gov.gchq.gaffer.operation.data.EntitySeed)3 GetAllElements (uk.gov.gchq.gaffer.operation.impl.get.GetAllElements)3 CardinalityMergeException (com.clearspring.analytics.stream.cardinality.CardinalityMergeException)2 TreeNode (com.fasterxml.jackson.core.TreeNode)2