Search in sources :

Example 46 with HyperLogLogPlus

use of com.clearspring.analytics.stream.cardinality.HyperLogLogPlus in project gaffer-doc by gchq.

the class HyperLogLogPlusWalkthrough method run.

/**
 * Runs the HyperLogLogPlus walkthrough: builds a graph, adds the generated
 * entities, prints every element, then fetches the entity for vertex "A"
 * and prints the cardinality estimate held in its "approxCardinality" property.
 *
 * @return always {@code null}
 * @throws OperationException if executing an operation on the graph fails
 */
@Override
public CloseableIterable<? extends Element> run() throws OperationException {
    // [graph] create a graph using our schema and store properties
    // ---------------------------------------------------------
    final Graph graph = new Graph.Builder().config(getDefaultGraphConfig()).addSchemas(StreamUtil.openStreams(getClass(), schemaPath)).storeProperties(getDefaultStoreProperties()).build();
    // ---------------------------------------------------------
    // [user] Create a user
    // ---------------------------------------------------------
    final User user = new User("user01");
    // ---------------------------------------------------------
    // [add] addElements - add the edges to the graph
    // ---------------------------------------------------------
    // A single dummy input item is enough to trigger the generator once.
    final Set<String> dummyData = Collections.singleton("");
    final OperationChain<Void> addOpChain = new OperationChain.Builder().first(new GenerateElements.Builder<String>().generator(new HyperLogLogPlusElementGenerator()).input(dummyData).build()).then(new AddElements()).build();
    graph.execute(addOpChain, user);
    // ---------------------------------------------------------
    print("Added 1000 entities for vertex A, each time with a HyperLogLogPlus containing a vertex that A was seen in an edge with");
    // [get] Get all entities
    // ---------------------------------------------------------
    CloseableIterable<? extends Element> allEntities = graph.execute(new GetAllElements(), user);
    // ---------------------------------------------------------
    try {
        print("\nAll edges:");
        for (final Element entity : allEntities) {
            print("GET_ALL_ENTITIES_RESULT", entity.toString());
        }
    } finally {
        // Release the iterable once printed, as is done for the query below.
        allEntities.close();
    }
    // [get the approximate degree of a] Get the entity for A and print out the estimate of the degree
    // ---------------------------------------------------------
    final GetElements query = new GetElements.Builder().input(new EntitySeed("A")).build();
    final Element element;
    try (final CloseableIterable<? extends Element> elements = graph.execute(query, user)) {
        // Only the first returned element is inspected.
        element = elements.iterator().next();
    }
    final HyperLogLogPlus hyperLogLogPlus = (HyperLogLogPlus) element.getProperty("approxCardinality");
    final double approxDegree = hyperLogLogPlus.cardinality();
    final String degreeEstimate = "Entity A has approximate degree " + approxDegree;
    // ---------------------------------------------------------
    print("\nEntity A with an estimate of its degree");
    print("GET_APPROX_DEGREE_FOR_ENTITY_A", degreeEstimate);
    return null;
}
Also used : AddElements(uk.gov.gchq.gaffer.operation.impl.add.AddElements) User(uk.gov.gchq.gaffer.user.User) HyperLogLogPlusElementGenerator(uk.gov.gchq.gaffer.doc.properties.generator.HyperLogLogPlusElementGenerator) Element(uk.gov.gchq.gaffer.data.element.Element) GetElements(uk.gov.gchq.gaffer.operation.impl.get.GetElements) Graph(uk.gov.gchq.gaffer.graph.Graph) HyperLogLogPlus(com.clearspring.analytics.stream.cardinality.HyperLogLogPlus) OperationChain(uk.gov.gchq.gaffer.operation.OperationChain) GetAllElements(uk.gov.gchq.gaffer.operation.impl.get.GetAllElements) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed)

Example 47 with HyperLogLogPlus

use of com.clearspring.analytics.stream.cardinality.HyperLogLogPlus in project gaffer-doc by gchq.

the class HyperLogLogPlusElementGenerator method _apply.

/**
 * Generates 1000 "cardinality" entities for vertex "A". Each entity carries
 * its own HyperLogLogPlus sketch that has been offered a single value
 * ("B0" through "B999"). The {@code line} argument is not used.
 *
 * @param line the input item (ignored)
 * @return the generated entities
 */
@Override
public Iterable<Element> _apply(final String line) {
    final List<Element> generated = new ArrayList<>();
    int suffix = 0;
    while (suffix < 1000) {
        // One fresh sketch per entity, holding exactly one neighbour.
        final HyperLogLogPlus sketch = new HyperLogLogPlus(8, 8);
        sketch.offer("B" + suffix);
        generated.add(new Entity.Builder()
                .group("cardinality")
                .vertex("A")
                .property("approxCardinality", sketch)
                .build());
        suffix++;
    }
    return generated;
}
Also used : Entity(uk.gov.gchq.gaffer.data.element.Entity) HyperLogLogPlus(com.clearspring.analytics.stream.cardinality.HyperLogLogPlus) Element(uk.gov.gchq.gaffer.data.element.Element) ArrayList(java.util.ArrayList)

Example 48 with HyperLogLogPlus

use of com.clearspring.analytics.stream.cardinality.HyperLogLogPlus in project beam by apache.

the class ApproximateDistinctTest method testCoder.

/**
 * Checks the HyperLogLogPlus coder with the decode/encode equality property,
 * using a sketch that has been offered the integers 0 to 9.
 */
@Test
public void testCoder() throws Exception {
    final HyperLogLogPlus sketch = new HyperLogLogPlus(12, 18);
    int value = 0;
    while (value < 10) {
        sketch.offer(value);
        value++;
    }
    final ApproximateDistinct.HyperLogLogPlusCoder coder = ApproximateDistinct.HyperLogLogPlusCoder.of();
    CoderProperties.coderDecodeEncodeEqual(coder, sketch);
}
Also used : HyperLogLogPlus(com.clearspring.analytics.stream.cardinality.HyperLogLogPlus) Test(org.junit.Test)

Example 49 with HyperLogLogPlus

use of com.clearspring.analytics.stream.cardinality.HyperLogLogPlus in project Gaffer by gchq.

the class RoadTrafficElementGenerator method createCardinality.

/**
 * Builds a "Cardinality" entity for {@code source} whose "hllp" property is a
 * HyperLogLogPlus sketch that has been offered {@code destination}.
 *
 * @param source      the vertex of the created entity
 * @param destination the value offered to the sketch
 * @param edge        the edge whose group is stored in the "edgeGroup" property
 * @return the cardinality entity, with "count" set to 1
 */
protected Entity createCardinality(final Object source, final Object destination, final Edge edge) {
    final HyperLogLogPlus sketch = new HyperLogLogPlus(5, 5);
    sketch.offer(destination);
    return new Entity.Builder()
            .vertex(source)
            .group("Cardinality")
            .property("edgeGroup", CollectionUtil.treeSet(edge.getGroup()))
            .property("hllp", sketch)
            .property("count", 1L)
            .build();
}
Also used : HyperLogLogPlus(com.clearspring.analytics.stream.cardinality.HyperLogLogPlus)

Example 50 with HyperLogLogPlus

use of com.clearspring.analytics.stream.cardinality.HyperLogLogPlus in project Gaffer by gchq.

the class GetDataFrameOfElementsHandlerTest method checkCanDealWithNonStandardProperties.

/**
 * Verifies that GetDataFrameOfElements returns the expected rows for the edge
 * group and then for the entity group. The expected rows include a frequency
 * map field and a field holding the cardinality of a HyperLogLogPlus sketch.
 *
 * The row-field list, frequency map and row sets are reused (cleared and
 * refilled) between the two halves, so statement order matters.
 */
@Test
public void checkCanDealWithNonStandardProperties() throws OperationException {
    final Graph graph = getGraph("/schema-DataFrame/elementsNonstandardTypes.json", getElementsWithNonStandardProperties());

    // ---- Edge group: fetch the rows and compare against the single expected row ----
    final View edgeView = new View.Builder().edge(EDGE_GROUP).build();
    GetDataFrameOfElements operation = new GetDataFrameOfElements.Builder().view(edgeView).build();
    Dataset<Row> frame = graph.execute(operation, new User());
    Set<Row> actualRows = new HashSet<>(frame.collectAsList());

    final Set<Row> wantedRows = new HashSet<>();
    final MutableList<Object> rowFields = new MutableList<>();
    Map<String, Long> frequencies = Map$.MODULE$.empty();
    frequencies.put("Y", 1000L);
    frequencies.put("Z", 10000L);

    rowFields.appendElem(EDGE_GROUP);
    rowFields.appendElem("B");
    rowFields.appendElem("C");
    rowFields.appendElem(true);
    rowFields.appendElem(null);
    rowFields.appendElem(frequencies);

    // The expected cardinality field comes from a sketch offered two values.
    final HyperLogLogPlus edgeSketch = new HyperLogLogPlus(5, 5);
    edgeSketch.offer("AAA");
    edgeSketch.offer("BBB");
    rowFields.appendElem(edgeSketch.cardinality());

    wantedRows.add(Row$.MODULE$.fromSeq(rowFields));
    assertEquals(wantedRows, actualRows);

    // ---- Entity group: reset the shared collections and repeat for entities ----
    final View entityView = new View.Builder().entity(ENTITY_GROUP).build();
    operation = new GetDataFrameOfElements.Builder().view(entityView).build();
    frame = graph.execute(operation, new User());
    actualRows.clear();
    actualRows.addAll(frame.collectAsList());

    wantedRows.clear();
    rowFields.clear();
    frequencies.clear();
    frequencies.put("W", 10L);
    frequencies.put("X", 100L);

    rowFields.appendElem(ENTITY_GROUP);
    rowFields.appendElem("A");
    rowFields.appendElem(frequencies);

    // The expected cardinality field comes from a sketch offered one value.
    final HyperLogLogPlus entitySketch = new HyperLogLogPlus(5, 5);
    entitySketch.offer("AAA");
    rowFields.appendElem(entitySketch.cardinality());

    wantedRows.add(Row$.MODULE$.fromSeq(rowFields));
    assertEquals(wantedRows, actualRows);
}
Also used : GetDataFrameOfElements(uk.gov.gchq.gaffer.spark.operation.dataframe.GetDataFrameOfElements) User(uk.gov.gchq.gaffer.user.User) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) Graph(uk.gov.gchq.gaffer.graph.Graph) MutableList(scala.collection.mutable.MutableList) HyperLogLogPlus(com.clearspring.analytics.stream.cardinality.HyperLogLogPlus) Row(org.apache.spark.sql.Row) HashSet(java.util.HashSet) Test(org.junit.jupiter.api.Test)

Aggregations

HyperLogLogPlus (com.clearspring.analytics.stream.cardinality.HyperLogLogPlus)63 Test (org.junit.jupiter.api.Test)19 Test (org.junit.Test)14 Entity (uk.gov.gchq.gaffer.data.element.Entity)8 User (uk.gov.gchq.gaffer.user.User)6 Edge (uk.gov.gchq.gaffer.data.element.Edge)5 Element (uk.gov.gchq.gaffer.data.element.Element)5 AggregateFunctionTest (uk.gov.gchq.gaffer.function.AggregateFunctionTest)5 Graph (uk.gov.gchq.gaffer.graph.Graph)5 FunctionTest (uk.gov.gchq.koryphe.function.FunctionTest)5 ArrayList (java.util.ArrayList)4 AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements)4 HashSet (java.util.HashSet)3 View (uk.gov.gchq.gaffer.data.elementdefinition.view.View)3 SerialisationException (uk.gov.gchq.gaffer.exception.SerialisationException)3 GetAllElements (uk.gov.gchq.gaffer.operation.impl.get.GetAllElements)3 CardinalityMergeException (com.clearspring.analytics.stream.cardinality.CardinalityMergeException)2 TreeNode (com.fasterxml.jackson.core.TreeNode)2 TextNode (com.fasterxml.jackson.databind.node.TextNode)2 ServerLongAnyRow (com.tencent.angel.ps.storage.vector.ServerLongAnyRow)2