use of com.clearspring.analytics.stream.cardinality.HyperLogLogPlus in project gaffer-doc by gchq.
the class HyperLogLogPlusWalkthrough method run.
/**
 * Runs the HyperLogLogPlus walkthrough: builds a graph from the schema and
 * store properties, adds the generated entities, prints every stored element
 * and finally prints the degree estimate for vertex A, read from that
 * entity's "approxCardinality" property.
 *
 * @return always {@code null}
 * @throws OperationException declared for the graph operations executed by this method
 */
@Override
public CloseableIterable<? extends Element> run() throws OperationException {
    // [graph] create a graph using our schema and store properties
    // ---------------------------------------------------------
    final Graph graph = new Graph.Builder()
            .config(getDefaultGraphConfig())
            .addSchemas(StreamUtil.openStreams(getClass(), schemaPath))
            .storeProperties(getDefaultStoreProperties())
            .build();
    // ---------------------------------------------------------

    // [user] Create a user
    // ---------------------------------------------------------
    final User user = new User("user01");
    // ---------------------------------------------------------

    // [add] addElements - add the edges to the graph
    // ---------------------------------------------------------
    // The generator is handed a single placeholder item as its input.
    final Set<String> dummyData = Collections.singleton("");
    final OperationChain<Void> addOpChain = new OperationChain.Builder()
            .first(new GenerateElements.Builder<String>()
                    .generator(new HyperLogLogPlusElementGenerator())
                    .input(dummyData)
                    .build())
            .then(new AddElements())
            .build();
    graph.execute(addOpChain, user);
    // ---------------------------------------------------------
    print("Added 1000 entities for vertex A, each time with a HyperLogLogPlus containing a vertex that A was seen in an edge with");

    // [get] Get all entities
    // ---------------------------------------------------------
    final CloseableIterable<? extends Element> allEntities = graph.execute(new GetAllElements(), user);
    // ---------------------------------------------------------
    print("\nAll edges:");
    try {
        for (final Element entity : allEntities) {
            print("GET_ALL_ENTITIES_RESULT", entity.toString());
        }
    } finally {
        // Release the iterable even if printing fails part-way through.
        allEntities.close();
    }

    // [get the approximate degree of a] Get the entity for A and print out the estimate of the degree
    // ---------------------------------------------------------
    final GetElements query = new GetElements.Builder().input(new EntitySeed("A")).build();
    final Element element;
    try (final CloseableIterable<? extends Element> elements = graph.execute(query, user)) {
        // Only the first element returned for seed A is used.
        element = elements.iterator().next();
    }
    final HyperLogLogPlus hyperLogLogPlus = (HyperLogLogPlus) element.getProperty("approxCardinality");
    final double approxDegree = hyperLogLogPlus.cardinality();
    final String degreeEstimate = "Entity A has approximate degree " + approxDegree;
    // ---------------------------------------------------------
    print("\nEntity A with an estimate of its degree");
    print("GET_APPROX_DEGREE_FOR_ENTITY_A", degreeEstimate);
    return null;
}
use of com.clearspring.analytics.stream.cardinality.HyperLogLogPlus in project gaffer-doc by gchq.
the class HyperLogLogPlusElementGenerator method _apply.
/**
 * Produces a fixed batch of 1000 entities; the supplied line is not consulted.
 * Every entity is in group "cardinality", has vertex "A" and carries its own
 * HyperLogLogPlus (stored under "approxCardinality") that has been offered a
 * single value: "B" followed by the entity's index.
 *
 * @param line ignored by this generator
 * @return the list of generated entities
 */
@Override
public Iterable<Element> _apply(final String line) {
    final List<Element> generated = new ArrayList<>();
    int index = 0;
    while (index < 1000) {
        // A fresh sketch per entity, each seeded with one distinct neighbour name.
        final HyperLogLogPlus sketch = new HyperLogLogPlus(8, 8);
        final String neighbour = "B" + index;
        sketch.offer(neighbour);
        generated.add(new Entity.Builder()
                .group("cardinality")
                .vertex("A")
                .property("approxCardinality", sketch)
                .build());
        index++;
    }
    return generated;
}
use of com.clearspring.analytics.stream.cardinality.HyperLogLogPlus in project beam by apache.
the class ApproximateDistinctTest method testCoder.
/**
 * Feeds the integers 0 to 9 into a HyperLogLogPlus and runs the
 * decode/encode equality check of the HyperLogLogPlusCoder against it.
 */
@Test
public void testCoder() throws Exception {
    final HyperLogLogPlus sketch = new HyperLogLogPlus(12, 18);
    int value = 0;
    while (value < 10) {
        sketch.offer(value);
        value++;
    }
    CoderProperties.coderDecodeEncodeEqual(ApproximateDistinct.HyperLogLogPlusCoder.of(), sketch);
}
use of com.clearspring.analytics.stream.cardinality.HyperLogLogPlus in project Gaffer by gchq.
the class RoadTrafficElementGenerator method createCardinality.
/**
 * Builds a "Cardinality" entity for the given source vertex.
 * The entity holds a HyperLogLogPlus that has been offered the destination,
 * a tree set built from the edge's group, and a count of 1.
 *
 * @param source      the vertex of the created entity
 * @param destination the value offered to the entity's HyperLogLogPlus
 * @param edge        the edge whose group is recorded under "edgeGroup"
 * @return the newly built entity
 */
protected Entity createCardinality(final Object source, final Object destination, final Edge edge) {
    final HyperLogLogPlus sketch = new HyperLogLogPlus(5, 5);
    sketch.offer(destination);

    return new Entity.Builder()
            .vertex(source)
            .group("Cardinality")
            .property("edgeGroup", CollectionUtil.treeSet(edge.getGroup()))
            .property("hllp", sketch)
            .property("count", 1L)
            .build();
}
use of com.clearspring.analytics.stream.cardinality.HyperLogLogPlus in project Gaffer by gchq.
the class GetDataFrameOfElementsHandlerTest method checkCanDealWithNonStandardProperties.
/**
 * Runs GetDataFrameOfElements against a graph built from the
 * "elementsNonstandardTypes" schema, first with a view on the edge group and
 * then with a view on the entity group, and compares the collected rows with
 * a hand-built expected row each time.
 *
 * The expected-row buffer ({@code fields1}), the frequency map and both sets
 * are reused between the two phases, so the clear() calls in the middle of the
 * method are significant and statement order matters.
 *
 * @throws OperationException declared for the graph operations executed by this test
 */
@Test
public void checkCanDealWithNonStandardProperties() throws OperationException {
final Graph graph = getGraph("/schema-DataFrame/elementsNonstandardTypes.json", getElementsWithNonStandardProperties());
// Edges group - check get correct edges
GetDataFrameOfElements dfOperation = new GetDataFrameOfElements.Builder().view(new View.Builder().edge(EDGE_GROUP).build()).build();
Dataset<Row> dataFrame = graph.execute(dfOperation, new User());
Set<Row> results = new HashSet<>(dataFrame.collectAsList());
final Set<Row> expectedRows = new HashSet<>();
final MutableList<Object> fields1 = new MutableList<>();
// Scala map obtained via the Map$ companion object; filled with the expected frequency entries.
Map<String, Long> freqMap = Map$.MODULE$.empty();
freqMap.put("Y", 1000L);
freqMap.put("Z", 10000L);
// Expected edge row values, appended in order: group, "B", "C", true, null, the frequency map,
// then a cardinality. NOTE(review): presumably these line up with the DataFrame columns for the
// edge group - confirm against the schema.
fields1.appendElem(EDGE_GROUP);
fields1.appendElem("B");
fields1.appendElem("C");
fields1.appendElem(true);
fields1.appendElem(null);
fields1.appendElem(freqMap);
// The row is expected to carry the cardinality estimate (not the sketch itself) of a
// HyperLogLogPlus that was offered "AAA" and "BBB".
final HyperLogLogPlus hllpp = new HyperLogLogPlus(5, 5);
hllpp.offer("AAA");
hllpp.offer("BBB");
fields1.appendElem(hllpp.cardinality());
expectedRows.add(Row$.MODULE$.fromSeq(fields1));
assertEquals(expectedRows, results);
// Entities group - check get correct entities
dfOperation = new GetDataFrameOfElements.Builder().view(new View.Builder().entity(ENTITY_GROUP).build()).build();
dataFrame = graph.execute(dfOperation, new User());
// Reset every shared container before building the entity expectation.
results.clear();
results.addAll(dataFrame.collectAsList());
expectedRows.clear();
fields1.clear();
freqMap.clear();
freqMap.put("W", 10L);
freqMap.put("X", 100L);
// Expected entity row values, appended in order: group, "A", the frequency map, then a cardinality.
fields1.appendElem(ENTITY_GROUP);
fields1.appendElem("A");
fields1.appendElem(freqMap);
// Cardinality estimate of a HyperLogLogPlus that was offered only "AAA".
final HyperLogLogPlus hllpp2 = new HyperLogLogPlus(5, 5);
hllpp2.offer("AAA");
fields1.appendElem(hllpp2.cardinality());
expectedRows.add(Row$.MODULE$.fromSeq(fields1));
assertEquals(expectedRows, results);
}
Aggregations