use of com.yahoo.sketches.theta.CompactSketch in project sketches-pig by DataSketches.
the class Union method exec.
// @formatter:off
/**
**********************************************************************************************
* Top-level exec function.
* This method accepts an input Tuple containing a Bag of zero or more inner <b>Sketch Tuples</b>
* and returns a single updated <b>Sketch</b> as a <b>Sketch Tuple</b>.
*
* <p>If a large number of calls are anticipated, leveraging either the <i>Algebraic</i> or
* <i>Accumulator</i> interfaces is recommended. Pig normally handles this automatically.
*
* <p>Internally, this method presents the inner <b>Sketch Tuples</b> to a new <b>Union</b>.
* The result is returned as a <b>Sketch Tuple</b>.
*
* <p><b>Input Tuple</b>
* <ul>
* <li>Tuple: TUPLE (Must contain only one field)
* <ul>
* <li>index 0: DataBag: BAG (May contain 0 or more Inner Tuples)
* <ul>
* <li>index 0: Tuple: TUPLE <b>Sketch Tuple</b></li>
* <li>...</li>
* <li>index n-1: Tuple: TUPLE <b>Sketch Tuple</b></li>
* </ul>
* </li>
* </ul>
* </li>
* </ul>
*
* <b>Sketch Tuple</b>
* <ul>
* <li>Tuple: TUPLE (Contains exactly 1 field)
* <ul>
* <li>index 0: DataByteArray: BYTEARRAY = The serialization of a Sketch object.</li>
* </ul>
* </li>
* </ul>
*
* @param inputTuple A tuple containing a single bag, containing Sketch Tuples.
* @return Sketch Tuple. If inputTuple is null or empty, returns empty sketch (8 bytes).
* @see "org.apache.pig.EvalFunc.exec(org.apache.pig.data.Tuple)"
*/
// @formatter:on
// TOP LEVEL EXEC
@Override
public Tuple exec(final Tuple inputTuple) throws IOException {
    // throws is in API
    // The exec is a stateless function. It operates on the input and returns a result.
    // It can only call static functions.
    final DataBag bag = extractBag(inputTuple);
    if (bag == null) {
        // Nothing to merge: return the empty sketch tuple (configured with parent).
        // This guard runs before the Union is built so that the no-bag path does not
        // allocate a Union only to throw it away.
        return emptyCompactOrderedSketchTuple_;
    }
    // A fresh Union per call, configured from p_, seed_, RF and nomEntries_.
    final com.yahoo.sketches.theta.Union union = SetOperation.builder()
            .setP(p_)
            .setSeed(seed_)
            .setResizeFactor(RF)
            .setNominalEntries(nomEntries_)
            .buildUnion();
    // Feed every sketch tuple found in the bag into the union.
    updateUnion(bag, union);
    // Ask for the result as a compact sketch (arguments: true, null) and wrap it in a tuple.
    final CompactSketch compactSketch = union.getResult(true, null);
    return compactOrderedSketchToTuple(compactSketch);
}
use of com.yahoo.sketches.theta.CompactSketch in project Gaffer by gchq.
the class LoadAndQuery13 method run.
/**
 * Runs the example end to end: builds a graph, adds the generated elements, then logs
 * the stored entities, the sketch estimates of the first two entities returned, the
 * estimate of their intersection, and the estimate of the sketch obtained when the
 * "size" entities are queried with an empty group-by.
 *
 * @return always {@code null}; every result is reported through {@code log}
 * @throws OperationException declared for the graph operations executed here
 */
public Iterable<Entity> run() throws OperationException {
    // [user] Create a user
    // ---------------------------------------------------------
    final User user = new User("user01");
    // ---------------------------------------------------------
    // [graph] create a graph using our schema and store properties
    // ---------------------------------------------------------
    final Graph graph = new Graph.Builder()
            .addSchemas(getSchemas())
            .storeProperties(getStoreProperties())
            .build();
    // ---------------------------------------------------------
    // [add] add the edges to the graph
    // ---------------------------------------------------------
    final Set<String> seedObjects = Collections.singleton("");
    final OperationChain loadChain = new OperationChain.Builder()
            .first(new GenerateElements.Builder<String>()
                    .generator(new DataGenerator13())
                    .objects(seedObjects)
                    .build())
            .then(new AddElements())
            .build();
    graph.execute(loadChain, user);
    // ---------------------------------------------------------
    log("Added 1000 edges A-B0, A-B1, ..., A-B999 on 10/1/17. For each edge we create an Entity with a union sketch"
            + " containing a string of the source and destination from the edge. Added 500 edges A-B750, A-B751, "
            + "..., A-B1249 for day 11/1/17. Again for each edge we create an Entity with a union sketch.");
    // [get entities] Get the entities for separate days
    // ---------------------------------------------------------
    final GetAllEntities listAllOp = new GetAllEntities();
    final Iterable<Entity> storedEntities = graph.execute(listAllOp, user);
    final Iterator<Entity> storedCursor = storedEntities.iterator();
    while (storedCursor.hasNext()) {
        final Entity stored = storedCursor.next();
        log("GET_ENTITIES", stored.toString());
    }
    // ---------------------------------------------------------
    // [get estimate separate days] Get the estimates out of the sketches for the separate days
    // ---------------------------------------------------------
    final GetAllEntities perDayOp = new GetAllEntities();
    final Iterable<Entity> perDayEntities = graph.execute(perDayOp, user);
    final Iterator<Entity> dayCursor = perDayEntities.iterator();
    // The first two entities returned are taken as day one and day two.
    final Entity firstDayEntity = dayCursor.next();
    final Union firstDayUnion = (Union) firstDayEntity.getProperty("size");
    final CompactSketch firstDaySketch = firstDayUnion.getResult();
    final Entity secondDayEntity = dayCursor.next();
    final Union secondDayUnion = (Union) secondDayEntity.getProperty("size");
    final CompactSketch secondDaySketch = secondDayUnion.getResult();
    final double firstDayEstimate = firstDaySketch.getEstimate();
    final double secondDayEstimate = secondDaySketch.getEstimate();
    // ---------------------------------------------------------
    log("\nThe estimates for the different days");
    log("GET_ESTIMATE_OVER_SEPARATE_DAYS", String.valueOf(firstDayEstimate));
    log("GET_ESTIMATE_OVER_SEPARATE_DAYS", String.valueOf(secondDayEstimate));
    // [get intersection] Get the number of edges in common across the two days
    // ---------------------------------------------------------
    final Intersection overlap = Sketches.setOperationBuilder().buildIntersection();
    overlap.update(firstDaySketch);
    overlap.update(secondDaySketch);
    final CompactSketch overlapSketch = overlap.getResult();
    final double overlapEstimate = overlapSketch.getEstimate();
    // ---------------------------------------------------------
    log("\nThe estimate of the number of edges in common across the different days");
    log("PRINT_ESTIMATE", String.valueOf(overlapEstimate));
    // [get union across all days] Get the total number edges across the two days
    // ---------------------------------------------------------
    final ViewElementDefinition noGroupBy = new ViewElementDefinition.Builder()
            .groupBy()
            .build();
    final View sizeView = new View.Builder()
            .entity("size", noGroupBy)
            .build();
    final GetAllEntities mergedOp = new GetAllEntities.Builder()
            .view(sizeView)
            .build();
    final Iterable<Entity> mergedEntities = graph.execute(mergedOp, user);
    final Entity mergedEntity = mergedEntities.iterator().next();
    final Union mergedUnion = (Union) mergedEntity.getProperty("size");
    final double mergedEstimate = mergedUnion.getResult().getEstimate();
    // ---------------------------------------------------------
    log("\nThe estimate of the number of edges across the different days");
    log("UNION_ESTIMATE", String.valueOf(mergedEstimate));
    return null;
}
use of com.yahoo.sketches.theta.CompactSketch in project sketches-pig by DataSketches.
the class Intersect method exec.
// @formatter:off
/**
**********************************************************************************************
* Top-level exec function.
* This method accepts an input Tuple containing a Bag of zero or more inner <b>Sketch Tuples</b>
* and returns a single updated <b>Sketch</b> as a <b>Sketch Tuple</b>.
*
* <p>If a large number of calls are anticipated, leveraging either the <i>Algebraic</i> or
* <i>Accumulator</i> interfaces is recommended. Pig normally handles this automatically.
*
* <p>Internally, this method presents the inner <b>Sketch Tuples</b> to a new <b>Intersection</b>.
* The result is returned as a <b>Sketch Tuple</b>.
*
* <p><b>Input Tuple</b>
* <ul>
* <li>Tuple: TUPLE (Must contain only one field)
* <ul>
* <li>index 0: DataBag: BAG (May contain 0 or more Inner Tuples)
* <ul>
* <li>index 0: Tuple: TUPLE <b>Sketch Tuple</b></li>
* <li>...</li>
* <li>index n-1: Tuple: TUPLE <b>Sketch Tuple</b></li>
* </ul>
* </li>
* </ul>
* </li>
* </ul>
*
* <b>Sketch Tuple</b>
* <ul>
* <li>Tuple: TUPLE (Contains exactly 1 field)
* <ul>
* <li>index 0: DataByteArray: BYTEARRAY = The serialization of a Sketch object.</li>
* </ul>
* </li>
* </ul>
*
* @param inputTuple A tuple containing a single bag, containing Sketch Tuples.
* @return Sketch Tuple. If inputTuple is null or empty, returns empty sketch (8 bytes).
* @see "org.apache.pig.EvalFunc.exec(org.apache.pig.data.Tuple)"
*/
// @formatter:on
// TOP LEVEL EXEC
@Override
public Tuple exec(final Tuple inputTuple) throws IOException {
    // throws is in API
    // The exec is a stateless function. It operates on the input and returns a result.
    // It can only call static functions.
    final DataBag bag = extractBag(inputTuple);
    if (bag == null) {
        // Nothing to intersect: return the empty sketch tuple (configured with parent).
        // This guard runs before the Intersection is built so that the no-bag path does
        // not construct one only to throw it away.
        return emptyCompactOrderedSketchTuple_;
    }
    // A fresh Intersection per call, built with this function's seed_.
    final Intersection intersection = SetOperation.builder()
            .setSeed(seed_)
            .buildIntersection();
    // Feed every sketch tuple found in the bag into the intersection; seed_ is passed along too.
    updateIntersection(bag, intersection, seed_);
    // Ask for the result as a compact sketch (arguments: true, null) and wrap it in a tuple.
    final CompactSketch compactSketch = intersection.getResult(true, null);
    return compactOrderedSketchToTuple(compactSketch);
}
Aggregations