Use of com.yahoo.sketches.theta.Union in project Gaffer by gchq.
The class UnionAggregatorTest, method testCloneOfBusySketch.
/**
 * Verifies the stateless clone of an aggregator that has already absorbed many sketches.
 *
 * <p>The aggregator is first fed 100 unions, each updated with 100 random doubles.
 * The test then asserts that {@code statelessClone()} yields a different instance and
 * that, after the clone aggregates {@code union1}, the estimate held in the clone's
 * state equals the estimate of {@code union1} to within {@code DELTA}.
 */
@Test
public void testCloneOfBusySketch() {
    final UnionAggregator busyAggregator = new UnionAggregator();
    busyAggregator.init();

    // Make the aggregator "busy": 100 sketches of 100 random values each.
    for (int sketchIndex = 0; sketchIndex < 100; sketchIndex++) {
        final Union randomUnion = SetOperation.builder().buildUnion();
        for (int valueIndex = 0; valueIndex < 100; valueIndex++) {
            randomUnion.update(Math.random());
        }
        busyAggregator._aggregate(randomUnion);
    }

    final UnionAggregator clone = busyAggregator.statelessClone();
    assertNotSame(busyAggregator, clone);

    // Aggregate a single known sketch into the clone and compare the estimates.
    clone._aggregate(union1);
    final double expectedEstimate = union1.getResult().getEstimate();
    final Union cloneState = (Union) clone.state()[0];
    assertEquals(expectedEstimate, cloneState.getResult().getEstimate(), DELTA);
}
Use of com.yahoo.sketches.theta.Union in project Gaffer by gchq.
The class UnionSerialiserTest, method testSerialiser.
/**
 * Round-trips the given union through {@code SERIALISER} and asserts that the
 * estimate of the deserialised union matches the original estimate to within
 * {@code DELTA}. The test fails if either step throws a
 * {@link SerialisationException}.
 *
 * @param union the union to serialise and deserialise
 */
private void testSerialiser(final Union union) {
    // Capture the expected value before the union is handed to the serialiser.
    final double expectedEstimate = union.getResult().getEstimate();

    final Union roundTripped;
    try {
        final byte[] bytes = SERIALISER.serialise(union);
        roundTripped = SERIALISER.deserialise(bytes);
    } catch (final SerialisationException e) {
        fail("A SerialisationException occurred");
        // Keeps the compiler satisfied that roundTripped is assigned below.
        return;
    }

    assertEquals(expectedEstimate, roundTripped.getResult().getEstimate(), DELTA);
}
Use of com.yahoo.sketches.theta.Union in project Gaffer by gchq.
The class UnionSerialiserTest, method testSerialiseAndDeserialise.
/**
 * Runs the serialise/deserialise round-trip check against two unions:
 * one updated with the values 1.0, 2.0 and 3.0, and one that was never updated.
 */
@Test
public void testSerialiseAndDeserialise() {
    final Union populatedUnion = SetOperation.builder().buildUnion();
    for (final double value : new double[] {1.0D, 2.0D, 3.0D}) {
        populatedUnion.update(value);
    }
    testSerialiser(populatedUnion);

    // A union with no updates must round-trip as well.
    testSerialiser(SetOperation.builder().buildUnion());
}
Use of com.yahoo.sketches.theta.Union in project Gaffer by gchq.
The class DataGenerator13, method getElements.
/**
 * Generates a fixed set of example elements; the {@code line} argument is not read.
 *
 * <p>Two batches are produced. The first covers 09/01/17 to 10/01/17 with edges
 * A-B0 ... A-B999; the second covers 10/01/17 to 11/01/17 with edges
 * A-B750 ... A-B1249. Every edge is accompanied by a "size" entity on the vertex
 * "graph" whose "size" property is a union sketch updated with the edge's
 * "A-B&lt;i&gt;" identifier.
 *
 * @param line unused
 * @return the generated edges and entities
 */
@Override
public Iterable<Element> getElements(final String line) {
    final Set<Element> generated = new HashSet<>();

    // On day 10/1/17 there are 1000 edges A-B0, A-B1, ..., A-B999.
    final Date midnight9th = LoadAndQuery8.getDate("09/01/17");
    final Date midnight10th = LoadAndQuery8.getDate("10/01/17");
    addEdgesWithSizeSketches(generated, 0, 1000, midnight9th, midnight10th);

    // On day 11/1/17 there are 500 edges A-B750, A-B751, ..., A-B1249.
    final Date midnight11th = LoadAndQuery8.getDate("11/01/17");
    addEdgesWithSizeSketches(generated, 750, 1250, midnight10th, midnight11th);

    return generated;
}

/**
 * Adds, for each index in {@code [fromInclusive, toExclusive)}, a "red" edge from
 * "A" to "B&lt;index&gt;" and a "size" entity carrying a union sketch of that edge.
 */
private void addEdgesWithSizeSketches(final Set<Element> target,
                                      final int fromInclusive,
                                      final int toExclusive,
                                      final Date startDate,
                                      final Date endDate) {
    for (int index = fromInclusive; index < toExclusive; index++) {
        final Edge edge = new Edge.Builder()
                .group("red")
                .source("A")
                .dest("B" + index)
                .property("startDate", startDate)
                .property("endDate", endDate)
                .property("count", 1L)
                .build();
        target.add(edge);

        // One sketch per edge, updated with a string naming the edge's two ends.
        final Union edgeSketch = Sketches.setOperationBuilder().buildUnion();
        edgeSketch.update("A-B" + index);
        final Entity sizeEntity = new Entity.Builder()
                .group("size")
                .vertex("graph")
                .property("startDate", startDate)
                .property("endDate", endDate)
                .property("size", edgeSketch)
                .build();
        target.add(sizeEntity);
    }
}
Use of com.yahoo.sketches.theta.Union in project sketches-pig by DataSketches.
The class DataToSketch, method exec.
// @formatter:off
/**
 * Top-level exec function: turns a bag of <b>Datum Tuples</b> into one <b>Sketch Tuple</b>.
 *
 * <p>Each call creates a fresh union, feeds it every inner <b>Datum Tuple</b> from the
 * bag found in the input, and returns the resulting <b>Sketch</b> wrapped in a
 * <b>Sketch Tuple</b>.
 *
 * <p>When many calls are expected, the <i>Algebraic</i> or <i>Accumulator</i> interfaces
 * are the recommended route. Pig normally selects them automatically.
 *
 * <p><b>Input Tuple</b>
 * <ul>
 *   <li>Tuple: TUPLE (Must contain only one field)
 *     <ul>
 *       <li>index 0: DataBag: BAG (May contain 0 or more Inner Tuples)
 *         <ul>
 *           <li>index 0: Tuple: TUPLE <b>Datum Tuple</b></li>
 *           <li>...</li>
 *           <li>index n-1: Tuple: TUPLE <b>Datum Tuple</b></li>
 *         </ul>
 *       </li>
 *     </ul>
 *   </li>
 * </ul>
 *
 * <p><b>Datum Tuple</b>
 * <ul>
 *   <li>Tuple: TUPLE (Must contain only one field)
 *     <ul>
 *       <li>index 0: Java data type : Pig DataType: may be any one of:
 *         <ul>
 *           <li>Byte: BYTE</li>
 *           <li>Integer: INTEGER</li>
 *           <li>Long: LONG</li>
 *           <li>Float: FLOAT</li>
 *           <li>Double: DOUBLE</li>
 *           <li>String: CHARARRAY</li>
 *           <li>DataByteArray: BYTEARRAY</li>
 *         </ul>
 *       </li>
 *     </ul>
 *   </li>
 * </ul>
 *
 * <p><b>Sketch Tuple</b>
 * <ul>
 *   <li>Tuple: TUPLE (Contains exactly 1 field)
 *     <ul>
 *       <li>index 0: DataByteArray: BYTEARRAY = The serialization of a Sketch object.</li>
 *     </ul>
 *   </li>
 * </ul>
 *
 * @param inputTuple A tuple containing a single bag, containing Datum Tuples.
 * @return Sketch Tuple. If inputTuple is null or empty, returns empty sketch (8 bytes).
 * @throws IOException from Pig.
 * @see "org.apache.pig.EvalFunc.exec(org.apache.pig.data.Tuple)"
 */
// @formatter:on
// TOP LEVEL EXEC
@Override
public Tuple exec(final Tuple inputTuple) throws IOException {
    // The throws clause is part of the API being overridden.
    // exec is stateless: it works only on its input and may call only static functions.
    final Union sketchUnion = newUnion(nomEntries_, p_, seed_);
    final DataBag datumBag = extractBag(inputTuple);

    if (datumBag != null) {
        // Present every element of the bag to the union, then emit its result.
        updateUnion(datumBag, sketchUnion);
        final CompactSketch orderedResult = sketchUnion.getResult(true, null);
        return compactOrderedSketchToTuple(orderedResult);
    }

    // No bag could be extracted: hand back the pre-built empty sketch tuple
    // (configured with parent).
    return emptyCompactOrderedSketchTuple_;
}
Aggregations