Search in sources :

Example 6 with Union

use of com.yahoo.sketches.theta.Union in project Gaffer by gchq.

the class UnionAggregatorTest method testCloneOfBusySketch.

/**
 * Verifies that a stateless clone of an aggregator which has already aggregated many
 * sketches is a different instance, and that after aggregating only {@code union1} into
 * the clone its estimate equals the estimate of {@code union1} (within {@code DELTA}).
 */
@Test
public void testCloneOfBusySketch() {
    final int sketchCount = 100;
    final int valuesPerSketch = 100;
    final UnionAggregator busyAggregator = new UnionAggregator();
    busyAggregator.init();
    // Make the source aggregator "busy" by feeding it many sketches of random values.
    for (int sketch = 0; sketch < sketchCount; sketch++) {
        final Union randomUnion = SetOperation.builder().buildUnion();
        int remaining = valuesPerSketch;
        while (remaining-- > 0) {
            randomUnion.update(Math.random());
        }
        busyAggregator._aggregate(randomUnion);
    }
    final UnionAggregator freshCopy = busyAggregator.statelessClone();
    assertNotSame(busyAggregator, freshCopy);
    freshCopy._aggregate(union1);
    // The clone must reflect only union1, not the sketches aggregated before cloning.
    final double expectedEstimate = union1.getResult().getEstimate();
    final Union cloneState = (Union) freshCopy.state()[0];
    assertEquals(expectedEstimate, cloneState.getResult().getEstimate(), DELTA);
}
Also used : Union(com.yahoo.sketches.theta.Union) Test(org.junit.Test) AggregateFunctionTest(uk.gov.gchq.gaffer.function.AggregateFunctionTest)

Example 7 with Union

use of com.yahoo.sketches.theta.Union in project Gaffer by gchq.

the class UnionSerialiserTest method testSerialiser.

/**
 * Round-trips the given union through {@code SERIALISER} and asserts that the estimate
 * of the deserialised union equals the estimate of the original, within {@code DELTA}.
 *
 * <p>If serialisation or deserialisation throws a {@code SerialisationException} the
 * test fails with an {@link AssertionError} that carries the exception as its cause.
 *
 * @param union the union sketch to serialise and then deserialise
 */
private void testSerialiser(final Union union) {
    final double estimate = union.getResult().getEstimate();
    final Union unionDeserialised;
    try {
        final byte[] unionSerialised = SERIALISER.serialise(union);
        unionDeserialised = SERIALISER.deserialise(unionSerialised);
    } catch (final SerialisationException exception) {
        // Same failure message as before, but keep the original exception as the cause
        // so the failure report shows why (de)serialisation went wrong.
        throw new AssertionError("A SerialisationException occurred", exception);
    }
    assertEquals(estimate, unionDeserialised.getResult().getEstimate(), DELTA);
}
Also used : SerialisationException(uk.gov.gchq.gaffer.exception.SerialisationException) Union(com.yahoo.sketches.theta.Union)

Example 8 with Union

use of com.yahoo.sketches.theta.Union in project Gaffer by gchq.

the class UnionSerialiserTest method testSerialiseAndDeserialise.

/**
 * Runs the serialise/deserialise round-trip check first on a union that has been
 * updated with three values, and then on a freshly built union with no updates.
 */
@Test
public void testSerialiseAndDeserialise() {
    final Union populated = SetOperation.builder().buildUnion();
    for (final double value : new double[] {1.0D, 2.0D, 3.0D}) {
        populated.update(value);
    }
    testSerialiser(populated);
    // A union that has received no updates must round-trip as well.
    testSerialiser(SetOperation.builder().buildUnion());
}
Also used : Union(com.yahoo.sketches.theta.Union) Test(org.junit.Test)

Example 9 with Union

use of com.yahoo.sketches.theta.Union in project Gaffer by gchq.

the class DataGenerator13 method getElements.

/**
 * Builds a fixed set of example elements: for each generated edge A-B&lt;i&gt; in group
 * "red" there is a matching "size" entity on vertex "graph" whose "size" property is a
 * union sketch updated with the string "A-B&lt;i&gt;".
 *
 * @param line not read by this implementation; the same elements are produced for any input
 * @return the generated edges and entities
 */
@Override
public Iterable<Element> getElements(final String line) {
    final Set<Element> generated = new HashSet<>();
    // On day 10/1/17 there are 1000 edges A-B0, A-B1, ..., A-B999.
    // For each edge we create an Entity with a union sketch containing the source and destination from the edge
    final Date midnight9th = LoadAndQuery8.getDate("09/01/17");
    final Date midnight10th = LoadAndQuery8.getDate("10/01/17");
    addEdgesWithSketches(generated, 0, 1000, midnight9th, midnight10th);
    // On day 11/1/17 there are 500 edges A-B750, A-B751, ..., A-B1249.
    final Date midnight11th = LoadAndQuery8.getDate("11/01/17");
    addEdgesWithSketches(generated, 750, 1250, midnight10th, midnight11th);
    return generated;
}

// Adds, for every index in [fromIndex, toIndex), one "red" edge A-B<index> and one "size" entity holding a sketch of that edge.
private void addEdgesWithSketches(final Set<Element> target, final int fromIndex, final int toIndex, final Date startDate, final Date endDate) {
    for (int index = fromIndex; index < toIndex; index++) {
        final Edge redEdge = new Edge.Builder()
                .group("red")
                .source("A")
                .dest("B" + index)
                .property("startDate", startDate)
                .property("endDate", endDate)
                .property("count", 1L)
                .build();
        target.add(redEdge);
        final Union edgeSketch = Sketches.setOperationBuilder().buildUnion();
        edgeSketch.update("A-B" + index);
        final Entity sizeEntity = new Entity.Builder()
                .group("size")
                .vertex("graph")
                .property("startDate", startDate)
                .property("endDate", endDate)
                .property("size", edgeSketch)
                .build();
        target.add(sizeEntity);
    }
}
Also used : Entity(uk.gov.gchq.gaffer.data.element.Entity) Element(uk.gov.gchq.gaffer.data.element.Element) Edge(uk.gov.gchq.gaffer.data.element.Edge) Date(java.util.Date) Union(com.yahoo.sketches.theta.Union) HashSet(java.util.HashSet)

Example 10 with Union

use of com.yahoo.sketches.theta.Union in project sketches-pig by DataSketches.

the class DataToSketch method exec.

// @formatter:off
/**
 ***********************************************************************************************
 * Top-level exec function.
 * This method accepts an input Tuple containing a Bag of one or more inner <b>Datum Tuples</b>
 * and returns a single updated <b>Sketch</b> as a <b>Sketch Tuple</b>.
 *
 * <p>If a large number of calls is anticipated, leveraging either the <i>Algebraic</i> or
 * <i>Accumulator</i> interfaces is recommended. Pig normally handles this automatically.
 *
 * <p>Internally, this method presents the inner <b>Datum Tuples</b> to a new <b>Sketch</b>,
 * which is returned as a <b>Sketch Tuple</b>.
 *
 * <p><b>Input Tuple</b>
 * <ul>
 *   <li>Tuple: TUPLE (Must contain only one field)
 *     <ul>
 *       <li>index 0: DataBag: BAG (May contain 0 or more Inner Tuples)
 *         <ul>
 *           <li>index 0: Tuple: TUPLE <b>Datum Tuple</b></li>
 *           <li>...</li>
 *           <li>index n-1: Tuple: TUPLE <b>Datum Tuple</b></li>
 *         </ul>
 *       </li>
 *     </ul>
 *   </li>
 * </ul>
 *
 * <b>Datum Tuple</b>
 * <ul>
 *   <li>Tuple: TUPLE (Must contain only one field)
 *     <ul>
 *       <li>index 0: Java data type : Pig DataType: may be any one of:
 *         <ul>
 *           <li>Byte: BYTE</li>
 *           <li>Integer: INTEGER</li>
 *           <li>Long: LONG</li>
 *           <li>Float: FLOAT</li>
 *           <li>Double: DOUBLE</li>
 *           <li>String: CHARARRAY</li>
 *           <li>DataByteArray: BYTEARRAY</li>
 *         </ul>
 *       </li>
 *     </ul>
 *   </li>
 * </ul>
 *
 * <b>Sketch Tuple</b>
 * <ul>
 *   <li>Tuple: TUPLE (Contains exactly 1 field)
 *     <ul>
 *       <li>index 0: DataByteArray: BYTEARRAY = The serialization of a Sketch object.</li>
 *     </ul>
 *   </li>
 * </ul>
 *
 * @param inputTuple A tuple containing a single bag, containing Datum Tuples.
 * @return Sketch Tuple. If inputTuple is null or empty, returns empty sketch (8 bytes).
 * @see "org.apache.pig.EvalFunc.exec(org.apache.pig.data.Tuple)"
 * @throws IOException from Pig.
 */
// @formatter:on
// TOP LEVEL EXEC
@Override
public Tuple exec(final Tuple inputTuple) throws IOException {
    // throws is in API
    // The exec is a stateless function.  It operates on the input and returns a result.
    // It can only call static functions.
    final DataBag bag = extractBag(inputTuple);
    if (bag == null) {
        // Configured with parent
        return emptyCompactOrderedSketchTuple_;
    }
    // Build the union only once we know there is a bag to process, so the
    // early-return path above does not allocate a union it never uses.
    final Union union = newUnion(nomEntries_, p_, seed_);
    // updates union with all elements of the bag
    updateUnion(bag, union);
    final CompactSketch compOrdSketch = union.getResult(true, null);
    return compactOrderedSketchToTuple(compOrdSketch);
}
Also used : CompactSketch(com.yahoo.sketches.theta.CompactSketch) DataBag(org.apache.pig.data.DataBag) Union(com.yahoo.sketches.theta.Union)

Aggregations

Union (com.yahoo.sketches.theta.Union): 17, Test (org.junit.Test): 4, CompactSketch (com.yahoo.sketches.theta.CompactSketch): 2, Intersection (com.yahoo.sketches.theta.Intersection): 2, Sketch (com.yahoo.sketches.theta.Sketch): 2, Test (org.junit.jupiter.api.Test): 2, Entity (uk.gov.gchq.gaffer.data.element.Entity): 2, AggregateFunctionTest (uk.gov.gchq.gaffer.function.AggregateFunctionTest): 2, Memory (com.yahoo.memory.Memory): 1, MemoryRegion (com.yahoo.memory.MemoryRegion): 1, NativeMemory (com.yahoo.memory.NativeMemory): 1, AnotB (com.yahoo.sketches.theta.AnotB): 1, GroupByQueryRunnerTest (io.druid.query.groupby.GroupByQueryRunnerTest): 1, Date (java.util.Date): 1, HashSet (java.util.HashSet): 1, DataBag (org.apache.pig.data.DataBag): 1, Edge (uk.gov.gchq.gaffer.data.element.Edge): 1, Element (uk.gov.gchq.gaffer.data.element.Element): 1, DataGenerator13 (uk.gov.gchq.gaffer.example.gettingstarted.generator.DataGenerator13): 1, SerialisationException (uk.gov.gchq.gaffer.exception.SerialisationException): 1