Search in sources :

Example 6 with Intersect

use of com.yahoo.sketches.pig.theta.Intersect in project sketches-pig by DataSketches.

the class IntersectTest method checkExactTopExec.

/**
 * Exercises the one-shot {@code exec} entry point of {@link Intersect}: a bag of four
 * serialized sketches is intersected and the estimate of the result must be exactly 64.
 */
@Test
public void checkExactTopExec() throws IOException {
    final TupleFactory tupleFactory = TupleFactory.getInstance();
    final EvalFunc<Tuple> intersectUdf = new Intersect();
    final EvalFunc<Double> estimateUdf = new Estimate();

    // Build the bag of four sketches. Each call uses a start offset that advances by 64
    // (0, 64, 128, 192) with the other two arguments fixed at 256; per the assertion
    // below the four sketches are expected to share 64 values.
    // NOTE(review): createDbaFromQssRange is defined outside this view - confirm its
    // parameter meaning against the helper.
    final DataBag sketchBag = BagFactory.getInstance().newDefaultBag();
    for (int start = 0; start < 4 * 64; start += 64) {
        final Tuple sketchTuple = tupleFactory.newTuple(1);
        sketchTuple.set(0, createDbaFromQssRange(256, start, 256));
        sketchBag.add(sketchTuple);
    }

    // The UDF input is a single-field tuple whose field 0 is the bag.
    final Tuple inputTuple = tupleFactory.newTuple(1);
    inputTuple.set(0, sketchBag);

    final Tuple intersection = intersectUdf.exec(inputTuple);
    assertNotNull(intersection);
    assertEquals(intersection.size(), 1);

    // Zero tolerance: the estimate must be exactly 64.
    final Double estimate = estimateUdf.exec(intersection);
    assertEquals(estimate, 64.0, 0.0);
}
Also used : Intersect(com.yahoo.sketches.pig.theta.Intersect) Estimate(com.yahoo.sketches.pig.theta.Estimate) DataBag(org.apache.pig.data.DataBag) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 7 with Intersect

use of com.yahoo.sketches.pig.theta.Intersect in project sketches-pig by DataSketches.

the class IntersectTest method checkExactAccumulator.

/**
 * Exercises the {@link Accumulator} path of {@link Intersect}: one {@code accumulate}
 * call with a bag of four serialized sketches, followed by {@code getValue}, must yield
 * a non-empty serialized sketch whose estimate is exactly 64.
 */
@Test
public void checkExactAccumulator() throws IOException {
    final Accumulator<Tuple> intersectAcc = new Intersect();
    final EvalFunc<Double> estimateUdf = new Estimate();
    final TupleFactory tuples = TupleFactory.getInstance();

    // Input layout: a one-field tuple holding a bag; the bag holds one tuple per sketch.
    final DataBag sketchBag = BagFactory.getInstance().newDefaultBag();
    final Tuple inputTuple = tuples.newTuple(1);
    inputTuple.set(0, sketchBag);

    // Four sketches whose start offsets are 0, 64, 128 and 192, each built with 256 as
    // the remaining arguments. The assertion below expects them to share 64 values.
    // NOTE(review): createDbaFromQssRange is defined outside this view - confirm its
    // parameter meaning against the helper.
    int sketchIndex = 0;
    while (sketchIndex < 4) {
        final Tuple sketchTuple = tuples.newTuple(1);
        sketchTuple.set(0, createDbaFromQssRange(256, sketchIndex * 64, 256));
        sketchBag.add(sketchTuple);
        sketchIndex++;
    }

    // Feed the whole bag in a single accumulate call, then read the result back.
    intersectAcc.accumulate(inputTuple);
    final Tuple intersection = intersectAcc.getValue();
    assertNotNull(intersection);
    assertEquals(intersection.size(), 1);

    // Field 0 must be a non-empty byte array.
    final DataByteArray serialized = (DataByteArray) intersection.get(0);
    assertTrue(serialized.size() > 0);

    // Zero tolerance: the estimate must be exactly 64.
    final Double estimate = estimateUdf.exec(intersection);
    assertEquals(estimate, 64.0, 0.0);
}
Also used : Intersect(com.yahoo.sketches.pig.theta.Intersect) Estimate(com.yahoo.sketches.pig.theta.Estimate) DataBag(org.apache.pig.data.DataBag) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 8 with Intersect

use of com.yahoo.sketches.pig.theta.Intersect in project sketches-pig by DataSketches.

the class IntersectTest method checkBadClassCast.

/**
 * Verifies that {@code accumulate} rejects an input tuple whose field 0 is not a bag:
 * a {@code Double} is stored there and a {@link ClassCastException} is expected.
 */
@Test(expectedExceptions = ClassCastException.class)
public void checkBadClassCast() throws IOException {
    Accumulator<Tuple> interFunc = new Intersect();
    // valid size; field 0 starts out unset
    Tuple inputTuple = TupleFactory.getInstance().newTuple(1);
    // wrong type. Cannot intersect datums.
    // Double.valueOf replaces the deprecated boxed constructor new Double(double).
    inputTuple.set(0, Double.valueOf(1.0));
    // throws ClassCastException
    interFunc.accumulate(inputTuple);
}
Also used : Intersect(com.yahoo.sketches.pig.theta.Intersect) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 9 with Intersect

use of com.yahoo.sketches.pig.theta.Intersect in project sketches-pig by DataSketches.

the class IntersectTest method checkNullEmptyAccumulator.

/**
 * Feeds the {@link Intersect} accumulator a series of degenerate inputs (null tuple,
 * zero-size tuple, null field, empty bag, bag with an empty tuple, bag with a tuple
 * whose field is null) and then one valid sketch. The final estimate must be exactly
 * 64, which is what the test uses to show that the degenerate inputs had no effect
 * on the accumulated result.
 */
@Test
public void checkNullEmptyAccumulator() throws IOException {
    final Accumulator<Tuple> intersectAcc = new Intersect();
    final EvalFunc<Double> estimateUdf = new Estimate();
    final TupleFactory tuples = TupleFactory.getInstance();
    final BagFactory bags = BagFactory.getInstance();

    // Case 1: null input tuple - expected to be ignored.
    final Tuple nullInput = null;
    intersectAcc.accumulate(nullInput);

    // Case 2: input tuple with no fields (invalid size) - expected to be ignored.
    final Tuple zeroSizeInput = tuples.newTuple(0);
    intersectAcc.accumulate(zeroSizeInput);

    // Case 3: correctly sized input tuple whose field 0 was never set - expected to be ignored.
    final Tuple unsetFieldInput = tuples.newTuple(1);
    intersectAcc.accumulate(unsetFieldInput);

    // Case 4: correctly sized input tuple holding a bag of the right type that is empty.
    final Tuple emptyBagInput = tuples.newTuple(1);
    final DataBag growingBag = bags.newDefaultBag();
    emptyBagInput.set(0, growingBag);
    intersectAcc.accumulate(emptyBagInput);

    // Case 5: the same input tuple again, after a zero-field tuple was added to its bag.
    final Tuple zeroSizeInner = tuples.newTuple(0);
    growingBag.add(zeroSizeInner);
    intersectAcc.accumulate(emptyBagInput);

    // Case 6: fresh input tuple and bag; the inner tuple has the correct size but its
    // field 0 is never set.
    final Tuple unsetInnerInput = tuples.newTuple(1);
    final DataBag unsetInnerBag = bags.newDefaultBag();
    unsetInnerInput.set(0, unsetInnerBag);
    final Tuple unsetInner = tuples.newTuple(1);
    unsetInnerBag.add(unsetInner);
    intersectAcc.accumulate(unsetInnerInput);

    // accumulate must be called at least once before getValue. To prove that all of
    // the calls above truly did nothing, accumulate exactly one valid sketch and
    // affirm that getValue() reports that sketch's estimate.
    final Tuple validInput = tuples.newTuple(1);
    final DataBag validBag = bags.newDefaultBag();
    validInput.set(0, validBag);
    final Tuple sketchTuple = tuples.newTuple(1);
    sketchTuple.set(0, createDbaFromQssRange(256, 0, 64));
    validBag.add(sketchTuple);
    intersectAcc.accumulate(validInput);

    final Tuple intersection = intersectAcc.getValue();
    assertNotNull(intersection);
    assertEquals(intersection.size(), 1);

    // Zero tolerance: the estimate must be exactly 64.
    final Double estimate = estimateUdf.exec(intersection);
    assertEquals(estimate, 64.0, 0.0);
}
Also used : Intersect(com.yahoo.sketches.pig.theta.Intersect) Estimate(com.yahoo.sketches.pig.theta.Estimate) DataBag(org.apache.pig.data.DataBag) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Aggregations

Intersect (com.yahoo.sketches.pig.theta.Intersect)9 Test (org.testng.annotations.Test)9 Tuple (org.apache.pig.data.Tuple)7 Estimate (com.yahoo.sketches.pig.theta.Estimate)4 DataBag (org.apache.pig.data.DataBag)4 DataByteArray (org.apache.pig.data.DataByteArray)1 Schema (org.apache.pig.impl.logicalLayer.schema.Schema)1