Search in sources :

Example 56 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

the class DataToSketchTest method testAccumulate.

/**
 * Exercises the {@link Accumulator} path of {@code DataToSketch}: two bags holding the
 * integers 0..63 and 64..90 are accumulated, the resulting sketch must estimate exactly
 * 91, and after {@code cleanup()} the returned sketch must estimate 0.
 */
@Test
public void testAccumulate() throws IOException {
    final TupleFactory tuples = TupleFactory.getInstance();
    final BagFactory bags = BagFactory.getInstance();
    final Accumulator<Tuple> accumulator = new DataToSketch("128");

    // First batch: one single-field tuple per integer in 0..63.
    DataBag batch = bags.newDefaultBag();
    Tuple input = tuples.newTuple(1);
    input.set(0, batch);
    int value = 0;
    while (value < 64) {
        final Tuple item = tuples.newTuple(1);
        item.set(0, value);
        batch.add(item);
        value++;
    }
    accumulator.accumulate(input);

    // Second batch: the next 27 integers, 64..90, in a fresh bag and input tuple.
    batch = bags.newDefaultBag();
    input = tuples.newTuple(1);
    input.set(0, batch);
    while (value < 64 + 27) {
        final Tuple item = tuples.newTuple(1);
        item.set(0, value);
        batch.add(item);
        value++;
    }
    accumulator.accumulate(input);

    // The accumulated value is a one-field tuple wrapping a non-empty byte array.
    Tuple accumulated = accumulator.getValue();
    assertNotNull(accumulated);
    assertEquals(accumulated.size(), 1);
    DataByteArray serialized = (DataByteArray) accumulated.get(0);
    assertTrue(serialized.size() > 0);
    Sketch decoded = tupleToSketch(accumulated, seed_);
    assertEquals(decoded.getEstimate(), 91.0, 0.0);

    // After cleanup a sketch is still returned, but its estimate must be 0.
    accumulator.cleanup();
    accumulated = accumulator.getValue();
    assertNotNull(accumulated);
    assertEquals(accumulated.size(), 1);
    serialized = (DataByteArray) accumulated.get(0);
    assertTrue(serialized.size() > 0);
    decoded = tupleToSketch(accumulated, seed_);
    assertEquals(decoded.getEstimate(), 0.0, 0.0);
}
Also used : DataBag(org.apache.pig.data.DataBag) DataToSketch(com.yahoo.sketches.pig.theta.DataToSketch) Sketch(com.yahoo.sketches.theta.Sketch) PigUtil.tupleToSketch(com.yahoo.sketches.pig.theta.PigUtil.tupleToSketch) DataToSketch(com.yahoo.sketches.pig.theta.DataToSketch) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 57 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

the class DataToSketchTest method testInitial.

/**
 * Checks that {@code DataToSketch.Initial} returns a one-field tuple whose field is a
 * bag of the same size (64) as the input bag.
 */
@Test
public void testInitial() throws IOException {
    final EvalFunc<Tuple> initial = new DataToSketch.Initial("128");
    final TupleFactory tuples = TupleFactory.getInstance();

    // Input: a single-field tuple wrapping a bag of 64 one-integer tuples (0..63).
    final DataBag source = BagFactory.getInstance().newDefaultBag();
    final Tuple input = tuples.newTuple(1);
    input.set(0, source);
    int value = 0;
    while (value < 64) {
        final Tuple item = tuples.newTuple(1);
        item.set(0, value);
        source.add(item);
        value++;
    }

    final Tuple output = initial.exec(input);
    assertNotNull(output);
    assertEquals(output.size(), 1);
    final DataBag outputBag = (DataBag) output.get(0);
    assertEquals(outputBag.size(), 64);
}
Also used : DataBag(org.apache.pig.data.DataBag) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 58 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

the class DataToSketchTest method checkAlgFinalOuterBagEmptyTuples.

/**
 * Verifies that {@code DataToSketch.IntermediateFinal} yields a sketch whose estimate is 0
 * for three degenerate inputs: an outer tuple with an unset field, an outer tuple holding
 * an empty bag, and an outer tuple holding a bag with one inner tuple whose field is unset.
 */
@Test
public void checkAlgFinalOuterBagEmptyTuples() throws IOException {
    final EvalFunc<Tuple> finalStage = new DataToSketch.IntermediateFinal("256");
    final EvalFunc<Double> estimator = new Estimate();
    final TupleFactory tuples = TupleFactory.getInstance();

    // Case 1: field 0 of the outer tuple has never been set.
    final Tuple outer = tuples.newTuple(1);
    assertEquals(estimator.exec(finalStage.exec(outer)), 0.0, 0.0);

    // Case 2: field 0 holds a bag with no tuples in it.
    final DataBag innerBag = BagFactory.getInstance().newDefaultBag();
    outer.set(0, innerBag);
    assertEquals(estimator.exec(finalStage.exec(outer)), 0.0, 0.0);

    // Case 3: the bag holds one tuple whose single field has never been set.
    final Tuple emptyInner = tuples.newTuple(1);
    innerBag.add(emptyInner);
    assertEquals(estimator.exec(finalStage.exec(outer)), 0.0, 0.0);
}
Also used : Estimate(com.yahoo.sketches.pig.theta.Estimate) DataBag(org.apache.pig.data.DataBag) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 59 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

the class DataToSketchTest method textTopExec2.

/*
 * DataToSketch <br>
 * Feeds the UDF one bag containing an empty tuple, a tuple with an unset field, and
 * single-field tuples of type BYTE, INTEGER, LONG, FLOAT, DOUBLE, BYTEARRAY and
 * CHARARRAY (including an empty byte array, an empty string, and both -0.0 and 0.0),
 * then checks that the resulting sketch estimates exactly 8 distinct values.
 */
@Test
public void textTopExec2() throws IOException {
    TupleFactory tupleFactory = TupleFactory.getInstance();
    BagFactory bagFactory = BagFactory.getInstance();
    String[] ctorArgs = { "128" };
    // The cast to a parameterized type is unchecked; suppress only on this declaration.
    @SuppressWarnings("unchecked")
    EvalFunc<Tuple> dataUdf = (EvalFunc<Tuple>) PigContext.instantiateFuncFromSpec(new FuncSpec(udfName, ctorArgs));

    DataBag bag = bagFactory.newDefaultBag();
    // A tuple created without a size argument.
    bag.add(tupleFactory.newTuple());
    // A tuple with one field that is never set.
    bag.add(tupleFactory.newTuple(1));

    // One single-field tuple per value, added in this order. The numbered comments track
    // which entries are intended to count toward the expected estimate of 8.
    // Boxed values use valueOf(): the boxed-primitive constructors are deprecated.
    Object[] fieldValues = {
        // 1: byte
        Byte.valueOf((byte) 1),
        // 2: int
        Integer.valueOf(2),
        // 3: long
        Long.valueOf(3L),
        // 4: float
        Float.valueOf(4f),
        // 5: double
        Double.valueOf(5.0),
        // 6: non-empty byte array
        new DataByteArray(new byte[] { 1, 2, 3 }),
        // ignored: empty byte array
        new DataByteArray(new byte[0]),
        // 7: negative zero
        Double.valueOf(-0.0),
        // 7 (duplicate): positive zero is intended to count as the same value as -0.0
        Double.valueOf(0.0),
        // 8: non-empty string
        "abcde",
        // ignored: empty string
        ""
    };
    for (Object fieldValue : fieldValues) {
        Tuple t = tupleFactory.newTuple(1);
        t.set(0, fieldValue);
        bag.add(t);
    }

    Tuple in = tupleFactory.newTuple(1);
    in.set(0, bag);
    // should return a sketch
    Tuple resultTuple = dataUdf.exec(in);
    assertNotNull(resultTuple);
    assertEquals(resultTuple.size(), 1);
    DataByteArray bytes = (DataByteArray) resultTuple.get(0);
    assertTrue(bytes.size() > 0);
    Sketch sketch = tupleToSketch(resultTuple, seed_);
    assertEquals(sketch.getEstimate(), 8.0, 0.0);
}
Also used : DataBag(org.apache.pig.data.DataBag) FuncSpec(org.apache.pig.FuncSpec) TupleFactory(org.apache.pig.data.TupleFactory) EvalFunc(org.apache.pig.EvalFunc) BagFactory(org.apache.pig.data.BagFactory) Sketch(com.yahoo.sketches.theta.Sketch) PigUtil.tupleToSketch(com.yahoo.sketches.pig.theta.PigUtil.tupleToSketch) DataToSketch(com.yahoo.sketches.pig.theta.DataToSketch) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 60 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

the class DataToSketchTest method checkAlgFinalInnerNotDBA.

/**
 * Verifies that {@code DataToSketch.IntermediateFinal} throws
 * {@link IllegalArgumentException} when an inner tuple's field holds a {@code Double}
 * rather than a {@code DataByteArray}. The first two calls, on an unset field and on an
 * empty bag, are asserted to produce an estimate of 0.
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void checkAlgFinalInnerNotDBA() throws IOException {
    EvalFunc<Tuple> interFuncFinal = new DataToSketch.IntermediateFinal("256");
    EvalFunc<Double> estFunc = new Estimate();
    // Field 0 of the outer tuple has never been set.
    Tuple inputTuple = TupleFactory.getInstance().newTuple(1);
    Tuple resultTuple = interFuncFinal.exec(inputTuple);
    assertEquals(estFunc.exec(resultTuple), 0.0, 0.0);
    // Field 0 now holds an empty bag.
    DataBag bag = BagFactory.getInstance().newDefaultBag();
    inputTuple.set(0, bag);
    resultTuple = interFuncFinal.exec(inputTuple);
    assertEquals(estFunc.exec(resultTuple), 0.0, 0.0);
    // The bag now holds one tuple whose field is a Double, i.e. not a DataByteArray.
    Tuple innerTuple = TupleFactory.getInstance().newTuple(1);
    bag.add(innerTuple);
    // Double.valueOf replaces the deprecated Double(double) constructor.
    innerTuple.set(0, Double.valueOf(1.0));
    // This call is the one the test expects to throw IllegalArgumentException.
    resultTuple = interFuncFinal.exec(inputTuple);
    // Not reached when the expected exception is thrown by the call above.
    assertEquals(estFunc.exec(resultTuple), 0.0, 0.0);
}
Also used : Estimate(com.yahoo.sketches.pig.theta.Estimate) DataBag(org.apache.pig.data.DataBag) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Aggregations

DataBag (org.apache.pig.data.DataBag)266 Tuple (org.apache.pig.data.Tuple)223 Test (org.testng.annotations.Test)142 DataByteArray (org.apache.pig.data.DataByteArray)103 IOException (java.io.IOException)20 Estimate (com.yahoo.sketches.pig.theta.Estimate)19 EvalFunc (org.apache.pig.EvalFunc)16 HllSketch (com.yahoo.sketches.hll.HllSketch)14 DoubleSummary (com.yahoo.sketches.tuple.DoubleSummary)13 DoubleSummaryDeserializer (com.yahoo.sketches.tuple.DoubleSummaryDeserializer)13 Test (org.junit.Test)13 ArrayOfStringsSerDe (com.yahoo.sketches.ArrayOfStringsSerDe)12 ArrayOfDoublesSketch (com.yahoo.sketches.tuple.ArrayOfDoublesSketch)12 ExecException (org.apache.pig.backend.executionengine.ExecException)12 ItemsSketch (com.yahoo.sketches.frequencies.ItemsSketch)11 ArrayOfDoublesUpdatableSketchBuilder (com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder)11 Map (java.util.Map)11 ArrayOfDoublesUpdatableSketch (com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketch)10 ArrayList (java.util.ArrayList)10 HashMap (java.util.HashMap)10