Search in sources :

Example 61 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

the class DataToSketchTest method checkNotDBAExcep.

/**
 * Feeds {@code DataToSketch.accumulate} a bag containing a single one-field tuple,
 * first with that field left unset and then with the field holding a {@code Double}.
 *
 * <p>NOTE(review): the method name suggests an exception is expected for the
 * wrong-type input, but the annotation declares no expected exception -- confirm
 * against the {@code accumulate} contract.
 *
 * @throws IOException propagated from the tuple and accumulate calls below
 */
@Test
public void checkNotDBAExcep() throws IOException {
    DataToSketch inter = new DataToSketch();
    // create inputTuple and a bag, add bag to inputTuple
    Tuple inputTuple = TupleFactory.getInstance().newTuple(1);
    DataBag bag = BagFactory.getInstance().newDefaultBag();
    inputTuple.set(0, bag);
    Tuple innerTuple = TupleFactory.getInstance().newTuple(1);
    bag.add(innerTuple);
    // add empty tuple (its single field has not been set yet)
    inter.accumulate(inputTuple);
    // not a DBA; Double.valueOf replaces the deprecated new Double(double) constructor
    innerTuple.set(0, Double.valueOf(1.0));
    // start from a fresh accumulator so the first call cannot influence the second
    inter = new DataToSketch();
    // add wrong type
    inter.accumulate(inputTuple);
}
Also used : DataBag(org.apache.pig.data.DataBag) DataToSketch(com.yahoo.sketches.pig.theta.DataToSketch) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 62 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

the class DataToSketchTest method testTopExec.

/**
 * Calls {@code DataToSketch.exec} first with a null tuple and then with a bag of
 * 64 distinct ints, checking the sketch decoded from each result.
 *
 * @throws IOException propagated from the tuple and exec calls below
 */
@Test
public void testTopExec() throws IOException {
    // empty constructor, size 4096
    EvalFunc<Tuple> udf = new DataToSketch();

    // a null input tuple must still produce a (empty) sketch
    Tuple input = null;
    Tuple output = udf.exec(input);
    assertTrue(tupleToSketch(output, seed_).isEmpty());

    // outer tuple -> bag -> 64 one-field tuples holding 0..63
    TupleFactory tuples = TupleFactory.getInstance();
    DataBag values = BagFactory.getInstance().newDefaultBag();
    input = tuples.newTuple(1);
    input.set(0, values);
    int next = 0;
    while (next < 64) {
        Tuple item = tuples.newTuple(1);
        item.set(0, next);
        values.add(item);
        next++;
    }

    output = udf.exec(input);
    assertNotNull(output);
    assertEquals(output.size(), 1);

    // the single result field carries the serialized sketch bytes
    DataByteArray serialized = (DataByteArray) output.get(0);
    assertTrue(serialized.size() > 0);

    // zero tolerance: the estimate is asserted to be exactly 64
    assertEquals(tupleToSketch(output, seed_).getEstimate(), 64.0, 0.0);
}
Also used : DataBag(org.apache.pig.data.DataBag) DataToSketch(com.yahoo.sketches.pig.theta.DataToSketch) Sketch(com.yahoo.sketches.theta.Sketch) PigUtil.tupleToSketch(com.yahoo.sketches.pig.theta.PigUtil.tupleToSketch) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 63 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

the class UnionDoublesSketchTest method algebraicIntermediateFinalWrongType.

/**
 * Expects {@code UnionDoublesSketch.IntermediateFinal.exec} to throw
 * {@link IllegalArgumentException} when the bag's tuple holds a plain double.
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void algebraicIntermediateFinalWrongType() throws Exception {
    EvalFunc<Tuple> intermediateFinal = new UnionDoublesSketch.IntermediateFinal();

    // tuples in this bag must hold either bags or data byte arrays,
    // so a tuple wrapping a double is the invalid input under test
    DataBag invalidBag = bagFactory.newDefaultBag();
    Tuple doubleTuple = tupleFactory.newTuple(1.0);
    invalidBag.add(doubleTuple);

    Tuple wrapped = tupleFactory.newTuple(invalidBag);
    intermediateFinal.exec(wrapped);
}
Also used : DataBag(org.apache.pig.data.DataBag) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 64 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

the class UnionDoublesSketchTest method execNormalCase.

/**
 * Runs {@code UnionDoublesSketch.exec} on a bag holding one serialized sketch that
 * was updated with a single value, and checks the result is non-empty with N == 1.
 */
@Test
public void execNormalCase() throws Exception {
    EvalFunc<Tuple> union = new UnionDoublesSketch();

    // source sketch with exactly one update
    UpdateDoublesSketch single = DoublesSketch.builder().build();
    single.update(1.0);

    // wrap the serialized sketch: bytes -> tuple -> bag -> input tuple
    DataByteArray serialized = new DataByteArray(single.toByteArray());
    DataBag sketches = bagFactory.newDefaultBag();
    sketches.add(tupleFactory.newTuple(serialized));
    Tuple input = tupleFactory.newTuple(sketches);

    DoublesSketch merged = getSketch(union.exec(input));
    Assert.assertFalse(merged.isEmpty());
    Assert.assertEquals(merged.getN(), 1);
}
Also used : DoublesSketch(com.yahoo.sketches.quantiles.DoublesSketch) UpdateDoublesSketch(com.yahoo.sketches.quantiles.UpdateDoublesSketch) DataBag(org.apache.pig.data.DataBag) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 65 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

the class UnionDoublesSketchTest method accumulator.

/**
 * Walks {@code UnionDoublesSketch} through its Accumulator calls: getValue before
 * any input, then after null, empty-tuple and empty-bag inputs, then after the same
 * one-value sketch is accumulated twice, and finally after cleanup.
 */
@Test
public void accumulator() throws Exception {
    Accumulator<Tuple> union = new UnionDoublesSketch();

    // no input yet
    Assert.assertTrue(getSketch(union.getValue()).isEmpty());

    // null input tuple
    union.accumulate(null);
    Assert.assertTrue(getSketch(union.getValue()).isEmpty());

    // empty input tuple
    union.accumulate(tupleFactory.newTuple());
    Assert.assertTrue(getSketch(union.getValue()).isEmpty());

    // empty bag
    union.accumulate(tupleFactory.newTuple(bagFactory.newDefaultBag()));
    Assert.assertTrue(getSketch(union.getValue()).isEmpty());

    // normal case: a bag with one serialized sketch that saw a single value
    DataBag sketches = bagFactory.newDefaultBag();
    UpdateDoublesSketch single = DoublesSketch.builder().build();
    single.update(1.0);
    sketches.add(tupleFactory.newTuple(new DataByteArray(single.toByteArray())));
    // accumulate the same bag twice, each time in a fresh wrapping tuple
    for (int pass = 0; pass < 2; pass++) {
        union.accumulate(tupleFactory.newTuple(sketches));
    }
    DoublesSketch merged = getSketch(union.getValue());
    Assert.assertFalse(merged.isEmpty());
    Assert.assertEquals(merged.getN(), 2);

    // cleanup
    union.cleanup();
    Assert.assertTrue(getSketch(union.getValue()).isEmpty());
}
Also used : DoublesSketch(com.yahoo.sketches.quantiles.DoublesSketch) UpdateDoublesSketch(com.yahoo.sketches.quantiles.UpdateDoublesSketch) DataBag(org.apache.pig.data.DataBag) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Aggregations

DataBag (org.apache.pig.data.DataBag): 266; Tuple (org.apache.pig.data.Tuple): 223; Test (org.testng.annotations.Test): 142; DataByteArray (org.apache.pig.data.DataByteArray): 103; IOException (java.io.IOException): 20; Estimate (com.yahoo.sketches.pig.theta.Estimate): 19; EvalFunc (org.apache.pig.EvalFunc): 16; HllSketch (com.yahoo.sketches.hll.HllSketch): 14; DoubleSummary (com.yahoo.sketches.tuple.DoubleSummary): 13; DoubleSummaryDeserializer (com.yahoo.sketches.tuple.DoubleSummaryDeserializer): 13; Test (org.junit.Test): 13; ArrayOfStringsSerDe (com.yahoo.sketches.ArrayOfStringsSerDe): 12; ArrayOfDoublesSketch (com.yahoo.sketches.tuple.ArrayOfDoublesSketch): 12; ExecException (org.apache.pig.backend.executionengine.ExecException): 12; ItemsSketch (com.yahoo.sketches.frequencies.ItemsSketch): 11; ArrayOfDoublesUpdatableSketchBuilder (com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder): 11; Map (java.util.Map): 11; ArrayOfDoublesUpdatableSketch (com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketch): 10; ArrayList (java.util.ArrayList): 10; HashMap (java.util.HashMap): 10