Search in sources :

Example 71 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

the class UnionSketchTest method algebraicIntermediateFromIntermediate.

/**
 * Runs the function class named by {@code UnionSketch.getIntermed()} on a bag whose
 * tuples carry a serialized {@code HllSketch} directly (as a {@code DataByteArray}),
 * then checks that the sketch found in field 0 of the result is non-empty and
 * estimates the two distinct values that were fed in.
 *
 * @throws Exception if the function class cannot be loaded, instantiated or executed
 */
@Test
public void algebraicIntermediateFromIntermediate() throws Exception {
    // Class.newInstance() is deprecated since Java 9 because it rethrows checked
    // constructor exceptions without declaring them; go through the no-arg constructor.
    @SuppressWarnings("unchecked") EvalFunc<Tuple> func = (EvalFunc<Tuple>) Class.forName(new UnionSketch().getIntermed()).getDeclaredConstructor().newInstance();
    HllSketch inputSketch = new HllSketch(12);
    inputSketch.update("a");
    inputSketch.update("b");
    // The serialized sketch sits directly in the bag's tuple, with no nested bag around it.
    DataBag bag = bagFactory.newDefaultBag();
    bag.add(tupleFactory.newTuple(new DataByteArray(inputSketch.toCompactByteArray())));
    Tuple result = func.exec(tupleFactory.newTuple(bag));
    HllSketch sketch = DataToSketchTest.getSketch((DataByteArray) result.get(0));
    Assert.assertFalse(sketch.isEmpty());
    // Two distinct items were inserted, so the estimate must be 2 within the tolerance.
    Assert.assertEquals(sketch.getEstimate(), 2.0, 0.01);
}
Also used : HllSketch(com.yahoo.sketches.hll.HllSketch) DataBag(org.apache.pig.data.DataBag) EvalFunc(org.apache.pig.EvalFunc) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 72 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

the class UnionSketchTest method algebraicFinalFromInitial.

/**
 * Builds the function class named by {@code UnionSketch.getFinal()} with the string
 * arguments {@code "10"} and {@code "HLL_6"}, feeds it a bag of tuples that each wrap
 * an inner bag holding one serialized sketch, and checks the returned sketch: it must
 * be non-empty, estimate three distinct values, and report lgConfigK 10 and target
 * type HLL_6.
 *
 * @throws Exception if the function class cannot be loaded, instantiated or executed
 */
@Test
public void algebraicFinalFromInitial() throws Exception {
    final Class<?> finalClass = Class.forName(new UnionSketch().getFinal());
    @SuppressWarnings("unchecked")
    final EvalFunc<DataByteArray> finalFunc = (EvalFunc<DataByteArray>) finalClass
        .getConstructor(String.class, String.class)
        .newInstance("10", "HLL_6");

    // Source sketch holding the three distinct values 1, 2 and 3.
    final HllSketch source = new HllSketch(12);
    for (int value = 1; value <= 3; value++) {
        source.update(value);
    }

    // Input layout: outer bag -> tuple -> inner bag -> tuple -> serialized sketch.
    final DataBag outer = bagFactory.newDefaultBag();
    final DataBag inner = bagFactory.newDefaultBag();
    final DataByteArray serialized = new DataByteArray(source.toCompactByteArray());
    inner.add(tupleFactory.newTuple(serialized));
    outer.add(tupleFactory.newTuple(inner));

    final DataByteArray output = finalFunc.exec(tupleFactory.newTuple(outer));
    final HllSketch merged = DataToSketchTest.getSketch(output);

    Assert.assertFalse(merged.isEmpty());
    Assert.assertEquals(merged.getEstimate(), 3.0, 0.01);
    Assert.assertEquals(merged.getLgConfigK(), 10);
    Assert.assertEquals(merged.getTgtHllType(), TgtHllType.HLL_6);
}
Also used : HllSketch(com.yahoo.sketches.hll.HllSketch) DataBag(org.apache.pig.data.DataBag) EvalFunc(org.apache.pig.EvalFunc) DataByteArray(org.apache.pig.data.DataByteArray) Test(org.testng.annotations.Test)

Example 73 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

the class UnionSketchTest method algebraicIntermediateFromInitial.

/**
 * Builds the function class named by {@code UnionSketch.getIntermed()} with the string
 * arguments {@code "10"} and {@code "HLL_6"}, feeds it a bag of tuples that each wrap
 * an inner bag holding one serialized sketch, and checks the sketch found in field 0
 * of the resulting tuple: it must be non-empty, estimate three distinct values, and
 * report lgConfigK 10 and target type HLL_6.
 *
 * @throws Exception if the function class cannot be loaded, instantiated or executed
 */
@Test
public void algebraicIntermediateFromInitial() throws Exception {
    final Class<?> intermediateClass = Class.forName(new UnionSketch().getIntermed());
    @SuppressWarnings("unchecked")
    final EvalFunc<Tuple> intermediate = (EvalFunc<Tuple>) intermediateClass
        .getConstructor(String.class, String.class)
        .newInstance("10", "HLL_6");

    // Source sketch holding the three distinct values 1, 2 and 3.
    final HllSketch source = new HllSketch(12);
    for (int value = 1; value <= 3; value++) {
        source.update(value);
    }

    // Input layout: outer bag -> tuple -> inner bag -> tuple -> serialized sketch.
    final DataBag outer = bagFactory.newDefaultBag();
    final DataBag inner = bagFactory.newDefaultBag();
    final DataByteArray serialized = new DataByteArray(source.toCompactByteArray());
    inner.add(tupleFactory.newTuple(serialized));
    outer.add(tupleFactory.newTuple(inner));

    final Tuple output = intermediate.exec(tupleFactory.newTuple(outer));
    final DataByteArray sketchBytes = (DataByteArray) output.get(0);
    final HllSketch merged = DataToSketchTest.getSketch(sketchBytes);

    Assert.assertFalse(merged.isEmpty());
    Assert.assertEquals(merged.getEstimate(), 3.0, 0.01);
    Assert.assertEquals(merged.getLgConfigK(), 10);
    Assert.assertEquals(merged.getTgtHllType(), TgtHllType.HLL_6);
}
Also used : HllSketch(com.yahoo.sketches.hll.HllSketch) DataBag(org.apache.pig.data.DataBag) EvalFunc(org.apache.pig.EvalFunc) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 74 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

the class DataToDoublesSketchTest method algebraicInitial.

/**
 * Executes {@code DataToDoublesSketch.Initial} on a bag containing a single empty
 * tuple and checks the shape of the result: a non-null, one-field tuple whose only
 * field is a {@code DataBag} of size 1.
 *
 * @throws Exception if executing the function fails
 */
@Test
public void algebraicInitial() throws Exception {
    final EvalFunc<Tuple> initial = new DataToDoublesSketch.Initial();

    final DataBag input = bagFactory.newDefaultBag();
    input.add(tupleFactory.newTuple());

    final Tuple output = initial.exec(tupleFactory.newTuple(input));
    Assert.assertNotNull(output);
    Assert.assertEquals(output.size(), 1);

    // The single field is expected to be a bag with exactly one element.
    final Object onlyField = output.get(0);
    Assert.assertTrue(onlyField instanceof DataBag);
    final DataBag outputBag = (DataBag) onlyField;
    Assert.assertEquals(outputBag.size(), 1);
}
Also used : DataBag(org.apache.pig.data.DataBag) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 75 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

the class DataToDoublesSketchTest method algebraicIntermediateFinalWrongType.

/**
 * Executes {@code DataToDoublesSketch.IntermediateFinal} on a bag whose tuple holds a
 * plain double and expects an {@code IllegalArgumentException}.
 *
 * @throws Exception the expected {@code IllegalArgumentException}, or any other failure
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void algebraicIntermediateFinalWrongType() throws Exception {
    final EvalFunc<Tuple> intermediateFinal = new DataToDoublesSketch.IntermediateFinal();
    final DataBag invalidBag = bagFactory.newDefaultBag();
    // Tuples in this bag are required to hold a bag or a data byte array;
    // a bare double is deliberately the wrong type.
    invalidBag.add(tupleFactory.newTuple(1.0));
    final Tuple input = tupleFactory.newTuple(invalidBag);
    intermediateFinal.exec(input);
}
Also used : DataBag(org.apache.pig.data.DataBag) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Aggregations

DataBag (org.apache.pig.data.DataBag)266 Tuple (org.apache.pig.data.Tuple)223 Test (org.testng.annotations.Test)142 DataByteArray (org.apache.pig.data.DataByteArray)103 IOException (java.io.IOException)20 Estimate (com.yahoo.sketches.pig.theta.Estimate)19 EvalFunc (org.apache.pig.EvalFunc)16 HllSketch (com.yahoo.sketches.hll.HllSketch)14 DoubleSummary (com.yahoo.sketches.tuple.DoubleSummary)13 DoubleSummaryDeserializer (com.yahoo.sketches.tuple.DoubleSummaryDeserializer)13 Test (org.junit.Test)13 ArrayOfStringsSerDe (com.yahoo.sketches.ArrayOfStringsSerDe)12 ArrayOfDoublesSketch (com.yahoo.sketches.tuple.ArrayOfDoublesSketch)12 ExecException (org.apache.pig.backend.executionengine.ExecException)12 ItemsSketch (com.yahoo.sketches.frequencies.ItemsSketch)11 ArrayOfDoublesUpdatableSketchBuilder (com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder)11 Map (java.util.Map)11 ArrayOfDoublesUpdatableSketch (com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketch)10 ArrayList (java.util.ArrayList)10 HashMap (java.util.HashMap)10