Search in sources:

Example 31 with EvalFunc

use of org.apache.pig.EvalFunc in project sketches-pig by DataSketches.

the class UnionSketchTest method algebraicFinalFromIntermediate.

/**
 * Runs the class named by {@code UnionSketch.getFinal()} on a bag that holds a single
 * serialized HLL sketch (the test name indicates this simulates intermediate-stage output)
 * and checks that the returned sketch is non-empty with an estimate of about 2, matching
 * the two distinct items ("a" and "b") fed into the input sketch.
 *
 * @throws Exception if the stage class cannot be loaded or instantiated, or if exec fails
 */
@Test
public void algebraicFinalFromIntermediate() throws Exception {
    // Instantiate through the no-arg constructor; Class.newInstance() is deprecated because it
    // rethrows checked constructor exceptions without declaring them.
    // The unchecked cast is required because reflection only yields Object.
    @SuppressWarnings("unchecked") EvalFunc<DataByteArray> func = (EvalFunc<DataByteArray>) Class.forName(new UnionSketch().getFinal()).getDeclaredConstructor().newInstance();

    // Build the input sketch with two distinct items.
    HllSketch inputSketch = new HllSketch(12);
    inputSketch.update("a");
    inputSketch.update("b");

    // Wrap the serialized sketch as tuple -> bag -> tuple, the shape passed to exec.
    DataBag bag = bagFactory.newDefaultBag();
    bag.add(tupleFactory.newTuple(new DataByteArray(inputSketch.toCompactByteArray())));

    DataByteArray result = func.exec(tupleFactory.newTuple(bag));

    HllSketch sketch = DataToSketchTest.getSketch(result);
    Assert.assertFalse(sketch.isEmpty());
    Assert.assertEquals(sketch.getEstimate(), 2.0, 0.01);
}
Also used : HllSketch(com.yahoo.sketches.hll.HllSketch) DataBag(org.apache.pig.data.DataBag) EvalFunc(org.apache.pig.EvalFunc) DataByteArray(org.apache.pig.data.DataByteArray) Test(org.testng.annotations.Test)

Example 32 with EvalFunc

use of org.apache.pig.EvalFunc in project sketches-pig by DataSketches.

the class DataToStringsSketchTest method algebraicInitial.

/**
 * Runs the class named by {@code DataToStringsSketch.getInitial()} on a bag containing one
 * empty tuple and checks the shape of the result: a non-null tuple with exactly one field,
 * that field being a {@code DataBag} holding exactly one element.
 *
 * @throws Exception if the stage class cannot be loaded or instantiated, or if exec fails
 */
@Test
public void algebraicInitial() throws Exception {
    // Instantiate through the no-arg constructor; Class.newInstance() is deprecated because it
    // rethrows checked constructor exceptions without declaring them.
    // The unchecked cast is required because reflection only yields Object.
    @SuppressWarnings("unchecked") EvalFunc<Tuple> func = (EvalFunc<Tuple>) Class.forName(new DataToStringsSketch().getInitial()).getDeclaredConstructor().newInstance();

    // Input: a bag with a single empty tuple.
    DataBag bag = BAG_FACTORY.newDefaultBag();
    bag.add(TUPLE_FACTORY.newTuple());

    Tuple resultTuple = func.exec(TUPLE_FACTORY.newTuple(bag));

    Assert.assertNotNull(resultTuple);
    Assert.assertEquals(resultTuple.size(), 1);
    Assert.assertTrue(resultTuple.get(0) instanceof DataBag);
    Assert.assertEquals(((DataBag) resultTuple.get(0)).size(), 1);
}
Also used : DataBag(org.apache.pig.data.DataBag) EvalFunc(org.apache.pig.EvalFunc) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 33 with EvalFunc

use of org.apache.pig.EvalFunc in project sketches-pig by DataSketches.

the class DataToStringsSketchTest method algebraicIntermediateFinalNormalCase.

/**
 * Runs the class named by {@code DataToStringsSketch.getIntermed()} on a bag mixing the two
 * input shapes this test simulates: a tuple wrapping an inner bag of raw values, and a tuple
 * wrapping a serialized sketch. The resulting sketch must be non-empty and report N == 2,
 * one item from each input.
 *
 * @throws Exception if the stage class cannot be loaded or instantiated, or if exec fails
 */
@Test
public void algebraicIntermediateFinalNormalCase() throws Exception {
    // Instantiate through the no-arg constructor; Class.newInstance() is deprecated because it
    // rethrows checked constructor exceptions without declaring them.
    // The unchecked cast is required because reflection only yields Object.
    @SuppressWarnings("unchecked") EvalFunc<Tuple> func = (EvalFunc<Tuple>) Class.forName(new DataToStringsSketch().getIntermed()).getDeclaredConstructor().newInstance();
    DataBag bag = BAG_FACTORY.newDefaultBag();
    {
        // this is to simulate an output from Initial: a bag of raw value tuples
        DataBag innerBag = BAG_FACTORY.newDefaultBag();
        innerBag.add(TUPLE_FACTORY.newTuple("a"));
        bag.add(TUPLE_FACTORY.newTuple(innerBag));
    }
    {
        // this is to simulate an output from a prior call of IntermediateFinal:
        // a sketch already serialized into a DataByteArray
        ItemsSketch<String> qs = ItemsSketch.getInstance(COMPARATOR);
        qs.update("b");
        bag.add(TUPLE_FACTORY.newTuple(new DataByteArray(qs.toByteArray(SER_DE))));
    }

    Tuple resultTuple = func.exec(TUPLE_FACTORY.newTuple(bag));

    ItemsSketch<String> sketch = getSketch(resultTuple);
    Assert.assertFalse(sketch.isEmpty());
    Assert.assertEquals(sketch.getN(), 2);
}
Also used : DataBag(org.apache.pig.data.DataBag) EvalFunc(org.apache.pig.EvalFunc) ItemsSketch(com.yahoo.sketches.quantiles.ItemsSketch) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 34 with EvalFunc

use of org.apache.pig.EvalFunc in project sketches-pig by DataSketches.

the class DataToStringsSketchTest method algebraicIntermediateFinalWrongType.

/**
 * Runs the class named by {@code DataToStringsSketch.getIntermed()} on a bag whose tuple
 * holds a plain string rather than a bag or a data byte array, and expects the call to fail
 * with {@link IllegalArgumentException}.
 *
 * @throws Exception if the stage class cannot be loaded or instantiated, or if exec fails
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void algebraicIntermediateFinalWrongType() throws Exception {
    // Instantiate through the no-arg constructor; Class.newInstance() is deprecated because it
    // rethrows checked constructor exceptions without declaring them. With this form a
    // constructor failure surfaces as InvocationTargetException, so it cannot be mistaken for
    // the IllegalArgumentException this test expects.
    @SuppressWarnings("unchecked") EvalFunc<Tuple> func = (EvalFunc<Tuple>) Class.forName(new DataToStringsSketch().getIntermed()).getDeclaredConstructor().newInstance();
    DataBag bag = BAG_FACTORY.newDefaultBag();
    // this bag must have tuples with either bags or data byte arrays
    bag.add(TUPLE_FACTORY.newTuple("a"));
    func.exec(TUPLE_FACTORY.newTuple(bag));
}
Also used : DataBag(org.apache.pig.data.DataBag) EvalFunc(org.apache.pig.EvalFunc) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 35 with EvalFunc

use of org.apache.pig.EvalFunc in project sketches-pig by DataSketches.

the class UnionStringsSketchTest method algebraicIntermediateFinalNormalCase.

/**
 * Runs the class named by {@code UnionStringsSketch.getIntermed()} on a bag mixing the two
 * input shapes this test simulates: a tuple wrapping an inner bag of serialized sketches,
 * and a tuple wrapping a serialized sketch directly. The resulting sketch must be non-empty
 * and report N == 2, one item from each input sketch.
 *
 * @throws Exception if the stage class cannot be loaded or instantiated, or if exec fails
 */
@Test
public void algebraicIntermediateFinalNormalCase() throws Exception {
    // Instantiate through the no-arg constructor; Class.newInstance() is deprecated because it
    // rethrows checked constructor exceptions without declaring them.
    // The unchecked cast is required because reflection only yields Object.
    @SuppressWarnings("unchecked") EvalFunc<Tuple> func = (EvalFunc<Tuple>) Class.forName(new UnionStringsSketch().getIntermed()).getDeclaredConstructor().newInstance();
    DataBag bag = BAG_FACTORY.newDefaultBag();
    {
        // this is to simulate an output from Initial: a bag of serialized sketches
        DataBag innerBag = BAG_FACTORY.newDefaultBag();
        ItemsSketch<String> qs = ItemsSketch.getInstance(COMPARATOR);
        qs.update("a");
        innerBag.add(TUPLE_FACTORY.newTuple(new DataByteArray(qs.toByteArray(SER_DE))));
        bag.add(TUPLE_FACTORY.newTuple(innerBag));
    }
    {
        // this is to simulate an output from a prior call of IntermediateFinal:
        // a sketch already serialized into a DataByteArray
        ItemsSketch<String> qs = ItemsSketch.getInstance(COMPARATOR);
        qs.update("b");
        bag.add(TUPLE_FACTORY.newTuple(new DataByteArray(qs.toByteArray(SER_DE))));
    }

    Tuple resultTuple = func.exec(TUPLE_FACTORY.newTuple(bag));

    ItemsSketch<String> sketch = getSketch(resultTuple);
    Assert.assertFalse(sketch.isEmpty());
    Assert.assertEquals(sketch.getN(), 2);
}
Also used : DataBag(org.apache.pig.data.DataBag) EvalFunc(org.apache.pig.EvalFunc) ItemsSketch(com.yahoo.sketches.quantiles.ItemsSketch) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Aggregations

EvalFunc (org.apache.pig.EvalFunc)44 Test (org.testng.annotations.Test)44 Tuple (org.apache.pig.data.Tuple)35 HllSketch (com.yahoo.sketches.hll.HllSketch)18 DataBag (org.apache.pig.data.DataBag)16 DataByteArray (org.apache.pig.data.DataByteArray)16 FuncSpec (org.apache.pig.FuncSpec)12 ItemsSketch (com.yahoo.sketches.quantiles.ItemsSketch)2 BagFactory (org.apache.pig.data.BagFactory)2 TupleFactory (org.apache.pig.data.TupleFactory)2 DataToSketch (com.yahoo.sketches.pig.theta.DataToSketch)1 PigUtil.tupleToSketch (com.yahoo.sketches.pig.theta.PigUtil.tupleToSketch)1 Sketch (com.yahoo.sketches.theta.Sketch)1 Schema (org.apache.pig.impl.logicalLayer.schema.Schema)1