Search in sources :

Example 66 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

From the class UnionDoublesSketchTest, method algebraicInitial.

/**
 * Feeds {@code UnionDoublesSketch.Initial} a bag holding one empty tuple and verifies that
 * the returned tuple has exactly one field, and that this field is a {@code DataBag} of size 1.
 */
@Test
public void algebraicInitial() throws Exception {
    EvalFunc<Tuple> initialStage = new UnionDoublesSketch.Initial();

    // Input: a bag with a single empty tuple, wrapped in the tuple handed to exec().
    DataBag inputBag = bagFactory.newDefaultBag();
    inputBag.add(tupleFactory.newTuple());
    Tuple input = tupleFactory.newTuple(inputBag);

    Tuple output = initialStage.exec(input);
    Assert.assertNotNull(output);
    Assert.assertEquals(output.size(), 1);

    // The only field of the output must itself be a bag with one element.
    Object payload = output.get(0);
    Assert.assertTrue(payload instanceof DataBag);
    Assert.assertEquals(((DataBag) payload).size(), 1);
}
Also used : DataBag(org.apache.pig.data.DataBag) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 67 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

From the class UnionStringsSketchTest, method algebraicInitial.

/**
 * Reflectively instantiates the class named by {@code new UnionStringsSketch().getInitial()},
 * runs it on a bag holding one empty tuple, and verifies that the returned tuple has exactly
 * one field, and that this field is a {@code DataBag} of size 1.
 */
@Test
public void algebraicInitial() throws Exception {
    // Class.newInstance() is deprecated: it rethrows checked exceptions from the constructor
    // without wrapping them. Going through the no-arg constructor wraps them in
    // InvocationTargetException instead.
    @SuppressWarnings("unchecked")
    EvalFunc<Tuple> func = (EvalFunc<Tuple>) Class.forName(new UnionStringsSketch().getInitial())
        .getDeclaredConstructor()
        .newInstance();
    DataBag bag = BAG_FACTORY.newDefaultBag();
    bag.add(TUPLE_FACTORY.newTuple());
    Tuple resultTuple = func.exec(TUPLE_FACTORY.newTuple(bag));
    Assert.assertNotNull(resultTuple);
    Assert.assertEquals(resultTuple.size(), 1);
    Assert.assertTrue(resultTuple.get(0) instanceof DataBag);
    Assert.assertEquals(((DataBag) resultTuple.get(0)).size(), 1);
}
Also used : DataBag(org.apache.pig.data.DataBag) EvalFunc(org.apache.pig.EvalFunc) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 68 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

From the class GetVarOptSamplesTest, method checkExec.

/**
 * Builds a {@code VarOptItemsSketch} from {@code n} weighted tuples, serializes it, passes the
 * bytes to {@code GetVarOptSamples.exec}, and verifies that (a) every returned record carries an
 * id in {@code [1, n]} and (b) the weights of the returned samples sum to the total input weight
 * within {@code EPS}.
 */
@Test
public void checkExec() {
    final int k = 10;
    // Original note: "exact mode".
    // NOTE(review): n is larger than k here — confirm the note still describes this setup.
    final int n = 25;
    final GetVarOptSamples udf = new GetVarOptSamples();
    try {
        final VarOptItemsSketch<Tuple> vis = VarOptItemsSketch.newInstance(k);
        double cumWt = 0.0;
        for (int i = 1; i <= n; ++i) {
            // Each item is a (weight, id) tuple whose weight equals its id.
            final Tuple t = TupleFactory.getInstance().newTuple(2);
            final double wt = 1.0 * i;
            t.set(0, wt);
            t.set(1, i);
            vis.update(t, wt);
            cumWt += wt;
        }
        final DataByteArray dba = new DataByteArray(vis.toByteArray(serDe_));
        final Tuple inputTuple = TupleFactory.getInstance().newTuple(dba);
        final DataBag result = udf.exec(inputTuple);
        double cumResultWt = 0.0;
        for (final Tuple sample : result) {
            // Field 0 of a sample is read as its weight, field 1 as the original record.
            cumResultWt += (double) sample.get(0);
            final Tuple record = (Tuple) sample.get(1);
            final int id = (int) record.get(1);
            assertTrue(id >= 1 && id <= n);
        }
        assertEquals(cumResultWt, cumWt, EPS);
    } catch (final IOException e) {
        // Pass the exception as the cause so the failure report keeps its stack trace.
        fail("Unexpected IOException: " + e.getMessage(), e);
    }
}
Also used : DataBag(org.apache.pig.data.DataBag) IOException(java.io.IOException) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 69 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

From the class DataToSketchTest, method execVariousTypesOfInput.

/**
 * Runs {@code DataToSketch} over a bag containing a null plus one value each of
 * Byte, Integer, Long, Float, Double, DataByteArray and String, and verifies that the resulting
 * sketch is non-empty with an estimate of 7 (within 0.01).
 */
@Test
public void execVariousTypesOfInput() throws Exception {
    EvalFunc<DataByteArray> func = new DataToSketch();
    DataBag bag = bagFactory.newDefaultBag();
    Tuple tupleWithNull = tupleFactory.newTuple(1);
    tupleWithNull.set(0, null);
    bag.add(tupleWithNull);
    // The boxed-type constructors are deprecated; use the valueOf factories. The values stay
    // explicitly boxed (no bare literals) so the arguments remain Objects and cannot bind to
    // the newTuple(int) overload used above for tupleWithNull.
    bag.add(tupleFactory.newTuple(Byte.valueOf((byte) 1)));
    bag.add(tupleFactory.newTuple(Integer.valueOf(2)));
    bag.add(tupleFactory.newTuple(Long.valueOf(3L)));
    bag.add(tupleFactory.newTuple(Float.valueOf(1f)));
    bag.add(tupleFactory.newTuple(Double.valueOf(2d)));
    bag.add(tupleFactory.newTuple(new DataByteArray(new byte[] { (byte) 1 })));
    bag.add(tupleFactory.newTuple("a"));
    DataByteArray result = func.exec(tupleFactory.newTuple(bag));
    HllSketch sketch = getSketch(result);
    Assert.assertFalse(sketch.isEmpty());
    // Eight tuples went in but the expected estimate is 7: one per non-null value.
    Assert.assertEquals(sketch.getEstimate(), 7.0, 0.01);
}
Also used : HllSketch(com.yahoo.sketches.hll.HllSketch) DataBag(org.apache.pig.data.DataBag) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 70 with DataBag

use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.

From the class DataToSketchTest, method algebraicFinalFromIntermediate.

/**
 * Reflectively instantiates the class named by {@code new DataToSketch().getFinal()}, feeds it
 * a bag holding one serialized {@code HllSketch} that was updated with "a" and "b", and verifies
 * that the resulting sketch is non-empty with an estimate of 2 (within 0.01).
 */
@Test
public void algebraicFinalFromIntermediate() throws Exception {
    // Class.newInstance() is deprecated: it rethrows checked exceptions from the constructor
    // without wrapping them. Going through the no-arg constructor wraps them in
    // InvocationTargetException instead.
    @SuppressWarnings("unchecked")
    EvalFunc<DataByteArray> func = (EvalFunc<DataByteArray>) Class.forName(new DataToSketch().getFinal())
        .getDeclaredConstructor()
        .newInstance();
    HllSketch inputSketch = new HllSketch(12);
    inputSketch.update("a");
    inputSketch.update("b");
    DataBag bag = bagFactory.newDefaultBag();
    bag.add(tupleFactory.newTuple(new DataByteArray(inputSketch.toCompactByteArray())));
    DataByteArray result = func.exec(tupleFactory.newTuple(bag));
    HllSketch sketch = getSketch(result);
    Assert.assertFalse(sketch.isEmpty());
    Assert.assertEquals(sketch.getEstimate(), 2.0, 0.01);
}
Also used : HllSketch(com.yahoo.sketches.hll.HllSketch) DataBag(org.apache.pig.data.DataBag) EvalFunc(org.apache.pig.EvalFunc) DataByteArray(org.apache.pig.data.DataByteArray) Test(org.testng.annotations.Test)

Aggregations

DataBag (org.apache.pig.data.DataBag)266 Tuple (org.apache.pig.data.Tuple)223 Test (org.testng.annotations.Test)142 DataByteArray (org.apache.pig.data.DataByteArray)103 IOException (java.io.IOException)20 Estimate (com.yahoo.sketches.pig.theta.Estimate)19 EvalFunc (org.apache.pig.EvalFunc)16 HllSketch (com.yahoo.sketches.hll.HllSketch)14 DoubleSummary (com.yahoo.sketches.tuple.DoubleSummary)13 DoubleSummaryDeserializer (com.yahoo.sketches.tuple.DoubleSummaryDeserializer)13 Test (org.junit.Test)13 ArrayOfStringsSerDe (com.yahoo.sketches.ArrayOfStringsSerDe)12 ArrayOfDoublesSketch (com.yahoo.sketches.tuple.ArrayOfDoublesSketch)12 ExecException (org.apache.pig.backend.executionengine.ExecException)12 ItemsSketch (com.yahoo.sketches.frequencies.ItemsSketch)11 ArrayOfDoublesUpdatableSketchBuilder (com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder)11 Map (java.util.Map)11 ArrayOfDoublesUpdatableSketch (com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketch)10 ArrayList (java.util.ArrayList)10 HashMap (java.util.HashMap)10