use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.
the class UnionDoublesSketchTest method algebraicInitial.
/**
 * Feeds the {@code UnionDoublesSketch.Initial} stage a tuple wrapping a bag that holds a single
 * tuple created by the no-arg factory call, and checks that the stage returns a one-field tuple
 * whose only field is a bag of size one.
 */
@Test
public void algebraicInitial() throws Exception {
  // Set-up: a bag with exactly one tuple, wrapped in the input tuple.
  final DataBag inputBag = bagFactory.newDefaultBag();
  inputBag.add(tupleFactory.newTuple());
  final Tuple input = tupleFactory.newTuple(inputBag);

  // Execute the Initial stage.
  final EvalFunc<Tuple> initialStage = new UnionDoublesSketch.Initial();
  final Tuple output = initialStage.exec(input);

  // Verify the shape of the output: one field, which is a bag with one element.
  Assert.assertNotNull(output);
  Assert.assertEquals(output.size(), 1);
  final Object onlyField = output.get(0);
  Assert.assertTrue(onlyField instanceof DataBag);
  Assert.assertEquals(((DataBag) onlyField).size(), 1);
}
use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.
the class UnionStringsSketchTest method algebraicInitial.
/**
 * Instantiates, by reflection, the class named by {@code UnionStringsSketch.getInitial()} and
 * runs it on a tuple wrapping a bag that holds a single tuple created by the no-arg factory call.
 * Checks that the result is a one-field tuple whose only field is a bag of size one.
 */
@Test
public void algebraicInitial() throws Exception {
  // Class.newInstance() is deprecated (it rethrows checked constructor exceptions undeclared);
  // go through the no-arg constructor explicitly instead.
  final String initialClassName = new UnionStringsSketch().getInitial();
  @SuppressWarnings("unchecked")
  EvalFunc<Tuple> func =
      (EvalFunc<Tuple>) Class.forName(initialClassName).getDeclaredConstructor().newInstance();

  DataBag bag = BAG_FACTORY.newDefaultBag();
  bag.add(TUPLE_FACTORY.newTuple());
  Tuple resultTuple = func.exec(TUPLE_FACTORY.newTuple(bag));

  Assert.assertNotNull(resultTuple);
  Assert.assertEquals(resultTuple.size(), 1);
  Assert.assertTrue(resultTuple.get(0) instanceof DataBag);
  Assert.assertEquals(((DataBag) resultTuple.get(0)).size(), 1);
}
use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.
the class GetVarOptSamplesTest method checkExec.
/**
 * Builds a {@code VarOptItemsSketch} of size {@code k} from {@code n} weighted tuples
 * (weight {@code i}, id {@code i}), serializes it, passes it through {@code GetVarOptSamples}
 * and checks that every returned record id lies in {@code [1, n]} and that the returned
 * weights sum to the total input weight (within {@code EPS}).
 */
@Test
public void checkExec() {
  final int k = 10;
  // exact mode
  // NOTE(review): n > k here, so "exact mode" looks inconsistent with these values - confirm.
  final int n = 25;
  final GetVarOptSamples udf = new GetVarOptSamples();
  try {
    final VarOptItemsSketch<Tuple> vis = VarOptItemsSketch.newInstance(k);
    double cumWt = 0.0;
    for (int i = 1; i <= n; ++i) {
      // Each record is (weight, id); the weight doubles as the sketch update weight.
      final Tuple t = TupleFactory.getInstance().newTuple(2);
      final double wt = 1.0 * i;
      t.set(0, wt);
      t.set(1, i);
      vis.update(t, wt);
      cumWt += wt;
    }

    final DataByteArray dba = new DataByteArray(vis.toByteArray(serDe_));
    final Tuple inputTuple = TupleFactory.getInstance().newTuple(dba);
    final DataBag result = udf.exec(inputTuple);

    double cumResultWt = 0.0;
    for (Tuple sample : result) {
      // Field 0 of a sample is read as its weight, field 1 as the original record tuple.
      cumResultWt += (double) sample.get(0);
      final Tuple record = (Tuple) sample.get(1);
      final int id = (int) record.get(1);
      assertTrue(id >= 1 && id <= n);
    }
    assertEquals(cumResultWt, cumWt, EPS);
  } catch (final IOException e) {
    // Keep the cause so the stack trace shows up in the test report.
    fail("Unexpected IOException: " + e.getMessage(), e);
  }
}
use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.
the class DataToSketchTest method execVariousTypesOfInput.
@Test
public void execVariousTypesOfInput() throws Exception {
EvalFunc<DataByteArray> func = new DataToSketch();
DataBag bag = bagFactory.newDefaultBag();
Tuple tupleWithNull = tupleFactory.newTuple(1);
tupleWithNull.set(0, null);
bag.add(tupleWithNull);
bag.add(tupleFactory.newTuple(new Byte((byte) 1)));
bag.add(tupleFactory.newTuple(new Integer(2)));
bag.add(tupleFactory.newTuple(new Long(3)));
bag.add(tupleFactory.newTuple(new Float(1)));
bag.add(tupleFactory.newTuple(new Double(2)));
bag.add(tupleFactory.newTuple(new DataByteArray(new byte[] { (byte) 1 })));
bag.add(tupleFactory.newTuple("a"));
DataByteArray result = func.exec(tupleFactory.newTuple(bag));
HllSketch sketch = getSketch(result);
Assert.assertFalse(sketch.isEmpty());
Assert.assertEquals(sketch.getEstimate(), 7.0, 0.01);
}
use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.
the class DataToSketchTest method algebraicFinalFromIntermediate.
@Test
public void algebraicFinalFromIntermediate() throws Exception {
@SuppressWarnings("unchecked") EvalFunc<DataByteArray> func = (EvalFunc<DataByteArray>) Class.forName(new DataToSketch().getFinal()).newInstance();
HllSketch inputSketch = new HllSketch(12);
inputSketch.update("a");
inputSketch.update("b");
DataBag bag = bagFactory.newDefaultBag();
bag.add(tupleFactory.newTuple(new DataByteArray(inputSketch.toCompactByteArray())));
DataByteArray result = func.exec(tupleFactory.newTuple(bag));
HllSketch sketch = getSketch(result);
Assert.assertFalse(sketch.isEmpty());
Assert.assertEquals(sketch.getEstimate(), 2.0, 0.01);
}
Aggregations