Use of org.apache.pig.data.DataBag in the project sketches-pig by DataSketches.
From the class DataToSketchTest, method testAccumulate.
/**
 * Drives DataToSketch through its Accumulator interface: 91 distinct integers
 * (0..90) are supplied in two accumulate() calls of 64 and 27 values, the
 * resulting sketch is expected to estimate exactly 91, and after cleanup()
 * the returned sketch is expected to estimate 0.
 */
@Test
public void testAccumulate() throws IOException {
  Accumulator<Tuple> func = new DataToSketch("128");
  TupleFactory tuples = TupleFactory.getInstance();
  BagFactory bags = BagFactory.getInstance();

  // Half-open [start, end) value ranges; each range is one accumulate() call.
  int[][] batches = { { 0, 64 }, { 64, 91 } };
  for (int[] batch : batches) {
    Tuple inputTuple = tuples.newTuple(1);
    DataBag bag = bags.newDefaultBag();
    inputTuple.set(0, bag);
    for (int value = batch[0]; value < batch[1]; value++) {
      Tuple dataTuple = tuples.newTuple(1);
      dataTuple.set(0, value);
      bag.add(dataTuple);
    }
    func.accumulate(inputTuple);
  }

  // State after both batches: one non-empty serialized sketch estimating 91.
  Tuple accumulated = func.getValue();
  assertNotNull(accumulated);
  assertEquals(accumulated.size(), 1);
  DataByteArray accumulatedBytes = (DataByteArray) accumulated.get(0);
  assertTrue(accumulatedBytes.size() > 0);
  Sketch accumulatedSketch = tupleToSketch(accumulated, seed_);
  assertEquals(accumulatedSketch.getEstimate(), 91.0, 0.0);

  // after cleanup, the value should always be 0
  func.cleanup();
  Tuple afterCleanup = func.getValue();
  assertNotNull(afterCleanup);
  assertEquals(afterCleanup.size(), 1);
  DataByteArray afterCleanupBytes = (DataByteArray) afterCleanup.get(0);
  assertTrue(afterCleanupBytes.size() > 0);
  Sketch afterCleanupSketch = tupleToSketch(afterCleanup, seed_);
  assertEquals(afterCleanupSketch.getEstimate(), 0.0, 0.0);
}
Use of org.apache.pig.data.DataBag in the project sketches-pig by DataSketches.
From the class DataToSketchTest, method testInitial.
/**
 * Runs DataToSketch.Initial over a bag holding 64 single-integer tuples and
 * checks that the result is a one-field tuple whose field is a bag of size 64.
 */
@Test
public void testInitial() throws IOException {
  EvalFunc<Tuple> func = new DataToSketch.Initial("128");
  TupleFactory tuples = TupleFactory.getInstance();

  // Input: a one-field tuple wrapping a bag of the integers 0..63.
  Tuple inputTuple = tuples.newTuple(1);
  DataBag inputBag = BagFactory.getInstance().newDefaultBag();
  inputTuple.set(0, inputBag);
  for (int value = 0; value < 64; value++) {
    Tuple dataTuple = tuples.newTuple(1);
    dataTuple.set(0, value);
    inputBag.add(dataTuple);
  }

  Tuple outputTuple = func.exec(inputTuple);
  assertNotNull(outputTuple);
  assertEquals(outputTuple.size(), 1);

  DataBag outputBag = (DataBag) outputTuple.get(0);
  assertEquals(outputBag.size(), 64);
}
Use of org.apache.pig.data.DataBag in the project sketches-pig by DataSketches.
From the class DataToSketchTest, method checkAlgFinalOuterBagEmptyTuples.
/**
 * Checks that DataToSketch.IntermediateFinal yields a sketch whose estimate is 0
 * for three progressively less empty inputs: a tuple whose only field was never
 * set, a tuple holding an empty bag, and a tuple holding a bag whose single
 * inner tuple has a field that was never set.
 */
@Test
public void checkAlgFinalOuterBagEmptyTuples() throws IOException {
  EvalFunc<Tuple> interFuncFinal = new DataToSketch.IntermediateFinal("256");
  EvalFunc<Double> estFunc = new Estimate();
  TupleFactory tuples = TupleFactory.getInstance();

  // Stage 1: field 0 of the input tuple has not been set.
  Tuple inputTuple = tuples.newTuple(1);
  assertEquals(estFunc.exec(interFuncFinal.exec(inputTuple)), 0.0, 0.0);

  // Stage 2: field 0 holds a bag with no tuples in it.
  DataBag outerBag = BagFactory.getInstance().newDefaultBag();
  inputTuple.set(0, outerBag);
  assertEquals(estFunc.exec(interFuncFinal.exec(inputTuple)), 0.0, 0.0);

  // Stage 3: the bag holds one tuple whose field 0 has not been set.
  Tuple emptyInner = tuples.newTuple(1);
  outerBag.add(emptyInner);
  assertEquals(estFunc.exec(interFuncFinal.exec(inputTuple)), 0.0, 0.0);
}
Use of org.apache.pig.data.DataBag in the project sketches-pig by DataSketches.
From the class DataToSketchTest, method textTopExec2.
/**
 * DataToSketch top-level exec test.
 *
 * <p>Builds one bag containing a zero-field tuple, a tuple whose single field is
 * unset, and single-field tuples of type Byte, Integer, Long, Float, Double,
 * DataByteArray and String (including an empty byte array, an empty string, and
 * both -0.0 and 0.0), then runs the UDF instantiated from {@code udfName} over it.
 * The resulting sketch is expected to estimate exactly 8 distinct values.
 *
 * <p>NOTE(review): an earlier version of this comment also mentioned testing
 * rejection of a non-simple type; no such check is performed in this method.
 */
// still triggers unchecked warning
@SuppressWarnings("unchecked")
@Test
public void textTopExec2() throws IOException {
  TupleFactory tupleFactory = TupleFactory.getInstance();
  BagFactory bagFactory = BagFactory.getInstance();
  String[] ctorArgs = { "128" };
  EvalFunc<Tuple> dataUdf = (EvalFunc<Tuple>) PigContext.instantiateFuncFromSpec(new FuncSpec(udfName, ctorArgs));

  DataBag bag = bagFactory.newDefaultBag();
  // empty with a null
  bag.add(tupleFactory.newTuple());
  // 1 empty field
  bag.add(tupleFactory.newTuple(1));

  // Field values in insertion order. The numbers in the comments are the running
  // distinct count that the final assertion (8.0) relies on. The valueOf factories
  // replace the boxed-primitive constructors, which are deprecated since Java 9.
  Object[] fieldValues = {
    Byte.valueOf((byte) 1),                   // 1
    Integer.valueOf(2),                       // 2
    Long.valueOf(3L),                         // 3
    Float.valueOf(4f),                        // 4
    Double.valueOf(5.0),                      // 5
    new DataByteArray(new byte[] { 1, 2, 3 }), // 6
    new DataByteArray(new byte[0]),           // - ignore (empty)
    Double.valueOf(-0.0),                     // 7
    Double.valueOf(0.0),                      // 7 duplicate
    "abcde",                                  // 8
    ""                                        // - ignore (empty)
  };
  for (Object fieldValue : fieldValues) {
    Tuple t = tupleFactory.newTuple(1);
    t.set(0, fieldValue);
    bag.add(t);
  }

  Tuple in = tupleFactory.newTuple(1);
  in.set(0, bag);

  // should return a sketch
  Tuple resultTuple = dataUdf.exec(in);
  assertNotNull(resultTuple);
  assertEquals(resultTuple.size(), 1);
  DataByteArray bytes = (DataByteArray) resultTuple.get(0);
  assertTrue(bytes.size() > 0);
  Sketch sketch = tupleToSketch(resultTuple, seed_);
  assertEquals(sketch.getEstimate(), 8.0, 0.0);
}
Use of org.apache.pig.data.DataBag in the project sketches-pig by DataSketches.
From the class DataToSketchTest, method checkAlgFinalInnerNotDBA.
/**
 * Checks that DataToSketch.IntermediateFinal throws IllegalArgumentException
 * when an inner tuple carries a value that is not a DataByteArray (here a Double).
 *
 * <p>The first two exec calls (unset field, then empty bag) are expected to
 * succeed with an estimate of 0; the third exec call, made after the Double is
 * placed in the inner tuple, is the one expected to throw.
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void checkAlgFinalInnerNotDBA() throws IOException {
  EvalFunc<Tuple> interFuncFinal = new DataToSketch.IntermediateFinal("256");
  EvalFunc<Double> estFunc = new Estimate();
  // field 0 of inputTuple has not been set
  Tuple inputTuple = TupleFactory.getInstance().newTuple(1);
  Tuple resultTuple = interFuncFinal.exec(inputTuple);
  assertEquals(estFunc.exec(resultTuple), 0.0, 0.0);
  DataBag bag = BagFactory.getInstance().newDefaultBag();
  // field 0 of inputTuple now holds an empty bag
  inputTuple.set(0, bag);
  resultTuple = interFuncFinal.exec(inputTuple);
  assertEquals(estFunc.exec(resultTuple), 0.0, 0.0);
  Tuple innerTuple = TupleFactory.getInstance().newTuple(1);
  bag.add(innerTuple);
  // not a DBA; Double.valueOf replaces the deprecated Double(double) constructor
  innerTuple.set(0, Double.valueOf(1.0));
  // expected to throw IllegalArgumentException
  resultTuple = interFuncFinal.exec(inputTuple);
  // only reached if no exception was thrown, in which case the test fails anyway
  assertEquals(estFunc.exec(resultTuple), 0.0, 0.0);
}
Aggregations