Use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.
From the class DataToFrequentStringsSketchTest, method execWrongCountType.
/**
 * Feeds {@code DataToFrequentStringsSketch} a bag whose single tuple carries an
 * {@code Integer} in the count position and expects the call to fail with a
 * {@link ClassCastException}.
 */
@Test(expectedExceptions = ClassCastException.class)
public void execWrongCountType() throws Exception {
  final DataBag items = BagFactory.getInstance().newDefaultBag();
  // The literal 1 is boxed as an Integer; integer count is not supported.
  items.add(PigUtil.objectsToTuple("a", 1));

  final EvalFunc<Tuple> sketchBuilder = new DataToFrequentStringsSketch("8");
  sketchBuilder.exec(PigUtil.objectsToTuple(items));
}
Use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.
From the class DataToFrequentStringsSketchTest, method algebraicInitial.
/**
 * Runs {@code DataToFrequentStringsSketch.Initial} over a bag of three
 * {@code (null, null)} tuples and checks that the result is a one-field tuple
 * whose field is a bag of size three.
 */
@Test
public void algebraicInitial() throws Exception {
  final int tupleCount = 3;
  final EvalFunc<Tuple> initial = new DataToFrequentStringsSketch.Initial(null);

  // Wrap the bag in a single-field tuple.
  final Tuple input = TupleFactory.getInstance().newTuple(1);
  final DataBag inputBag = BagFactory.getInstance().newDefaultBag();
  for (int i = 0; i < tupleCount; i++) {
    inputBag.add(PigUtil.objectsToTuple(null, null));
  }
  input.set(0, inputBag);

  final Tuple output = initial.exec(input);
  Assert.assertNotNull(output);
  Assert.assertEquals(output.size(), 1);

  final DataBag outputBag = (DataBag) output.get(0);
  Assert.assertEquals(outputBag.size(), tupleCount);
}
Use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.
From the class FrequentStringsSketchToEstimatesTest, method estimation.
/**
 * Builds an {@code ItemsSketch<String>} from ten distinct items ("1" through
 * "10") with decreasing weights, serializes it, and runs
 * {@code FrequentStringsSketchToEstimates} in both error modes.
 *
 * <p>Checks that each mode returns a non-null bag with fewer than ten entries,
 * and that the "NO_FALSE_POSITIVES" bag is strictly smaller than the
 * "NO_FALSE_NEGATIVES" bag.
 */
@Test
public void estimation() throws Exception {
  // Weights for items "1" .. "9"; item "10" is added below without an explicit weight.
  final long[] weights = {1000, 500, 200, 100, 50, 20, 10, 5, 2};

  final ItemsSketch<String> sketch = new ItemsSketch<String>(8);
  for (int i = 0; i < weights.length; i++) {
    sketch.update(Integer.toString(i + 1), weights[i]);
  }
  sketch.update("10");

  final byte[] serialized = sketch.toByteArray(new ArrayOfStringsSerDe());
  final Tuple input = PigUtil.objectsToTuple(new DataByteArray(serialized));

  final EvalFunc<DataBag> noFalsePositives =
      new FrequentStringsSketchToEstimates("NO_FALSE_POSITIVES");
  final DataBag strictBag = noFalsePositives.exec(input);
  Assert.assertNotNull(strictBag);
  Assert.assertTrue(strictBag.size() < 10);

  final EvalFunc<DataBag> noFalseNegatives =
      new FrequentStringsSketchToEstimates("NO_FALSE_NEGATIVES");
  final DataBag lenientBag = noFalseNegatives.exec(input);
  Assert.assertNotNull(lenientBag);
  Assert.assertTrue(lenientBag.size() < 10);

  Assert.assertTrue(strictBag.size() < lenientBag.size());
}
Use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.
From the class FrequentStringsSketchToEstimatesTest, method schema.
/**
 * Checks the output schema reported by {@code FrequentStringsSketchToEstimates}
 * for a {@code null} input schema: a single BAG field containing a single TUPLE
 * field with four columns typed CHARARRAY, LONG, LONG, LONG.
 */
@Test
public void schema() throws Exception {
  final EvalFunc<DataBag> estimator = new FrequentStringsSketchToEstimates();
  final Schema outer = estimator.outputSchema(null);
  Assert.assertNotNull(outer);
  Assert.assertEquals(outer.size(), 1);
  Assert.assertEquals(outer.getField(0).type, DataType.BAG);

  // Descend into the bag: it must hold exactly one tuple field.
  final Schema bagSchema = outer.getField(0).schema;
  Assert.assertEquals(bagSchema.size(), 1);
  Assert.assertEquals(bagSchema.getField(0).type, DataType.TUPLE);

  // Descend into the tuple and verify the column types in order.
  final Schema tupleSchema = bagSchema.getField(0).schema;
  final byte[] expectedTypes = {
      DataType.CHARARRAY, DataType.LONG, DataType.LONG, DataType.LONG
  };
  Assert.assertEquals(tupleSchema.size(), 4);
  for (int column = 0; column < expectedTypes.length; column++) {
    Assert.assertEquals(tupleSchema.getField(column).type, expectedTypes[column]);
  }
}
Use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.
From the class FrequentStringsSketchToEstimatesTest, method nullInput.
/**
 * Checks that {@code FrequentStringsSketchToEstimates.exec} returns
 * {@code null} when it is given a {@code null} input tuple.
 */
@Test
public void nullInput() throws Exception {
  final EvalFunc<DataBag> estimator = new FrequentStringsSketchToEstimates();
  Assert.assertNull(estimator.exec(null));
}
Aggregations