Use of com.yahoo.sketches.pig.theta.DataToSketch in the sketches-pig project by DataSketches.
From the class DataToSketchTest, method testTopExec.
/**
 * Exercises {@code exec} on a {@code DataToSketch} created with the no-arg constructor.
 *
 * <p>A null input tuple must come back as an empty sketch. A tuple whose single field is a
 * bag of the integers 0..63 must come back as a one-field tuple holding a non-empty
 * {@code DataByteArray} whose sketch estimate is exactly 64.
 *
 * @throws IOException declared because the calls made here may throw it
 */
@Test
public void testTopExec() throws IOException {
  // no-arg constructor (the original note here reads "size 4096")
  final EvalFunc<Tuple> udf = new DataToSketch();

  // Phase 1: a null input tuple is expected to yield an empty sketch.
  final Tuple nullInput = null;
  final Tuple emptyResult = udf.exec(nullInput);
  final Sketch emptySketch = tupleToSketch(emptyResult, seed_);
  assertTrue(emptySketch.isEmpty());

  // Phase 2: feed a bag of distinct integers, one per single-field tuple.
  final int distinctCount = 64;
  final DataBag values = BagFactory.getInstance().newDefaultBag();
  for (int value = 0; value < distinctCount; value++) {
    final Tuple item = TupleFactory.getInstance().newTuple(1);
    item.set(0, value);
    values.add(item);
  }
  final Tuple bagInput = TupleFactory.getInstance().newTuple(1);
  bagInput.set(0, values);

  final Tuple filledResult = udf.exec(bagInput);
  assertNotNull(filledResult);
  assertEquals(filledResult.size(), 1);

  // The only field of the result is the serialized sketch.
  final DataByteArray serialized = (DataByteArray) filledResult.get(0);
  assertTrue(serialized.size() > 0);

  final Sketch filledSketch = tupleToSketch(filledResult, seed_);
  assertEquals(filledSketch.getEstimate(), 64.0, 0.0);
}
Use of com.yahoo.sketches.pig.theta.DataToSketch in the sketches-pig project by DataSketches.
From the class DataToSketchTest, method outputSchemaTest.
/**
 * Checks the schema reported by {@code outputSchema} of a {@code DataToSketch("512")}.
 *
 * <p>Expectations asserted below: a null input schema yields a null output schema; for a
 * bag-of-chararray input schema, field 0 of the output schema has type name "tuple" and
 * field 0 of that field's nested schema has type name "bytearray".
 *
 * <p>Every null check is placed before the first dereference of the value it guards, so a
 * missing schema or field fails with the assertion message instead of a
 * NullPointerException.
 *
 * @throws IOException declared because the schema accessors used here may throw it
 */
@Test
public void outputSchemaTest() throws IOException {
  final EvalFunc<Tuple> udf = new DataToSketch("512");

  // CHARARRAY is one of several possible inner types
  final Schema inputSchema = Schema.generateNestedSchema(DataType.BAG, DataType.CHARARRAY);

  final Schema nullOutputSchema = udf.outputSchema(null);
  Assert.assertNull(nullOutputSchema, "Should be null");

  final Schema outputSchema = udf.outputSchema(inputSchema);
  Assert.assertNotNull(outputSchema, "outputSchema may not be null");

  // Outer field: checked for null before its type and nested schema are read.
  final Schema.FieldSchema outputOuterFs0 = outputSchema.getField(0);
  Assert.assertNotNull(outputOuterFs0, "outputSchema.getField(0) may not be null");
  String expected = "tuple";
  String result = DataType.findTypeName(outputOuterFs0.type);
  Assert.assertEquals(result, expected);

  // Inner field: the nested schema is checked for null before getField is called on it.
  final Schema outputInnerSchema = outputOuterFs0.schema;
  Assert.assertNotNull(outputInnerSchema, "outputOuterFs0.schema may not be null");
  final Schema.FieldSchema outputInnerFs0 = outputInnerSchema.getField(0);
  Assert.assertNotNull(outputInnerFs0, "innerSchema.getField(0) may not be null");
  expected = "bytearray";
  result = DataType.findTypeName(outputInnerFs0.type);
  Assert.assertEquals(result, expected);

  // print schemas
  // @formatter:off
  final StringBuilder sb = new StringBuilder();
  sb.append("input schema: ").append(inputSchema).append(LS)
    .append("output schema: ").append(outputSchema).append(LS)
    .append("outputOuterFs: ").append(outputOuterFs0)
      .append(", type: ").append(DataType.findTypeName(outputOuterFs0.type)).append(LS)
    .append("outputInnerSchema: ").append(outputInnerSchema).append(LS)
    .append("outputInnerFs0: ").append(outputInnerFs0)
      .append(", type: ").append(DataType.findTypeName(outputInnerFs0.type)).append(LS);
  println(sb.toString());
  // @formatter:on
  // end print schemas
}
Use of com.yahoo.sketches.pig.theta.DataToSketch in the sketches-pig project by DataSketches.
From the class DataToSketchTest, method checkSmall.
/**
 * Exercises {@code exec} on a {@code DataToSketch} constructed with the argument "32".
 *
 * <p>A null input tuple must come back as an empty sketch. A tuple whose single field is a
 * bag of the integers 0..31 must come back as a one-field tuple holding a non-empty
 * {@code DataByteArray} whose sketch estimate equals 32 with zero tolerance.
 *
 * @throws IOException declared because the calls made here may throw it
 */
@Test
public void checkSmall() throws IOException {
  final EvalFunc<Tuple> udf = new DataToSketch("32");

  // Phase 1: a null input tuple is expected to yield an empty sketch.
  final Tuple nullInput = null;
  final Tuple emptyResult = udf.exec(nullInput);
  final Sketch emptySketch = tupleToSketch(emptyResult, seed_);
  assertTrue(emptySketch.isEmpty());

  // Phase 2: feed exactly `uniques` distinct integers, one per single-field tuple.
  final int uniques = 32;
  final DataBag values = BagFactory.getInstance().newDefaultBag();
  for (int value = 0; value < uniques; value++) {
    final Tuple item = TupleFactory.getInstance().newTuple(1);
    item.set(0, value);
    values.add(item);
  }
  final Tuple bagInput = TupleFactory.getInstance().newTuple(1);
  bagInput.set(0, values);

  final Tuple filledResult = udf.exec(bagInput);
  assertNotNull(filledResult);
  assertEquals(filledResult.size(), 1);

  // The only field of the result is the serialized sketch.
  final DataByteArray serialized = (DataByteArray) filledResult.get(0);
  assertTrue(serialized.size() > 0);

  final Sketch filledSketch = tupleToSketch(filledResult, seed_);
  // The int expected value is kept so it widens to double in the comparison.
  assertEquals(filledSketch.getEstimate(), uniques, 0.0);
}
Aggregations