use of com.yahoo.sketches.hll.HllSketch in project sketches-pig by DataSketches.
the class SketchToEstimateAndErrorBounds method exec.
/**
 * Converts a serialized HllSketch into a tuple of (estimate, lower bound, upper bound).
 * Both bounds are requested with an argument of 2 (the number of standard deviations
 * in the HllSketch API).
 *
 * @param sketchTuple a tuple whose first field is a DataByteArray holding a serialized HllSketch
 * @return a 3-field tuple of Doubles (estimate, lower bound, upper bound), or null if the
 *     input tuple is null, is empty, or carries a null first field
 * @throws IOException from Pig.
 */
@Override
public Tuple exec(final Tuple sketchTuple) throws IOException {
  if ((sketchTuple == null) || (sketchTuple.size() == 0)) {
    return null;
  }
  // A null field is treated like missing input so that dba.get() below
  // cannot fail with a NullPointerException.
  final Object field = sketchTuple.get(0);
  if (field == null) {
    return null;
  }
  final DataByteArray dba = (DataByteArray) field;
  final HllSketch sketch = HllSketch.wrap(Memory.wrap(dba.get()));
  final Tuple outputTuple = TupleFactory.getInstance().newTuple(3);
  outputTuple.set(0, Double.valueOf(sketch.getEstimate()));
  outputTuple.set(1, Double.valueOf(sketch.getLowerBound(2)));
  outputTuple.set(2, Double.valueOf(sketch.getUpperBound(2)));
  return outputTuple;
}
use of com.yahoo.sketches.hll.HllSketch in project sketches-pig by DataSketches.
the class SketchToString method exec.
/**
 * Renders a serialized HllSketch as a human-readable string.
 * The summary section is always requested; the two detail sections are controlled by
 * this instance's hllDetail_ and auxDetail_ flags.
 *
 * @param sketchTuple a tuple whose first field is a DataByteArray holding a serialized HllSketch
 * @return the string form of the sketch, or null if the input tuple is null, is empty,
 *     or carries a null first field
 * @throws IOException from Pig.
 */
@Override
public String exec(final Tuple sketchTuple) throws IOException {
  if ((sketchTuple == null) || (sketchTuple.size() == 0)) {
    return null;
  }
  // A null field is treated like missing input so that dba.get() below
  // cannot fail with a NullPointerException.
  final Object field = sketchTuple.get(0);
  if (field == null) {
    return null;
  }
  final DataByteArray dba = (DataByteArray) field;
  final HllSketch sketch = HllSketch.wrap(Memory.wrap(dba.get()));
  return sketch.toString(true, hllDetail_, auxDetail_);
}
use of com.yahoo.sketches.hll.HllSketch in project sketches-pig by DataSketches.
the class UnionSketch method exec.
/**
 * Non-algebraic entry point: merges every sketch found in the input bag into one result.
 *
 * <p>The input Tuple is expected to hold a single Bag of inner <b>Sketch Tuples</b>, where a
 * <b>Sketch Tuple</b> is a Tuple containing one DataByteArray (BYTEARRAY in Pig) that is a
 * serialized HllSketch. The merged sketch is returned in compact serialized form.
 *
 * <p>A null or zero-size input tuple yields a serialized empty sketch, which is built once
 * and then reused on later calls.
 *
 * @param inputTuple A tuple containing a single bag, containing Sketch Tuples.
 * @return serialized HllSketch
 * @see "org.apache.pig.EvalFunc.exec(org.apache.pig.data.Tuple)"
 * @throws IOException from Pig.
 */
@Override
public DataByteArray exec(final Tuple inputTuple) throws IOException {
  // Record once per instance that this code path was taken.
  if (isFirstCall_) {
    Logger.getLogger(getClass()).info("Exec was used");
    isFirstCall_ = false;
  }

  final boolean hasInput = (inputTuple != null) && (inputTuple.size() != 0);
  if (hasInput) {
    final Union merged = new Union(lgK_);
    final DataBag sketchBag = (DataBag) inputTuple.get(0);
    updateUnion(sketchBag, merged);
    final HllSketch mergedSketch = merged.getResult(tgtHllType_);
    return new DataByteArray(mergedSketch.toCompactByteArray());
  }

  // No input: hand back the cached empty sketch, creating it on first use.
  if (emptySketch_ == null) {
    final HllSketch blank = new HllSketch(lgK_, tgtHllType_);
    emptySketch_ = new DataByteArray(blank.toCompactByteArray());
  }
  return emptySketch_;
}
use of com.yahoo.sketches.hll.HllSketch in project sketches-pig by DataSketches.
the class DataToSketchTest method algebraicIntermediateEmptyInputTuple.
/**
 * Instantiates, by reflection, the class named by DataToSketch.getIntermed() with the
 * constructor argument "10", feeds it an empty tuple, and expects an empty sketch whose
 * lgConfigK is 10.
 */
@Test
public void algebraicIntermediateEmptyInputTuple() throws Exception {
  // Resolve the class by name and build it through its single-String constructor.
  final String intermedClassName = new DataToSketch().getIntermed();
  final Object instance =
      Class.forName(intermedClassName).getConstructor(String.class).newInstance("10");
  @SuppressWarnings("unchecked")
  final EvalFunc<Tuple> intermed = (EvalFunc<Tuple>) instance;

  final Tuple emptyInput = tupleFactory.newTuple();
  final Tuple output = intermed.exec(emptyInput);

  final DataByteArray serialized = (DataByteArray) output.get(0);
  final HllSketch sketch = getSketch(serialized);
  Assert.assertTrue(sketch.isEmpty());
  Assert.assertEquals(sketch.getLgConfigK(), 10);
}
use of com.yahoo.sketches.hll.HllSketch in project sketches-pig by DataSketches.
the class DataToSketchTest method execVariousTypesOfInput.
/**
 * Feeds DataToSketch a bag holding one null field plus seven non-null values of different
 * types (Byte, Integer, Long, Float, Double, DataByteArray, String) and expects a non-empty
 * sketch with an estimate of 7.
 */
@Test
public void execVariousTypesOfInput() throws Exception {
  EvalFunc<DataByteArray> func = new DataToSketch();
  DataBag bag = bagFactory.newDefaultBag();

  // One-field tuple whose only field is explicitly null.
  Tuple tupleWithNull = tupleFactory.newTuple(1);
  tupleWithNull.set(0, null);
  bag.add(tupleWithNull);

  // valueOf factories replace the deprecated boxed-primitive constructors. The values stay
  // explicitly boxed so the single-datum newTuple(Object) overload is chosen; a bare int
  // literal would instead select the sized-tuple overload used for tupleWithNull above.
  bag.add(tupleFactory.newTuple(Byte.valueOf((byte) 1)));
  bag.add(tupleFactory.newTuple(Integer.valueOf(2)));
  bag.add(tupleFactory.newTuple(Long.valueOf(3L)));
  bag.add(tupleFactory.newTuple(Float.valueOf(1f)));
  bag.add(tupleFactory.newTuple(Double.valueOf(2d)));
  bag.add(tupleFactory.newTuple(new DataByteArray(new byte[] { (byte) 1 })));
  bag.add(tupleFactory.newTuple("a"));

  DataByteArray result = func.exec(tupleFactory.newTuple(bag));
  HllSketch sketch = getSketch(result);
  Assert.assertFalse(sketch.isEmpty());
  // Seven non-null inputs were added; the null field is presumably not counted.
  Assert.assertEquals(sketch.getEstimate(), 7.0, 0.01);
}
Aggregations