use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.
the class DataToArrayOfDoublesSketchBase method exec.
/**
 * Builds an array-of-doubles sketch from the single bag carried by the input tuple and
 * returns it in serialized, compact form.
 *
 * @param inputTuple a tuple whose only field is the bag handed to {@code updateSketch}
 * @return a one-field tuple wrapping the serialized compact sketch, or {@code null} when
 *     {@code inputTuple} is null or has no fields
 * @throws IllegalArgumentException if the tuple has more than one field
 * @throws IOException declared by the overridden method
 */
@Override
public Tuple exec(final Tuple inputTuple) throws IOException {
  if (isFirstCall_) {
    // logged once so the job log reveals which entry point Pig picked
    Logger.getLogger(getClass()).info("exec is used");
    isFirstCall_ = false;
  }

  // nothing to process: signal that with a null result
  if (inputTuple == null) {
    return null;
  }
  final int fieldCount = inputTuple.size();
  if (fieldCount == 0) {
    return null;
  }
  if (fieldCount != 1) {
    throw new IllegalArgumentException("Input tuple must have 1 bag");
  }

  // a fresh sketch per call, configured from this instance's settings
  final ArrayOfDoublesUpdatableSketch freshSketch = new ArrayOfDoublesUpdatableSketchBuilder()
      .setNominalEntries(sketchSize_)
      .setSamplingProbability(samplingProbability_)
      .setNumberOfValues(numValues_)
      .build();
  updateSketch((DataBag) inputTuple.get(0), freshSketch, numValues_);

  final byte[] serialized = freshSketch.compact().toByteArray();
  return Util.tupleFactory.newTuple(new DataByteArray(serialized));
}
use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.
the class DataToSketch method accumulate.
/**
 * Accumulator entry point: feeds one batch of input into the running sketch.
 *
 * <p>The running sketch is created from {@code sketchBuilder_} on the first call that finds
 * {@code accumSketch_} unset, and later calls keep updating that same sketch.
 *
 * <p>A null or empty input tuple is skipped without touching the sketch contents. This matches
 * how {@code exec} in this class treats the same input, and avoids a NullPointerException on
 * a null tuple.
 *
 * @param inputTuple a tuple whose only field is the bag handed to {@code updateSketch};
 *     may be null or empty, in which case the call is a no-op
 * @throws IllegalArgumentException if the tuple has more than one field
 * @throws IOException declared by the overridden method
 */
@Override
public void accumulate(final Tuple inputTuple) throws IOException {
  if (isFirstCall_) {
    // this is to see in the log which way was used by Pig
    Logger.getLogger(getClass()).info("accumulate is used");
    isFirstCall_ = false;
  }
  if (accumSketch_ == null) {
    accumSketch_ = sketchBuilder_.build();
  }
  // Nothing to accumulate; treated the same way exec treats null/empty input.
  if ((inputTuple == null) || (inputTuple.size() == 0)) {
    return;
  }
  if (inputTuple.size() != 1) {
    throw new IllegalArgumentException("Input tuple must have 1 bag");
  }
  final DataBag bag = (DataBag) inputTuple.get(0);
  updateSketch(bag, accumSketch_);
}
use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.
the class DataToSketch method exec.
/**
 * Builds a new sketch from the single bag carried by the input tuple and returns it in
 * serialized, compact form.
 *
 * @param inputTuple a tuple whose only field is the bag handed to {@code updateSketch}
 * @return a one-field tuple wrapping the serialized compact sketch, or {@code null} when
 *     {@code inputTuple} is null or has no fields
 * @throws IllegalArgumentException if the tuple has more than one field
 * @throws IOException declared by the overridden method
 */
@Override
public Tuple exec(final Tuple inputTuple) throws IOException {
  if (isFirstCall_) {
    // logged once so the job log reveals which entry point Pig picked
    Logger.getLogger(getClass()).info("exec is used");
    isFirstCall_ = false;
  }

  final boolean nothingToDo = (inputTuple == null) || (inputTuple.size() == 0);
  if (nothingToDo) {
    return null;
  }
  if (inputTuple.size() != 1) {
    throw new IllegalArgumentException("Input tuple must have 1 bag");
  }

  // exec keeps no running state: every call starts from a newly built sketch
  final UpdatableSketch<U, S> freshSketch = sketchBuilder_.build();
  updateSketch((DataBag) inputTuple.get(0), freshSketch);

  final DataByteArray serialized = new DataByteArray(freshSketch.compact().toByteArray());
  return Util.tupleFactory.newTuple(serialized);
}
use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.
the class DataToSketchAlgebraicIntermediateFinal method exec.
/**
 * Merges every entry of the input bag into one union and returns the serialized result.
 *
 * <p>Each inner tuple's first field is handled according to its runtime type:
 * <ul>
 * <li>{@code DataBag}: its contents are inserted into a newly built sketch, which is then
 * merged into the union.</li>
 * <li>{@code DataByteArray}: the tuple is deserialized into a sketch, which is then merged
 * into the union.</li>
 * <li>anything else, including {@code null}: rejected with an
 * {@code IllegalArgumentException}.</li>
 * </ul>
 *
 * @param inputTuple a tuple whose first field is the bag of entries to merge
 * @return a one-field tuple wrapping the serialized union result
 * @throws IllegalArgumentException if the bag is null, or an entry's first field is null or
 *     of an unrecognized type
 * @throws IOException declared by the overridden method
 */
@Override
public Tuple exec(final Tuple inputTuple) throws IOException {
  if (isFirstCall_) {
    // this is to see in the log which way was used by Pig
    Logger.getLogger(getClass()).info("algebraic is used");
    isFirstCall_ = false;
  }
  final Union<S> union = new Union<S>(sketchSize_, summarySetOps_);
  final DataBag bag = (DataBag) inputTuple.get(0);
  if (bag == null) {
    throw new IllegalArgumentException("InputTuple.Field0: Bag may not be null");
  }
  for (final Tuple dataTuple : bag) {
    final Object item = dataTuple.get(0);
    if (item instanceof DataBag) {
      // this is a bag from the Initial function.
      // just insert each item of the tuple into the sketch
      final UpdatableSketch<U, S> sketch = sketchBuilder_.build();
      DataToSketch.updateSketch((DataBag) item, sketch);
      union.update(sketch);
    } else if (item instanceof DataByteArray) {
      // This is a sketch from a prior call to the
      // Intermediate function. merge it with the
      // current sketch.
      final Sketch<S> incomingSketch = Util.deserializeSketchFromTuple(dataTuple, summaryDeserializer_);
      union.update(incomingSketch);
    } else {
      // we should never get here.
      // A null item also lands in this branch (instanceof is false for null), so the type
      // name is resolved null-safely to report the bad input instead of failing with a
      // NullPointerException while building the message.
      final String typeName = (item == null) ? "null" : item.getClass().getName();
      throw new IllegalArgumentException("InputTuple.Field0: Bag contains unrecognized types: " + typeName);
    }
  }
  return Util.tupleFactory.newTuple(new DataByteArray(union.getResult().toByteArray()));
}
use of org.apache.pig.data.DataBag in project sketches-pig by DataSketches.
the class UnionDoublesSketch method exec.
// @formatter:off
/**
 * Top-level exec function.
 * Takes an input Tuple holding a Bag of inner <b>Sketch Tuples</b>, merges them, and returns
 * the merged <b>Sketch</b> wrapped in a single <b>Sketch Tuple</b>.
 *
 * <p>When many calls are expected, the <i>Algebraic</i> or <i>Accumulator</i> interfaces are
 * the recommended route. Pig normally selects them automatically.
 *
 * <p>Each call builds a new <b>Union</b>, presents the inner <b>Sketch Tuples</b> to it, and
 * returns the union's result as a <b>Sketch Tuple</b>.
 *
 * <p>Types below are written as: Java data type: Pig DataType
 *
 * <p><b>Input Tuple</b>
 * <ul>
 *   <li>Tuple: TUPLE (Must contain only one field)
 *     <ul>
 *       <li>index 0: DataBag: BAG (May contain 0 or more Inner Tuples)
 *         <ul>
 *           <li>index 0: Tuple: TUPLE <b>Sketch Tuple</b></li>
 *           <li>...</li>
 *           <li>index n-1: Tuple: TUPLE <b>Sketch Tuple</b></li>
 *         </ul>
 *       </li>
 *     </ul>
 *   </li>
 * </ul>
 *
 * <b>Sketch Tuple</b>
 * <ul>
 *   <li>Tuple: TUPLE (Contains exactly 1 field)
 *     <ul>
 *       <li>index 0: DataByteArray: BYTEARRAY = The serialization of a Sketch object.</li>
 *     </ul>
 *   </li>
 * </ul>
 *
 * @param inputTuple A tuple containing a single bag, containing Sketch Tuples.
 * @return Sketch Tuple. If inputTuple is null or empty, returns empty sketch.
 * @see "org.apache.pig.EvalFunc.exec(org.apache.pig.data.Tuple)"
 */
// @formatter:on
// TOP LEVEL EXEC
@Override
public Tuple exec(final Tuple inputTuple) throws IOException {
  // exec is stateless: the outcome is derived from the input alone.
  DoublesSketch merged = null;

  final boolean hasInput = (inputTuple != null) && (inputTuple.size() > 0);
  if (hasInput) {
    final DoublesUnion freshUnion = unionBuilder_.build();
    updateUnion((DataBag) inputTuple.get(0), freshUnion);
    merged = freshUnion.getResultAndReset();
  }

  if (merged == null) {
    // no input, or the union produced no result: answer with an empty sketch
    merged = unionBuilder_.build().getResult();
  }

  final byte[] serialized = merged.toByteArray(true);
  return tupleFactory_.newTuple(new DataByteArray(serialized));
}
Aggregations