Use of com.yahoo.sketches.hll.Union in project Gaffer by gchq.
Class HllUnionKryoSerializerTest, method getTestObject.
/**
 * Builds the fixture used by this test: a {@link Union} constructed with the
 * argument 15 and updated with the three string values "A", "B" and "C".
 *
 * @return the populated union
 */
@Override
public Union getTestObject() {
    final Union fixture = new Union(15);
    for (final String item : new String[] {"A", "B", "C"}) {
        fixture.update(item);
    }
    return fixture;
}
Use of com.yahoo.sketches.hll.Union in project sketches-pig by DataSketches.
Class AlgebraicFinal, method exec.
/**
 * Merges everything found in the outer bag (field 0 of the input tuple) into a single
 * union and returns the result serialized as a compact byte array.
 *
 * <p>Field 0 of each tuple in the outer bag may be:
 * <ul>
 * <li>{@code null} - the tuple is skipped;</li>
 * <li>a {@code DataBag} - a non-empty bag is handed to {@code updateUnion};</li>
 * <li>a {@code DataByteArray} - treated as a serialized sketch and merged into the union.</li>
 * </ul>
 *
 * @param inputTuple tuple whose field 0 is the outer bag; if it is null or empty, or if
 *        the outer bag is null, the value of {@code getEmptySketch()} is returned
 * @return the union result, of type {@code tgtHllType_}, as a compact byte array
 * @throws IOException declared by the overridden method
 * @throws IllegalArgumentException if field 0 of a tuple is neither a DataBag nor a DataByteArray
 */
@Override
public DataByteArray exec(final Tuple inputTuple) throws IOException {
    if (isFirstCall_) {
        // Emit the informational message only on the first invocation.
        Logger.getLogger(getClass()).info("Algebraic was used");
        isFirstCall_ = false;
    }

    final boolean noInput = (inputTuple == null) || (inputTuple.size() == 0);
    if (noInput) {
        return getEmptySketch();
    }

    final Union merged = new Union(lgK_);
    final DataBag sketchBag = (DataBag) inputTuple.get(0);
    if (sketchBag == null) {
        return getEmptySketch();
    }

    for (final Tuple entry : sketchBag) {
        // inputTuple.bag0.entryN.f0
        final Object field = entry.get(0);
        if (field == null) {
            continue;
        }

        if (field instanceof DataBag) {
            // inputTuple.bag0.entryN.f0:bag
            // A bag in field 0 comes from system-bagged outputs of multiple mapper
            // Initial functions (the Intermediate stage was bypassed). All of its
            // inner tuples are passed into the union; an empty bag adds nothing.
            final DataBag nested = (DataBag) field;
            if (nested.size() != 0) {
                updateUnion(nested, merged);
            }
        } else if (field instanceof DataByteArray) {
            // inputTuple.bag0.entryN.f0:DBA
            // A DataByteArray in field 0 is assumed to be a sketch produced by a
            // mapper Intermediate function; wrap its bytes and merge it in.
            final byte[] sketchBytes = ((DataByteArray) field).get();
            merged.update(HllSketch.wrap(Memory.wrap(sketchBytes)));
        } else {
            // we should never get here
            throw new IllegalArgumentException("dataTuple.Field0 is not a DataBag or DataByteArray: " + field.getClass().getName());
        }
    }

    final HllSketch result = merged.getResult(tgtHllType_);
    return new DataByteArray(result.toCompactByteArray());
}
Use of com.yahoo.sketches.hll.Union in project sketches-pig by DataSketches.
Class DataToSketch, method exec.
/**
 * Top-level exec function.
 *
 * <p>Takes an input Tuple whose first field is a Bag of one or more inner
 * <b>Datum Tuples</b>, feeds that bag into a union, and returns the resulting
 * HllSketch serialized as a DataByteArray. When the input Tuple is null or has
 * no fields, a lazily created, cached serialized empty sketch is returned instead.
 *
 * <p>A <b>Datum Tuple</b> is a Tuple with a single field, which can be one of the
 * following (Java type: Pig type):
 * <ul>
 * <li>Byte: BYTE</li>
 * <li>Integer: INTEGER</li>
 * <li>Long: LONG</li>
 * <li>Float: FLOAT</li>
 * <li>Double: DOUBLE</li>
 * <li>String: CHARARRAY</li>
 * <li>DataByteArray: BYTEARRAY</li>
 * </ul>
 *
 * @param inputTuple A tuple containing a single bag, containing Datum Tuples.
 * @return serialized HllSketch
 * @see "org.apache.pig.EvalFunc.exec(org.apache.pig.data.Tuple)"
 * @throws IOException from Pig.
 */
@Override
public DataByteArray exec(final Tuple inputTuple) throws IOException {
    if (isFirstCall_) {
        // Emit the informational message only on the first invocation.
        Logger.getLogger(getClass()).info("Exec was used");
        isFirstCall_ = false;
    }

    if (inputTuple != null && inputTuple.size() != 0) {
        final Union merged = new Union(lgK_);
        final DataBag datumBag = (DataBag) inputTuple.get(0);
        updateUnion(datumBag, merged);
        final HllSketch result = merged.getResult(tgtHllType_);
        return new DataByteArray(result.toCompactByteArray());
    }

    // No input: build the serialized empty sketch once and reuse it afterwards.
    if (emptySketch_ == null) {
        final HllSketch empty = new HllSketch(lgK_, tgtHllType_);
        emptySketch_ = new DataByteArray(empty.toCompactByteArray());
    }
    return emptySketch_;
}
Use of com.yahoo.sketches.hll.Union in project Gaffer by gchq.
Class HllSketchAggregator, method _apply.
/**
 * Combines two sketches by feeding both into a new {@link Union} and returning
 * that union's result.
 *
 * <p>The union is constructed with the value of {@code a.getLgConfigK()}; the
 * configuration of {@code b} is not consulted when creating it.
 *
 * @param a the first sketch; also supplies the constructor argument for the union
 * @param b the second sketch
 * @return the result of the union after both sketches have been applied
 */
@Override
protected HllSketch _apply(final HllSketch a, final HllSketch b) {
    final int lgConfigK = a.getLgConfigK();
    final Union merged = new Union(lgConfigK);
    merged.update(a);
    merged.update(b);
    final HllSketch combined = merged.getResult();
    return combined;
}
Use of com.yahoo.sketches.hll.Union in project Gaffer by gchq.
Class HllUnionAggregatorTest, method testAggregate.
/**
 * Checks that aggregating two unions, each updated with two distinct strings,
 * yields an estimate of 4 (within {@code DELTA}); the first union alone is
 * expected to estimate 2 before aggregation.
 */
@Test
public void testAggregate() {
    final HllUnionAggregator aggregator = new HllUnionAggregator();

    // First input: two distinct values.
    final Union first = new Union(15);
    first.update("A");
    first.update("B");
    assertEquals(2.0D, first.getEstimate(), DELTA);

    // Second input: two further values not present in the first.
    final Union second = new Union(15);
    second.update("C");
    second.update("D");

    final Union combined = aggregator.apply(first, second);
    assertEquals(4.0D, combined.getEstimate(), DELTA);
}
Aggregations