Example use of org.apache.datasketches.sampling.VarOptItemsSamples in the sketches-pig project by DataSketches.
From the class VarOptCommonImpl, method createDataBagFromSketch.
// Builds a DataBag holding one (weight, item) tuple for every sample
// returned by the input sketch. An ExecException raised while filling
// a tuple is rethrown as a RuntimeException that keeps the original cause.
static DataBag createDataBagFromSketch(final VarOptItemsSketch<Tuple> sketch) {
  final DataBag bag = BAG_FACTORY.newDefaultBag();

  try {
    for (final VarOptItemsSamples<Tuple>.WeightedSample sample : sketch.getSketchSamples()) {
      // field 0 carries the sample weight, field 1 the sampled item
      final Tuple pair = TUPLE_FACTORY.newTuple(2);
      pair.set(0, sample.getWeight());
      pair.set(1, sample.getItem());
      bag.add(pair);
    }
  } catch (final ExecException e) {
    throw new RuntimeException("Pig error: " + e.getMessage(), e);
  }

  return bag;
}
Example use of org.apache.datasketches.sampling.VarOptItemsSamples in the sketches-pig project by DataSketches.
From the class VarOptCommonAlgebraicTest, method rawTuplesToSketchTupleExec.
// exec: sketches generally in sampling mode.
// Runs the RawTuplesToSketchTuple UDF on degenerate inputs (expecting null)
// and then on a bag of k + 1 weighted tuples, checking the serialized sketch
// that comes back.
@SuppressWarnings("unused")
@Test
public void rawTuplesToSketchTupleExec() {
  final int k = 5;
  final int wtIdx = 1;
  final VarOptCommonImpl.RawTuplesToSketchTuple udf =
      new VarOptCommonImpl.RawTuplesToSketchTuple(String.valueOf(k), String.valueOf(wtIdx));

  // input rows are ("a", 1.0), ("b", 2.0), ... for k + 1 rows in total
  final DataBag inputBag = BagFactory.getInstance().newDefaultBag();
  try {
    for (int i = 0; i <= k; ++i) {
      final Tuple row = TupleFactory.getInstance().newTuple(2);
      row.set(0, Character.toString((char) ('a' + i)));
      row.set(1, 1.0 + i);
      inputBag.add(row);
    }
  } catch (final ExecException e) {
    fail("Unexpected ExecException creating input data");
  }

  try {
    // degenerate input first: null tuple, empty tuple, tuple with a null field
    assertNull(udf.exec(null));
    assertNull(udf.exec(TupleFactory.getInstance().newTuple(0)));

    final Tuple inputTuple = TupleFactory.getInstance().newTuple(1);
    inputTuple.set(0, null);
    assertNull(udf.exec(inputTuple));

    // now test real input
    inputTuple.set(0, inputBag);
    final Tuple sketchTuple = udf.exec(inputTuple);
    assertEquals(sketchTuple.size(), 1);

    // the single output field is the serialized sketch; rebuild it to inspect
    final DataByteArray sketchBytes = (DataByteArray) sketchTuple.get(0);
    final VarOptItemsSketch<Tuple> sketch =
        VarOptItemsSketch.heapify(Memory.wrap(sketchBytes.get()), serDe_);
    assertEquals(sketch.getN(), k + 1);
    assertEquals(sketch.getK(), k);

    // just validating the original weights are within the expected range
    for (final VarOptItemsSamples<Tuple>.WeightedSample sample : sketch.getSketchSamples()) {
      final double originalWeight = (double) sample.getItem().get(wtIdx);
      assertTrue(originalWeight >= 1.0);
      assertTrue(originalWeight <= k + 1.0);
    }
  } catch (final IOException e) {
    fail("Unexpected IOException calling exec()");
  }
}
Aggregations