Use of org.apache.pig.backend.executionengine.ExecException in project sketches-pig by DataSketches.
Class VarOptCommonAlgebraicTest, method unionSketchesExec.
/**
 * Checks that {@code UnionSketchesAsByteArray} and {@code UnionSketchesAsTuple} each produce a
 * sketch matching the result of a union that was fed the same input sketches directly.
 */
@Test
public void unionSketchesExec() {
  // The two UDFs differ only in that UnionSketchesAsTuple wraps the serialized sketch in a
  // tuple. As long as the union result is still in exact mode, both outputs should be identical.
  final int k = 100;
  final int sketchCount = 3;
  // sketchCount * itemsPerSketch should stay below k
  final int itemsPerSketch = 10;
  final String kArg = Integer.toString(k);

  final VarOptCommonImpl.UnionSketchesAsTuple tupleUdf =
      new VarOptCommonImpl.UnionSketchesAsTuple(kArg);
  final VarOptCommonImpl.UnionSketchesAsByteArray byteArrayUdf =
      new VarOptCommonImpl.UnionSketchesAsByteArray(kArg);

  final DataBag serializedSketches = BagFactory.getInstance().newDefaultBag();
  // Reference union, updated with every input sketch and compared against at the end.
  final VarOptItemsUnion<Tuple> referenceUnion = VarOptItemsUnion.newInstance(k);
  final VarOptItemsSketch<Tuple> scratch = VarOptItemsSketch.newInstance(k);

  try {
    // Running count of items generated so far; both the id and the weight derive from it.
    int produced = 0;
    for (int s = 0; s < sketchCount; ++s) {
      scratch.reset();
      for (int i = 0; i < itemsPerSketch; ++i, ++produced) {
        final char id = (char) ('a' + produced);
        final double weight = produced + 1.0;

        final Tuple item = TupleFactory.getInstance().newTuple(2);
        item.set(0, String.valueOf(id));
        item.set(1, weight);
        scratch.update(item, weight);
      }

      // Each bag entry is a one-field tuple holding the serialized sketch.
      final Tuple holder = TupleFactory.getInstance().newTuple(1);
      holder.set(0, new DataByteArray(scratch.toByteArray(serDe_)));
      serializedSketches.add(holder);
      referenceUnion.update(scratch);
    }
  } catch (final ExecException e) {
    fail("Unexpected ExecException creating input data");
  }

  try {
    final Tuple udfInput = TupleFactory.getInstance().newTuple(1);
    udfInput.set(0, serializedSketches);

    final DataByteArray byteArrayOutput = byteArrayUdf.exec(udfInput);
    final VarOptItemsSketch<Tuple> fromByteArray =
        VarOptItemsSketch.heapify(Memory.wrap(byteArrayOutput.get()), serDe_);

    final DataByteArray tupleOutput = (DataByteArray) tupleUdf.exec(udfInput).get(0);
    final VarOptItemsSketch<Tuple> fromTuple =
        VarOptItemsSketch.heapify(Memory.wrap(tupleOutput.get()), serDe_);

    final VarOptItemsSketch<Tuple> expected = referenceUnion.getResult();
    compareResults(fromByteArray, expected);
    compareResults(fromTuple, expected);
  } catch (final IOException e) {
    fail("Unexpected IOException calling exec()");
  }
}
Use of org.apache.pig.backend.executionengine.ExecException in project sketches-pig by DataSketches.
Class ReservoirUnion, method accumulate.
/**
 * Folds every reservoir found in the bag at field 0 of {@code inputTuple} into the running union.
 *
 * <p>exec() could be overloaded for the easy cases, but each incoming reservoir's k still has to
 * be compared against the max k, with possible downsampling.
 *
 * <p>Input that is null, has no fields, or has a null first field is ignored. The union is
 * created on first use with {@code maxK_}. Each bag entry is read as (n, k, bag of samples).
 *
 * @param inputTuple tuple whose first field is a bag of reservoir tuples
 * @throws IOException an {@code ExecException} is raised when reading a reservoir tuple triggers
 *     an {@code IndexOutOfBoundsException}
 */
@Override
public void accumulate(final Tuple inputTuple) throws IOException {
  final boolean nothingToMerge =
      inputTuple == null || inputTuple.size() < 1 || inputTuple.isNull(0);
  if (nothingToMerge) {
    return;
  }

  final DataBag reservoirBag = (DataBag) inputTuple.get(0);

  if (union_ == null) {
    union_ = ReservoirItemsUnion.newInstance(maxK_);
  }

  try {
    for (final Tuple reservoir : reservoirBag) {
      // No explicit validation here: a null entry or one that is too short simply throws.
      final long reservoirN = (long) reservoir.get(0);
      final int reservoirK = (int) reservoir.get(1);
      final DataBag items = (DataBag) reservoir.get(2);
      union_.update(reservoirN, reservoirK, ReservoirSampling.dataBagToArrayList(items));
    }
  } catch (final IndexOutOfBoundsException e) {
    throw new ExecException("Cannot update union with given reservoir", e);
  }
}
Use of org.apache.pig.backend.executionengine.ExecException in project sketches-pig by DataSketches.
Class ReservoirSampling, method createResultTuple.
/**
 * Builds a new three-field tuple holding {@code n}, {@code k} and {@code samples}, in that order.
 *
 * @param n value stored in field 0
 * @param k value stored in field 1
 * @param samples bag stored in field 2
 * @return the populated tuple
 * @throws RuntimeException wrapping any {@code ExecException} raised while setting a field
 */
static Tuple createResultTuple(final long n, final int k, final DataBag samples) {
  final Object[] fields = {n, k, samples};
  final Tuple result = TupleFactory.getInstance().newTuple(3);
  try {
    for (int pos = 0; pos < fields.length; ++pos) {
      result.set(pos, fields[pos]);
    }
    return result;
  } catch (final ExecException e) {
    throw new RuntimeException("Pig error: " + e.getMessage(), e);
  }
}
Use of org.apache.pig.backend.executionengine.ExecException in project pigeon by aseldawy.
Class Area, method exec.
/**
 * Parses the first field of {@code input} as a geometry and returns its 2D area.
 *
 * @param input tuple whose first field is handed to the geometry parser
 * @return the value of {@code calculateArea2D()} on the parsed geometry's Esri form
 * @throws IOException a {@code GeoException} carrying the geometry parsed so far (null if
 *     parsing had not completed) is raised when an {@code ExecException} occurs
 */
@Override
public Double exec(Tuple input) throws IOException {
  // Declared outside the try so the catch block can report whatever was parsed.
  OGCGeometry parsed = null;
  try {
    parsed = geometryParser.parseGeom(input.get(0));
    return parsed.getEsriGeometry().calculateArea2D();
  } catch (ExecException cause) {
    throw new GeoException(parsed, cause);
  }
}
Use of org.apache.pig.backend.executionengine.ExecException in project pigeon by aseldawy.
Class Boundary, method exec.
/**
 * Parses the first field of {@code input} as a geometry and returns its boundary, serialized
 * through {@code WKB_WRITER}.
 *
 * @param input tuple whose first field is handed to the geometry parser
 * @return the written boundary wrapped in a {@code DataByteArray}
 * @throws IOException a {@code GeoException} carrying the geometry parsed so far (null if
 *     parsing had not completed) is raised when an {@code ExecException} occurs
 */
@Override
public DataByteArray exec(Tuple input) throws IOException {
  // Declared outside the try so the catch block can report whatever was parsed.
  Geometry parsed = null;
  try {
    parsed = GEOMETRY_PARSER.parseGeom(input.get(0));
    return new DataByteArray(WKB_WRITER.write(parsed.getBoundary()));
  } catch (ExecException cause) {
    throw new GeoException(parsed, cause);
  }
}
Aggregations