Example use of com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder in the sketches-pig project by DataSketches.
From the class ArrayOfDoublesSketchToNumberOfRetainedEntriesTest, method emptyInputSketch.
/**
 * Feeds a serialized sketch that was never updated to the UDF and expects
 * a non-null result equal to zero.
 *
 * @throws Exception if the UDF invocation fails
 */
@Test
public void emptyInputSketch() throws Exception {
  final EvalFunc<Integer> udf = new ArrayOfDoublesSketchToNumberOfRetainedEntries();

  // Build a sketch with default settings and serialize its compact form.
  final ArrayOfDoublesUpdatableSketch emptySketch =
      new ArrayOfDoublesUpdatableSketchBuilder().build();
  final byte[] sketchBytes = emptySketch.compact().toByteArray();
  final DataByteArray serialized = new DataByteArray(sketchBytes);

  final Integer retained = udf.exec(tupleFactory.newTuple(serialized));

  Assert.assertNotNull(retained);
  Assert.assertEquals(retained.intValue(), 0);
}
Example use of com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder in the sketches-pig project by DataSketches.
From the class DataToArrayOfDoublesSketchBase, method exec.
/**
 * Builds a sketch from the single bag carried by the input tuple and returns it
 * serialized in compact form.
 *
 * @param inputTuple tuple expected to hold exactly one field, which is cast to a
 *        {@code DataBag}
 * @return a one-field tuple wrapping the compact sketch bytes in a
 *         {@code DataByteArray}, or {@code null} when the input tuple is
 *         {@code null} or has no fields
 * @throws IOException declared by the overridden method
 * @throws IllegalArgumentException if the input tuple has more than one field
 */
@Override
public Tuple exec(final Tuple inputTuple) throws IOException {
  // Log once so that it is visible in the log which way was used by Pig.
  if (isFirstCall_) {
    Logger.getLogger(getClass()).info("exec is used");
    isFirstCall_ = false;
  }

  // Nothing to aggregate.
  if (inputTuple == null) {
    return null;
  }
  if (inputTuple.size() == 0) {
    return null;
  }

  if (inputTuple.size() != 1) {
    throw new IllegalArgumentException("Input tuple must have 1 bag");
  }

  // Configure the sketch from this instance's settings.
  final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder()
      .setNominalEntries(sketchSize_)
      .setSamplingProbability(samplingProbability_)
      .setNumberOfValues(numValues_)
      .build();

  final DataBag bag = (DataBag) inputTuple.get(0);
  updateSketch(bag, sketch, numValues_);

  final byte[] compactBytes = sketch.compact().toByteArray();
  return Util.tupleFactory.newTuple(new DataByteArray(compactBytes));
}
Example use of com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder in the sketches-pig project by DataSketches.
From the class ArrayOfDoublesSketchesToPValueEstimatesTest, method smokerDatasetSingleMetric.
/**
 * Check p-value for the smoker data set. Single metric.
 *
 * <p>Two single-value sketches are filled with the two sample groups, serialized,
 * and passed to the UDF; the single returned p-value is compared against 0.0043
 * with a tolerance of 0.0001.
 *
 * @throws Exception if the UDF invocation fails
 */
@Test
public void smokerDatasetSingleMetric() throws Exception {
  EvalFunc<Tuple> func = new ArrayOfDoublesSketchesToPValueEstimates();

  // Create the two sketches. The nominal size must be large enough to retain every
  // sample (17 and 19 entries): the expected p-value below is computed from the full
  // groups. 128 matches the size used by the other tests on this data.
  ArrayOfDoublesUpdatableSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder()
      .setNumberOfValues(1)
      .setNominalEntries(128)
      .build();
  ArrayOfDoublesUpdatableSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder()
      .setNumberOfValues(1)
      .setNominalEntries(128)
      .build();

  // Sample dataset (smoker/non-smoker brain size)
  double[] groupA = { 7.3, 6.5, 5.2, 6.3, 7.0, 5.9, 5.2, 5.0, 4.7, 5.7, 5.7, 3.3, 5.0, 4.6, 4.8, 3.8, 4.6 };
  double[] groupB = { 4.2, 4.0, 2.6, 4.9, 4.4, 4.4, 5.5, 5.1, 5.1, 3.2, 3.9, 3.2, 4.9, 4.3, 4.8, 2.4, 5.5, 5.5, 3.7 };

  // Add values to A sketch, using the array index as the (distinct) key
  for (int i = 0; i < groupA.length; i++) {
    sketchA.update(i, new double[] { groupA[i] });
  }

  // Add values to B sketch, using the array index as the (distinct) key
  for (int i = 0; i < groupB.length; i++) {
    sketchB.update(i, new double[] { groupB[i] });
  }

  // Convert to a tuple and execute the UDF
  Tuple inputTuple = PigUtil.objectsToTuple(
      new DataByteArray(sketchA.compact().toByteArray()),
      new DataByteArray(sketchB.compact().toByteArray()));
  Tuple resultTuple = func.exec(inputTuple);

  // Should get 1 p-value back
  Assert.assertNotNull(resultTuple);
  Assert.assertEquals(resultTuple.size(), 1);

  // Check p-value values, with a delta
  Assert.assertEquals((double) resultTuple.get(0), 0.0043, 0.0001);
}
Example use of com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder in the sketches-pig project by DataSketches.
From the class ArrayOfDoublesSketchesToPValueEstimatesTest, method twoEmptySketches.
/**
 * Passes two sketches that were never updated to the UDF and expects a
 * {@code null} result.
 *
 * @throws Exception if the UDF invocation fails
 */
@Test
public void twoEmptySketches() throws Exception {
  final EvalFunc<Tuple> udf = new ArrayOfDoublesSketchesToPValueEstimates();

  // Two sketches built with default settings and left empty.
  final ArrayOfDoublesUpdatableSketch first = new ArrayOfDoublesUpdatableSketchBuilder().build();
  final ArrayOfDoublesUpdatableSketch second = new ArrayOfDoublesUpdatableSketchBuilder().build();

  // Serialize the compact form of each sketch into the UDF input.
  final DataByteArray firstBytes = new DataByteArray(first.compact().toByteArray());
  final DataByteArray secondBytes = new DataByteArray(second.compact().toByteArray());
  final Tuple udfInput = PigUtil.objectsToTuple(firstBytes, secondBytes);

  final Tuple udfOutput = udf.exec(udfInput);
  Assert.assertNull(udfOutput);
}
Example use of com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder in the sketches-pig project by DataSketches.
From the class ArrayOfDoublesSketchesToPValueEstimatesTest, method sketchWithSingleValue.
/**
 * Runs the UDF on a pair of sketches where the second one received a single
 * update, and expects a {@code null} result.
 *
 * @throws Exception if the UDF invocation fails
 */
@Test
public void sketchWithSingleValue() throws Exception {
  final EvalFunc<Tuple> udf = new ArrayOfDoublesSketchesToPValueEstimates();

  // Both sketches keep one double per key and share the same nominal size.
  final ArrayOfDoublesUpdatableSketch first = new ArrayOfDoublesUpdatableSketchBuilder()
      .setNumberOfValues(1)
      .setNominalEntries(128)
      .build();
  final ArrayOfDoublesUpdatableSketch second = new ArrayOfDoublesUpdatableSketchBuilder()
      .setNumberOfValues(1)
      .setNominalEntries(128)
      .build();

  // Sample data: a full group and a group holding one observation.
  final double[] groupA = { 7.3, 6.5, 5.2, 6.3, 7.0, 5.9, 5.2, 5.0, 4.7, 5.7, 5.7, 3.3, 5.0, 4.6, 4.8, 3.8, 4.6 };
  final double[] groupB = { 5.0 };

  // Load group A, keyed by position in the array.
  int keyA = 0;
  for (final double observation : groupA) {
    first.update(keyA++, new double[] { observation });
  }

  // Load group B, keyed by position in the array.
  int keyB = 0;
  for (final double observation : groupB) {
    second.update(keyB++, new double[] { observation });
  }

  // Serialize both sketches into the UDF input and run it.
  final DataByteArray firstBytes = new DataByteArray(first.compact().toByteArray());
  final DataByteArray secondBytes = new DataByteArray(second.compact().toByteArray());
  final Tuple udfOutput = udf.exec(PigUtil.objectsToTuple(firstBytes, secondBytes));

  // Should get null back, as one of the sketches had fewer than 2 items.
  Assert.assertNull(udfOutput);
}
Aggregations