Use of com.yahoo.sketches.tuple.ArrayOfDoublesSketch in project sketches-pig by DataSketches.
Class ArrayOfDoublesSketchToQuantilesSketch, method exec.
/**
 * Builds a quantiles sketch from the values of one column of an ArrayOfDoublesSketch.
 *
 * @param input tuple whose field 0 is a serialized ArrayOfDoublesSketch (as a DataByteArray)
 *     and whose optional field 1 is the 1-based column number to read; column 1 is used when
 *     field 1 is absent
 * @return the compact quantiles sketch serialized into a DataByteArray, or null when the input
 *     tuple is null or empty, or when the sketch field is null
 * @throws IOException propagated from reading the tuple fields
 * @throws IllegalArgumentException if the column number is less than 1 or greater than the
 *     number of values in the given sketch
 */
@Override
public DataByteArray exec(final Tuple input) throws IOException {
  if ((input == null) || (input.size() == 0)) {
    return null;
  }
  final DataByteArray dba = (DataByteArray) input.get(0);
  // A null sketch field would otherwise fail with a NullPointerException on dba.get();
  // treat it the same way as a missing input.
  if (dba == null) {
    return null;
  }
  final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dba.get()));
  int column = 1;
  if (input.size() > 1) {
    column = (int) input.get(1);
    final int numValues = sketch.getNumValues();
    if ((column < 1) || (column > numValues)) {
      throw new IllegalArgumentException("Column number out of range. The given sketch has " + numValues + " columns");
    }
  }
  final DoublesSketchBuilder builder = DoublesSketch.builder();
  // k is defined outside this method; a non-positive k leaves the builder's default in place
  if (k > 0) {
    builder.setK(k);
  }
  final UpdateDoublesSketch qs = builder.build();
  // columns are 1-based for the caller, 0-based inside each values array
  final int columnIndex = column - 1;
  final ArrayOfDoublesSketchIterator it = sketch.iterator();
  while (it.next()) {
    qs.update(it.getValues()[columnIndex]);
  }
  return new DataByteArray(qs.compact().toByteArray());
}
Use of com.yahoo.sketches.tuple.ArrayOfDoublesSketch in project sketches-pig by DataSketches.
Class ArrayOfDoublesSketchesToPValueEstimates, method exec.
/**
 * Computes one t-test p-value per metric from two ArrayOfDoublesSketches.
 *
 * @param input tuple of exactly two fields, each a serialized ArrayOfDoublesSketch (as a
 *     DataByteArray)
 * @return a tuple with one p-value per value column of the sketches, or null when the input is
 *     null, does not have exactly two fields, either sketch field is null, or either sketch
 *     retains fewer than 2 entries
 * @throws IOException propagated from reading or writing tuple fields
 * @throws IllegalArgumentException if the two sketches have a different number of values
 */
@Override
public Tuple exec(final Tuple input) throws IOException {
  if ((input == null) || (input.size() != 2)) {
    return null;
  }
  // Get the two sketches
  final DataByteArray dbaA = (DataByteArray) input.get(0);
  final DataByteArray dbaB = (DataByteArray) input.get(1);
  // A null sketch field would otherwise fail with a NullPointerException on get();
  // no p-value can be produced without both sketches.
  if ((dbaA == null) || (dbaB == null)) {
    return null;
  }
  final ArrayOfDoublesSketch sketchA = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dbaA.get()));
  final ArrayOfDoublesSketch sketchB = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dbaB.get()));
  // Check that the size of the arrays in the sketches are the same
  final int numMetrics = sketchA.getNumValues();
  if (numMetrics != sketchB.getNumValues()) {
    throw new IllegalArgumentException("Both sketches must have the same number of values");
  }
  // If the sketches contain fewer than 2 values, the p-value can't be calculated
  if (sketchA.getRetainedEntries() < 2 || sketchB.getRetainedEntries() < 2) {
    return null;
  }
  // Get the statistical summary from each sketch
  final SummaryStatistics[] summariesA = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(sketchA);
  final SummaryStatistics[] summariesB = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(sketchB);
  // Calculate the p-values, one per metric
  final TTest tTest = new TTest();
  final Tuple pValues = TupleFactory.getInstance().newTuple(numMetrics);
  for (int i = 0; i < numMetrics; i++) {
    pValues.set(i, tTest.tTest(summariesA[i], summariesB[i]));
  }
  return pValues;
}
Use of com.yahoo.sketches.tuple.ArrayOfDoublesSketch in project sketches-pig by DataSketches.
Class DataToArrayOfDoublesSketchTest, method accumulator.
/**
 * Feeds two bags of (key, value) tuples through the accumulator interface and checks the
 * resulting sketch, then checks that the sketch obtained after cleanup() has an estimate of 0.
 */
@Test
public void accumulator() throws Exception {
  final TupleFactory tupleFactory = TupleFactory.getInstance();
  final BagFactory bagFactory = BagFactory.getInstance();
  final Accumulator<Tuple> accumulator = new DataToArrayOfDoublesSketch("32", "1");

  // first call: one update for key "a"
  final Tuple firstInput = tupleFactory.newTuple(1);
  final DataBag firstBag = bagFactory.newDefaultBag();
  firstBag.add(PigUtil.objectsToTuple("a", 1.0));
  firstInput.set(0, firstBag);
  accumulator.accumulate(firstInput);

  // second call: updates for keys "b", "a" and "b"
  final Tuple secondInput = tupleFactory.newTuple(1);
  final DataBag secondBag = bagFactory.newDefaultBag();
  secondBag.add(PigUtil.objectsToTuple("b", 1.0));
  secondBag.add(PigUtil.objectsToTuple("a", 2.0));
  secondBag.add(PigUtil.objectsToTuple("b", 2.0));
  secondInput.set(0, secondBag);
  accumulator.accumulate(secondInput);

  final Tuple accumulated = accumulator.getValue();
  Assert.assertNotNull(accumulated);
  Assert.assertEquals(accumulated.size(), 1);
  final DataByteArray accumulatedBytes = (DataByteArray) accumulated.get(0);
  Assert.assertTrue(accumulatedBytes.size() > 0);
  final ArrayOfDoublesSketch accumulatedSketch =
      ArrayOfDoublesSketches.heapifySketch(Memory.wrap(accumulatedBytes.get()));
  Assert.assertEquals(accumulatedSketch.getEstimate(), 2.0, 0.0);
  for (final double[] row : accumulatedSketch.getValues()) {
    Assert.assertEquals(row[0], 3.0);
  }

  // after cleanup, the value should always be 0
  accumulator.cleanup();
  final Tuple afterCleanup = accumulator.getValue();
  Assert.assertNotNull(afterCleanup);
  Assert.assertEquals(afterCleanup.size(), 1);
  final DataByteArray cleanedBytes = (DataByteArray) afterCleanup.get(0);
  Assert.assertTrue(cleanedBytes.size() > 0);
  final ArrayOfDoublesSketch cleanedSketch =
      ArrayOfDoublesSketches.heapifySketch(Memory.wrap(cleanedBytes.get()));
  Assert.assertEquals(cleanedSketch.getEstimate(), 0.0, 0.0);
}
Use of com.yahoo.sketches.tuple.ArrayOfDoublesSketch in project sketches-pig by DataSketches.
Class DataToArrayOfDoublesSketchTest, method algebraicIntermediateFinalWithSampling.
/**
 * Runs IntermediateFinal, constructed with ("1024", "0.5", "1"), over a bag of 10000 distinct
 * keys and checks that the estimate of the resulting sketch is within 1% of the key count.
 */
@Test
public void algebraicIntermediateFinalWithSampling() throws Exception {
  final int uniques = 10000;
  final EvalFunc<Tuple> func = new DataToArrayOfDoublesSketch.IntermediateFinal("1024", "0.5", "1");

  // one (key, 1.0) tuple per distinct integer key
  final DataBag rawData = BagFactory.getInstance().newDefaultBag();
  for (int key = 0; key < uniques; key++) {
    rawData.add(PigUtil.objectsToTuple(key, 1.0));
  }

  // wrap the data as: tuple( bag( tuple( bag of raw tuples ) ) )
  final Tuple innerTuple = PigUtil.objectsToTuple(rawData);
  final Tuple inputTuple = PigUtil.objectsToTuple(PigUtil.tuplesToBag(innerTuple));
  final Tuple resultTuple = func.exec(inputTuple);

  Assert.assertNotNull(resultTuple);
  Assert.assertEquals(resultTuple.size(), 1);
  final DataByteArray serialized = (DataByteArray) resultTuple.get(0);
  Assert.assertTrue(serialized.size() > 0);
  final ArrayOfDoublesSketch result =
      ArrayOfDoublesSketches.heapifySketch(Memory.wrap(serialized.get()));
  Assert.assertEquals(result.getEstimate(), uniques, uniques * 0.01);
}
Use of com.yahoo.sketches.tuple.ArrayOfDoublesSketch in project sketches-pig by DataSketches.
Class UnionArrayOfDoublesSketchTest, method accumulatorNullSketch.
/**
 * Accumulates a bag whose only tuple holds a null sketch and checks that the union still
 * yields a non-empty serialized sketch with an estimate of 0.
 */
@Test
public void accumulatorNullSketch() throws Exception {
  final Accumulator<Tuple> union = new UnionArrayOfDoublesSketch();

  // the cast keeps null as a single varargs element rather than a null array
  final Tuple nullSketchTuple = PigUtil.objectsToTuple((Object) null);
  final Tuple inputTuple = PigUtil.objectsToTuple(PigUtil.tuplesToBag(nullSketchTuple));
  union.accumulate(inputTuple);

  final Tuple outcome = union.getValue();
  Assert.assertNotNull(outcome);
  Assert.assertEquals(outcome.size(), 1);
  final DataByteArray serialized = (DataByteArray) outcome.get(0);
  Assert.assertTrue(serialized.size() > 0);
  final ArrayOfDoublesSketch unionResult =
      ArrayOfDoublesSketches.heapifySketch(Memory.wrap(serialized.get()));
  Assert.assertEquals(unionResult.getEstimate(), 0.0);
}
Aggregations