Use of com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder in the project sketches-pig by DataSketches.
Class ArrayOfDoublesSketchToVariancesTest, method emptyInputSketch.
/**
 * Feeds a serialized, compacted sketch that received no updates to the UDF
 * and expects a null result.
 * @throws Exception if the UDF execution fails
 */
@Test
public void emptyInputSketch() throws Exception {
  final EvalFunc<Tuple> udf = new ArrayOfDoublesSketchToVariances();

  // Built with builder defaults and never updated
  final ArrayOfDoublesUpdatableSketch emptySketch = new ArrayOfDoublesUpdatableSketchBuilder().build();
  final byte[] serialized = emptySketch.compact().toByteArray();

  final Tuple input = PigUtil.objectsToTuple(new DataByteArray(serialized));
  Assert.assertNull(udf.exec(input));
}
Use of com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder in the project sketches-pig by DataSketches.
Class ArrayOfDoublesSketchToVariancesTest, method oneEntryInputSketch.
/**
 * Feeds a sketch holding a single entry with one value to the UDF and
 * expects a one-element result equal to 0.0.
 * @throws Exception if the UDF execution fails
 */
@Test
public void oneEntryInputSketch() throws Exception {
  final EvalFunc<Tuple> udf = new ArrayOfDoublesSketchToVariances();

  final ArrayOfDoublesUpdatableSketch singleEntrySketch = new ArrayOfDoublesUpdatableSketchBuilder().build();
  final double[] values = { 1 };
  singleEntrySketch.update(1, values);

  final byte[] serialized = singleEntrySketch.compact().toByteArray();
  final Tuple output = udf.exec(PigUtil.objectsToTuple(new DataByteArray(serialized)));

  Assert.assertNotNull(output);
  Assert.assertEquals(output.size(), 1);
  Assert.assertEquals(output.get(0), 0.0);
}
Use of com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder in the project sketches-pig by DataSketches.
Class ArrayOfDoublesSketchToVariancesTest, method manyEntriesTwoValuesInputSketch.
/**
 * Fills a two-value sketch with 10000 keys carrying seeded Gaussian values
 * and checks that the two results are close to 1.0 and 100.0 respectively.
 * @throws Exception if the UDF execution fails
 */
@Test
public void manyEntriesTwoValuesInputSketch() throws Exception {
  final EvalFunc<Tuple> udf = new ArrayOfDoublesSketchToVariances();
  final ArrayOfDoublesUpdatableSketch sketch =
      new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build();

  // Fixed seed keeps the generated values reproducible from run to run
  final Random rand = new Random(0);

  // enough keys to saturate the sketch with default number of nominal entries (4K)
  final int numKeys = 10000;
  for (int key = 0; key < numKeys; key++) {
    // two normally distributed values: standard deviation 1, then standard deviation 10
    final double narrow = rand.nextGaussian();
    final double wide = rand.nextGaussian() * 10.0;
    sketch.update(key, new double[] { narrow, wide });
  }
  Assert.assertTrue(sketch.getRetainedEntries() >= 4096);

  final byte[] serialized = sketch.compact().toByteArray();
  final Tuple output = udf.exec(PigUtil.objectsToTuple(new DataByteArray(serialized)));

  Assert.assertNotNull(output);
  Assert.assertEquals(output.size(), 2);

  // each result must equal the squared standard deviation within 4%
  final double relativeTolerance = 0.04;
  Assert.assertEquals((double) output.get(0), 1.0, relativeTolerance);
  Assert.assertEquals((double) output.get(1), 100.0, 100.0 * relativeTolerance);
}
Use of com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder in the project sketches-pig by DataSketches.
Class ArrayOfDoublesSketchesToPValueEstimatesTest, method twoMetrics.
/**
 * Checks the p-values returned for two metrics at once: each sketch entry
 * carries a sample value and the index of that sample.
 * @throws Exception if the UDF execution fails
 */
@Test
public void twoMetrics() throws Exception {
  final EvalFunc<Tuple> udf = new ArrayOfDoublesSketchesToPValueEstimates();

  // One sketch per group, two values per entry, 128 nominal entries
  final ArrayOfDoublesUpdatableSketch first =
      new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).setNominalEntries(128).build();
  final ArrayOfDoublesUpdatableSketch second =
      new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).setNominalEntries(128).build();

  // Sample dataset (smoker/non-smoker brain size)
  final double[] groupA = { 7.3, 6.5, 5.2, 6.3, 7.0, 5.9, 5.2, 5.0, 4.7, 5.7, 5.7, 3.3, 5.0, 4.6, 4.8, 3.8, 4.6 };
  final double[] groupB = { 4.2, 4.0, 2.6, 4.9, 4.4, 4.4, 5.5, 5.1, 5.1, 3.2, 3.9, 3.2, 4.9, 4.3, 4.8, 2.4, 5.5, 5.5, 3.7 };

  // Populate sketch A: the key is the sample index, the values are (sample, index)
  for (int idx = 0; idx < groupA.length; idx++) {
    final double[] row = { groupA[idx], idx };
    first.update(idx, row);
  }

  // Populate sketch B the same way
  for (int idx = 0; idx < groupB.length; idx++) {
    final double[] row = { groupB[idx], idx };
    second.update(idx, row);
  }

  // Serialize both compact sketches into a single input tuple and run the UDF
  final DataByteArray bytesA = new DataByteArray(first.compact().toByteArray());
  final DataByteArray bytesB = new DataByteArray(second.compact().toByteArray());
  final Tuple output = udf.exec(PigUtil.objectsToTuple(bytesA, bytesB));

  // One p-value per metric is expected
  Assert.assertNotNull(output);
  Assert.assertEquals(output.size(), 2);

  // Compare against the expected p-values, allowing a delta
  Assert.assertEquals((double) output.get(0), 0.0043, 0.0001);
  Assert.assertEquals((double) output.get(1), 0.58, 0.01);
}
Use of com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder in the project sketches-pig by DataSketches.
Class ArrayOfDoublesSketchesToPValueEstimatesTest, method largeDataSet.
/**
 * Checks the p-value for a large data set by comparing the UDF result with
 * the value computed by {@code TTest} on the same raw samples.
 * @throws Exception if the UDF execution fails
 */
@Test
public void largeDataSet() throws Exception {
  final EvalFunc<Tuple> udf = new ArrayOfDoublesSketchesToPValueEstimates();

  // One sketch per sample set, a single value per entry, 16000 nominal entries
  final ArrayOfDoublesUpdatableSketch first =
      new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(1).setNominalEntries(16000).build();
  final ArrayOfDoublesUpdatableSketch second =
      new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(1).setNominalEntries(16000).build();

  // Sample count per set, and the offset added to every B sample
  final int n = 100000;
  final int bShift = 1000;
  final double[] a = new double[n];
  final double[] b = new double[n];

  // Seeded generator; all A samples are drawn before any B sample
  final Random rand = new Random(41L);

  // Fill sketch A and keep the raw samples for the reference computation
  for (int idx = 0; idx < n; idx++) {
    a[idx] = rand.nextGaussian();
    first.update(idx, new double[] { a[idx] });
  }

  // Fill sketch B with shifted samples, again keeping the raw values
  for (int idx = 0; idx < n; idx++) {
    b[idx] = rand.nextGaussian() + bShift;
    second.update(idx, new double[] { b[idx] });
  }

  // Reference p-value computed from the raw samples
  final double expectedPValue = new TTest().tTest(a, b);

  // Serialize both compact sketches into a single input tuple and run the UDF
  final DataByteArray bytesA = new DataByteArray(first.compact().toByteArray());
  final DataByteArray bytesB = new DataByteArray(second.compact().toByteArray());
  final Tuple output = udf.exec(PigUtil.objectsToTuple(bytesA, bytesB));

  // Exactly one p-value is expected
  Assert.assertNotNull(output);
  Assert.assertEquals(output.size(), 1);

  // Compare with the reference value, allowing a delta
  Assert.assertEquals((double) output.get(0), expectedPValue, 0.01);
}
Aggregations