Use of com.yahoo.sketches.ArrayOfStringsSerDe in project sketches-core by DataSketches.
Source: class ItemsSketchTest, method checkToFromByteArray2.
/**
 * Round-trips a string sketch through its byte-array form twice, first with the
 * ordered flag set and then with it cleared, and checks after each round trip
 * that the restored sketch returns the same quantiles as the source sketch.
 *
 * @param k forwarded to buildStringIS when building the source sketch
 * @param n forwarded to buildStringIS when building the source sketch
 */
private static void checkToFromByteArray2(int k, int n) {
  final ItemsSketch<String> original = buildStringIS(k, n);
  final ArrayOfStringsSerDe stringSerDe = new ArrayOfStringsSerDe();
  // First pass serializes with ordered = true, second pass with ordered = false.
  for (final boolean ordered : new boolean[] {true, false}) {
    final byte[] serialized = original.toByteArray(ordered, stringSerDe);
    final Memory wrapped = Memory.wrap(serialized);
    final ItemsSketch<String> restored =
        ItemsSketch.getInstance(wrapped, Comparator.naturalOrder(), stringSerDe);
    // Same query fractions for both sketches, stepped by 0.1 starting at 0.1.
    double fraction = 0.1;
    while (fraction < 0.95) {
      assertEquals(original.getQuantile(fraction), restored.getQuantile(fraction));
      fraction += 0.1;
    }
  }
}
Use of com.yahoo.sketches.ArrayOfStringsSerDe in project sketches-core by DataSketches.
Source: class ItemsSketchTest, method empty.
/**
 * Checks what a sketch that has received no updates reports: it is empty, has
 * zero N and zero retained items, returns null for min, max and quantile
 * queries, serializes to 8 bytes, and yields NaN-filled PMF and CDF arrays
 * whose length is one more than the number of split points.
 */
@Test
public void empty() {
  final ItemsSketch<String> emptySketch = ItemsSketch.getInstance(128, Comparator.naturalOrder());
  Assert.assertNotNull(emptySketch);
  Assert.assertTrue(emptySketch.isEmpty());
  Assert.assertEquals(emptySketch.getN(), 0);
  Assert.assertEquals(emptySketch.getRetainedItems(), 0);

  // Every value-returning query is expected to give null.
  Assert.assertNull(emptySketch.getMinValue());
  Assert.assertNull(emptySketch.getMaxValue());
  Assert.assertNull(emptySketch.getQuantile(0.5));
  Assert.assertNull(emptySketch.getQuantiles(2));
  Assert.assertNull(emptySketch.getQuantiles(new double[] { 0.0, 1.0 }));

  final byte[] serialized = emptySketch.toByteArray(new ArrayOfStringsSerDe());
  Assert.assertEquals(serialized.length, 8);

  // Queried first with no split points, then with the single split point "a".
  final String[][] splitPointSets = { new String[0], new String[] { "a" } };

  for (final String[] splitPoints : splitPointSets) {
    final double[] masses = emptySketch.getPMF(splitPoints);
    Assert.assertEquals(masses.length, splitPoints.length + 1);
    for (final double mass : masses) {
      Assert.assertEquals(mass, Double.NaN);
    }
  }

  for (final String[] splitPoints : splitPointSets) {
    final double[] cumulative = emptySketch.getCDF(splitPoints);
    Assert.assertEquals(cumulative.length, splitPoints.length + 1);
    for (final double value : cumulative) {
      Assert.assertEquals(value, Double.NaN);
    }
  }
}
Use of com.yahoo.sketches.ArrayOfStringsSerDe in project sketches-core by DataSketches.
Source: class ItemsSketchTest, method serializeDeserializeUft8Strings.
/**
 * Feeds four distinct strings into a sketch, serializes it, rebuilds a second
 * sketch from the bytes, applies three more updates to the rebuilt sketch, and
 * then checks its active item count, stream length and per-item estimates.
 */
@Test
public void serializeDeserializeUft8Strings() {
  final String itemA = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
  final String itemB = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
  final String itemC = "ccccccccccccccccccccccccccccc";
  final String itemD = "ddddddddddddddddddddddddddddd";

  final ItemsSketch<String> source = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE);
  for (final String item : new String[] { itemA, itemB, itemC, itemD }) {
    source.update(item);
  }

  final byte[] serialized = source.toByteArray(new ArrayOfStringsSerDe());
  final Memory wrapped = Memory.wrap(serialized);
  final ItemsSketch<String> restored = ItemsSketch.getInstance(wrapped, new ArrayOfStringsSerDe());

  // Three further updates on the restored copy: b, c, b.
  for (final String item : new String[] { itemB, itemC, itemB }) {
    restored.update(item);
  }

  Assert.assertFalse(restored.isEmpty());
  Assert.assertEquals(restored.getNumActiveItems(), 4);
  Assert.assertEquals(restored.getStreamLength(), 7);
  Assert.assertEquals(restored.getEstimate(itemA), 1);
  Assert.assertEquals(restored.getEstimate(itemB), 3);
  Assert.assertEquals(restored.getEstimate(itemC), 2);
  Assert.assertEquals(restored.getEstimate(itemD), 1);
}
Use of com.yahoo.sketches.ArrayOfStringsSerDe in project sketches-pig by DataSketches.
Source: class GetKFromStringsSketch, method exec.
/**
 * Rebuilds a string sketch from its serialized bytes and returns its k.
 *
 * @param input tuple that must hold exactly one field, a DataByteArray
 *     containing the serialized sketch
 * @return the value returned by getK() on the rebuilt sketch
 * @throws IllegalArgumentException if the tuple does not have exactly one
 *     field, or if that field is null or not a DataByteArray
 * @throws IOException as declared by the overridden method
 */
@Override
public Integer exec(final Tuple input) throws IOException {
  if (input.size() != 1) {
    throw new IllegalArgumentException("expected one input");
  }
  // Read the field once so the type check and the error message see the same value.
  final Object sketchField = input.get(0);
  if (!(sketchField instanceof DataByteArray)) {
    // A null field has no class to report; name it explicitly rather than
    // hitting a NullPointerException while building the error message.
    final String actualType = (sketchField == null) ? "null" : sketchField.getClass().getSimpleName();
    throw new IllegalArgumentException("expected a DataByteArray as a sketch, got " + actualType);
  }
  final DataByteArray dba = (DataByteArray) sketchField;
  final ItemsSketch<String> sketch = ItemsSketch.getInstance(Memory.wrap(dba.get()), Comparator.naturalOrder(), new ArrayOfStringsSerDe());
  return sketch.getK();
}
Use of com.yahoo.sketches.ArrayOfStringsSerDe in project sketches-pig by DataSketches.
Source: class GetPmfFromStringsSketch, method exec.
/**
 * Rebuilds a string sketch from its serialized bytes and returns the result of
 * getPMF for the supplied split points, converted to a tuple.
 *
 * @param input tuple whose first field is a DataByteArray containing the
 *     serialized sketch, followed by one or more String split points
 * @return the PMF values converted by Util.doubleArrayToTuple, or null when
 *     the sketch returns a null PMF
 * @throws IllegalArgumentException if fewer than two fields are supplied, if
 *     the first field is null or not a DataByteArray, or if any split point
 *     is null or not a String
 * @throws IOException as declared by the overridden method
 */
@Override
public Tuple exec(final Tuple input) throws IOException {
  if (input.size() < 2) {
    throw new IllegalArgumentException("expected two or more inputs: sketch and list of split points");
  }
  // Read the field once so the type check and the error message see the same value.
  final Object sketchField = input.get(0);
  if (!(sketchField instanceof DataByteArray)) {
    // A null field has no class to report; name it explicitly rather than
    // hitting a NullPointerException while building the error message.
    final String actualType = (sketchField == null) ? "null" : sketchField.getClass().getSimpleName();
    throw new IllegalArgumentException("expected a DataByteArray as a sketch, got " + actualType);
  }
  final DataByteArray dba = (DataByteArray) sketchField;
  final ItemsSketch<String> sketch = ItemsSketch.getInstance(Memory.wrap(dba.get()), Comparator.naturalOrder(), new ArrayOfStringsSerDe());
  // Fields 1..size-1 are the split points; field 0 is the sketch itself.
  final String[] splitPoints = new String[input.size() - 1];
  for (int i = 1; i < input.size(); i++) {
    final Object splitPointField = input.get(i);
    if (!(splitPointField instanceof String)) {
      // Same null-safe type description as for the sketch field above.
      final String actualType = (splitPointField == null) ? "null" : splitPointField.getClass().getSimpleName();
      throw new IllegalArgumentException("expected a string value as a split point, got " + actualType);
    }
    splitPoints[i - 1] = (String) splitPointField;
  }
  final double[] pmf = sketch.getPMF(splitPoints);
  if (pmf == null) {
    return null;
  }
  return Util.doubleArrayToTuple(pmf);
}
Aggregations