Search in sources :

Example 6 with ArrayOfStringsSerDe

use of com.yahoo.sketches.ArrayOfStringsSerDe in project sketches-core by DataSketches.

the class ItemsSketchTest method checkToFromByteArray2.

/**
 * Serializes a String items sketch to a byte array, rebuilds a sketch from those bytes,
 * and asserts that the rebuilt sketch returns the same quantiles as the source sketch.
 * The round trip is performed twice: first with the ordered flag set, then with it cleared.
 *
 * @param k forwarded to buildStringIS when creating the source sketch
 * @param n forwarded to buildStringIS when creating the source sketch
 */
private static void checkToFromByteArray2(int k, int n) {
    final ItemsSketch<String> original = buildStringIS(k, n);
    final ArrayOfStringsSerDe stringSerDe = new ArrayOfStringsSerDe();
    // First pass serializes in ordered form, second pass in not-ordered form.
    for (final boolean ordered : new boolean[] { true, false }) {
        final byte[] serialized = original.toByteArray(ordered, stringSerDe);
        final Memory wrapped = Memory.wrap(serialized);
        final ItemsSketch<String> restored = ItemsSketch.getInstance(wrapped, Comparator.naturalOrder(), stringSerDe);
        // Step through the fractions by repeatedly adding 0.1 until 0.95 is reached.
        double fraction = 0.1;
        while (fraction < 0.95) {
            assertEquals(original.getQuantile(fraction), restored.getQuantile(fraction));
            fraction += 0.1;
        }
    }
}
Also used : ArrayOfStringsSerDe(com.yahoo.sketches.ArrayOfStringsSerDe) Memory(com.yahoo.memory.Memory) WritableMemory(com.yahoo.memory.WritableMemory)

Example 7 with ArrayOfStringsSerDe

use of com.yahoo.sketches.ArrayOfStringsSerDe in project sketches-core by DataSketches.

the class ItemsSketchTest method empty.

/**
 * Checks what a sketch that has never been updated reports: it is non-null and empty,
 * has zero N and zero retained items, returns null for min, max, and quantile queries,
 * serializes to 8 bytes, and yields all-NaN PMF and CDF arrays whose length is one more
 * than the number of split points supplied.
 */
@Test
public void empty() {
    final ItemsSketch<String> emptySketch = ItemsSketch.getInstance(128, Comparator.naturalOrder());
    Assert.assertNotNull(emptySketch);
    Assert.assertTrue(emptySketch.isEmpty());
    Assert.assertEquals(emptySketch.getN(), 0);
    Assert.assertEquals(emptySketch.getRetainedItems(), 0);

    // Every value-returning query is expected to yield null.
    Assert.assertNull(emptySketch.getMinValue());
    Assert.assertNull(emptySketch.getMaxValue());
    Assert.assertNull(emptySketch.getQuantile(0.5));
    Assert.assertNull(emptySketch.getQuantiles(2));
    Assert.assertNull(emptySketch.getQuantiles(new double[] { 0.0, 1.0 }));

    final byte[] serialized = emptySketch.toByteArray(new ArrayOfStringsSerDe());
    Assert.assertEquals(serialized.length, 8);

    // PMF with no split points, then with a single split point.
    final double[] pmfNoSplits = emptySketch.getPMF(new String[0]);
    Assert.assertEquals(pmfNoSplits.length, 1);
    for (final double mass : pmfNoSplits) {
        Assert.assertEquals(mass, Double.NaN);
    }
    final double[] pmfOneSplit = emptySketch.getPMF(new String[] { "a" });
    Assert.assertEquals(pmfOneSplit.length, 2);
    for (final double mass : pmfOneSplit) {
        Assert.assertEquals(mass, Double.NaN);
    }

    // CDF with no split points, then with a single split point.
    final double[] cdfNoSplits = emptySketch.getCDF(new String[0]);
    Assert.assertEquals(cdfNoSplits.length, 1);
    for (final double rank : cdfNoSplits) {
        Assert.assertEquals(rank, Double.NaN);
    }
    final double[] cdfOneSplit = emptySketch.getCDF(new String[] { "a" });
    Assert.assertEquals(cdfOneSplit.length, 2);
    for (final double rank : cdfOneSplit) {
        Assert.assertEquals(rank, Double.NaN);
    }
}
Also used : ArrayOfStringsSerDe(com.yahoo.sketches.ArrayOfStringsSerDe) Test(org.testng.annotations.Test)

Example 8 with ArrayOfStringsSerDe

use of com.yahoo.sketches.ArrayOfStringsSerDe in project sketches-core by DataSketches.

the class ItemsSketchTest method serializeDeserializeUft8Strings.

/**
 * Feeds four distinct strings into a sketch, serializes it, rebuilds a second sketch from
 * the bytes, applies three more updates to the rebuilt sketch, and then checks its active
 * item count, stream length, and per-item estimates.
 */
@Test
public void serializeDeserializeUft8Strings() {
    final String itemA = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    final String itemB = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
    final String itemC = "ccccccccccccccccccccccccccccc";
    final String itemD = "ddddddddddddddddddddddddddddd";

    final ItemsSketch<String> source = new ItemsSketch<>(1 << LG_MIN_MAP_SIZE);
    for (final String item : new String[] { itemA, itemB, itemC, itemD }) {
        source.update(item);
    }

    final byte[] serialized = source.toByteArray(new ArrayOfStringsSerDe());
    final Memory wrapped = Memory.wrap(serialized);
    final ItemsSketch<String> restored = ItemsSketch.getInstance(wrapped, new ArrayOfStringsSerDe());

    // Three further updates on the rebuilt sketch: two for itemB, one for itemC.
    restored.update(itemB);
    restored.update(itemC);
    restored.update(itemB);

    Assert.assertFalse(restored.isEmpty());
    Assert.assertEquals(restored.getNumActiveItems(), 4);
    // Four updates before serialization plus three after.
    Assert.assertEquals(restored.getStreamLength(), 7);
    Assert.assertEquals(restored.getEstimate(itemA), 1);
    Assert.assertEquals(restored.getEstimate(itemB), 3);
    Assert.assertEquals(restored.getEstimate(itemC), 2);
    Assert.assertEquals(restored.getEstimate(itemD), 1);
}
Also used : ArrayOfStringsSerDe(com.yahoo.sketches.ArrayOfStringsSerDe) Test(org.testng.annotations.Test)

Example 9 with ArrayOfStringsSerDe

use of com.yahoo.sketches.ArrayOfStringsSerDe in project sketches-pig by DataSketches.

the class GetKFromStringsSketch method exec.

/**
 * Rebuilds an ItemsSketch of strings from the single input field and returns the value
 * reported by its getK().
 *
 * @param input tuple that must contain exactly one field, a DataByteArray holding the sketch bytes
 * @return the result of getK() on the rebuilt sketch
 * @throws IllegalArgumentException if the tuple does not have exactly one field, or that field
 *     is null or not a DataByteArray
 * @throws IOException declared by the overridden method
 */
@Override
public Integer exec(final Tuple input) throws IOException {
    if (input.size() != 1) {
        throw new IllegalArgumentException("expected one input");
    }
    final Object sketchField = input.get(0);
    if (!(sketchField instanceof DataByteArray)) {
        // A null field also fails instanceof; name it explicitly instead of dereferencing it,
        // so the caller sees the intended IllegalArgumentException rather than an NPE.
        final String actualType = (sketchField == null) ? "null" : sketchField.getClass().getSimpleName();
        throw new IllegalArgumentException("expected a DataByteArray as a sketch, got " + actualType);
    }
    final DataByteArray dba = (DataByteArray) sketchField;
    final ItemsSketch<String> sketch = ItemsSketch.getInstance(Memory.wrap(dba.get()), Comparator.naturalOrder(), new ArrayOfStringsSerDe());
    return sketch.getK();
}
Also used : ArrayOfStringsSerDe(com.yahoo.sketches.ArrayOfStringsSerDe) DataByteArray(org.apache.pig.data.DataByteArray)

Example 10 with ArrayOfStringsSerDe

use of com.yahoo.sketches.ArrayOfStringsSerDe in project sketches-pig by DataSketches.

the class GetPmfFromStringsSketch method exec.

/**
 * Rebuilds an ItemsSketch of strings from the first input field, treats every remaining
 * field as a String split point, and returns the sketch's getPMF result for those split
 * points converted to a tuple.
 *
 * @param input tuple whose first field is a DataByteArray holding the sketch bytes, followed
 *     by one or more String split points
 * @return the PMF values as a tuple, or null when getPMF returns null
 * @throws IllegalArgumentException if fewer than two fields are supplied, the first field is
 *     null or not a DataByteArray, or any split point is null or not a String
 * @throws IOException declared by the overridden method
 */
@Override
public Tuple exec(final Tuple input) throws IOException {
    final int numInputs = input.size();
    if (numInputs < 2) {
        throw new IllegalArgumentException("expected two or more inputs: sketch and list of split points");
    }
    final Object sketchField = input.get(0);
    if (!(sketchField instanceof DataByteArray)) {
        // A null field also fails instanceof; report it by name instead of dereferencing it.
        final String actualType = (sketchField == null) ? "null" : sketchField.getClass().getSimpleName();
        throw new IllegalArgumentException("expected a DataByteArray as a sketch, got " + actualType);
    }
    final DataByteArray dba = (DataByteArray) sketchField;
    final ItemsSketch<String> sketch = ItemsSketch.getInstance(Memory.wrap(dba.get()), Comparator.naturalOrder(), new ArrayOfStringsSerDe());
    // Fields 1..numInputs-1 are the split points; field 0 is the sketch itself.
    final String[] splitPoints = new String[numInputs - 1];
    for (int i = 1; i < numInputs; i++) {
        final Object splitField = input.get(i);
        if (!(splitField instanceof String)) {
            // Same null handling as for the sketch field above.
            final String actualType = (splitField == null) ? "null" : splitField.getClass().getSimpleName();
            throw new IllegalArgumentException("expected a string value as a split point, got " + actualType);
        }
        splitPoints[i - 1] = (String) splitField;
    }
    final double[] pmf = sketch.getPMF(splitPoints);
    if (pmf == null) {
        return null;
    }
    return Util.doubleArrayToTuple(pmf);
}
Also used : ArrayOfStringsSerDe(com.yahoo.sketches.ArrayOfStringsSerDe) DataByteArray(org.apache.pig.data.DataByteArray)

Aggregations

ArrayOfStringsSerDe (com.yahoo.sketches.ArrayOfStringsSerDe): 43, Test (org.testng.annotations.Test): 37, DataByteArray (org.apache.pig.data.DataByteArray): 23, Tuple (org.apache.pig.data.Tuple): 19, WritableMemory (com.yahoo.memory.WritableMemory): 15, DataBag (org.apache.pig.data.DataBag): 12, Memory (com.yahoo.memory.Memory): 10, ItemsSketch (com.yahoo.sketches.frequencies.ItemsSketch): 9, SketchesArgumentException (com.yahoo.sketches.SketchesArgumentException): 1