Search in sources :

Example 16 with ArrayOfStringsSerDe

use of com.yahoo.sketches.ArrayOfStringsSerDe in project sketches-pig by DataSketches.

the class FrequentStringsSketchToEstimatesTest method exact.

/**
 * Serializes a small sketch ("a" twice, "b" once), passes it through
 * FrequentStringsSketchToEstimates, and checks the two result rows.
 */
@Test
public void exact() throws Exception {
    final EvalFunc<DataBag> udf = new FrequentStringsSketchToEstimates();

    // Input sketch: two updates of "a", one update of "b".
    final ItemsSketch<String> source = new ItemsSketch<>(8);
    source.update("a");
    source.update("a");
    source.update("b");

    // Serialize the sketch and wrap the bytes in a tuple for the UDF.
    final byte[] serialized = source.toByteArray(new ArrayOfStringsSerDe());
    final DataBag estimates = udf.exec(PigUtil.objectsToTuple(new DataByteArray(serialized)));
    Assert.assertNotNull(estimates);
    Assert.assertEquals(estimates.size(), 2);

    final Iterator<Tuple> rows = estimates.iterator();

    // First row: item "a"; each of the three numeric fields must equal 2.
    final Tuple rowA = rows.next();
    Assert.assertEquals(rowA.size(), 4);
    Assert.assertEquals((String) rowA.get(0), "a");
    for (int field = 1; field <= 3; field++) {
        Assert.assertEquals((long) rowA.get(field), 2L);
    }

    // Second row: item "b"; each of the three numeric fields must equal 1.
    final Tuple rowB = rows.next();
    Assert.assertEquals(rowB.size(), 4);
    Assert.assertEquals((String) rowB.get(0), "b");
    for (int field = 1; field <= 3; field++) {
        Assert.assertEquals((long) rowB.get(field), 1L);
    }
}
Also used : ArrayOfStringsSerDe(com.yahoo.sketches.ArrayOfStringsSerDe) DataBag(org.apache.pig.data.DataBag) ItemsSketch(com.yahoo.sketches.frequencies.ItemsSketch) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 17 with ArrayOfStringsSerDe

use of com.yahoo.sketches.ArrayOfStringsSerDe in project sketches-pig by DataSketches.

the class UnionFrequentStringsSketchTest method accumulatorNullSketch.

/**
 * Accumulates a bag whose only tuple holds a null value and checks that the
 * union still returns a serialized, empty sketch.
 */
@Test
public void accumulatorNullSketch() throws Exception {
    final Accumulator<Tuple> union = new UnionFrequentStringsSketch("8");

    // Input: tuple -> bag -> tuple wrapping a null object.
    final Tuple nullHolder = PigUtil.objectsToTuple((Object) null);
    final Tuple input = PigUtil.objectsToTuple(PigUtil.tuplesToBag(nullHolder));
    union.accumulate(input);

    final Tuple result = union.getValue();
    Assert.assertNotNull(result);
    Assert.assertEquals(result.size(), 1);

    final DataByteArray payload = (DataByteArray) result.get(0);
    Assert.assertTrue(payload.size() > 0);

    // Rebuild the sketch from the returned bytes; it must hold no items.
    final Memory mem = Memory.wrap(payload.get());
    final ItemsSketch<String> restored = ItemsSketch.getInstance(mem, new ArrayOfStringsSerDe());
    Assert.assertTrue(restored.isEmpty());
    Assert.assertEquals(restored.getNumActiveItems(), 0);
}
Also used : ArrayOfStringsSerDe(com.yahoo.sketches.ArrayOfStringsSerDe) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 18 with ArrayOfStringsSerDe

use of com.yahoo.sketches.ArrayOfStringsSerDe in project sketches-pig by DataSketches.

the class UnionFrequentStringsSketchTest method accumulatorNotABag.

/**
 * Accumulates a tuple whose single field is null rather than a bag and checks
 * that the union still returns a serialized, empty sketch.
 */
@Test
public void accumulatorNotABag() throws Exception {
    final Accumulator<Tuple> union = new UnionFrequentStringsSketch("8");

    // The tuple's only field is null, so there is no bag to read sketches from.
    final Tuple input = PigUtil.objectsToTuple((Object) null);
    union.accumulate(input);

    final Tuple result = union.getValue();
    Assert.assertNotNull(result);
    Assert.assertEquals(result.size(), 1);

    final DataByteArray payload = (DataByteArray) result.get(0);
    Assert.assertTrue(payload.size() > 0);

    // Rebuild the sketch from the returned bytes; it must hold no items.
    final Memory mem = Memory.wrap(payload.get());
    final ItemsSketch<String> restored = ItemsSketch.getInstance(mem, new ArrayOfStringsSerDe());
    Assert.assertTrue(restored.isEmpty());
    Assert.assertEquals(restored.getNumActiveItems(), 0);
}
Also used : ArrayOfStringsSerDe(com.yahoo.sketches.ArrayOfStringsSerDe) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 19 with ArrayOfStringsSerDe

use of com.yahoo.sketches.ArrayOfStringsSerDe in project sketches-pig by DataSketches.

the class UnionFrequentStringsSketchTest method accumulator.

/**
 * Accumulates two bags, each carrying one serialized sketch updated once with
 * "a" and once with "b", and checks the merged sketch reports 2 for each item.
 */
@Test
public void accumulator() throws Exception {
    final Accumulator<Tuple> union = new UnionFrequentStringsSketch("8");

    // Two accumulate calls, each with a fresh bag holding one freshly built sketch.
    for (int call = 0; call < 2; call++) {
        final DataBag batch = BagFactory.getInstance().newDefaultBag();
        final ItemsSketch<String> part = new ItemsSketch<>(8);
        part.update("a");
        part.update("b");
        final byte[] partBytes = part.toByteArray(new ArrayOfStringsSerDe());
        batch.add(PigUtil.objectsToTuple(new DataByteArray(partBytes)));
        union.accumulate(PigUtil.objectsToTuple(batch));
    }

    final Tuple result = union.getValue();
    Assert.assertNotNull(result);
    Assert.assertEquals(result.size(), 1);

    final DataByteArray payload = (DataByteArray) result.get(0);
    Assert.assertTrue(payload.size() > 0);

    // Rebuild the merged sketch from the returned bytes and inspect its contents.
    final Memory mem = Memory.wrap(payload.get());
    final ItemsSketch<String> merged = ItemsSketch.getInstance(mem, new ArrayOfStringsSerDe());
    Assert.assertFalse(merged.isEmpty());
    Assert.assertEquals(merged.getNumActiveItems(), 2);
    Assert.assertEquals(merged.getEstimate("a"), 2);
    Assert.assertEquals(merged.getEstimate("b"), 2);
}
Also used : ArrayOfStringsSerDe(com.yahoo.sketches.ArrayOfStringsSerDe) DataBag(org.apache.pig.data.DataBag) ItemsSketch(com.yahoo.sketches.frequencies.ItemsSketch) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Example 20 with ArrayOfStringsSerDe

use of com.yahoo.sketches.ArrayOfStringsSerDe in project sketches-pig by DataSketches.

the class UnionFrequentStringsSketchTest method accumulatorNullInput.

/**
 * Passes a null tuple to accumulate and checks that the union still returns
 * a serialized, empty sketch.
 */
@Test
public void accumulatorNullInput() throws Exception {
    final Accumulator<Tuple> union = new UnionFrequentStringsSketch("8");

    // No input tuple at all.
    union.accumulate(null);

    final Tuple result = union.getValue();
    Assert.assertNotNull(result);
    Assert.assertEquals(result.size(), 1);

    final DataByteArray payload = (DataByteArray) result.get(0);
    Assert.assertTrue(payload.size() > 0);

    // Rebuild the sketch from the returned bytes; it must hold no items.
    final Memory mem = Memory.wrap(payload.get());
    final ItemsSketch<String> restored = ItemsSketch.getInstance(mem, new ArrayOfStringsSerDe());
    Assert.assertTrue(restored.isEmpty());
    Assert.assertEquals(restored.getNumActiveItems(), 0);
}
Also used : ArrayOfStringsSerDe(com.yahoo.sketches.ArrayOfStringsSerDe) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple) Test(org.testng.annotations.Test)

Aggregations

ArrayOfStringsSerDe (com.yahoo.sketches.ArrayOfStringsSerDe): 43; Test (org.testng.annotations.Test): 37; DataByteArray (org.apache.pig.data.DataByteArray): 23; Tuple (org.apache.pig.data.Tuple): 19; WritableMemory (com.yahoo.memory.WritableMemory): 15; DataBag (org.apache.pig.data.DataBag): 12; Memory (com.yahoo.memory.Memory): 10; ItemsSketch (com.yahoo.sketches.frequencies.ItemsSketch): 9; SketchesArgumentException (com.yahoo.sketches.SketchesArgumentException): 1