Search in sources :

Example 26 with UpdateSketch

use of org.apache.datasketches.theta.UpdateSketch in project sketches-core by DataSketches.

the class JaccardSimilarityTest method checkExactMode2.

/**
 * Compares a tuple sketch ({@code measured}) with a theta sketch ({@code expected}) through
 * {@code jaccard} and {@code exactlyEqual}, first with identical keys and then with one
 * differing key.
 *
 * <p>With identical keys, index 1 of the jaccard result must exceed the threshold and
 * {@code exactlyEqual} must hold. After each sketch receives one additional, different key,
 * both checks must fail.
 */
@Test
public void checkExactMode2() {
    // Operands: tuple sketch (measured) versus theta sketch (expected).
    final int minK = 1 << 12;
    final int lastKey = minK - 1;
    final double minIndex = 0.9999;
    println("Exact Mode, minK: " + minK + "\t Th: " + minIndex);

    final UpdatableSketch<Double, DoubleSummary> measured = tupleBldr.setNominalEntries(minK).build();
    final UpdateSketch expected = thetaBldr.setNominalEntries(minK).build();

    // Feed keys 0 .. minK-2 to both sketches, leaving each one key short of minK.
    int key = 0;
    while (key < lastKey) {
        measured.update(key, constSummary);
        expected.update(key);
        key++;
    }

    // Same keys on both sides: the similarity check and the equality check must both pass.
    // NOTE(review): index 1 is presumably the Jaccard estimate - confirm against jaccard().
    final double[] sameKeys = jaccard(measured, expected, factory.newSummary(), dsso);
    final boolean aboveThreshold = sameKeys[1] > minIndex;
    println(aboveThreshold + "\t" + jaccardString(sameKeys));
    assertTrue(aboveThreshold);
    assertTrue(exactlyEqual(measured, expected, factory.newSummary(), dsso));

    // measured gets key minK-1 while expected gets key minK:
    // both now hold minK keys, but the two key sets differ by one.
    measured.update(lastKey, constSummary);
    expected.update(minK);

    final double[] shiftedKeys = jaccard(measured, expected, factory.newSummary(), dsso);
    final boolean stillAbove = shiftedKeys[1] > minIndex;
    println(stillAbove + "\t" + jaccardString(shiftedKeys));
    assertFalse(stillAbove);
    assertFalse(exactlyEqual(measured, expected, factory.newSummary(), dsso));
    println("");
}
Also used : DoubleSummary(org.apache.datasketches.tuple.adouble.DoubleSummary) UpdateSketch(org.apache.datasketches.theta.UpdateSketch) JaccardSimilarity.similarityTest(org.apache.datasketches.tuple.JaccardSimilarity.similarityTest) Test(org.testng.annotations.Test) JaccardSimilarity.dissimilarityTest(org.apache.datasketches.tuple.JaccardSimilarity.dissimilarityTest)

Example 27 with UpdateSketch

use of org.apache.datasketches.theta.UpdateSketch in project sketches-core by DataSketches.

the class TupleExamples2Test method example3.

/**
 * Combines a tuple sketch with a theta sketch using the single-call (stateless) forms of
 * {@code Union.union} and {@code Intersection.intersect}, both constructed with {@code dsso2}.
 *
 * <p>The tuple sketch is fed keys 1..12, each with value 1.0; the theta sketch is fed keys
 * 4..15. The union must yield six summaries whose value truncates to 1 and nine whose value
 * truncates to 2. Every summary in the intersection must truncate to 1.
 */
@Test
public void example3() {
    // Load source sketches; each theta key is the matching tuple key shifted up by 3.
    final UpdatableSketch<Double, DoubleSummary> tupleSk = tupleBldr.build();
    final UpdateSketch thetaSk = thetaBldr.build();
    int key = 1;
    while (key <= 12) {
        tupleSk.update(key, 1.0);
        thetaSk.update(key + 3);
        key++;
    }

    // Union: one call takes both operands plus the summary applied to the theta entries.
    final Union<DoubleSummary> union = new Union<>(dsso2);
    final DoubleSummary unionThetaSummary = ufactory.newSummary().update(1.0);
    final CompactSketch<DoubleSummary> ucsk = union.union(tupleSk, thetaSk, unionThetaSummary);
    println("Union: " + ucsk.getRetainedEntries());

    // Tally the truncated summary values; the asserts below expect 6 ones and 9 twos.
    int row = 1;
    int ones = 0;
    int twos = 0;
    for (final SketchIterator<DoubleSummary> it = ucsk.iterator(); it.next(); ) {
        final int value = (int) it.getSummary().getValue();
        println(row + ", " + value);
        row++;
        switch (value) {
            case 1:
                ones++;
                break;
            case 2:
                twos++;
                break;
            default:
                break;
        }
    }
    assertEquals(ones, 6);
    assertEquals(twos, 9);

    // Intersection: same single-call form; every retained summary must truncate to 1.
    final Intersection<DoubleSummary> inter = new Intersection<>(dsso2);
    final DoubleSummary interThetaSummary = ufactory.newSummary().update(1.0);
    final CompactSketch<DoubleSummary> icsk = inter.intersect(tupleSk, thetaSk, interThetaSummary);
    println("Intersection: " + icsk.getRetainedEntries());

    row = 1;
    for (final SketchIterator<DoubleSummary> it = icsk.iterator(); it.next(); ) {
        final int value = (int) it.getSummary().getValue();
        println(row + ", " + value);
        row++;
        assertEquals(value, 1);
    }
}
Also used : DoubleSummary(org.apache.datasketches.tuple.adouble.DoubleSummary) UpdateSketch(org.apache.datasketches.theta.UpdateSketch) Test(org.testng.annotations.Test)

Example 28 with UpdateSketch

use of org.apache.datasketches.theta.UpdateSketch in project sketches-core by DataSketches.

the class TupleExamples2Test method example1.

/**
 * Combines a tuple sketch with a theta sketch using the stateful forms of {@code Union} and
 * {@code Intersection}: each operand is fed in a separate call and the outcome is fetched
 * with {@code getResult()}. Both operators are constructed with {@code dsso2}.
 *
 * <p>The tuple sketch is fed keys 1..12, each with value 1.0; the theta sketch is fed keys
 * 4..15. The union must yield six summaries whose value truncates to 1 and nine whose value
 * truncates to 2. Every summary in the intersection must truncate to 1.
 */
@Test
public void example1() {
    // Load source sketches; each theta key is the matching tuple key shifted up by 3.
    final UpdatableSketch<Double, DoubleSummary> tupleSk = tupleBldr.build();
    final UpdateSketch thetaSk = thetaBldr.build();
    int key = 1;
    while (key <= 12) {
        tupleSk.update(key, 1.0);
        thetaSk.update(key + 3);
        key++;
    }

    // Union: tuple operand first, then the theta operand with the summary for its entries.
    final Union<DoubleSummary> union = new Union<>(dsso2);
    union.union(tupleSk);
    final DoubleSummary unionThetaSummary = ufactory.newSummary().update(1.0);
    union.union(thetaSk, unionThetaSummary);
    final CompactSketch<DoubleSummary> ucsk = union.getResult();
    println("Union Stateful: tuple, theta: " + ucsk.getRetainedEntries());

    // Tally the truncated summary values; the asserts below expect 6 ones and 9 twos.
    int row = 1;
    int ones = 0;
    int twos = 0;
    for (final SketchIterator<DoubleSummary> it = ucsk.iterator(); it.next(); ) {
        final int value = (int) it.getSummary().getValue();
        println(row + ", " + value);
        row++;
        switch (value) {
            case 1:
                ones++;
                break;
            case 2:
                twos++;
                break;
            default:
                break;
        }
    }
    assertEquals(ones, 6);
    assertEquals(twos, 9);

    // Intersection: same two-step feeding; every retained summary must truncate to 1.
    final Intersection<DoubleSummary> inter = new Intersection<>(dsso2);
    inter.intersect(tupleSk);
    final DoubleSummary interThetaSummary = ifactory.newSummary().update(1.0);
    inter.intersect(thetaSk, interThetaSummary);
    final CompactSketch<DoubleSummary> icsk = inter.getResult();
    println("Intersection Stateful: tuple, theta: " + icsk.getRetainedEntries());

    row = 1;
    for (final SketchIterator<DoubleSummary> it = icsk.iterator(); it.next(); ) {
        final int value = (int) it.getSummary().getValue();
        println(row + ", " + value);
        row++;
        assertEquals(value, 1);
    }
}
Also used : DoubleSummary(org.apache.datasketches.tuple.adouble.DoubleSummary) UpdateSketch(org.apache.datasketches.theta.UpdateSketch) Test(org.testng.annotations.Test)

Example 29 with UpdateSketch

use of org.apache.datasketches.theta.UpdateSketch in project sketches-core by DataSketches.

the class AdoubleAnotBTest method aNotBEstimationOverlap.

/**
 * Runs A-not-B where A holds keys 0..8191 and B holds keys 0..4095, so B's keys are a subset
 * of A's. B is supplied both as a tuple sketch and as a theta sketch with the same keys.
 *
 * <p>The check is run twice through {@code threeMethodsWithTheta}: once on the updatable
 * sketches and once on their compact forms, against the same expected {@code results}.
 */
@Test
public void aNotBEstimationOverlap() {
    final int keysInA = 8192;
    final int keysInB = 4096;

    final UpdatableSketch<Double, DoubleSummary> sketchA = buildUpdatableTuple();
    int key = 0;
    while (key < keysInA) {
        sketchA.update(key, 1.0);
        key++;
    }

    final UpdatableSketch<Double, DoubleSummary> sketchB = buildUpdatableTuple();
    key = 0;
    while (key < keysInB) {
        sketchB.update(key, 1.0);
        key++;
    }

    // Theta counterpart of B, loaded with the same keys.
    final UpdateSketch skThetaB = buildUpdateTheta();
    key = 0;
    while (key < keysInB) {
        skThetaB.update(key);
        key++;
    }

    final AnotB<DoubleSummary> aNotB = new AnotB<>();
    // NOTE(review): arguments are presumably retained entries, empty flag, estimate,
    // tolerance and summary value - confirm against the results helper.
    results.set(2123, false, 4096.0, 0.03, 1.0);

    // First with the updatable sketches ...
    threeMethodsWithTheta(aNotB, sketchA, sketchB, skThetaB, results);
    // ... then the same expectations with the compact forms.
    threeMethodsWithTheta(aNotB, sketchA.compact(), sketchB.compact(), skThetaB.compact(), results);
}
Also used : AnotB(org.apache.datasketches.tuple.AnotB) UpdateSketch(org.apache.datasketches.theta.UpdateSketch) Test(org.testng.annotations.Test)

Example 30 with UpdateSketch

use of org.apache.datasketches.theta.UpdateSketch in project sketches-core by DataSketches.

the class AdoubleAnotBTest method aNotBExactEmpty.

/**
 * Runs A-not-B where A holds the two keys 1 and 2 and B receives no updates at all. B is
 * supplied both as a tuple sketch and as a theta sketch, neither of which is updated.
 *
 * <p>The check is run twice through {@code threeMethodsWithTheta}: once on the updatable
 * sketches and once on their compact forms, against the same expected {@code results}.
 */
@Test
public void aNotBExactEmpty() {
    final UpdatableSketch<Double, DoubleSummary> sketchA = buildUpdatableTuple();
    for (int key = 1; key <= 2; key++) {
        sketchA.update(key, 1.0);
    }

    // Both forms of B are left without any updates.
    final UpdatableSketch<Double, DoubleSummary> sketchB = buildUpdatableTuple();
    final UpdateSketch skThetaB = buildUpdateTheta();

    final AnotB<DoubleSummary> aNotB = new AnotB<>();
    // NOTE(review): arguments are presumably retained entries, empty flag, estimate,
    // tolerance and summary value - confirm against the results helper.
    results.set(2, false, 2.0, 0.0, 1.0);

    // First with the updatable sketches ...
    threeMethodsWithTheta(aNotB, sketchA, sketchB, skThetaB, results);
    // ... then the same expectations with the compact forms.
    threeMethodsWithTheta(aNotB, sketchA.compact(), sketchB.compact(), skThetaB.compact(), results);
}
Also used : AnotB(org.apache.datasketches.tuple.AnotB) UpdateSketch(org.apache.datasketches.theta.UpdateSketch) Test(org.testng.annotations.Test)

Aggregations

UpdateSketch (org.apache.datasketches.theta.UpdateSketch): 46, Test (org.testng.annotations.Test): 42, DoubleSummary (org.apache.datasketches.tuple.adouble.DoubleSummary): 12, AnotB (org.apache.datasketches.tuple.AnotB): 6, JaccardSimilarity.dissimilarityTest (org.apache.datasketches.tuple.JaccardSimilarity.dissimilarityTest): 6, JaccardSimilarity.similarityTest (org.apache.datasketches.tuple.JaccardSimilarity.similarityTest): 6, UpdateSketchBuilder (org.apache.datasketches.theta.UpdateSketchBuilder): 5, Intersection (org.apache.datasketches.tuple.Intersection): 4, MapBasedRow (org.apache.druid.data.input.MapBasedRow): 3, TestColumnSelectorFactory (org.apache.druid.query.groupby.epinephelinae.TestColumnSelectorFactory): 3, Test (org.junit.Test): 3, SketchesArgumentException (org.apache.datasketches.SketchesArgumentException): 2, IntegerSummary (org.apache.datasketches.tuple.aninteger.IntegerSummary): 2, GroupByQueryRunnerTest (org.apache.druid.query.groupby.GroupByQueryRunnerTest): 2, SketchesStateException (org.apache.datasketches.SketchesStateException): 1, CompactSketch (org.apache.datasketches.theta.CompactSketch): 1, Intersection (org.apache.datasketches.theta.Intersection): 1, Union (org.apache.datasketches.tuple.Union): 1, SketchHolder (org.apache.druid.query.aggregation.datasketches.theta.SketchHolder): 1, InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 1