Use of org.apache.datasketches.theta.UpdateSketch in project sketches-core by DataSketches.
The class JaccardSimilarityTest, method checkExactMode2.
/**
 * Compares a tuple sketch against a theta sketch with the Jaccard helpers while the
 * number of distinct keys fed to each sketch does not exceed the nominal entries.
 *
 * <p>Phase 1: both sketches receive the same {@code nomEntries - 1} keys; the test expects
 * the similarity check to exceed the threshold and {@code exactlyEqual} to hold.
 * Phase 2: each sketch receives one more key, and the two keys differ; the test expects
 * both checks to fail.
 */
@Test
public void checkExactMode2() {
  final int nomEntries = 1 << 12;
  final int uniques = nomEntries;
  final double minSimilarity = 0.9999;
  println("Exact Mode, minK: " + nomEntries + "\t Th: " + minSimilarity);

  final UpdatableSketch<Double, DoubleSummary> measured =
      tupleBldr.setNominalEntries(nomEntries).build();
  final UpdateSketch expected = thetaBldr.setNominalEntries(nomEntries).build();

  // Phase 1: identical keys 0 .. uniques - 2 go into both sketches (one short of uniques).
  final int lastKey = uniques - 1;
  for (int key = 0; key < lastKey; key++) {
    measured.update(key, constSummary);
    expected.update(key);
  }

  final double[] sameKeysResult = jaccard(measured, expected, factory.newSummary(), dsso);
  // Element 1 of the jaccard result is the value compared against the threshold.
  final boolean similarWhenSame = sameKeysResult[1] > minSimilarity;
  println(similarWhenSame + "\t" + jaccardString(sameKeysResult));
  assertTrue(similarWhenSame);
  assertTrue(exactlyEqual(measured, expected, factory.newSummary(), dsso));

  // Phase 2: each sketch has now been fed uniques distinct keys, but the final key differs
  // (lastKey for the tuple sketch, uniques for the theta sketch).
  measured.update(lastKey, constSummary);
  expected.update(uniques);

  final double[] oneOffResult = jaccard(measured, expected, factory.newSummary(), dsso);
  final boolean similarWhenDifferent = oneOffResult[1] > minSimilarity;
  println(similarWhenDifferent + "\t" + jaccardString(oneOffResult));
  assertFalse(similarWhenDifferent);
  assertFalse(exactlyEqual(measured, expected, factory.newSummary(), dsso));
  println("");
}
Use of org.apache.datasketches.theta.UpdateSketch in project sketches-core by DataSketches.
The class TupleExamples2Test, method example3.
/**
 * Single-call (stateless) union and intersection of a tuple sketch with a theta sketch,
 * both operations built from {@code dsso2}.
 *
 * <p>The tuple sketch is loaded with keys 1..12 (summary value 1.0) and the theta sketch
 * with keys 4..15, so 9 keys are shared and 6 keys appear in only one sketch. The test
 * expects the union to hold 9 summaries equal to 2 and 6 summaries equal to 1, and every
 * summary of the intersection to equal 1.
 */
@Test
public void example3() {
  // Load source sketches: keys 1..12 into the tuple sketch, keys 4..15 into the theta sketch.
  final UpdatableSketch<Double, DoubleSummary> tupleSk = tupleBldr.build();
  final UpdateSketch thetaSk = thetaBldr.build();
  for (int key = 1; key <= 12; key++) {
    tupleSk.update(key, 1.0);
    thetaSk.update(key + 3);
  }

  // Union: both inputs plus the summary to pair with theta entries are passed in one call.
  final Union<DoubleSummary> union = new Union<>(dsso2);
  final CompactSketch<DoubleSummary> unionResult =
      union.union(tupleSk, thetaSk, ufactory.newSummary().update(1.0));
  println("Union: " + unionResult.getRetainedEntries());

  int onesSeen = 0;
  int twosSeen = 0;
  int row = 1;
  for (SketchIterator<DoubleSummary> it = unionResult.iterator(); it.next(); row++) {
    final int value = (int) it.getSummary().getValue();
    println(row + ", " + value);
    switch (value) {
      case 1:
        onesSeen++;
        break;
      case 2:
        twosSeen++;
        break;
      default:
        break;
    }
  }
  // Expected: 6 entries with value 1 and 9 entries with value 2.
  assertEquals(onesSeen, 6);
  assertEquals(twosSeen, 9);

  // Intersection: same single-call form; every resulting summary is expected to be 1.
  final Intersection<DoubleSummary> inter = new Intersection<>(dsso2);
  final CompactSketch<DoubleSummary> intersectResult =
      inter.intersect(tupleSk, thetaSk, ufactory.newSummary().update(1.0));
  println("Intersection: " + intersectResult.getRetainedEntries());

  row = 1;
  for (SketchIterator<DoubleSummary> it = intersectResult.iterator(); it.next(); row++) {
    final int value = (int) it.getSummary().getValue();
    println(row + ", " + value);
    assertEquals(value, 1);
  }
}
Use of org.apache.datasketches.theta.UpdateSketch in project sketches-core by DataSketches.
The class TupleExamples2Test, method example1.
/**
 * Stateful union and intersection of a tuple sketch with a theta sketch, both operations
 * built from {@code dsso2}: each input is fed to the operator in its own call and the
 * result is fetched afterwards with {@code getResult()}.
 *
 * <p>The tuple sketch is loaded with keys 1..12 (summary value 1.0) and the theta sketch
 * with keys 4..15, so 9 keys are shared and 6 keys appear in only one sketch. The test
 * expects the union to hold 9 summaries equal to 2 and 6 summaries equal to 1, and every
 * summary of the intersection to equal 1.
 */
@Test
public void example1() {
  // Load source sketches: keys 1..12 into the tuple sketch, keys 4..15 into the theta sketch.
  final UpdatableSketch<Double, DoubleSummary> tupleSk = tupleBldr.build();
  final UpdateSketch thetaSk = thetaBldr.build();
  for (int key = 1; key <= 12; key++) {
    tupleSk.update(key, 1.0);
    thetaSk.update(key + 3);
  }

  // Union: tuple sketch first, then the theta sketch with the summary to attach to it.
  final Union<DoubleSummary> union = new Union<>(dsso2);
  union.union(tupleSk);
  union.union(thetaSk, ufactory.newSummary().update(1.0));
  final CompactSketch<DoubleSummary> unionResult = union.getResult();
  println("Union Stateful: tuple, theta: " + unionResult.getRetainedEntries());

  int onesSeen = 0;
  int twosSeen = 0;
  int row = 1;
  for (SketchIterator<DoubleSummary> it = unionResult.iterator(); it.next(); row++) {
    final int value = (int) it.getSummary().getValue();
    println(row + ", " + value);
    switch (value) {
      case 1:
        onesSeen++;
        break;
      case 2:
        twosSeen++;
        break;
      default:
        break;
    }
  }
  // Expected: 6 entries with value 1 and 9 entries with value 2.
  assertEquals(onesSeen, 6);
  assertEquals(twosSeen, 9);

  // Intersection: same two-step feeding, here with a summary taken from ifactory.
  final Intersection<DoubleSummary> inter = new Intersection<>(dsso2);
  inter.intersect(tupleSk);
  inter.intersect(thetaSk, ifactory.newSummary().update(1.0));
  final CompactSketch<DoubleSummary> intersectResult = inter.getResult();
  println("Intersection Stateful: tuple, theta: " + intersectResult.getRetainedEntries());

  // Every summary in the intersection is expected to be 1.
  row = 1;
  for (SketchIterator<DoubleSummary> it = intersectResult.iterator(); it.next(); row++) {
    final int value = (int) it.getSummary().getValue();
    println(row + ", " + value);
    assertEquals(value, 1);
  }
}
Use of org.apache.datasketches.theta.UpdateSketch in project sketches-core by DataSketches.
The class AdoubleAnotBTest, method aNotBEstimationOverlap.
/**
 * A-not-B where sketch A is loaded with keys 0..8191 and both forms of B (a tuple sketch
 * and a theta sketch) are loaded with keys 0..4095, so every key in B is also in A.
 *
 * <p>The expected outcome is stored in {@code results} and checked by
 * {@code threeMethodsWithTheta}, first against the updatable sketches and then against
 * their compact forms.
 */
@Test
public void aNotBEstimationOverlap() {
  final int keysInA = 8192;
  final int keysInB = 4096;

  final UpdatableSketch<Double, DoubleSummary> sketchA = buildUpdatableTuple();
  for (int key = 0; key < keysInA; key++) {
    sketchA.update(key, 1.0);
  }

  // B as a tuple sketch.
  final UpdatableSketch<Double, DoubleSummary> sketchB = buildUpdatableTuple();
  for (int key = 0; key < keysInB; key++) {
    sketchB.update(key, 1.0);
  }

  // B as a theta sketch, loaded with the same keys.
  final UpdateSketch skThetaB = buildUpdateTheta();
  for (int key = 0; key < keysInB; key++) {
    skThetaB.update(key);
  }

  final AnotB<DoubleSummary> aNotB = new AnotB<>();
  results.set(2123, false, 4096.0, 0.03, 1.0);

  // Updatable inputs first, then the identical check on compact inputs.
  threeMethodsWithTheta(aNotB, sketchA, sketchB, skThetaB, results);
  threeMethodsWithTheta(
      aNotB, sketchA.compact(), sketchB.compact(), skThetaB.compact(), results);
}
Use of org.apache.datasketches.theta.UpdateSketch in project sketches-core by DataSketches.
The class AdoubleAnotBTest, method aNotBExactEmpty.
/**
 * A-not-B where sketch A is loaded with keys 1 and 2 and both forms of B (a tuple sketch
 * and a theta sketch) receive no updates at all.
 *
 * <p>The expected outcome is stored in {@code results} and checked by
 * {@code threeMethodsWithTheta}, first against the updatable sketches and then against
 * their compact forms.
 */
@Test
public void aNotBExactEmpty() {
  final UpdatableSketch<Double, DoubleSummary> sketchA = buildUpdatableTuple();
  for (int key = 1; key <= 2; key++) {
    sketchA.update(key, 1.0);
  }

  // Neither form of B is ever updated.
  final UpdatableSketch<Double, DoubleSummary> sketchB = buildUpdatableTuple();
  final UpdateSketch skThetaB = buildUpdateTheta();

  final AnotB<DoubleSummary> aNotB = new AnotB<>();
  results.set(2, false, 2.0, 0.0, 1.0);

  // Updatable inputs first, then the identical check on compact inputs.
  threeMethodsWithTheta(aNotB, sketchA, sketchB, skThetaB, results);
  threeMethodsWithTheta(
      aNotB, sketchA.compact(), sketchB.compact(), skThetaB.compact(), results);
}
Aggregations