Search in sources :

Example 16 with Hasher

Use of org.apache.commons.collections4.bloomfilter.hasher.Hasher in the Apache commons-collections project.

Class SetOperationsTest, method estimateSizeTest.

/**
 * Checks the value returned by {@code SetOperations.estimateSize} for three
 * filters, each built by feeding a list of consecutive integers (1..17, 1..20
 * and 1..33) through a {@code StaticHasher} using the shared {@code shape}.
 */
@Test
public final void estimateSizeTest() {
    // first filter: the integers 1 through 17
    final List<Integer> upToSeventeen = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
    final Hasher hasherSeventeen = new StaticHasher(upToSeventeen.iterator(), shape);
    final BloomFilter filterSeventeen = new HasherBloomFilter(hasherSeventeen, shape);
    assertEquals(1, SetOperations.estimateSize(filterSeventeen));

    // Original author's note: the data provided above do not generate an
    // estimate that is equivalent to the actual.
    // second filter: the integers 1 through 20
    final List<Integer> upToTwenty = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
    final Hasher hasherTwenty = new StaticHasher(upToTwenty.iterator(), shape);
    final BloomFilter filterTwenty = new HasherBloomFilter(hasherTwenty, shape);
    assertEquals(1, SetOperations.estimateSize(filterTwenty));

    // third filter: the integers 1 through 33
    final List<Integer> upToThirtyThree = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33);
    final Hasher hasherThirtyThree = new StaticHasher(upToThirtyThree.iterator(), shape);
    final BloomFilter filterThirtyThree = new HasherBloomFilter(hasherThirtyThree, shape);
    assertEquals(3, SetOperations.estimateSize(filterThirtyThree));
}
Also used : Hasher(org.apache.commons.collections4.bloomfilter.hasher.Hasher) StaticHasher(org.apache.commons.collections4.bloomfilter.hasher.StaticHasher) StaticHasher(org.apache.commons.collections4.bloomfilter.hasher.StaticHasher) Test(org.junit.jupiter.api.Test)

Example 17 with Hasher

Use of org.apache.commons.collections4.bloomfilter.hasher.Hasher in the Apache commons-collections project.

Class SetOperationsTest, method jaccardSimilarityTest_NoValues.

/**
 * Exercises {@code SetOperations.jaccardSimilarity} when one or both filters
 * are empty. The test expects 0.0 for two empty filters and 1.0 when an empty
 * filter is paired with a populated one, in either argument order.
 */
@Test
public final void jaccardSimilarityTest_NoValues() {
    final double tolerance = 0.0001;

    // two filters created from the shape alone, with no hasher
    final BloomFilter emptyA = new HasherBloomFilter(shape);
    final BloomFilter emptyB = new HasherBloomFilter(shape);

    // one filter populated from the integers 1 through 17
    final List<Integer> values = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
    final Hasher valuesHasher = new StaticHasher(values.iterator(), shape);
    final BloomFilter populated = new HasherBloomFilter(valuesHasher, shape);

    // empty against empty
    assertEquals(0.0, SetOperations.jaccardSimilarity(emptyA, emptyB), tolerance);
    assertEquals(0.0, SetOperations.jaccardSimilarity(emptyB, emptyA), tolerance);

    // empty against populated, both argument orders
    assertEquals(1.0, SetOperations.jaccardSimilarity(emptyA, populated), tolerance);
    assertEquals(1.0, SetOperations.jaccardSimilarity(populated, emptyA), tolerance);
}
Also used : Hasher(org.apache.commons.collections4.bloomfilter.hasher.Hasher) StaticHasher(org.apache.commons.collections4.bloomfilter.hasher.StaticHasher) StaticHasher(org.apache.commons.collections4.bloomfilter.hasher.StaticHasher) Test(org.junit.jupiter.api.Test)

Example 18 with Hasher

Use of org.apache.commons.collections4.bloomfilter.hasher.Hasher in the Apache commons-collections project.

Class SetOperationsTest, method testDifferentShapesThrows.

/**
 * Verifies that {@code SetOperations.cosineDistance} throws an
 * {@link IllegalArgumentException} when its two arguments were built with
 * different shapes: the shared {@code shape} and a locally constructed
 * {@code Shape(testFunction, 3, 72, 18)}.
 */
@Test
public void testDifferentShapesThrows() {
    // filter using the shared shape
    final List<Integer> firstValues = Arrays.asList(1, 2);
    final Hasher firstHasher = new StaticHasher(firstValues.iterator(), shape);
    final BloomFilter firstFilter = new HasherBloomFilter(firstHasher, shape);

    // filter using a shape constructed only for this test
    final Shape otherShape = new Shape(testFunction, 3, 72, 18);
    final List<Integer> secondValues = Arrays.asList(2, 3);
    final Hasher secondHasher = new StaticHasher(secondValues.iterator(), otherShape);
    final BloomFilter secondFilter = new HasherBloomFilter(secondHasher, otherShape);

    assertThrows(IllegalArgumentException.class,
        () -> SetOperations.cosineDistance(firstFilter, secondFilter));
}
Also used : Hasher(org.apache.commons.collections4.bloomfilter.hasher.Hasher) StaticHasher(org.apache.commons.collections4.bloomfilter.hasher.StaticHasher) Shape(org.apache.commons.collections4.bloomfilter.hasher.Shape) StaticHasher(org.apache.commons.collections4.bloomfilter.hasher.StaticHasher) Test(org.junit.jupiter.api.Test)

Example 19 with Hasher

Use of org.apache.commons.collections4.bloomfilter.hasher.Hasher in the Apache commons-collections project.

Class SetOperationsTest, method cosineDistanceTest_NoValues.

/**
 * Exercises {@code SetOperations.cosineDistance} when one or both filters are
 * empty. The test expects 1.0 in every case: empty against empty, and empty
 * against a populated filter in either argument order.
 */
@Test
public final void cosineDistanceTest_NoValues() {
    final double tolerance = 0.0001;

    // two filters created from the shape alone, with no hasher
    final BloomFilter emptyA = new HasherBloomFilter(shape);
    final BloomFilter emptyB = new HasherBloomFilter(shape);

    // one filter populated from the integers 1 through 17
    final List<Integer> values = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
    final Hasher valuesHasher = new StaticHasher(values.iterator(), shape);
    final BloomFilter populated = new HasherBloomFilter(valuesHasher, shape);

    // empty against empty
    assertEquals(1.0, SetOperations.cosineDistance(emptyA, emptyB), tolerance);
    assertEquals(1.0, SetOperations.cosineDistance(emptyB, emptyA), tolerance);

    // empty against populated, both argument orders
    assertEquals(1.0, SetOperations.cosineDistance(emptyA, populated), tolerance);
    assertEquals(1.0, SetOperations.cosineDistance(populated, emptyA), tolerance);
}
Also used : Hasher(org.apache.commons.collections4.bloomfilter.hasher.Hasher) StaticHasher(org.apache.commons.collections4.bloomfilter.hasher.StaticHasher) StaticHasher(org.apache.commons.collections4.bloomfilter.hasher.StaticHasher) Test(org.junit.jupiter.api.Test)

Example 20 with Hasher

Use of org.apache.commons.collections4.bloomfilter.hasher.Hasher in the Apache commons-collections project.

Class SetOperationsTest, method jaccardDistanceTest_NoValues.

/**
 * Exercises {@code SetOperations.jaccardDistance} when one or both filters are
 * empty. The test expects 1.0 for two empty filters and 0.0 when an empty
 * filter is paired with a populated one, in either argument order.
 */
@Test
public final void jaccardDistanceTest_NoValues() {
    final double tolerance = 0.0001;

    // two filters created from the shape alone, with no hasher
    final BloomFilter emptyA = new HasherBloomFilter(shape);
    final BloomFilter emptyB = new HasherBloomFilter(shape);

    // one filter populated from the integers 1 through 17
    final List<Integer> values = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
    final Hasher valuesHasher = new StaticHasher(values.iterator(), shape);
    final BloomFilter populated = new HasherBloomFilter(valuesHasher, shape);

    // empty against empty
    assertEquals(1.0, SetOperations.jaccardDistance(emptyA, emptyB), tolerance);
    assertEquals(1.0, SetOperations.jaccardDistance(emptyB, emptyA), tolerance);

    // empty against populated, both argument orders
    assertEquals(0.0, SetOperations.jaccardDistance(emptyA, populated), tolerance);
    assertEquals(0.0, SetOperations.jaccardDistance(populated, emptyA), tolerance);
}
Also used : Hasher(org.apache.commons.collections4.bloomfilter.hasher.Hasher) StaticHasher(org.apache.commons.collections4.bloomfilter.hasher.StaticHasher) StaticHasher(org.apache.commons.collections4.bloomfilter.hasher.StaticHasher) Test(org.junit.jupiter.api.Test)

Aggregations

Hasher (org.apache.commons.collections4.bloomfilter.hasher.Hasher): 37; Test (org.junit.jupiter.api.Test): 35; StaticHasher (org.apache.commons.collections4.bloomfilter.hasher.StaticHasher): 34; Shape (org.apache.commons.collections4.bloomfilter.hasher.Shape): 10; OfInt (java.util.PrimitiveIterator.OfInt): 2; IntConsumer (java.util.function.IntConsumer): 2; DynamicHasher (org.apache.commons.collections4.bloomfilter.hasher.DynamicHasher): 2; IteratorChain (org.apache.commons.collections4.iterators.IteratorChain): 2; ArrayList (java.util.ArrayList): 1; Arrays (java.util.Arrays): 1; Set (java.util.Set): 1; TreeSet (java.util.TreeSet): 1; HashFunctionIdentity (org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity): 1; MD5Cyclic (org.apache.commons.collections4.bloomfilter.hasher.function.MD5Cyclic): 1; EmptyIterator (org.apache.commons.collections4.iterators.EmptyIterator): 1