Search in sources :

Example 1 with BloomFilterSpecification

use of org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification in project asterixdb by apache.

the class BloomFilterTest method singleFieldTest.

/**
 * Bulk-loads a bloom filter keyed on field 0 with distinct random integer keys and
 * asserts that every loaded key is subsequently reported as contained.
 *
 * @throws Exception if any bloom filter or tuple operation fails
 */
@Test
public void singleFieldTest() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("TESTING BLOOM FILTER");
    }

    final int numElements = 100;
    final int[] keyFields = { 0 };
    IBufferCache bufferCache = harness.getBufferCache();
    BloomFilter bf = new BloomFilter(bufferCache, harness.getFileMapProvider(), harness.getFileReference(), keyFields);

    // Size the filter for the expected element count and a 10% false-positive target.
    final double targetFalsePositiveRate = 0.1;
    BloomFilterSpecification spec = BloomCalculations.computeBloomSpec(BloomCalculations.maxBucketsPerElement(numElements), targetFalsePositiveRate);

    bf.create();
    bf.activate();
    IIndexBulkLoader loader = bf.createBuilder(numElements, spec.getNumHashes(), spec.getNumBucketsPerElements());

    // Each tuple carries two integer fields: the key and its position in the key list.
    final int fieldCount = 2;
    ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();

    // Draw random keys until we hold numElements distinct values; the TreeSet both
    // de-duplicates and sorts them.
    final int maxKey = 1000;
    TreeSet<Integer> distinctKeys = new TreeSet<>();
    while (distinctKeys.size() < numElements) {
        distinctKeys.add(rnd.nextInt() % maxKey);
    }
    ArrayList<Integer> sortedKeys = new ArrayList<>(distinctKeys);

    // Insert tuples in the bloom filter.
    int position = 0;
    for (int key : sortedKeys) {
        TupleUtils.createIntegerTuple(tupleBuilder, tuple, key, position);
        loader.add(tuple);
        position++;
    }
    loader.end();

    // Check all the inserted tuples can be found.
    long[] hashes = new long[2];
    position = 0;
    for (int key : sortedKeys) {
        TupleUtils.createIntegerTuple(tupleBuilder, tuple, key, position);
        Assert.assertTrue(bf.contains(tuple, hashes));
        position++;
    }

    bf.deactivate();
    bf.destroy();
}
Also used : ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) ArrayList(java.util.ArrayList) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) BloomFilter(org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilter) IIndexBulkLoader(org.apache.hyracks.storage.common.IIndexBulkLoader) TreeSet(java.util.TreeSet) BloomFilterSpecification(org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification) IBufferCache(org.apache.hyracks.storage.common.buffercache.IBufferCache) AbstractBloomFilterTest(org.apache.hyracks.storage.am.bloomfilter.util.AbstractBloomFilterTest) Test(org.junit.Test)

Example 2 with BloomFilterSpecification

use of org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification in project asterixdb by apache.

the class BloomFilterTest method multiFieldTest.

/**
 * Bulk-loads a bloom filter keyed on fields 2, 4 and 1 of five-field tuples
 * (string, string, int, string, string) and asserts that every loaded tuple is
 * subsequently reported as contained.
 *
 * @throws Exception if any bloom filter or tuple operation fails
 */
@Test
public void multiFieldTest() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("TESTING BLOOM FILTER");
    }

    final int numElements = 10000;
    final int[] keyFields = { 2, 4, 1 };
    IBufferCache bufferCache = harness.getBufferCache();
    BloomFilter bf = new BloomFilter(bufferCache, harness.getFileMapProvider(), harness.getFileReference(), keyFields);

    // Size the filter for the expected element count and a 10% false-positive target.
    final double targetFalsePositiveRate = 0.1;
    BloomFilterSpecification spec = BloomCalculations.computeBloomSpec(BloomCalculations.maxBucketsPerElement(numElements), targetFalsePositiveRate);

    bf.create();
    bf.activate();
    IIndexBulkLoader loader = bf.createBuilder(numElements, spec.getNumHashes(), spec.getNumBucketsPerElements());

    final int fieldCount = 5;
    ISerializerDeserializer[] fieldSerdes = {
            new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(),
            IntegerSerializerDeserializer.INSTANCE,
            new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer() };
    ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();

    // Pre-generate the four string columns, one value per column per row.
    // NOTE(review): the requested length is rnd.nextInt() % maxLength, which may be
    // negative; how randomString treats that is not visible here - confirm.
    final int maxLength = 20;
    ArrayList<String> col0 = new ArrayList<>();
    ArrayList<String> col1 = new ArrayList<>();
    ArrayList<String> col3 = new ArrayList<>();
    ArrayList<String> col4 = new ArrayList<>();
    for (int row = 0; row < numElements; row++) {
        int len0 = rnd.nextInt() % maxLength;
        col0.add(randomString(len0, rnd));
        int len1 = rnd.nextInt() % maxLength;
        col1.add(randomString(len1, rnd));
        int len3 = rnd.nextInt() % maxLength;
        col3.add(randomString(len3, rnd));
        int len4 = rnd.nextInt() % maxLength;
        col4.add(randomString(len4, rnd));
    }

    // Insert tuples in the bloom filter; the row index serves as the integer field.
    for (int row = 0; row < numElements; row++) {
        TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, col0.get(row), col1.get(row), row, col3.get(row), col4.get(row));
        loader.add(tuple);
    }
    loader.end();

    // Rebuild each tuple and check that the filter reports it as present.
    long[] hashes = new long[2];
    for (int row = 0; row < numElements; row++) {
        TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, col0.get(row), col1.get(row), row, col3.get(row), col4.get(row));
        Assert.assertTrue(bf.contains(tuple, hashes));
    }

    bf.deactivate();
    bf.destroy();
}
Also used : ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) ArrayList(java.util.ArrayList) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) BloomFilter(org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilter) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IIndexBulkLoader(org.apache.hyracks.storage.common.IIndexBulkLoader) BloomFilterSpecification(org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification) IBufferCache(org.apache.hyracks.storage.common.buffercache.IBufferCache) AbstractBloomFilterTest(org.apache.hyracks.storage.am.bloomfilter.util.AbstractBloomFilterTest) Test(org.junit.Test)

Example 3 with BloomFilterSpecification

use of org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification in project asterixdb by apache.

the class ExternalBTreeWithBuddy method createComponentBulkLoader.

/**
 * Creates a bulk loader for the given disk component.
 * <p>
 * A bloom filter specification is computed only when {@code numElementsHint} is
 * positive; otherwise {@code null} is passed to the loader. The filter-aware loader
 * constructor is used only when {@code withFilter} is true and {@code filterFields}
 * is non-null.
 *
 * @param component the disk component to load; cast to {@code LSMBTreeWithBuddyDiskComponent}
 * @param fillFactor forwarded to the loader
 * @param verifyInput forwarded to the loader
 * @param numElementsHint expected number of elements; also used to size the bloom filter
 * @param checkIfEmptyIndex forwarded to the loader
 * @param withFilter whether the caller requests a filter-aware loader
 * @return the newly created bulk loader
 * @throws HyracksDataException declared by the overridden method
 */
@Override
public ILSMDiskComponentBulkLoader createComponentBulkLoader(ILSMDiskComponent component, float fillFactor, boolean verifyInput, long numElementsHint, boolean checkIfEmptyIndex, boolean withFilter) throws HyracksDataException {
    final BloomFilterSpecification spec;
    if (numElementsHint <= 0) {
        spec = null;
    } else {
        spec = BloomCalculations.computeBloomSpec(BloomCalculations.maxBucketsPerElement(numElementsHint), bloomFilterFalsePositiveRate);
    }

    final LSMBTreeWithBuddyDiskComponent target = (LSMBTreeWithBuddyDiskComponent) component;
    if (!withFilter || filterFields == null) {
        return new LSMBTreeWithBuddyDiskComponentBulkLoader(target, spec, fillFactor, verifyInput, numElementsHint, checkIfEmptyIndex);
    }
    return new LSMBTreeWithBuddyDiskComponentBulkLoader(target, spec, fillFactor, verifyInput, numElementsHint, checkIfEmptyIndex, filterManager, treeFields, filterFields, MultiComparator.create(component.getLSMComponentFilter().getFilterCmpFactories()));
}
Also used : BloomFilterSpecification(org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification)

Example 4 with BloomFilterSpecification

use of org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification in project asterixdb by apache.

the class LSMBTree method createComponentBulkLoader.

/**
 * Creates a bulk loader for the given disk component.
 * <p>
 * A bloom filter specification is computed only when {@code hasBloomFilter} is set;
 * otherwise {@code null} is passed to the loader. The filter-aware loader constructor
 * is used only when {@code withFilter} is true and {@code filterFields} is non-null.
 *
 * @param component the disk component to load; cast to {@code LSMBTreeDiskComponent}
 * @param fillFactor forwarded to the loader
 * @param verifyInput forwarded to the loader
 * @param numElementsHint expected number of elements; also used to size the bloom filter
 * @param checkIfEmptyIndex forwarded to the loader
 * @param withFilter whether the caller requests a filter-aware loader
 * @return the newly created bulk loader
 * @throws HyracksDataException declared by the overridden method
 */
@Override
public ILSMDiskComponentBulkLoader createComponentBulkLoader(ILSMDiskComponent component, float fillFactor, boolean verifyInput, long numElementsHint, boolean checkIfEmptyIndex, boolean withFilter) throws HyracksDataException {
    final BloomFilterSpecification spec;
    if (!hasBloomFilter) {
        spec = null;
    } else {
        spec = BloomCalculations.computeBloomSpec(BloomCalculations.maxBucketsPerElement(numElementsHint), bloomFilterFalsePositiveRate);
    }

    final LSMBTreeDiskComponent target = (LSMBTreeDiskComponent) component;
    if (!withFilter || filterFields == null) {
        return new LSMBTreeDiskComponentBulkLoader(target, spec, fillFactor, verifyInput, numElementsHint, checkIfEmptyIndex);
    }
    return new LSMBTreeDiskComponentBulkLoader(target, spec, fillFactor, verifyInput, numElementsHint, checkIfEmptyIndex, filterManager, treeFields, filterFields, MultiComparator.create(component.getLSMComponentFilter().getFilterCmpFactories()));
}
Also used : BloomFilterSpecification(org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification)

Example 5 with BloomFilterSpecification

use of org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification in project asterixdb by apache.

the class LSMInvertedIndex method createComponentBulkLoader.

/**
 * Creates a bulk loader for the given disk component.
 * <p>
 * A bloom filter specification is computed only when {@code numElementsHint} is
 * positive; otherwise {@code null} is passed to the loader. The filter-aware loader
 * constructor is used only when {@code withFilter} is true and {@code filterFields}
 * is non-null.
 *
 * @param component the disk component to load; cast to {@code LSMInvertedIndexDiskComponent}
 * @param fillFactor forwarded to the loader
 * @param verifyInput forwarded to the loader
 * @param numElementsHint expected number of elements; also used to size the bloom filter
 * @param checkIfEmptyIndex forwarded to the loader
 * @param withFilter whether the caller requests a filter-aware loader
 * @return the newly created bulk loader
 * @throws HyracksDataException declared by the overridden method
 */
@Override
public ILSMDiskComponentBulkLoader createComponentBulkLoader(ILSMDiskComponent component, float fillFactor, boolean verifyInput, long numElementsHint, boolean checkIfEmptyIndex, boolean withFilter) throws HyracksDataException {
    final BloomFilterSpecification spec;
    if (numElementsHint <= 0) {
        spec = null;
    } else {
        spec = BloomCalculations.computeBloomSpec(BloomCalculations.maxBucketsPerElement(numElementsHint), bloomFilterFalsePositiveRate);
    }

    final LSMInvertedIndexDiskComponent target = (LSMInvertedIndexDiskComponent) component;
    if (!withFilter || filterFields == null) {
        return new LSMInvertedIndexDiskComponentBulkLoader(target, spec, fillFactor, verifyInput, numElementsHint, checkIfEmptyIndex);
    }
    return new LSMInvertedIndexDiskComponentBulkLoader(target, spec, fillFactor, verifyInput, numElementsHint, checkIfEmptyIndex, filterManager, treeFields, filterFields, MultiComparator.create(component.getLSMComponentFilter().getFilterCmpFactories()));
}
Also used : BloomFilterSpecification(org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification)

Aggregations

BloomFilterSpecification (org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification)7 IIndexBulkLoader (org.apache.hyracks.storage.common.IIndexBulkLoader)3 ArrayList (java.util.ArrayList)2 ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)2 ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference)2 BloomFilter (org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilter)2 AbstractBloomFilterTest (org.apache.hyracks.storage.am.bloomfilter.util.AbstractBloomFilterTest)2 IBufferCache (org.apache.hyracks.storage.common.buffercache.IBufferCache)2 Test (org.junit.Test)2 TreeSet (java.util.TreeSet)1 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)1 ITupleReference (org.apache.hyracks.dataflow.common.data.accessors.ITupleReference)1 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)1 BTree (org.apache.hyracks.storage.am.btree.impls.BTree)1 ILSMIndexOperationContext (org.apache.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext)1 SearchPredicate (org.apache.hyracks.storage.am.rtree.impls.SearchPredicate)1 IIndexCursor (org.apache.hyracks.storage.common.IIndexCursor)1 ISearchPredicate (org.apache.hyracks.storage.common.ISearchPredicate)1