Use of org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification in project asterixdb by Apache.
From the class BloomFilterTest, method singleFieldTest.
/**
 * Bulk-loads 100 distinct random integer keys into a bloom filter keyed on field 0
 * and asserts that every loaded key is subsequently reported as contained.
 *
 * @throws Exception if any bloom filter or tuple operation fails
 */
@Test
public void singleFieldTest() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("TESTING BLOOM FILTER");
    }

    final int elementCount = 100;
    final int[] bloomKeyFields = { 0 };
    final IBufferCache cache = harness.getBufferCache();
    final BloomFilter filter =
            new BloomFilter(cache, harness.getFileMapProvider(), harness.getFileReference(), bloomKeyFields);

    // Derive the number of hashes and buckets per element for a 10% false-positive target.
    final double acceptableFalsePositiveRate = 0.1;
    final BloomFilterSpecification spec = BloomCalculations.computeBloomSpec(
            BloomCalculations.maxBucketsPerElement(elementCount), acceptableFalsePositiveRate);

    filter.create();
    filter.activate();
    final IIndexBulkLoader loader =
            filter.createBuilder(elementCount, spec.getNumHashes(), spec.getNumBucketsPerElements());

    // Each tuple carries two integer fields: the key and its position in the sorted key list.
    final ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(2);
    final ArrayTupleReference tuple = new ArrayTupleReference();

    // Draw random keys until there are enough distinct ones. Java's % keeps the sign of the
    // dividend, so keys lie strictly between -keyBound and keyBound.
    final int keyBound = 1000;
    final TreeSet<Integer> distinctKeys = new TreeSet<>();
    while (distinctKeys.size() < elementCount) {
        distinctKeys.add(rnd.nextInt() % keyBound);
    }
    // The copy preserves the set's ascending iteration order.
    final ArrayList<Integer> sortedKeys = new ArrayList<>(distinctKeys);

    // Insert tuples in the bloom filter.
    int position = 0;
    for (Integer key : sortedKeys) {
        TupleUtils.createIntegerTuple(tupleBuilder, tuple, key, position++);
        loader.add(tuple);
    }
    loader.end();

    // Check all the inserted tuples can be found.
    final long[] hashes = new long[2];
    position = 0;
    for (Integer key : sortedKeys) {
        TupleUtils.createIntegerTuple(tupleBuilder, tuple, key, position++);
        Assert.assertTrue(filter.contains(tuple, hashes));
    }

    filter.deactivate();
    filter.destroy();
}
Use of org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification in project asterixdb by Apache.
From the class BloomFilterTest, method multiFieldTest.
/**
 * Bulk-loads 10000 five-field tuples (string, string, int, string, string) into a bloom
 * filter keyed on fields 2, 4 and 1, then asserts that every loaded tuple is reported as
 * contained.
 *
 * @throws Exception if any bloom filter or tuple operation fails
 */
@Test
public void multiFieldTest() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("TESTING BLOOM FILTER");
    }

    final int elementCount = 10000;
    final int[] bloomKeyFields = { 2, 4, 1 };
    final IBufferCache cache = harness.getBufferCache();
    final BloomFilter filter =
            new BloomFilter(cache, harness.getFileMapProvider(), harness.getFileReference(), bloomKeyFields);

    // Derive the number of hashes and buckets per element for a 10% false-positive target.
    final double acceptableFalsePositiveRate = 0.1;
    final BloomFilterSpecification spec = BloomCalculations.computeBloomSpec(
            BloomCalculations.maxBucketsPerElement(elementCount), acceptableFalsePositiveRate);

    filter.create();
    filter.activate();
    final IIndexBulkLoader loader =
            filter.createBuilder(elementCount, spec.getNumHashes(), spec.getNumBucketsPerElements());

    // Field layout: four string fields surrounding one integer field at index 2.
    final ISerializerDeserializer[] fieldSerdes = {
            new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(),
            IntegerSerializerDeserializer.INSTANCE,
            new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer() };
    final ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(5);
    final ArrayTupleReference tuple = new ArrayTupleReference();

    // Generate the four string columns row by row, keeping the draw order from rnd fixed.
    // NOTE(review): rnd.nextInt() % lengthBound keeps the dividend's sign, so the requested
    // length can be negative; how randomString treats that is not visible here - confirm.
    final int lengthBound = 20;
    final ArrayList<String> firstColumn = new ArrayList<>();
    final ArrayList<String> secondColumn = new ArrayList<>();
    final ArrayList<String> fourthColumn = new ArrayList<>();
    final ArrayList<String> fifthColumn = new ArrayList<>();
    for (int row = 0; row < elementCount; ++row) {
        firstColumn.add(randomString(rnd.nextInt() % lengthBound, rnd));
        secondColumn.add(randomString(rnd.nextInt() % lengthBound, rnd));
        fourthColumn.add(randomString(rnd.nextInt() % lengthBound, rnd));
        fifthColumn.add(randomString(rnd.nextInt() % lengthBound, rnd));
    }

    // Insert every row into the bloom filter; the row index serves as the integer field.
    for (int row = 0; row < elementCount; ++row) {
        TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, firstColumn.get(row), secondColumn.get(row),
                row, fourthColumn.get(row), fifthColumn.get(row));
        loader.add(tuple);
    }
    loader.end();

    // Rebuild each row and check that the filter reports it as present.
    final long[] hashes = new long[2];
    for (int row = 0; row < elementCount; ++row) {
        TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, firstColumn.get(row), secondColumn.get(row),
                row, fourthColumn.get(row), fifthColumn.get(row));
        Assert.assertTrue(filter.contains(tuple, hashes));
    }

    filter.deactivate();
    filter.destroy();
}
Use of org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification in project asterixdb by Apache.
From the class ExternalBTreeWithBuddy, method createComponentBulkLoader.
/**
 * Creates the bulk loader for the given disk component.
 *
 * A bloom filter specification is computed only when {@code numElementsHint} is positive;
 * otherwise {@code null} is handed to the loader. When {@code withFilter} is set and this
 * index has filter fields, the loader is additionally given the filter manager, the tree
 * and filter fields, and a comparator built from the component filter's comparator factories.
 *
 * @param component the disk component to load; cast to {@code LSMBTreeWithBuddyDiskComponent}
 * @param fillFactor passed through to the loader
 * @param verifyInput passed through to the loader
 * @param numElementsHint expected element count, used to size the bloom filter
 * @param checkIfEmptyIndex passed through to the loader
 * @param withFilter whether the filter-aware loader variant is requested
 * @return the bulk loader for {@code component}
 * @throws HyracksDataException declared by the overridden method
 */
@Override
public ILSMDiskComponentBulkLoader createComponentBulkLoader(ILSMDiskComponent component, float fillFactor, boolean verifyInput, long numElementsHint, boolean checkIfEmptyIndex, boolean withFilter) throws HyracksDataException {
    final BloomFilterSpecification spec = numElementsHint > 0
            ? BloomCalculations.computeBloomSpec(BloomCalculations.maxBucketsPerElement(numElementsHint),
                    bloomFilterFalsePositiveRate)
            : null;
    final LSMBTreeWithBuddyDiskComponent diskComponent = (LSMBTreeWithBuddyDiskComponent) component;

    // Without a requested filter, or without filter fields, use the plain loader.
    if (!withFilter || filterFields == null) {
        return new LSMBTreeWithBuddyDiskComponentBulkLoader(diskComponent, spec, fillFactor, verifyInput,
                numElementsHint, checkIfEmptyIndex);
    }

    final MultiComparator filterComparator =
            MultiComparator.create(component.getLSMComponentFilter().getFilterCmpFactories());
    return new LSMBTreeWithBuddyDiskComponentBulkLoader(diskComponent, spec, fillFactor, verifyInput,
            numElementsHint, checkIfEmptyIndex, filterManager, treeFields, filterFields, filterComparator);
}
Use of org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification in project asterixdb by Apache.
From the class LSMBTree, method createComponentBulkLoader.
/**
 * Creates the bulk loader for the given disk component.
 *
 * A bloom filter specification is computed only when {@code hasBloomFilter} is set;
 * otherwise {@code null} is handed to the loader. When {@code withFilter} is set and this
 * index has filter fields, the loader is additionally given the filter manager, the tree
 * and filter fields, and a comparator built from the component filter's comparator factories.
 *
 * @param component the disk component to load; cast to {@code LSMBTreeDiskComponent}
 * @param fillFactor passed through to the loader
 * @param verifyInput passed through to the loader
 * @param numElementsHint expected element count, used to size the bloom filter
 * @param checkIfEmptyIndex passed through to the loader
 * @param withFilter whether the filter-aware loader variant is requested
 * @return the bulk loader for {@code component}
 * @throws HyracksDataException declared by the overridden method
 */
@Override
public ILSMDiskComponentBulkLoader createComponentBulkLoader(ILSMDiskComponent component, float fillFactor, boolean verifyInput, long numElementsHint, boolean checkIfEmptyIndex, boolean withFilter) throws HyracksDataException {
    // Unlike a positive-hint check, this is gated on the index's bloom filter flag alone.
    final BloomFilterSpecification spec = hasBloomFilter
            ? BloomCalculations.computeBloomSpec(BloomCalculations.maxBucketsPerElement(numElementsHint),
                    bloomFilterFalsePositiveRate)
            : null;
    final LSMBTreeDiskComponent diskComponent = (LSMBTreeDiskComponent) component;

    // Without a requested filter, or without filter fields, use the plain loader.
    if (!withFilter || filterFields == null) {
        return new LSMBTreeDiskComponentBulkLoader(diskComponent, spec, fillFactor, verifyInput,
                numElementsHint, checkIfEmptyIndex);
    }

    final MultiComparator filterComparator =
            MultiComparator.create(component.getLSMComponentFilter().getFilterCmpFactories());
    return new LSMBTreeDiskComponentBulkLoader(diskComponent, spec, fillFactor, verifyInput,
            numElementsHint, checkIfEmptyIndex, filterManager, treeFields, filterFields, filterComparator);
}
Use of org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification in project asterixdb by Apache.
From the class LSMInvertedIndex, method createComponentBulkLoader.
/**
 * Creates the bulk loader for the given disk component.
 *
 * A bloom filter specification is computed only when {@code numElementsHint} is positive;
 * otherwise {@code null} is handed to the loader. When {@code withFilter} is set and this
 * index has filter fields, the loader is additionally given the filter manager, the tree
 * and filter fields, and a comparator built from the component filter's comparator factories.
 *
 * @param component the disk component to load; cast to {@code LSMInvertedIndexDiskComponent}
 * @param fillFactor passed through to the loader
 * @param verifyInput passed through to the loader
 * @param numElementsHint expected element count, used to size the bloom filter
 * @param checkIfEmptyIndex passed through to the loader
 * @param withFilter whether the filter-aware loader variant is requested
 * @return the bulk loader for {@code component}
 * @throws HyracksDataException declared by the overridden method
 */
@Override
public ILSMDiskComponentBulkLoader createComponentBulkLoader(ILSMDiskComponent component, float fillFactor, boolean verifyInput, long numElementsHint, boolean checkIfEmptyIndex, boolean withFilter) throws HyracksDataException {
    final BloomFilterSpecification spec = numElementsHint > 0
            ? BloomCalculations.computeBloomSpec(BloomCalculations.maxBucketsPerElement(numElementsHint),
                    bloomFilterFalsePositiveRate)
            : null;
    final LSMInvertedIndexDiskComponent diskComponent = (LSMInvertedIndexDiskComponent) component;

    // Without a requested filter, or without filter fields, use the plain loader.
    if (!withFilter || filterFields == null) {
        return new LSMInvertedIndexDiskComponentBulkLoader(diskComponent, spec, fillFactor, verifyInput,
                numElementsHint, checkIfEmptyIndex);
    }

    final MultiComparator filterComparator =
            MultiComparator.create(component.getLSMComponentFilter().getFilterCmpFactories());
    return new LSMInvertedIndexDiskComponentBulkLoader(diskComponent, spec, fillFactor, verifyInput,
            numElementsHint, checkIfEmptyIndex, filterManager, treeFields, filterFields, filterComparator);
}
Aggregations