use of org.apache.hyracks.storage.common.IIndexBulkLoader in project asterixdb by apache.
the class OrderedIndexExamplesTest method bulkLoadExample.
/**
* Bulk load example. Load a tree with 100,000 tuples. BTree has a composite
* key to "simulate" non-unique index creation.
*/
@Test
public void bulkLoadExample() throws Exception {
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("Bulk load example");
}
// Declare fields.
int fieldCount = 3;
ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
typeTraits[0] = IntegerPointable.TYPE_TRAITS;
typeTraits[1] = IntegerPointable.TYPE_TRAITS;
typeTraits[2] = IntegerPointable.TYPE_TRAITS;
// Declare field serdes.
ISerializerDeserializer[] fieldSerdes = { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
// declare keys
int keyFieldCount = 2;
IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
cmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
cmpFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
// This is only used for the LSM-BTree.
int[] bloomFilterKeyFields = new int[keyFieldCount];
bloomFilterKeyFields[0] = 0;
bloomFilterKeyFields[1] = 1;
ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields, null, null, null, null);
treeIndex.create();
treeIndex.activate();
// Load sorted records.
int ins = 100000;
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("Bulk loading " + ins + " tuples");
}
long start = System.currentTimeMillis();
IIndexBulkLoader bulkLoader = treeIndex.createBulkLoader(0.7f, false, ins, true);
ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
for (int i = 0; i < ins; i++) {
TupleUtils.createIntegerTuple(tb, tuple, i, i, 5);
bulkLoader.add(tuple);
}
bulkLoader.end();
long end = System.currentTimeMillis();
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info(ins + " tuples loaded in " + (end - start) + "ms");
}
IIndexAccessor indexAccessor = treeIndex.createAccessor(TestOperationCallback.INSTANCE, TestOperationCallback.INSTANCE);
// Build low key.
ArrayTupleBuilder lowKeyTb = new ArrayTupleBuilder(1);
ArrayTupleReference lowKey = new ArrayTupleReference();
TupleUtils.createIntegerTuple(lowKeyTb, lowKey, 44444);
// Build high key.
ArrayTupleBuilder highKeyTb = new ArrayTupleBuilder(1);
ArrayTupleReference highKey = new ArrayTupleReference();
TupleUtils.createIntegerTuple(highKeyTb, highKey, 44500);
// Prefix-Range search in [44444, 44500]
rangeSearch(cmpFactories, indexAccessor, fieldSerdes, lowKey, highKey, null, null);
treeIndex.validate();
treeIndex.deactivate();
treeIndex.destroy();
}
use of org.apache.hyracks.storage.common.IIndexBulkLoader in project asterixdb by apache.
the class BloomFilterTest method singleFieldTest.
@Test
public void singleFieldTest() throws Exception {
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("TESTING BLOOM FILTER");
}
IBufferCache bufferCache = harness.getBufferCache();
int numElements = 100;
int[] keyFields = { 0 };
BloomFilter bf = new BloomFilter(bufferCache, harness.getFileMapProvider(), harness.getFileReference(), keyFields);
double acceptanleFalsePositiveRate = 0.1;
int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement, acceptanleFalsePositiveRate);
bf.create();
bf.activate();
IIndexBulkLoader builder = bf.createBuilder(numElements, bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
int fieldCount = 2;
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
// generate keys
int maxKey = 1000;
TreeSet<Integer> uniqueKeys = new TreeSet<>();
ArrayList<Integer> keys = new ArrayList<>();
while (uniqueKeys.size() < numElements) {
int key = rnd.nextInt() % maxKey;
uniqueKeys.add(key);
}
for (Integer i : uniqueKeys) {
keys.add(i);
}
// Insert tuples in the bloom filter
for (int i = 0; i < keys.size(); ++i) {
TupleUtils.createIntegerTuple(tupleBuilder, tuple, keys.get(i), i);
builder.add(tuple);
}
builder.end();
// Check all the inserted tuples can be found.
long[] hashes = new long[2];
for (int i = 0; i < keys.size(); ++i) {
TupleUtils.createIntegerTuple(tupleBuilder, tuple, keys.get(i), i);
Assert.assertTrue(bf.contains(tuple, hashes));
}
bf.deactivate();
bf.destroy();
}
use of org.apache.hyracks.storage.common.IIndexBulkLoader in project asterixdb by apache.
the class BloomFilterTest method multiFieldTest.
@Test
public void multiFieldTest() throws Exception {
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("TESTING BLOOM FILTER");
}
IBufferCache bufferCache = harness.getBufferCache();
int numElements = 10000;
int[] keyFields = { 2, 4, 1 };
BloomFilter bf = new BloomFilter(bufferCache, harness.getFileMapProvider(), harness.getFileReference(), keyFields);
double acceptanleFalsePositiveRate = 0.1;
int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement, acceptanleFalsePositiveRate);
bf.create();
bf.activate();
IIndexBulkLoader builder = bf.createBuilder(numElements, bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
int fieldCount = 5;
ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
int maxLength = 20;
ArrayList<String> s1 = new ArrayList<>();
ArrayList<String> s2 = new ArrayList<>();
ArrayList<String> s3 = new ArrayList<>();
ArrayList<String> s4 = new ArrayList<>();
for (int i = 0; i < numElements; ++i) {
s1.add(randomString(rnd.nextInt() % maxLength, rnd));
s2.add(randomString(rnd.nextInt() % maxLength, rnd));
s3.add(randomString(rnd.nextInt() % maxLength, rnd));
s4.add(randomString(rnd.nextInt() % maxLength, rnd));
}
for (int i = 0; i < numElements; ++i) {
TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, s1.get(i), s2.get(i), i, s3.get(i), s4.get(i));
builder.add(tuple);
}
builder.end();
long[] hashes = new long[2];
for (int i = 0; i < numElements; ++i) {
TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, s1.get(i), s2.get(i), i, s3.get(i), s4.get(i));
Assert.assertTrue(bf.contains(tuple, hashes));
}
bf.deactivate();
bf.destroy();
}
use of org.apache.hyracks.storage.common.IIndexBulkLoader in project asterixdb by apache.
the class BTreeBulkLoadRunner method runExperiment.
@Override
public long runExperiment(DataGenThread dataGen, int numThreads) throws Exception {
btree.create();
long start = System.currentTimeMillis();
IIndexBulkLoader bulkLoader = btree.createBulkLoader(1.0f, false, 0L, true);
for (int i = 0; i < numBatches; i++) {
TupleBatch batch = dataGen.tupleBatchQueue.take();
for (int j = 0; j < batch.size(); j++) {
bulkLoader.add(batch.get(j));
}
}
bulkLoader.end();
long end = System.currentTimeMillis();
long time = end - start;
return time;
}
use of org.apache.hyracks.storage.common.IIndexBulkLoader in project asterixdb by apache.
the class LSMInvertedIndexTestUtils method bulkLoadInvIndex.
public static void bulkLoadInvIndex(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen, int numDocs, boolean appendOnly) throws HyracksDataException, IOException {
SortedSet<CheckTuple> tmpMemIndex = new TreeSet<>();
// First generate the expected index by inserting the documents one-by-one.
for (int i = 0; i < numDocs; i++) {
ITupleReference tuple = tupleGen.next();
testCtx.insertCheckTuples(tuple, tmpMemIndex);
}
ISerializerDeserializer[] fieldSerdes = testCtx.getFieldSerdes();
// Use the expected index to bulk-load the actual index.
IIndexBulkLoader bulkLoader = testCtx.getIndex().createBulkLoader(1.0f, false, numDocs, true);
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(testCtx.getFieldSerdes().length);
ArrayTupleReference tuple = new ArrayTupleReference();
Iterator<CheckTuple> checkTupleIter = tmpMemIndex.iterator();
while (checkTupleIter.hasNext()) {
CheckTuple checkTuple = checkTupleIter.next();
OrderedIndexTestUtils.createTupleFromCheckTuple(checkTuple, tupleBuilder, tuple, fieldSerdes);
bulkLoader.add(tuple);
}
bulkLoader.end();
// Add all check tuples from the temp index to the text context.
testCtx.getCheckTuples().addAll(tmpMemIndex);
}
Aggregations