Example 1 with IIndexBulkLoader

use of org.apache.hyracks.storage.common.IIndexBulkLoader in project asterixdb by apache.

the class OrderedIndexExamplesTest method bulkLoadExample.

/**
 * Bulk load example. Load a tree with 100,000 tuples. BTree has a composite
 * key to "simulate" non-unique index creation.
 */
@Test
public void bulkLoadExample() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Bulk load example");
    }
    // Declare fields.
    int fieldCount = 3;
    ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    typeTraits[0] = IntegerPointable.TYPE_TRAITS;
    typeTraits[1] = IntegerPointable.TYPE_TRAITS;
    typeTraits[2] = IntegerPointable.TYPE_TRAITS;
    // Declare field serdes.
    ISerializerDeserializer[] fieldSerdes = { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
    // declare keys
    int keyFieldCount = 2;
    IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
    cmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    cmpFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // This is only used for the LSM-BTree.
    int[] bloomFilterKeyFields = new int[keyFieldCount];
    bloomFilterKeyFields[0] = 0;
    bloomFilterKeyFields[1] = 1;
    ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields, null, null, null, null);
    treeIndex.create();
    treeIndex.activate();
    // Load sorted records.
    int ins = 100000;
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Bulk loading " + ins + " tuples");
    }
    long start = System.currentTimeMillis();
    IIndexBulkLoader bulkLoader = treeIndex.createBulkLoader(0.7f, false, ins, true);
    ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    for (int i = 0; i < ins; i++) {
        TupleUtils.createIntegerTuple(tb, tuple, i, i, 5);
        bulkLoader.add(tuple);
    }
    bulkLoader.end();
    long end = System.currentTimeMillis();
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info(ins + " tuples loaded in " + (end - start) + "ms");
    }
    IIndexAccessor indexAccessor = treeIndex.createAccessor(TestOperationCallback.INSTANCE, TestOperationCallback.INSTANCE);
    // Build low key.
    ArrayTupleBuilder lowKeyTb = new ArrayTupleBuilder(1);
    ArrayTupleReference lowKey = new ArrayTupleReference();
    TupleUtils.createIntegerTuple(lowKeyTb, lowKey, 44444);
    // Build high key.
    ArrayTupleBuilder highKeyTb = new ArrayTupleBuilder(1);
    ArrayTupleReference highKey = new ArrayTupleReference();
    TupleUtils.createIntegerTuple(highKeyTb, highKey, 44500);
    // Prefix-Range search in [44444, 44500]
    rangeSearch(cmpFactories, indexAccessor, fieldSerdes, lowKey, highKey, null, null);
    treeIndex.validate();
    treeIndex.deactivate();
    treeIndex.destroy();
}
Also used : ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IIndexAccessor(org.apache.hyracks.storage.common.IIndexAccessor) IIndexBulkLoader(org.apache.hyracks.storage.common.IIndexBulkLoader) ITreeIndex(org.apache.hyracks.storage.am.common.api.ITreeIndex) Test(org.junit.Test)
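The rangeSearch helper invoked above is defined elsewhere in the test class. As a hedged sketch only, the prefix-range scan over [44444, 44500] could be issued directly along the following lines, assuming the standard Hyracks RangePredicate, IIndexCursor and BTreeUtils.getSearchMultiComparator APIs and that the snippet runs before treeIndex.deactivate():

// Hedged sketch, not the test's actual rangeSearch helper. The search comparator is
// built from the key fields present in lowKey/highKey (one of the two key fields here),
// which is what gives the scan its prefix semantics.
MultiComparator searchCmp = BTreeUtils.getSearchMultiComparator(cmpFactories, lowKey);
RangePredicate rangePred = new RangePredicate(lowKey, highKey, true, true, searchCmp, searchCmp);
IIndexCursor cursor = indexAccessor.createSearchCursor(false);
indexAccessor.search(cursor, rangePred);
try {
    while (cursor.hasNext()) {
        cursor.next();
        // Deserialize and log each matching record.
        LOGGER.info(TupleUtils.printTuple(cursor.getTuple(), fieldSerdes));
    }
} finally {
    cursor.close();
}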

Example 2 with IIndexBulkLoader

use of org.apache.hyracks.storage.common.IIndexBulkLoader in project asterixdb by apache.

the class BloomFilterTest method singleFieldTest.

@Test
public void singleFieldTest() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("TESTING BLOOM FILTER");
    }
    IBufferCache bufferCache = harness.getBufferCache();
    int numElements = 100;
    int[] keyFields = { 0 };
    BloomFilter bf = new BloomFilter(bufferCache, harness.getFileMapProvider(), harness.getFileReference(), keyFields);
    double acceptableFalsePositiveRate = 0.1;
    int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
    BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement, acceptableFalsePositiveRate);
    bf.create();
    bf.activate();
    IIndexBulkLoader builder = bf.createBuilder(numElements, bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
    int fieldCount = 2;
    ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    // generate keys
    int maxKey = 1000;
    TreeSet<Integer> uniqueKeys = new TreeSet<>();
    ArrayList<Integer> keys = new ArrayList<>();
    while (uniqueKeys.size() < numElements) {
        int key = rnd.nextInt() % maxKey;
        uniqueKeys.add(key);
    }
    for (Integer i : uniqueKeys) {
        keys.add(i);
    }
    // Insert tuples in the bloom filter
    for (int i = 0; i < keys.size(); ++i) {
        TupleUtils.createIntegerTuple(tupleBuilder, tuple, keys.get(i), i);
        builder.add(tuple);
    }
    builder.end();
    // Check that all the inserted tuples can be found.
    long[] hashes = new long[2];
    for (int i = 0; i < keys.size(); ++i) {
        TupleUtils.createIntegerTuple(tupleBuilder, tuple, keys.get(i), i);
        Assert.assertTrue(bf.contains(tuple, hashes));
    }
    bf.deactivate();
    bf.destroy();
}
Also used : ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) ArrayList(java.util.ArrayList) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) BloomFilter(org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilter) IIndexBulkLoader(org.apache.hyracks.storage.common.IIndexBulkLoader) TreeSet(java.util.TreeSet) BloomFilterSpecification(org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification) IBufferCache(org.apache.hyracks.storage.common.buffercache.IBufferCache) AbstractBloomFilterTest(org.apache.hyracks.storage.am.bloomfilter.util.AbstractBloomFilterTest) Test(org.junit.Test)
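As a complementary, hedged check (not part of the test above, and assuming it runs before bf.deactivate()), the filter could also be probed with keys that were never inserted. A Bloom filter yields no false negatives, so only false positives, expected at roughly the acceptable rate of 0.1, would make such probes return true:

// Hedged sketch: inserted keys came from rnd.nextInt() % maxKey, i.e. the open range
// (-maxKey, maxKey), so values >= maxKey are guaranteed to be absent from the filter.
int falsePositives = 0;
for (int key = maxKey; key < maxKey + numElements; key++) {
    TupleUtils.createIntegerTuple(tupleBuilder, tuple, key, key);
    if (bf.contains(tuple, hashes)) {
        falsePositives++;
    }
}
// With acceptableFalsePositiveRate = 0.1, falsePositives should stay well below numElements.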

Example 3 with IIndexBulkLoader

use of org.apache.hyracks.storage.common.IIndexBulkLoader in project asterixdb by apache.

the class BloomFilterTest method multiFieldTest.

@Test
public void multiFieldTest() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("TESTING BLOOM FILTER");
    }
    IBufferCache bufferCache = harness.getBufferCache();
    int numElements = 10000;
    int[] keyFields = { 2, 4, 1 };
    BloomFilter bf = new BloomFilter(bufferCache, harness.getFileMapProvider(), harness.getFileReference(), keyFields);
    double acceptableFalsePositiveRate = 0.1;
    int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
    BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement, acceptableFalsePositiveRate);
    bf.create();
    bf.activate();
    IIndexBulkLoader builder = bf.createBuilder(numElements, bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
    int fieldCount = 5;
    ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    int maxLength = 20;
    ArrayList<String> s1 = new ArrayList<>();
    ArrayList<String> s2 = new ArrayList<>();
    ArrayList<String> s3 = new ArrayList<>();
    ArrayList<String> s4 = new ArrayList<>();
    for (int i = 0; i < numElements; ++i) {
        s1.add(randomString(rnd.nextInt() % maxLength, rnd));
        s2.add(randomString(rnd.nextInt() % maxLength, rnd));
        s3.add(randomString(rnd.nextInt() % maxLength, rnd));
        s4.add(randomString(rnd.nextInt() % maxLength, rnd));
    }
    for (int i = 0; i < numElements; ++i) {
        TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, s1.get(i), s2.get(i), i, s3.get(i), s4.get(i));
        builder.add(tuple);
    }
    builder.end();
    long[] hashes = new long[2];
    for (int i = 0; i < numElements; ++i) {
        TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, s1.get(i), s2.get(i), i, s3.get(i), s4.get(i));
        Assert.assertTrue(bf.contains(tuple, hashes));
    }
    bf.deactivate();
    bf.destroy();
}
Also used : ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) ArrayList(java.util.ArrayList) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) BloomFilter(org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilter) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IIndexBulkLoader(org.apache.hyracks.storage.common.IIndexBulkLoader) BloomFilterSpecification(org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification) IBufferCache(org.apache.hyracks.storage.common.buffercache.IBufferCache) AbstractBloomFilterTest(org.apache.hyracks.storage.am.bloomfilter.util.AbstractBloomFilterTest) Test(org.junit.Test)
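Because keyFields = {2, 4, 1}, only those three fields of each five-field tuple contribute to the filter's hashes. A hedged sketch (again assuming it runs before bf.deactivate()) makes this visible:

// Hedged sketch: a tuple that differs from inserted tuple 0 only in the non-key
// fields 0 and 3 is indistinguishable to the filter, because only fields 2, 4 and 1
// feed the hash, so it is expected to be reported as contained.
TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, "other-f0", s2.get(0), 0, "other-f3", s4.get(0));
Assert.assertTrue(bf.contains(tuple, hashes));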

Example 4 with IIndexBulkLoader

use of org.apache.hyracks.storage.common.IIndexBulkLoader in project asterixdb by apache.

the class BTreeBulkLoadRunner method runExperiment.

@Override
public long runExperiment(DataGenThread dataGen, int numThreads) throws Exception {
    btree.create();
    long start = System.currentTimeMillis();
    IIndexBulkLoader bulkLoader = btree.createBulkLoader(1.0f, false, 0L, true);
    for (int i = 0; i < numBatches; i++) {
        TupleBatch batch = dataGen.tupleBatchQueue.take();
        for (int j = 0; j < batch.size(); j++) {
            bulkLoader.add(batch.get(j));
        }
    }
    bulkLoader.end();
    long end = System.currentTimeMillis();
    long time = end - start;
    return time;
}
Also used : IIndexBulkLoader(org.apache.hyracks.storage.common.IIndexBulkLoader) TupleBatch(org.apache.hyracks.storage.am.common.datagen.TupleBatch)

Example 5 with IIndexBulkLoader

use of org.apache.hyracks.storage.common.IIndexBulkLoader in project asterixdb by apache.

the class LSMInvertedIndexTestUtils method bulkLoadInvIndex.

public static void bulkLoadInvIndex(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen, int numDocs, boolean appendOnly) throws HyracksDataException, IOException {
    SortedSet<CheckTuple> tmpMemIndex = new TreeSet<>();
    // First generate the expected index by inserting the documents one-by-one.
    for (int i = 0; i < numDocs; i++) {
        ITupleReference tuple = tupleGen.next();
        testCtx.insertCheckTuples(tuple, tmpMemIndex);
    }
    ISerializerDeserializer[] fieldSerdes = testCtx.getFieldSerdes();
    // Use the expected index to bulk-load the actual index. The TreeSet keeps the
    // check tuples sorted, so they reach the bulk loader in the key order it requires.
    IIndexBulkLoader bulkLoader = testCtx.getIndex().createBulkLoader(1.0f, false, numDocs, true);
    ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(testCtx.getFieldSerdes().length);
    ArrayTupleReference tuple = new ArrayTupleReference();
    Iterator<CheckTuple> checkTupleIter = tmpMemIndex.iterator();
    while (checkTupleIter.hasNext()) {
        CheckTuple checkTuple = checkTupleIter.next();
        OrderedIndexTestUtils.createTupleFromCheckTuple(checkTuple, tupleBuilder, tuple, fieldSerdes);
        bulkLoader.add(tuple);
    }
    bulkLoader.end();
    // Add all check tuples from the temp index to the test context.
    testCtx.getCheckTuples().addAll(tmpMemIndex);
}
Also used : CheckTuple(org.apache.hyracks.storage.am.common.CheckTuple) TreeSet(java.util.TreeSet) IIndexBulkLoader(org.apache.hyracks.storage.common.IIndexBulkLoader) ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)

Aggregations

IIndexBulkLoader (org.apache.hyracks.storage.common.IIndexBulkLoader): 11 uses
ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder): 7 uses
ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference): 7 uses
Test (org.junit.Test): 5 uses
ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer): 4 uses
IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory): 3 uses
ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits): 3 uses
ITupleReference (org.apache.hyracks.dataflow.common.data.accessors.ITupleReference): 3 uses
BloomFilterSpecification (org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification): 3 uses
ITreeIndex (org.apache.hyracks.storage.am.common.api.ITreeIndex): 3 uses
ArrayList (java.util.ArrayList): 2 uses
TreeSet (java.util.TreeSet): 2 uses
BloomFilter (org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilter): 2 uses
AbstractBloomFilterTest (org.apache.hyracks.storage.am.bloomfilter.util.AbstractBloomFilterTest): 2 uses
ILSMIndexOperationContext (org.apache.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext): 2 uses
IIndexAccessor (org.apache.hyracks.storage.common.IIndexAccessor): 2 uses
IIndexCursor (org.apache.hyracks.storage.common.IIndexCursor): 2 uses
ISearchPredicate (org.apache.hyracks.storage.common.ISearchPredicate): 2 uses
IBufferCache (org.apache.hyracks.storage.common.buffercache.IBufferCache): 2 uses
Random (java.util.Random): 1 use