Search in sources :

Example 6 with IIndexBulkLoader

use of org.apache.hyracks.storage.common.IIndexBulkLoader in project asterixdb by apache.

the class OrderedIndexExamplesTest method bulkOrderVerificationExample.

/**
 * Bulk load failure example. For every position {@code i} in {@code [1, 1000)},
 * a fresh tree is created and fed the ascending keys {@code 0..i-1} followed by
 * one tuple at position {@code i} whose key is either equal to or strictly
 * smaller than its predecessor. The bulk loader is expected to reject exactly
 * that tuple with an unsorted-input or duplicate-key error.
 *
 * @throws Exception if index setup/teardown fails or the loader raises an
 *                   error other than the expected ordering/duplicate errors
 */
@Test
public void bulkOrderVerificationExample() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Bulk load order verification example");
    }
    // Declare fields.
    int fieldCount = 2;
    ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    typeTraits[0] = IntegerPointable.TYPE_TRAITS;
    typeTraits[1] = IntegerPointable.TYPE_TRAITS;
    // declare keys
    int keyFieldCount = 1;
    IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[keyFieldCount];
    cmpFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    Random rnd = new Random();
    ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    // This is only used for the LSM-BTree.
    int[] bloomFilterKeyFields = new int[keyFieldCount];
    bloomFilterKeyFields[0] = 0;
    int ins = 1000;
    for (int i = 1; i < ins; i++) {
        ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields, null, null, null, null);
        treeIndex.create();
        treeIndex.activate();
        try {
            // Load sorted records, and expect to fail at tuple i.
            IIndexBulkLoader bulkLoader = treeIndex.createBulkLoader(0.7f, true, ins, true);
            boolean rejected = false;
            // Nothing past position i is ever loaded: the add at i must throw.
            for (int j = 0; j <= i; j++) {
                int key = j;
                if (j == i) {
                    if (rnd.nextInt(2) == 0) {
                        // Element equal to previous element.
                        key--;
                    } else {
                        // Element smaller than previous element: an offset of at
                        // least 2 keeps the key strictly below its predecessor (i - 1).
                        key -= rnd.nextInt(ins - 1) + 2;
                    }
                }
                TupleUtils.createIntegerTuple(tb, tuple, key, 5);
                try {
                    bulkLoader.add(tuple);
                } catch (HyracksDataException e) {
                    if (e.getErrorCode() == ErrorCode.UNSORTED_LOAD_INPUT || e.getErrorCode() == ErrorCode.DUPLICATE_KEY || e.getErrorCode() == ErrorCode.DUPLICATE_LOAD_INPUT) {
                        if (j != i) {
                            fail("Unexpected exception: " + e.getMessage());
                        }
                        // Success.
                        rejected = true;
                        break;
                    } else {
                        throw e;
                    }
                }
            }
            // Also covers i == ins - 1, where no later iteration exists to notice
            // that the out-of-order tuple was accepted.
            if (!rejected) {
                fail("Bulk load failure test unexpectedly succeeded past tuple: " + i);
            }
        } finally {
            // Release the index even when an assertion or unexpected error escapes.
            treeIndex.deactivate();
            treeIndex.destroy();
        }
    }
}
Also used : ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) Random(java.util.Random) IIndexBulkLoader(org.apache.hyracks.storage.common.IIndexBulkLoader) ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) ITreeIndex(org.apache.hyracks.storage.am.common.api.ITreeIndex) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) Test(org.junit.Test)

Example 7 with IIndexBulkLoader

use of org.apache.hyracks.storage.common.IIndexBulkLoader in project asterixdb by apache.

the class AbstractRTreeExamplesTest method bulkLoadExample.

/**
 * Bulk load example. Bulk-loads 10,000 tuples into a tree index and then
 * issues one range search against it. Each tuple consists of four integer
 * values (the component-wise minimum and maximum of two random points)
 * followed by a constant fifth integer.
 */
@Test
public void bulkLoadExample() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Bulk load example");
    }

    // Every field is an integer: type traits and serdes are filled uniformly.
    final int fieldCount = 5;
    ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[fieldCount];
    for (int f = 0; f < fieldCount; f++) {
        typeTraits[f] = IntegerPointable.TYPE_TRAITS;
        fieldSerdes[f] = IntegerSerializerDeserializer.INSTANCE;
    }

    // RTree keys: the first four fields.
    final int rtreeKeyFieldCount = 4;
    IBinaryComparatorFactory[] rtreeCmpFactories = new IBinaryComparatorFactory[rtreeKeyFieldCount];
    for (int k = 0; k < rtreeKeyFieldCount; k++) {
        rtreeCmpFactories[k] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    }

    // BTree keys, only relevant for the LSM variants. The plain LSM RTree uses a
    // single key placed right after the RTree keys; otherwise five comparators are
    // supplied and no explicit BTree field mapping is given.
    final boolean plainLsmRTree = rTreeType == RTreeType.LSMRTREE;
    final int btreeKeyFieldCount = plainLsmRTree ? 1 : 5;
    IBinaryComparatorFactory[] btreeCmpFactories = new IBinaryComparatorFactory[btreeKeyFieldCount];
    for (int k = 0; k < btreeKeyFieldCount; k++) {
        btreeCmpFactories[k] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    }
    int[] btreeFields = null;
    if (plainLsmRTree) {
        btreeFields = new int[btreeKeyFieldCount];
        for (int k = 0; k < btreeKeyFieldCount; k++) {
            btreeFields[k] = rtreeKeyFieldCount + k;
        }
    }

    // Value providers, one per RTree key.
    IPrimitiveValueProviderFactory[] valueProviderFactories = RTreeUtils.createPrimitiveValueProviderFactories(rtreeCmpFactories.length, IntegerPointable.FACTORY);

    ITreeIndex treeIndex = createTreeIndex(typeTraits, rtreeCmpFactories, btreeCmpFactories, valueProviderFactories, RTreePolicyType.RTREE, null, btreeFields, null, null, null);
    treeIndex.create();
    treeIndex.activate();

    // Bulk load the records, timing the whole load.
    final int numInserts = 10000;
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Bulk loading " + numInserts + " tuples");
    }
    final long start = System.currentTimeMillis();
    IIndexBulkLoader bulkLoader = treeIndex.createBulkLoader(0.7f, false, numInserts, true);
    ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    int loaded = 0;
    while (loaded < numInserts) {
        // Draw order matters for reproducibility with a seeded generator: x1, y1, x2, y2.
        final int p1x = rnd.nextInt();
        final int p1y = rnd.nextInt();
        final int p2x = rnd.nextInt();
        final int p2y = rnd.nextInt();
        final int lowX = Math.min(p1x, p2x);
        final int lowY = Math.min(p1y, p2y);
        final int highX = Math.max(p1x, p2x);
        final int highY = Math.max(p1y, p2y);
        final int pk = 5;
        TupleUtils.createIntegerTuple(tb, tuple, lowX, lowY, highX, highY, pk);
        bulkLoader.add(tuple);
        loaded++;
    }
    bulkLoader.end();
    final long elapsedMs = System.currentTimeMillis() - start;
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info(numInserts + " tuples loaded in " + elapsedMs + "ms");
    }

    IIndexAccessor indexAccessor = treeIndex.createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);

    // Search key built from the four values -1000, -1000, 1000, 1000.
    ArrayTupleBuilder keyTb = new ArrayTupleBuilder(rtreeKeyFieldCount);
    ArrayTupleReference key = new ArrayTupleReference();
    TupleUtils.createIntegerTuple(keyTb, key, -1000, -1000, 1000, 1000);
    rangeSearch(rtreeCmpFactories, indexAccessor, fieldSerdes, key, null, null);

    treeIndex.deactivate();
    treeIndex.destroy();
}
Also used : ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) IPrimitiveValueProviderFactory(org.apache.hyracks.storage.am.common.api.IPrimitiveValueProviderFactory) ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IIndexAccessor(org.apache.hyracks.storage.common.IIndexAccessor) IIndexBulkLoader(org.apache.hyracks.storage.common.IIndexBulkLoader) ITreeIndex(org.apache.hyracks.storage.am.common.api.ITreeIndex) Test(org.junit.Test)

Example 8 with IIndexBulkLoader

use of org.apache.hyracks.storage.common.IIndexBulkLoader in project asterixdb by apache.

the class TreeIndexTestUtils method bulkLoadCheckTuples.

/**
 * Bulk-loads every check tuple, in the collection's iteration order, into the
 * index held by the given test context.
 *
 * @param ctx         test context supplying the index, field count and field serdes
 * @param checkTuples tuples to load
 * @param filtered    when {@code true}, the tuple builder is sized with one
 *                    additional field and the flag is forwarded to tuple creation
 * @throws HyracksDataException if tuple creation or the bulk load fails
 */
public static void bulkLoadCheckTuples(IIndexTestContext ctx, Collection<CheckTuple> checkTuples, boolean filtered) throws HyracksDataException {
    final int fieldCount = ctx.getFieldCount();
    final int numTuples = checkTuples.size();
    final int builderFieldCount = filtered ? fieldCount + 1 : fieldCount;
    ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(builderFieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();

    // Perform bulk load; progress is logged for every tuple at INFO level.
    IIndexBulkLoader bulkLoader = ctx.getIndex().createBulkLoader(0.7f, false, numTuples, false);
    int position = 0;
    for (CheckTuple current : checkTuples) {
        position++;
        if (LOGGER.isLoggable(Level.INFO)) {
            LOGGER.info("Bulk Loading Tuple " + position + "/" + numTuples);
        }
        createTupleFromCheckTuple(current, tupleBuilder, tuple, ctx.getFieldSerdes(), filtered);
        bulkLoader.add(tuple);
    }
    bulkLoader.end();
}
Also used : IIndexBulkLoader(org.apache.hyracks.storage.common.IIndexBulkLoader) ArrayTupleReference(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)

Example 9 with IIndexBulkLoader

use of org.apache.hyracks.storage.common.IIndexBulkLoader in project asterixdb by apache.

the class ExternalRTree method merge.

/**
 * Merges the components referenced by the given merge operation into one
 * newly created disk component and returns it.
 * <p>
 * The only reason for overriding the merge method is the way to determine
 * the need to keep deleted tuples. This can be done in a better way by
 * creating a method {@code boolean keepDeletedTuples(mergedComponents)}.
 *
 * @param operation the merge operation; it is cast to
 *                  {@code LSMRTreeMergeOperation} without a type check
 * @return the merged disk component, after its R-tree (and, when deleted
 *         tuples are kept, its BTree and bloom filter) has been bulk loaded
 * @throws HyracksDataException declared by the signature; which of the
 *                              calls below raise it is not visible here
 */
@Override
public ILSMDiskComponent merge(ILSMIOOperation operation) throws HyracksDataException {
    LSMRTreeMergeOperation mergeOp = (LSMRTreeMergeOperation) operation;
    IIndexCursor cursor = mergeOp.getCursor();
    // Match-all predicate shape: both constructor arguments are null.
    ISearchPredicate rtreeSearchPred = new SearchPredicate(null, null);
    // The merge cursor is assumed to be an LSMRTreeSortedCursor (unchecked cast);
    // its operation context is reused for both searches below.
    ILSMIndexOperationContext opCtx = ((LSMRTreeSortedCursor) cursor).getOpCtx();
    opCtx.getComponentHolder().addAll(mergeOp.getMergingComponents());
    search(opCtx, cursor, rtreeSearchPred);
    LSMRTreeDiskComponent mergedComponent = createDiskComponent(componentFactory, mergeOp.getTarget(), mergeOp.getBTreeTarget(), mergeOp.getBloomFilterTarget(), true);
    // In case we must keep the deleted-keys BTrees, then they must be
    // merged *before* merging the r-trees so that
    // lsmHarness.endSearch() is called once when the r-trees have been
    // merged.
    boolean keepDeleteTuples = false;
    // Deleted tuples are kept unless the last merging component is (by reference)
    // the last entry of the component list selected by `version`:
    // diskComponents for version 0, secondDiskComponents otherwise.
    if (version == 0) {
        keepDeleteTuples = mergeOp.getMergingComponents().get(mergeOp.getMergingComponents().size() - 1) != diskComponents.get(diskComponents.size() - 1);
    } else {
        keepDeleteTuples = mergeOp.getMergingComponents().get(mergeOp.getMergingComponents().size() - 1) != secondDiskComponents.get(secondDiskComponents.size() - 1);
    }
    if (keepDeleteTuples) {
        // Keep the deleted tuples since the oldest disk component is not
        // included in the merge operation
        LSMRTreeDeletedKeysBTreeMergeCursor btreeCursor = new LSMRTreeDeletedKeysBTreeMergeCursor(opCtx);
        search(opCtx, btreeCursor, rtreeSearchPred);
        BTree btree = mergedComponent.getBTree();
        IIndexBulkLoader btreeBulkLoader = btree.createBulkLoader(1.0f, true, 0L, false);
        // Size the merged bloom filter from the summed element counts of the
        // bloom filters of all merging components.
        long numElements = 0L;
        for (int i = 0; i < mergeOp.getMergingComponents().size(); ++i) {
            numElements += ((LSMRTreeDiskComponent) mergeOp.getMergingComponents().get(i)).getBloomFilter().getNumElements();
        }
        int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
        BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement, bloomFilterFalsePositiveRate);
        IIndexBulkLoader builder = mergedComponent.getBloomFilter().createBuilder(numElements, bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
        try {
            // Every tuple from the deleted-keys cursor goes into both the BTree
            // and the bloom filter of the merged component.
            while (btreeCursor.hasNext()) {
                btreeCursor.next();
                ITupleReference tuple = btreeCursor.getTuple();
                btreeBulkLoader.add(tuple);
                builder.add(tuple);
            }
        } finally {
            btreeCursor.close();
            // NOTE(review): builder.end() runs even when the loop above throws,
            // whereas btreeBulkLoader.end() below runs only on success — confirm
            // this asymmetry is intended.
            builder.end();
        }
        btreeBulkLoader.end();
    }
    // Bulk load the merged R-tree from the merge cursor opened by the first search.
    IIndexBulkLoader bulkLoader = mergedComponent.getRTree().createBulkLoader(1.0f, false, 0L, false);
    try {
        while (cursor.hasNext()) {
            cursor.next();
            ITupleReference frameTuple = cursor.getTuple();
            bulkLoader.add(frameTuple);
        }
    } finally {
        // The cursor is closed whether or not loading succeeded; the loader is
        // only ended on the success path.
        cursor.close();
    }
    bulkLoader.end();
    return mergedComponent;
}
Also used : BTree(org.apache.hyracks.storage.am.btree.impls.BTree) ILSMIndexOperationContext(org.apache.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext) SearchPredicate(org.apache.hyracks.storage.am.rtree.impls.SearchPredicate) ISearchPredicate(org.apache.hyracks.storage.common.ISearchPredicate) IIndexBulkLoader(org.apache.hyracks.storage.common.IIndexBulkLoader) ITupleReference(org.apache.hyracks.dataflow.common.data.accessors.ITupleReference) IIndexCursor(org.apache.hyracks.storage.common.IIndexCursor) ISearchPredicate(org.apache.hyracks.storage.common.ISearchPredicate) BloomFilterSpecification(org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification)

Example 10 with IIndexBulkLoader

use of org.apache.hyracks.storage.common.IIndexBulkLoader in project asterixdb by apache.

the class ExternalFilesIndexCreateOperatorDescriptor method createPushRuntime.

/**
 * Creates a source-style runtime (input arity 0) whose {@code initialize()}
 * builds the files index, opens it, bulk loads one tuple per entry of
 * {@code files}, and closes the index helper again.
 */
@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
    return new AbstractOperatorNodePushable() {

        @Override
        public int getInputArity() {
            // No inputs are consumed.
            return 0;
        }

        @Override
        public IFrameWriter getInputFrameWriter(int index) {
            // No input writer is provided.
            return null;
        }

        @Override
        public void setOutputFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc) throws HyracksDataException {
            // Intentionally empty: the writer is ignored.
        }

        @Override
        public void initialize() throws HyracksDataException {
            IIndexBuilder builder = indexBuilderFactory.create(ctx, partition);
            IIndexDataflowHelper helper = dataflowHelperFactory.create(ctx, partition);
            FileIndexTupleTranslator translator = new FileIndexTupleTranslator();
            // Build the index first, then open it for loading.
            builder.build();
            helper.open();
            try {
                IIndexBulkLoader loader = helper.getIndexInstance().createBulkLoader(BTree.DEFAULT_FILL_FACTOR, false, files.size(), false);
                // One tuple per external file, in the iteration order of `files`.
                for (ExternalFile externalFile : files) {
                    loader.add(translator.getTupleFromFile(externalFile));
                }
                loader.end();
            } finally {
                // The helper is closed whether or not the load succeeded.
                helper.close();
            }
        }

        @Override
        public void deinitialize() throws HyracksDataException {
            // Intentionally empty: nothing to release here.
        }
    };
}
Also used : IIndex(org.apache.hyracks.storage.common.IIndex) IIndexBuilder(org.apache.hyracks.storage.am.common.api.IIndexBuilder) IFrameWriter(org.apache.hyracks.api.comm.IFrameWriter) FileIndexTupleTranslator(org.apache.asterix.external.indexing.FileIndexTupleTranslator) IIndexDataflowHelper(org.apache.hyracks.storage.am.common.api.IIndexDataflowHelper) AbstractOperatorNodePushable(org.apache.hyracks.dataflow.std.base.AbstractOperatorNodePushable) IIndexBulkLoader(org.apache.hyracks.storage.common.IIndexBulkLoader) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) ExternalFile(org.apache.asterix.external.indexing.ExternalFile)

Aggregations

IIndexBulkLoader (org.apache.hyracks.storage.common.IIndexBulkLoader)11 ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder)7 ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference)7 Test (org.junit.Test)5 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)4 IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)3 ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits)3 ITupleReference (org.apache.hyracks.dataflow.common.data.accessors.ITupleReference)3 BloomFilterSpecification (org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification)3 ITreeIndex (org.apache.hyracks.storage.am.common.api.ITreeIndex)3 ArrayList (java.util.ArrayList)2 TreeSet (java.util.TreeSet)2 BloomFilter (org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilter)2 AbstractBloomFilterTest (org.apache.hyracks.storage.am.bloomfilter.util.AbstractBloomFilterTest)2 ILSMIndexOperationContext (org.apache.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext)2 IIndexAccessor (org.apache.hyracks.storage.common.IIndexAccessor)2 IIndexCursor (org.apache.hyracks.storage.common.IIndexCursor)2 ISearchPredicate (org.apache.hyracks.storage.common.ISearchPredicate)2 IBufferCache (org.apache.hyracks.storage.common.buffercache.IBufferCache)2 Random (java.util.Random)1