Search in sources:

Example 6 with IIndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.

the class MetadataProvider method getWriteResultRuntime.

@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getWriteResultRuntime(IDataSource<DataSourceId> dataSource, IOperatorSchema propagatedSchema, List<LogicalVariable> keys, LogicalVariable payload, List<LogicalVariable> additionalNonKeyFields, JobGenContext context, JobSpecification spec) throws AlgebricksException {
    String dataverseName = dataSource.getId().getDataverseName();
    String datasetName = dataSource.getId().getDatasourceName();
    Dataset dataset = MetadataManagerUtil.findExistingDataset(mdTxnCtx, dataverseName, datasetName);
    int numKeys = keys.size();
    int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
    // move key fields to front
    int[] fieldPermutation = new int[numKeys + 1 + numFilterFields];
    int i = 0;
    for (LogicalVariable varKey : keys) {
        int idx = propagatedSchema.findVariable(varKey);
        fieldPermutation[i] = idx;
        i++;
    }
    fieldPermutation[numKeys] = propagatedSchema.findVariable(payload);
    if (numFilterFields > 0) {
        int idx = propagatedSchema.findVariable(additionalNonKeyFields.get(0));
        fieldPermutation[numKeys + 1] = idx;
    }
    try {
        boolean temp = dataset.getDatasetDetails().isTemp();
        isTemporaryDatasetWriteJob = isTemporaryDatasetWriteJob && temp;
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = getSplitProviderAndConstraints(dataset);
        long numElementsHint = getCardinalityPerPartitionHint(dataset);
        // TODO: figure out the right behavior of the bulkload and then give the
        // right callback (e.g., what's the expected behavior when there is an
        // error during bulkload?)
        IIndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(storaegComponentProvider.getStorageManager(), splitsAndConstraint.first);
        TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, null, fieldPermutation, GlobalConfig.DEFAULT_TREE_FILL_FACTOR, false, numElementsHint, true, indexHelperFactory);
        return new Pair<>(btreeBulkLoad, splitsAndConstraint.second);
    } catch (MetadataException me) {
        throw new AlgebricksException(me);
    }
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) Dataset(org.apache.asterix.metadata.entities.Dataset) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) DatasetCardinalityHint(org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetCardinalityHint) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) MetadataException(org.apache.asterix.metadata.MetadataException) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) TreeIndexBulkLoadOperatorDescriptor(org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) Pair(org.apache.hyracks.algebricks.common.utils.Pair)
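
Note on what the factory yields at runtime: an operator that receives an IIndexDataflowHelperFactory typically calls it once per partition to obtain an IIndexDataflowHelper wrapping the local index resource. Below is a minimal sketch of that lifecycle, assuming the 2017-era interfaces where create(...) takes the NC service context and a partition number; the service context, partition value, and the package paths marked below are assumptions for illustration, not code from MetadataProvider.

// Minimal sketch (assumed interfaces, not MetadataProvider code): typical use of the
// helper that an IIndexDataflowHelperFactory creates inside an operator node pushable.
import org.apache.hyracks.api.application.INCServiceContext;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.storage.am.common.api.IIndexDataflowHelper; // package path assumed
import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
import org.apache.hyracks.storage.common.IIndex; // package path assumed

public final class IndexHelperLifecycleSketch {
    public static void withIndex(IIndexDataflowHelperFactory factory, INCServiceContext ctx, int partition)
            throws HyracksDataException {
        IIndexDataflowHelper helper = factory.create(ctx, partition); // one helper per partition
        helper.open(); // opens/activates the local index resource for this partition
        try {
            IIndex index = helper.getIndexInstance(); // e.g. the LSM B-Tree backing the dataset partition
            // ... create bulk loaders or accessors on `index` and push tuples through them ...
        } finally {
            helper.close(); // release the index when the operator is done
        }
    }
}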

Example 7 with IIndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.

the class MetadataProvider method getInsertOrDeleteRuntime.

private Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getInsertOrDeleteRuntime(IndexOperation indexOp, IDataSource<DataSourceId> dataSource, IOperatorSchema propagatedSchema, List<LogicalVariable> keys, LogicalVariable payload, List<LogicalVariable> additionalNonKeyFields, RecordDescriptor inputRecordDesc, JobGenContext context, JobSpecification spec, boolean bulkload, List<LogicalVariable> additionalNonFilteringFields) throws AlgebricksException {
    String datasetName = dataSource.getId().getDatasourceName();
    Dataset dataset = MetadataManagerUtil.findExistingDataset(mdTxnCtx, dataSource.getId().getDataverseName(), datasetName);
    boolean temp = dataset.getDatasetDetails().isTemp();
    isTemporaryDatasetWriteJob = isTemporaryDatasetWriteJob && temp;
    int numKeys = keys.size();
    int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
    // Move key fields to front.
    int[] fieldPermutation = new int[numKeys + 1 + numFilterFields + (additionalNonFilteringFields == null ? 0 : additionalNonFilteringFields.size())];
    int[] bloomFilterKeyFields = new int[numKeys];
    int i = 0;
    for (LogicalVariable varKey : keys) {
        int idx = propagatedSchema.findVariable(varKey);
        fieldPermutation[i] = idx;
        bloomFilterKeyFields[i] = i;
        i++;
    }
    fieldPermutation[i++] = propagatedSchema.findVariable(payload);
    if (numFilterFields > 0) {
        int idx = propagatedSchema.findVariable(additionalNonKeyFields.get(0));
        fieldPermutation[i++] = idx;
    }
    if (additionalNonFilteringFields != null) {
        for (LogicalVariable variable : additionalNonFilteringFields) {
            int idx = propagatedSchema.findVariable(variable);
            fieldPermutation[i++] = idx;
        }
    }
    try {
        Index primaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), dataset.getDatasetName());
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = getSplitProviderAndConstraints(dataset);
        // prepare callback
        int[] primaryKeyFields = new int[numKeys];
        for (i = 0; i < numKeys; i++) {
            primaryKeyFields[i] = i;
        }
        IModificationOperationCallbackFactory modificationCallbackFactory = dataset.getModificationCallbackFactory(storaegComponentProvider, primaryIndex, jobId, indexOp, primaryKeyFields);
        IIndexDataflowHelperFactory idfh = new IndexDataflowHelperFactory(storaegComponentProvider.getStorageManager(), splitsAndConstraint.first);
        IOperatorDescriptor op;
        if (bulkload) {
            long numElementsHint = getCardinalityPerPartitionHint(dataset);
            op = new TreeIndexBulkLoadOperatorDescriptor(spec, inputRecordDesc, fieldPermutation, GlobalConfig.DEFAULT_TREE_FILL_FACTOR, true, numElementsHint, true, idfh);
        } else {
            op = new LSMTreeInsertDeleteOperatorDescriptor(spec, inputRecordDesc, fieldPermutation, indexOp, idfh, null, true, modificationCallbackFactory);
        }
        return new Pair<>(op, splitsAndConstraint.second);
    } catch (MetadataException me) {
        throw new AlgebricksException(me);
    }
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) Dataset(org.apache.asterix.metadata.entities.Dataset) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) Index(org.apache.asterix.metadata.entities.Index) IDataSourceIndex(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) DatasetCardinalityHint(org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetCardinalityHint) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) MetadataException(org.apache.asterix.metadata.MetadataException) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) IModificationOperationCallbackFactory(org.apache.hyracks.storage.am.common.api.IModificationOperationCallbackFactory) LSMTreeInsertDeleteOperatorDescriptor(org.apache.asterix.common.dataflow.LSMTreeInsertDeleteOperatorDescriptor) TreeIndexBulkLoadOperatorDescriptor(org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) Pair(org.apache.hyracks.algebricks.common.utils.Pair)
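
The permutation bookkeeping above is the part that is easiest to misread, so here is a small standalone sketch with made-up column positions showing the layout getInsertOrDeleteRuntime produces: primary keys first (these also become bloom-filter key fields 0..numKeys-1), then the payload, then the optional filter field, then any additional non-filtering fields. The column positions are hypothetical; only the ordering mirrors the method.

// Illustrative sketch with hypothetical column positions; only the ordering mirrors the method above.
import java.util.Arrays;

public final class InsertPermutationSketch {
    public static void main(String[] args) {
        int[] keyPositions = { 3, 5 };  // where the 2 primary keys sit in the incoming frame (made up)
        int payloadPosition = 0;        // where the record payload sits (made up)
        int filterPosition = 7;         // filter field position; present only when the dataset has one

        int numKeys = keyPositions.length;
        int[] fieldPermutation = new int[numKeys + 1 + 1]; // keys + payload + filter
        int[] bloomFilterKeyFields = new int[numKeys];
        int i = 0;
        for (int pos : keyPositions) {
            fieldPermutation[i] = pos;      // keys move to the front, as the index expects
            bloomFilterKeyFields[i] = i;    // bloom filter keys are simply 0..numKeys-1
            i++;
        }
        fieldPermutation[i++] = payloadPosition; // payload follows the keys
        fieldPermutation[i++] = filterPosition;  // optional filter field comes last
        System.out.println(Arrays.toString(fieldPermutation));     // [3, 5, 0, 7]
        System.out.println(Arrays.toString(bloomFilterKeyFields)); // [0, 1]
    }
}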

Example 8 with IIndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.

the class MetadataProvider method getInvertedIndexRuntime.

private Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getInvertedIndexRuntime(String dataverseName, String datasetName, String indexName, IOperatorSchema propagatedSchema, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields, AsterixTupleFilterFactory filterFactory, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec, IndexOperation indexOp, IndexType indexType, boolean bulkload, List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys) throws AlgebricksException {
    // Check whether the index is length-partitioned.
    boolean isPartitioned;
    if (indexType == IndexType.LENGTH_PARTITIONED_WORD_INVIX || indexType == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX) {
        isPartitioned = true;
    } else {
        isPartitioned = false;
    }
    // Sanity checks.
    if (primaryKeys.size() > 1) {
        throw new AlgebricksException("Cannot create inverted index on dataset with composite primary key.");
    }
    // secondaryKeys may contain two fields ([token, number of tokens]) when the
    // index is length-partitioned and the input comes from a TokenizeOperator
    if ((secondaryKeys.size() > 1 && !isPartitioned) || (secondaryKeys.size() > 2 && isPartitioned)) {
        throw new AlgebricksException("Cannot create composite inverted index on multiple fields.");
    }
    Dataset dataset = MetadataManagerUtil.findExistingDataset(mdTxnCtx, dataverseName, datasetName);
    boolean temp = dataset.getDatasetDetails().isTemp();
    isTemporaryDatasetWriteJob = isTemporaryDatasetWriteJob && temp;
    // For tokenization, sorting and loading.
    // One token (+ optional partitioning field) + primary keys: [token,
    // number of token, PK]
    int numKeys = primaryKeys.size() + secondaryKeys.size();
    int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
    // generate field permutations
    int[] fieldPermutation = new int[numKeys + numFilterFields];
    int[] modificationCallbackPrimaryKeyFields = new int[primaryKeys.size()];
    int i = 0;
    int j = 0;
    // If the index is partitioned: [token, number of tokens]. Otherwise: [token]
    for (LogicalVariable varKey : secondaryKeys) {
        int idx = propagatedSchema.findVariable(varKey);
        fieldPermutation[i] = idx;
        i++;
    }
    for (LogicalVariable varKey : primaryKeys) {
        int idx = propagatedSchema.findVariable(varKey);
        fieldPermutation[i] = idx;
        modificationCallbackPrimaryKeyFields[j] = i;
        i++;
        j++;
    }
    if (numFilterFields > 0) {
        int idx = propagatedSchema.findVariable(additionalNonKeyFields.get(0));
        fieldPermutation[numKeys] = idx;
    }
    int[] prevFieldPermutation = null;
    if (indexOp == IndexOperation.UPSERT) {
        // Find permutations for prev value
        prevFieldPermutation = new int[numKeys + numFilterFields];
        i = 0;
        // If the index is partitioned: [token, number of tokens]. Otherwise: [token]
        for (LogicalVariable varKey : prevSecondaryKeys) {
            int idx = propagatedSchema.findVariable(varKey);
            prevFieldPermutation[i] = idx;
            i++;
        }
        for (int k = 0; k < primaryKeys.size(); k++) {
            prevFieldPermutation[k + i] = fieldPermutation[k + i];
            i++;
        }
        if (numFilterFields > 0) {
            int idx = propagatedSchema.findVariable(prevAdditionalFilteringKeys.get(0));
            prevFieldPermutation[numKeys] = idx;
        }
    }
    try {
        // Index parameters.
        Index secondaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), indexName);
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = getSplitProviderAndConstraints(dataset, secondaryIndex.getIndexName());
        // prepare callback
        JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId();
        IModificationOperationCallbackFactory modificationCallbackFactory = dataset.getModificationCallbackFactory(storaegComponentProvider, secondaryIndex, jobId, indexOp, modificationCallbackPrimaryKeyFields);
        IIndexDataflowHelperFactory indexDataFlowFactory = new IndexDataflowHelperFactory(storaegComponentProvider.getStorageManager(), splitsAndConstraint.first);
        IOperatorDescriptor op;
        if (bulkload) {
            long numElementsHint = getCardinalityPerPartitionHint(dataset);
            op = new TreeIndexBulkLoadOperatorDescriptor(spec, recordDesc, fieldPermutation, GlobalConfig.DEFAULT_TREE_FILL_FACTOR, false, numElementsHint, false, indexDataFlowFactory);
        } else if (indexOp == IndexOperation.UPSERT) {
            op = new LSMSecondaryUpsertOperatorDescriptor(spec, recordDesc, fieldPermutation, indexDataFlowFactory, filterFactory, modificationCallbackFactory, prevFieldPermutation);
        } else {
            op = new LSMTreeInsertDeleteOperatorDescriptor(spec, recordDesc, fieldPermutation, indexOp, indexDataFlowFactory, filterFactory, false, modificationCallbackFactory);
        }
        return new Pair<>(op, splitsAndConstraint.second);
    } catch (Exception e) {
        throw new AlgebricksException(e);
    }
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) Dataset(org.apache.asterix.metadata.entities.Dataset) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) Index(org.apache.asterix.metadata.entities.Index) IDataSourceIndex(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex) JobEventListenerFactory(org.apache.asterix.runtime.job.listener.JobEventListenerFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) DatasetCardinalityHint(org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetCardinalityHint) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) MetadataException(org.apache.asterix.metadata.MetadataException) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) CompilationException(org.apache.asterix.common.exceptions.CompilationException) IOException(java.io.IOException) AsterixException(org.apache.asterix.common.exceptions.AsterixException) LSMSecondaryUpsertOperatorDescriptor(org.apache.asterix.runtime.operators.LSMSecondaryUpsertOperatorDescriptor) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) IModificationOperationCallbackFactory(org.apache.hyracks.storage.am.common.api.IModificationOperationCallbackFactory) LSMTreeInsertDeleteOperatorDescriptor(org.apache.asterix.common.dataflow.LSMTreeInsertDeleteOperatorDescriptor) TreeIndexBulkLoadOperatorDescriptor(org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) JobId(org.apache.asterix.common.transactions.JobId) Pair(org.apache.hyracks.algebricks.common.utils.Pair)
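
For the inverted-index case the ordering is the reverse of the primary pipeline: the secondary key fields (the token, plus the token count when the index is length-partitioned) come first, then the primary keys, then the optional filter field. A small sketch with hypothetical positions; only the ordering mirrors getInvertedIndexRuntime.

// Illustrative sketch with hypothetical column positions; only the ordering mirrors getInvertedIndexRuntime.
import java.util.Arrays;

public final class InvertedIndexPermutationSketch {
    public static void main(String[] args) {
        boolean isPartitioned = true; // true for LENGTH_PARTITIONED_WORD_INVIX / LENGTH_PARTITIONED_NGRAM_INVIX
        int[] secondaryKeyPositions = isPartitioned
                ? new int[] { 4, 5 }  // [token, number of tokens]
                : new int[] { 4 };    // [token]
        int[] primaryKeyPositions = { 0 }; // composite primary keys are rejected, so exactly one PK
        int filterPosition = 6;            // present only when the dataset has a filter field

        int numKeys = secondaryKeyPositions.length + primaryKeyPositions.length;
        int[] fieldPermutation = new int[numKeys + 1]; // + 1 filter field
        int i = 0;
        for (int pos : secondaryKeyPositions) {
            fieldPermutation[i++] = pos; // secondary key(s) first
        }
        for (int pos : primaryKeyPositions) {
            fieldPermutation[i++] = pos; // then the primary key(s)
        }
        fieldPermutation[numKeys] = filterPosition; // filter field goes last
        System.out.println(Arrays.toString(fieldPermutation)); // [4, 5, 0, 6]
    }
}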

Example 9 with IIndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.

the class SecondaryIndexSearchExample method createJob.

private static JobSpecification createJob(Options options) throws HyracksDataException {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
    // schema of tuples coming out of secondary index
    RecordDescriptor secondaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
    int secondaryFieldCount = 2;
    ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
    secondaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
    secondaryTypeTraits[1] = IntegerPointable.TYPE_TRAITS;
    // comparators for sort fields and BTree fields
    IBinaryComparatorFactory[] secondaryComparatorFactories = new IBinaryComparatorFactory[2];
    secondaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    secondaryComparatorFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // comparators for primary index
    IBinaryComparatorFactory[] primaryComparatorFactories = new IBinaryComparatorFactory[1];
    primaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // schema of tuples coming out of primary index
    RecordDescriptor primaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    int primaryFieldCount = 4;
    ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
    primaryTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[2] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
    // comparators for btree, note that we only need a comparator for the
    // non-unique key
    // i.e. we will have a range condition on the first field only (implying
    // [-infinity, +infinity] for the second field)
    IBinaryComparatorFactory[] searchComparatorFactories = new IBinaryComparatorFactory[1];
    searchComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    // build tuple containing low and high search keys
    ArrayTupleBuilder tb = new ArrayTupleBuilder(searchComparatorFactories.length * 2);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    // low key
    new UTF8StringSerializerDeserializer().serialize("0", dos);
    tb.addFieldEndOffset();
    // high key
    new UTF8StringSerializerDeserializer().serialize("f", dos);
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    JobHelper.createPartitionConstraint(spec, keyProviderOp, splitNCs);
    // low key is in field 0 of tuples going into secondary index search op
    int[] secondaryLowKeyFields = { 0 };
    // high key is in field 1 of tuples going into secondary index search op
    int[] secondaryHighKeyFields = { 1 };
    IFileSplitProvider secondarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
    IIndexDataflowHelperFactory secondaryHelperFactory = new IndexDataflowHelperFactory(storageManager, secondarySplitProvider);
    BTreeSearchOperatorDescriptor secondarySearchOp = new BTreeSearchOperatorDescriptor(spec, secondaryRecDesc, secondaryLowKeyFields, secondaryHighKeyFields, true, true, secondaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    JobHelper.createPartitionConstraint(spec, secondarySearchOp, splitNCs);
    // secondary index will output tuples with [UTF8String, Integer]
    // the Integer field refers to the key in the primary index of the
    // source data records
    // low key is in field 1 of tuples going into primary index search op
    int[] primaryLowKeyFields = { 1 };
    // high key is in field 1 of tuples going into primary index search op
    int[] primaryHighKeyFields = { 1 };
    IFileSplitProvider primarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
    IIndexDataflowHelperFactory primaryHelperFactory = new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
    BTreeSearchOperatorDescriptor primarySearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc, primaryLowKeyFields, primaryHighKeyFields, true, true, primaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    JobHelper.createPartitionConstraint(spec, primarySearchOp, splitNCs);
    // have each node print the results of its respective B-Tree
    PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, printer, splitNCs);
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, secondarySearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondarySearchOp, 0, primarySearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primarySearchOp, 0, printer, 0);
    spec.addRoot(printer);
    return spec;
}
Also used : DataOutput(java.io.DataOutput) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) BTreeSearchOperatorDescriptor(org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor) ArrayTupleBuilder(org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IStorageManager(org.apache.hyracks.storage.common.IStorageManager) ConstantTupleSourceOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) PrinterOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.PrinterOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)
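
The method above only builds the JobSpecification; in the Hyracks B-Tree examples a job like this is then submitted to the cluster controller through the client API. A hedged sketch of that submission step, with the host and port as placeholder assumptions:

// Hedged sketch: submitting the JobSpecification built above; host/port are placeholders.
import org.apache.hyracks.api.client.HyracksConnection;
import org.apache.hyracks.api.client.IHyracksClientConnection;
import org.apache.hyracks.api.job.JobId;
import org.apache.hyracks.api.job.JobSpecification;

public final class RunSearchJobSketch {
    public static void run(JobSpecification job, String ccHost, int ccPort) throws Exception {
        IHyracksClientConnection hcc = new HyracksConnection(ccHost, ccPort); // connect to the cluster controller
        JobId jobId = hcc.startJob(job);   // submit the job
        hcc.waitForCompletion(jobId);      // block until the search pipeline (including the printer) finishes
    }
}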

Example 10 with IIndexDataflowHelperFactory

use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.

the class InsertPipelineExample method createJob.

private static JobSpecification createJob(Options options) {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    // schema of tuples to be generated: 5 fields with string, string, int, int, string
    // we will use field 2 as primary key to fill a clustered index
    RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), // this field will not go into B-Tree
            new UTF8StringSerializerDeserializer(), // we will use this as payload
            IntegerSerializerDeserializer.INSTANCE, // we will use this field as key
            IntegerSerializerDeserializer.INSTANCE, // we will use this as payload
            new UTF8StringSerializerDeserializer() }); // we will use this as payload
    // generate numRecords records with field 2 being unique, integer values
    // in [0, 100000], and strings with max length of 10 characters, and
    // random seed 100
    DataGenOperatorDescriptor dataGen = new DataGenOperatorDescriptor(spec, recDesc, options.numTuples, 2, 0, 100000, 10, 100);
    // run data generator on first nodecontroller given
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, dataGen, splitNCs[0]);
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
    // prepare insertion into primary index
    // tuples to be put into B-Tree shall have 4 fields
    int primaryFieldCount = 4;
    ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
    primaryTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[2] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
    // comparator factories for primary index
    IBinaryComparatorFactory[] primaryComparatorFactories = new IBinaryComparatorFactory[1];
    primaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // the B-Tree expects its key fields to be at the front of its input tuple
    // map field 2 of the input tuple to field 0 of the B-Tree tuple, etc.
    int[] primaryFieldPermutation = { 2, 1, 3, 4 };
    IFileSplitProvider primarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
    IIndexDataflowHelperFactory primaryHelperFactory = new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
    // create operator descriptor
    TreeIndexInsertUpdateDeleteOperatorDescriptor primaryInsert = new TreeIndexInsertUpdateDeleteOperatorDescriptor(spec, recDesc, primaryFieldPermutation, IndexOperation.INSERT, primaryHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
    JobHelper.createPartitionConstraint(spec, primaryInsert, splitNCs);
    // prepare insertion into secondary index
    // tuples to be put into B-Tree shall have 2 fields
    int secondaryFieldCount = 2;
    ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
    secondaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
    secondaryTypeTraits[1] = IntegerPointable.TYPE_TRAITS;
    // comparator factories for secondary index
    IBinaryComparatorFactory[] secondaryComparatorFactories = new IBinaryComparatorFactory[2];
    secondaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    secondaryComparatorFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // the B-Tree expects its key fields to be at the front of its input tuple
    int[] secondaryFieldPermutation = { 1, 2 };
    IFileSplitProvider secondarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
    IIndexDataflowHelperFactory secondaryHelperFactory = new IndexDataflowHelperFactory(storageManager, secondarySplitProvider);
    // create operator descriptor
    TreeIndexInsertUpdateDeleteOperatorDescriptor secondaryInsert = new TreeIndexInsertUpdateDeleteOperatorDescriptor(spec, recDesc, secondaryFieldPermutation, IndexOperation.INSERT, secondaryHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
    JobHelper.createPartitionConstraint(spec, secondaryInsert, splitNCs);
    // end the insert pipeline at this sink operator
    NullSinkOperatorDescriptor nullSink = new NullSinkOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, nullSink, splitNCs);
    // distribute the records from the datagen via hashing to the insert ops
    IBinaryHashFunctionFactory[] hashFactories = new IBinaryHashFunctionFactory[1];
    hashFactories[0] = PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY);
    IConnectorDescriptor hashConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, hashFactories));
    // connect the ops
    spec.connect(hashConn, dataGen, 0, primaryInsert, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryInsert, 0, secondaryInsert, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondaryInsert, 0, nullSink, 0);
    spec.addRoot(nullSink);
    return spec;
}
Also used : NullSinkOperatorDescriptor(org.apache.hyracks.dataflow.std.misc.NullSinkOperatorDescriptor) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) ITypeTraits(org.apache.hyracks.api.dataflow.value.ITypeTraits) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) UTF8StringSerializerDeserializer(org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) IStorageManager(org.apache.hyracks.storage.common.IStorageManager) TreeIndexInsertUpdateDeleteOperatorDescriptor(org.apache.hyracks.storage.am.common.dataflow.TreeIndexInsertUpdateDeleteOperatorDescriptor) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) DataGenOperatorDescriptor(org.apache.hyracks.examples.btree.helper.DataGenOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)
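
Both createJob(Options) examples read their configuration from an Options object populated on the command line. The sketch below lists the fields those snippets actually dereference (options.frameSize, options.ncs, options.numTuples, options.primaryBTreeName, options.secondaryBTreeName); the host/port fields and the default values are assumptions added for completeness.

// Hedged sketch of the Options object the two createJob(Options) methods read.
// Field names mirror the usages in the snippets; defaults and host/port are assumptions.
public class Options {
    public String host;               // cluster controller host (assumed, used when submitting the job)
    public int port = 1098;           // cluster controller port (assumed default)
    public String ncs;                // comma-separated node controller names, split into splitNCs
    public int frameSize = 32768;     // passed to new JobSpecification(options.frameSize) (assumed default)
    public int numTuples;             // number of records for the data generator (Example 10)
    public String primaryBTreeName;   // file split name of the primary B-Tree
    public String secondaryBTreeName; // file split name of the secondary B-Tree
}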

Aggregations

IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)32 IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)31 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)26 AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)21 JobSpecification (org.apache.hyracks.api.job.JobSpecification)19 Index (org.apache.asterix.metadata.entities.Index)13 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)11 MetadataException (org.apache.asterix.metadata.MetadataException)10 AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException)10 Pair (org.apache.hyracks.algebricks.common.utils.Pair)10 IStorageManager (org.apache.hyracks.storage.common.IStorageManager)9 IDataSourceIndex (org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex)8 ConnectorPolicyAssignmentPolicy (org.apache.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy)8 TreeIndexBulkLoadOperatorDescriptor (org.apache.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor)8 DatasetCardinalityHint (org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetCardinalityHint)7 AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint)7 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)7 JobId (org.apache.asterix.common.transactions.JobId)6 Dataset (org.apache.asterix.metadata.entities.Dataset)6 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)6