Use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.
From the class MetadataProvider, method getWriteResultRuntime.
@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getWriteResultRuntime(IDataSource<DataSourceId> dataSource, IOperatorSchema propagatedSchema, List<LogicalVariable> keys, LogicalVariable payload, List<LogicalVariable> additionalNonKeyFields, JobGenContext context, JobSpecification spec) throws AlgebricksException {
String dataverseName = dataSource.getId().getDataverseName();
String datasetName = dataSource.getId().getDatasourceName();
Dataset dataset = MetadataManagerUtil.findExistingDataset(mdTxnCtx, dataverseName, datasetName);
int numKeys = keys.size();
int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
// move key fields to front
int[] fieldPermutation = new int[numKeys + 1 + numFilterFields];
int i = 0;
for (LogicalVariable varKey : keys) {
int idx = propagatedSchema.findVariable(varKey);
fieldPermutation[i] = idx;
i++;
}
fieldPermutation[numKeys] = propagatedSchema.findVariable(payload);
if (numFilterFields > 0) {
int idx = propagatedSchema.findVariable(additionalNonKeyFields.get(0));
fieldPermutation[numKeys + 1] = idx;
}
try {
boolean temp = dataset.getDatasetDetails().isTemp();
isTemporaryDatasetWriteJob = isTemporaryDatasetWriteJob && temp;
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = getSplitProviderAndConstraints(dataset);
long numElementsHint = getCardinalityPerPartitionHint(dataset);
// TODO: figure out the right behavior of the bulkload and then give the right callback
// (e.g., what's the expected behavior when there is an error during bulkload?)
IIndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(storaegComponentProvider.getStorageManager(), splitsAndConstraint.first);
TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, null, fieldPermutation, GlobalConfig.DEFAULT_TREE_FILL_FACTOR, false, numElementsHint, true, indexHelperFactory);
return new Pair<>(btreeBulkLoad, splitsAndConstraint.second);
} catch (MetadataException me) {
throw new AlgebricksException(me);
}
}
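The permutation built above lays the primary keys out first, followed by the payload and an optional filter field, each slot holding the column position returned by propagatedSchema.findVariable(...). Below is a minimal, self-contained sketch of that layout in plain Java; the helper name and the position arguments are illustrative stand-ins, not part of MetadataProvider.
// Sketch: build a permutation of the form [keys..., payload, optional filter].
// The positions stand in for propagatedSchema.findVariable(...) results.
static int[] buildWritePermutation(int[] keyPositions, int payloadPosition, Integer filterPosition) {
    int numFilterFields = (filterPosition == null) ? 0 : 1;
    int[] fieldPermutation = new int[keyPositions.length + 1 + numFilterFields];
    int i = 0;
    for (int keyPos : keyPositions) {
        fieldPermutation[i++] = keyPos; // key columns move to the front
    }
    fieldPermutation[i++] = payloadPosition; // payload follows the keys
    if (filterPosition != null) {
        fieldPermutation[i] = filterPosition; // optional filter field goes last
    }
    return fieldPermutation;
}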
Use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.
From the class MetadataProvider, method getInsertOrDeleteRuntime.
private Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getInsertOrDeleteRuntime(IndexOperation indexOp, IDataSource<DataSourceId> dataSource, IOperatorSchema propagatedSchema, List<LogicalVariable> keys, LogicalVariable payload, List<LogicalVariable> additionalNonKeyFields, RecordDescriptor inputRecordDesc, JobGenContext context, JobSpecification spec, boolean bulkload, List<LogicalVariable> additionalNonFilteringFields) throws AlgebricksException {
String datasetName = dataSource.getId().getDatasourceName();
Dataset dataset = MetadataManagerUtil.findExistingDataset(mdTxnCtx, dataSource.getId().getDataverseName(), datasetName);
boolean temp = dataset.getDatasetDetails().isTemp();
isTemporaryDatasetWriteJob = isTemporaryDatasetWriteJob && temp;
int numKeys = keys.size();
int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
// Move key fields to front.
int[] fieldPermutation = new int[numKeys + 1 + numFilterFields + (additionalNonFilteringFields == null ? 0 : additionalNonFilteringFields.size())];
int[] bloomFilterKeyFields = new int[numKeys];
int i = 0;
for (LogicalVariable varKey : keys) {
int idx = propagatedSchema.findVariable(varKey);
fieldPermutation[i] = idx;
bloomFilterKeyFields[i] = i;
i++;
}
fieldPermutation[i++] = propagatedSchema.findVariable(payload);
if (numFilterFields > 0) {
int idx = propagatedSchema.findVariable(additionalNonKeyFields.get(0));
fieldPermutation[i++] = idx;
}
if (additionalNonFilteringFields != null) {
for (LogicalVariable variable : additionalNonFilteringFields) {
int idx = propagatedSchema.findVariable(variable);
fieldPermutation[i++] = idx;
}
}
try {
Index primaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), dataset.getDatasetName());
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = getSplitProviderAndConstraints(dataset);
// prepare callback
int[] primaryKeyFields = new int[numKeys];
for (i = 0; i < numKeys; i++) {
primaryKeyFields[i] = i;
}
IModificationOperationCallbackFactory modificationCallbackFactory = dataset.getModificationCallbackFactory(storaegComponentProvider, primaryIndex, jobId, indexOp, primaryKeyFields);
IIndexDataflowHelperFactory idfh = new IndexDataflowHelperFactory(storaegComponentProvider.getStorageManager(), splitsAndConstraint.first);
IOperatorDescriptor op;
if (bulkload) {
long numElementsHint = getCardinalityPerPartitionHint(dataset);
op = new TreeIndexBulkLoadOperatorDescriptor(spec, inputRecordDesc, fieldPermutation, GlobalConfig.DEFAULT_TREE_FILL_FACTOR, true, numElementsHint, true, idfh);
} else {
op = new LSMTreeInsertDeleteOperatorDescriptor(spec, inputRecordDesc, fieldPermutation, indexOp, idfh, null, true, modificationCallbackFactory);
}
return new Pair<>(op, splitsAndConstraint.second);
} catch (MetadataException me) {
throw new AlgebricksException(me);
}
}
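Beyond the permutation, this method builds two identity arrays over the key prefix: bloomFilterKeyFields and primaryKeyFields both hold 0..numKeys-1, because after permutation the primary keys occupy the leading columns of the operator's input. A tiny sketch of that invariant in plain Java; the helper name is illustrative.
// Sketch: after permutation the primary keys sit at columns 0..numKeys-1,
// so both the bloom-filter key fields and the modification callback's
// primary-key fields are identity prefixes of that length.
static int[] identityPrefix(int numKeys) {
    int[] fields = new int[numKeys];
    for (int i = 0; i < numKeys; i++) {
        fields[i] = i;
    }
    return fields;
}
// illustrative usage: int[] bloomFilterKeyFields = identityPrefix(numKeys);
//                     int[] primaryKeyFields = identityPrefix(numKeys);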
Use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.
From the class MetadataProvider, method getInvertedIndexRuntime.
private Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getInvertedIndexRuntime(String dataverseName, String datasetName, String indexName, IOperatorSchema propagatedSchema, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields, AsterixTupleFilterFactory filterFactory, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec, IndexOperation indexOp, IndexType indexType, boolean bulkload, List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys) throws AlgebricksException {
// Check the index is length-partitioned or not.
boolean isPartitioned;
if (indexType == IndexType.LENGTH_PARTITIONED_WORD_INVIX || indexType == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX) {
isPartitioned = true;
} else {
isPartitioned = false;
}
// Sanity checks.
if (primaryKeys.size() > 1) {
throw new AlgebricksException("Cannot create inverted index on dataset with composite primary key.");
}
// The secondary keys may be [token, number of tokens] when the input comes from a TokenizeOperator.
if ((secondaryKeys.size() > 1 && !isPartitioned) || (secondaryKeys.size() > 2 && isPartitioned)) {
throw new AlgebricksException("Cannot create composite inverted index on multiple fields.");
}
Dataset dataset = MetadataManagerUtil.findExistingDataset(mdTxnCtx, dataverseName, datasetName);
boolean temp = dataset.getDatasetDetails().isTemp();
isTemporaryDatasetWriteJob = isTemporaryDatasetWriteJob && temp;
// For tokenization, sorting and loading.
// One token (+ optional partitioning field) + primary keys: [token, number of tokens, PK]
int numKeys = primaryKeys.size() + secondaryKeys.size();
int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
// generate field permutations
int[] fieldPermutation = new int[numKeys + numFilterFields];
int[] modificationCallbackPrimaryKeyFields = new int[primaryKeys.size()];
int i = 0;
int j = 0;
// If the index is length-partitioned: [token, number of tokens]; otherwise: [token]
for (LogicalVariable varKey : secondaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
fieldPermutation[i] = idx;
i++;
}
for (LogicalVariable varKey : primaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
fieldPermutation[i] = idx;
modificationCallbackPrimaryKeyFields[j] = i;
i++;
j++;
}
if (numFilterFields > 0) {
int idx = propagatedSchema.findVariable(additionalNonKeyFields.get(0));
fieldPermutation[numKeys] = idx;
}
int[] prevFieldPermutation = null;
if (indexOp == IndexOperation.UPSERT) {
// Find permutations for prev value
prevFieldPermutation = new int[numKeys + numFilterFields];
i = 0;
// If the index is length-partitioned: [token, number of tokens]; otherwise: [token]
for (LogicalVariable varKey : prevSecondaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
prevFieldPermutation[i] = idx;
i++;
}
for (int k = 0; k < primaryKeys.size(); k++) {
prevFieldPermutation[k + i] = fieldPermutation[k + i];
i++;
}
if (numFilterFields > 0) {
int idx = propagatedSchema.findVariable(prevAdditionalFilteringKeys.get(0));
prevFieldPermutation[numKeys] = idx;
}
}
try {
// Index parameters.
Index secondaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), indexName);
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = getSplitProviderAndConstraints(dataset, secondaryIndex.getIndexName());
// prepare callback
JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId();
IModificationOperationCallbackFactory modificationCallbackFactory = dataset.getModificationCallbackFactory(storaegComponentProvider, secondaryIndex, jobId, indexOp, modificationCallbackPrimaryKeyFields);
IIndexDataflowHelperFactory indexDataFlowFactory = new IndexDataflowHelperFactory(storaegComponentProvider.getStorageManager(), splitsAndConstraint.first);
IOperatorDescriptor op;
if (bulkload) {
long numElementsHint = getCardinalityPerPartitionHint(dataset);
op = new TreeIndexBulkLoadOperatorDescriptor(spec, recordDesc, fieldPermutation, GlobalConfig.DEFAULT_TREE_FILL_FACTOR, false, numElementsHint, false, indexDataFlowFactory);
} else if (indexOp == IndexOperation.UPSERT) {
op = new LSMSecondaryUpsertOperatorDescriptor(spec, recordDesc, fieldPermutation, indexDataFlowFactory, filterFactory, modificationCallbackFactory, prevFieldPermutation);
} else {
op = new LSMTreeInsertDeleteOperatorDescriptor(spec, recordDesc, fieldPermutation, indexOp, indexDataFlowFactory, filterFactory, false, modificationCallbackFactory);
}
return new Pair<>(op, splitsAndConstraint.second);
} catch (Exception e) {
throw new AlgebricksException(e);
}
}
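For UPSERT the method derives a second permutation for the previous secondary-key values: only the token (and filter) slots are resolved against the prev variables, while the primary-key slots are copied from fieldPermutation, since the record's primary key does not change on upsert. A small sketch of that construction in plain Java; the positions and helper name are illustrative.
// Sketch: the "previous" permutation reuses the primary-key slots of the
// current permutation; only the token and filter slots point at prev columns.
static int[] buildPrevPermutation(int[] fieldPermutation, int[] prevTokenPositions,
        int numPrimaryKeys, Integer prevFilterPosition) {
    int numFilterFields = (prevFilterPosition == null) ? 0 : 1;
    int[] prev = new int[prevTokenPositions.length + numPrimaryKeys + numFilterFields];
    int i = 0;
    for (int pos : prevTokenPositions) {
        prev[i++] = pos; // previous token column(s) first
    }
    for (int k = 0; k < numPrimaryKeys; k++, i++) {
        prev[i] = fieldPermutation[i]; // primary keys are unchanged
    }
    if (prevFilterPosition != null) {
        prev[i] = prevFilterPosition; // previous filter value last
    }
    return prev;
}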
Use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.
From the class SecondaryIndexSearchExample, method createJob.
private static JobSpecification createJob(Options options) throws HyracksDataException {
JobSpecification spec = new JobSpecification(options.frameSize);
String[] splitNCs = options.ncs.split(",");
IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
// schema of tuples coming out of secondary index
RecordDescriptor secondaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
int secondaryFieldCount = 2;
ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
secondaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
secondaryTypeTraits[1] = IntegerPointable.TYPE_TRAITS;
// comparators for sort fields and BTree fields
IBinaryComparatorFactory[] secondaryComparatorFactories = new IBinaryComparatorFactory[2];
secondaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
secondaryComparatorFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
// comparators for primary index
IBinaryComparatorFactory[] primaryComparatorFactories = new IBinaryComparatorFactory[1];
primaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
// schema of tuples coming out of primary index
RecordDescriptor primaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
int primaryFieldCount = 4;
ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
primaryTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[2] = IntegerPointable.TYPE_TRAITS;
primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
// comparators for btree, note that we only need a comparator for the non-unique key,
// i.e. we will have a range condition on the first field only (implying [-infinity, +infinity] for the second field)
IBinaryComparatorFactory[] searchComparatorFactories = new IBinaryComparatorFactory[1];
searchComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
// build tuple containing low and high search keys
ArrayTupleBuilder tb = new ArrayTupleBuilder(searchComparatorFactories.length * 2);
DataOutput dos = tb.getDataOutput();
tb.reset();
// low key
new UTF8StringSerializerDeserializer().serialize("0", dos);
tb.addFieldEndOffset();
// high key
new UTF8StringSerializerDeserializer().serialize("f", dos);
tb.addFieldEndOffset();
ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec, keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
JobHelper.createPartitionConstraint(spec, keyProviderOp, splitNCs);
// low key is in field 0 of tuples going into secondary index search op
int[] secondaryLowKeyFields = { 0 };
// high key is in field 1 of tuples going into secondary index search op
int[] secondaryHighKeyFields = { 1 };
IFileSplitProvider secondarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
IIndexDataflowHelperFactory secondaryHelperFactory = new IndexDataflowHelperFactory(storageManager, secondarySplitProvider);
BTreeSearchOperatorDescriptor secondarySearchOp = new BTreeSearchOperatorDescriptor(spec, secondaryRecDesc, secondaryLowKeyFields, secondaryHighKeyFields, true, true, secondaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
JobHelper.createPartitionConstraint(spec, secondarySearchOp, splitNCs);
// secondary index will output tuples with [UTF8String, Integer]
// the Integer field refers to the key in the primary index of the
// source data records
// low key is in field 1 of tuples going into primary index search op
int[] primaryLowKeyFields = { 1 };
// high key is in field 1 of tuples going into primary index search op
int[] primaryHighKeyFields = { 1 };
IFileSplitProvider primarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
IIndexDataflowHelperFactory primaryHelperFactory = new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
BTreeSearchOperatorDescriptor primarySearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc, primaryLowKeyFields, primaryHighKeyFields, true, true, primaryHelperFactory, false, false, null, NoOpOperationCallbackFactory.INSTANCE, null, null, false);
JobHelper.createPartitionConstraint(spec, primarySearchOp, splitNCs);
// have each node print the results of its respective B-Tree
PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec);
JobHelper.createPartitionConstraint(spec, printer, splitNCs);
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, secondarySearchOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), secondarySearchOp, 0, primarySearchOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), primarySearchOp, 0, printer, 0);
spec.addRoot(printer);
return spec;
}
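The resulting pipeline is: constant key tuple ("0", "f") -> secondary B-Tree range search -> primary B-Tree point lookups -> printer, with field 1 of each secondary result ([UTF8String, Integer]) serving as both low and high key of the primary search. The following plain-Java sketch simulates that dataflow on ordinary collections; the maps stand in for the two B-Trees and are not part of the Hyracks example.
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;

// Sketch: range-scan a "secondary index" (name -> primary key), then do a
// point lookup per match in the "primary index" (primary key -> record).
class SecondaryToPrimarySketch {
    static List<String> search(NavigableMap<String, Integer> secondaryIndex,
            Map<Integer, String> primaryIndex, String lowKey, String highKey) {
        List<String> results = new ArrayList<>();
        for (int primaryKey : secondaryIndex.subMap(lowKey, true, highKey, true).values()) {
            results.add(primaryIndex.get(primaryKey)); // point lookup: low key == high key
        }
        return results;
    }
}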
Use of org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory in project asterixdb by apache.
From the class InsertPipelineExample, method createJob.
private static JobSpecification createJob(Options options) {
JobSpecification spec = new JobSpecification(options.frameSize);
String[] splitNCs = options.ncs.split(",");
// schema of tuples to be generated: 5 fields with string, string, int, int, string
// we will use field 2 as primary key to fill a clustered index
RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
new UTF8StringSerializerDeserializer(), // this field will not go into B-Tree
new UTF8StringSerializerDeserializer(), // we will use this as payload
IntegerSerializerDeserializer.INSTANCE, // we will use this field as key
IntegerSerializerDeserializer.INSTANCE, // we will use this as payload
new UTF8StringSerializerDeserializer() // we will use this as payload
});
// generate numRecords records with field 2 being unique, integer values
// in [0, 100000], and strings with max length of 10 characters, and
// random seed 100
DataGenOperatorDescriptor dataGen = new DataGenOperatorDescriptor(spec, recDesc, options.numTuples, 2, 0, 100000, 10, 100);
// run data generator on first nodecontroller given
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, dataGen, splitNCs[0]);
IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
// prepare insertion into primary index
// tuples to be put into B-Tree shall have 4 fields
int primaryFieldCount = 4;
ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
primaryTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
primaryTypeTraits[2] = IntegerPointable.TYPE_TRAITS;
primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
// comparator factories for primary index
IBinaryComparatorFactory[] primaryComparatorFactories = new IBinaryComparatorFactory[1];
primaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
// the B-Tree expects its key fields to be at the front of its input tuple:
// map field 2 of the input tuple to field 0 of the B-Tree tuple, etc.
int[] primaryFieldPermutation = { 2, 1, 3, 4 };
IFileSplitProvider primarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
IIndexDataflowHelperFactory primaryHelperFactory = new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
// create operator descriptor
TreeIndexInsertUpdateDeleteOperatorDescriptor primaryInsert = new TreeIndexInsertUpdateDeleteOperatorDescriptor(spec, recDesc, primaryFieldPermutation, IndexOperation.INSERT, primaryHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
JobHelper.createPartitionConstraint(spec, primaryInsert, splitNCs);
// prepare insertion into secondary index
// tuples to be put into B-Tree shall have 2 fields
int secondaryFieldCount = 2;
ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
secondaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
secondaryTypeTraits[1] = IntegerPointable.TYPE_TRAITS;
// comparator factories for secondary index
IBinaryComparatorFactory[] secondaryComparatorFactories = new IBinaryComparatorFactory[2];
secondaryComparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
secondaryComparatorFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
// the B-Tree expects its key fields to be at the front of its input tuple
int[] secondaryFieldPermutation = { 1, 2 };
IFileSplitProvider secondarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
IIndexDataflowHelperFactory secondaryHelperFactory = new IndexDataflowHelperFactory(storageManager, secondarySplitProvider);
// create operator descriptor
TreeIndexInsertUpdateDeleteOperatorDescriptor secondaryInsert = new TreeIndexInsertUpdateDeleteOperatorDescriptor(spec, recDesc, secondaryFieldPermutation, IndexOperation.INSERT, secondaryHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
JobHelper.createPartitionConstraint(spec, secondaryInsert, splitNCs);
// end the insert pipeline at this sink operator
NullSinkOperatorDescriptor nullSink = new NullSinkOperatorDescriptor(spec);
JobHelper.createPartitionConstraint(spec, nullSink, splitNCs);
// distribute the records from the datagen via hashing to the insert ops
IBinaryHashFunctionFactory[] hashFactories = new IBinaryHashFunctionFactory[1];
hashFactories[0] = PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY);
IConnectorDescriptor hashConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, hashFactories));
// connect the ops
spec.connect(hashConn, dataGen, 0, primaryInsert, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), primaryInsert, 0, secondaryInsert, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), secondaryInsert, 0, nullSink, 0);
spec.addRoot(nullSink);
return spec;
}
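The two permutations move the generated record's key column to the front of each B-Tree tuple: { 2, 1, 3, 4 } builds the 4-field primary tuple (int key first) and { 1, 2 } builds the 2-field secondary tuple (string key first). A tiny sketch of applying such a permutation to one row in plain Java; the sample record values are illustrative.
// Sketch: column i of the output comes from input column fieldPermutation[i].
static Object[] permute(Object[] inputTuple, int[] fieldPermutation) {
    Object[] out = new Object[fieldPermutation.length];
    for (int i = 0; i < fieldPermutation.length; i++) {
        out[i] = inputTuple[fieldPermutation[i]];
    }
    return out;
}
// illustrative usage on a generated record ["s0", "s1", 42, 7, "s4"]:
// permute(record, new int[] { 2, 1, 3, 4 }) -> [42, "s1", 7, "s4"] (primary B-Tree tuple)
// permute(record, new int[] { 1, 2 }) -> ["s1", 42] (secondary B-Tree tuple)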