Use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.
The class SecondaryTreeIndexOperationsHelper, method buildDropJobSpec.
@Override
public JobSpecification buildDropJobSpec() throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset, index.getIndexName());
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    // The index drop operation should be persistent regardless of whether the dataset is temporary or permanent.
    IndexDropOperatorDescriptor btreeDrop = new IndexDropOperatorDescriptor(spec, dataflowHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, btreeDrop, splitsAndConstraint.second);
    spec.addRoot(btreeDrop);
    return spec;
}
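The returned JobSpecification only describes the drop job; a caller still has to submit it to the Hyracks cluster controller. Below is a minimal sketch of such a submission through the Hyracks client API, assuming a cluster controller reachable at localhost:1098; the host, port, and wrapper class are illustrative assumptions, not part of the asterixdb code above.

import org.apache.hyracks.api.client.HyracksConnection;
import org.apache.hyracks.api.client.IHyracksClientConnection;
import org.apache.hyracks.api.job.JobId;
import org.apache.hyracks.api.job.JobSpecification;

public class DropJobRunner {
    // Hypothetical driver: submit a previously built drop spec and wait for it to finish.
    public static void runDropJob(JobSpecification spec) throws Exception {
        // Cluster controller address is an assumption; use your deployment's CC host/port.
        IHyracksClientConnection hcc = new HyracksConnection("localhost", 1098);
        JobId jobId = hcc.startJob(spec);   // submit the job for execution
        hcc.waitForCompletion(jobId);       // block until the index drop completes
    }
}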
Use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.
The class SecondaryIndexBulkLoadExample, method createJob.
private static JobSpecification createJob(Options options) {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
    // schema of tuples that we are retrieving from the primary index
    RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            // we will use this as payload in the secondary index
            IntegerSerializerDeserializer.INSTANCE,
            // we will use this as key in the secondary index
            new UTF8StringSerializerDeserializer(),
            IntegerSerializerDeserializer.INSTANCE,
            new UTF8StringSerializerDeserializer() });
    int primaryFieldCount = 4;
    ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
    primaryTypeTraits[0] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    primaryTypeTraits[2] = IntegerPointable.TYPE_TRAITS;
    primaryTypeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
    // comparators for sort fields and B-Tree fields
    IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[2];
    comparatorFactories[0] = PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY);
    comparatorFactories[1] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // use a disk-order scan to read the primary index
    IFileSplitProvider primarySplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.primaryBTreeName);
    IIndexDataflowHelperFactory primaryHelperFactory =
            new IndexDataflowHelperFactory(storageManager, primarySplitProvider);
    TreeIndexDiskOrderScanOperatorDescriptor btreeScanOp = new TreeIndexDiskOrderScanOperatorDescriptor(spec,
            recDesc, primaryHelperFactory, NoOpOperationCallbackFactory.INSTANCE);
    JobHelper.createPartitionConstraint(spec, btreeScanOp, splitNCs);
    // sort the tuples as preparation for bulk load into the secondary index
    // fields to sort on
    int[] sortFields = { 1, 0 };
    ExternalSortOperatorDescriptor sorter =
            new ExternalSortOperatorDescriptor(spec, options.sbSize, sortFields, comparatorFactories, recDesc);
    JobHelper.createPartitionConstraint(spec, sorter, splitNCs);
    // tuples to be put into the B-Tree shall have 2 fields
    int secondaryFieldCount = 2;
    ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
    secondaryTypeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
    secondaryTypeTraits[1] = IntegerPointable.TYPE_TRAITS;
    // the B-Tree expects its key fields to be at the front of its input tuple
    int[] fieldPermutation = { 1, 0 };
    IFileSplitProvider btreeSplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.secondaryBTreeName);
    IIndexDataflowHelperFactory secondaryHelperFactory =
            new IndexDataflowHelperFactory(storageManager, btreeSplitProvider);
    TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, null,
            fieldPermutation, 0.7f, false, 1000L, true, secondaryHelperFactory);
    JobHelper.createPartitionConstraint(spec, btreeBulkLoad, splitNCs);
    NullSinkOperatorDescriptor nsOpDesc = new NullSinkOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, nsOpDesc, splitNCs);
    // connect the ops
    spec.connect(new OneToOneConnectorDescriptor(spec), btreeScanOp, 0, sorter, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, btreeBulkLoad, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), btreeBulkLoad, 0, nsOpDesc, 0);
    spec.addRoot(nsOpDesc);
    return spec;
}
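The Options object supplies the frame size, node controller list, B-Tree names, and sort buffer size used above, but its definition is not part of this snippet. A hedged sketch of what such a holder might look like follows; the field names come from the usages in createJob, while the args4j @Option annotations (org.kohsuke.args4j.Option), flag names, and defaults are assumptions for illustration.

// Hypothetical options holder for the bulk-load example; only the field names are
// taken from the snippet above, everything else is an assumption.
static class Options {
    @Option(name = "-frame-size", usage = "Hyracks frame size in bytes")
    public int frameSize = 32768;

    @Option(name = "-target-ncs", usage = "Comma-separated list of node controller names", required = true)
    public String ncs;

    @Option(name = "-primary-btreename", usage = "Name of the primary B-Tree to scan", required = true)
    public String primaryBTreeName;

    @Option(name = "-secondary-btreename", usage = "Name of the secondary B-Tree to bulk load", required = true)
    public String secondaryBTreeName;

    @Option(name = "-sortbuffer-size", usage = "Sort buffer size in frames")
    public int sbSize = 32768;
}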
Use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.
The class PrimaryIndexSearchExample, method createJob.
private static JobSpecification createJob(Options options) throws HyracksDataException {
    JobSpecification spec = new JobSpecification(options.frameSize);
    String[] splitNCs = options.ncs.split(",");
    int fieldCount = 4;
    ITypeTraits[] typeTraits = new ITypeTraits[fieldCount];
    typeTraits[0] = IntegerPointable.TYPE_TRAITS;
    typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
    typeTraits[2] = IntegerPointable.TYPE_TRAITS;
    typeTraits[3] = UTF8StringPointable.TYPE_TRAITS;
    // comparators for the B-Tree
    IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[1];
    comparatorFactories[0] = PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY);
    // create providers for the B-Tree
    IStorageManager storageManager = BTreeHelperStorageManager.INSTANCE;
    // schema of tuples coming out of the primary index
    RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
    // build a tuple containing the low and high search keys
    ArrayTupleBuilder tb = new ArrayTupleBuilder(comparatorFactories.length * 2); // low key and high key
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    // low key
    IntegerSerializerDeserializer.INSTANCE.serialize(100, dos);
    tb.addFieldEndOffset();
    // high key
    IntegerSerializerDeserializer.INSTANCE.serialize(200, dos);
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers =
            { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,
            keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    JobHelper.createPartitionConstraint(spec, keyProviderOp, splitNCs);
    // low key is in field 0 of tuples going into the search op
    int[] lowKeyFields = { 0 };
    // high key is in field 1 of tuples going into the search op
    int[] highKeyFields = { 1 };
    IFileSplitProvider btreeSplitProvider = JobHelper.createFileSplitProvider(splitNCs, options.btreeName);
    IIndexDataflowHelperFactory dataflowHelperFactory =
            new IndexDataflowHelperFactory(storageManager, btreeSplitProvider);
    BTreeSearchOperatorDescriptor btreeSearchOp = new BTreeSearchOperatorDescriptor(spec, recDesc, lowKeyFields,
            highKeyFields, true, true, dataflowHelperFactory, false, false, null,
            NoOpOperationCallbackFactory.INSTANCE, null, null, false);
    JobHelper.createPartitionConstraint(spec, btreeSearchOp, splitNCs);
    // have each node print the results of its respective B-Tree
    PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec);
    JobHelper.createPartitionConstraint(spec, printer, splitNCs);
    spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, btreeSearchOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), btreeSearchOp, 0, printer, 0);
    spec.addRoot(printer);
    return spec;
}
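The constant tuple above hard-codes the search range [100, 200]. The same ArrayTupleBuilder pattern could be factored into a small helper that builds a key tuple for an arbitrary integer range; the sketch below reuses only classes already shown in the snippet, and the helper name itself is hypothetical.

// Hypothetical helper: build a constant search-key tuple for an arbitrary integer range.
// Field 0 holds the low key and field 1 the high key, mirroring lowKeyFields/highKeyFields above.
private static ArrayTupleBuilder buildIntRangeKeyTuple(int lowKey, int highKey) throws HyracksDataException {
    ArrayTupleBuilder tb = new ArrayTupleBuilder(2);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    IntegerSerializerDeserializer.INSTANCE.serialize(lowKey, dos);  // low key
    tb.addFieldEndOffset();
    IntegerSerializerDeserializer.INSTANCE.serialize(highKey, dos); // high key
    tb.addFieldEndOffset();
    return tb;
}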
Use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.
The class DatasetUtil, method buildDropFilesIndexJobSpec.
public static JobSpecification buildDropFilesIndexJobSpec(MetadataProvider metadataProvider, Dataset dataset)
        throws AlgebricksException {
    String indexName = IndexingConstants.getFilesIndexName(dataset.getDatasetName());
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset, indexName);
    IIndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    IndexDropOperatorDescriptor btreeDrop = new IndexDropOperatorDescriptor(spec, indexHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, btreeDrop, splitsAndConstraint.second);
    spec.addRoot(btreeDrop);
    return spec;
}
Use of org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory in project asterixdb by apache.
The class DatasetUtil, method createPrimaryIndexUpsertOp.
/**
 * Creates a primary index upsert operator for a given dataset.
 *
 * @param spec
 *            the job specification.
 * @param metadataProvider
 *            the metadata provider.
 * @param dataset
 *            the dataset to upsert.
 * @param inputRecordDesc
 *            the record descriptor for an input tuple.
 * @param fieldPermutation
 *            the field permutation according to the input.
 * @param missingWriterFactory
 *            the factory for customizing missing value serialization.
 * @return a primary index upsert operator and its location constraints.
 * @throws AlgebricksException
 */
public static Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> createPrimaryIndexUpsertOp(
        JobSpecification spec, MetadataProvider metadataProvider, Dataset dataset, RecordDescriptor inputRecordDesc,
        int[] fieldPermutation, IMissingWriterFactory missingWriterFactory) throws AlgebricksException {
    int numKeys = dataset.getPrimaryKeys().size();
    int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
    ARecordType itemType = (ARecordType) metadataProvider.findType(dataset);
    ARecordType metaItemType = (ARecordType) metadataProvider.findMetaType(dataset);
    try {
        Index primaryIndex = metadataProvider.getIndex(dataset.getDataverseName(), dataset.getDatasetName(),
                dataset.getDatasetName());
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
                metadataProvider.getSplitProviderAndConstraints(dataset);
        // prepare callback
        JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId();
        int[] primaryKeyFields = new int[numKeys];
        for (int i = 0; i < numKeys; i++) {
            primaryKeyFields[i] = i;
        }
        boolean hasSecondaries =
                metadataProvider.getDatasetIndexes(dataset.getDataverseName(), dataset.getDatasetName()).size() > 1;
        IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
        IModificationOperationCallbackFactory modificationCallbackFactory = dataset.getModificationCallbackFactory(
                storageComponentProvider, primaryIndex, jobId, IndexOperation.UPSERT, primaryKeyFields);
        ISearchOperationCallbackFactory searchCallbackFactory = dataset.getSearchCallbackFactory(
                storageComponentProvider, primaryIndex, jobId, IndexOperation.UPSERT, primaryKeyFields);
        IIndexDataflowHelperFactory idfh = new IndexDataflowHelperFactory(
                storageComponentProvider.getStorageManager(), splitsAndConstraint.first);
        LSMPrimaryUpsertOperatorDescriptor op;
        ITypeTraits[] outputTypeTraits =
                new ITypeTraits[inputRecordDesc.getFieldCount() + (dataset.hasMetaPart() ? 2 : 1) + numFilterFields];
        ISerializerDeserializer<?>[] outputSerDes = new ISerializerDeserializer[inputRecordDesc.getFieldCount()
                + (dataset.hasMetaPart() ? 2 : 1) + numFilterFields];
        // add the previous record first
        int f = 0;
        outputSerDes[f] = FormatUtils.getDefaultFormat().getSerdeProvider().getSerializerDeserializer(itemType);
        f++;
        // add the previous meta second
        if (dataset.hasMetaPart()) {
            outputSerDes[f] =
                    FormatUtils.getDefaultFormat().getSerdeProvider().getSerializerDeserializer(metaItemType);
            outputTypeTraits[f] = FormatUtils.getDefaultFormat().getTypeTraitProvider().getTypeTrait(metaItemType);
            f++;
        }
        // add the previous filter third
        int fieldIdx = -1;
        if (numFilterFields > 0) {
            String filterField = DatasetUtil.getFilterField(dataset).get(0);
            String[] fieldNames = itemType.getFieldNames();
            int i = 0;
            for (; i < fieldNames.length; i++) {
                if (fieldNames[i].equals(filterField)) {
                    break;
                }
            }
            fieldIdx = i;
            outputTypeTraits[f] = FormatUtils.getDefaultFormat().getTypeTraitProvider()
                    .getTypeTrait(itemType.getFieldTypes()[fieldIdx]);
            outputSerDes[f] = FormatUtils.getDefaultFormat().getSerdeProvider()
                    .getSerializerDeserializer(itemType.getFieldTypes()[fieldIdx]);
            f++;
        }
        for (int j = 0; j < inputRecordDesc.getFieldCount(); j++) {
            outputTypeTraits[j + f] = inputRecordDesc.getTypeTraits()[j];
            outputSerDes[j + f] = inputRecordDesc.getFields()[j];
        }
        RecordDescriptor outputRecordDesc = new RecordDescriptor(outputSerDes, outputTypeTraits);
        op = new LSMPrimaryUpsertOperatorDescriptor(spec, outputRecordDesc, fieldPermutation, idfh,
                missingWriterFactory, modificationCallbackFactory, searchCallbackFactory,
                dataset.getFrameOpCallbackFactory(), numKeys, itemType, fieldIdx, hasSecondaries);
        return new Pair<>(op, splitsAndConstraint.second);
    } catch (MetadataException me) {
        throw new AlgebricksException(me);
    }
}
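The method returns the upsert operator together with its partition constraint; a caller is expected to add the operator to the job and apply that constraint itself. The fragment below is a hedged sketch of such a caller, reusing only helpers already seen on this page; the upstream operator and variable names are assumptions for illustration.

// Hypothetical caller: place the upsert operator into the job and pin it to the
// dataset's storage partitions, then feed it from an existing upstream operator.
Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> upsertAndConstraint =
        DatasetUtil.createPrimaryIndexUpsertOp(spec, metadataProvider, dataset, inputRecordDesc,
                fieldPermutation, missingWriterFactory);
IOperatorDescriptor primaryUpsertOp = upsertAndConstraint.first;
AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, primaryUpsertOp,
        upsertAndConstraint.second);
// upstreamOp (e.g., a parser or assign operator producing inputRecordDesc tuples) is assumed to exist
spec.connect(new OneToOneConnectorDescriptor(spec), upstreamOp, 0, primaryUpsertOp, 0);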