use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.
the class FeedOperations method combineIntakeCollectJobs.
private static JobSpecification combineIntakeCollectJobs(MetadataProvider metadataProvider, Feed feed, JobSpecification intakeJob, List<JobSpecification> jobsList, List<FeedConnection> feedConnections, String[] intakeLocations) throws AlgebricksException, HyracksDataException {
JobSpecification jobSpec = new JobSpecification(intakeJob.getFrameSize());
// copy ingestor
FeedIntakeOperatorDescriptor firstOp = (FeedIntakeOperatorDescriptor) intakeJob.getOperatorMap().get(new OperatorDescriptorId(0));
FeedIntakeOperatorDescriptor ingestionOp;
if (firstOp.getAdaptorFactory() == null) {
ingestionOp = new FeedIntakeOperatorDescriptor(jobSpec, feed, firstOp.getAdaptorLibraryName(), firstOp.getAdaptorFactoryClassName(), firstOp.getAdapterOutputType(), firstOp.getPolicyAccessor(), firstOp.getOutputRecordDescriptors()[0]);
} else {
ingestionOp = new FeedIntakeOperatorDescriptor(jobSpec, feed, firstOp.getAdaptorFactory(), firstOp.getAdapterOutputType(), firstOp.getPolicyAccessor(), firstOp.getOutputRecordDescriptors()[0]);
}
// create replicator
ReplicateOperatorDescriptor replicateOp = new ReplicateOperatorDescriptor(jobSpec, ingestionOp.getOutputRecordDescriptors()[0], jobsList.size());
jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), ingestionOp, 0, replicateOp, 0);
PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, ingestionOp, intakeLocations);
PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, replicateOp, intakeLocations);
// Loop over the jobs to copy operators and connections
Map<OperatorDescriptorId, OperatorDescriptorId> operatorIdMapping = new HashMap<>();
Map<ConnectorDescriptorId, ConnectorDescriptorId> connectorIdMapping = new HashMap<>();
Map<OperatorDescriptorId, List<LocationConstraint>> operatorLocations = new HashMap<>();
Map<OperatorDescriptorId, Integer> operatorCounts = new HashMap<>();
List<JobId> jobIds = new ArrayList<>();
FeedMetaOperatorDescriptor metaOp;
for (int iter1 = 0; iter1 < jobsList.size(); iter1++) {
FeedConnection curFeedConnection = feedConnections.get(iter1);
JobSpecification subJob = jobsList.get(iter1);
operatorIdMapping.clear();
Map<OperatorDescriptorId, IOperatorDescriptor> operatorsMap = subJob.getOperatorMap();
String datasetName = feedConnections.get(iter1).getDatasetName();
FeedConnectionId feedConnectionId = new FeedConnectionId(ingestionOp.getEntityId(), datasetName);
FeedPolicyEntity feedPolicyEntity = FeedMetadataUtil.validateIfPolicyExists(curFeedConnection.getDataverseName(), curFeedConnection.getPolicyName(), metadataProvider.getMetadataTxnContext());
for (Map.Entry<OperatorDescriptorId, IOperatorDescriptor> entry : operatorsMap.entrySet()) {
IOperatorDescriptor opDesc = entry.getValue();
OperatorDescriptorId oldId = opDesc.getOperatorId();
OperatorDescriptorId opId = null;
if (opDesc instanceof LSMTreeInsertDeleteOperatorDescriptor && ((LSMTreeInsertDeleteOperatorDescriptor) opDesc).isPrimary()) {
metaOp = new FeedMetaOperatorDescriptor(jobSpec, feedConnectionId, opDesc, feedPolicyEntity.getProperties(), FeedRuntimeType.STORE);
opId = metaOp.getOperatorId();
opDesc.setOperatorId(opId);
} else {
if (opDesc instanceof AlgebricksMetaOperatorDescriptor) {
AlgebricksMetaOperatorDescriptor algOp = (AlgebricksMetaOperatorDescriptor) opDesc;
IPushRuntimeFactory[] runtimeFactories = algOp.getPipeline().getRuntimeFactories();
// Tweak AssignOp to work with messages
if (runtimeFactories[0] instanceof AssignRuntimeFactory && runtimeFactories.length > 1) {
IConnectorDescriptor connectorDesc = subJob.getOperatorInputMap().get(opDesc.getOperatorId()).get(0);
// anything on the network interface needs to be message compatible
if (connectorDesc instanceof MToNPartitioningConnectorDescriptor) {
metaOp = new FeedMetaOperatorDescriptor(jobSpec, feedConnectionId, opDesc, feedPolicyEntity.getProperties(), FeedRuntimeType.COMPUTE);
opId = metaOp.getOperatorId();
opDesc.setOperatorId(opId);
}
}
}
if (opId == null) {
opId = jobSpec.createOperatorDescriptorId(opDesc);
}
}
operatorIdMapping.put(oldId, opId);
}
// copy connectors
connectorIdMapping.clear();
for (Entry<ConnectorDescriptorId, IConnectorDescriptor> entry : subJob.getConnectorMap().entrySet()) {
IConnectorDescriptor connDesc = entry.getValue();
ConnectorDescriptorId newConnId;
if (connDesc instanceof MToNPartitioningConnectorDescriptor) {
MToNPartitioningConnectorDescriptor m2nConn = (MToNPartitioningConnectorDescriptor) connDesc;
connDesc = new MToNPartitioningWithMessageConnectorDescriptor(jobSpec, m2nConn.getTuplePartitionComputerFactory());
newConnId = connDesc.getConnectorId();
} else {
newConnId = jobSpec.createConnectorDescriptor(connDesc);
}
connectorIdMapping.put(entry.getKey(), newConnId);
}
// make connections between operators
for (Entry<ConnectorDescriptorId, Pair<Pair<IOperatorDescriptor, Integer>, Pair<IOperatorDescriptor, Integer>>> entry : subJob.getConnectorOperatorMap().entrySet()) {
ConnectorDescriptorId newId = connectorIdMapping.get(entry.getKey());
IConnectorDescriptor connDesc = jobSpec.getConnectorMap().get(newId);
Pair<IOperatorDescriptor, Integer> leftOp = entry.getValue().getLeft();
Pair<IOperatorDescriptor, Integer> rightOp = entry.getValue().getRight();
IOperatorDescriptor leftOpDesc = jobSpec.getOperatorMap().get(leftOp.getLeft().getOperatorId());
IOperatorDescriptor rightOpDesc = jobSpec.getOperatorMap().get(rightOp.getLeft().getOperatorId());
if (leftOp.getLeft() instanceof FeedCollectOperatorDescriptor) {
jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), replicateOp, iter1, leftOpDesc, leftOp.getRight());
}
jobSpec.connect(connDesc, leftOpDesc, leftOp.getRight(), rightOpDesc, rightOp.getRight());
}
// prepare for setting partition constraints
operatorLocations.clear();
operatorCounts.clear();
for (Constraint constraint : subJob.getUserConstraints()) {
LValueConstraintExpression lexpr = constraint.getLValue();
ConstraintExpression cexpr = constraint.getRValue();
OperatorDescriptorId opId;
switch(lexpr.getTag()) {
case PARTITION_COUNT:
opId = ((PartitionCountExpression) lexpr).getOperatorDescriptorId();
operatorCounts.put(operatorIdMapping.get(opId), (int) ((ConstantExpression) cexpr).getValue());
break;
case PARTITION_LOCATION:
opId = ((PartitionLocationExpression) lexpr).getOperatorDescriptorId();
IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(operatorIdMapping.get(opId));
List<LocationConstraint> locations = operatorLocations.get(opDesc.getOperatorId());
if (locations == null) {
locations = new ArrayList<>();
operatorLocations.put(opDesc.getOperatorId(), locations);
}
String location = (String) ((ConstantExpression) cexpr).getValue();
LocationConstraint lc = new LocationConstraint(location, ((PartitionLocationExpression) lexpr).getPartition());
locations.add(lc);
break;
default:
break;
}
}
// set absolute location constraints
for (Entry<OperatorDescriptorId, List<LocationConstraint>> entry : operatorLocations.entrySet()) {
IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(entry.getKey());
// why do we need to sort?
Collections.sort(entry.getValue(), (LocationConstraint o1, LocationConstraint o2) -> {
return o1.partition - o2.partition;
});
String[] locations = new String[entry.getValue().size()];
for (int j = 0; j < locations.length; ++j) {
locations[j] = entry.getValue().get(j).location;
}
PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, opDesc, locations);
}
// set count constraints
for (Entry<OperatorDescriptorId, Integer> entry : operatorCounts.entrySet()) {
IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(entry.getKey());
if (!operatorLocations.keySet().contains(entry.getKey())) {
PartitionConstraintHelper.addPartitionCountConstraint(jobSpec, opDesc, entry.getValue());
}
}
// roots
for (OperatorDescriptorId root : subJob.getRoots()) {
jobSpec.addRoot(jobSpec.getOperatorMap().get(operatorIdMapping.get(root)));
}
jobIds.add(((JobEventListenerFactory) subJob.getJobletEventListenerFactory()).getJobId());
}
// jobEventListenerFactory
jobSpec.setJobletEventListenerFactory(new MultiTransactionJobletEventListenerFactory(jobIds, true));
// useConnectorSchedulingPolicy
jobSpec.setUseConnectorPolicyForScheduling(jobsList.get(0).isUseConnectorPolicyForScheduling());
// connectorAssignmentPolicy
jobSpec.setConnectorPolicyAssignmentPolicy(jobsList.get(0).getConnectorPolicyAssignmentPolicy());
return jobSpec;
}
use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.
the class MetadataProvider method getBTreeRuntime.
private Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getBTreeRuntime(String dataverseName, String datasetName, String indexName, IOperatorSchema propagatedSchema, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields, AsterixTupleFilterFactory filterFactory, RecordDescriptor inputRecordDesc, JobGenContext context, JobSpecification spec, IndexOperation indexOp, boolean bulkload, List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys) throws AlgebricksException {
Dataset dataset = MetadataManagerUtil.findExistingDataset(mdTxnCtx, dataverseName, datasetName);
boolean temp = dataset.getDatasetDetails().isTemp();
isTemporaryDatasetWriteJob = isTemporaryDatasetWriteJob && temp;
int numKeys = primaryKeys.size() + secondaryKeys.size();
int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
// generate field permutations
int[] fieldPermutation = new int[numKeys + numFilterFields];
int[] modificationCallbackPrimaryKeyFields = new int[primaryKeys.size()];
int i = 0;
int j = 0;
for (LogicalVariable varKey : secondaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
fieldPermutation[i] = idx;
i++;
}
for (LogicalVariable varKey : primaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
fieldPermutation[i] = idx;
modificationCallbackPrimaryKeyFields[j] = i;
i++;
j++;
}
if (numFilterFields > 0) {
int idx = propagatedSchema.findVariable(additionalNonKeyFields.get(0));
fieldPermutation[numKeys] = idx;
}
int[] prevFieldPermutation = null;
if (indexOp == IndexOperation.UPSERT) {
// generate field permutations for prev record
prevFieldPermutation = new int[numKeys + numFilterFields];
int k = 0;
for (LogicalVariable varKey : prevSecondaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
prevFieldPermutation[k] = idx;
k++;
}
for (LogicalVariable varKey : primaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
prevFieldPermutation[k] = idx;
k++;
}
// Filter can only be one field!
if (numFilterFields > 0) {
int idx = propagatedSchema.findVariable(prevAdditionalFilteringKeys.get(0));
prevFieldPermutation[numKeys] = idx;
}
}
try {
// Index parameters.
Index secondaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), indexName);
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = getSplitProviderAndConstraints(dataset, secondaryIndex.getIndexName());
// prepare callback
JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId();
IModificationOperationCallbackFactory modificationCallbackFactory = dataset.getModificationCallbackFactory(storaegComponentProvider, secondaryIndex, jobId, indexOp, modificationCallbackPrimaryKeyFields);
IIndexDataflowHelperFactory idfh = new IndexDataflowHelperFactory(storaegComponentProvider.getStorageManager(), splitsAndConstraint.first);
IOperatorDescriptor op;
if (bulkload) {
long numElementsHint = getCardinalityPerPartitionHint(dataset);
op = new TreeIndexBulkLoadOperatorDescriptor(spec, inputRecordDesc, fieldPermutation, GlobalConfig.DEFAULT_TREE_FILL_FACTOR, false, numElementsHint, false, idfh);
} else if (indexOp == IndexOperation.UPSERT) {
op = new LSMSecondaryUpsertOperatorDescriptor(spec, inputRecordDesc, fieldPermutation, idfh, filterFactory, modificationCallbackFactory, prevFieldPermutation);
} else {
op = new LSMTreeInsertDeleteOperatorDescriptor(spec, inputRecordDesc, fieldPermutation, indexOp, idfh, filterFactory, false, modificationCallbackFactory);
}
return new Pair<>(op, splitsAndConstraint.second);
} catch (Exception e) {
throw new AlgebricksException(e);
}
}
use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.
the class MetadataProvider method getRTreeRuntime.
private Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getRTreeRuntime(String dataverseName, String datasetName, String indexName, IOperatorSchema propagatedSchema, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields, AsterixTupleFilterFactory filterFactory, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec, IndexOperation indexOp, boolean bulkload, List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys) throws AlgebricksException {
try {
Dataset dataset = MetadataManagerUtil.findExistingDataset(mdTxnCtx, dataverseName, datasetName);
boolean temp = dataset.getDatasetDetails().isTemp();
isTemporaryDatasetWriteJob = isTemporaryDatasetWriteJob && temp;
String itemTypeName = dataset.getItemTypeName();
IAType itemType = MetadataManager.INSTANCE.getDatatype(mdTxnCtx, dataset.getItemTypeDataverseName(), itemTypeName).getDatatype();
validateRecordType(itemType);
ARecordType recType = (ARecordType) itemType;
Index secondaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), indexName);
List<List<String>> secondaryKeyExprs = secondaryIndex.getKeyFieldNames();
List<IAType> secondaryKeyTypes = secondaryIndex.getKeyFieldTypes();
Pair<IAType, Boolean> keyPairType = Index.getNonNullableOpenFieldType(secondaryKeyTypes.get(0), secondaryKeyExprs.get(0), recType);
IAType spatialType = keyPairType.first;
int dimension = NonTaggedFormatUtil.getNumDimensions(spatialType.getTypeTag());
int numSecondaryKeys = dimension * 2;
int numPrimaryKeys = primaryKeys.size();
int numKeys = numSecondaryKeys + numPrimaryKeys;
int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
int[] fieldPermutation = new int[numKeys + numFilterFields];
int[] modificationCallbackPrimaryKeyFields = new int[primaryKeys.size()];
int i = 0;
int j = 0;
for (LogicalVariable varKey : secondaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
fieldPermutation[i] = idx;
i++;
}
for (LogicalVariable varKey : primaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
fieldPermutation[i] = idx;
modificationCallbackPrimaryKeyFields[j] = i;
i++;
j++;
}
if (numFilterFields > 0) {
int idx = propagatedSchema.findVariable(additionalNonKeyFields.get(0));
fieldPermutation[numKeys] = idx;
}
int[] prevFieldPermutation = null;
if (indexOp == IndexOperation.UPSERT) {
// Get field permutation for previous value
prevFieldPermutation = new int[numKeys + numFilterFields];
i = 0;
// Get field permutation for new value
for (LogicalVariable varKey : prevSecondaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
prevFieldPermutation[i] = idx;
i++;
}
for (int k = 0; k < numPrimaryKeys; k++) {
prevFieldPermutation[k + i] = fieldPermutation[k + i];
i++;
}
if (numFilterFields > 0) {
int idx = propagatedSchema.findVariable(prevAdditionalFilteringKeys.get(0));
prevFieldPermutation[numKeys] = idx;
}
}
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = getSplitProviderAndConstraints(dataset, secondaryIndex.getIndexName());
// prepare callback
JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId();
IModificationOperationCallbackFactory modificationCallbackFactory = dataset.getModificationCallbackFactory(storaegComponentProvider, secondaryIndex, jobId, indexOp, modificationCallbackPrimaryKeyFields);
IIndexDataflowHelperFactory indexDataflowHelperFactory = new IndexDataflowHelperFactory(storaegComponentProvider.getStorageManager(), splitsAndConstraint.first);
IOperatorDescriptor op;
if (bulkload) {
long numElementsHint = getCardinalityPerPartitionHint(dataset);
op = new TreeIndexBulkLoadOperatorDescriptor(spec, recordDesc, fieldPermutation, GlobalConfig.DEFAULT_TREE_FILL_FACTOR, false, numElementsHint, false, indexDataflowHelperFactory);
} else if (indexOp == IndexOperation.UPSERT) {
op = new LSMSecondaryUpsertOperatorDescriptor(spec, recordDesc, fieldPermutation, indexDataflowHelperFactory, filterFactory, modificationCallbackFactory, prevFieldPermutation);
} else {
op = new LSMTreeInsertDeleteOperatorDescriptor(spec, recordDesc, fieldPermutation, indexOp, indexDataflowHelperFactory, filterFactory, false, modificationCallbackFactory);
}
return new Pair<>(op, splitsAndConstraint.second);
} catch (MetadataException e) {
throw new AlgebricksException(e);
}
}
use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.
the class MetadataProvider method getBinaryTokenizerRuntime.
// Get a Tokenizer for the bulk-loading data into a n-gram or keyword index.
private Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getBinaryTokenizerRuntime(String dataverseName, String datasetName, String indexName, IOperatorSchema inputSchema, IOperatorSchema propagatedSchema, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys, RecordDescriptor recordDesc, JobSpecification spec, IndexType indexType) throws AlgebricksException {
// Sanity checks.
if (primaryKeys.size() > 1) {
throw new AlgebricksException("Cannot tokenize composite primary key.");
}
if (secondaryKeys.size() > 1) {
throw new AlgebricksException("Cannot tokenize composite secondary key fields.");
}
boolean isPartitioned;
if (indexType == IndexType.LENGTH_PARTITIONED_WORD_INVIX || indexType == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX) {
isPartitioned = true;
} else {
isPartitioned = false;
}
// Number of Keys that needs to be propagated
int numKeys = inputSchema.getSize();
// Get the rest of Logical Variables that are not (PK or SK) and each
// variable's positions.
// These variables will be propagated through TokenizeOperator.
List<LogicalVariable> otherKeys = new ArrayList<>();
if (inputSchema.getSize() > 0) {
for (int k = 0; k < inputSchema.getSize(); k++) {
boolean found = false;
for (LogicalVariable varKey : primaryKeys) {
if (varKey.equals(inputSchema.getVariable(k))) {
found = true;
break;
} else {
found = false;
}
}
if (!found) {
for (LogicalVariable varKey : secondaryKeys) {
if (varKey.equals(inputSchema.getVariable(k))) {
found = true;
break;
} else {
found = false;
}
}
}
if (!found) {
otherKeys.add(inputSchema.getVariable(k));
}
}
}
// For tokenization, sorting and loading.
// One token (+ optional partitioning field) + primary keys + secondary
// keys + other variables
// secondary keys and other variables will be just passed to the
// IndexInsertDelete Operator.
int numTokenKeyPairFields = (!isPartitioned) ? 1 + numKeys : 2 + numKeys;
// generate field permutations for the input
int[] fieldPermutation = new int[numKeys];
int[] modificationCallbackPrimaryKeyFields = new int[primaryKeys.size()];
int i = 0;
int j = 0;
for (LogicalVariable varKey : primaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
fieldPermutation[i] = idx;
modificationCallbackPrimaryKeyFields[j] = i;
i++;
j++;
}
for (LogicalVariable varKey : otherKeys) {
int idx = propagatedSchema.findVariable(varKey);
fieldPermutation[i] = idx;
i++;
}
for (LogicalVariable varKey : secondaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
fieldPermutation[i] = idx;
i++;
}
Dataset dataset = MetadataManagerUtil.findExistingDataset(mdTxnCtx, dataverseName, datasetName);
String itemTypeName = dataset.getItemTypeName();
IAType itemType;
try {
itemType = MetadataManager.INSTANCE.getDatatype(mdTxnCtx, dataset.getItemTypeDataverseName(), itemTypeName).getDatatype();
if (itemType.getTypeTag() != ATypeTag.OBJECT) {
throw new AlgebricksException("Only record types can be tokenized.");
}
ARecordType recType = (ARecordType) itemType;
// Index parameters.
Index secondaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), indexName);
List<List<String>> secondaryKeyExprs = secondaryIndex.getKeyFieldNames();
List<IAType> secondaryKeyTypeEntries = secondaryIndex.getKeyFieldTypes();
int numTokenFields = (!isPartitioned) ? secondaryKeys.size() : secondaryKeys.size() + 1;
ITypeTraits[] tokenTypeTraits = new ITypeTraits[numTokenFields];
ITypeTraits[] invListsTypeTraits = new ITypeTraits[primaryKeys.size()];
// Find the key type of the secondary key. If it's a derived type,
// return the derived type.
// e.g. UNORDERED LIST -> return UNORDERED LIST type
IAType secondaryKeyType;
Pair<IAType, Boolean> keyPairType = Index.getNonNullableOpenFieldType(secondaryKeyTypeEntries.get(0), secondaryKeyExprs.get(0), recType);
secondaryKeyType = keyPairType.first;
List<List<String>> partitioningKeys = dataset.getPrimaryKeys();
i = 0;
for (List<String> partitioningKey : partitioningKeys) {
IAType keyType = recType.getSubFieldType(partitioningKey);
invListsTypeTraits[i] = TypeTraitProvider.INSTANCE.getTypeTrait(keyType);
++i;
}
tokenTypeTraits[0] = NonTaggedFormatUtil.getTokenTypeTrait(secondaryKeyType);
if (isPartitioned) {
// The partitioning field is hardcoded to be a short *without*
// an Asterix type tag.
tokenTypeTraits[1] = ShortPointable.TYPE_TRAITS;
}
IBinaryTokenizerFactory tokenizerFactory = NonTaggedFormatUtil.getBinaryTokenizerFactory(secondaryKeyType.getTypeTag(), indexType, secondaryIndex.getGramLength());
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = getSplitProviderAndConstraints(dataset, secondaryIndex.getIndexName());
// Generate Output Record format
ISerializerDeserializer<?>[] tokenKeyPairFields = new ISerializerDeserializer[numTokenKeyPairFields];
ITypeTraits[] tokenKeyPairTypeTraits = new ITypeTraits[numTokenKeyPairFields];
ISerializerDeserializerProvider serdeProvider = FormatUtils.getDefaultFormat().getSerdeProvider();
// #1. propagate all input variables
for (int k = 0; k < recordDesc.getFieldCount(); k++) {
tokenKeyPairFields[k] = recordDesc.getFields()[k];
tokenKeyPairTypeTraits[k] = recordDesc.getTypeTraits()[k];
}
int tokenOffset = recordDesc.getFieldCount();
// #2. Specify the token type
tokenKeyPairFields[tokenOffset] = serdeProvider.getSerializerDeserializer(secondaryKeyType);
tokenKeyPairTypeTraits[tokenOffset] = tokenTypeTraits[0];
tokenOffset++;
// #3. Specify the length-partitioning key: number of token
if (isPartitioned) {
tokenKeyPairFields[tokenOffset] = ShortSerializerDeserializer.INSTANCE;
tokenKeyPairTypeTraits[tokenOffset] = tokenTypeTraits[1];
}
RecordDescriptor tokenKeyPairRecDesc = new RecordDescriptor(tokenKeyPairFields, tokenKeyPairTypeTraits);
IOperatorDescriptor tokenizerOp;
// Keys to be tokenized : SK
int docField = fieldPermutation[fieldPermutation.length - 1];
// Keys to be propagated
int[] keyFields = new int[numKeys];
for (int k = 0; k < keyFields.length; k++) {
keyFields[k] = k;
}
tokenizerOp = new BinaryTokenizerOperatorDescriptor(spec, tokenKeyPairRecDesc, tokenizerFactory, docField, keyFields, isPartitioned, true);
return new Pair<>(tokenizerOp, splitsAndConstraint.second);
} catch (Exception e) {
throw new AlgebricksException(e);
}
}
use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.
the class MetadataProvider method getInsertOrDeleteRuntime.
private Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getInsertOrDeleteRuntime(IndexOperation indexOp, IDataSource<DataSourceId> dataSource, IOperatorSchema propagatedSchema, List<LogicalVariable> keys, LogicalVariable payload, List<LogicalVariable> additionalNonKeyFields, RecordDescriptor inputRecordDesc, JobGenContext context, JobSpecification spec, boolean bulkload, List<LogicalVariable> additionalNonFilteringFields) throws AlgebricksException {
String datasetName = dataSource.getId().getDatasourceName();
Dataset dataset = MetadataManagerUtil.findExistingDataset(mdTxnCtx, dataSource.getId().getDataverseName(), datasetName);
boolean temp = dataset.getDatasetDetails().isTemp();
isTemporaryDatasetWriteJob = isTemporaryDatasetWriteJob && temp;
int numKeys = keys.size();
int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
// Move key fields to front.
int[] fieldPermutation = new int[numKeys + 1 + numFilterFields + (additionalNonFilteringFields == null ? 0 : additionalNonFilteringFields.size())];
int[] bloomFilterKeyFields = new int[numKeys];
int i = 0;
for (LogicalVariable varKey : keys) {
int idx = propagatedSchema.findVariable(varKey);
fieldPermutation[i] = idx;
bloomFilterKeyFields[i] = i;
i++;
}
fieldPermutation[i++] = propagatedSchema.findVariable(payload);
if (numFilterFields > 0) {
int idx = propagatedSchema.findVariable(additionalNonKeyFields.get(0));
fieldPermutation[i++] = idx;
}
if (additionalNonFilteringFields != null) {
for (LogicalVariable variable : additionalNonFilteringFields) {
int idx = propagatedSchema.findVariable(variable);
fieldPermutation[i++] = idx;
}
}
try {
Index primaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), dataset.getDatasetName());
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = getSplitProviderAndConstraints(dataset);
// prepare callback
int[] primaryKeyFields = new int[numKeys];
for (i = 0; i < numKeys; i++) {
primaryKeyFields[i] = i;
}
IModificationOperationCallbackFactory modificationCallbackFactory = dataset.getModificationCallbackFactory(storaegComponentProvider, primaryIndex, jobId, indexOp, primaryKeyFields);
IIndexDataflowHelperFactory idfh = new IndexDataflowHelperFactory(storaegComponentProvider.getStorageManager(), splitsAndConstraint.first);
IOperatorDescriptor op;
if (bulkload) {
long numElementsHint = getCardinalityPerPartitionHint(dataset);
op = new TreeIndexBulkLoadOperatorDescriptor(spec, inputRecordDesc, fieldPermutation, GlobalConfig.DEFAULT_TREE_FILL_FACTOR, true, numElementsHint, true, idfh);
} else {
op = new LSMTreeInsertDeleteOperatorDescriptor(spec, inputRecordDesc, fieldPermutation, indexOp, idfh, null, true, modificationCallbackFactory);
}
return new Pair<>(op, splitsAndConstraint.second);
} catch (MetadataException me) {
throw new AlgebricksException(me);
}
}
Aggregations