use of org.apache.asterix.runtime.job.listener.JobEventListenerFactory in project asterixdb by apache.
the class MetadataProvider method getRTreeRuntime.
private Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getRTreeRuntime(String dataverseName, String datasetName, String indexName, IOperatorSchema propagatedSchema, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields, AsterixTupleFilterFactory filterFactory, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec, IndexOperation indexOp, boolean bulkload, List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys) throws AlgebricksException {
try {
Dataset dataset = MetadataManagerUtil.findExistingDataset(mdTxnCtx, dataverseName, datasetName);
boolean temp = dataset.getDatasetDetails().isTemp();
isTemporaryDatasetWriteJob = isTemporaryDatasetWriteJob && temp;
String itemTypeName = dataset.getItemTypeName();
IAType itemType = MetadataManager.INSTANCE.getDatatype(mdTxnCtx, dataset.getItemTypeDataverseName(), itemTypeName).getDatatype();
validateRecordType(itemType);
ARecordType recType = (ARecordType) itemType;
Index secondaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), indexName);
List<List<String>> secondaryKeyExprs = secondaryIndex.getKeyFieldNames();
List<IAType> secondaryKeyTypes = secondaryIndex.getKeyFieldTypes();
Pair<IAType, Boolean> keyPairType = Index.getNonNullableOpenFieldType(secondaryKeyTypes.get(0), secondaryKeyExprs.get(0), recType);
IAType spatialType = keyPairType.first;
int dimension = NonTaggedFormatUtil.getNumDimensions(spatialType.getTypeTag());
int numSecondaryKeys = dimension * 2;
int numPrimaryKeys = primaryKeys.size();
int numKeys = numSecondaryKeys + numPrimaryKeys;
int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
int[] fieldPermutation = new int[numKeys + numFilterFields];
int[] modificationCallbackPrimaryKeyFields = new int[primaryKeys.size()];
int i = 0;
int j = 0;
for (LogicalVariable varKey : secondaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
fieldPermutation[i] = idx;
i++;
}
for (LogicalVariable varKey : primaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
fieldPermutation[i] = idx;
modificationCallbackPrimaryKeyFields[j] = i;
i++;
j++;
}
if (numFilterFields > 0) {
int idx = propagatedSchema.findVariable(additionalNonKeyFields.get(0));
fieldPermutation[numKeys] = idx;
}
int[] prevFieldPermutation = null;
if (indexOp == IndexOperation.UPSERT) {
// Get field permutation for previous value
prevFieldPermutation = new int[numKeys + numFilterFields];
i = 0;
// Get field permutation for new value
for (LogicalVariable varKey : prevSecondaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
prevFieldPermutation[i] = idx;
i++;
}
for (int k = 0; k < numPrimaryKeys; k++) {
prevFieldPermutation[k + i] = fieldPermutation[k + i];
i++;
}
if (numFilterFields > 0) {
int idx = propagatedSchema.findVariable(prevAdditionalFilteringKeys.get(0));
prevFieldPermutation[numKeys] = idx;
}
}
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = getSplitProviderAndConstraints(dataset, secondaryIndex.getIndexName());
// prepare callback
JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId();
IModificationOperationCallbackFactory modificationCallbackFactory = dataset.getModificationCallbackFactory(storaegComponentProvider, secondaryIndex, jobId, indexOp, modificationCallbackPrimaryKeyFields);
IIndexDataflowHelperFactory indexDataflowHelperFactory = new IndexDataflowHelperFactory(storaegComponentProvider.getStorageManager(), splitsAndConstraint.first);
IOperatorDescriptor op;
if (bulkload) {
long numElementsHint = getCardinalityPerPartitionHint(dataset);
op = new TreeIndexBulkLoadOperatorDescriptor(spec, recordDesc, fieldPermutation, GlobalConfig.DEFAULT_TREE_FILL_FACTOR, false, numElementsHint, false, indexDataflowHelperFactory);
} else if (indexOp == IndexOperation.UPSERT) {
op = new LSMSecondaryUpsertOperatorDescriptor(spec, recordDesc, fieldPermutation, indexDataflowHelperFactory, filterFactory, modificationCallbackFactory, prevFieldPermutation);
} else {
op = new LSMTreeInsertDeleteOperatorDescriptor(spec, recordDesc, fieldPermutation, indexOp, indexDataflowHelperFactory, filterFactory, false, modificationCallbackFactory);
}
return new Pair<>(op, splitsAndConstraint.second);
} catch (MetadataException e) {
throw new AlgebricksException(e);
}
}
use of org.apache.asterix.runtime.job.listener.JobEventListenerFactory in project asterixdb by apache.
the class MetadataProvider method getInvertedIndexRuntime.
private Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getInvertedIndexRuntime(String dataverseName, String datasetName, String indexName, IOperatorSchema propagatedSchema, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields, AsterixTupleFilterFactory filterFactory, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec, IndexOperation indexOp, IndexType indexType, boolean bulkload, List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys) throws AlgebricksException {
// Check the index is length-partitioned or not.
boolean isPartitioned;
if (indexType == IndexType.LENGTH_PARTITIONED_WORD_INVIX || indexType == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX) {
isPartitioned = true;
} else {
isPartitioned = false;
}
// Sanity checks.
if (primaryKeys.size() > 1) {
throw new AlgebricksException("Cannot create inverted index on dataset with composite primary key.");
}
// TokenizeOperator- [token, number of token]
if ((secondaryKeys.size() > 1 && !isPartitioned) || (secondaryKeys.size() > 2 && isPartitioned)) {
throw new AlgebricksException("Cannot create composite inverted index on multiple fields.");
}
Dataset dataset = MetadataManagerUtil.findExistingDataset(mdTxnCtx, dataverseName, datasetName);
boolean temp = dataset.getDatasetDetails().isTemp();
isTemporaryDatasetWriteJob = isTemporaryDatasetWriteJob && temp;
// For tokenization, sorting and loading.
// One token (+ optional partitioning field) + primary keys: [token,
// number of token, PK]
int numKeys = primaryKeys.size() + secondaryKeys.size();
int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
// generate field permutations
int[] fieldPermutation = new int[numKeys + numFilterFields];
int[] modificationCallbackPrimaryKeyFields = new int[primaryKeys.size()];
int i = 0;
int j = 0;
// Otherwise: [token]
for (LogicalVariable varKey : secondaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
fieldPermutation[i] = idx;
i++;
}
for (LogicalVariable varKey : primaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
fieldPermutation[i] = idx;
modificationCallbackPrimaryKeyFields[j] = i;
i++;
j++;
}
if (numFilterFields > 0) {
int idx = propagatedSchema.findVariable(additionalNonKeyFields.get(0));
fieldPermutation[numKeys] = idx;
}
int[] prevFieldPermutation = null;
if (indexOp == IndexOperation.UPSERT) {
// Find permutations for prev value
prevFieldPermutation = new int[numKeys + numFilterFields];
i = 0;
// Otherwise: [token]
for (LogicalVariable varKey : prevSecondaryKeys) {
int idx = propagatedSchema.findVariable(varKey);
prevFieldPermutation[i] = idx;
i++;
}
for (int k = 0; k < primaryKeys.size(); k++) {
prevFieldPermutation[k + i] = fieldPermutation[k + i];
i++;
}
if (numFilterFields > 0) {
int idx = propagatedSchema.findVariable(prevAdditionalFilteringKeys.get(0));
prevFieldPermutation[numKeys] = idx;
}
}
try {
// Index parameters.
Index secondaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), indexName);
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = getSplitProviderAndConstraints(dataset, secondaryIndex.getIndexName());
// prepare callback
JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId();
IModificationOperationCallbackFactory modificationCallbackFactory = dataset.getModificationCallbackFactory(storaegComponentProvider, secondaryIndex, jobId, indexOp, modificationCallbackPrimaryKeyFields);
IIndexDataflowHelperFactory indexDataFlowFactory = new IndexDataflowHelperFactory(storaegComponentProvider.getStorageManager(), splitsAndConstraint.first);
IOperatorDescriptor op;
if (bulkload) {
long numElementsHint = getCardinalityPerPartitionHint(dataset);
op = new TreeIndexBulkLoadOperatorDescriptor(spec, recordDesc, fieldPermutation, GlobalConfig.DEFAULT_TREE_FILL_FACTOR, false, numElementsHint, false, indexDataFlowFactory);
} else if (indexOp == IndexOperation.UPSERT) {
op = new LSMSecondaryUpsertOperatorDescriptor(spec, recordDesc, fieldPermutation, indexDataFlowFactory, filterFactory, modificationCallbackFactory, prevFieldPermutation);
} else {
op = new LSMTreeInsertDeleteOperatorDescriptor(spec, recordDesc, fieldPermutation, indexOp, indexDataFlowFactory, filterFactory, false, modificationCallbackFactory);
}
return new Pair<>(op, splitsAndConstraint.second);
} catch (Exception e) {
throw new AlgebricksException(e);
}
}
use of org.apache.asterix.runtime.job.listener.JobEventListenerFactory in project asterixdb by apache.
the class FlushDatasetUtil method flushDataset.
public static void flushDataset(IHyracksClientConnection hcc, MetadataProvider metadataProvider, String dataverseName, String datasetName, String indexName) throws Exception {
CompilerProperties compilerProperties = metadataProvider.getApplicationContext().getCompilerProperties();
int frameSize = compilerProperties.getFrameSize();
JobSpecification spec = new JobSpecification(frameSize);
RecordDescriptor[] rDescs = new RecordDescriptor[] { new RecordDescriptor(new ISerializerDeserializer[] {}) };
AlgebricksMetaOperatorDescriptor emptySource = new AlgebricksMetaOperatorDescriptor(spec, 0, 1, new IPushRuntimeFactory[] { new EmptyTupleSourceRuntimeFactory() }, rDescs);
org.apache.asterix.common.transactions.JobId jobId = JobIdFactory.generateJobId();
Dataset dataset = metadataProvider.findDataset(dataverseName, datasetName);
FlushDatasetOperatorDescriptor flushOperator = new FlushDatasetOperatorDescriptor(spec, jobId, dataset.getDatasetId());
spec.connect(new OneToOneConnectorDescriptor(spec), emptySource, 0, flushOperator, 0);
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> primarySplitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset, indexName);
AlgebricksPartitionConstraint primaryPartitionConstraint = primarySplitsAndConstraint.second;
AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, emptySource, primaryPartitionConstraint);
JobEventListenerFactory jobEventListenerFactory = new JobEventListenerFactory(jobId, true);
spec.setJobletEventListenerFactory(jobEventListenerFactory);
JobUtils.runJob(hcc, spec, true);
}
use of org.apache.asterix.runtime.job.listener.JobEventListenerFactory in project asterixdb by apache.
the class DatasetUtil method createPrimaryIndexUpsertOp.
/**
* Creates a primary index upsert operator for a given dataset.
*
* @param spec,
* the job specification.
* @param metadataProvider,
* the metadata provider.
* @param dataset,
* the dataset to upsert.
* @param inputRecordDesc,the
* record descriptor for an input tuple.
* @param fieldPermutation,
* the field permutation according to the input.
* @param missingWriterFactory,
* the factory for customizing missing value serialization.
* @return a primary index scan operator and its location constraints.
* @throws AlgebricksException
*/
public static Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> createPrimaryIndexUpsertOp(JobSpecification spec, MetadataProvider metadataProvider, Dataset dataset, RecordDescriptor inputRecordDesc, int[] fieldPermutation, IMissingWriterFactory missingWriterFactory) throws AlgebricksException {
int numKeys = dataset.getPrimaryKeys().size();
int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
ARecordType itemType = (ARecordType) metadataProvider.findType(dataset);
ARecordType metaItemType = (ARecordType) metadataProvider.findMetaType(dataset);
try {
Index primaryIndex = metadataProvider.getIndex(dataset.getDataverseName(), dataset.getDatasetName(), dataset.getDatasetName());
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint = metadataProvider.getSplitProviderAndConstraints(dataset);
// prepare callback
JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId();
int[] primaryKeyFields = new int[numKeys];
for (int i = 0; i < numKeys; i++) {
primaryKeyFields[i] = i;
}
boolean hasSecondaries = metadataProvider.getDatasetIndexes(dataset.getDataverseName(), dataset.getDatasetName()).size() > 1;
IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
IModificationOperationCallbackFactory modificationCallbackFactory = dataset.getModificationCallbackFactory(storageComponentProvider, primaryIndex, jobId, IndexOperation.UPSERT, primaryKeyFields);
ISearchOperationCallbackFactory searchCallbackFactory = dataset.getSearchCallbackFactory(storageComponentProvider, primaryIndex, jobId, IndexOperation.UPSERT, primaryKeyFields);
IIndexDataflowHelperFactory idfh = new IndexDataflowHelperFactory(storageComponentProvider.getStorageManager(), splitsAndConstraint.first);
LSMPrimaryUpsertOperatorDescriptor op;
ITypeTraits[] outputTypeTraits = new ITypeTraits[inputRecordDesc.getFieldCount() + (dataset.hasMetaPart() ? 2 : 1) + numFilterFields];
ISerializerDeserializer<?>[] outputSerDes = new ISerializerDeserializer[inputRecordDesc.getFieldCount() + (dataset.hasMetaPart() ? 2 : 1) + numFilterFields];
// add the previous record first
int f = 0;
outputSerDes[f] = FormatUtils.getDefaultFormat().getSerdeProvider().getSerializerDeserializer(itemType);
f++;
// add the previous meta second
if (dataset.hasMetaPart()) {
outputSerDes[f] = FormatUtils.getDefaultFormat().getSerdeProvider().getSerializerDeserializer(metaItemType);
outputTypeTraits[f] = FormatUtils.getDefaultFormat().getTypeTraitProvider().getTypeTrait(metaItemType);
f++;
}
// add the previous filter third
int fieldIdx = -1;
if (numFilterFields > 0) {
String filterField = DatasetUtil.getFilterField(dataset).get(0);
String[] fieldNames = itemType.getFieldNames();
int i = 0;
for (; i < fieldNames.length; i++) {
if (fieldNames[i].equals(filterField)) {
break;
}
}
fieldIdx = i;
outputTypeTraits[f] = FormatUtils.getDefaultFormat().getTypeTraitProvider().getTypeTrait(itemType.getFieldTypes()[fieldIdx]);
outputSerDes[f] = FormatUtils.getDefaultFormat().getSerdeProvider().getSerializerDeserializer(itemType.getFieldTypes()[fieldIdx]);
f++;
}
for (int j = 0; j < inputRecordDesc.getFieldCount(); j++) {
outputTypeTraits[j + f] = inputRecordDesc.getTypeTraits()[j];
outputSerDes[j + f] = inputRecordDesc.getFields()[j];
}
RecordDescriptor outputRecordDesc = new RecordDescriptor(outputSerDes, outputTypeTraits);
op = new LSMPrimaryUpsertOperatorDescriptor(spec, outputRecordDesc, fieldPermutation, idfh, missingWriterFactory, modificationCallbackFactory, searchCallbackFactory, dataset.getFrameOpCallbackFactory(), numKeys, itemType, fieldIdx, hasSecondaries);
return new Pair<>(op, splitsAndConstraint.second);
} catch (MetadataException me) {
throw new AlgebricksException(me);
}
}
Aggregations