Use of org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory in project asterixdb by apache.
From the class MetadataProvider, method buildExternalDataLookupRuntime:
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildExternalDataLookupRuntime(
        JobSpecification jobSpec, Dataset dataset, int[] ridIndexes, boolean retainInput,
        IVariableTypeEnvironment typeEnv, IOperatorSchema opSchema, JobGenContext context,
        MetadataProvider metadataProvider, boolean retainMissing) throws AlgebricksException {
    try {
        // Get data type
        ARecordType itemType = (ARecordType) MetadataManager.INSTANCE.getDatatype(
                metadataProvider.getMetadataTxnContext(), dataset.getDataverseName(),
                dataset.getItemTypeName()).getDatatype();
        ExternalDatasetDetails datasetDetails = (ExternalDatasetDetails) dataset.getDatasetDetails();
        LookupAdapterFactory<?> adapterFactory = AdapterFactoryProvider.getLookupAdapterFactory(
                getApplicationContext().getServiceContext(), datasetDetails.getProperties(), itemType,
                ridIndexes, retainInput, retainMissing, context.getMissingWriterFactory());
        String fileIndexName = IndexingConstants.getFilesIndexName(dataset.getDatasetName());
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> spPc =
                metadataProvider.getSplitProviderAndConstraints(dataset, fileIndexName);
        Index fileIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(),
                dataset.getDatasetName(), fileIndexName);
        // Create the file index data flow helper
        IIndexDataflowHelperFactory indexDataflowHelperFactory =
                new IndexDataflowHelperFactory(storaegComponentProvider.getStorageManager(), spPc.first);
        // Create the out record descriptor, appContext and fileSplitProvider for the files index
        RecordDescriptor outRecDesc = JobGenHelper.mkRecordDescriptor(typeEnv, opSchema, context);
        ISearchOperationCallbackFactory searchOpCallbackFactory = dataset.getSearchCallbackFactory(
                storaegComponentProvider, fileIndex, jobId, IndexOperation.SEARCH, null);
        // Create the operator
        ExternalLookupOperatorDescriptor op = new ExternalLookupOperatorDescriptor(jobSpec, adapterFactory,
                outRecDesc, indexDataflowHelperFactory, searchOpCallbackFactory,
                ExternalDatasetsRegistry.INSTANCE.getAndLockDatasetVersion(dataset, this));
        return new Pair<>(op, spPc.second);
    } catch (Exception e) {
        throw new AlgebricksException(e);
    }
}
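For context, callers of buildExternalDataLookupRuntime typically register both halves of the returned pair with the job: the operator descriptor goes into the job specification and the partition constraint pins it to the nodes that hold the files index. A minimal sketch of that consuming side follows; the local variable names and the use of the Algebricks constraint helper are assumptions about the call site, not code taken from the project.

// Hedged sketch: wiring the returned operator and its constraint into the job.
Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> lookup =
        metadataProvider.buildExternalDataLookupRuntime(jobSpec, dataset, ridIndexes, retainInput, typeEnv,
                opSchema, context, metadataProvider, retainMissing);
IOperatorDescriptor lookupOp = lookup.first;
// Constrain the operator to the partitions that store the dataset's files index.
AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(jobSpec, lookupOp, lookup.second);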
Use of org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory in project asterixdb by apache.
From the class MetadataProvider, method buildBtreeRuntime:
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildBtreeRuntime(JobSpecification jobSpec,
        IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv, JobGenContext context, boolean retainInput,
        boolean retainMissing, Dataset dataset, String indexName, int[] lowKeyFields, int[] highKeyFields,
        boolean lowKeyInclusive, boolean highKeyInclusive, int[] minFilterFieldIndexes,
        int[] maxFilterFieldIndexes) throws AlgebricksException {
    boolean isSecondary = true;
    try {
        Index primaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(),
                dataset.getDatasetName(), dataset.getDatasetName());
        if (primaryIndex != null && (dataset.getDatasetType() != DatasetType.EXTERNAL)) {
            isSecondary = !indexName.equals(primaryIndex.getIndexName());
        }
        Index theIndex = isSecondary
                ? MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(),
                        dataset.getDatasetName(), indexName)
                : primaryIndex;
        int numPrimaryKeys = dataset.getPrimaryKeys().size();
        RecordDescriptor outputRecDesc = JobGenHelper.mkRecordDescriptor(typeEnv, opSchema, context);
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> spPc =
                getSplitProviderAndConstraints(dataset, theIndex.getIndexName());
        int[] primaryKeyFields = new int[numPrimaryKeys];
        for (int i = 0; i < numPrimaryKeys; i++) {
            primaryKeyFields[i] = i;
        }
        ISearchOperationCallbackFactory searchCallbackFactory = dataset.getSearchCallbackFactory(
                storaegComponentProvider, theIndex, jobId, IndexOperation.SEARCH, primaryKeyFields);
        IStorageManager storageManager = getStorageComponentProvider().getStorageManager();
        IIndexDataflowHelperFactory indexHelperFactory =
                new IndexDataflowHelperFactory(storageManager, spPc.first);
        BTreeSearchOperatorDescriptor btreeSearchOp;
        if (dataset.getDatasetType() == DatasetType.INTERNAL) {
            btreeSearchOp = new BTreeSearchOperatorDescriptor(jobSpec, outputRecDesc, lowKeyFields, highKeyFields,
                    lowKeyInclusive, highKeyInclusive, indexHelperFactory, retainInput, retainMissing,
                    context.getMissingWriterFactory(), searchCallbackFactory, minFilterFieldIndexes,
                    maxFilterFieldIndexes, false);
        } else {
            btreeSearchOp = new ExternalBTreeSearchOperatorDescriptor(jobSpec, outputRecDesc, lowKeyFields,
                    highKeyFields, lowKeyInclusive, highKeyInclusive, indexHelperFactory, retainInput,
                    retainMissing, context.getMissingWriterFactory(), searchCallbackFactory,
                    minFilterFieldIndexes, maxFilterFieldIndexes,
                    ExternalDatasetsRegistry.INSTANCE.getAndLockDatasetVersion(dataset, this));
        }
        return new Pair<>(btreeSearchOp, spPc.second);
    } catch (MetadataException me) {
        throw new AlgebricksException(me);
    }
}
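Because a dataset's primary index is registered under the dataset's own name, the first getIndex call above passes dataset.getDatasetName() as the index name; any other name is resolved as a secondary index. The sketch below shows how a primary-key point lookup might invoke this method with equal, inclusive low and high keys; the surrounding variables and the null filter-field indexes are assumptions for illustration, not an actual call site.

// Hedged sketch: primary-index point lookup (low key == high key, both bounds inclusive).
int[] keyFields = new int[] { 0 };
Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> search =
        metadataProvider.buildBtreeRuntime(jobSpec, opSchema, typeEnv, context, false /* retainInput */,
                false /* retainMissing */, dataset, dataset.getDatasetName() /* primary index name */,
                keyFields, keyFields, true, true, null, null);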
Use of org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory in project asterixdb by apache.
From the class FramewriterTest, method createWriters:
/**
 * @return a list of writers to test. These writers can be of the same type but behave differently based on the
 *         included mocks.
 * @throws HyracksDataException
 * @throws IndexException
 */
public IFrameWriter[] createWriters() throws HyracksDataException {
    ArrayList<BTreeSearchOperatorNodePushable> writers = new ArrayList<>();
    Pair<IIndexDataflowHelperFactory, ISearchOperationCallbackFactory>[] pairs = pairs();
    IRecordDescriptorProvider[] recordDescProviders = mockRecDescProviders();
    int partition = 0;
    IHyracksTaskContext[] ctxs = mockIHyracksTaskContext();
    int[] keys = { 0 };
    boolean lowKeyInclusive = true;
    boolean highKeyInclusive = true;
    for (Pair<IIndexDataflowHelperFactory, ISearchOperationCallbackFactory> pair : pairs) {
        for (IRecordDescriptorProvider recordDescProvider : recordDescProviders) {
            for (IHyracksTaskContext ctx : ctxs) {
                BTreeSearchOperatorNodePushable writer = new BTreeSearchOperatorNodePushable(ctx, partition,
                        recordDescProvider.getInputRecordDescriptor(new ActivityId(new OperatorDescriptorId(0), 0), 0),
                        keys, keys, lowKeyInclusive, highKeyInclusive, keys, keys, pair.getLeft(), false, false,
                        null, pair.getRight(), false);
                writers.add(writer);
            }
        }
    }
    // Create the framewriter using the mocks
    return writers.toArray(new IFrameWriter[writers.size()]);
}
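Each element returned by createWriters is an IFrameWriter, so a test can push frames through it using the standard open / nextFrame / fail / close protocol. The driver below is an assumed sketch, not part of FramewriterTest; the frame buffer and its size are placeholders.

// Hedged sketch: exercising every mocked writer through the IFrameWriter lifecycle.
@Test
public void exerciseWriters() throws Exception {
    ByteBuffer frame = ByteBuffer.allocate(32768); // placeholder frame; the real test builds frames from mocks
    for (IFrameWriter writer : createWriters()) {
        writer.open();
        try {
            writer.nextFrame(frame);
        } catch (Exception e) {
            writer.fail(); // signal producer failure before closing
        } finally {
            writer.close();
        }
    }
}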
Use of org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory in project asterixdb by apache.
From the class FramewriterTest, method mockSearchOpCallbackFactories:
private ISearchOperationCallbackFactory[] mockSearchOpCallbackFactories() throws HyracksDataException {
    ISearchOperationCallback searchOpCallback = mockSearchOpCallback();
    ISearchOperationCallbackFactory searchOpCallbackFactory = Mockito.mock(ISearchOperationCallbackFactory.class);
    Mockito.when(searchOpCallbackFactory.createSearchOperationCallback(Mockito.anyLong(),
            Mockito.any(IHyracksTaskContext.class), Mockito.isNull(IOperatorNodePushable.class)))
            .thenReturn(searchOpCallback);
    return new ISearchOperationCallbackFactory[] { searchOpCallbackFactory };
}
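The mockSearchOpCallback() helper is not shown in this excerpt. A plausible minimal version, assumed here rather than taken from the project, simply returns a bare Mockito mock, which is enough when the code under test only needs a non-null ISearchOperationCallback:

// Assumed shape of the helper referenced above (illustrative only).
private ISearchOperationCallback mockSearchOpCallback() {
    return Mockito.mock(ISearchOperationCallback.class);
}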
Use of org.apache.hyracks.storage.am.common.api.ISearchOperationCallbackFactory in project asterixdb by apache.
From the class DatasetUtil, method createPrimaryIndexUpsertOp:
/**
 * Creates a primary index upsert operator for a given dataset.
 *
 * @param spec,
 *            the job specification.
 * @param metadataProvider,
 *            the metadata provider.
 * @param dataset,
 *            the dataset to upsert.
 * @param inputRecordDesc,
 *            the record descriptor for an input tuple.
 * @param fieldPermutation,
 *            the field permutation according to the input.
 * @param missingWriterFactory,
 *            the factory for customizing missing value serialization.
 * @return a primary index upsert operator and its location constraints.
 * @throws AlgebricksException
 */
public static Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> createPrimaryIndexUpsertOp(
        JobSpecification spec, MetadataProvider metadataProvider, Dataset dataset,
        RecordDescriptor inputRecordDesc, int[] fieldPermutation, IMissingWriterFactory missingWriterFactory)
        throws AlgebricksException {
    int numKeys = dataset.getPrimaryKeys().size();
    int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
    ARecordType itemType = (ARecordType) metadataProvider.findType(dataset);
    ARecordType metaItemType = (ARecordType) metadataProvider.findMetaType(dataset);
    try {
        Index primaryIndex = metadataProvider.getIndex(dataset.getDataverseName(), dataset.getDatasetName(),
                dataset.getDatasetName());
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
                metadataProvider.getSplitProviderAndConstraints(dataset);
        // prepare callback
        JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId();
        int[] primaryKeyFields = new int[numKeys];
        for (int i = 0; i < numKeys; i++) {
            primaryKeyFields[i] = i;
        }
        boolean hasSecondaries =
                metadataProvider.getDatasetIndexes(dataset.getDataverseName(), dataset.getDatasetName()).size() > 1;
        IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
        IModificationOperationCallbackFactory modificationCallbackFactory = dataset.getModificationCallbackFactory(
                storageComponentProvider, primaryIndex, jobId, IndexOperation.UPSERT, primaryKeyFields);
        ISearchOperationCallbackFactory searchCallbackFactory = dataset.getSearchCallbackFactory(
                storageComponentProvider, primaryIndex, jobId, IndexOperation.UPSERT, primaryKeyFields);
        IIndexDataflowHelperFactory idfh = new IndexDataflowHelperFactory(
                storageComponentProvider.getStorageManager(), splitsAndConstraint.first);
        LSMPrimaryUpsertOperatorDescriptor op;
        ITypeTraits[] outputTypeTraits =
                new ITypeTraits[inputRecordDesc.getFieldCount() + (dataset.hasMetaPart() ? 2 : 1) + numFilterFields];
        ISerializerDeserializer<?>[] outputSerDes = new ISerializerDeserializer[inputRecordDesc.getFieldCount()
                + (dataset.hasMetaPart() ? 2 : 1) + numFilterFields];
        // add the previous record first
        int f = 0;
        outputSerDes[f] = FormatUtils.getDefaultFormat().getSerdeProvider().getSerializerDeserializer(itemType);
        f++;
        // add the previous meta second
        if (dataset.hasMetaPart()) {
            outputSerDes[f] =
                    FormatUtils.getDefaultFormat().getSerdeProvider().getSerializerDeserializer(metaItemType);
            outputTypeTraits[f] = FormatUtils.getDefaultFormat().getTypeTraitProvider().getTypeTrait(metaItemType);
            f++;
        }
        // add the previous filter third
        int fieldIdx = -1;
        if (numFilterFields > 0) {
            String filterField = DatasetUtil.getFilterField(dataset).get(0);
            String[] fieldNames = itemType.getFieldNames();
            int i = 0;
            for (; i < fieldNames.length; i++) {
                if (fieldNames[i].equals(filterField)) {
                    break;
                }
            }
            fieldIdx = i;
            outputTypeTraits[f] = FormatUtils.getDefaultFormat().getTypeTraitProvider()
                    .getTypeTrait(itemType.getFieldTypes()[fieldIdx]);
            outputSerDes[f] = FormatUtils.getDefaultFormat().getSerdeProvider()
                    .getSerializerDeserializer(itemType.getFieldTypes()[fieldIdx]);
            f++;
        }
        for (int j = 0; j < inputRecordDesc.getFieldCount(); j++) {
            outputTypeTraits[j + f] = inputRecordDesc.getTypeTraits()[j];
            outputSerDes[j + f] = inputRecordDesc.getFields()[j];
        }
        RecordDescriptor outputRecordDesc = new RecordDescriptor(outputSerDes, outputTypeTraits);
        op = new LSMPrimaryUpsertOperatorDescriptor(spec, outputRecordDesc, fieldPermutation, idfh,
                missingWriterFactory, modificationCallbackFactory, searchCallbackFactory,
                dataset.getFrameOpCallbackFactory(), numKeys, itemType, fieldIdx, hasSecondaries);
        return new Pair<>(op, splitsAndConstraint.second);
    } catch (MetadataException me) {
        throw new AlgebricksException(me);
    }
}
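The output record assembled above prepends the previous record, then (for datasets with a meta part) the previous meta record, then the previous filter value, ahead of the incoming tuple's fields; this is why the output arrays are sized with (hasMetaPart ? 2 : 1) + numFilterFields extra slots. A small worked example of that arithmetic, with assumed counts:

// Worked example of the output-descriptor sizing (all counts are assumptions).
int inputFieldCount = 5;      // fields in the incoming tuple
boolean hasMetaPart = true;   // dataset declared with a meta part
int numFilterFields = 1;      // one LSM filter field
int extraFields = (hasMetaPart ? 2 : 1) + numFilterFields; // previous record + previous meta + previous filter
int outputFieldCount = inputFieldCount + extraFields;      // 5 + 3 = 8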