use of org.apache.hyracks.algebricks.common.utils.Pair in project asterixdb by apache.
the class InvertedIndexPOperator method buildInvertedIndexRuntime.
/**
 * Creates the search operator descriptor for an LSM inverted index of {@code dataset}, together
 * with the partition constraint of that index's file splits.
 * <p>
 * The index called {@code indexName} is resolved through {@link MetadataManager} using the
 * metadata transaction context of {@code metadataProvider}. The tokenizer and search-modifier
 * factories are obtained from {@link InvertedIndexAccessMethod}.
 *
 * @param metadataProvider supplies the metadata transaction context, the split provider and the
 *        storage component provider
 * @param context job generation context; supplies the type environment and the missing-writer factory
 * @param jobSpec job specification the operator is created for; its joblet event listener factory
 *        is cast to {@code JobEventListenerFactory} to read the job id
 * @param unnestMap operator whose type environment is used for the output record descriptor
 * @param opSchema schema used for the output record descriptor
 * @param retainInput forwarded unchanged to the search operator descriptor
 * @param retainMissing forwarded unchanged to the search operator descriptor
 * @param datasetName used only in the "no index" error message
 * @param dataset dataset owning the index
 * @param indexName name of the secondary index to search
 * @param searchKeyType type tag used when choosing the query tokenizer factory
 * @param keyFields search key fields; only the first element is used
 * @param searchModifierType kind of search modifier to create
 * @param similarityThreshold constant cast to {@code AsterixConstantValue}; its object is handed
 *        to the search-modifier factory lookup
 * @param minFilterFieldIndexes forwarded unchanged to the search operator descriptor
 * @param maxFilterFieldIndexes forwarded unchanged to the search operator descriptor
 * @param isFullTextSearchQuery forwarded unchanged to the search operator descriptor
 * @return the search operator descriptor paired with the index's partition constraint
 * @throws AlgebricksException if no index with the given name exists, or wrapping a
 *         {@code MetadataException} raised while building the runtime
 */
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildInvertedIndexRuntime(
        MetadataProvider metadataProvider, JobGenContext context, JobSpecification jobSpec,
        AbstractUnnestMapOperator unnestMap, IOperatorSchema opSchema, boolean retainInput,
        boolean retainMissing, String datasetName, Dataset dataset, String indexName,
        ATypeTag searchKeyType, int[] keyFields, SearchModifierType searchModifierType,
        IAlgebricksConstantValue similarityThreshold, int[] minFilterFieldIndexes,
        int[] maxFilterFieldIndexes, boolean isFullTextSearchQuery) throws AlgebricksException {
    try {
        IAObject thresholdObject = ((AsterixConstantValue) similarityThreshold).getObject();
        int primaryKeyCount = dataset.getPrimaryKeys().size();

        // Resolve the secondary index; a missing index is a code generation error.
        Index invertedIndex = MetadataManager.INSTANCE.getIndex(
                metadataProvider.getMetadataTxnContext(),
                dataset.getDataverseName(),
                dataset.getDatasetName(),
                indexName);
        if (invertedIndex == null) {
            throw new AlgebricksException("Code generation error: no index " + indexName + " for dataset " + datasetName);
        }

        RecordDescriptor outputDescriptor =
                JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(unnestMap), opSchema, context);
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
                metadataProvider.getSplitProviderAndConstraints(dataset, indexName);

        // TODO: Here we assume there is only one search key field.
        int searchField = keyFields[0];

        // Factories that depend on the search modifier type and the index definition.
        IInvertedIndexSearchModifierFactory modifierFactory =
                InvertedIndexAccessMethod.getSearchModifierFactory(searchModifierType, thresholdObject, invertedIndex);
        IBinaryTokenizerFactory tokenizerFactory =
                InvertedIndexAccessMethod.getBinaryTokenizerFactory(searchModifierType, searchKeyType, invertedIndex);
        IIndexDataflowHelperFactory helperFactory = new IndexDataflowHelperFactory(
                metadataProvider.getStorageComponentProvider().getStorageManager(),
                splitsAndConstraint.first);

        LSMInvertedIndexSearchOperatorDescriptor searchOperator = new LSMInvertedIndexSearchOperatorDescriptor(
                jobSpec,
                outputDescriptor,
                searchField,
                helperFactory,
                tokenizerFactory,
                modifierFactory,
                retainInput,
                retainMissing,
                context.getMissingWriterFactory(),
                dataset.getSearchCallbackFactory(
                        metadataProvider.getStorageComponentProvider(),
                        invertedIndex,
                        ((JobEventListenerFactory) jobSpec.getJobletEventListenerFactory()).getJobId(),
                        IndexOperation.SEARCH,
                        null),
                minFilterFieldIndexes,
                maxFilterFieldIndexes,
                isFullTextSearchQuery,
                primaryKeyCount,
                false);
        return new Pair<>(searchOperator, splitsAndConstraint.second);
    } catch (MetadataException e) {
        throw new AlgebricksException(e);
    }
}
use of org.apache.hyracks.algebricks.common.utils.Pair in project asterixdb by apache.
the class DefaultRuleSetFactory method buildPhysical.
/**
 * Assembles the default physical rewrite rule sets, each paired with the controller that
 * drives it.
 * <p>
 * Three entries are returned, in this order: the all-levels physical rewrites, the top-level
 * physical rewrites (built from {@code appCtx}), and the prepare-for-job-generation rules.
 * The first and third entries share one controller instance.
 *
 * @param appCtx application context handed to the top-level rule collection builder
 * @return a new mutable list holding the three controller/rule-collection pairs
 */
public static List<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>> buildPhysical(
        ICcApplicationContext appCtx) {
    // NOTE(review): the boolean constructor flag is passed as in the original; its meaning is
    // not visible from here.
    SequentialOnceRuleController onceController = new SequentialOnceRuleController(true);
    SequentialOnceRuleController onceTopLevelController = new SequentialOnceRuleController(false);

    List<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>> physicalRewrites = new ArrayList<>();
    physicalRewrites.add(
            new Pair<>(onceController, RuleCollections.buildPhysicalRewritesAllLevelsRuleCollection()));
    physicalRewrites.add(
            new Pair<>(onceTopLevelController, RuleCollections.buildPhysicalRewritesTopLevelRuleCollection(appCtx)));
    physicalRewrites.add(
            new Pair<>(onceController, RuleCollections.prepareForJobGenRuleCollection()));
    return physicalRewrites;
}
use of org.apache.hyracks.algebricks.common.utils.Pair in project asterixdb by apache.
the class AnalysisUtil method getExternalDatasetInfo.
/**
 * Reads the dataverse name and the dataset name from the expression of an unnest-map operator.
 * <p>
 * The operator's expression is cast to {@link AbstractFunctionCallExpression}; its first
 * argument is read as the dataverse name and its second as the dataset name, both through
 * {@code AccessMethodUtils.getStringConstant}.
 *
 * @param op the unnest-map operator to inspect
 * @return a pair of (dataverse name, dataset name)
 * @throws AlgebricksException declared by this method; not thrown directly in this body
 */
public static Pair<String, String> getExternalDatasetInfo(UnnestMapOperator op) throws AlgebricksException {
    AbstractFunctionCallExpression call =
            (AbstractFunctionCallExpression) op.getExpressionRef().getValue();
    String dataverse = AccessMethodUtils.getStringConstant(call.getArguments().get(0));
    String dataset = AccessMethodUtils.getStringConstant(call.getArguments().get(1));
    return new Pair<>(dataverse, dataset);
}
use of org.apache.hyracks.algebricks.common.utils.Pair in project asterixdb by apache.
the class FeedDataSource method buildDatasourceScanRuntime.
/**
 * Builds the feed-collect operator for this feed data source, paired with an absolute
 * partition constraint over {@code getLocations()}.
 * <p>
 * The output record descriptor holds one serializer/deserializer for the feed payload type
 * ({@code itemType} cast to {@link ARecordType}), followed by one for {@code metaItemType} when
 * it is non-null, followed by one per entry of {@code pkTypes} when that is non-null. These
 * three names are resolved outside this block.
 * <p>
 * Of the parameters, only {@code jobSpec} is read by this implementation.
 *
 * @param jobSpec job specification the feed-collect operator is created for
 * @return the feed-collect operator descriptor and its partition constraint
 * @throws AlgebricksException if no feed policy is configured under
 *         {@code BuiltinFeedPolicies.CONFIG_FEED_POLICY_KEY} (thrown as-is, not wrapped), or
 *         wrapping any other exception raised while building the operator
 */
@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildDatasourceScanRuntime(
        MetadataProvider metadataProvider, IDataSource<DataSourceId> dataSource,
        List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed,
        List<LogicalVariable> minFilterVars, List<LogicalVariable> maxFilterVars, IOperatorSchema opSchema,
        IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec, Object implConfig)
        throws AlgebricksException {
    try {
        ARecordType feedOutputType = (ARecordType) itemType;
        ISerializerDeserializer payloadSerde =
                NonTaggedDataFormat.INSTANCE.getSerdeProvider().getSerializerDeserializer(feedOutputType);

        // Field order of the feed record: payload, optional meta record, then primary keys.
        List<ISerializerDeserializer> serdes = new ArrayList<>();
        serdes.add(payloadSerde);
        if (metaItemType != null) {
            serdes.add(SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(metaItemType));
        }
        if (pkTypes != null) {
            for (IAType type : pkTypes) {
                serdes.add(SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(type));
            }
        }
        RecordDescriptor feedDesc =
                new RecordDescriptor(serdes.toArray(new ISerializerDeserializer[serdes.size()]));

        FeedPolicyEntity feedPolicy =
                (FeedPolicyEntity) getProperties().get(BuiltinFeedPolicies.CONFIG_FEED_POLICY_KEY);
        if (feedPolicy == null) {
            throw new AlgebricksException("Feed not configured with a policy");
        }
        // Stores the policy name in the policy's own property map, which is then handed to the
        // operator below. NOTE(review): this mutates the map returned by getProperties() on the
        // policy entity — confirm that sharing is intended.
        feedPolicy.getProperties().put(BuiltinFeedPolicies.CONFIG_FEED_POLICY_KEY, feedPolicy.getPolicyName());

        FeedConnectionId feedConnectionId =
                new FeedConnectionId(getId().getDataverseName(), getId().getDatasourceName(), getTargetDataset());
        FeedCollectOperatorDescriptor feedCollector = new FeedCollectOperatorDescriptor(jobSpec,
                feedConnectionId, feedOutputType, feedDesc, feedPolicy.getProperties(), getLocation());
        return new Pair<>(feedCollector, new AlgebricksAbsolutePartitionConstraint(getLocations()));
    } catch (AlgebricksException e) {
        // Already the declared exception type: rethrow unchanged so the message (e.g. the
        // missing-policy error above) is not hidden behind a second AlgebricksException.
        throw e;
    } catch (Exception e) {
        throw new AlgebricksException(e);
    }
}
use of org.apache.hyracks.algebricks.common.utils.Pair in project asterixdb by apache.
the class MetadataProvider method buildExternalDataLookupRuntime.
/**
 * Builds the external-data lookup operator for an external dataset, paired with the partition
 * constraint of the dataset's files index.
 * <p>
 * The record type is resolved through {@link MetadataManager} with the transaction context of
 * the {@code metadataProvider} argument, and the split provider also comes from that argument.
 * The files-index lookup, the storage component provider and the job id use the unqualified
 * names {@code mdTxnCtx}, {@code storaegComponentProvider} and {@code jobId}, which are resolved
 * outside this block (presumably state of this provider — verify whether the two providers are
 * expected to be the same object).
 *
 * @param jobSpec job specification the lookup operator is created for
 * @param dataset the dataset; its details are cast to {@link ExternalDatasetDetails}
 * @param ridIndexes forwarded to the lookup adapter factory
 * @param retainInput forwarded to the lookup adapter factory
 * @param typeEnv type environment used for the output record descriptor
 * @param opSchema schema used for the output record descriptor
 * @param context job generation context; supplies the missing-writer factory
 * @param metadataProvider supplies the metadata transaction context for the datatype lookup and
 *        the split provider for the files index
 * @param retainMissing forwarded to the lookup adapter factory
 * @return the lookup operator descriptor and the files-index partition constraint
 * @throws AlgebricksException wrapping any exception raised while building the runtime
 */
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildExternalDataLookupRuntime(
        JobSpecification jobSpec, Dataset dataset, int[] ridIndexes, boolean retainInput,
        IVariableTypeEnvironment typeEnv, IOperatorSchema opSchema, JobGenContext context,
        MetadataProvider metadataProvider, boolean retainMissing) throws AlgebricksException {
    try {
        // Record type of the dataset's items.
        ARecordType recordType = (ARecordType) MetadataManager.INSTANCE.getDatatype(
                metadataProvider.getMetadataTxnContext(),
                dataset.getDataverseName(),
                dataset.getItemTypeName()).getDatatype();
        ExternalDatasetDetails externalDetails = (ExternalDatasetDetails) dataset.getDatasetDetails();
        LookupAdapterFactory<?> lookupAdapterFactory = AdapterFactoryProvider.getLookupAdapterFactory(
                getApplicationContext().getServiceContext(),
                externalDetails.getProperties(),
                recordType,
                ridIndexes,
                retainInput,
                retainMissing,
                context.getMissingWriterFactory());

        // Files index: its name, its splits/constraint and its metadata entry.
        String filesIndexName = IndexingConstants.getFilesIndexName(dataset.getDatasetName());
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
                metadataProvider.getSplitProviderAndConstraints(dataset, filesIndexName);
        Index filesIndex = MetadataManager.INSTANCE.getIndex(
                mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), filesIndexName);

        // Dataflow helper for the files index.
        IIndexDataflowHelperFactory helperFactory = new IndexDataflowHelperFactory(
                storaegComponentProvider.getStorageManager(), splitsAndConstraint.first);

        // Output record descriptor of the lookup operator.
        RecordDescriptor outputDescriptor = JobGenHelper.mkRecordDescriptor(typeEnv, opSchema, context);

        // The search callback factory and the dataset version are evaluated as constructor
        // arguments, in that order, after the descriptor above.
        ExternalLookupOperatorDescriptor lookupOperator = new ExternalLookupOperatorDescriptor(
                jobSpec,
                lookupAdapterFactory,
                outputDescriptor,
                helperFactory,
                dataset.getSearchCallbackFactory(
                        storaegComponentProvider, filesIndex, jobId, IndexOperation.SEARCH, null),
                ExternalDatasetsRegistry.INSTANCE.getAndLockDatasetVersion(dataset, this));
        return new Pair<>(lookupOperator, splitsAndConstraint.second);
    } catch (Exception e) {
        throw new AlgebricksException(e);
    }
}
Aggregations