Search in sources :

Example 1 with LSMInvertedIndexSearchOperatorDescriptor

Use of org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorDescriptor in the Apache AsterixDB project.

From the class InvertedIndexPOperator, method buildInvertedIndexRuntime.

/**
 * Builds the search operator for an inverted-index lookup and pairs it with the
 * partition constraint obtained for that index.
 *
 * <p>The secondary index is looked up by name in the metadata; the search modifier and
 * query tokenizer factories are derived from it, and an
 * {@link LSMInvertedIndexSearchOperatorDescriptor} is created on top of the index's
 * file splits.
 *
 * @param metadataProvider source of the metadata transaction context, the split
 *        provider/constraint for the index, and the storage component provider
 * @param context job generation context; supplies the type environment of
 *        {@code unnestMap} and the missing-writer factory
 * @param jobSpec job specification the new operator descriptor is created for
 * @param unnestMap operator whose type environment is used to build the output record descriptor
 * @param opSchema schema used to build the output record descriptor
 * @param retainInput forwarded unchanged to the search operator descriptor
 * @param retainMissing forwarded unchanged to the search operator descriptor
 * @param datasetName dataset name; used only in the "no index" error message
 * @param dataset dataset owning the index; used for the metadata lookup, the primary-key
 *        count and the search callback factory
 * @param indexName name of the secondary index to search
 * @param searchKeyType type tag of the search key, used to choose the query tokenizer
 * @param keyFields search key fields; only the first entry is used
 * @param searchModifierType selects the search modifier and tokenizer factories
 * @param similarityThreshold constant that is cast to {@link AsterixConstantValue} and
 *        unwrapped before being handed to the search modifier factory
 * @param minFilterFieldIndexes forwarded unchanged to the search operator descriptor
 * @param maxFilterFieldIndexes forwarded unchanged to the search operator descriptor
 * @param isFullTextSearchQuery forwarded unchanged to the search operator descriptor
 * @return the search operator descriptor together with the index's partition constraint
 * @throws AlgebricksException if the index is not found in the metadata, or if a
 *         {@link MetadataException} is raised while building the runtime
 */
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildInvertedIndexRuntime(
        MetadataProvider metadataProvider, JobGenContext context, JobSpecification jobSpec,
        AbstractUnnestMapOperator unnestMap, IOperatorSchema opSchema, boolean retainInput,
        boolean retainMissing, String datasetName, Dataset dataset, String indexName,
        ATypeTag searchKeyType, int[] keyFields, SearchModifierType searchModifierType,
        IAlgebricksConstantValue similarityThreshold, int[] minFilterFieldIndexes,
        int[] maxFilterFieldIndexes, boolean isFullTextSearchQuery) throws AlgebricksException {
    try {
        IAObject thresholdObject = ((AsterixConstantValue) similarityThreshold).getObject();
        int primaryKeyCount = dataset.getPrimaryKeys().size();

        // Resolve the secondary index from the metadata; without it no runtime can be generated.
        Index invertedIndex = MetadataManager.INSTANCE.getIndex(
                metadataProvider.getMetadataTxnContext(),
                dataset.getDataverseName(),
                dataset.getDatasetName(),
                indexName);
        if (invertedIndex == null) {
            throw new AlgebricksException(
                    "Code generation error: no index " + indexName + " for dataset " + datasetName);
        }

        IVariableTypeEnvironment typeEnvironment = context.getTypeEnvironment(unnestMap);
        RecordDescriptor outputDescriptor =
                JobGenHelper.mkRecordDescriptor(typeEnvironment, opSchema, context);
        Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
                metadataProvider.getSplitProviderAndConstraints(dataset, indexName);

        // TODO: Here we assume there is only one search key field.
        int searchKeyField = keyFields[0];

        // Get tokenizer and search modifier factories.
        IInvertedIndexSearchModifierFactory modifierFactory =
                InvertedIndexAccessMethod.getSearchModifierFactory(
                        searchModifierType, thresholdObject, invertedIndex);
        IBinaryTokenizerFactory tokenizerFactory =
                InvertedIndexAccessMethod.getBinaryTokenizerFactory(
                        searchModifierType, searchKeyType, invertedIndex);

        IIndexDataflowHelperFactory helperFactory = new IndexDataflowHelperFactory(
                metadataProvider.getStorageComponentProvider().getStorageManager(),
                splitsAndConstraint.first);

        // Argument order below mirrors the descriptor's constructor; the search callback
        // is created for the job id taken from the job's event listener factory.
        IOperatorDescriptor searchOperator = new LSMInvertedIndexSearchOperatorDescriptor(
                jobSpec,
                outputDescriptor,
                searchKeyField,
                helperFactory,
                tokenizerFactory,
                modifierFactory,
                retainInput,
                retainMissing,
                context.getMissingWriterFactory(),
                dataset.getSearchCallbackFactory(
                        metadataProvider.getStorageComponentProvider(),
                        invertedIndex,
                        ((JobEventListenerFactory) jobSpec.getJobletEventListenerFactory()).getJobId(),
                        IndexOperation.SEARCH,
                        null),
                minFilterFieldIndexes,
                maxFilterFieldIndexes,
                isFullTextSearchQuery,
                primaryKeyCount,
                false);

        return new Pair<>(searchOperator, splitsAndConstraint.second);
    } catch (MetadataException metadataFailure) {
        // Surface metadata failures as Algebricks errors, keeping the original cause.
        throw new AlgebricksException(metadataFailure);
    }
}
Also used : RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) IAObject(org.apache.asterix.om.base.IAObject) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) Index(org.apache.asterix.metadata.entities.Index) IDataSourceIndex(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex) IInvertedIndexSearchModifierFactory(org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifierFactory) JobEventListenerFactory(org.apache.asterix.runtime.job.listener.JobEventListenerFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) MetadataException(org.apache.asterix.metadata.MetadataException) LSMInvertedIndexSearchOperatorDescriptor(org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorDescriptor) AsterixConstantValue(org.apache.asterix.om.constants.AsterixConstantValue) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IBinaryTokenizerFactory(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) IndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory) IIndexDataflowHelperFactory(org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory) IVariableTypeEnvironment(org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Aggregations

MetadataException (org.apache.asterix.metadata.MetadataException)1 Index (org.apache.asterix.metadata.entities.Index)1 IAObject (org.apache.asterix.om.base.IAObject)1 AsterixConstantValue (org.apache.asterix.om.constants.AsterixConstantValue)1 JobEventListenerFactory (org.apache.asterix.runtime.job.listener.JobEventListenerFactory)1 AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)1 AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException)1 Pair (org.apache.hyracks.algebricks.common.utils.Pair)1 IVariableTypeEnvironment (org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment)1 IDataSourceIndex (org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex)1 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)1 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)1 IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)1 IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)1 IInvertedIndexSearchModifierFactory (org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifierFactory)1 LSMInvertedIndexSearchOperatorDescriptor (org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorDescriptor)1 IBinaryTokenizerFactory (org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory)1