Search in sources:

Example 1 with IDataSourceIndex

Use of org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex in project asterixdb by apache.

From the class MetadataProvider, the method findDataSourceIndex:

@Override
public IDataSourceIndex<String, DataSourceId> findDataSourceIndex(String indexId, DataSourceId dataSourceId) throws AlgebricksException {
    DataSource source = findDataSource(dataSourceId);
    Dataset dataset = ((DatasetDataSource) source).getDataset();
    String indexName = indexId;
    Index secondaryIndex = getIndex(dataset.getDataverseName(), dataset.getDatasetName(), indexName);
    return (secondaryIndex != null) ? new DataSourceIndex(secondaryIndex, dataset.getDataverseName(), dataset.getDatasetName(), this) : null;
}
Also used : Dataset(org.apache.asterix.metadata.entities.Dataset) Index(org.apache.asterix.metadata.entities.Index) IDataSourceIndex(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex) IDataSource(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSource)
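
A minimal caller sketch for the method above, assuming a MetadataProvider instance is already available from the compilation context; the dataverse, dataset, and index names are whatever the caller supplies, and the helper itself is hypothetical. It shows the two-step resolution (build a DataSourceId, then look up the index) together with the null check that Example 4 also performs.

import org.apache.asterix.metadata.declared.DataSourceId;
import org.apache.asterix.metadata.declared.MetadataProvider;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex;

public final class FindDataSourceIndexSketch {

    // Hypothetical helper: resolves a secondary index by name and fails loudly
    // when findDataSourceIndex returns null (i.e. the index does not exist).
    static IDataSourceIndex<String, DataSourceId> resolveIndexOrFail(MetadataProvider metadataProvider,
            String dataverseName, String datasetName, String indexName) throws AlgebricksException {
        DataSourceId dataSourceId = new DataSourceId(dataverseName, datasetName);
        IDataSourceIndex<String, DataSourceId> dsi = metadataProvider.findDataSourceIndex(indexName, dataSourceId);
        if (dsi == null) {
            throw new AlgebricksException("Could not find index " + indexName + " for dataset " + dataSourceId);
        }
        return dsi;
    }
}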

Example 2 with IDataSourceIndex

Use of org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex in project asterixdb by apache.

From the class MetadataProvider, the method getTokenizerRuntime:

@Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getTokenizerRuntime(IDataSourceIndex<String, DataSourceId> dataSourceIndex, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys, ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec, boolean bulkload) throws AlgebricksException {
    String indexName = dataSourceIndex.getId();
    String dataverseName = dataSourceIndex.getDataSource().getId().getDataverseName();
    String datasetName = dataSourceIndex.getDataSource().getId().getDatasourceName();
    IOperatorSchema inputSchema;
    if (inputSchemas.length > 0) {
        inputSchema = inputSchemas[0];
    } else {
        throw new AlgebricksException("TokenizeOperator can not operate without any input variable.");
    }
    Dataset dataset = MetadataManagerUtil.findExistingDataset(mdTxnCtx, dataverseName, datasetName);
    Index secondaryIndex;
    try {
        secondaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), indexName);
    } catch (MetadataException e) {
        throw new AlgebricksException(e);
    }
    // TokenizeOperator only supports a keyword or n-gram index.
    switch(secondaryIndex.getIndexType()) {
        case SINGLE_PARTITION_WORD_INVIX:
        case SINGLE_PARTITION_NGRAM_INVIX:
        case LENGTH_PARTITIONED_WORD_INVIX:
        case LENGTH_PARTITIONED_NGRAM_INVIX:
            return getBinaryTokenizerRuntime(dataverseName, datasetName, indexName, inputSchema, propagatedSchema, primaryKeys, secondaryKeys, recordDesc, spec, secondaryIndex.getIndexType());
        default:
            throw new AlgebricksException("Currently, we do not support TokenizeOperator for the index type: " + secondaryIndex.getIndexType());
    }
}
Also used : IOperatorSchema(org.apache.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema) Dataset(org.apache.asterix.metadata.entities.Dataset) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) Index(org.apache.asterix.metadata.entities.Index) IDataSourceIndex(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex) MetadataException(org.apache.asterix.metadata.MetadataException)
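
The switch above admits only the four inverted-index variants. Below is a minimal sketch of that guard as a standalone predicate, assuming the DatasetConfig.IndexType enum used by these examples; the class and method names are hypothetical.

import org.apache.asterix.common.config.DatasetConfig.IndexType;

public final class TokenizerSupportSketch {

    // Returns true only for the keyword / n-gram (inverted) index types that a
    // TokenizeOperator can be generated for, mirroring the switch in getTokenizerRuntime.
    static boolean supportsTokenizer(IndexType indexType) {
        switch (indexType) {
            case SINGLE_PARTITION_WORD_INVIX:
            case SINGLE_PARTITION_NGRAM_INVIX:
            case LENGTH_PARTITIONED_WORD_INVIX:
            case LENGTH_PARTITIONED_NGRAM_INVIX:
                return true;
            default:
                return false;
        }
    }
}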

Example 3 with IDataSourceIndex

Use of org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex in project asterixdb by apache.

From the class MetadataProvider, the method getIndexInsertOrDeleteOrUpsertRuntime:

private Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getIndexInsertOrDeleteOrUpsertRuntime(IndexOperation indexOp, IDataSourceIndex<String, DataSourceId> dataSourceIndex, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields, ILogicalExpression filterExpr, RecordDescriptor inputRecordDesc, JobGenContext context, JobSpecification spec, boolean bulkload, List<LogicalVariable> prevSecondaryKeys, LogicalVariable prevAdditionalFilteringKey) throws AlgebricksException {
    String indexName = dataSourceIndex.getId();
    String dataverseName = dataSourceIndex.getDataSource().getId().getDataverseName();
    String datasetName = dataSourceIndex.getDataSource().getId().getDatasourceName();
    Dataset dataset = MetadataManagerUtil.findExistingDataset(mdTxnCtx, dataverseName, datasetName);
    Index secondaryIndex;
    try {
        secondaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), indexName);
    } catch (MetadataException e) {
        throw new AlgebricksException(e);
    }
    ArrayList<LogicalVariable> prevAdditionalFilteringKeys = null;
    if (indexOp == IndexOperation.UPSERT && prevAdditionalFilteringKey != null) {
        prevAdditionalFilteringKeys = new ArrayList<>();
        prevAdditionalFilteringKeys.add(prevAdditionalFilteringKey);
    }
    AsterixTupleFilterFactory filterFactory = createTupleFilterFactory(inputSchemas, typeEnv, filterExpr, context);
    switch(secondaryIndex.getIndexType()) {
        case BTREE:
            return getBTreeRuntime(dataverseName, datasetName, indexName, propagatedSchema, primaryKeys, secondaryKeys, additionalNonKeyFields, filterFactory, inputRecordDesc, context, spec, indexOp, bulkload, prevSecondaryKeys, prevAdditionalFilteringKeys);
        case RTREE:
            return getRTreeRuntime(dataverseName, datasetName, indexName, propagatedSchema, primaryKeys, secondaryKeys, additionalNonKeyFields, filterFactory, inputRecordDesc, context, spec, indexOp, bulkload, prevSecondaryKeys, prevAdditionalFilteringKeys);
        case SINGLE_PARTITION_WORD_INVIX:
        case SINGLE_PARTITION_NGRAM_INVIX:
        case LENGTH_PARTITIONED_WORD_INVIX:
        case LENGTH_PARTITIONED_NGRAM_INVIX:
            return getInvertedIndexRuntime(dataverseName, datasetName, indexName, propagatedSchema, primaryKeys, secondaryKeys, additionalNonKeyFields, filterFactory, inputRecordDesc, context, spec, indexOp, secondaryIndex.getIndexType(), bulkload, prevSecondaryKeys, prevAdditionalFilteringKeys);
        default:
            throw new AlgebricksException(indexOp.name() + ": insert, upsert, and delete are not implemented for index type: " + secondaryIndex.getIndexType());
    }
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) Dataset(org.apache.asterix.metadata.entities.Dataset) AsterixTupleFilterFactory(org.apache.asterix.runtime.base.AsterixTupleFilterFactory) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) Index(org.apache.asterix.metadata.entities.Index) IDataSourceIndex(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex) MetadataException(org.apache.asterix.metadata.MetadataException)
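
A small sketch of the upsert-only step near the top of the method: the single previous filtering key is wrapped into a list only for an UPSERT and stays null otherwise. A boolean flag stands in for the IndexOperation enum here, and the helper itself is hypothetical.

import java.util.ArrayList;
import java.util.List;

import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;

public final class PrevFilteringKeysSketch {

    // Non-upsert operations (and upserts without a previous filtering key) pass null
    // through; an upsert with a previous filtering key passes a singleton list.
    static List<LogicalVariable> wrapPrevFilteringKey(boolean isUpsert, LogicalVariable prevFilteringKey) {
        if (!isUpsert || prevFilteringKey == null) {
            return null;
        }
        List<LogicalVariable> keys = new ArrayList<>();
        keys.add(prevFilteringKey);
        return keys;
    }
}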

Example 4 with IDataSourceIndex

Use of org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex in project asterixdb by apache.

From the class SetAsterixPhysicalOperatorsRule, the method computeDefaultPhysicalOp:

private static void computeDefaultPhysicalOp(AbstractLogicalOperator op, IOptimizationContext context) throws AlgebricksException {
    PhysicalOptimizationConfig physicalOptimizationConfig = context.getPhysicalOptimizationConfig();
    if (op.getOperatorTag().equals(LogicalOperatorTag.GROUP)) {
        GroupByOperator gby = (GroupByOperator) op;
        if (gby.getNestedPlans().size() == 1) {
            ILogicalPlan p0 = gby.getNestedPlans().get(0);
            if (p0.getRoots().size() == 1) {
                Mutable<ILogicalOperator> r0 = p0.getRoots().get(0);
                if (((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().equals(LogicalOperatorTag.AGGREGATE)) {
                    AggregateOperator aggOp = (AggregateOperator) r0.getValue();
                    boolean serializable = true;
                    for (Mutable<ILogicalExpression> exprRef : aggOp.getExpressions()) {
                        AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) exprRef.getValue();
                        if (!BuiltinFunctions.isAggregateFunctionSerializable(expr.getFunctionIdentifier())) {
                            serializable = false;
                            break;
                        }
                    }
                    if ((gby.getAnnotations().get(OperatorAnnotations.USE_HASH_GROUP_BY) == Boolean.TRUE || gby.getAnnotations().get(OperatorAnnotations.USE_EXTERNAL_GROUP_BY) == Boolean.TRUE)) {
                        boolean setToExternalGby = false;
                        if (serializable) {
                            // If serializable, use external group-by.
                            // Check whether each serialized aggregation function has a corresponding intermediate aggregate.
                            boolean hasIntermediateAgg = true;
                            IMergeAggregationExpressionFactory mergeAggregationExpressionFactory = context.getMergeAggregationExpressionFactory();
                            List<LogicalVariable> originalVariables = aggOp.getVariables();
                            List<Mutable<ILogicalExpression>> aggExprs = aggOp.getExpressions();
                            int aggNum = aggExprs.size();
                            for (int i = 0; i < aggNum; i++) {
                                AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) aggExprs.get(i).getValue();
                                AggregateFunctionCallExpression serialAggExpr = BuiltinFunctions.makeSerializableAggregateFunctionExpression(expr.getFunctionIdentifier(), expr.getArguments());
                                if (mergeAggregationExpressionFactory.createMergeAggregation(originalVariables.get(i), serialAggExpr, context) == null) {
                                    hasIntermediateAgg = false;
                                    break;
                                }
                            }
                            // Check whether there are multiple aggregates in the sub plan.
                            // Currently, we don't support multiple aggregates in one external group-by.
                            boolean multipleAggOpsFound = false;
                            ILogicalOperator r1Logical = aggOp;
                            while (r1Logical.hasInputs()) {
                                r1Logical = r1Logical.getInputs().get(0).getValue();
                                if (r1Logical.getOperatorTag() == LogicalOperatorTag.AGGREGATE) {
                                    multipleAggOpsFound = true;
                                    break;
                                }
                            }
                            if (hasIntermediateAgg && !multipleAggOpsFound) {
                                for (int i = 0; i < aggNum; i++) {
                                    AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) aggExprs.get(i).getValue();
                                    AggregateFunctionCallExpression serialAggExpr = BuiltinFunctions.makeSerializableAggregateFunctionExpression(expr.getFunctionIdentifier(), expr.getArguments());
                                    aggOp.getExpressions().get(i).setValue(serialAggExpr);
                                }
                                ExternalGroupByPOperator externalGby = new ExternalGroupByPOperator(gby.getGroupByList(), physicalOptimizationConfig.getMaxFramesExternalGroupBy(), (long) physicalOptimizationConfig.getMaxFramesExternalGroupBy() * physicalOptimizationConfig.getFrameSize());
                                generateMergeAggregationExpressions(gby, context);
                                op.setPhysicalOperator(externalGby);
                                setToExternalGby = true;
                            }
                        }
                        if (!setToExternalGby) {
                            // if not serializable or no intermediate agg, use pre-clustered group-by
                            List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> gbyList = gby.getGroupByList();
                            List<LogicalVariable> columnList = new ArrayList<LogicalVariable>(gbyList.size());
                            for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gbyList) {
                                ILogicalExpression expr = p.second.getValue();
                                if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
                                    VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
                                    columnList.add(varRef.getVariableReference());
                                }
                            }
                            op.setPhysicalOperator(new PreclusteredGroupByPOperator(columnList, gby.isGroupAll()));
                        }
                    }
                } else if (((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().equals(LogicalOperatorTag.RUNNINGAGGREGATE)) {
                    List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> gbyList = gby.getGroupByList();
                    List<LogicalVariable> columnList = new ArrayList<LogicalVariable>(gbyList.size());
                    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gbyList) {
                        ILogicalExpression expr = p.second.getValue();
                        if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
                            VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
                            columnList.add(varRef.getVariableReference());
                        }
                    }
                    op.setPhysicalOperator(new PreclusteredGroupByPOperator(columnList, gby.isGroupAll()));
                } else {
                    throw new AlgebricksException("Unsupported nested operator within a group-by: " + ((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().name());
                }
            }
        }
    }
    if (op.getPhysicalOperator() == null) {
        switch(op.getOperatorTag()) {
            case INNERJOIN:
                {
                    JoinUtils.setJoinAlgorithmAndExchangeAlgo((InnerJoinOperator) op, context);
                    break;
                }
            case LEFTOUTERJOIN:
                {
                    JoinUtils.setJoinAlgorithmAndExchangeAlgo((LeftOuterJoinOperator) op, context);
                    break;
                }
            case UNNEST_MAP:
            case LEFT_OUTER_UNNEST_MAP:
                {
                    ILogicalExpression unnestExpr = ((AbstractUnnestMapOperator) op).getExpressionRef().getValue();
                    if (unnestExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
                        AbstractFunctionCallExpression f = (AbstractFunctionCallExpression) unnestExpr;
                        FunctionIdentifier fid = f.getFunctionIdentifier();
                        if (!fid.equals(BuiltinFunctions.INDEX_SEARCH)) {
                            throw new IllegalStateException();
                        }
                        AccessMethodJobGenParams jobGenParams = new AccessMethodJobGenParams();
                        jobGenParams.readFromFuncArgs(f.getArguments());
                        MetadataProvider mp = (MetadataProvider) context.getMetadataProvider();
                        DataSourceId dataSourceId = new DataSourceId(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
                        Dataset dataset = mp.findDataset(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
                        IDataSourceIndex<String, DataSourceId> dsi = mp.findDataSourceIndex(jobGenParams.getIndexName(), dataSourceId);
                        INodeDomain storageDomain = mp.findNodeDomain(dataset.getNodeGroupName());
                        if (dsi == null) {
                            throw new AlgebricksException("Could not find index " + jobGenParams.getIndexName() + " for dataset " + dataSourceId);
                        }
                        IndexType indexType = jobGenParams.getIndexType();
                        boolean requiresBroadcast = jobGenParams.getRequiresBroadcast();
                        switch(indexType) {
                            case BTREE:
                                {
                                    BTreeJobGenParams btreeJobGenParams = new BTreeJobGenParams();
                                    btreeJobGenParams.readFromFuncArgs(f.getArguments());
                                    op.setPhysicalOperator(new BTreeSearchPOperator(dsi, storageDomain, requiresBroadcast, btreeJobGenParams.isPrimaryIndex(), btreeJobGenParams.isEqCondition(), btreeJobGenParams.getLowKeyVarList(), btreeJobGenParams.getHighKeyVarList()));
                                    break;
                                }
                            case RTREE:
                                {
                                    op.setPhysicalOperator(new RTreeSearchPOperator(dsi, storageDomain, requiresBroadcast));
                                    break;
                                }
                            case SINGLE_PARTITION_WORD_INVIX:
                            case SINGLE_PARTITION_NGRAM_INVIX:
                                {
                                    op.setPhysicalOperator(new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, false));
                                    break;
                                }
                            case LENGTH_PARTITIONED_WORD_INVIX:
                            case LENGTH_PARTITIONED_NGRAM_INVIX:
                                {
                                    op.setPhysicalOperator(new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, true));
                                    break;
                                }
                            default:
                                {
                                    throw new NotImplementedException(indexType + " indexes are not implemented.");
                                }
                        }
                    }
                    break;
                }
        }
    }
    if (op.hasNestedPlans()) {
        AbstractOperatorWithNestedPlans nested = (AbstractOperatorWithNestedPlans) op;
        for (ILogicalPlan p : nested.getNestedPlans()) {
            setPhysicalOperators(p, context);
        }
    }
    for (Mutable<ILogicalOperator> opRef : op.getInputs()) {
        computeDefaultPhysicalOp((AbstractLogicalOperator) opRef.getValue(), context);
    }
}
Also used : IMergeAggregationExpressionFactory(org.apache.hyracks.algebricks.core.algebra.expressions.IMergeAggregationExpressionFactory) PreclusteredGroupByPOperator(org.apache.hyracks.algebricks.core.algebra.operators.physical.PreclusteredGroupByPOperator) NotImplementedException(org.apache.hyracks.algebricks.common.exceptions.NotImplementedException) ArrayList(java.util.ArrayList) LeftOuterJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.LeftOuterJoinOperator) InnerJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator) AccessMethodJobGenParams(org.apache.asterix.optimizer.rules.am.AccessMethodJobGenParams) AggregateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator) IDataSourceIndex(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex) ArrayList(java.util.ArrayList) List(java.util.List) IndexType(org.apache.asterix.common.config.DatasetConfig.IndexType) PhysicalOptimizationConfig(org.apache.hyracks.algebricks.core.rewriter.base.PhysicalOptimizationConfig) Pair(org.apache.hyracks.algebricks.common.utils.Pair) AbstractOperatorWithNestedPlans(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractOperatorWithNestedPlans) LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) AggregateFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression) GroupByOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator) AbstractLogicalOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator) RTreeSearchPOperator(org.apache.asterix.algebra.operators.physical.RTreeSearchPOperator) Dataset(org.apache.asterix.metadata.entities.Dataset) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) AbstractFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression) BTreeJobGenParams(org.apache.asterix.optimizer.rules.am.BTreeJobGenParams) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) INodeDomain(org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain) InvertedIndexPOperator(org.apache.asterix.algebra.operators.physical.InvertedIndexPOperator) BTreeSearchPOperator(org.apache.asterix.algebra.operators.physical.BTreeSearchPOperator) Mutable(org.apache.commons.lang3.mutable.Mutable) FunctionIdentifier(org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) MetadataProvider(org.apache.asterix.metadata.declared.MetadataProvider) VariableReferenceExpression(org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) ILogicalPlan(org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan) DataSourceId(org.apache.asterix.metadata.declared.DataSourceId) ExternalGroupByPOperator(org.apache.hyracks.algebricks.core.algebra.operators.physical.ExternalGroupByPOperator)
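
The inner switch over IndexType is the heart of the rule: once the IDataSourceIndex and the node domain are resolved, each index type maps to a physical search operator. The following is a condensed, hypothetical sketch of that dispatch for the non-BTree cases (BTREE is omitted because it additionally needs the BTreeJobGenParams key lists); the constructor calls mirror the ones in the rule above.

import org.apache.asterix.algebra.operators.physical.InvertedIndexPOperator;
import org.apache.asterix.algebra.operators.physical.RTreeSearchPOperator;
import org.apache.asterix.common.config.DatasetConfig.IndexType;
import org.apache.asterix.metadata.declared.DataSourceId;
import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
import org.apache.hyracks.algebricks.core.algebra.base.IPhysicalOperator;
import org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex;
import org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain;

public final class IndexSearchDispatchSketch {

    // Maps an index type to the physical search operator the optimizer rule would
    // assign, for the RTree and inverted-index cases shown above. The trailing boolean
    // distinguishes single-partition (false) from length-partitioned (true) inverted
    // indexes, following the calls in the rule.
    static IPhysicalOperator searchOperatorFor(IndexType indexType, IDataSourceIndex<String, DataSourceId> dsi,
            INodeDomain storageDomain, boolean requiresBroadcast) {
        switch (indexType) {
            case RTREE:
                return new RTreeSearchPOperator(dsi, storageDomain, requiresBroadcast);
            case SINGLE_PARTITION_WORD_INVIX:
            case SINGLE_PARTITION_NGRAM_INVIX:
                return new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, false);
            case LENGTH_PARTITIONED_WORD_INVIX:
            case LENGTH_PARTITIONED_NGRAM_INVIX:
                return new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, true);
            default:
                throw new NotImplementedException(indexType + " indexes are not implemented.");
        }
    }
}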

Aggregations

Dataset (org.apache.asterix.metadata.entities.Dataset): 4
IDataSourceIndex (org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex): 4
Index (org.apache.asterix.metadata.entities.Index): 3
AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException): 3
MetadataException (org.apache.asterix.metadata.MetadataException): 2
LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable): 2
ArrayList (java.util.ArrayList): 1
List (java.util.List): 1
BTreeSearchPOperator (org.apache.asterix.algebra.operators.physical.BTreeSearchPOperator): 1
InvertedIndexPOperator (org.apache.asterix.algebra.operators.physical.InvertedIndexPOperator): 1
RTreeSearchPOperator (org.apache.asterix.algebra.operators.physical.RTreeSearchPOperator): 1
IndexType (org.apache.asterix.common.config.DatasetConfig.IndexType): 1
DataSourceId (org.apache.asterix.metadata.declared.DataSourceId): 1
MetadataProvider (org.apache.asterix.metadata.declared.MetadataProvider): 1
AccessMethodJobGenParams (org.apache.asterix.optimizer.rules.am.AccessMethodJobGenParams): 1
BTreeJobGenParams (org.apache.asterix.optimizer.rules.am.BTreeJobGenParams): 1
AsterixTupleFilterFactory (org.apache.asterix.runtime.base.AsterixTupleFilterFactory): 1
Mutable (org.apache.commons.lang3.mutable.Mutable): 1
NotImplementedException (org.apache.hyracks.algebricks.common.exceptions.NotImplementedException): 1
Pair (org.apache.hyracks.algebricks.common.utils.Pair): 1