Search in sources :

Example 31 with Dataset

use of org.apache.asterix.metadata.entities.Dataset in project asterixdb by apache.

the class RTreeAccessMethod method applyJoinPlanTransformation.

/**
 * Replaces the join referenced by {@code joinRef} with a select over a plan that searches
 * {@code chosenIndex} on the right subtree, using the left subtree as the probe side.
 *
 * <p>Only the right subtree is ever used as the index side: if it is not a data-source scan over
 * the dataset that {@code chosenIndex} belongs to, nothing is changed.
 *
 * @param joinRef reference to the join operator; on success it is re-pointed to the new select
 * @param leftSubTree subtree used as the probe side
 * @param rightSubTree subtree whose data source is replaced by the index lookup plan
 * @param chosenIndex index to search
 * @param analysisCtx analysis context used to resolve the index's dataset
 * @param context optimization context
 * @param isLeftOuterJoin whether the join being rewritten is a left outer join
 * @param hasGroupBy whether the null placeholder in a group-by must be reset (only consulted
 *        together with {@code isLeftOuterJoin})
 * @return {@code true} if the join was replaced, {@code false} if the plan was left untouched
 * @throws AlgebricksException propagated from the plan-construction helpers
 */
@Override
public boolean applyJoinPlanTransformation(Mutable<ILogicalOperator> joinRef, OptimizableOperatorSubTree leftSubTree, OptimizableOperatorSubTree rightSubTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx, IOptimizationContext context, boolean isLeftOuterJoin, boolean hasGroupBy) throws AlgebricksException {
    Dataset indexedDataset = analysisCtx.getDatasetFromIndexDatasetMap(chosenIndex);
    // Sanity check: the index must belong to the dataset scanned by the right subtree.
    if (!rightSubTree.hasDataSourceScan() || !indexedDataset.getDatasetName().equals(rightSubTree.getDataset().getDatasetName())) {
        return false;
    }
    final OptimizableOperatorSubTree indexSubTree = rightSubTree;
    final OptimizableOperatorSubTree probeSubTree = leftSubTree;

    // For a left outer join, the first data-source variable of the index subtree becomes the
    // new null placeholder variable.
    final LogicalVariable newNullPlaceHolderVar = isLeftOuterJoin ? indexSubTree.getDataSourceVariables().get(0) : null;

    // TODO: We can probably do something smarter here based on selectivity or MBR area.
    ILogicalOperator indexLookupPlan = createSecondaryToPrimaryPlan(indexSubTree, probeSubTree, chosenIndex, analysisCtx, true, isLeftOuterJoin, true, context);
    if (indexLookupPlan == null) {
        return false;
    }
    if (isLeftOuterJoin && hasGroupBy) {
        // Point the group-by at the new null placeholder variable.
        AccessMethodUtils.resetLOJNullPlaceholderVariableInGroupByOp(analysisCtx, newNullPlaceHolderVar, context);
    }
    // Swap the original data source for the index lookup plan.
    indexSubTree.getDataSourceRef().setValue(indexLookupPlan);

    // The join turns into a select that keeps the join condition and sits on top of the index subtree.
    AbstractBinaryJoinOperator originalJoin = (AbstractBinaryJoinOperator) joinRef.getValue();
    SelectOperator replacementSelect = new SelectOperator(originalJoin.getCondition(), isLeftOuterJoin, newNullPlaceHolderVar);
    replacementSelect.getInputs().add(indexSubTree.getRootRef());
    replacementSelect.setExecutionMode(ExecutionMode.LOCAL);
    context.computeAndSetTypeEnvironmentForOperator(replacementSelect);

    // The select-rooted subtree takes the place of the join.
    joinRef.setValue(replacementSelect);
    return true;
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) SelectOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator) Dataset(org.apache.asterix.metadata.entities.Dataset) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) AbstractBinaryJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator)

Example 32 with Dataset

use of org.apache.asterix.metadata.entities.Dataset in project asterixdb by apache.

the class RTreeAccessMethod method createSecondaryToPrimaryPlan.

/**
 * Builds the plan fragment that searches the R-Tree index {@code chosenIndex} and feeds the
 * search results into a primary-index lookup (or an external-data lookup for external datasets).
 *
 * <p>The search key expression is expanded into {@code 2 * numDimensions} MBR fields by an assign
 * operator. That assign reads from a copy of the data-source operator's input when
 * {@code probeSubTree} is {@code null} (selection), or from the root of the probe subtree (join).
 *
 * @param indexSubTree subtree whose data source is being replaced by the index lookup
 * @param probeSubTree probe side of a join, or {@code null} when optimizing a selection
 * @param chosenIndex R-Tree index to search
 * @param analysisCtx analysis context used to pick the function expression being optimized
 * @param retainInput forwarded to the job-gen parameters and the unnest-map factories
 * @param retainNull forwarded to the unnest-map factories
 * @param requiresBroadcast forwarded to the job-gen parameters
 * @param context optimization context, used to allocate the key variables
 * @return the top operator of the new lookup plan, or {@code null} if the type of the indexed
 *         field could not be determined
 * @throws AlgebricksException propagated from the helper calls
 */
private ILogicalOperator createSecondaryToPrimaryPlan(OptimizableOperatorSubTree indexSubTree, OptimizableOperatorSubTree probeSubTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx, boolean retainInput, boolean retainNull, boolean requiresBroadcast, IOptimizationContext context) throws AlgebricksException {
    IOptimizableFuncExpr optFuncExpr = AccessMethodUtils.chooseFirstOptFuncExpr(chosenIndex, analysisCtx);
    Dataset indexedDataset = indexSubTree.getDataset();
    ARecordType recType = indexSubTree.getRecordType();
    ARecordType metaRecType = indexSubTree.getMetaRecordType();
    int fieldIdx = AccessMethodUtils.chooseFirstOptFuncVar(chosenIndex, analysisCtx);
    Pair<IAType, Boolean> keyTypeInfo = Index.getNonNullableOpenFieldType(optFuncExpr.getFieldType(fieldIdx), optFuncExpr.getFieldName(fieldIdx), recType);
    if (keyTypeInfo == null) {
        return null;
    }

    // The dimensionality of the indexed field determines how many MBR fields the search needs.
    int numDimensions = NonTaggedFormatUtil.getNumDimensions(keyTypeInfo.first.getTypeTag());
    int mbrFieldCount = numDimensions * 2;

    // The caller has already verified that indexSubTree is rooted at a data-source scan.
    AbstractDataSourceOperator scanOp = (AbstractDataSourceOperator) indexSubTree.getDataSourceRef().getValue();
    RTreeJobGenParams jobGenParams = new RTreeJobGenParams(chosenIndex.getIndexName(), IndexType.RTREE, indexedDataset.getDataverseName(), indexedDataset.getDatasetName(), retainInput, requiresBroadcast);

    // The R-Tree search takes an MBR with one field per coordinate, so each coordinate of the
    // MBR around the search key is extracted into its own variable by a single assign operator.
    ArrayList<LogicalVariable> mbrVars = new ArrayList<>();
    ArrayList<Mutable<ILogicalExpression>> mbrExprs = new ArrayList<>();
    ILogicalExpression searchKeyExpr = AccessMethodUtils.createSearchKeyExpr(optFuncExpr, indexSubTree, probeSubTree).first;
    int mbrField = 0;
    while (mbrField < mbrFieldCount) {
        // create-mbr(searchKey, numDimensions, mbrField) yields one coordinate of the MBR.
        AbstractFunctionCallExpression extractMbrField = new ScalarFunctionCallExpression(FunctionUtil.getFunctionInfo(BuiltinFunctions.CREATE_MBR));
        extractMbrField.getArguments().add(new MutableObject<>(searchKeyExpr));
        extractMbrField.getArguments().add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(numDimensions)))));
        extractMbrField.getArguments().add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(mbrField)))));
        // Pair a fresh variable with the extraction expression.
        mbrVars.add(context.newVar());
        mbrExprs.add(new MutableObject<ILogicalExpression>(extractMbrField));
        mbrField++;
    }
    jobGenParams.setKeyVarList(mbrVars);

    AssignOperator assignSearchKeys = new AssignOperator(mbrVars, mbrExprs);
    if (probeSubTree != null) {
        // Join: the assign sits on top of the probe subtree.
        assignSearchKeys.getInputs().add(probeSubTree.getRootRef());
    } else {
        // Selection: the assign takes a copy of the scan's own input and its execution mode.
        assignSearchKeys.getInputs().add(new MutableObject<>(OperatorManipulationUtil.deepCopy(scanOp.getInputs().get(0).getValue())));
        assignSearchKeys.setExecutionMode(scanOp.getExecutionMode());
    }

    ILogicalOperator secondaryIndexUnnestOp = AccessMethodUtils.createSecondaryIndexUnnestMap(indexedDataset, recType, metaRecType, chosenIndex, assignSearchKeys, jobGenParams, context, false, retainInput, retainNull);

    // Feed the secondary-index results into the lookup that fetches the actual records.
    if (indexedDataset.getDatasetType() == DatasetType.EXTERNAL) {
        return AccessMethodUtils.createExternalDataLookupUnnestMap(scanOp, indexedDataset, recType, secondaryIndexUnnestOp, context, retainInput, retainNull);
    }
    return AccessMethodUtils.createPrimaryIndexUnnestMap(scanOp, indexedDataset, recType, metaRecType, secondaryIndexUnnestOp, context, true, retainInput, false, false);
}
Also used : ConstantExpression(org.apache.hyracks.algebricks.core.algebra.expressions.ConstantExpression) ArrayList(java.util.ArrayList) AsterixConstantValue(org.apache.asterix.om.constants.AsterixConstantValue) ScalarFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression) LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) AbstractDataSourceOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractDataSourceOperator) Dataset(org.apache.asterix.metadata.entities.Dataset) AbstractFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) AssignOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator) AInt32(org.apache.asterix.om.base.AInt32) Mutable(org.apache.commons.lang3.mutable.Mutable) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) ARecordType(org.apache.asterix.om.types.ARecordType) IAType(org.apache.asterix.om.types.IAType)

Example 33 with Dataset

use of org.apache.asterix.metadata.entities.Dataset in project asterixdb by apache.

the class SetAsterixPhysicalOperatorsRule method computeDefaultPhysicalOp.

/**
 * Assigns a default physical operator to {@code op} where this rule knows how to, then recurses
 * into the operator's nested plans and inputs.
 *
 * <ul>
 * <li>GROUP with a single nested plan rooted at AGGREGATE and annotated for hash/external
 * group-by: an external group-by when every aggregate is serializable, has a merge aggregation
 * and there is no second AGGREGATE below; otherwise a pre-clustered group-by.</li>
 * <li>GROUP with a single nested plan rooted at RUNNINGAGGREGATE: a pre-clustered group-by.</li>
 * <li>INNERJOIN / LEFTOUTERJOIN: delegated to {@code JoinUtils}.</li>
 * <li>UNNEST_MAP / LEFT_OUTER_UNNEST_MAP over an index search: the search operator matching the
 * index type.</li>
 * </ul>
 *
 * @param op operator to process; its inputs are processed recursively
 * @param context optimization context providing configuration and metadata
 * @throws AlgebricksException if a group-by nests an unsupported operator, or if the dataset or
 *         index referenced by an index search cannot be found
 */
private static void computeDefaultPhysicalOp(AbstractLogicalOperator op, IOptimizationContext context) throws AlgebricksException {
    PhysicalOptimizationConfig physicalOptimizationConfig = context.getPhysicalOptimizationConfig();
    if (op.getOperatorTag().equals(LogicalOperatorTag.GROUP)) {
        GroupByOperator gby = (GroupByOperator) op;
        if (gby.getNestedPlans().size() == 1) {
            ILogicalPlan p0 = gby.getNestedPlans().get(0);
            if (p0.getRoots().size() == 1) {
                Mutable<ILogicalOperator> r0 = p0.getRoots().get(0);
                LogicalOperatorTag nestedRootTag = ((AbstractLogicalOperator) r0.getValue()).getOperatorTag();
                if (nestedRootTag.equals(LogicalOperatorTag.AGGREGATE)) {
                    AggregateOperator aggOp = (AggregateOperator) r0.getValue();
                    boolean serializable = true;
                    for (Mutable<ILogicalExpression> exprRef : aggOp.getExpressions()) {
                        AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) exprRef.getValue();
                        if (!BuiltinFunctions.isAggregateFunctionSerializable(expr.getFunctionIdentifier())) {
                            serializable = false;
                            break;
                        }
                    }
                    // Compare by value rather than by reference so that any Boolean true counts,
                    // not only the canonical Boolean.TRUE instance.
                    boolean hashOrExternalRequested = Boolean.TRUE.equals(gby.getAnnotations().get(OperatorAnnotations.USE_HASH_GROUP_BY)) || Boolean.TRUE.equals(gby.getAnnotations().get(OperatorAnnotations.USE_EXTERNAL_GROUP_BY));
                    if (hashOrExternalRequested) {
                        boolean setToExternalGby = false;
                        if (serializable) {
                            // if serializable, use external group-by
                            // now check whether the serialized version aggregation function has corresponding intermediate agg
                            boolean hasIntermediateAgg = true;
                            IMergeAggregationExpressionFactory mergeAggregationExpressionFactory = context.getMergeAggregationExpressionFactory();
                            List<LogicalVariable> originalVariables = aggOp.getVariables();
                            List<Mutable<ILogicalExpression>> aggExprs = aggOp.getExpressions();
                            int aggNum = aggExprs.size();
                            for (int i = 0; i < aggNum; i++) {
                                AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) aggExprs.get(i).getValue();
                                AggregateFunctionCallExpression serialAggExpr = BuiltinFunctions.makeSerializableAggregateFunctionExpression(expr.getFunctionIdentifier(), expr.getArguments());
                                if (mergeAggregationExpressionFactory.createMergeAggregation(originalVariables.get(i), serialAggExpr, context) == null) {
                                    hasIntermediateAgg = false;
                                    break;
                                }
                            }
                            // Check whether there are multiple aggregates in the sub plan.
                            // Currently, we don't support multiple aggregates in one external group-by.
                            boolean multipleAggOpsFound = false;
                            ILogicalOperator r1Logical = aggOp;
                            while (r1Logical.hasInputs()) {
                                r1Logical = r1Logical.getInputs().get(0).getValue();
                                if (r1Logical.getOperatorTag() == LogicalOperatorTag.AGGREGATE) {
                                    multipleAggOpsFound = true;
                                    break;
                                }
                            }
                            if (hasIntermediateAgg && !multipleAggOpsFound) {
                                // Swap every aggregate for its serializable counterpart.
                                for (int i = 0; i < aggNum; i++) {
                                    AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) aggExprs.get(i).getValue();
                                    AggregateFunctionCallExpression serialAggExpr = BuiltinFunctions.makeSerializableAggregateFunctionExpression(expr.getFunctionIdentifier(), expr.getArguments());
                                    aggOp.getExpressions().get(i).setValue(serialAggExpr);
                                }
                                // The cast to long keeps the frames * frameSize product from overflowing int.
                                ExternalGroupByPOperator externalGby = new ExternalGroupByPOperator(gby.getGroupByList(), physicalOptimizationConfig.getMaxFramesExternalGroupBy(), (long) physicalOptimizationConfig.getMaxFramesExternalGroupBy() * physicalOptimizationConfig.getFrameSize());
                                generateMergeAggregationExpressions(gby, context);
                                op.setPhysicalOperator(externalGby);
                                setToExternalGby = true;
                            }
                        }
                        if (!setToExternalGby) {
                            // if not serializable or no intermediate agg, use pre-clustered group-by
                            op.setPhysicalOperator(createPreclusteredGroupBy(gby));
                        }
                    }
                } else if (nestedRootTag.equals(LogicalOperatorTag.RUNNINGAGGREGATE)) {
                    op.setPhysicalOperator(createPreclusteredGroupBy(gby));
                } else {
                    throw new AlgebricksException("Unsupported nested operator within a group-by: " + nestedRootTag.name());
                }
            }
        }
    }
    if (op.getPhysicalOperator() == null) {
        switch(op.getOperatorTag()) {
            case INNERJOIN:
                {
                    JoinUtils.setJoinAlgorithmAndExchangeAlgo((InnerJoinOperator) op, context);
                    break;
                }
            case LEFTOUTERJOIN:
                {
                    JoinUtils.setJoinAlgorithmAndExchangeAlgo((LeftOuterJoinOperator) op, context);
                    break;
                }
            case UNNEST_MAP:
            case LEFT_OUTER_UNNEST_MAP:
                {
                    ILogicalExpression unnestExpr = ((AbstractUnnestMapOperator) op).getExpressionRef().getValue();
                    if (unnestExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
                        AbstractFunctionCallExpression f = (AbstractFunctionCallExpression) unnestExpr;
                        FunctionIdentifier fid = f.getFunctionIdentifier();
                        if (!fid.equals(BuiltinFunctions.INDEX_SEARCH)) {
                            throw new IllegalStateException("Unexpected function in unnest-map: " + fid);
                        }
                        AccessMethodJobGenParams jobGenParams = new AccessMethodJobGenParams();
                        jobGenParams.readFromFuncArgs(f.getArguments());
                        MetadataProvider mp = (MetadataProvider) context.getMetadataProvider();
                        DataSourceId dataSourceId = new DataSourceId(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
                        Dataset dataset = mp.findDataset(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
                        // Fail with a descriptive error instead of a NullPointerException on
                        // dataset.getNodeGroupName() below.
                        if (dataset == null) {
                            throw new AlgebricksException("Could not find dataset " + dataSourceId);
                        }
                        IDataSourceIndex<String, DataSourceId> dsi = mp.findDataSourceIndex(jobGenParams.getIndexName(), dataSourceId);
                        if (dsi == null) {
                            throw new AlgebricksException("Could not find index " + jobGenParams.getIndexName() + " for dataset " + dataSourceId);
                        }
                        INodeDomain storageDomain = mp.findNodeDomain(dataset.getNodeGroupName());
                        IndexType indexType = jobGenParams.getIndexType();
                        boolean requiresBroadcast = jobGenParams.getRequiresBroadcast();
                        switch(indexType) {
                            case BTREE:
                                {
                                    BTreeJobGenParams btreeJobGenParams = new BTreeJobGenParams();
                                    btreeJobGenParams.readFromFuncArgs(f.getArguments());
                                    op.setPhysicalOperator(new BTreeSearchPOperator(dsi, storageDomain, requiresBroadcast, btreeJobGenParams.isPrimaryIndex(), btreeJobGenParams.isEqCondition(), btreeJobGenParams.getLowKeyVarList(), btreeJobGenParams.getHighKeyVarList()));
                                    break;
                                }
                            case RTREE:
                                {
                                    op.setPhysicalOperator(new RTreeSearchPOperator(dsi, storageDomain, requiresBroadcast));
                                    break;
                                }
                            case SINGLE_PARTITION_WORD_INVIX:
                            case SINGLE_PARTITION_NGRAM_INVIX:
                                {
                                    op.setPhysicalOperator(new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, false));
                                    break;
                                }
                            case LENGTH_PARTITIONED_WORD_INVIX:
                            case LENGTH_PARTITIONED_NGRAM_INVIX:
                                {
                                    op.setPhysicalOperator(new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, true));
                                    break;
                                }
                            default:
                                {
                                    throw new NotImplementedException(indexType + " indexes are not implemented.");
                                }
                        }
                    }
                    break;
                }
            default:
                // No default physical operator is chosen here for any other operator.
                break;
        }
    }
    if (op.hasNestedPlans()) {
        AbstractOperatorWithNestedPlans nested = (AbstractOperatorWithNestedPlans) op;
        for (ILogicalPlan p : nested.getNestedPlans()) {
            setPhysicalOperators(p, context);
        }
    }
    for (Mutable<ILogicalOperator> opRef : op.getInputs()) {
        computeDefaultPhysicalOp((AbstractLogicalOperator) opRef.getValue(), context);
    }
}

/**
 * Creates a pre-clustered group-by whose columns are the group-by keys of {@code gby} that are
 * plain variable references; keys defined by any other kind of expression are left out.
 */
private static PreclusteredGroupByPOperator createPreclusteredGroupBy(GroupByOperator gby) {
    List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> gbyList = gby.getGroupByList();
    List<LogicalVariable> columnList = new ArrayList<>(gbyList.size());
    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gbyList) {
        ILogicalExpression expr = p.second.getValue();
        if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
            columnList.add(((VariableReferenceExpression) expr).getVariableReference());
        }
    }
    return new PreclusteredGroupByPOperator(columnList, gby.isGroupAll());
}
Also used : IMergeAggregationExpressionFactory(org.apache.hyracks.algebricks.core.algebra.expressions.IMergeAggregationExpressionFactory) PreclusteredGroupByPOperator(org.apache.hyracks.algebricks.core.algebra.operators.physical.PreclusteredGroupByPOperator) NotImplementedException(org.apache.hyracks.algebricks.common.exceptions.NotImplementedException) ArrayList(java.util.ArrayList) LeftOuterJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.LeftOuterJoinOperator) InnerJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator) AccessMethodJobGenParams(org.apache.asterix.optimizer.rules.am.AccessMethodJobGenParams) AggregateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator) IDataSourceIndex(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex) ArrayList(java.util.ArrayList) List(java.util.List) IndexType(org.apache.asterix.common.config.DatasetConfig.IndexType) PhysicalOptimizationConfig(org.apache.hyracks.algebricks.core.rewriter.base.PhysicalOptimizationConfig) Pair(org.apache.hyracks.algebricks.common.utils.Pair) AbstractOperatorWithNestedPlans(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractOperatorWithNestedPlans) LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) AggregateFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression) GroupByOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator) AbstractLogicalOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator) RTreeSearchPOperator(org.apache.asterix.algebra.operators.physical.RTreeSearchPOperator) Dataset(org.apache.asterix.metadata.entities.Dataset) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) 
AbstractFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression) BTreeJobGenParams(org.apache.asterix.optimizer.rules.am.BTreeJobGenParams) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) INodeDomain(org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain) InvertedIndexPOperator(org.apache.asterix.algebra.operators.physical.InvertedIndexPOperator) BTreeSearchPOperator(org.apache.asterix.algebra.operators.physical.BTreeSearchPOperator) Mutable(org.apache.commons.lang3.mutable.Mutable) FunctionIdentifier(org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) MetadataProvider(org.apache.asterix.metadata.declared.MetadataProvider) VariableReferenceExpression(org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) ILogicalPlan(org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan) DataSourceId(org.apache.asterix.metadata.declared.DataSourceId) ExternalGroupByPOperator(org.apache.hyracks.algebricks.core.algebra.operators.physical.ExternalGroupByPOperator)

Example 34 with Dataset

use of org.apache.asterix.metadata.entities.Dataset in project asterixdb by apache.

the class IntroduceJoinAccessMethodRule method checkAndApplyJoinTransformation.

/**
 * Recursively traverses the given plan and checks whether an INNERJOIN or LEFTOUTERJOIN operator exists.
 * If one is found, maintains the path from the root to the given join operator and
 * optimizes the path from the given join operator to the EMPTY_TUPLE_SOURCE operator
 * if it is not already optimized.
 *
 * <p>Inputs are visited before the current operator, so a join further down the plan is tried
 * before the one at {@code opRef}. Whenever the attempt on this operator's join does not succeed,
 * the {@code joinRef} and {@code joinOp} fields are cleared before returning.
 *
 * @param opRef reference to the operator to inspect
 * @param context optimization context
 * @return {@code true} if a join in this subtree was transformed, {@code false} otherwise
 * @throws AlgebricksException propagated from the analysis and transformation helpers
 */
protected boolean checkAndApplyJoinTransformation(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
    AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
    // Check the current operator pattern to see whether it is a JOIN or not.
    boolean isThisOpInnerJoin = isInnerJoin(op);
    boolean isThisOpLeftOuterJoin = isLeftOuterJoin(op);
    // Snapshot the field now; the recursive calls below may overwrite it.
    boolean isParentOpGroupBy = hasGroupBy;
    Mutable<ILogicalOperator> joinRefFromThisOp = null;
    AbstractBinaryJoinOperator joinOpFromThisOp = null;
    if (isThisOpInnerJoin) {
        // Set join operator.
        joinRef = opRef;
        joinOp = (InnerJoinOperator) op;
        joinRefFromThisOp = opRef;
        joinOpFromThisOp = (InnerJoinOperator) op;
    } else if (isThisOpLeftOuterJoin) {
        // Set left-outer-join op.
        // The current operator is GROUP and the child of this op is LEFTOUTERJOIN.
        joinRef = op.getInputs().get(0);
        joinOp = (LeftOuterJoinOperator) joinRef.getValue();
        joinRefFromThisOp = op.getInputs().get(0);
        joinOpFromThisOp = (LeftOuterJoinOperator) joinRefFromThisOp.getValue();
    }
    // Recurse into the inputs first to make sure an earlier join in the path is optimized first.
    for (Mutable<ILogicalOperator> inputOpRef : op.getInputs()) {
        if (checkAndApplyJoinTransformation(inputOpRef, context)) {
            return true;
        }
    }
    // For a JOIN case, try to transform the given plan.
    if (isThisOpInnerJoin || isThisOpLeftOuterJoin) {
        // Restore the information from this operator since it might have been set to null
        // if there are other join operators in the earlier path.
        joinRef = joinRefFromThisOp;
        joinOp = joinOpFromThisOp;
        // Already checked? If not, this operator may be optimized.
        boolean continueCheck = !context.checkIfInDontApplySet(this, joinOp);
        // For each access method, this contains the information about
        // whether an available index can be applicable or not.
        Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs = null;
        if (continueCheck) {
            analyzedAMs = new HashMap<>();
        }
        // Check the join condition and initialize the subtrees; give up on this join if that fails.
        if (continueCheck && !checkJoinOpConditionAndInitSubTree(context)) {
            continueCheck = false;
        }
        // Analyze the join condition against each subtree that has a data source and
        // record the matching access methods in analyzedAMs.
        boolean matchInLeftSubTree = false;
        boolean matchInRightSubTree = false;
        if (continueCheck) {
            if (leftSubTree.hasDataSource()) {
                matchInLeftSubTree = analyzeSelectOrJoinOpConditionAndUpdateAnalyzedAM(joinCond, leftSubTree.getAssignsAndUnnests(), analyzedAMs, context, typeEnvironment);
            }
            if (rightSubTree.hasDataSource()) {
                matchInRightSubTree = analyzeSelectOrJoinOpConditionAndUpdateAnalyzedAM(joinCond, rightSubTree.getAssignsAndUnnests(), analyzedAMs, context, typeEnvironment);
            }
        }
        // Find the dataset from the data-source and the record type of the dataset from the metadata.
        // This will be used to find an applicable index on the dataset.
        boolean checkLeftSubTreeMetadata = false;
        boolean checkRightSubTreeMetadata = false;
        if (continueCheck && (matchInLeftSubTree || matchInRightSubTree)) {
            // Set dataset and type metadata.
            if (matchInLeftSubTree) {
                checkLeftSubTreeMetadata = leftSubTree.setDatasetAndTypeMetadata(metadataProvider);
            }
            if (matchInRightSubTree) {
                checkRightSubTreeMetadata = rightSubTree.setDatasetAndTypeMetadata(metadataProvider);
            }
        }
        if (continueCheck && (checkLeftSubTreeMetadata || checkRightSubTreeMetadata)) {
            // Then find the applicable indexes for the variables used in the JOIN condition.
            if (checkLeftSubTreeMetadata) {
                fillSubTreeIndexExprs(leftSubTree, analyzedAMs, context);
            }
            if (checkRightSubTreeMetadata) {
                fillSubTreeIndexExprs(rightSubTree, analyzedAMs, context);
            }
            // Prune the access methods based on the function expression and access methods.
            pruneIndexCandidates(analyzedAMs, context, typeEnvironment);
            // If the right subtree (inner branch) has indexes, one of those indexes will be used.
            // Remove the indexes from the outer branch in the optimizer's consideration list for this rule.
            pruneIndexCandidatesFromOuterBranch(analyzedAMs);
            // We are going to use indexes from the inner branch.
            // If no index is available, then we stop here.
            Pair<IAccessMethod, Index> chosenIndex = chooseBestIndex(analyzedAMs);
            if (chosenIndex == null) {
                context.addToDontApplySet(this, joinOp);
                continueCheck = false;
            }
            if (continueCheck) {
                // Apply plan transformation using chosen index.
                AccessMethodAnalysisContext analysisCtx = analyzedAMs.get(chosenIndex.first);
                // For a left outer join under a group-by, record the group-by operator and its
                // is-missing function call in the analysis context.
                if (isThisOpLeftOuterJoin && isParentOpGroupBy) {
                    analysisCtx.setLOJGroupbyOpRef(opRef);
                    ScalarFunctionCallExpression isNullFuncExpr = AccessMethodUtils.findLOJIsMissingFuncInGroupBy((GroupByOperator) opRef.getValue());
                    analysisCtx.setLOJIsNullFuncInGroupBy(isNullFuncExpr);
                }
                Dataset indexDataset = analysisCtx.getDatasetFromIndexDatasetMap(chosenIndex.second);
                // Sanity check: only an index on the right subtree is used, so the right subtree
                // must be a data-source scan over the dataset the chosen index belongs to. The
                // dataset names are compared only when the scan exists, so the right subtree's
                // dataset is never dereferenced for a subtree without a data-source scan.
                boolean indexIsOnRightSubTree = rightSubTree.hasDataSourceScan() && indexDataset.getDatasetName().equals(rightSubTree.getDataset().getDatasetName());
                // Finally, try to apply plan transformation using chosen index. A failed sanity
                // check falls through to the state reset below, like any other failed attempt.
                if (indexIsOnRightSubTree && chosenIndex.first.applyJoinPlanTransformation(joinRef, leftSubTree, rightSubTree, chosenIndex.second, analysisCtx, context, isThisOpLeftOuterJoin, isParentOpGroupBy)) {
                    return true;
                }
            }
        }
        joinRef = null;
        joinOp = null;
    }
    return false;
}
Also used : AbstractLogicalOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator) Dataset(org.apache.asterix.metadata.entities.Dataset) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) LeftOuterJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.LeftOuterJoinOperator) Index(org.apache.asterix.metadata.entities.Index) AbstractBinaryJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator) ScalarFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression)

Example 35 with Dataset

use of org.apache.asterix.metadata.entities.Dataset in project asterixdb by apache.

From the class InvertedIndexAccessMethod, method applyJoinPlanTransformation.

/**
 * Rewrites the join referenced by {@code joinRef} into an index-based plan that searches
 * {@code chosenIndex} on the right subtree, using the left subtree as the probe side.
 *
 * <p>The resulting plan is, bottom-up: a secondary-to-primary index search plan on the index
 * subtree, a select carrying a clone of the original join condition, optionally a union-all with
 * a "panic" (non-indexed) nested-loop join path for the edit-distance functions, and finally a
 * top-level inner equi-join on primary keys that brings back the variables of the original probe
 * subtree.
 *
 * @param joinRef reference to the join operator; on success its value is replaced by the new
 *        top-level equi-join
 * @param leftSubTree left input of the join; used as the probe subtree
 * @param rightSubTree right input of the join; must scan the dataset that {@code chosenIndex} belongs to
 * @param chosenIndex the index to search
 * @param analysisCtx analysis context used to resolve the index's dataset and the optimizable function expression
 * @param context optimization context used to compute type environments for the new operators
 * @param isLeftOuterJoin whether the join being rewritten is a left outer join
 * @param hasGroupBy whether the parent of the join is a group-by operator
 * @return {@code false} if one of the applicability checks fails (nothing has been rewritten at that
 *         point), {@code true} once the plan has been rewritten
 * @throws AlgebricksException if one of the plan-construction helpers fails
 */
@Override
public boolean applyJoinPlanTransformation(Mutable<ILogicalOperator> joinRef, OptimizableOperatorSubTree leftSubTree, OptimizableOperatorSubTree rightSubTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx, IOptimizationContext context, boolean isLeftOuterJoin, boolean hasGroupBy) throws AlgebricksException {
    // The index is only applied when its dataset is the one scanned by the right subtree;
    // the left subtree then acts as the probe side.
    Dataset dataset = analysisCtx.getDatasetFromIndexDatasetMap(chosenIndex);
    OptimizableOperatorSubTree indexSubTree;
    OptimizableOperatorSubTree probeSubTree;
    // The following is just a sanity check.
    if (rightSubTree.hasDataSourceScan() && dataset.getDatasetName().equals(rightSubTree.getDataset().getDatasetName())) {
        indexSubTree = rightSubTree;
        probeSubTree = leftSubTree;
    } else {
        return false;
    }
    IOptimizableFuncExpr optFuncExpr = AccessMethodUtils.chooseFirstOptFuncExpr(chosenIndex, analysisCtx);
    // For edit-distance-contains, give up unless the dataset of the first argument's subtree
    // (when it has one) is the same as the dataset of the index subtree.
    if (optFuncExpr.getFuncExpr().getFunctionIdentifier() == BuiltinFunctions.EDIT_DISTANCE_CONTAINS && optFuncExpr.getOperatorSubTree(0).getDataset() != null && !optFuncExpr.getOperatorSubTree(0).getDataset().getDatasetName().equals(indexSubTree.getDataset().getDatasetName())) {
        return false;
    }
    // The select created below retains missing values for every left outer join, so the
    // placeholder variable is needed for all left outer joins, not only those under a group-by.
    LogicalVariable newNullPlaceHolderVar = null;
    if (isLeftOuterJoin) {
        // Get a new null place holder variable that is the first field variable of the primary key
        // from the indexSubTree's datasourceScanOp.
        newNullPlaceHolderVar = indexSubTree.getDataSourceVariables().get(0);
        if (hasGroupBy) {
            // Reset the null place holder variable in the parent group-by operator.
            AccessMethodUtils.resetLOJNullPlaceholderVariableInGroupByOp(analysisCtx, newNullPlaceHolderVar, context);
        }
    }
    AbstractBinaryJoinOperator join = (AbstractBinaryJoinOperator) joinRef.getValue();
    // Remember the original probe subtree, and its primary-key variables,
    // so we can later retrieve the missing attributes via an equi join.
    List<LogicalVariable> originalSubTreePKs = new ArrayList<>();
    // Remember the primary-keys of the new probe subtree for the top-level equi join.
    List<LogicalVariable> surrogateSubTreePKs = new ArrayList<>();
    // Copy probe subtree, replacing their variables with new ones. We will use the original variables
    // to stitch together a top-level equi join.
    Mutable<ILogicalOperator> originalProbeSubTreeRootRef = copyAndReinitProbeSubTree(probeSubTree, join.getCondition().getValue(), optFuncExpr, originalSubTreePKs, surrogateSubTreePKs, context);
    // Remember original live variables from the index sub tree.
    List<LogicalVariable> indexSubTreeLiveVars = new ArrayList<>();
    VariableUtilities.getLiveVariables(indexSubTree.getRoot(), indexSubTreeLiveVars);
    // Clone the original join condition because we may have to modify it (and we also need the original).
    ILogicalExpression joinCond = join.getCondition().getValue().cloneExpression();
    // Create "panic" (non indexed) nested-loop join path if necessary.
    Mutable<ILogicalOperator> panicJoinRef = null;
    Map<LogicalVariable, LogicalVariable> panicVarMap = null;
    if (optFuncExpr.getFuncExpr().getFunctionIdentifier() == BuiltinFunctions.EDIT_DISTANCE_CHECK || optFuncExpr.getFuncExpr().getFunctionIdentifier() == BuiltinFunctions.EDIT_DISTANCE_CONTAINS) {
        panicJoinRef = new MutableObject<>(joinRef.getValue());
        panicVarMap = new HashMap<>();
        Mutable<ILogicalOperator> newProbeRootRef = createPanicNestedLoopJoinPlan(panicJoinRef, indexSubTree, probeSubTree, optFuncExpr, chosenIndex, panicVarMap, context);
        // The probe subtree now starts at the root returned by the panic-plan helper.
        probeSubTree.getRootRef().setValue(newProbeRootRef.getValue());
        probeSubTree.setRoot(newProbeRootRef.getValue());
    }
    // Create regular indexed-nested loop join path.
    ILogicalOperator indexPlanRootOp = createSecondaryToPrimaryPlan(null, indexSubTree, probeSubTree, chosenIndex, analysisCtx, true, isLeftOuterJoin, true, context);
    indexSubTree.getDataSourceRef().setValue(indexPlanRootOp);
    // Change join into a select with the same condition.
    SelectOperator topSelect = new SelectOperator(new MutableObject<ILogicalExpression>(joinCond), isLeftOuterJoin, newNullPlaceHolderVar);
    topSelect.getInputs().add(indexSubTree.getRootRef());
    topSelect.setExecutionMode(ExecutionMode.LOCAL);
    context.computeAndSetTypeEnvironmentForOperator(topSelect);
    ILogicalOperator topOp = topSelect;
    // Hook up the indexed-nested loop join path with the "panic" (non indexed) nested-loop join path by putting a union all on top.
    if (panicJoinRef != null) {
        LogicalVariable inputSearchVar = getInputSearchVar(optFuncExpr, indexSubTree);
        indexSubTreeLiveVars.addAll(originalSubTreePKs);
        indexSubTreeLiveVars.add(inputSearchVar);
        List<LogicalVariable> panicPlanLiveVars = new ArrayList<>();
        VariableUtilities.getLiveVariables(panicJoinRef.getValue(), panicPlanLiveVars);
        // Create variable mapping for union all operator.
        List<Triple<LogicalVariable, LogicalVariable, LogicalVariable>> varMap = new ArrayList<>();
        for (int i = 0; i < indexSubTreeLiveVars.size(); i++) {
            LogicalVariable indexSubTreeVar = indexSubTreeLiveVars.get(i);
            // Variables without an entry in the panic map are used unchanged on the panic branch.
            LogicalVariable panicPlanVar = panicVarMap.get(indexSubTreeVar);
            if (panicPlanVar == null) {
                panicPlanVar = indexSubTreeVar;
            }
            varMap.add(new Triple<LogicalVariable, LogicalVariable, LogicalVariable>(indexSubTreeVar, panicPlanVar, indexSubTreeVar));
        }
        UnionAllOperator unionAllOp = new UnionAllOperator(varMap);
        unionAllOp.getInputs().add(new MutableObject<ILogicalOperator>(topOp));
        unionAllOp.getInputs().add(panicJoinRef);
        unionAllOp.setExecutionMode(ExecutionMode.PARTITIONED);
        context.computeAndSetTypeEnvironmentForOperator(unionAllOp);
        topOp = unionAllOp;
    }
    // Place a top-level equi-join on top to retrieve the missing variables from the original probe subtree.
    // The inner (build) branch of the join is the subtree with the data scan, since the result of the similarity join could potentially be big.
    // This choice may not always be the most efficient, but it seems more robust than the alternative.
    Mutable<ILogicalExpression> eqJoinConditionRef = createPrimaryKeysEqJoinCondition(originalSubTreePKs, surrogateSubTreePKs);
    InnerJoinOperator topEqJoin = new InnerJoinOperator(eqJoinConditionRef, originalProbeSubTreeRootRef, new MutableObject<ILogicalOperator>(topOp));
    topEqJoin.setExecutionMode(ExecutionMode.PARTITIONED);
    joinRef.setValue(topEqJoin);
    context.computeAndSetTypeEnvironmentForOperator(topEqJoin);
    return true;
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) Dataset(org.apache.asterix.metadata.entities.Dataset) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) ArrayList(java.util.ArrayList) InnerJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator) AbstractBinaryJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator) Triple(org.apache.hyracks.algebricks.common.utils.Triple) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) SelectOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator) UnionAllOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator)

Aggregations

Dataset (org.apache.asterix.metadata.entities.Dataset)77 ArrayList (java.util.ArrayList)33 AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException)32 Index (org.apache.asterix.metadata.entities.Index)25 LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable)23 MetadataException (org.apache.asterix.metadata.MetadataException)19 ARecordType (org.apache.asterix.om.types.ARecordType)19 IAType (org.apache.asterix.om.types.IAType)18 ILogicalExpression (org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression)18 List (java.util.List)17 ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator)16 RemoteException (java.rmi.RemoteException)15 AsterixException (org.apache.asterix.common.exceptions.AsterixException)15 MetadataProvider (org.apache.asterix.metadata.declared.MetadataProvider)15 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)15 IOException (java.io.IOException)14 MetadataTransactionContext (org.apache.asterix.metadata.MetadataTransactionContext)14 CompilationException (org.apache.asterix.common.exceptions.CompilationException)13 AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)12 ACIDException (org.apache.asterix.common.exceptions.ACIDException)11