Search in sources :

Example 1 with InnerJoinOperator

use of org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator in project asterixdb by apache.

the class ComplexUnnestToProductRule method rewritePost.

@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
    AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
    if (op.getOperatorTag() != LogicalOperatorTag.DATASOURCESCAN && op.getOperatorTag() != LogicalOperatorTag.UNNEST) {
        return false;
    }
    //stop rewriting if the operators originates from a nested tuple source
    if (insideSubplan(opRef)) {
        return false;
    }
    // We may pull selects above the join we create in order to eliminate possible dependencies between
    // the outer and inner input plans of the join.
    List<ILogicalOperator> topSelects = new ArrayList<ILogicalOperator>();
    // Keep track of the operators and used variables participating in the inner input plan.
    HashSet<LogicalVariable> innerUsedVars = new HashSet<LogicalVariable>();
    List<ILogicalOperator> innerOps = new ArrayList<ILogicalOperator>();
    HashSet<LogicalVariable> outerUsedVars = new HashSet<LogicalVariable>();
    List<ILogicalOperator> outerOps = new ArrayList<ILogicalOperator>();
    innerOps.add(op);
    VariableUtilities.getUsedVariables(op, innerUsedVars);
    Mutable<ILogicalOperator> opRef2 = op.getInputs().get(0);
    AbstractLogicalOperator op2 = (AbstractLogicalOperator) opRef2.getValue();
    // Find an unnest or join and partition the plan between the first unnest and that operator into independent parts.
    if (!findPlanPartition(op2, innerUsedVars, outerUsedVars, innerOps, outerOps, topSelects, false)) {
        // We could not find an unnest or join.
        return false;
    }
    // The last operator must be an unnest or join.
    AbstractLogicalOperator unnestOrJoin = (AbstractLogicalOperator) outerOps.get(outerOps.size() - 1);
    ILogicalOperator outerRoot = null;
    ILogicalOperator innerRoot = null;
    EmptyTupleSourceOperator ets = new EmptyTupleSourceOperator();
    // If we found a join, simply use it as the outer root.
    if (unnestOrJoin.getOperatorTag() != LogicalOperatorTag.INNERJOIN && unnestOrJoin.getOperatorTag() != LogicalOperatorTag.LEFTOUTERJOIN) {
        // We've found a second unnest. First, sanity check that the unnest does not output any live variables
        // that are used by the plan above (until the first unnest).
        List<LogicalVariable> liveVars = new ArrayList<>();
        VariableUtilities.getLiveVariables(unnestOrJoin, liveVars);
        for (LogicalVariable liveVar : liveVars) {
            if (innerUsedVars.contains(liveVar)) {
                return false;
            }
        }
        // Continue finding a partitioning of the plan such that the inner and outer partitions are independent, in order to feed a join.
        // Now, we look below the second unnest or join.
        VariableUtilities.getUsedVariables(unnestOrJoin, outerUsedVars);
        AbstractLogicalOperator unnestChild = (AbstractLogicalOperator) unnestOrJoin.getInputs().get(0).getValue();
        if (!findPlanPartition(unnestChild, innerUsedVars, outerUsedVars, innerOps, outerOps, topSelects, true)) {
            // We could not find a suitable partitioning.
            return false;
        }
    }
    innerRoot = buildOperatorChain(innerOps, ets, context);
    context.computeAndSetTypeEnvironmentForOperator(innerRoot);
    outerRoot = buildOperatorChain(outerOps, null, context);
    context.computeAndSetTypeEnvironmentForOperator(outerRoot);
    InnerJoinOperator product = new InnerJoinOperator(new MutableObject<ILogicalExpression>(ConstantExpression.TRUE));
    // Outer branch.
    product.getInputs().add(new MutableObject<ILogicalOperator>(outerRoot));
    // Inner branch.
    product.getInputs().add(new MutableObject<ILogicalOperator>(innerRoot));
    context.computeAndSetTypeEnvironmentForOperator(product);
    // Put the selects on top of the join.
    ILogicalOperator topOp = product;
    if (!topSelects.isEmpty()) {
        topOp = buildOperatorChain(topSelects, product, context);
    }
    // Plug the selects + product in the plan.
    opRef.setValue(topOp);
    context.computeAndSetTypeEnvironmentForOperator(topOp);
    return true;
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) AbstractLogicalOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) ArrayList(java.util.ArrayList) InnerJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) EmptyTupleSourceOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator) HashSet(java.util.HashSet)

Example 2 with InnerJoinOperator

use of org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator in project asterixdb by apache.

the class RequiredCapacityVisitorTest method testParallelJoin.

@Test
public void testParallelJoin() throws AlgebricksException {
    IClusterCapacity clusterCapacity = new ClusterCapacity();
    RequiredCapacityVisitor visitor = makeComputationCapacityVisitor(PARALLELISM, clusterCapacity);
    // Constructs a join query plan.
    InnerJoinOperator join = makeJoinOperator(AbstractLogicalOperator.ExecutionMode.PARTITIONED);
    // Left child plan of the join.
    ExchangeOperator leftChildExchange = new ExchangeOperator();
    leftChildExchange.setExecutionMode(AbstractLogicalOperator.ExecutionMode.PARTITIONED);
    leftChildExchange.setPhysicalOperator(new HashPartitionExchangePOperator(Collections.emptyList(), null));
    InnerJoinOperator leftChild = makeJoinOperator(AbstractLogicalOperator.ExecutionMode.PARTITIONED);
    join.getInputs().add(new MutableObject<>(leftChildExchange));
    leftChildExchange.getInputs().add(new MutableObject<>(leftChild));
    EmptyTupleSourceOperator ets = new EmptyTupleSourceOperator();
    ets.setExecutionMode(AbstractLogicalOperator.ExecutionMode.PARTITIONED);
    leftChild.getInputs().add(new MutableObject<>(ets));
    leftChild.getInputs().add(new MutableObject<>(ets));
    // Right child plan of the join.
    ExchangeOperator rightChildExchange = new ExchangeOperator();
    rightChildExchange.setExecutionMode(AbstractLogicalOperator.ExecutionMode.PARTITIONED);
    rightChildExchange.setPhysicalOperator(new HashPartitionExchangePOperator(Collections.emptyList(), null));
    GroupByOperator rightChild = makeGroupByOperator(AbstractLogicalOperator.ExecutionMode.LOCAL);
    join.getInputs().add(new MutableObject<>(rightChildExchange));
    rightChildExchange.getInputs().add(new MutableObject<>(rightChild));
    rightChild.getInputs().add(new MutableObject<>(ets));
    // Verifies the calculated cluster capacity requirement for the test quer plan.
    join.accept(visitor, null);
    Assert.assertTrue(clusterCapacity.getAggregatedCores() == PARALLELISM);
    Assert.assertTrue(clusterCapacity.getAggregatedMemoryByteSize() == 3 * MEMORY_BUDGET * PARALLELISM + 2 * 2L * PARALLELISM * PARALLELISM * FRAME_SIZE + 3 * FRAME_SIZE * PARALLELISM);
}
Also used : IClusterCapacity(org.apache.hyracks.api.job.resource.IClusterCapacity) GroupByOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator) ClusterCapacity(org.apache.hyracks.api.job.resource.ClusterCapacity) IClusterCapacity(org.apache.hyracks.api.job.resource.IClusterCapacity) HashPartitionExchangePOperator(org.apache.hyracks.algebricks.core.algebra.operators.physical.HashPartitionExchangePOperator) ExchangeOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator) InnerJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator) EmptyTupleSourceOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator) Test(org.junit.Test)

Example 3 with InnerJoinOperator

use of org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator in project asterixdb by apache.

the class RequiredCapacityVisitorTest method testUnPartitionedJoin.

@Test
public void testUnPartitionedJoin() throws AlgebricksException {
    IClusterCapacity clusterCapacity = new ClusterCapacity();
    RequiredCapacityVisitor visitor = makeComputationCapacityVisitor(PARALLELISM, clusterCapacity);
    // Constructs a join query plan.
    InnerJoinOperator join = makeJoinOperator(AbstractLogicalOperator.ExecutionMode.UNPARTITIONED);
    // Left child plan of the join.
    ExchangeOperator leftChildExchange = new ExchangeOperator();
    leftChildExchange.setExecutionMode(AbstractLogicalOperator.ExecutionMode.UNPARTITIONED);
    leftChildExchange.setPhysicalOperator(new OneToOneExchangePOperator());
    InnerJoinOperator leftChild = makeJoinOperator(AbstractLogicalOperator.ExecutionMode.UNPARTITIONED);
    join.getInputs().add(new MutableObject<>(leftChildExchange));
    leftChildExchange.getInputs().add(new MutableObject<>(leftChild));
    EmptyTupleSourceOperator ets = new EmptyTupleSourceOperator();
    ets.setExecutionMode(AbstractLogicalOperator.ExecutionMode.UNPARTITIONED);
    leftChild.getInputs().add(new MutableObject<>(ets));
    leftChild.getInputs().add(new MutableObject<>(ets));
    // Right child plan of the join.
    ExchangeOperator rightChildExchange = new ExchangeOperator();
    rightChildExchange.setExecutionMode(AbstractLogicalOperator.ExecutionMode.UNPARTITIONED);
    rightChildExchange.setPhysicalOperator(new OneToOneExchangePOperator());
    GroupByOperator rightChild = makeGroupByOperator(AbstractLogicalOperator.ExecutionMode.UNPARTITIONED);
    join.getInputs().add(new MutableObject<>(rightChildExchange));
    rightChildExchange.getInputs().add(new MutableObject<>(rightChild));
    rightChild.getInputs().add(new MutableObject<>(ets));
    // Verifies the calculated cluster capacity requirement for the test quer plan.
    join.accept(visitor, null);
    Assert.assertTrue(clusterCapacity.getAggregatedCores() == 1);
    Assert.assertTrue(clusterCapacity.getAggregatedMemoryByteSize() == 3 * MEMORY_BUDGET + 5L * FRAME_SIZE);
}
Also used : IClusterCapacity(org.apache.hyracks.api.job.resource.IClusterCapacity) GroupByOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator) ClusterCapacity(org.apache.hyracks.api.job.resource.ClusterCapacity) IClusterCapacity(org.apache.hyracks.api.job.resource.IClusterCapacity) OneToOneExchangePOperator(org.apache.hyracks.algebricks.core.algebra.operators.physical.OneToOneExchangePOperator) ExchangeOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator) InnerJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator) EmptyTupleSourceOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator) Test(org.junit.Test)

Example 4 with InnerJoinOperator

use of org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator in project asterixdb by apache.

the class SetAsterixPhysicalOperatorsRule method computeDefaultPhysicalOp.

private static void computeDefaultPhysicalOp(AbstractLogicalOperator op, IOptimizationContext context) throws AlgebricksException {
    PhysicalOptimizationConfig physicalOptimizationConfig = context.getPhysicalOptimizationConfig();
    if (op.getOperatorTag().equals(LogicalOperatorTag.GROUP)) {
        GroupByOperator gby = (GroupByOperator) op;
        if (gby.getNestedPlans().size() == 1) {
            ILogicalPlan p0 = gby.getNestedPlans().get(0);
            if (p0.getRoots().size() == 1) {
                Mutable<ILogicalOperator> r0 = p0.getRoots().get(0);
                if (((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().equals(LogicalOperatorTag.AGGREGATE)) {
                    AggregateOperator aggOp = (AggregateOperator) r0.getValue();
                    boolean serializable = true;
                    for (Mutable<ILogicalExpression> exprRef : aggOp.getExpressions()) {
                        AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) exprRef.getValue();
                        if (!BuiltinFunctions.isAggregateFunctionSerializable(expr.getFunctionIdentifier())) {
                            serializable = false;
                            break;
                        }
                    }
                    if ((gby.getAnnotations().get(OperatorAnnotations.USE_HASH_GROUP_BY) == Boolean.TRUE || gby.getAnnotations().get(OperatorAnnotations.USE_EXTERNAL_GROUP_BY) == Boolean.TRUE)) {
                        boolean setToExternalGby = false;
                        if (serializable) {
                            // if serializable, use external group-by
                            // now check whether the serialized version aggregation function has corresponding intermediate agg
                            boolean hasIntermediateAgg = true;
                            IMergeAggregationExpressionFactory mergeAggregationExpressionFactory = context.getMergeAggregationExpressionFactory();
                            List<LogicalVariable> originalVariables = aggOp.getVariables();
                            List<Mutable<ILogicalExpression>> aggExprs = aggOp.getExpressions();
                            int aggNum = aggExprs.size();
                            for (int i = 0; i < aggNum; i++) {
                                AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) aggExprs.get(i).getValue();
                                AggregateFunctionCallExpression serialAggExpr = BuiltinFunctions.makeSerializableAggregateFunctionExpression(expr.getFunctionIdentifier(), expr.getArguments());
                                if (mergeAggregationExpressionFactory.createMergeAggregation(originalVariables.get(i), serialAggExpr, context) == null) {
                                    hasIntermediateAgg = false;
                                    break;
                                }
                            }
                            // Check whether there are multiple aggregates in the sub plan.
                            // Currently, we don't support multiple aggregates in one external group-by.
                            boolean multipleAggOpsFound = false;
                            ILogicalOperator r1Logical = aggOp;
                            while (r1Logical.hasInputs()) {
                                r1Logical = r1Logical.getInputs().get(0).getValue();
                                if (r1Logical.getOperatorTag() == LogicalOperatorTag.AGGREGATE) {
                                    multipleAggOpsFound = true;
                                    break;
                                }
                            }
                            if (hasIntermediateAgg && !multipleAggOpsFound) {
                                for (int i = 0; i < aggNum; i++) {
                                    AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) aggExprs.get(i).getValue();
                                    AggregateFunctionCallExpression serialAggExpr = BuiltinFunctions.makeSerializableAggregateFunctionExpression(expr.getFunctionIdentifier(), expr.getArguments());
                                    aggOp.getExpressions().get(i).setValue(serialAggExpr);
                                }
                                ExternalGroupByPOperator externalGby = new ExternalGroupByPOperator(gby.getGroupByList(), physicalOptimizationConfig.getMaxFramesExternalGroupBy(), (long) physicalOptimizationConfig.getMaxFramesExternalGroupBy() * physicalOptimizationConfig.getFrameSize());
                                generateMergeAggregationExpressions(gby, context);
                                op.setPhysicalOperator(externalGby);
                                setToExternalGby = true;
                            }
                        }
                        if (!setToExternalGby) {
                            // if not serializable or no intermediate agg, use pre-clustered group-by
                            List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> gbyList = gby.getGroupByList();
                            List<LogicalVariable> columnList = new ArrayList<LogicalVariable>(gbyList.size());
                            for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gbyList) {
                                ILogicalExpression expr = p.second.getValue();
                                if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
                                    VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
                                    columnList.add(varRef.getVariableReference());
                                }
                            }
                            op.setPhysicalOperator(new PreclusteredGroupByPOperator(columnList, gby.isGroupAll()));
                        }
                    }
                } else if (((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().equals(LogicalOperatorTag.RUNNINGAGGREGATE)) {
                    List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> gbyList = gby.getGroupByList();
                    List<LogicalVariable> columnList = new ArrayList<LogicalVariable>(gbyList.size());
                    for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : gbyList) {
                        ILogicalExpression expr = p.second.getValue();
                        if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
                            VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
                            columnList.add(varRef.getVariableReference());
                        }
                    }
                    op.setPhysicalOperator(new PreclusteredGroupByPOperator(columnList, gby.isGroupAll()));
                } else {
                    throw new AlgebricksException("Unsupported nested operator within a group-by: " + ((AbstractLogicalOperator) (r0.getValue())).getOperatorTag().name());
                }
            }
        }
    }
    if (op.getPhysicalOperator() == null) {
        switch(op.getOperatorTag()) {
            case INNERJOIN:
                {
                    JoinUtils.setJoinAlgorithmAndExchangeAlgo((InnerJoinOperator) op, context);
                    break;
                }
            case LEFTOUTERJOIN:
                {
                    JoinUtils.setJoinAlgorithmAndExchangeAlgo((LeftOuterJoinOperator) op, context);
                    break;
                }
            case UNNEST_MAP:
            case LEFT_OUTER_UNNEST_MAP:
                {
                    ILogicalExpression unnestExpr = null;
                    unnestExpr = ((AbstractUnnestMapOperator) op).getExpressionRef().getValue();
                    if (unnestExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
                        AbstractFunctionCallExpression f = (AbstractFunctionCallExpression) unnestExpr;
                        FunctionIdentifier fid = f.getFunctionIdentifier();
                        if (!fid.equals(BuiltinFunctions.INDEX_SEARCH)) {
                            throw new IllegalStateException();
                        }
                        AccessMethodJobGenParams jobGenParams = new AccessMethodJobGenParams();
                        jobGenParams.readFromFuncArgs(f.getArguments());
                        MetadataProvider mp = (MetadataProvider) context.getMetadataProvider();
                        DataSourceId dataSourceId = new DataSourceId(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
                        Dataset dataset = mp.findDataset(jobGenParams.getDataverseName(), jobGenParams.getDatasetName());
                        IDataSourceIndex<String, DataSourceId> dsi = mp.findDataSourceIndex(jobGenParams.getIndexName(), dataSourceId);
                        INodeDomain storageDomain = mp.findNodeDomain(dataset.getNodeGroupName());
                        if (dsi == null) {
                            throw new AlgebricksException("Could not find index " + jobGenParams.getIndexName() + " for dataset " + dataSourceId);
                        }
                        IndexType indexType = jobGenParams.getIndexType();
                        boolean requiresBroadcast = jobGenParams.getRequiresBroadcast();
                        switch(indexType) {
                            case BTREE:
                                {
                                    BTreeJobGenParams btreeJobGenParams = new BTreeJobGenParams();
                                    btreeJobGenParams.readFromFuncArgs(f.getArguments());
                                    op.setPhysicalOperator(new BTreeSearchPOperator(dsi, storageDomain, requiresBroadcast, btreeJobGenParams.isPrimaryIndex(), btreeJobGenParams.isEqCondition(), btreeJobGenParams.getLowKeyVarList(), btreeJobGenParams.getHighKeyVarList()));
                                    break;
                                }
                            case RTREE:
                                {
                                    op.setPhysicalOperator(new RTreeSearchPOperator(dsi, storageDomain, requiresBroadcast));
                                    break;
                                }
                            case SINGLE_PARTITION_WORD_INVIX:
                            case SINGLE_PARTITION_NGRAM_INVIX:
                                {
                                    op.setPhysicalOperator(new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, false));
                                    break;
                                }
                            case LENGTH_PARTITIONED_WORD_INVIX:
                            case LENGTH_PARTITIONED_NGRAM_INVIX:
                                {
                                    op.setPhysicalOperator(new InvertedIndexPOperator(dsi, storageDomain, requiresBroadcast, true));
                                    break;
                                }
                            default:
                                {
                                    throw new NotImplementedException(indexType + " indexes are not implemented.");
                                }
                        }
                    }
                    break;
                }
        }
    }
    if (op.hasNestedPlans()) {
        AbstractOperatorWithNestedPlans nested = (AbstractOperatorWithNestedPlans) op;
        for (ILogicalPlan p : nested.getNestedPlans()) {
            setPhysicalOperators(p, context);
        }
    }
    for (Mutable<ILogicalOperator> opRef : op.getInputs()) {
        computeDefaultPhysicalOp((AbstractLogicalOperator) opRef.getValue(), context);
    }
}
Also used : IMergeAggregationExpressionFactory(org.apache.hyracks.algebricks.core.algebra.expressions.IMergeAggregationExpressionFactory) PreclusteredGroupByPOperator(org.apache.hyracks.algebricks.core.algebra.operators.physical.PreclusteredGroupByPOperator) NotImplementedException(org.apache.hyracks.algebricks.common.exceptions.NotImplementedException) ArrayList(java.util.ArrayList) LeftOuterJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.LeftOuterJoinOperator) InnerJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator) AccessMethodJobGenParams(org.apache.asterix.optimizer.rules.am.AccessMethodJobGenParams) AggregateOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator) IDataSourceIndex(org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex) ArrayList(java.util.ArrayList) List(java.util.List) IndexType(org.apache.asterix.common.config.DatasetConfig.IndexType) PhysicalOptimizationConfig(org.apache.hyracks.algebricks.core.rewriter.base.PhysicalOptimizationConfig) Pair(org.apache.hyracks.algebricks.common.utils.Pair) AbstractOperatorWithNestedPlans(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractOperatorWithNestedPlans) LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) AggregateFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression) GroupByOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator) AbstractLogicalOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator) RTreeSearchPOperator(org.apache.asterix.algebra.operators.physical.RTreeSearchPOperator) Dataset(org.apache.asterix.metadata.entities.Dataset) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) AbstractFunctionCallExpression(org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression) BTreeJobGenParams(org.apache.asterix.optimizer.rules.am.BTreeJobGenParams) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) INodeDomain(org.apache.hyracks.algebricks.core.algebra.properties.INodeDomain) InvertedIndexPOperator(org.apache.asterix.algebra.operators.physical.InvertedIndexPOperator) BTreeSearchPOperator(org.apache.asterix.algebra.operators.physical.BTreeSearchPOperator) Mutable(org.apache.commons.lang3.mutable.Mutable) FunctionIdentifier(org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) MetadataProvider(org.apache.asterix.metadata.declared.MetadataProvider) VariableReferenceExpression(org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) ILogicalPlan(org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan) DataSourceId(org.apache.asterix.metadata.declared.DataSourceId) ExternalGroupByPOperator(org.apache.hyracks.algebricks.core.algebra.operators.physical.ExternalGroupByPOperator)

Example 5 with InnerJoinOperator

use of org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator in project asterixdb by apache.

the class InvertedIndexAccessMethod method applyJoinPlanTransformation.

@Override
public boolean applyJoinPlanTransformation(Mutable<ILogicalOperator> joinRef, OptimizableOperatorSubTree leftSubTree, OptimizableOperatorSubTree rightSubTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx, IOptimizationContext context, boolean isLeftOuterJoin, boolean hasGroupBy) throws AlgebricksException {
    // Figure out if the index is applicable on the left or right side (if both, we arbitrarily prefer the left side).
    Dataset dataset = analysisCtx.getDatasetFromIndexDatasetMap(chosenIndex);
    OptimizableOperatorSubTree indexSubTree;
    OptimizableOperatorSubTree probeSubTree;
    // The following is just a sanity check.
    if (rightSubTree.hasDataSourceScan() && dataset.getDatasetName().equals(rightSubTree.getDataset().getDatasetName())) {
        indexSubTree = rightSubTree;
        probeSubTree = leftSubTree;
    } else {
        return false;
    }
    IOptimizableFuncExpr optFuncExpr = AccessMethodUtils.chooseFirstOptFuncExpr(chosenIndex, analysisCtx);
    // if the dataset of index subtree and the dataset of first argument's subtree is the same
    if (optFuncExpr.getFuncExpr().getFunctionIdentifier() == BuiltinFunctions.EDIT_DISTANCE_CONTAINS && optFuncExpr.getOperatorSubTree(0).getDataset() != null && !optFuncExpr.getOperatorSubTree(0).getDataset().getDatasetName().equals(indexSubTree.getDataset().getDatasetName())) {
        return false;
    }
    //if LOJ, reset null place holder variable
    LogicalVariable newNullPlaceHolderVar = null;
    if (isLeftOuterJoin && hasGroupBy) {
        //get a new null place holder variable that is the first field variable of the primary key
        //from the indexSubTree's datasourceScanOp
        newNullPlaceHolderVar = indexSubTree.getDataSourceVariables().get(0);
        //reset the null place holder variable
        AccessMethodUtils.resetLOJNullPlaceholderVariableInGroupByOp(analysisCtx, newNullPlaceHolderVar, context);
    }
    AbstractBinaryJoinOperator join = (AbstractBinaryJoinOperator) joinRef.getValue();
    // Remember the original probe subtree, and its primary-key variables,
    // so we can later retrieve the missing attributes via an equi join.
    List<LogicalVariable> originalSubTreePKs = new ArrayList<>();
    // Remember the primary-keys of the new probe subtree for the top-level equi join.
    List<LogicalVariable> surrogateSubTreePKs = new ArrayList<>();
    // Copy probe subtree, replacing their variables with new ones. We will use the original variables
    // to stitch together a top-level equi join.
    Mutable<ILogicalOperator> originalProbeSubTreeRootRef = copyAndReinitProbeSubTree(probeSubTree, join.getCondition().getValue(), optFuncExpr, originalSubTreePKs, surrogateSubTreePKs, context);
    // Remember original live variables from the index sub tree.
    List<LogicalVariable> indexSubTreeLiveVars = new ArrayList<>();
    VariableUtilities.getLiveVariables(indexSubTree.getRoot(), indexSubTreeLiveVars);
    // Clone the original join condition because we may have to modify it (and we also need the original).
    ILogicalExpression joinCond = join.getCondition().getValue().cloneExpression();
    // Create "panic" (non indexed) nested-loop join path if necessary.
    Mutable<ILogicalOperator> panicJoinRef = null;
    Map<LogicalVariable, LogicalVariable> panicVarMap = null;
    if (optFuncExpr.getFuncExpr().getFunctionIdentifier() == BuiltinFunctions.EDIT_DISTANCE_CHECK || optFuncExpr.getFuncExpr().getFunctionIdentifier() == BuiltinFunctions.EDIT_DISTANCE_CONTAINS) {
        panicJoinRef = new MutableObject<>(joinRef.getValue());
        panicVarMap = new HashMap<>();
        Mutable<ILogicalOperator> newProbeRootRef = createPanicNestedLoopJoinPlan(panicJoinRef, indexSubTree, probeSubTree, optFuncExpr, chosenIndex, panicVarMap, context);
        probeSubTree.getRootRef().setValue(newProbeRootRef.getValue());
        probeSubTree.setRoot(newProbeRootRef.getValue());
    }
    // Create regular indexed-nested loop join path.
    ILogicalOperator indexPlanRootOp = createSecondaryToPrimaryPlan(null, indexSubTree, probeSubTree, chosenIndex, analysisCtx, true, isLeftOuterJoin, true, context);
    indexSubTree.getDataSourceRef().setValue(indexPlanRootOp);
    // Change join into a select with the same condition.
    SelectOperator topSelect = new SelectOperator(new MutableObject<ILogicalExpression>(joinCond), isLeftOuterJoin, newNullPlaceHolderVar);
    topSelect.getInputs().add(indexSubTree.getRootRef());
    topSelect.setExecutionMode(ExecutionMode.LOCAL);
    context.computeAndSetTypeEnvironmentForOperator(topSelect);
    ILogicalOperator topOp = topSelect;
    // Hook up the indexed-nested loop join path with the "panic" (non indexed) nested-loop join path by putting a union all on top.
    if (panicJoinRef != null) {
        LogicalVariable inputSearchVar = getInputSearchVar(optFuncExpr, indexSubTree);
        indexSubTreeLiveVars.addAll(originalSubTreePKs);
        indexSubTreeLiveVars.add(inputSearchVar);
        List<LogicalVariable> panicPlanLiveVars = new ArrayList<>();
        VariableUtilities.getLiveVariables(panicJoinRef.getValue(), panicPlanLiveVars);
        // Create variable mapping for union all operator.
        List<Triple<LogicalVariable, LogicalVariable, LogicalVariable>> varMap = new ArrayList<>();
        for (int i = 0; i < indexSubTreeLiveVars.size(); i++) {
            LogicalVariable indexSubTreeVar = indexSubTreeLiveVars.get(i);
            LogicalVariable panicPlanVar = panicVarMap.get(indexSubTreeVar);
            if (panicPlanVar == null) {
                panicPlanVar = indexSubTreeVar;
            }
            varMap.add(new Triple<LogicalVariable, LogicalVariable, LogicalVariable>(indexSubTreeVar, panicPlanVar, indexSubTreeVar));
        }
        UnionAllOperator unionAllOp = new UnionAllOperator(varMap);
        unionAllOp.getInputs().add(new MutableObject<ILogicalOperator>(topOp));
        unionAllOp.getInputs().add(panicJoinRef);
        unionAllOp.setExecutionMode(ExecutionMode.PARTITIONED);
        context.computeAndSetTypeEnvironmentForOperator(unionAllOp);
        topOp = unionAllOp;
    }
    // Place a top-level equi-join on top to retrieve the missing variables from the original probe subtree.
    // The inner (build) branch of the join is the subtree with the data scan, since the result of the similarity join could potentially be big.
    // This choice may not always be the most efficient, but it seems more robust than the alternative.
    Mutable<ILogicalExpression> eqJoinConditionRef = createPrimaryKeysEqJoinCondition(originalSubTreePKs, surrogateSubTreePKs);
    InnerJoinOperator topEqJoin = new InnerJoinOperator(eqJoinConditionRef, originalProbeSubTreeRootRef, new MutableObject<ILogicalOperator>(topOp));
    topEqJoin.setExecutionMode(ExecutionMode.PARTITIONED);
    joinRef.setValue(topEqJoin);
    context.computeAndSetTypeEnvironmentForOperator(topEqJoin);
    return true;
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) Dataset(org.apache.asterix.metadata.entities.Dataset) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) ArrayList(java.util.ArrayList) InnerJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator) AbstractBinaryJoinOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator) Triple(org.apache.hyracks.algebricks.common.utils.Triple) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) SelectOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator) UnionAllOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator)

Aggregations

InnerJoinOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator)21 ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator)15 ILogicalExpression (org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression)13 LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable)12 AbstractLogicalOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator)10 Mutable (org.apache.commons.lang3.mutable.Mutable)9 ArrayList (java.util.ArrayList)8 HashSet (java.util.HashSet)6 ILogicalPlan (org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan)6 MutableObject (org.apache.commons.lang3.mutable.MutableObject)5 AbstractBinaryJoinOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator)5 EmptyTupleSourceOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator)5 Pair (org.apache.hyracks.algebricks.common.utils.Pair)4 SelectOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator)4 SubplanOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator)4 VariableReferenceExpression (org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression)3 GroupByOperator (org.apache.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator)3 MetadataProvider (org.apache.asterix.metadata.declared.MetadataProvider)2 Dataset (org.apache.asterix.metadata.entities.Dataset)2 AsterixConstantValue (org.apache.asterix.om.constants.AsterixConstantValue)2