Search in sources :

Example 1 with MergePhase

use of io.crate.execution.dsl.phases.MergePhase in project crate by crate.

the class HashAggregate method build.

/**
 * Builds the execution plan for this aggregation on top of the plan of {@code source}.
 *
 * <p>The source is built with no limit, no offset and no ordering. If the source result is
 * located on the handler node, the aggregation projections are appended directly to the
 * source plan. Otherwise a partial aggregation is appended to the source plan and a
 * {@link Merge} with a {@link MergePhase} on the handler node applies the final aggregation.
 *
 * <p>{@code limit}, {@code offset}, {@code order} and {@code pageSizeHint} are not referenced
 * by this method.
 *
 * @param plannerContext    supplies job id, phase ids, handler node and session search path
 * @param planHints         forwarded to the source plan
 * @param projectionBuilder used to create the aggregation projections
 * @param params            parameter row used for binding
 * @param subQueryResults   sub-query results used for binding
 * @return the source plan with aggregation projections, or a {@link Merge} wrapping it
 */
@Override
public ExecutionPlan build(PlannerContext plannerContext, Set<PlanHint> planHints, ProjectionBuilder projectionBuilder, int limit, int offset, @Nullable OrderBy order, @Nullable Integer pageSizeHint, Row params, SubQueryResults subQueryResults) {
    ExecutionPlan plan = source.build(
        plannerContext,
        planHints,
        projectionBuilder,
        LogicalPlanner.NO_LIMIT,
        0,
        null,
        null,
        params,
        subQueryResults
    );
    AggregationOutputValidator.validateOutputs(aggregates);
    var binder = new SubQueryAndParamBinder(params, subQueryResults);
    var inputs = source.outputs();

    // A pending limit/offset on the source has to be applied on the handler first.
    if (plan.resultDescription().hasRemainingLimitOrOffset()) {
        plan = Merge.ensureOnHandler(plan, plannerContext);
    }

    boolean onHandler = ExecutionPhases.executesOnHandler(
        plannerContext.handlerNode(),
        plan.resultDescription().nodeIds()
    );
    boolean shardProjections = source.preferShardProjections();
    var searchPath = plannerContext.transactionContext().sessionContext().searchPath();

    if (!onHandler) {
        // Source runs elsewhere: aggregate partially there, finish in a merge phase on the handler.
        RowGranularity partialGranularity = shardProjections ? RowGranularity.SHARD : RowGranularity.NODE;
        AggregationProjection partial = projectionBuilder.aggregationProjection(
            inputs,
            aggregates,
            binder,
            AggregateMode.ITER_PARTIAL,
            partialGranularity,
            searchPath
        );
        plan.addProjection(partial);
        AggregationProjection mergeToFinal = projectionBuilder.aggregationProjection(
            aggregates,
            aggregates,
            binder,
            AggregateMode.PARTIAL_FINAL,
            RowGranularity.CLUSTER,
            searchPath
        );
        MergePhase finalMerge = new MergePhase(
            plannerContext.jobId(),
            plannerContext.nextExecutionPhaseId(),
            MERGE_PHASE_NAME,
            plan.resultDescription().nodeIds().size(),
            1,
            Collections.singletonList(plannerContext.handlerNode()),
            plan.resultDescription().streamOutputs(),
            Collections.singletonList(mergeToFinal),
            DistributionInfo.DEFAULT_BROADCAST,
            null
        );
        return new Merge(plan, finalMerge, LogicalPlanner.NO_LIMIT, 0, aggregates.size(), 1, null);
    }

    if (shardProjections) {
        // Two-step aggregation on the handler: partial at shard granularity, then final.
        plan.addProjection(projectionBuilder.aggregationProjection(
            inputs,
            aggregates,
            binder,
            AggregateMode.ITER_PARTIAL,
            RowGranularity.SHARD,
            searchPath
        ));
        plan.addProjection(projectionBuilder.aggregationProjection(
            aggregates,
            aggregates,
            binder,
            AggregateMode.PARTIAL_FINAL,
            RowGranularity.CLUSTER,
            searchPath
        ));
    } else {
        // Single-step aggregation straight from the source rows to the final result.
        plan.addProjection(projectionBuilder.aggregationProjection(
            inputs,
            aggregates,
            binder,
            AggregateMode.ITER_FINAL,
            RowGranularity.CLUSTER,
            searchPath
        ));
    }
    return plan;
}
Also used : ExecutionPlan(io.crate.planner.ExecutionPlan) MergePhase(io.crate.execution.dsl.phases.MergePhase) Merge(io.crate.planner.Merge) AggregationProjection(io.crate.execution.dsl.projection.AggregationProjection)

Example 2 with MergePhase

use of io.crate.execution.dsl.phases.MergePhase in project crate by crate.

the class Count method build.

/**
 * Builds a {@link CountPlan} consisting of a {@link CountPhase} and a {@link MergePhase}
 * that runs on the handler node and applies {@link MergeCountProjection#INSTANCE}.
 *
 * <p>Parameters and sub-query values are bound into the where clause and the result is
 * normalized before partitions are resolved and routing is allocated.
 *
 * <p>{@code planHints}, {@code projectionBuilder}, {@code limit}, {@code offset},
 * {@code order} and {@code pageSizeHint} are not referenced by this method.
 *
 * @param plannerContext  supplies node/transaction context, routing, job id and phase ids
 * @param params          parameter row used for binding
 * @param subQueryResults sub-query results used for binding
 * @return the count plan
 */
@Override
public ExecutionPlan build(PlannerContext plannerContext, Set<PlanHint> planHints, ProjectionBuilder projectionBuilder, int limit, int offset, @Nullable OrderBy order, @Nullable Integer pageSizeHint, Row params, SubQueryResults subQueryResults) {
    var clusterNormalizer = new EvaluatingNormalizer(
        plannerContext.nodeContext(),
        RowGranularity.CLUSTER,
        null,
        tableRelation
    );
    var paramBinder = new SubQueryAndParamBinder(params, subQueryResults);
    var bindAndNormalize = paramBinder.andThen(
        symbol -> clusterNormalizer.normalize(symbol, plannerContext.transactionContext()));

    // Binding parameters and sub-query values requires re-analyzing the query:
    // it may turn into a NO_MATCH, the routing may have changed, etc.
    WhereClause resolvedWhere = WhereClauseAnalyzer.resolvePartitions(
        where.map(bindAndNormalize),
        tableRelation,
        plannerContext.transactionContext(),
        plannerContext.nodeContext()
    );
    Routing shardRouting = plannerContext.allocateRouting(
        tableRelation.tableInfo(),
        resolvedWhere,
        RoutingProvider.ShardSelection.ANY,
        plannerContext.transactionContext().sessionContext()
    );

    // The count phase takes its phase id before the merge phase does.
    CountPhase counting = new CountPhase(
        plannerContext.nextExecutionPhaseId(),
        shardRouting,
        Optimizer.optimizeCasts(resolvedWhere.queryOrFallback(), plannerContext),
        DistributionInfo.DEFAULT_BROADCAST
    );
    MergePhase merging = new MergePhase(
        plannerContext.jobId(),
        plannerContext.nextExecutionPhaseId(),
        COUNT_PHASE_NAME,
        counting.nodeIds().size(),
        1,
        Collections.singletonList(plannerContext.handlerNode()),
        Collections.singletonList(DataTypes.LONG),
        Collections.singletonList(MergeCountProjection.INSTANCE),
        DistributionInfo.DEFAULT_BROADCAST,
        null
    );
    return new CountPlan(counting, merging);
}
Also used : MergePhase(io.crate.execution.dsl.phases.MergePhase) EvaluatingNormalizer(io.crate.expression.eval.EvaluatingNormalizer) WhereClause(io.crate.analyze.WhereClause) Routing(io.crate.metadata.Routing) CountPhase(io.crate.execution.dsl.phases.CountPhase) CountPlan(io.crate.planner.node.dql.CountPlan)

Example 3 with MergePhase

use of io.crate.execution.dsl.phases.MergePhase in project crate by crate.

the class HashJoin method build.

/**
 * Builds the execution plan for this hash join.
 *
 * <p>Both sides are built without limit, offset, ordering or page size hint. If the left
 * relation is expected to have fewer rows than the right one, the two sides are swapped.
 * The join then runs in one of three ways:
 * <ul>
 *   <li>without merge phases, when both sides execute on the same single node and the right
 *       side has no remaining limit/offset;</li>
 *   <li>distributed with modulo distribution, when neither side has a remaining limit/offset
 *       and the left side has at least one execution node;</li>
 *   <li>otherwise non-distributed on the handler node.</li>
 * </ul>
 *
 * <p>{@code limit}, {@code offset}, {@code order} and {@code pageSizeHint} are not referenced
 * by this method.
 *
 * @param plannerContext    supplies job id, phase ids and the handler node
 * @param hints             forwarded to both source plans
 * @param projectionBuilder forwarded to both source plans
 * @param params            parameter row used for binding
 * @param subQueryResults   sub-query results used for binding
 * @return a {@link Join} wrapping the {@link HashJoinPhase} and both source plans
 */
@Override
public ExecutionPlan build(PlannerContext plannerContext, Set<PlanHint> hints, ProjectionBuilder projectionBuilder, int limit, int offset, @Nullable OrderBy order, @Nullable Integer pageSizeHint, Row params, SubQueryResults subQueryResults) {
    ExecutionPlan leftExecutionPlan = lhs.build(plannerContext, hints, projectionBuilder, NO_LIMIT, 0, null, null, params, subQueryResults);
    ExecutionPlan rightExecutionPlan = rhs.build(plannerContext, hints, projectionBuilder, NO_LIMIT, 0, null, null, params, subQueryResults);
    LogicalPlan leftLogicalPlan = lhs;
    LogicalPlan rightLogicalPlan = rhs;
    boolean tablesSwitched = false;
    // Move the relation with fewer expected rows to the right side; the original note
    // (truncated in this copy) says it was revealed that this improves performance in most cases.
    if (lhs.numExpectedRows() < rhs.numExpectedRows()) {
        tablesSwitched = true;
        leftLogicalPlan = rhs;
        rightLogicalPlan = lhs;
        ExecutionPlan tmp = leftExecutionPlan;
        leftExecutionPlan = rightExecutionPlan;
        rightExecutionPlan = tmp;
    }
    SubQueryAndParamBinder paramBinder = new SubQueryAndParamBinder(params, subQueryResults);
    // v1() holds the hash symbols used for the left side, v2() those for the right side (see usages below).
    Tuple<List<Symbol>, List<Symbol>> hashSymbols = extractHashJoinSymbolsFromJoinSymbolsAndSplitPerSide(tablesSwitched);
    ResultDescription leftResultDesc = leftExecutionPlan.resultDescription();
    ResultDescription rightResultDesc = rightExecutionPlan.resultDescription();
    // Start with the nodes of the (possibly swapped) left side; replaced by the handler node
    // below if the join has to run non-distributed.
    Collection<String> joinExecutionNodes = leftResultDesc.nodeIds();
    List<Symbol> leftOutputs = leftLogicalPlan.outputs();
    List<Symbol> rightOutputs = rightLogicalPlan.outputs();
    // Both merge phases stay null when the join runs on the same single node as its sources.
    MergePhase leftMerge = null;
    MergePhase rightMerge = null;
    // We can only run the join distributed if no remaining limit or offset must be applied on the source relations.
    // Because on distributed joins, every join is running on a slice (modulo) set of the data and so no limit/offset
    // could be applied. Limit/offset can only be applied on the whole data set after all partial rows from the
    // shards are merged
    boolean isDistributed = leftResultDesc.hasRemainingLimitOrOffset() == false && rightResultDesc.hasRemainingLimitOrOffset() == false;
    if (joinExecutionNodes.isEmpty()) {
        // The left source might have zero execution nodes, for example in the case of `sys.shards` without any tables
        // If the join then also uses zero execution nodes, a distributed plan no longer works because
        // the source operators wouldn't have a downstream node where they can send the results to.
        // → we switch to non-distributed which results in the join running on the handlerNode.
        isDistributed = false;
    }
    if (joinExecutionNodes.size() == 1 && joinExecutionNodes.equals(rightResultDesc.nodeIds()) && !rightResultDesc.hasRemainingLimitOrOffset()) {
        // If the left and the right plan are executed on the same single node the mergePhase
        // should be omitted. This is the case if the left and right table have only one shard each
        // and these are on the same node
        leftExecutionPlan.setDistributionInfo(DistributionInfo.DEFAULT_SAME_NODE);
        rightExecutionPlan.setDistributionInfo(DistributionInfo.DEFAULT_SAME_NODE);
    } else {
        if (isDistributed) {
            // Run the join distributed by modulo distribution algorithm
            leftOutputs = setModuloDistribution(Lists2.map(hashSymbols.v1(), paramBinder), leftLogicalPlan.outputs(), leftExecutionPlan);
            rightOutputs = setModuloDistribution(Lists2.map(hashSymbols.v2(), paramBinder), rightLogicalPlan.outputs(), rightExecutionPlan);
        } else {
            // Run the join non-distributed on the handler node
            joinExecutionNodes = Collections.singletonList(plannerContext.handlerNode());
            leftExecutionPlan.setDistributionInfo(DistributionInfo.DEFAULT_BROADCAST);
            rightExecutionPlan.setDistributionInfo(DistributionInfo.DEFAULT_BROADCAST);
        }
        // In both the distributed and the handler-node case each side gets a merge phase
        // targeting the join execution nodes.
        leftMerge = JoinOperations.buildMergePhaseForJoin(plannerContext, leftResultDesc, joinExecutionNodes);
        rightMerge = JoinOperations.buildMergePhaseForJoin(plannerContext, rightResultDesc, joinExecutionNodes);
    }
    // Input layout of the join: left outputs followed by right outputs.
    List<Symbol> joinOutputs = Lists2.concat(leftOutputs, rightOutputs);
    HashJoinPhase joinPhase = new HashJoinPhase(plannerContext.jobId(), plannerContext.nextExecutionPhaseId(), "hash-join", Collections.singletonList(JoinOperations.createJoinProjection(outputs, joinOutputs)), leftMerge, rightMerge, leftOutputs.size(), rightOutputs.size(), joinExecutionNodes, InputColumns.create(paramBinder.apply(joinCondition), joinOutputs), InputColumns.create(Lists2.map(hashSymbols.v1(), paramBinder), new InputColumns.SourceSymbols(leftOutputs)), InputColumns.create(Lists2.map(hashSymbols.v2(), paramBinder), new InputColumns.SourceSymbols(rightOutputs)), Symbols.typeView(leftOutputs), leftLogicalPlan.estimatedRowSize(), leftLogicalPlan.numExpectedRows());
    return new Join(joinPhase, leftExecutionPlan, rightExecutionPlan, TopN.NO_LIMIT, 0, TopN.NO_LIMIT, outputs.size(), null);
}
Also used : HashJoinPhase(io.crate.execution.dsl.phases.HashJoinPhase) SelectSymbol(io.crate.expression.symbol.SelectSymbol) Symbol(io.crate.expression.symbol.Symbol) Join(io.crate.planner.node.dql.join.Join) ExecutionPlan(io.crate.planner.ExecutionPlan) MergePhase(io.crate.execution.dsl.phases.MergePhase) ResultDescription(io.crate.planner.ResultDescription) ArrayList(java.util.ArrayList) List(java.util.List)

Example 4 with MergePhase

use of io.crate.execution.dsl.phases.MergePhase in project crate by crate.

the class WindowAgg method build.

/**
 * Builds the execution plan for this window aggregation.
 *
 * <p>A {@link WindowAggProjection} is created from the window definition and window functions
 * after binding parameters/sub-query values and mapping symbols to the source outputs. The
 * source is built without limit and offset. The projection is then applied either
 * <ul>
 *   <li>on the handler node, when the window definition has no partitions, the source has a
 *       remaining limit/offset, or the source already executes on the handler; or</li>
 *   <li>in a {@link MergePhase} on the source's nodes, with the source rows distributed by
 *       modulo on the first partition symbol.</li>
 * </ul>
 *
 * <p>{@code limit}, {@code offset} and {@code order} are not referenced by this method.
 *
 * @param plannerContext    supplies phase ids and the handler node
 * @param planHints         forwarded to the source plan
 * @param projectionBuilder forwarded to the source plan
 * @param pageSizeHint      forwarded to the source plan
 * @param params            parameter row used for binding
 * @param subQueryResults   sub-query results used for binding
 * @return the source plan with the window projection appended, or a {@link Merge} wrapping it
 */
@Override
public ExecutionPlan build(PlannerContext plannerContext, Set<PlanHint> planHints, ProjectionBuilder projectionBuilder, int limit, int offset, @Nullable OrderBy order, @Nullable Integer pageSizeHint, Row params, SubQueryResults subQueryResults) {
    InputColumns.SourceSymbols inputs = new InputColumns.SourceSymbols(source.outputs());
    SubQueryAndParamBinder paramBinder = new SubQueryAndParamBinder(params, subQueryResults);
    Function<Symbol, Symbol> bindToInputs = paramBinder.andThen(symbol -> InputColumns.create(symbol, inputs));
    List<WindowFunction> boundFunctions = (List<WindowFunction>) (List) Lists2.map(windowFunctions, bindToInputs);
    WindowAggProjection windowProjection = new WindowAggProjection(
        windowDefinition.map(bindToInputs),
        boundFunctions,
        InputColumns.create(this.standalone, inputs)
    );
    List<Projection> projections = new ArrayList<>();
    projections.add(windowProjection);

    ExecutionPlan plan = source.build(
        plannerContext,
        planHints,
        projectionBuilder,
        TopN.NO_LIMIT,
        TopN.NO_OFFSET,
        null,
        pageSizeHint,
        params,
        subQueryResults
    );
    ResultDescription sourceResult = plan.resultDescription();
    boolean onHandler = executesOnHandler(plannerContext.handlerNode(), sourceResult.nodeIds());
    boolean distributed = !windowDefinition.partitions().isEmpty()
        && !sourceResult.hasRemainingLimitOrOffset()
        && !onHandler;

    if (distributed) {
        // Distribute source rows by modulo on the position of the first partition symbol.
        int partitionColumn = source.outputs().indexOf(windowDefinition.partitions().iterator().next());
        plan.setDistributionInfo(new DistributionInfo(DistributionType.MODULO, partitionColumn));
        // NOTE(review): this phase gets a fresh id from UUIDs.dirtyUUID() rather than
        // plannerContext.jobId() - confirm that this is intentional.
        MergePhase distWindowAgg = new MergePhase(
            UUIDs.dirtyUUID(),
            plannerContext.nextExecutionPhaseId(),
            "distWindowAgg",
            sourceResult.nodeIds().size(),
            sourceResult.numOutputs(),
            sourceResult.nodeIds(),
            sourceResult.streamOutputs(),
            projections,
            DistributionInfo.DEFAULT_BROADCAST,
            null
        );
        return new Merge(
            plan,
            distWindowAgg,
            TopN.NO_LIMIT,
            TopN.NO_OFFSET,
            windowProjection.outputs().size(),
            sourceResult.maxRowsPerNode(),
            null
        );
    }

    // Non-distributed: bring the source result to the handler and apply the projection there.
    ExecutionPlan handlerPlan = Merge.ensureOnHandler(plan, plannerContext);
    projections.forEach(handlerPlan::addProjection);
    return handlerPlan;
}
Also used : InputColumns(io.crate.execution.dsl.projection.builder.InputColumns) Symbol(io.crate.expression.symbol.Symbol) ArrayList(java.util.ArrayList) Projection(io.crate.execution.dsl.projection.Projection) WindowAggProjection(io.crate.execution.dsl.projection.WindowAggProjection) DistributionInfo(io.crate.planner.distribution.DistributionInfo) WindowFunction(io.crate.expression.symbol.WindowFunction) ExecutionPlan(io.crate.planner.ExecutionPlan) MergePhase(io.crate.execution.dsl.phases.MergePhase) Merge(io.crate.planner.Merge) ResultDescription(io.crate.planner.ResultDescription) WindowAggProjection(io.crate.execution.dsl.projection.WindowAggProjection) ArrayList(java.util.ArrayList) List(java.util.List)

Example 5 with MergePhase

use of io.crate.execution.dsl.phases.MergePhase in project crate by crate.

the class ExecutionPhasesRootTaskTest method testGroupByServer.

/**
 * Checks the node-to-operations mapping produced by {@code NodeOperationGrouper.groupByServer}
 * for a chain collect -> merge1 -> merge2 -> (mock phase).
 *
 * <p>The collect phase is routed to node1/node2, merge1 is placed on node3/node4 and merge2 on
 * node1/node3. The expectations below list each operation under the nodes of its first
 * (upstream) phase.
 */
@Test
public void testGroupByServer() throws Exception {
    // Routing for "t1" spanning node1 and node2.
    var locations = new TreeMap<String, Map<String, IntIndexedContainer>>();
    locations.put("node1", Map.of("t1", IntArrayList.from(1, 2)));
    locations.put("node2", Map.of("t1", IntArrayList.from(3, 4)));
    Routing routing = new Routing(locations);

    UUID job = UUID.randomUUID();
    RoutedCollectPhase collect = new RoutedCollectPhase(
        job,
        1,
        "c1",
        routing,
        RowGranularity.DOC,
        List.of(),
        List.of(),
        WhereClause.MATCH_ALL.queryOrFallback(),
        DistributionInfo.DEFAULT_BROADCAST
    );
    MergePhase firstMerge = new MergePhase(
        job, 2, "merge1", 2, 1, Set.of("node3", "node4"), List.of(), List.of(), DistributionInfo.DEFAULT_BROADCAST, null);
    MergePhase secondMerge = new MergePhase(
        job, 3, "merge2", 2, 1, Set.of("node1", "node3"), List.of(), List.of(), DistributionInfo.DEFAULT_BROADCAST, null);

    NodeOperation collectToFirst = NodeOperation.withDownstream(collect, firstMerge, (byte) 0);
    NodeOperation firstToSecond = NodeOperation.withDownstream(firstMerge, secondMerge, (byte) 0);
    NodeOperation secondToMock = NodeOperation.withDownstream(secondMerge, mock(ExecutionPhase.class), (byte) 0);

    Map<String, Collection<NodeOperation>> byServer =
        NodeOperationGrouper.groupByServer(List.of(collectToFirst, firstToSecond, secondToMock));

    // node1 hosts the collect phase and merge2
    assertThat(byServer.containsKey("node1"), is(true));
    assertThat(byServer.get("node1"), Matchers.containsInAnyOrder(collectToFirst, secondToMock));
    // node2 hosts only the collect phase
    assertThat(byServer.containsKey("node2"), is(true));
    assertThat(byServer.get("node2"), Matchers.containsInAnyOrder(collectToFirst));
    // node3 hosts merge1 and merge2
    assertThat(byServer.containsKey("node3"), is(true));
    assertThat(byServer.get("node3"), Matchers.containsInAnyOrder(firstToSecond, secondToMock));
    // node4 hosts only merge1
    assertThat(byServer.containsKey("node4"), is(true));
    assertThat(byServer.get("node4"), Matchers.containsInAnyOrder(firstToSecond));
}
Also used : MergePhase(io.crate.execution.dsl.phases.MergePhase) Routing(io.crate.metadata.Routing) Collection(java.util.Collection) IntIndexedContainer(com.carrotsearch.hppc.IntIndexedContainer) NodeOperation(io.crate.execution.dsl.phases.NodeOperation) ExecutionPhase(io.crate.execution.dsl.phases.ExecutionPhase) TreeMap(java.util.TreeMap) UUID(java.util.UUID) RoutedCollectPhase(io.crate.execution.dsl.phases.RoutedCollectPhase) Test(org.junit.Test)

Aggregations

MergePhase (io.crate.execution.dsl.phases.MergePhase) 35, Test (org.junit.Test) 26, CrateDummyClusterServiceUnitTest (io.crate.test.integration.CrateDummyClusterServiceUnitTest) 24, RandomizedTest (com.carrotsearch.randomizedtesting.RandomizedTest) 17, RoutedCollectPhase (io.crate.execution.dsl.phases.RoutedCollectPhase) 16, Merge (io.crate.planner.Merge) 15, Collect (io.crate.planner.node.dql.Collect) 14, GroupProjection (io.crate.execution.dsl.projection.GroupProjection) 13, EvalProjection (io.crate.execution.dsl.projection.EvalProjection) 12, InputColumn (io.crate.expression.symbol.InputColumn) 12, Symbol (io.crate.expression.symbol.Symbol) 10, OrderedTopNProjection (io.crate.execution.dsl.projection.OrderedTopNProjection) 9, Projection (io.crate.execution.dsl.projection.Projection) 9, TopNProjection (io.crate.execution.dsl.projection.TopNProjection) 9, FilterProjection (io.crate.execution.dsl.projection.FilterProjection) 8, ColumnIndexWriterProjection (io.crate.execution.dsl.projection.ColumnIndexWriterProjection) 6, MergeCountProjection (io.crate.execution.dsl.projection.MergeCountProjection) 5, ExecutionPlan (io.crate.planner.ExecutionPlan) 4, ResultDescription (io.crate.planner.ResultDescription) 4, Routing (io.crate.metadata.Routing) 3