Search in sources :

Example 1 with NestedLoopPhase

use of io.crate.execution.dsl.phases.NestedLoopPhase in project crate by crate.

the class NestedLoopJoin method build.

/**
 * Builds the execution plan for this nested-loop join.
 *
 * <p>Both inputs are planned without limit, offset or ordering. The method then decides
 * whether the join runs distributed and whether a block nested loop is possible, optionally
 * swaps the two sides, and wraps the resulting {@link NestedLoopPhase} in a {@link Join}.
 *
 * <p>{@code order} and {@code pageSizeHint} are not read by this method.
 */
@Override
public ExecutionPlan build(PlannerContext plannerContext, Set<PlanHint> hints, ProjectionBuilder projectionBuilder, int limit, int offset, @Nullable OrderBy order, @Nullable Integer pageSizeHint, Row params, SubQueryResults subQueryResults) {
    // A page-size hint is only handed to the children when the join does not filter rows
    // and the caller supplied a limit.
    Integer childPageSizeHint = null;
    if (!isFiltered && limit != TopN.NO_LIMIT) {
        childPageSizeHint = limitAndOffset(limit, offset);
    }

    // The left side is built before the right side; keep this order.
    ExecutionPlan left = lhs.build(plannerContext, hints, projectionBuilder, NO_LIMIT, 0, null, childPageSizeHint, params, subQueryResults);
    ExecutionPlan right = rhs.build(plannerContext, hints, projectionBuilder, NO_LIMIT, 0, null, childPageSizeHint, params, subQueryResults);

    // Captured before any swap: the ordering of the ORIGINAL left side is what the Join reports.
    PositionalOrderBy orderByFromLeft = left.resultDescription().orderBy();

    /*
     * Benchmarks reveal that if rows are filtered out distributed execution gives better performance.
     * Therefore if `isFiltered` is true (there is joinCondition or a filtering after the join operation)
     * then it's a good indication that distributed execution will be faster.
     *
     * We may at some point add some kind of session-settings to override this behaviour
     * or otherwise come up with a better heuristic.
     */
    boolean hasDocTables = baseTables.stream().anyMatch(DocTableRelation.class::isInstance);
    boolean isDistributed = hasDocTables
        && isFiltered
        && !joinType.isOuter()
        && !left.resultDescription().nodeIds().isEmpty()
        && !right.resultDescription().nodeIds().isEmpty();
    boolean blockNlPossible = !isDistributed && isBlockNlPossible(left, right);

    // 1) The right side is always broadcast-ed, so for performance reasons we switch the tables so that
    //    the right table is the smaller (numOfRows). If left relation has a pushed-down OrderBy that needs
    //    to be preserved, then the switch is not possible.
    // 2) For block nested loop, the left side should always be smaller. Benchmarks have shown that the
    //    performance decreases if the left side is much larger and no limit is applied.
    //
    // NOTE(review): `&&` binds tighter than `||`, so the `orderByWasPushedDown` and
    // `supportsInversion()` guards apply only to the distributed case (1), not to the
    // block-nested-loop case (2). The grouping is spelled out here unchanged; confirm it is intended.
    boolean swapSides =
        (!orderByWasPushedDown
            && joinType.supportsInversion()
            && isDistributed
            && lhs.numExpectedRows() < rhs.numExpectedRows()
            && orderByFromLeft == null)
        || (blockNlPossible && lhs.numExpectedRows() > rhs.numExpectedRows());

    LogicalPlan leftLogicalPlan = lhs;
    LogicalPlan rightLogicalPlan = rhs;
    JoinType effectiveJoinType = joinType;
    if (swapSides) {
        ExecutionPlan formerLeft = left;
        left = right;
        right = formerLeft;
        leftLogicalPlan = rhs;
        rightLogicalPlan = lhs;
        effectiveJoinType = joinType.invert();
    }

    Tuple<Collection<String>, List<MergePhase>> executionSetup = configureExecution(left, right, plannerContext, isDistributed);
    Collection<String> executionNodes = executionSetup.v1();
    List<MergePhase> mergePhases = executionSetup.v2();

    // Join condition is resolved against the concatenated (possibly swapped) child outputs.
    List<Symbol> joinOutputs = Lists2.concat(leftLogicalPlan.outputs(), rightLogicalPlan.outputs());
    SubQueryAndParamBinder paramBinder = new SubQueryAndParamBinder(params, subQueryResults);
    Symbol joinInput = joinCondition == null
        ? null
        : InputColumns.create(paramBinder.apply(joinCondition), joinOutputs);

    String phaseName = isDistributed ? "distributed-nested-loop" : "nested-loop";
    NestedLoopPhase nlPhase = new NestedLoopPhase(
        plannerContext.jobId(),
        plannerContext.nextExecutionPhaseId(),
        phaseName,
        Collections.singletonList(JoinOperations.createJoinProjection(outputs, joinOutputs)),
        mergePhases.get(0),
        mergePhases.get(1),
        leftLogicalPlan.outputs().size(),
        rightLogicalPlan.outputs().size(),
        executionNodes,
        effectiveJoinType,
        joinInput,
        Symbols.typeView(leftLogicalPlan.outputs()),
        leftLogicalPlan.estimatedRowSize(),
        leftLogicalPlan.numExpectedRows(),
        blockNlPossible);
    return new Join(nlPhase, left, right, TopN.NO_LIMIT, 0, TopN.NO_LIMIT, outputs.size(), orderByFromLeft);
}
Also used : SelectSymbol(io.crate.expression.symbol.SelectSymbol) Symbol(io.crate.expression.symbol.Symbol) JoinType(io.crate.planner.node.dql.join.JoinType) Join(io.crate.planner.node.dql.join.Join) PositionalOrderBy(io.crate.planner.PositionalOrderBy) ExecutionPlan(io.crate.planner.ExecutionPlan) DocTableRelation(io.crate.analyze.relations.DocTableRelation) Collection(java.util.Collection) List(java.util.List) NestedLoopPhase(io.crate.execution.dsl.phases.NestedLoopPhase)

Example 2 with NestedLoopPhase

use of io.crate.execution.dsl.phases.NestedLoopPhase in project crate by crate.

the class JoinPhaseTest method testNestedLoopSerialization.

/**
 * Round-trips a {@link NestedLoopPhase} through its stream serialization and verifies that
 * the deserialized copy carries the same values as the original.
 */
@Test
public void testNestedLoopSerialization() throws Exception {
    NestedLoopPhase node = new NestedLoopPhase(jobId, 1, "nestedLoop", List.of(topNProjection), mp1, mp2, 2, 3, Set.of("node1", "node2"), JoinType.FULL, joinCondition, List.of(DataTypes.LONG, DataTypes.STRING, new ArrayType<>(DataTypes.INTEGER)), 32L, 100_000, true);

    BytesStreamOutput output = new BytesStreamOutput();
    node.writeTo(output);
    StreamInput input = output.bytes().streamInput();
    NestedLoopPhase node2 = new NestedLoopPhase(input);

    // The deserialized phase is the value under test; the original is the expectation.
    assertThat(node2.nodeIds(), is(node.nodeIds()));
    assertThat(node2.jobId(), is(node.jobId()));
    assertThat(node2.joinCondition(), is(node.joinCondition()));
    assertThat(node2.type(), is(node.type()));
    assertThat(node2.name(), is(node.name()));
    assertThat(node2.outputTypes(), is(node.outputTypes()));
    assertThat(node2.joinType(), is(node.joinType()));

    // These must be checked on the deserialized copy: asserting them on `node` would only
    // confirm the constructor arguments and would pass even if they were never streamed.
    assertThat(node2.estimatedRowsSizeLeft, is(32L));
    assertThat(node2.estimatedNumberOfRowsLeft, is(100_000L));
    assertThat(node2.blockNestedLoop, is(true));
}
Also used : ArrayType(io.crate.types.ArrayType) StreamInput(org.elasticsearch.common.io.stream.StreamInput) NestedLoopPhase(io.crate.execution.dsl.phases.NestedLoopPhase) BytesStreamOutput(org.elasticsearch.common.io.stream.BytesStreamOutput) Test(org.junit.Test)

Aggregations

NestedLoopPhase (io.crate.execution.dsl.phases.NestedLoopPhase): 2, DocTableRelation (io.crate.analyze.relations.DocTableRelation): 1, SelectSymbol (io.crate.expression.symbol.SelectSymbol): 1, Symbol (io.crate.expression.symbol.Symbol): 1, ExecutionPlan (io.crate.planner.ExecutionPlan): 1, PositionalOrderBy (io.crate.planner.PositionalOrderBy): 1, Join (io.crate.planner.node.dql.join.Join): 1, JoinType (io.crate.planner.node.dql.join.JoinType): 1, ArrayType (io.crate.types.ArrayType): 1, Collection (java.util.Collection): 1, List (java.util.List): 1, BytesStreamOutput (org.elasticsearch.common.io.stream.BytesStreamOutput): 1, StreamInput (org.elasticsearch.common.io.stream.StreamInput): 1, Test (org.junit.Test): 1