use of io.crate.execution.dsl.phases.NestedLoopPhase in project crate by crate.
the class NestedLoopJoin method build.
/**
 * Builds the execution plan for this nested-loop join.
 *
 * <p>Both inputs are built with no limit, no offset and no ordering. A page size hint
 * ({@code limitAndOffset(limit, offset)}) is handed to the children only when the join is
 * not filtered and {@code limit} is set; otherwise the hint is {@code null}.
 *
 * <p>The join is executed distributed when at least one base table is a
 * {@code DocTableRelation}, the join is filtered, the join type is not an outer join and both
 * inputs report a non-empty set of node ids. Block nested loop is only considered when the
 * execution is not distributed.
 *
 * <p>The two inputs may be swapped (and the join type inverted) for performance reasons; see
 * the comments at the swap below.
 *
 * @param order        not used by this implementation
 * @param pageSizeHint not used by this implementation
 * @return a {@code Join} wrapping the created {@code NestedLoopPhase} and both child plans
 */
@Override
public ExecutionPlan build(PlannerContext plannerContext, Set<PlanHint> hints, ProjectionBuilder projectionBuilder, int limit, int offset, @Nullable OrderBy order, @Nullable Integer pageSizeHint, Row params, SubQueryResults subQueryResults) {
    /*
     * Benchmarks reveal that if rows are filtered out distributed execution gives better performance.
     * Therefore if `isFiltered` is true (there is joinCondition or a filtering after the join operation)
     * then it's a good indication that distributed execution will be faster.
     *
     * We may at some point add some kind of session-settings to override this behaviour
     * or otherwise come up with a better heuristic.
     */
    Integer childPageSizeHint = !isFiltered && limit != TopN.NO_LIMIT ? limitAndOffset(limit, offset) : null;
    ExecutionPlan left = lhs.build(plannerContext, hints, projectionBuilder, NO_LIMIT, 0, null, childPageSizeHint, params, subQueryResults);
    ExecutionPlan right = rhs.build(plannerContext, hints, projectionBuilder, NO_LIMIT, 0, null, childPageSizeHint, params, subQueryResults);

    // Captured before a possible swap: this is the ordering of the ORIGINAL left input.
    PositionalOrderBy orderByFromLeft = left.resultDescription().orderBy();

    boolean hasDocTables = baseTables.stream().anyMatch(r -> r instanceof DocTableRelation);
    boolean isDistributed = hasDocTables
        && isFiltered
        && !this.joinType.isOuter()
        && !left.resultDescription().nodeIds().isEmpty()
        && !right.resultDescription().nodeIds().isEmpty();
    boolean blockNlPossible = !isDistributed && isBlockNlPossible(left, right);

    LogicalPlan leftLogicalPlan = lhs;
    LogicalPlan rightLogicalPlan = rhs;
    JoinType effectiveJoinType = this.joinType;

    // The swap is only legal when no ORDER BY was pushed down and the join type can be inverted.
    // These guards must cover BOTH swap reasons; without the explicit parentheses `&&` binds
    // tighter than `||` and the block-nested-loop reason would bypass them.
    boolean swapAllowed = !orderByWasPushedDown && effectiveJoinType.supportsInversion();
    if (swapAllowed
        && ((isDistributed && lhs.numExpectedRows() < rhs.numExpectedRows() && orderByFromLeft == null)
            || (blockNlPossible && lhs.numExpectedRows() > rhs.numExpectedRows()))) {
        // 1) The right side is always broadcast-ed, so for performance reasons we switch the tables so that
        //    the right table is the smaller (numOfRows). If left relation has a pushed-down OrderBy that needs
        //    to be preserved, then the switch is not possible.
        // 2) For block nested loop, the left side should always be smaller. Benchmarks have shown that the
        //    performance decreases if the left side is much larger and no limit is applied.
        // NOTE(review): in case 2) `orderByFromLeft` is not checked for null here, yet it is still passed
        // to the resulting Join below - confirm isBlockNlPossible rules out an ordered left input.
        ExecutionPlan originalLeft = left;
        left = right;
        right = originalLeft;
        leftLogicalPlan = rhs;
        rightLogicalPlan = lhs;
        effectiveJoinType = effectiveJoinType.invert();
    }

    // v1: nodes the join runs on; v2: merge phases, read below by index 0 and 1
    // (presumably left and right input - verify against configureExecution).
    Tuple<Collection<String>, List<MergePhase>> joinExecutionNodesAndMergePhases =
        configureExecution(left, right, plannerContext, isDistributed);

    List<Symbol> joinOutputs = Lists2.concat(leftLogicalPlan.outputs(), rightLogicalPlan.outputs());
    SubQueryAndParamBinder paramBinder = new SubQueryAndParamBinder(params, subQueryResults);

    // The join condition stays null when this join has none.
    Symbol joinInput = null;
    if (joinCondition != null) {
        joinInput = InputColumns.create(paramBinder.apply(joinCondition), joinOutputs);
    }

    NestedLoopPhase nlPhase = new NestedLoopPhase(
        plannerContext.jobId(),
        plannerContext.nextExecutionPhaseId(),
        isDistributed ? "distributed-nested-loop" : "nested-loop",
        Collections.singletonList(JoinOperations.createJoinProjection(outputs, joinOutputs)),
        joinExecutionNodesAndMergePhases.v2().get(0),
        joinExecutionNodesAndMergePhases.v2().get(1),
        leftLogicalPlan.outputs().size(),
        rightLogicalPlan.outputs().size(),
        joinExecutionNodesAndMergePhases.v1(),
        effectiveJoinType,
        joinInput,
        Symbols.typeView(leftLogicalPlan.outputs()),
        leftLogicalPlan.estimatedRowSize(),
        leftLogicalPlan.numExpectedRows(),
        blockNlPossible);
    return new Join(nlPhase, left, right, TopN.NO_LIMIT, 0, TopN.NO_LIMIT, outputs.size(), orderByFromLeft);
}
use of io.crate.execution.dsl.phases.NestedLoopPhase in project crate by crate.
the class JoinPhaseTest method testNestedLoopSerialization.
/**
 * Writes a {@code NestedLoopPhase} to a stream, reads it back and checks that the
 * deserialized instance carries the same values as the original.
 */
@Test
public void testNestedLoopSerialization() throws Exception {
    NestedLoopPhase node = new NestedLoopPhase(jobId, 1, "nestedLoop", List.of(topNProjection), mp1, mp2, 2, 3, Set.of("node1", "node2"), JoinType.FULL, joinCondition, List.of(DataTypes.LONG, DataTypes.STRING, new ArrayType<>(DataTypes.INTEGER)), 32L, 100_000, true);

    // Round trip: serialize, then rebuild from the written bytes.
    BytesStreamOutput output = new BytesStreamOutput();
    node.writeTo(output);
    StreamInput input = output.bytes().streamInput();
    NestedLoopPhase node2 = new NestedLoopPhase(input);

    // The deserialized instance (node2) is the value under test; node is the expectation.
    assertThat(node2.nodeIds(), is(node.nodeIds()));
    assertThat(node2.jobId(), is(node.jobId()));
    assertThat(node2.type(), is(node.type()));
    assertThat(node2.name(), is(node.name()));
    assertThat(node2.outputTypes(), is(node.outputTypes()));
    assertThat(node2.joinType(), is(node.joinType()));
    assertThat(node2.joinCondition(), is(node.joinCondition()));

    // These must be checked on node2: asserting them on node would only re-check the
    // constructor arguments and never exercise the stream round trip.
    assertThat(node2.estimatedRowsSizeLeft, is(32L));
    assertThat(node2.estimatedNumberOfRowsLeft, is(100_000L));
    assertThat(node2.blockNestedLoop, is(true));
}
Aggregations