use of io.trino.cost.PlanNodeStatsEstimate in project trino by trinodb.
the class DetermineSemiJoinDistributionType method canReplicate.
private boolean canReplicate(SemiJoinNode node, Context context) {
DataSize joinMaxBroadcastTableSize = getJoinMaxBroadcastTableSize(context.getSession());
PlanNode buildSide = node.getFilteringSource();
PlanNodeStatsEstimate buildSideStatsEstimate = context.getStatsProvider().getStats(buildSide);
double buildSideSizeInBytes = buildSideStatsEstimate.getOutputSizeInBytes(buildSide.getOutputSymbols(), context.getSymbolAllocator().getTypes());
return buildSideSizeInBytes <= joinMaxBroadcastTableSize.toBytes() || getSourceTablesSizeInBytes(buildSide, context) <= joinMaxBroadcastTableSize.toBytes();
}
use of io.trino.cost.PlanNodeStatsEstimate in project trino by trinodb.
the class PushJoinIntoTableScan method getJoinStatistics.
private JoinStatistics getJoinStatistics(JoinNode join, TableScanNode left, TableScanNode right, Context context) {
return new JoinStatistics() {
@Override
public Optional<BasicRelationStatistics> getLeftStatistics() {
return getBasicRelationStats(left, left.getOutputSymbols(), context);
}
@Override
public Optional<BasicRelationStatistics> getRightStatistics() {
return getBasicRelationStats(right, right.getOutputSymbols(), context);
}
@Override
public Optional<BasicRelationStatistics> getJoinStatistics() {
return getBasicRelationStats(join, join.getOutputSymbols(), context);
}
private Optional<BasicRelationStatistics> getBasicRelationStats(PlanNode node, List<Symbol> outputSymbols, Context context) {
PlanNodeStatsEstimate stats = context.getStatsProvider().getStats(node);
TypeProvider types = context.getSymbolAllocator().getTypes();
double outputRowCount = stats.getOutputRowCount();
double outputSize = stats.getOutputSizeInBytes(outputSymbols, types);
if (isNaN(outputRowCount) || isNaN(outputSize)) {
return Optional.empty();
}
return Optional.of(new BasicRelationStatistics((long) outputRowCount, (long) outputSize));
}
};
}
use of io.trino.cost.PlanNodeStatsEstimate in project trino by trinodb.
the class TestReorderJoins method testReplicatesWhenNotRestricted.
@Test
public void testReplicatesWhenNotRestricted() {
// variable width so that average row size is respected
Type symbolType = createUnboundedVarcharType();
int aRows = 10_000;
int bRows = 10;
PlanNodeStatsEstimate probeSideStatsEstimate = PlanNodeStatsEstimate.builder().setOutputRowCount(aRows).addSymbolStatistics(ImmutableMap.of(new Symbol("A1"), new SymbolStatsEstimate(0, 100, 0, 640000, 10))).build();
PlanNodeStatsEstimate buildSideStatsEstimate = PlanNodeStatsEstimate.builder().setOutputRowCount(bRows).addSymbolStatistics(ImmutableMap.of(new Symbol("B1"), new SymbolStatsEstimate(0, 100, 0, 640000, 10))).build();
// B table is small enough to be replicated in AUTOMATIC_RESTRICTED mode
assertReorderJoins().setSystemProperty(JOIN_DISTRIBUTION_TYPE, AUTOMATIC.name()).setSystemProperty(JOIN_MAX_BROADCAST_TABLE_SIZE, "100MB").on(p -> {
Symbol a1 = p.symbol("A1", symbolType);
Symbol b1 = p.symbol("B1", symbolType);
return p.join(INNER, p.values(new PlanNodeId("valuesA"), aRows, a1), p.values(new PlanNodeId("valuesB"), bRows, b1), ImmutableList.of(new EquiJoinClause(a1, b1)), ImmutableList.of(a1), ImmutableList.of(b1), Optional.empty());
}).overrideStats("valuesA", probeSideStatsEstimate).overrideStats("valuesB", buildSideStatsEstimate).matches(join(INNER, ImmutableList.of(equiJoinClause("A1", "B1")), Optional.empty(), Optional.of(REPLICATED), values(ImmutableMap.of("A1", 0)), values(ImmutableMap.of("B1", 0))));
probeSideStatsEstimate = PlanNodeStatsEstimate.builder().setOutputRowCount(aRows).addSymbolStatistics(ImmutableMap.of(new Symbol("A1"), new SymbolStatsEstimate(0, 100, 0, 640000d * 10000, 10))).build();
buildSideStatsEstimate = PlanNodeStatsEstimate.builder().setOutputRowCount(bRows).addSymbolStatistics(ImmutableMap.of(new Symbol("B1"), new SymbolStatsEstimate(0, 100, 0, 640000d * 10000, 10))).build();
// B table exceeds AUTOMATIC_RESTRICTED limit therefore it is partitioned
assertReorderJoins().setSystemProperty(JOIN_DISTRIBUTION_TYPE, AUTOMATIC.name()).setSystemProperty(JOIN_MAX_BROADCAST_TABLE_SIZE, "100MB").on(p -> {
Symbol a1 = p.symbol("A1", symbolType);
Symbol b1 = p.symbol("B1", symbolType);
return p.join(INNER, p.values(new PlanNodeId("valuesA"), aRows, a1), p.values(new PlanNodeId("valuesB"), bRows, b1), ImmutableList.of(new EquiJoinClause(a1, b1)), ImmutableList.of(a1), ImmutableList.of(b1), Optional.empty());
}).overrideStats("valuesA", probeSideStatsEstimate).overrideStats("valuesB", buildSideStatsEstimate).matches(join(INNER, ImmutableList.of(equiJoinClause("A1", "B1")), Optional.empty(), Optional.of(PARTITIONED), values(ImmutableMap.of("A1", 0)), values(ImmutableMap.of("B1", 0))));
}
use of io.trino.cost.PlanNodeStatsEstimate in project trino by trinodb.
the class TestDetermineJoinDistributionType method testGetSourceTablesSizeInBytes.
@Test
public void testGetSourceTablesSizeInBytes() {
PlanBuilder planBuilder = new PlanBuilder(new PlanNodeIdAllocator(), tester.getMetadata(), tester.getSession());
Symbol symbol = planBuilder.symbol("col");
Symbol sourceSymbol1 = planBuilder.symbol("source1");
Symbol sourceSymbol2 = planBuilder.symbol("soruce2");
// missing source stats
assertEquals(getSourceTablesSizeInBytes(planBuilder.values(symbol), noLookup(), node -> PlanNodeStatsEstimate.unknown(), planBuilder.getTypes()), NaN);
// two source plan nodes
PlanNodeStatsEstimate sourceStatsEstimate1 = PlanNodeStatsEstimate.builder().setOutputRowCount(10).build();
PlanNodeStatsEstimate sourceStatsEstimate2 = PlanNodeStatsEstimate.builder().setOutputRowCount(20).build();
assertEquals(getSourceTablesSizeInBytes(planBuilder.union(ImmutableListMultimap.<Symbol, Symbol>builder().put(symbol, sourceSymbol1).put(symbol, sourceSymbol2).build(), ImmutableList.of(planBuilder.tableScan(ImmutableList.of(sourceSymbol1), ImmutableMap.of(sourceSymbol1, new TestingColumnHandle("col"))), planBuilder.values(new PlanNodeId("valuesNode"), sourceSymbol2))), noLookup(), node -> {
if (node instanceof TableScanNode) {
return sourceStatsEstimate1;
}
if (node instanceof ValuesNode) {
return sourceStatsEstimate2;
}
return PlanNodeStatsEstimate.unknown();
}, planBuilder.getTypes()), 270.0);
// join node
assertEquals(getSourceTablesSizeInBytes(planBuilder.join(INNER, planBuilder.values(sourceSymbol1), planBuilder.values(sourceSymbol2)), noLookup(), node -> sourceStatsEstimate1, planBuilder.getTypes()), NaN);
}
use of io.trino.cost.PlanNodeStatsEstimate in project trino by trinodb.
the class TestDetermineSemiJoinDistributionType method testReplicatesWhenNotRestricted.
@Test
public void testReplicatesWhenNotRestricted() {
// variable width so that average row size is respected
Type symbolType = createUnboundedVarcharType();
int aRows = 10_000;
int bRows = 10;
PlanNodeStatsEstimate probeSideStatsEstimate = PlanNodeStatsEstimate.builder().setOutputRowCount(aRows).addSymbolStatistics(ImmutableMap.of(new Symbol("A1"), new SymbolStatsEstimate(0, 100, 0, 640000, 10))).build();
PlanNodeStatsEstimate buildSideStatsEstimate = PlanNodeStatsEstimate.builder().setOutputRowCount(bRows).addSymbolStatistics(ImmutableMap.of(new Symbol("B1"), new SymbolStatsEstimate(0, 100, 0, 640000, 10))).build();
// B table is small enough to be replicated in AUTOMATIC_RESTRICTED mode
assertDetermineSemiJoinDistributionType().setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name()).setSystemProperty(JOIN_MAX_BROADCAST_TABLE_SIZE, "100MB").overrideStats("valuesA", probeSideStatsEstimate).overrideStats("valuesB", buildSideStatsEstimate).on(p -> {
Symbol a1 = p.symbol("A1", symbolType);
Symbol b1 = p.symbol("B1", symbolType);
return p.semiJoin(p.values(new PlanNodeId("valuesA"), aRows, a1), p.values(new PlanNodeId("valuesB"), bRows, b1), a1, b1, p.symbol("output"), Optional.empty(), Optional.empty(), Optional.empty());
}).matches(semiJoin("A1", "B1", "output", Optional.of(REPLICATED), values(ImmutableMap.of("A1", 0)), values(ImmutableMap.of("B1", 0))));
probeSideStatsEstimate = PlanNodeStatsEstimate.builder().setOutputRowCount(aRows).addSymbolStatistics(ImmutableMap.of(new Symbol("A1"), new SymbolStatsEstimate(0, 100, 0, 640000d * 10000, 10))).build();
buildSideStatsEstimate = PlanNodeStatsEstimate.builder().setOutputRowCount(bRows).addSymbolStatistics(ImmutableMap.of(new Symbol("B1"), new SymbolStatsEstimate(0, 100, 0, 640000d * 10000, 10))).build();
// B table exceeds AUTOMATIC_RESTRICTED limit therefore it is partitioned
assertDetermineSemiJoinDistributionType().setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name()).setSystemProperty(JOIN_MAX_BROADCAST_TABLE_SIZE, "100MB").overrideStats("valuesA", probeSideStatsEstimate).overrideStats("valuesB", buildSideStatsEstimate).on(p -> {
Symbol a1 = p.symbol("A1", symbolType);
Symbol b1 = p.symbol("B1", symbolType);
return p.semiJoin(p.values(new PlanNodeId("valuesA"), aRows, a1), p.values(new PlanNodeId("valuesB"), bRows, b1), a1, b1, p.symbol("output"), Optional.empty(), Optional.empty(), Optional.empty());
}).matches(semiJoin("A1", "B1", "output", Optional.of(PARTITIONED), values(ImmutableMap.of("A1", 0)), values(ImmutableMap.of("B1", 0))));
}
Aggregations