Search in sources :

Example 1 with EquiJoinClause

use of io.prestosql.spi.plan.JoinNode.EquiJoinClause in project hetu-core by openlookeng.

the class TestReorderJoins method testReplicatesWhenNotRestricted.

@Test
public void testReplicatesWhenNotRestricted() {
    // variable width so that average row size is respected
    Type symbolType = createUnboundedVarcharType();
    int aRows = 10_000;
    int bRows = 10;
    PlanNodeStatsEstimate probeSideStatsEstimate = PlanNodeStatsEstimate.builder().setOutputRowCount(aRows).addSymbolStatistics(ImmutableMap.of(new Symbol("A1"), new SymbolStatsEstimate(0, 100, 0, 640000, 10))).build();
    PlanNodeStatsEstimate buildSideStatsEstimate = PlanNodeStatsEstimate.builder().setOutputRowCount(bRows).addSymbolStatistics(ImmutableMap.of(new Symbol("B1"), new SymbolStatsEstimate(0, 100, 0, 640000, 10))).build();
    // B table is small enough to be replicated in AUTOMATIC_RESTRICTED mode
    assertReorderJoins().setSystemProperty(JOIN_DISTRIBUTION_TYPE, AUTOMATIC.name()).setSystemProperty(JOIN_MAX_BROADCAST_TABLE_SIZE, "100MB").on(p -> {
        Symbol a1 = p.symbol("A1", symbolType);
        Symbol b1 = p.symbol("B1", symbolType);
        return p.join(INNER, p.values(new PlanNodeId("valuesA"), aRows, a1), p.values(new PlanNodeId("valuesB"), bRows, b1), ImmutableList.of(new EquiJoinClause(a1, b1)), ImmutableList.of(a1, b1), Optional.empty());
    }).overrideStats("valuesA", probeSideStatsEstimate).overrideStats("valuesB", buildSideStatsEstimate).matches(join(INNER, ImmutableList.of(equiJoinClause("A1", "B1")), Optional.empty(), Optional.of(REPLICATED), values(ImmutableMap.of("A1", 0)), values(ImmutableMap.of("B1", 0))));
    probeSideStatsEstimate = PlanNodeStatsEstimate.builder().setOutputRowCount(aRows).addSymbolStatistics(ImmutableMap.of(new Symbol("A1"), new SymbolStatsEstimate(0, 100, 0, 640000d * 10000, 10))).build();
    buildSideStatsEstimate = PlanNodeStatsEstimate.builder().setOutputRowCount(bRows).addSymbolStatistics(ImmutableMap.of(new Symbol("B1"), new SymbolStatsEstimate(0, 100, 0, 640000d * 10000, 10))).build();
    // B table exceeds AUTOMATIC_RESTRICTED limit therefore it is partitioned
    assertReorderJoins().setSystemProperty(JOIN_DISTRIBUTION_TYPE, AUTOMATIC.name()).setSystemProperty(JOIN_MAX_BROADCAST_TABLE_SIZE, "100MB").on(p -> {
        Symbol a1 = p.symbol("A1", symbolType);
        Symbol b1 = p.symbol("B1", symbolType);
        return p.join(INNER, p.values(new PlanNodeId("valuesA"), aRows, a1), p.values(new PlanNodeId("valuesB"), bRows, b1), ImmutableList.of(new EquiJoinClause(a1, b1)), ImmutableList.of(a1, b1), Optional.empty());
    }).overrideStats("valuesA", probeSideStatsEstimate).overrideStats("valuesB", buildSideStatsEstimate).matches(join(INNER, ImmutableList.of(equiJoinClause("A1", "B1")), Optional.empty(), Optional.of(PARTITIONED), values(ImmutableMap.of("A1", 0)), values(ImmutableMap.of("B1", 0))));
}
Also used : SymbolStatsEstimate(io.prestosql.cost.SymbolStatsEstimate) REPLICATED(io.prestosql.spi.plan.JoinNode.DistributionType.REPLICATED) EquiJoinClause(io.prestosql.spi.plan.JoinNode.EquiJoinClause) PlanMatchPattern.equiJoinClause(io.prestosql.sql.planner.assertions.PlanMatchPattern.equiJoinClause) QualifiedName(io.prestosql.sql.tree.QualifiedName) BROADCAST(io.prestosql.sql.analyzer.FeaturesConfig.JoinDistributionType.BROADCAST) JoinDistributionType(io.prestosql.sql.analyzer.FeaturesConfig.JoinDistributionType) Test(org.testng.annotations.Test) PARTITIONED(io.prestosql.spi.plan.JoinNode.DistributionType.PARTITIONED) LESS_THAN(io.prestosql.spi.function.OperatorType.LESS_THAN) JOIN_REORDERING_STRATEGY(io.prestosql.SystemSessionProperties.JOIN_REORDERING_STRATEGY) PlanMatchPattern.values(io.prestosql.sql.planner.assertions.PlanMatchPattern.values) ImmutableList(com.google.common.collect.ImmutableList) Expressions.call(io.prestosql.sql.relational.Expressions.call) OperatorType(io.prestosql.spi.function.OperatorType) JoinReorderingStrategy(io.prestosql.sql.analyzer.FeaturesConfig.JoinReorderingStrategy) BOOLEAN(io.prestosql.spi.type.BooleanType.BOOLEAN) Type(io.prestosql.spi.type.Type) RuleAssert(io.prestosql.sql.planner.iterative.rule.test.RuleAssert) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Symbol(io.prestosql.spi.plan.Symbol) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) AfterClass(org.testng.annotations.AfterClass) EQUAL(io.prestosql.spi.function.OperatorType.EQUAL) PlanMatchPattern.join(io.prestosql.sql.planner.assertions.PlanMatchPattern.join) PlanNodeStatsEstimate(io.prestosql.cost.PlanNodeStatsEstimate) ImmutableMap(com.google.common.collect.ImmutableMap) PlanMatchPattern(io.prestosql.sql.planner.assertions.PlanMatchPattern) BeforeClass(org.testng.annotations.BeforeClass) FunctionAndTypeManager.qualifyObjectName(io.prestosql.metadata.FunctionAndTypeManager.qualifyObjectName) CostComparator(io.prestosql.cost.CostComparator) AUTOMATIC(io.prestosql.sql.analyzer.FeaturesConfig.JoinDistributionType.AUTOMATIC) Expressions.variable(io.prestosql.sql.relational.Expressions.variable) List(java.util.List) Closeables.closeAllRuntimeException(io.airlift.testing.Closeables.closeAllRuntimeException) FunctionResolution(io.prestosql.sql.relational.FunctionResolution) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) RowExpression(io.prestosql.spi.relation.RowExpression) INNER(io.prestosql.spi.plan.JoinNode.Type.INNER) Optional(java.util.Optional) JOIN_MAX_BROADCAST_TABLE_SIZE(io.prestosql.SystemSessionProperties.JOIN_MAX_BROADCAST_TABLE_SIZE) RuleTester(io.prestosql.sql.planner.iterative.rule.test.RuleTester) JOIN_DISTRIBUTION_TYPE(io.prestosql.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) JoinDistributionType(io.prestosql.sql.analyzer.FeaturesConfig.JoinDistributionType) OperatorType(io.prestosql.spi.function.OperatorType) Type(io.prestosql.spi.type.Type) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) PlanNodeStatsEstimate(io.prestosql.cost.PlanNodeStatsEstimate) Symbol(io.prestosql.spi.plan.Symbol) EquiJoinClause(io.prestosql.spi.plan.JoinNode.EquiJoinClause) SymbolStatsEstimate(io.prestosql.cost.SymbolStatsEstimate) Test(org.testng.annotations.Test)

Example 2 with EquiJoinClause

use of io.prestosql.spi.plan.JoinNode.EquiJoinClause in project hetu-core by openlookeng.

the class TestReorderJoins method testReplicatesAndFlipsWhenOneTableMuchSmaller.

@Test
public void testReplicatesAndFlipsWhenOneTableMuchSmaller() {
    // variable width so that average row size is respected
    Type symbolType = createUnboundedVarcharType();
    assertReorderJoins().on(p -> {
        Symbol a1 = p.symbol("A1", symbolType);
        Symbol b1 = p.symbol("B1", symbolType);
        return p.join(INNER, p.values(new PlanNodeId("valuesA"), ImmutableList.of(a1), TWO_ROWS), p.values(new PlanNodeId("valuesB"), ImmutableList.of(b1), TWO_ROWS), ImmutableList.of(new EquiJoinClause(a1, b1)), ImmutableList.of(a1, b1), Optional.empty());
    }).overrideStats("valuesA", PlanNodeStatsEstimate.builder().setOutputRowCount(100).addSymbolStatistics(ImmutableMap.of(new Symbol("A1"), new SymbolStatsEstimate(0, 100, 0, 6400, 100))).build()).overrideStats("valuesB", PlanNodeStatsEstimate.builder().setOutputRowCount(10000).addSymbolStatistics(ImmutableMap.of(new Symbol("B1"), new SymbolStatsEstimate(0, 100, 0, 640000, 100))).build()).matches(join(INNER, ImmutableList.of(equiJoinClause("B1", "A1")), Optional.empty(), Optional.of(REPLICATED), values(ImmutableMap.of("B1", 0)), values(ImmutableMap.of("A1", 0))));
}
Also used : SymbolStatsEstimate(io.prestosql.cost.SymbolStatsEstimate) REPLICATED(io.prestosql.spi.plan.JoinNode.DistributionType.REPLICATED) EquiJoinClause(io.prestosql.spi.plan.JoinNode.EquiJoinClause) PlanMatchPattern.equiJoinClause(io.prestosql.sql.planner.assertions.PlanMatchPattern.equiJoinClause) QualifiedName(io.prestosql.sql.tree.QualifiedName) BROADCAST(io.prestosql.sql.analyzer.FeaturesConfig.JoinDistributionType.BROADCAST) JoinDistributionType(io.prestosql.sql.analyzer.FeaturesConfig.JoinDistributionType) Test(org.testng.annotations.Test) PARTITIONED(io.prestosql.spi.plan.JoinNode.DistributionType.PARTITIONED) LESS_THAN(io.prestosql.spi.function.OperatorType.LESS_THAN) JOIN_REORDERING_STRATEGY(io.prestosql.SystemSessionProperties.JOIN_REORDERING_STRATEGY) PlanMatchPattern.values(io.prestosql.sql.planner.assertions.PlanMatchPattern.values) ImmutableList(com.google.common.collect.ImmutableList) Expressions.call(io.prestosql.sql.relational.Expressions.call) OperatorType(io.prestosql.spi.function.OperatorType) JoinReorderingStrategy(io.prestosql.sql.analyzer.FeaturesConfig.JoinReorderingStrategy) BOOLEAN(io.prestosql.spi.type.BooleanType.BOOLEAN) Type(io.prestosql.spi.type.Type) RuleAssert(io.prestosql.sql.planner.iterative.rule.test.RuleAssert) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Symbol(io.prestosql.spi.plan.Symbol) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) AfterClass(org.testng.annotations.AfterClass) EQUAL(io.prestosql.spi.function.OperatorType.EQUAL) PlanMatchPattern.join(io.prestosql.sql.planner.assertions.PlanMatchPattern.join) PlanNodeStatsEstimate(io.prestosql.cost.PlanNodeStatsEstimate) ImmutableMap(com.google.common.collect.ImmutableMap) PlanMatchPattern(io.prestosql.sql.planner.assertions.PlanMatchPattern) BeforeClass(org.testng.annotations.BeforeClass) FunctionAndTypeManager.qualifyObjectName(io.prestosql.metadata.FunctionAndTypeManager.qualifyObjectName) CostComparator(io.prestosql.cost.CostComparator) AUTOMATIC(io.prestosql.sql.analyzer.FeaturesConfig.JoinDistributionType.AUTOMATIC) Expressions.variable(io.prestosql.sql.relational.Expressions.variable) List(java.util.List) Closeables.closeAllRuntimeException(io.airlift.testing.Closeables.closeAllRuntimeException) FunctionResolution(io.prestosql.sql.relational.FunctionResolution) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) RowExpression(io.prestosql.spi.relation.RowExpression) INNER(io.prestosql.spi.plan.JoinNode.Type.INNER) Optional(java.util.Optional) JOIN_MAX_BROADCAST_TABLE_SIZE(io.prestosql.SystemSessionProperties.JOIN_MAX_BROADCAST_TABLE_SIZE) RuleTester(io.prestosql.sql.planner.iterative.rule.test.RuleTester) JOIN_DISTRIBUTION_TYPE(io.prestosql.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) JoinDistributionType(io.prestosql.sql.analyzer.FeaturesConfig.JoinDistributionType) OperatorType(io.prestosql.spi.function.OperatorType) Type(io.prestosql.spi.type.Type) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) Symbol(io.prestosql.spi.plan.Symbol) EquiJoinClause(io.prestosql.spi.plan.JoinNode.EquiJoinClause) SymbolStatsEstimate(io.prestosql.cost.SymbolStatsEstimate) Test(org.testng.annotations.Test)

Example 3 with EquiJoinClause

use of io.prestosql.spi.plan.JoinNode.EquiJoinClause in project hetu-core by openlookeng.

the class JoinStatsRule method filterByEquiJoinClauses.

private PlanNodeStatsEstimate filterByEquiJoinClauses(PlanNodeStatsEstimate stats, EquiJoinClause drivingClause, Collection<EquiJoinClause> remainingClauses, Session session, TypeProvider types) {
    ComparisonExpression drivingPredicate = new ComparisonExpression(EQUAL, toSymbolReference(drivingClause.getLeft()), toSymbolReference(drivingClause.getRight()));
    PlanNodeStatsEstimate filteredStats = filterStatsCalculator.filterStats(stats, drivingPredicate, session, types);
    for (EquiJoinClause clause : remainingClauses) {
        filteredStats = filterByAuxiliaryClause(filteredStats, clause, types);
    }
    return filteredStats;
}
Also used : ComparisonExpression(io.prestosql.sql.tree.ComparisonExpression) EquiJoinClause(io.prestosql.spi.plan.JoinNode.EquiJoinClause)

Example 4 with EquiJoinClause

use of io.prestosql.spi.plan.JoinNode.EquiJoinClause in project hetu-core by openlookeng.

the class JoinStatsRule method filterByEquiJoinClauses.

private PlanNodeStatsEstimate filterByEquiJoinClauses(PlanNodeStatsEstimate stats, Collection<EquiJoinClause> clauses, Session session, TypeProvider types) {
    checkArgument(!clauses.isEmpty(), "clauses is empty");
    PlanNodeStatsEstimate result = PlanNodeStatsEstimate.unknown();
    // Join equality clauses are usually correlated. Therefore we shouldn't treat each join equality
    // clause separately because stats estimates would be way off. Instead we choose so called
    // "driving clause" which mostly reduces join output rows cardinality and apply UNKNOWN_FILTER_COEFFICIENT
    // for other (auxiliary) clauses.
    Queue<EquiJoinClause> remainingClauses = new LinkedList<>(clauses);
    EquiJoinClause drivingClause = remainingClauses.poll();
    for (int i = 0; i < clauses.size(); i++) {
        PlanNodeStatsEstimate estimate = filterByEquiJoinClauses(stats, drivingClause, remainingClauses, session, types);
        if (result.isOutputRowCountUnknown() || (!estimate.isOutputRowCountUnknown() && estimate.getOutputRowCount() < result.getOutputRowCount())) {
            result = estimate;
        }
        remainingClauses.add(drivingClause);
        drivingClause = remainingClauses.poll();
    }
    return result;
}
Also used : EquiJoinClause(io.prestosql.spi.plan.JoinNode.EquiJoinClause) LinkedList(java.util.LinkedList)

Example 5 with EquiJoinClause

use of io.prestosql.spi.plan.JoinNode.EquiJoinClause in project hetu-core by openlookeng.

the class TestJoinStatsRule method testStatsForInnerJoinWithTwoEquiClausesAndNonEqualityFunction.

@Test
public void testStatsForInnerJoinWithTwoEquiClausesAndNonEqualityFunction() {
    double innerJoinRowCount = // driver join clause
    LEFT_ROWS_COUNT * RIGHT_ROWS_COUNT / LEFT_JOIN_COLUMN_2_NDV * LEFT_JOIN_COLUMN_2_NON_NULLS * RIGHT_JOIN_COLUMN_2_NON_NULLS * // auxiliary join clause
    UNKNOWN_FILTER_COEFFICIENT * // LEFT_JOIN_COLUMN < 10 non equality filter
    0.3333333333;
    PlanNodeStatsEstimate innerJoinStats = planNodeStats(innerJoinRowCount, symbolStatistics(LEFT_JOIN_COLUMN, 5.0, 10.0, 0.0, RIGHT_JOIN_COLUMN_NDV * 0.3333333333), symbolStatistics(RIGHT_JOIN_COLUMN, 5.0, 20.0, 0.0, RIGHT_JOIN_COLUMN_NDV), symbolStatistics(LEFT_JOIN_COLUMN_2, 100.0, 200.0, 0.0, RIGHT_JOIN_COLUMN_2_NDV), symbolStatistics(RIGHT_JOIN_COLUMN_2, 100.0, 200.0, 0.0, RIGHT_JOIN_COLUMN_2_NDV));
    tester().assertStatsFor(pb -> {
        Symbol leftJoinColumnSymbol = pb.symbol(LEFT_JOIN_COLUMN, BIGINT);
        Symbol rightJoinColumnSymbol = pb.symbol(RIGHT_JOIN_COLUMN, DOUBLE);
        Symbol leftJoinColumnSymbol2 = pb.symbol(LEFT_JOIN_COLUMN_2, BIGINT);
        Symbol rightJoinColumnSymbol2 = pb.symbol(RIGHT_JOIN_COLUMN_2, DOUBLE);
        ComparisonExpression leftJoinColumnLessThanTen = new ComparisonExpression(ComparisonExpression.Operator.LESS_THAN, toSymbolReference(leftJoinColumnSymbol), new LongLiteral("10"));
        return pb.join(INNER, pb.values(leftJoinColumnSymbol, leftJoinColumnSymbol2), pb.values(rightJoinColumnSymbol, rightJoinColumnSymbol2), ImmutableList.of(new EquiJoinClause(leftJoinColumnSymbol2, rightJoinColumnSymbol2), new EquiJoinClause(leftJoinColumnSymbol, rightJoinColumnSymbol)), ImmutableList.of(leftJoinColumnSymbol, leftJoinColumnSymbol2, rightJoinColumnSymbol, rightJoinColumnSymbol2), Optional.of(castToRowExpression(leftJoinColumnLessThanTen)));
    }).withSourceStats(0, planNodeStats(LEFT_ROWS_COUNT, LEFT_JOIN_COLUMN_STATS, LEFT_JOIN_COLUMN_2_STATS)).withSourceStats(1, planNodeStats(RIGHT_ROWS_COUNT, RIGHT_JOIN_COLUMN_STATS, RIGHT_JOIN_COLUMN_2_STATS)).check(stats -> stats.equalTo(innerJoinStats));
}
Also used : EquiJoinClause(io.prestosql.spi.plan.JoinNode.EquiJoinClause) TypeProvider(io.prestosql.sql.planner.TypeProvider) Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) PlanNodeStatsAssertion.assertThat(io.prestosql.cost.PlanNodeStatsAssertion.assertThat) ImmutableList(com.google.common.collect.ImmutableList) NaN(java.lang.Double.NaN) OriginalExpressionUtils.castToRowExpression(io.prestosql.sql.relational.OriginalExpressionUtils.castToRowExpression) DOUBLE(io.prestosql.spi.type.DoubleType.DOUBLE) Type(io.prestosql.spi.type.Type) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) JoinNode(io.prestosql.spi.plan.JoinNode) Symbol(io.prestosql.spi.plan.Symbol) UNKNOWN_FILTER_COEFFICIENT(io.prestosql.cost.FilterStatsCalculator.UNKNOWN_FILTER_COEFFICIENT) ImmutableMap(com.google.common.collect.ImmutableMap) FULL(io.prestosql.spi.plan.JoinNode.Type.FULL) MetadataManager.createTestMetadataManager(io.prestosql.metadata.MetadataManager.createTestMetadataManager) ComparisonExpression(io.prestosql.sql.tree.ComparisonExpression) Metadata(io.prestosql.metadata.Metadata) SymbolUtils.toSymbolReference(io.prestosql.sql.planner.SymbolUtils.toSymbolReference) RIGHT(io.prestosql.spi.plan.JoinNode.Type.RIGHT) LongLiteral(io.prestosql.sql.tree.LongLiteral) INNER(io.prestosql.spi.plan.JoinNode.Type.INNER) Optional(java.util.Optional) LEFT(io.prestosql.spi.plan.JoinNode.Type.LEFT) ComparisonExpression(io.prestosql.sql.tree.ComparisonExpression) LongLiteral(io.prestosql.sql.tree.LongLiteral) Symbol(io.prestosql.spi.plan.Symbol) EquiJoinClause(io.prestosql.spi.plan.JoinNode.EquiJoinClause) Test(org.testng.annotations.Test)

Aggregations

EquiJoinClause (io.prestosql.spi.plan.JoinNode.EquiJoinClause)12 Symbol (io.prestosql.spi.plan.Symbol)10 Test (org.testng.annotations.Test)8 Optional (java.util.Optional)6 ImmutableList (com.google.common.collect.ImmutableList)5 ImmutableMap (com.google.common.collect.ImmutableMap)5 INNER (io.prestosql.spi.plan.JoinNode.Type.INNER)5 RowExpression (io.prestosql.spi.relation.RowExpression)5 Type (io.prestosql.spi.type.Type)5 SymbolStatsEstimate (io.prestosql.cost.SymbolStatsEstimate)4 OperatorType (io.prestosql.spi.function.OperatorType)4 PlanNodeId (io.prestosql.spi.plan.PlanNodeId)4 VarcharType.createUnboundedVarcharType (io.prestosql.spi.type.VarcharType.createUnboundedVarcharType)4 JoinDistributionType (io.prestosql.sql.analyzer.FeaturesConfig.JoinDistributionType)4 FunctionResolution (io.prestosql.sql.relational.FunctionResolution)4 Closeables.closeAllRuntimeException (io.airlift.testing.Closeables.closeAllRuntimeException)3 JOIN_DISTRIBUTION_TYPE (io.prestosql.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE)3 JOIN_MAX_BROADCAST_TABLE_SIZE (io.prestosql.SystemSessionProperties.JOIN_MAX_BROADCAST_TABLE_SIZE)3 JOIN_REORDERING_STRATEGY (io.prestosql.SystemSessionProperties.JOIN_REORDERING_STRATEGY)3 CostComparator (io.prestosql.cost.CostComparator)3