Search in sources :

Example 61 with Join

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project beam by apache.

the class ThreeTablesSchema method testSystemNotReorderingWithoutRules.

/**
 * Plans a three-table join with the join-reordering rules removed from the rule set and checks
 * which table ends up at the top of the join tree.
 */
@Test
public void testSystemNotReorderingWithoutRules() {
    TestTableProvider provider = new TestTableProvider();
    createThreeTables(provider);

    // Keep every Beam rule except the three that are able to reorder joins.
    List<RelOptRule> nonReorderingRules =
        BeamRuleSets.getRuleSets().stream()
            .flatMap(rules -> StreamSupport.stream(rules.spliterator(), false))
            .filter(
                rule ->
                    !(rule instanceof BeamJoinPushThroughJoinRule
                        || rule instanceof BeamJoinAssociateRule
                        || rule instanceof JoinCommuteRule))
            .collect(Collectors.toList());

    BeamSqlEnv sqlEnv =
        BeamSqlEnv.builder(provider)
            .setPipelineOptions(PipelineOptionsFactory.create())
            .setRuleSets(ImmutableList.of(RuleSets.ofList(nonReorderingRules)))
            .build();

    // The query is written as Join(Join(medium, large), small). The reordering rules were
    // filtered out above, so the assertion below expects small_table to be the top table.
    String sql =
        "select * from medium_table "
            + " JOIN large_table on large_table.medium_key = medium_table.large_key "
            + " JOIN small_table on medium_table.small_key = small_table.medium_key ";
    BeamRelNode plan = sqlEnv.parseQuery(sql);

    assertTopTableInJoins(plan, "small_table");
}
Also used : TestTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.test.TestTableProvider) Linq4j(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.linq4j.Linq4j) RelNode(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelNode) Arrays(java.util.Arrays) AbstractTable(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.impl.AbstractTable) Enumerable(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.linq4j.Enumerable) BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) BeamRuleSets(org.apache.beam.sdk.extensions.sql.impl.planner.BeamRuleSets) Frameworks(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.tools.Frameworks) RelFieldCollation(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelFieldCollation) ImmutableBitSet(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet) Planner(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.tools.Planner) Map(java.util.Map) EnumerableConvention(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.adapter.enumerable.EnumerableConvention) JoinCommuteRule(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.rules.JoinCommuteRule) RelRoot(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelRoot) ImmutableMap(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableMap) TestTableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.test.TestTableProvider) Collectors(java.util.stream.Collectors) Table(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.Table) TableScan(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.TableScan) Statistic(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.Statistic) List(java.util.List) ImmutableList(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableList) RelCollationTraitDef(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelCollationTraitDef) 
AbstractSchema(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.impl.AbstractSchema) RuleSets(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.tools.RuleSets) RuleSet(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.tools.RuleSet) Programs(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.tools.Programs) RelDataTypeFactory(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.type.RelDataTypeFactory) FrameworkConfig(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.tools.FrameworkConfig) RelCollations(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelCollations) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) Function(java.util.function.Function) ArrayList(java.util.ArrayList) Statistics(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.Statistics) RelOptRule(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.plan.RelOptRule) Join(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join) ScannableTable(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.ScannableTable) StreamSupport(java.util.stream.StreamSupport) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) SqlNode(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.sql.SqlNode) SqlParser(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.sql.parser.SqlParser) EnumerableRules(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.adapter.enumerable.EnumerableRules) Test(org.junit.Test) ConventionTraitDef(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.plan.ConventionTraitDef) RelDataType(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.type.RelDataType) RelTraitSet(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.plan.RelTraitSet) DataContext(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.DataContext) 
SchemaPlus(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.SchemaPlus) Assert(org.junit.Assert) CoreRules(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.rules.CoreRules) BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) JoinCommuteRule(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.rules.JoinCommuteRule) RelOptRule(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.plan.RelOptRule) Test(org.junit.Test)

Example 62 with Join

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project beam by apache.

the class ThreeTablesSchema method testBeamJoinPushThroughJoinRuleRight.

/**
 * Plans the same three-table join twice, once with a baseline rule set and once with
 * {@code BeamJoinPushThroughJoinRule.RIGHT} added, and checks the top table of each plan.
 */
@Test
public void testBeamJoinPushThroughJoinRuleRight() throws Exception {
    // Baseline rules, without the rule under test.
    RuleSet baseRules =
        RuleSets.ofList(
            CoreRules.SORT_PROJECT_TRANSPOSE,
            EnumerableRules.ENUMERABLE_JOIN_RULE,
            EnumerableRules.ENUMERABLE_PROJECT_RULE,
            EnumerableRules.ENUMERABLE_SORT_RULE,
            EnumerableRules.ENUMERABLE_TABLE_SCAN_RULE);

    // Baseline rules plus the rule under test.
    RuleSet rulesWithPushThrough =
        RuleSets.ofList(
            ImmutableList.<RelOptRule>builder()
                .addAll(baseRules)
                .add(BeamJoinPushThroughJoinRule.RIGHT)
                .build());

    String sql =
        "select * from \"tt\".\"medium_table\" as medium_table "
            + " JOIN \"tt\".\"large_table\" as large_table on large_table.\"medium_key\" = medium_table.\"large_key\" "
            + " JOIN \"tt\".\"small_table\" as small_table on medium_table.\"small_key\" = small_table.\"medium_key\" ";

    RelNode planBefore = transform(sql, baseRules);
    RelNode planAfter = transform(sql, rulesWithPushThrough);

    // Without the rule small_table is expected on top; with it, large_table.
    assertTopTableInJoins(planBefore, "small_table");
    assertTopTableInJoins(planAfter, "large_table");
}
Also used : RuleSet(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.tools.RuleSet) RelNode(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelNode) BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) Test(org.junit.Test)

Example 63 with Join

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project beam by apache.

the class ThreeTablesSchema method assertTopTableInJoins.

/**
 * Asserts that the description of the "top" table scan of the outermost join contains
 * {@code expectedTableName}.
 *
 * <p>The outermost join is located by following input 0 from {@code parsedQuery}. If following
 * input 0 from that join's right side reaches a {@code TableScan} before another {@code Join},
 * that scan is checked; otherwise the first {@code TableScan} reached by following input 0 from
 * the join's left side is checked.
 *
 * @param parsedQuery root of the plan to inspect
 * @param expectedTableName text that the scan's description must contain
 */
private void assertTopTableInJoins(RelNode parsedQuery, String expectedTableName) {
    // Walk down the first input until the outermost Join is found.
    RelNode cursor = parsedQuery;
    while (!(cursor instanceof Join)) {
        cursor = cursor.getInput(0);
    }
    Join outerJoin = (Join) cursor;

    // Walk down the right side until it resolves to either a nested Join or a TableScan.
    RelNode rightSide = outerJoin.getRight();
    while (!(rightSide instanceof Join || rightSide instanceof TableScan)) {
        rightSide = rightSide.getInput(0);
    }

    RelNode topTable;
    if (rightSide instanceof TableScan) {
        topTable = rightSide;
    } else {
        // The right side is itself a join, so take the scan found under the left side instead.
        topTable = outerJoin.getLeft();
        while (!(topTable instanceof TableScan)) {
            topTable = topTable.getInput(0);
        }
    }
    Assert.assertTrue(topTable.getDescription().contains(expectedTableName));
}
Also used : TableScan(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.TableScan) RelNode(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelNode) BeamRelNode(org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode) Join(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join)

Example 64 with Join

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project beam by apache.

the class BeamCoGBKJoinRelUnboundedVsUnboundedTest method testNodeStatsEstimation.

/**
 * Checks the {@code NodeStats} estimate of a {@code BeamCoGBKJoinRel} joining two windowed
 * aggregations against the estimates of its left and right inputs.
 */
@Test
public void testNodeStatsEstimation() {
    String sql =
        "SELECT * FROM "
            + "(select order_id, sum(site_id) as sum_site_id FROM ORDER_DETAILS "
            + "          GROUP BY order_id, TUMBLE(order_time, INTERVAL '1' HOUR)) o1 "
            + " JOIN "
            + "(select order_id, sum(site_id) as sum_site_id FROM ORDER_DETAILS "
            + "          GROUP BY order_id, TUMBLE(order_time, INTERVAL '1' HOUR)) o2 "
            + " on "
            + " o1.order_id=o2.order_id";

    // Follow input 0 until the CoGBK join node is reached.
    RelNode node = env.parseQuery(sql);
    while (!(node instanceof BeamCoGBKJoinRel)) {
        node = node.getInput(0);
    }
    BeamCoGBKJoinRel join = (BeamCoGBKJoinRel) node;

    NodeStats estimate =
        BeamSqlRelUtils.getNodeStats(
            join, (BeamRelMetadataQuery) join.getCluster().getMetadataQuery());
    NodeStats leftEstimate =
        BeamSqlRelUtils.getNodeStats(
            join.getLeft(), (BeamRelMetadataQuery) join.getCluster().getMetadataQuery());
    NodeStats rightEstimate =
        BeamSqlRelUtils.getNodeStats(
            join.getRight(), (BeamRelMetadataQuery) join.getCluster().getMetadataQuery());

    Assert.assertFalse(estimate.isUnknown());
    Assert.assertEquals(0d, estimate.getRowCount(), 0.01);

    // The rate must be non-zero and strictly below the combination of the input estimates.
    Assert.assertNotEquals(0d, estimate.getRate(), 0.001);
    double rateUpperBound =
        leftEstimate.getRate() * rightEstimate.getWindow()
            + rightEstimate.getRate() * leftEstimate.getWindow();
    Assert.assertTrue(estimate.getRate() < rateUpperBound);

    // The window must be non-zero and strictly below the product of the input windows.
    Assert.assertNotEquals(0d, estimate.getWindow(), 0.001);
    double windowUpperBound = leftEstimate.getWindow() * rightEstimate.getWindow();
    Assert.assertTrue(estimate.getWindow() < windowUpperBound);
}
Also used : NodeStats(org.apache.beam.sdk.extensions.sql.impl.planner.NodeStats) RelNode(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelNode) Test(org.junit.Test)

Example 65 with Join

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project beam by apache.

the class BeamSideInputJoinRel method sideInputJoin.

/**
 * Joins two row collections with a broadcast join and reshapes the result to this node's row
 * type.
 *
 * <p>When {@code leftRows} is bounded, the two inputs and their key fields are exchanged before
 * joining, and the selected field order is reversed afterwards so that the output keeps the
 * original left-then-right layout.
 *
 * @param leftRows rows of the left input
 * @param rightRows rows of the right input
 * @param leftKeyFields join key fields of the left input
 * @param rightKeyFields join key fields of the right input
 * @return joined rows using the schema derived from {@code getRowType()}
 * @throws IllegalArgumentException if the inputs are exchanged and the join type is LEFT
 * @throws RuntimeException if the effective join type is neither INNER nor LEFT
 */
public PCollection<Row> sideInputJoin(
    PCollection<Row> leftRows,
    PCollection<Row> rightRows,
    FieldAccessDescriptor leftKeyFields,
    FieldAccessDescriptor rightKeyFields) {
    // A bounded left input means the two sides trade places for the join itself.
    final boolean swapped = leftRows.isBounded() == PCollection.IsBounded.BOUNDED;

    // After an exchange, any non-INNER join is executed as LEFT; LEFT itself is rejected.
    // NOTE(review): presumably the only other type reaching this point is RIGHT - confirm.
    JoinRelType realJoinType = joinType;
    if (swapped && joinType != JoinRelType.INNER) {
        Preconditions.checkArgument(realJoinType != JoinRelType.LEFT);
        realJoinType = JoinRelType.LEFT;
    }

    final PCollection<Row> realLeftRows;
    final PCollection<Row> realRightRows;
    final FieldAccessDescriptor realLeftKeyFields;
    final FieldAccessDescriptor realRightKeyFields;
    if (swapped) {
        realLeftRows = rightRows;
        realRightRows = leftRows;
        realLeftKeyFields = rightKeyFields;
        realRightKeyFields = leftKeyFields;
    } else {
        realLeftRows = leftRows;
        realRightRows = rightRows;
        realLeftKeyFields = leftKeyFields;
        realRightKeyFields = rightKeyFields;
    }

    final PCollection<Row> joined;
    switch (realJoinType) {
        case INNER:
            joined =
                realLeftRows.apply(
                    org.apache.beam.sdk.schemas.transforms.Join.<Row, Row>innerBroadcastJoin(realRightRows)
                        .on(FieldsEqual.left(realLeftKeyFields).right(realRightKeyFields)));
            break;
        case LEFT:
            joined =
                realLeftRows.apply(
                    org.apache.beam.sdk.schemas.transforms.Join.<Row, Row>leftOuterBroadcastJoin(realRightRows)
                        .on(FieldsEqual.left(realLeftKeyFields).right(realRightKeyFields)));
            break;
        default:
            throw new RuntimeException("Unexpected join type " + realJoinType);
    }

    Schema outputSchema = CalciteUtils.toSchema(getRowType());
    String lhsFields = org.apache.beam.sdk.schemas.transforms.Join.LHS_TAG + ".*";
    String rhsFields = org.apache.beam.sdk.schemas.transforms.Join.RHS_TAG + ".*";

    // If the sides were exchanged, select the RHS fields first to restore the original order.
    if (swapped) {
        return joined.apply(
            Select.<Row>fieldNames(rhsFields, lhsFields).withOutputSchema(outputSchema));
    }
    return joined.apply(
        Select.<Row>fieldNames(lhsFields, rhsFields).withOutputSchema(outputSchema));
}
Also used : JoinRelType(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.JoinRelType) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) Schema(org.apache.beam.sdk.schemas.Schema) Row(org.apache.beam.sdk.values.Row)

Aggregations

Join (org.apache.calcite.rel.core.Join)73 RelNode (org.apache.calcite.rel.RelNode)45 RexNode (org.apache.calcite.rex.RexNode)40 ArrayList (java.util.ArrayList)31 LogicalJoin (org.apache.calcite.rel.logical.LogicalJoin)25 Project (org.apache.calcite.rel.core.Project)22 RexBuilder (org.apache.calcite.rex.RexBuilder)20 ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)18 RelBuilder (org.apache.calcite.tools.RelBuilder)17 HiveJoin (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin)14 Aggregate (org.apache.calcite.rel.core.Aggregate)13 Test (org.junit.Test)13 Filter (org.apache.calcite.rel.core.Filter)12 RelNode (org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelNode)11 SemiJoin (org.apache.calcite.rel.core.SemiJoin)11 RelOptCluster (org.apache.calcite.plan.RelOptCluster)10 JoinRelType (org.apache.calcite.rel.core.JoinRelType)9 RelMetadataQuery (org.apache.calcite.rel.metadata.RelMetadataQuery)9 Mappings (org.apache.calcite.util.mapping.Mappings)9 List (java.util.List)8