Search in sources :

Example 6 with SamzaSqlDslConverterFactory

use of org.apache.samza.sql.dsl.SamzaSqlDslConverterFactory in project samza by apache.

In class TestQueryPlanner, method testRemoteJoinNoFilterPushDownWithUdfInFilterAndOptimizer:

/**
 * Plans the same remote-join query twice, once with the plan optimizer enabled and once with it disabled,
 * and checks that both plans render to the same text.
 *
 * @throws SamzaSqlValidatorException declared by the test signature; not raised explicitly in this method
 */
@Test
public void testRemoteJoinNoFilterPushDownWithUdfInFilterAndOptimizer() throws SamzaSqlValidatorException {
    String query = "Insert into testavro.enrichedPageViewTopic "
        + "select pv.pageKey as __key__, pv.pageKey as pageKey, coalesce(null, 'N/A') as companyName,"
        + "       p.name as profileName, p.address as profileAddress "
        + "from testRemoteStore.Profile.`$table` as p "
        + "join testavro.PAGEVIEW as pv "
        + " on p.__key__ = pv.profileId"
        + " where p.name = pv.pageKey AND p.name = 'Mike' AND pv.profileId = MyTestPoly(p.name)";
    Map<String, String> configMap = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(1);
    configMap.put(SamzaSqlApplicationConfig.CFG_SQL_STMT, query);

    // First pass: plan the query with the optimizer switched on.
    configMap.put(SamzaSqlApplicationConfig.CFG_SQL_ENABLE_PLAN_OPTIMIZER, String.valueOf(true));
    Config optimizedConfig = new MapConfig(configMap);
    DslConverter optimizingConverter = new SamzaSqlDslConverterFactory().create(optimizedConfig);
    Collection<RelRoot> optimizedRoots = optimizingConverter.convertDsl(query);

    // Second pass: identical configuration except that the optimizer is switched off.
    configMap.put(SamzaSqlApplicationConfig.CFG_SQL_ENABLE_PLAN_OPTIMIZER, String.valueOf(false));
    Config plainConfig = new MapConfig(configMap);
    DslConverter plainConverter = new SamzaSqlDslConverterFactory().create(plainConfig);
    Collection<RelRoot> plainRoots = plainConverter.convertDsl(query);

    /*
      Plan expected in both cases:
      LogicalProject(__key__=[$9], pageKey=[$9], companyName=['N/A'], profileName=[$2], profileAddress=[$4])
        LogicalFilter(condition=[AND(=($2, $9), =($2, 'Mike'), =($10, CAST(MyTestPoly($10)):INTEGER))])
          LogicalJoin(condition=[=($0, $10)], joinType=[inner])
            LogicalTableScan(table=[[testRemoteStore, Profile, $table]])
            LogicalTableScan(table=[[testavro, PAGEVIEW]])

      NOTE(review): the SQL passes p.name to MyTestPoly, yet the plan above shows MyTestPoly($10) while
      p.name appears as $2 elsewhere in the same plan -- confirm the illustrated plan against real output.
     */
    // Every filter condition needs a remote call, so nothing can be pushed below the join. The optimizer
    // therefore must leave the plan untouched, and the two rendered plans have to match.
    String optimizedPlan = RelOptUtil.toString(optimizedRoots.iterator().next().rel, SqlExplainLevel.EXPPLAN_ATTRIBUTES);
    String plainPlan = RelOptUtil.toString(plainRoots.iterator().next().rel, SqlExplainLevel.EXPPLAN_ATTRIBUTES);
    assertEquals(optimizedPlan, plainPlan);
}
Also used : DslConverter(org.apache.samza.sql.interfaces.DslConverter) SamzaSqlTestConfig(org.apache.samza.sql.util.SamzaSqlTestConfig) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) Config(org.apache.samza.config.Config) MapConfig(org.apache.samza.config.MapConfig) SamzaSqlDslConverterFactory(org.apache.samza.sql.dsl.SamzaSqlDslConverterFactory) RelRoot(org.apache.calcite.rel.RelRoot) MapConfig(org.apache.samza.config.MapConfig) Test(org.junit.Test)

Example 7 with SamzaSqlDslConverterFactory

use of org.apache.samza.sql.dsl.SamzaSqlDslConverterFactory in project samza by apache.

In class TestQueryPlanner, method testRemoteJoinWithFilterHelper:

/**
 * Plans a stream-to-remote-table join that carries a filter and verifies the shape of the resulting plan.
 *
 * <p>With the optimizer enabled, the {@code pv.profileId = 1} predicate is expected as a filter directly
 * above the PAGEVIEW scan on the left side of the join; with it disabled, all three predicates are
 * expected in the single filter above the join.
 *
 * @param enableOptimizer value written to {@code CFG_SQL_ENABLE_PLAN_OPTIMIZER} before planning
 * @throws SamzaSqlValidatorException declared by the helper signature; not raised explicitly in this method
 */
void testRemoteJoinWithFilterHelper(boolean enableOptimizer) throws SamzaSqlValidatorException {
    String query = "Insert into testavro.enrichedPageViewTopic "
        + "select pv.pageKey as __key__, pv.pageKey as pageKey, coalesce(null, 'N/A') as companyName,"
        + "       p.name as profileName, p.address as profileAddress "
        + "from testavro.PAGEVIEW as pv "
        + "join testRemoteStore.Profile.`$table` as p "
        + " on p.__key__ = pv.profileId"
        + " where p.name = pv.pageKey AND p.name = 'Mike' AND pv.profileId = 1";
    Map<String, String> configMap = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(1);
    configMap.put(SamzaSqlApplicationConfig.CFG_SQL_STMT, query);
    configMap.put(SamzaSqlApplicationConfig.CFG_SQL_ENABLE_PLAN_OPTIMIZER, Boolean.toString(enableOptimizer));
    DslConverter converter = new SamzaSqlDslConverterFactory().create(new MapConfig(configMap));
    Collection<RelRoot> plans = converter.convertDsl(query);

    /*
      Illustrative plans. The assertions below pin the exact condition strings; the operand order they
      expect differs from the order printed here.

      Query plan without optimization:
      LogicalProject(__key__=[$1], pageKey=[$1], companyName=['N/A'], profileName=[$5], profileAddress=[$7])
        LogicalFilter(condition=[AND(=($5, $1), =($5, 'Mike'), =($2, 1))])
          LogicalJoin(condition=[=($3, $2)], joinType=[inner])
            LogicalTableScan(table=[[testavro, PAGEVIEW]])
            LogicalTableScan(table=[[testRemoteStore, Profile, $table]])

      Query plan with optimization:
      LogicalProject(__key__=[$1], pageKey=[$1], companyName=['N/A'], profileName=[$5], profileAddress=[$7])
        LogicalFilter(condition=[AND(=($5, $1), =($5, 'Mike'))])
          LogicalJoin(condition=[=($3, $2)], joinType=[inner])
            LogicalFilter(condition=[=($2, 1)])
              LogicalTableScan(table=[[testavro, PAGEVIEW]])
            LogicalTableScan(table=[[testRemoteStore, Profile, $table]])
     */
    assertEquals(1, plans.size());

    // Level 1: the root of the plan is a projection.
    RelNode projectNode = plans.iterator().next().rel;
    assertTrue(projectNode instanceof LogicalProject);

    // Level 2: the filter above the join; its condition depends on whether push-down happened.
    RelNode topFilterNode = projectNode.getInput(0);
    assertTrue(topFilterNode instanceof LogicalFilter);
    String expectedTopCondition = enableOptimizer
        ? "AND(=($1, $5), =($5, 'Mike'))"
        : "AND(=(1, $2), =($1, $5), =($5, 'Mike'))";
    assertEquals(expectedTopCondition, ((LogicalFilter) topFilterNode).getCondition().toString());

    // Level 3: the join with exactly two inputs; the right input is always a plain table scan.
    RelNode joinNode = topFilterNode.getInput(0);
    assertTrue(joinNode instanceof LogicalJoin);
    assertEquals(2, joinNode.getInputs().size());
    LogicalJoin logicalJoin = (LogicalJoin) joinNode;
    RelNode leftInput = logicalJoin.getLeft();
    RelNode rightInput = logicalJoin.getRight();
    assertTrue(rightInput instanceof LogicalTableScan);

    if (!enableOptimizer) {
        // Without the optimizer nothing is pushed down, so the left input is the bare scan.
        assertTrue(leftInput instanceof LogicalTableScan);
        return;
    }

    // With the optimizer the left input is a filter holding the pushed-down predicate over the scan.
    assertTrue(leftInput instanceof LogicalFilter);
    assertEquals("=(1, $2)", ((LogicalFilter) leftInput).getCondition().toString());
    assertTrue(leftInput.getInput(0) instanceof LogicalTableScan);
}
Also used : DslConverter(org.apache.samza.sql.interfaces.DslConverter) SamzaSqlTestConfig(org.apache.samza.sql.util.SamzaSqlTestConfig) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) Config(org.apache.samza.config.Config) MapConfig(org.apache.samza.config.MapConfig) LogicalFilter(org.apache.calcite.rel.logical.LogicalFilter) SamzaSqlDslConverterFactory(org.apache.samza.sql.dsl.SamzaSqlDslConverterFactory) RelRoot(org.apache.calcite.rel.RelRoot) LogicalTableScan(org.apache.calcite.rel.logical.LogicalTableScan) RelNode(org.apache.calcite.rel.RelNode) LogicalJoin(org.apache.calcite.rel.logical.LogicalJoin) MapConfig(org.apache.samza.config.MapConfig) LogicalProject(org.apache.calcite.rel.logical.LogicalProject)

Example 8 with SamzaSqlDslConverterFactory

use of org.apache.samza.sql.dsl.SamzaSqlDslConverterFactory in project samza by apache.

In class SamzaSqlApplicationConfig, method populateSystemStreamsAndGetRelRoots:

/**
 * Converts the given DSL statements into {@link RelRoot}s and appends the source and sink names of the
 * SQL statements found in {@code config} to the supplied lists.
 *
 * <p>The relational roots are built from {@code dslStmts} (joined with newlines), whereas the stream names
 * are derived from the SQL read back out of {@code config} via {@code SamzaSqlDslConverter}.
 *
 * @param dslStmts DSL statements to convert; joined with {@code "\n"} before conversion
 * @param config configuration used to create the converter and to look up the SQL statements
 * @param inputSystemStreams list that receives every source reported by the parsed queries
 * @param outputSystemStreams list that receives the sink reported by each parsed query
 * @return the relational roots produced by the DSL converter
 */
public static Collection<RelRoot> populateSystemStreamsAndGetRelRoots(List<String> dslStmts, Config config, List<String> inputSystemStreams, List<String> outputSystemStreams) {
    // TODO: Get the converter factory based on the file type. Create abstraction around this.
    DslConverter converter = new SamzaSqlDslConverterFactory().create(config);
    String joinedDsl = String.join("\n", dslStmts);
    Collection<RelRoot> convertedRoots = converter.convertDsl(joinedDsl);

    // For the Samza SQL dsl the RelRoot carries no sink node, so walking the relRoot tree cannot yield
    // "outputSystemStreams".
    // FIXME: the snippet below does not work for Samza SQL dsl but is required for other dsls. A future fix
    // could have Samza SQL build a TableModify for the sink and attach it to the relRoot, so the output
    // stream could be read from it.
    // for (RelRoot relRoot : relRoots) {
    // SamzaSqlApplicationConfig.populateSystemStreams(relRoot.project(), inputSystemStreams, outputSystemStreams);
    // }
    // Everything below is specific to the Samza SQL dsl and should be removed once Samza SQL includes the
    // sink as part of the relRoot and the snippet above is uncommented.
    List<SamzaSqlQueryParser.QueryInfo> parsedQueries =
        SamzaSqlDslConverter.fetchQueryInfo(SamzaSqlDslConverter.fetchSqlFromConfig(config));

    // Flatten the per-query source collections into one list before appending.
    List<String> sourceNames = parsedQueries.stream()
        .map(SamzaSqlQueryParser.QueryInfo::getSources)
        .flatMap(Collection::stream)
        .collect(Collectors.toList());
    inputSystemStreams.addAll(sourceNames);

    // Each query contributes exactly one sink entry.
    List<String> sinkNames = parsedQueries.stream()
        .map(SamzaSqlQueryParser.QueryInfo::getSink)
        .collect(Collectors.toList());
    outputSystemStreams.addAll(sinkNames);

    return convertedRoots;
}
Also used : SamzaSqlDslConverter(org.apache.samza.sql.dsl.SamzaSqlDslConverter) DslConverter(org.apache.samza.sql.interfaces.DslConverter) SamzaSqlDslConverterFactory(org.apache.samza.sql.dsl.SamzaSqlDslConverterFactory) DslConverterFactory(org.apache.samza.sql.interfaces.DslConverterFactory) Collection(java.util.Collection) SamzaSqlDslConverterFactory(org.apache.samza.sql.dsl.SamzaSqlDslConverterFactory) RelRoot(org.apache.calcite.rel.RelRoot) SamzaSqlQueryParser(org.apache.samza.sql.util.SamzaSqlQueryParser)

Aggregations

RelRoot (org.apache.calcite.rel.RelRoot)8 SamzaSqlDslConverterFactory (org.apache.samza.sql.dsl.SamzaSqlDslConverterFactory)8 DslConverter (org.apache.samza.sql.interfaces.DslConverter)7 Config (org.apache.samza.config.Config)6 MapConfig (org.apache.samza.config.MapConfig)6 SamzaSqlApplicationConfig (org.apache.samza.sql.runner.SamzaSqlApplicationConfig)6 SamzaSqlTestConfig (org.apache.samza.sql.util.SamzaSqlTestConfig)6 Test (org.junit.Test)4 RelNode (org.apache.calcite.rel.RelNode)3 LogicalFilter (org.apache.calcite.rel.logical.LogicalFilter)3 LogicalJoin (org.apache.calcite.rel.logical.LogicalJoin)3 LogicalProject (org.apache.calcite.rel.logical.LogicalProject)3 LogicalTableScan (org.apache.calcite.rel.logical.LogicalTableScan)3 SamzaSqlDslConverter (org.apache.samza.sql.dsl.SamzaSqlDslConverter)2 Collection (java.util.Collection)1 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)1 DslConverterFactory (org.apache.samza.sql.interfaces.DslConverterFactory)1 SqlSchema (org.apache.samza.sql.schema.SqlSchema)1 SamzaSqlQueryParser (org.apache.samza.sql.util.SamzaSqlQueryParser)1