use of org.apache.samza.sql.dsl.SamzaSqlDslConverterFactory in project samza by apache.
the class TestQueryPlanner method testRemoteJoinNoFilterPushDownWithUdfInFilterAndOptimizer.
@Test
public void testRemoteJoinNoFilterPushDownWithUdfInFilterAndOptimizer() throws SamzaSqlValidatorException {
Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(1);
String sql = "Insert into testavro.enrichedPageViewTopic " + "select pv.pageKey as __key__, pv.pageKey as pageKey, coalesce(null, 'N/A') as companyName," + " p.name as profileName, p.address as profileAddress " + "from testRemoteStore.Profile.`$table` as p " + "join testavro.PAGEVIEW as pv " + " on p.__key__ = pv.profileId" + " where p.name = pv.pageKey AND p.name = 'Mike' AND pv.profileId = MyTestPoly(p.name)";
staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMT, sql);
staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_ENABLE_PLAN_OPTIMIZER, Boolean.toString(true));
Config samzaConfig = new MapConfig(staticConfigs);
DslConverter dslConverter = new SamzaSqlDslConverterFactory().create(samzaConfig);
Collection<RelRoot> relRootsWithOptimization = dslConverter.convertDsl(sql);
staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_ENABLE_PLAN_OPTIMIZER, Boolean.toString(false));
samzaConfig = new MapConfig(staticConfigs);
dslConverter = new SamzaSqlDslConverterFactory().create(samzaConfig);
Collection<RelRoot> relRootsWithoutOptimization = dslConverter.convertDsl(sql);
/*
LogicalProject(__key__=[$9], pageKey=[$9], companyName=['N/A'], profileName=[$2], profileAddress=[$4])
LogicalFilter(condition=[AND(=($2, $9), =($2, 'Mike'), =($10, CAST(MyTestPoly($10)):INTEGER))])
LogicalJoin(condition=[=($0, $10)], joinType=[inner])
LogicalTableScan(table=[[testRemoteStore, Profile, $table]])
LogicalTableScan(table=[[testavro, PAGEVIEW]])
*/
// None of the conditions in the filter could be pushed down as they all require a remote call. Hence the plans
// with and without optimization should be the same.
assertEquals(RelOptUtil.toString(relRootsWithOptimization.iterator().next().rel, SqlExplainLevel.EXPPLAN_ATTRIBUTES), RelOptUtil.toString(relRootsWithoutOptimization.iterator().next().rel, SqlExplainLevel.EXPPLAN_ATTRIBUTES));
}
use of org.apache.samza.sql.dsl.SamzaSqlDslConverterFactory in project samza by apache.
the class TestQueryPlanner method testRemoteJoinWithFilterHelper.
void testRemoteJoinWithFilterHelper(boolean enableOptimizer) throws SamzaSqlValidatorException {
Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(1);
String sql = "Insert into testavro.enrichedPageViewTopic " + "select pv.pageKey as __key__, pv.pageKey as pageKey, coalesce(null, 'N/A') as companyName," + " p.name as profileName, p.address as profileAddress " + "from testavro.PAGEVIEW as pv " + "join testRemoteStore.Profile.`$table` as p " + " on p.__key__ = pv.profileId" + " where p.name = pv.pageKey AND p.name = 'Mike' AND pv.profileId = 1";
staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMT, sql);
staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_ENABLE_PLAN_OPTIMIZER, Boolean.toString(enableOptimizer));
Config samzaConfig = new MapConfig(staticConfigs);
DslConverter dslConverter = new SamzaSqlDslConverterFactory().create(samzaConfig);
Collection<RelRoot> relRoots = dslConverter.convertDsl(sql);
/*
Query plan without optimization:
LogicalProject(__key__=[$1], pageKey=[$1], companyName=['N/A'], profileName=[$5], profileAddress=[$7])
LogicalFilter(condition=[AND(=($5, $1), =($5, 'Mike'), =($2, 1))])
LogicalJoin(condition=[=($3, $2)], joinType=[inner])
LogicalTableScan(table=[[testavro, PAGEVIEW]])
LogicalTableScan(table=[[testRemoteStore, Profile, $table]])
Query plan with optimization:
LogicalProject(__key__=[$1], pageKey=[$1], companyName=['N/A'], profileName=[$5], profileAddress=[$7])
LogicalFilter(condition=[AND(=($5, $1), =($5, 'Mike'))])
LogicalJoin(condition=[=($3, $2)], joinType=[inner])
LogicalFilter(condition=[=($2, 1)])
LogicalTableScan(table=[[testavro, PAGEVIEW]])
LogicalTableScan(table=[[testRemoteStore, Profile, $table]])
*/
assertEquals(1, relRoots.size());
RelRoot relRoot = relRoots.iterator().next();
RelNode relNode = relRoot.rel;
assertTrue(relNode instanceof LogicalProject);
relNode = relNode.getInput(0);
assertTrue(relNode instanceof LogicalFilter);
if (enableOptimizer) {
assertEquals("AND(=($1, $5), =($5, 'Mike'))", ((LogicalFilter) relNode).getCondition().toString());
} else {
assertEquals("AND(=(1, $2), =($1, $5), =($5, 'Mike'))", ((LogicalFilter) relNode).getCondition().toString());
}
relNode = relNode.getInput(0);
assertTrue(relNode instanceof LogicalJoin);
assertEquals(2, relNode.getInputs().size());
LogicalJoin join = (LogicalJoin) relNode;
RelNode left = join.getLeft();
RelNode right = join.getRight();
assertTrue(right instanceof LogicalTableScan);
if (enableOptimizer) {
assertTrue(left instanceof LogicalFilter);
assertEquals("=(1, $2)", ((LogicalFilter) left).getCondition().toString());
assertTrue(left.getInput(0) instanceof LogicalTableScan);
} else {
assertTrue(left instanceof LogicalTableScan);
}
}
use of org.apache.samza.sql.dsl.SamzaSqlDslConverterFactory in project samza by apache.
the class SamzaSqlApplicationConfig method populateSystemStreamsAndGetRelRoots.
public static Collection<RelRoot> populateSystemStreamsAndGetRelRoots(List<String> dslStmts, Config config, List<String> inputSystemStreams, List<String> outputSystemStreams) {
// TODO: Get the converter factory based on the file type. Create abstraction around this.
DslConverterFactory dslConverterFactory = new SamzaSqlDslConverterFactory();
DslConverter dslConverter = dslConverterFactory.create(config);
Collection<RelRoot> relRoots = dslConverter.convertDsl(String.join("\n", dslStmts));
// RelRoot does not have sink node for Samza SQL dsl, so we can not traverse the relRoot tree to get
// "outputSystemStreams"
// FIXME: the snippet below does not work for Samza SQL dsl but is required for other dsls. Future fix could be
// for samza sql to build TableModify for sink and stick it to the relRoot, so we could get output stream out of it.
// for (RelRoot relRoot : relRoots) {
// SamzaSqlApplicationConfig.populateSystemStreams(relRoot.project(), inputSystemStreams, outputSystemStreams);
// }
// The below code is specific to Samza SQL dsl and should be removed once Samza SQL includes sink as part of
// relRoot and the above code in uncommented.
List<String> sqlStmts = SamzaSqlDslConverter.fetchSqlFromConfig(config);
List<SamzaSqlQueryParser.QueryInfo> queryInfo = SamzaSqlDslConverter.fetchQueryInfo(sqlStmts);
inputSystemStreams.addAll(queryInfo.stream().map(SamzaSqlQueryParser.QueryInfo::getSources).flatMap(Collection::stream).collect(Collectors.toList()));
outputSystemStreams.addAll(queryInfo.stream().map(SamzaSqlQueryParser.QueryInfo::getSink).collect(Collectors.toList()));
return relRoots;
}
Aggregations