use of org.apache.flink.table.api.TableEnvironment in project flink by apache.
the class HiveTableSourceITCase method testPartitionFilterDateTimestamp.
/**
 * Exercises partition filtering on a table partitioned by a DATE and a TIMESTAMP column.
 *
 * <p>Three single-row partitions are written to {@code db1.part}. A query filtering on both
 * partition columns is then planned and executed; the test asserts that the catalog's
 * {@code fallback} flag is set after planning, that the optimized plan lists only the matching
 * partition, and that the expected row comes back. A second query filters on the timestamp
 * partition alone. The database is dropped afterwards regardless of the outcome.
 */
@Test
public void testPartitionFilterDateTimestamp() throws Exception {
    TableEnvironment tableEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    TestPartitionFilterCatalog catalog =
            new TestPartitionFilterCatalog(
                    hiveCatalog.getName(),
                    hiveCatalog.getDefaultDatabase(),
                    hiveCatalog.getHiveConf(),
                    hiveCatalog.getHiveVersion());
    tableEnv.registerCatalog(catalog.getName(), catalog);
    tableEnv.useCatalog(catalog.getName());
    tableEnv.executeSql("create database db1");
    try {
        tableEnv.executeSql(
                "create table db1.part(x int) partitioned by (p1 date,p2 timestamp)");

        // One row per partition: row value i + 1 goes into partitionSpecs[i].
        String[] partitionSpecs = {
            "p1='2018-08-08',p2='2018-08-08 08:08:08.1'",
            "p1='2018-08-09',p2='2018-08-08 08:08:09.1'",
            "p1='2018-08-10',p2='2018-08-08 08:08:10.1'"
        };
        for (int i = 0; i < partitionSpecs.length; i++) {
            HiveTestUtils.createTextTableInserter(hiveCatalog, "db1", "part")
                    .addRow(new Object[] {i + 1})
                    .commit(partitionSpecs[i]);
        }

        // filter by both the date and the timestamp partition
        Table bothColumnsQuery =
                tableEnv.sqlQuery(
                        "select x from db1.part where p1>cast('2018-08-09' as date) and p2<>cast('2018-08-08 08:08:09.1' as timestamp)");
        // Splitting on the "== ... ==" section headers puts the optimized plan at index 2.
        String[] planSections = bothColumnsQuery.explain().split("==.*==\n");
        assertTrue(catalog.fallback);
        String optimized = planSections[2];
        assertTrue(
                optimized,
                optimized.contains(
                        "table=[[test-catalog, db1, part, partitions=[{p1=2018-08-10, p2=2018-08-08 08:08:10.1}]"));
        List<Row> rows = CollectionUtil.iteratorToList(bothColumnsQuery.execute().collect());
        assertEquals("[+I[3]]", rows.toString());

        // filter by timestamp partition
        Table timestampOnlyQuery =
                tableEnv.sqlQuery(
                        "select x from db1.part where timestamp '2018-08-08 08:08:09.1' = p2");
        rows = CollectionUtil.iteratorToList(timestampOnlyQuery.execute().collect());
        assertEquals("[+I[2]]", rows.toString());
    } finally {
        tableEnv.executeSql("drop database db1 cascade");
    }
}
use of org.apache.flink.table.api.TableEnvironment in project flink by apache.
the class HiveTableSourceITCase method testParallelismWithoutParallelismInfer.
/**
 * Checks the parallelism of the planned transformation when Hive source-parallelism inference
 * is disabled and no default parallelism is configured explicitly.
 *
 * <p>A two-partition table is created and populated, a {@code LIMIT 1} query over it is
 * translated to a transformation graph, and the first input of the root's first input is
 * expected to carry the default value of
 * {@link ExecutionConfigOptions#TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM}. The table is dropped
 * afterwards so that it does not linger in the catalog.
 */
@Test
public void testParallelismWithoutParallelismInfer() throws Exception {
    final String dbName = "source_db";
    final String tblName = "test_parallelism_no_infer";
    TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inBatchMode());
    tEnv.getConfig().setSqlDialect(SqlDialect.HIVE);
    tEnv.registerCatalog("hive", hiveCatalog);
    tEnv.useCatalog("hive");
    tEnv.getConfig()
            .getConfiguration()
            .setBoolean(HiveOptions.TABLE_EXEC_HIVE_INFER_SOURCE_PARALLELISM, false);
    // All statements are derived from dbName/tblName so the names cannot drift apart.
    tEnv.executeSql(
            String.format(
                    "CREATE TABLE %s.%s (`year` STRING, `value` INT) partitioned by (pt int)",
                    dbName, tblName));
    try {
        HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName)
                .addRow(new Object[] {"2014", 3})
                .addRow(new Object[] {"2014", 4})
                .commit("pt=0");
        HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName)
                .addRow(new Object[] {"2015", 2})
                .addRow(new Object[] {"2015", 5})
                .commit("pt=1");
        Table table =
                tEnv.sqlQuery(String.format("select * from hive.%s.%s limit 1", dbName, tblName));
        PlannerBase planner = (PlannerBase) ((TableEnvironmentImpl) tEnv).getPlanner();
        RelNode relNode = planner.optimize(TableTestUtil.toRelNode(table));
        ExecNode<?> execNode =
                planner.translateToExecNodeGraph(toScala(Collections.singletonList(relNode)))
                        .getRootNodes()
                        .get(0);
        // Walk two levels down from the root transformation (presumably to the source).
        Transformation<?> transformation =
                (execNode.translateToPlan(planner).getInputs().get(0)).getInputs().get(0);
        // when there's no infer, should use the default parallelism
        Assert.assertEquals(
                ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM
                        .defaultValue()
                        .intValue(),
                transformation.getParallelism());
    } finally {
        // Leave the catalog as it was found; a leftover table would make a rerun's CREATE fail.
        tEnv.executeSql(String.format("drop table if exists %s.%s", dbName, tblName));
    }
}
use of org.apache.flink.table.api.TableEnvironment in project flink by apache.
the class HiveTableSourceITCase method testCaseInsensitive.
/**
 * Writes a file whose columns are declared in upper case through the filesystem connector and
 * reads it back through a Hive external table whose columns are declared in lower case.
 *
 * <p>The table name {@code parquet_t} is fixed regardless of {@code format}, so the table is
 * dropped before returning; otherwise a later call against the same catalog would fail on its
 * first {@code create table}.
 *
 * @param format file format, used both as the filesystem connector's {@code format} option and
 *     in the Hive {@code stored as} clause
 * @throws Exception if a statement fails or the temporary folder cannot be created
 */
private void testCaseInsensitive(String format) throws Exception {
    TableEnvironment tEnv = createTableEnvWithHiveCatalog(hiveCatalog);
    String folderURI = TEMPORARY_FOLDER.newFolder().toURI().toString();
    try {
        // Flink writes the upper-case fields (I, J) to a file of the given format
        tEnv.executeSql(
                String.format(
                        "create table parquet_t (I int, J int) with ("
                                + "'connector'='filesystem','format'='%s','path'='%s')",
                        format, folderURI));
        tEnv.executeSql("insert into parquet_t select 1, 2").await();
        tEnv.executeSql("drop table parquet_t");
        // Hive reads the same file through lower-case column names (i, j)
        tEnv.getConfig().setSqlDialect(SqlDialect.HIVE);
        tEnv.executeSql(
                String.format(
                        "create external table parquet_t (i int, j int) stored as %s location '%s'",
                        format, folderURI));
        Assert.assertEquals(
                Row.of(1, 2), tEnv.executeSql("select * from parquet_t").collect().next());
    } finally {
        // "if exists" keeps the cleanup harmless when the failure happened before any create.
        tEnv.executeSql("drop table if exists parquet_t");
    }
}
use of org.apache.flink.table.api.TableEnvironment in project flink by apache.
the class HiveTableSourceITCase method testSourceConfig.
/**
 * Queries {@code db1.src} through a spied catalog and table factory and checks the returned rows.
 *
 * <p>The spied factory's {@code createDynamicTableSource} is replaced by an answer that asserts
 * the fallback-reader option seen in the context equals {@code fallbackMR} and then returns a
 * {@code TestConfigSource}. The table environment is configured with both flags and a default
 * parallelism of 2 before the query runs.
 *
 * @param fallbackMR value set for {@code TABLE_EXEC_HIVE_FALLBACK_MAPRED_READER} and expected
 *     inside the factory answer
 * @param inferParallelism value set for {@code TABLE_EXEC_HIVE_INFER_SOURCE_PARALLELISM} and
 *     handed to the {@code TestConfigSource}
 */
private void testSourceConfig(boolean fallbackMR, boolean inferParallelism) throws Exception {
    // NOTE(review): the factory is obtained via getFactory() but the catalog stub below
    // overrides getTableFactory() - confirm the planner actually picks up the spied factory.
    HiveDynamicTableFactory spiedFactory =
            spy((HiveDynamicTableFactory) hiveCatalog.getFactory().get());
    doAnswer(
                    invocation -> {
                        // NOTE(review): the stub matches DynamicTableFactory.Context while the
                        // argument is read as TableSourceFactory.Context - confirm compatibility.
                        TableSourceFactory.Context ctx = invocation.getArgument(0);
                        assertEquals(
                                fallbackMR,
                                ctx.getConfiguration()
                                        .get(HiveOptions.TABLE_EXEC_HIVE_FALLBACK_MAPRED_READER));
                        JobConf jobConf = new JobConf(hiveCatalog.getHiveConf());
                        return new TestConfigSource(
                                jobConf,
                                ctx.getConfiguration(),
                                ctx.getObjectIdentifier().toObjectPath(),
                                ctx.getTable(),
                                inferParallelism);
                    })
            .when(spiedFactory)
            .createDynamicTableSource(any(DynamicTableFactory.Context.class));

    HiveCatalog spiedCatalog = spy(hiveCatalog);
    doReturn(Optional.of(spiedFactory)).when(spiedCatalog).getTableFactory();

    TableEnvironment env = HiveTestUtils.createTableEnvInBatchMode();
    env.getConfig()
            .getConfiguration()
            .setBoolean(HiveOptions.TABLE_EXEC_HIVE_FALLBACK_MAPRED_READER, fallbackMR);
    env.getConfig()
            .getConfiguration()
            .setBoolean(HiveOptions.TABLE_EXEC_HIVE_INFER_SOURCE_PARALLELISM, inferParallelism);
    env.getConfig()
            .getConfiguration()
            .setInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 2);
    env.registerCatalog(spiedCatalog.getName(), spiedCatalog);
    env.useCatalog(spiedCatalog.getName());

    List<Row> rows =
            CollectionUtil.iteratorToList(
                    env.sqlQuery("select * from db1.src order by x").execute().collect());
    assertEquals("[+I[1, a], +I[2, b]]", rows.toString());
}
use of org.apache.flink.table.api.TableEnvironment in project flink by apache.
the class HiveTableSourceITCase method testParallelismOnLimitPushDown.
/**
 * Checks the parallelism of the planned transformation for a {@code LIMIT} query when Hive
 * source-parallelism inference is disabled and the default parallelism is configured as 2.
 *
 * <p>A two-partition table is created and populated, a {@code LIMIT 1} query over it is
 * translated to a transformation graph, and the first input of the root's first input is
 * expected to have parallelism 2. The table is dropped afterwards so that it does not linger in
 * the catalog.
 */
@Test
public void testParallelismOnLimitPushDown() throws Exception {
    final String dbName = "source_db";
    final String tblName = "test_parallelism_limit_pushdown";
    TableEnvironment tEnv = createTableEnv();
    tEnv.getConfig()
            .getConfiguration()
            .setBoolean(HiveOptions.TABLE_EXEC_HIVE_INFER_SOURCE_PARALLELISM, false);
    tEnv.getConfig()
            .getConfiguration()
            .setInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 2);
    // All statements are derived from dbName/tblName so the names cannot drift apart.
    tEnv.executeSql(
            String.format(
                    "CREATE TABLE %s.%s (`year` STRING, `value` INT) partitioned by (pt int)",
                    dbName, tblName));
    try {
        HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName)
                .addRow(new Object[] {"2014", 3})
                .addRow(new Object[] {"2014", 4})
                .commit("pt=0");
        HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName)
                .addRow(new Object[] {"2015", 2})
                .addRow(new Object[] {"2015", 5})
                .commit("pt=1");
        Table table =
                tEnv.sqlQuery(String.format("select * from hive.%s.%s limit 1", dbName, tblName));
        PlannerBase planner = (PlannerBase) ((TableEnvironmentImpl) tEnv).getPlanner();
        RelNode relNode = planner.optimize(TableTestUtil.toRelNode(table));
        ExecNode<?> execNode =
                planner.translateToExecNodeGraph(toScala(Collections.singletonList(relNode)))
                        .getRootNodes()
                        .get(0);
        // Walk two levels down from the root transformation (presumably to the source).
        Transformation<?> transformation =
                (execNode.translateToPlan(planner).getInputs().get(0)).getInputs().get(0);
        // when there's no infer, should use the default parallelism configured
        Assert.assertEquals(2, transformation.getParallelism());
    } finally {
        // Leave the catalog as it was found; a leftover table would make a rerun's CREATE fail.
        tEnv.executeSql(String.format("drop table if exists %s.%s", dbName, tblName));
    }
}
Aggregations