
Example 61 with TableEnvironment

use of org.apache.flink.table.api.TableEnvironment in project flink by apache.

the class HiveTableSourceITCase method testPartitionFilterDateTimestamp.

@Test
public void testPartitionFilterDateTimestamp() throws Exception {
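    // create a batch-mode TableEnvironment that uses the Hive SQL dialect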
    TableEnvironment tableEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    TestPartitionFilterCatalog catalog = new TestPartitionFilterCatalog(hiveCatalog.getName(), hiveCatalog.getDefaultDatabase(), hiveCatalog.getHiveConf(), hiveCatalog.getHiveVersion());
    tableEnv.registerCatalog(catalog.getName(), catalog);
    tableEnv.useCatalog(catalog.getName());
    tableEnv.executeSql("create database db1");
    try {
        tableEnv.executeSql("create table db1.part(x int) partitioned by (p1 date,p2 timestamp)");
        HiveTestUtils.createTextTableInserter(hiveCatalog, "db1", "part").addRow(new Object[] { 1 }).commit("p1='2018-08-08',p2='2018-08-08 08:08:08.1'");
        HiveTestUtils.createTextTableInserter(hiveCatalog, "db1", "part").addRow(new Object[] { 2 }).commit("p1='2018-08-09',p2='2018-08-08 08:08:09.1'");
        HiveTestUtils.createTextTableInserter(hiveCatalog, "db1", "part").addRow(new Object[] { 3 }).commit("p1='2018-08-10',p2='2018-08-08 08:08:10.1'");
        Table query = tableEnv.sqlQuery("select x from db1.part where p1>cast('2018-08-09' as date) and p2<>cast('2018-08-08 08:08:09.1' as timestamp)");
        String[] explain = query.explain().split("==.*==\n");
        assertTrue(catalog.fallback);
        String optimizedPlan = explain[2];
        assertTrue(optimizedPlan, optimizedPlan.contains("table=[[test-catalog, db1, part, partitions=[{p1=2018-08-10, p2=2018-08-08 08:08:10.1}]"));
        List<Row> results = CollectionUtil.iteratorToList(query.execute().collect());
        assertEquals("[+I[3]]", results.toString());
        // filter by timestamp partition
        query = tableEnv.sqlQuery("select x from db1.part where timestamp '2018-08-08 08:08:09.1' = p2");
        results = CollectionUtil.iteratorToList(query.execute().collect());
        assertEquals("[+I[2]]", results.toString());
    } finally {
        tableEnv.executeSql("drop database db1 cascade");
    }
}
Also used: CatalogTable (org.apache.flink.table.catalog.CatalogTable), Table (org.apache.flink.table.api.Table), StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment), TableEnvironment (org.apache.flink.table.api.TableEnvironment), Row (org.apache.flink.types.Row), Test (org.junit.Test)
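The HiveTestUtils.createTableEnvInBatchMode helper above is test-only. A minimal sketch of the setup it is expected to perform, using only the public Table API calls that appear in these examples (the catalog name "myhive" is a hypothetical placeholder, and hiveCatalog is assumed to be an already-constructed HiveCatalog):

TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inBatchMode());
// switch the parser to the Hive dialect so Hive DDL/DML syntax is accepted
tEnv.getConfig().setSqlDialect(SqlDialect.HIVE);
// "myhive" is a placeholder name; hiveCatalog is an assumed HiveCatalog instance
tEnv.registerCatalog("myhive", hiveCatalog);
tEnv.useCatalog("myhive");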

Example 62 with TableEnvironment

use of org.apache.flink.table.api.TableEnvironment in project flink by apache.

the class HiveTableSourceITCase method testParallelismWithoutParallelismInfer.

@Test
public void testParallelismWithoutParallelismInfer() throws Exception {
    final String dbName = "source_db";
    final String tblName = "test_parallelism_no_infer";
    TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inBatchMode());
    tEnv.getConfig().setSqlDialect(SqlDialect.HIVE);
    tEnv.registerCatalog("hive", hiveCatalog);
    tEnv.useCatalog("hive");
    tEnv.getConfig().getConfiguration().setBoolean(HiveOptions.TABLE_EXEC_HIVE_INFER_SOURCE_PARALLELISM, false);
    tEnv.executeSql("CREATE TABLE source_db.test_parallelism_no_infer " + "(`year` STRING, `value` INT) partitioned by (pt int)");
    HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName).addRow(new Object[] { "2014", 3 }).addRow(new Object[] { "2014", 4 }).commit("pt=0");
    HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName).addRow(new Object[] { "2015", 2 }).addRow(new Object[] { "2015", 5 }).commit("pt=1");
    Table table = tEnv.sqlQuery("select * from hive.source_db.test_parallelism_no_infer limit 1");
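    // drill into planner internals to locate the source transformation and inspect its parallelism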
    PlannerBase planner = (PlannerBase) ((TableEnvironmentImpl) tEnv).getPlanner();
    RelNode relNode = planner.optimize(TableTestUtil.toRelNode(table));
    ExecNode<?> execNode = planner.translateToExecNodeGraph(toScala(Collections.singletonList(relNode))).getRootNodes().get(0);
    Transformation<?> transformation = (execNode.translateToPlan(planner).getInputs().get(0)).getInputs().get(0);
    // with parallelism inference disabled, the source should use the default parallelism
    Assert.assertEquals(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM.defaultValue().intValue(), transformation.getParallelism());
}
Also used: CatalogTable (org.apache.flink.table.catalog.CatalogTable), Table (org.apache.flink.table.api.Table), RelNode (org.apache.calcite.rel.RelNode), StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment), TableEnvironment (org.apache.flink.table.api.TableEnvironment), PlannerBase (org.apache.flink.table.planner.delegation.PlannerBase), Test (org.junit.Test)

Example 63 with TableEnvironment

use of org.apache.flink.table.api.TableEnvironment in project flink by apache.

the class HiveTableSourceITCase method testCaseInsensitive.

private void testCaseInsensitive(String format) throws Exception {
    TableEnvironment tEnv = createTableEnvWithHiveCatalog(hiveCatalog);
    String folderURI = TEMPORARY_FOLDER.newFolder().toURI().toString();
    // use Flink to write case-sensitive field names (I, J) via the filesystem connector
    tEnv.executeSql(String.format("create table parquet_t (I int, J int) with (" + "'connector'='filesystem','format'='%s','path'='%s')", format, folderURI));
    tEnv.executeSql("insert into parquet_t select 1, 2").await();
    tEnv.executeSql("drop table parquet_t");
    // use Hive to read the same files back with lower-case field names (i, j)
    tEnv.getConfig().setSqlDialect(SqlDialect.HIVE);
    tEnv.executeSql(String.format("create external table parquet_t (i int, j int) stored as %s location '%s'", format, folderURI));
    Assert.assertEquals(Row.of(1, 2), tEnv.executeSql("select * from parquet_t").collect().next());
}
Also used: StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment), TableEnvironment (org.apache.flink.table.api.TableEnvironment)

Example 64 with TableEnvironment

use of org.apache.flink.table.api.TableEnvironment in project flink by apache.

the class HiveTableSourceITCase method testSourceConfig.

private void testSourceConfig(boolean fallbackMR, boolean inferParallelism) throws Exception {
    HiveDynamicTableFactory tableFactorySpy = spy((HiveDynamicTableFactory) hiveCatalog.getFactory().get());
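    // intercept table source creation to verify the configuration the planner passes in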
    doAnswer(invocation -> {
        TableSourceFactory.Context context = invocation.getArgument(0);
        assertEquals(fallbackMR, context.getConfiguration().get(HiveOptions.TABLE_EXEC_HIVE_FALLBACK_MAPRED_READER));
        return new TestConfigSource(new JobConf(hiveCatalog.getHiveConf()), context.getConfiguration(), context.getObjectIdentifier().toObjectPath(), context.getTable(), inferParallelism);
    }).when(tableFactorySpy).createDynamicTableSource(any(DynamicTableFactory.Context.class));
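    // spy on the catalog so it returns the spied table factory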
    HiveCatalog catalogSpy = spy(hiveCatalog);
    doReturn(Optional.of(tableFactorySpy)).when(catalogSpy).getTableFactory();
    TableEnvironment tableEnv = HiveTestUtils.createTableEnvInBatchMode();
    tableEnv.getConfig().getConfiguration().setBoolean(HiveOptions.TABLE_EXEC_HIVE_FALLBACK_MAPRED_READER, fallbackMR);
    tableEnv.getConfig().getConfiguration().setBoolean(HiveOptions.TABLE_EXEC_HIVE_INFER_SOURCE_PARALLELISM, inferParallelism);
    tableEnv.getConfig().getConfiguration().setInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 2);
    tableEnv.registerCatalog(catalogSpy.getName(), catalogSpy);
    tableEnv.useCatalog(catalogSpy.getName());
    List<Row> results = CollectionUtil.iteratorToList(tableEnv.sqlQuery("select * from db1.src order by x").execute().collect());
    assertEquals("[+I[1, a], +I[2, b]]", results.toString());
}
Also used: ProviderContext (org.apache.flink.table.connector.ProviderContext), TableSourceFactory (org.apache.flink.table.factories.TableSourceFactory), HiveTestUtils.createTableEnvWithHiveCatalog (org.apache.flink.table.catalog.hive.HiveTestUtils.createTableEnvWithHiveCatalog), HiveCatalog (org.apache.flink.table.catalog.hive.HiveCatalog), StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment), TableEnvironment (org.apache.flink.table.api.TableEnvironment), Row (org.apache.flink.types.Row), JobConf (org.apache.hadoop.mapred.JobConf)

Example 65 with TableEnvironment

use of org.apache.flink.table.api.TableEnvironment in project flink by apache.

the class HiveTableSourceITCase method testParallelismOnLimitPushDown.

@Test
public void testParallelismOnLimitPushDown() throws Exception {
    final String dbName = "source_db";
    final String tblName = "test_parallelism_limit_pushdown";
    TableEnvironment tEnv = createTableEnv();
    tEnv.getConfig().getConfiguration().setBoolean(HiveOptions.TABLE_EXEC_HIVE_INFER_SOURCE_PARALLELISM, false);
    tEnv.getConfig().getConfiguration().setInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 2);
    tEnv.executeSql("CREATE TABLE source_db.test_parallelism_limit_pushdown " + "(`year` STRING, `value` INT) partitioned by (pt int)");
    HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName).addRow(new Object[] { "2014", 3 }).addRow(new Object[] { "2014", 4 }).commit("pt=0");
    HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName).addRow(new Object[] { "2015", 2 }).addRow(new Object[] { "2015", 5 }).commit("pt=1");
    Table table = tEnv.sqlQuery("select * from hive.source_db.test_parallelism_limit_pushdown limit 1");
    PlannerBase planner = (PlannerBase) ((TableEnvironmentImpl) tEnv).getPlanner();
    RelNode relNode = planner.optimize(TableTestUtil.toRelNode(table));
    ExecNode<?> execNode = planner.translateToExecNodeGraph(toScala(Collections.singletonList(relNode))).getRootNodes().get(0);
    Transformation<?> transformation = (execNode.translateToPlan(planner).getInputs().get(0)).getInputs().get(0);
    // with parallelism inference disabled, limit push-down should still use the configured default parallelism
    Assert.assertEquals(2, transformation.getParallelism());
}
Also used: CatalogTable (org.apache.flink.table.catalog.CatalogTable), Table (org.apache.flink.table.api.Table), RelNode (org.apache.calcite.rel.RelNode), StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment), TableEnvironment (org.apache.flink.table.api.TableEnvironment), PlannerBase (org.apache.flink.table.planner.delegation.PlannerBase), Test (org.junit.Test)

Aggregations

TableEnvironment (org.apache.flink.table.api.TableEnvironment): 137 uses
Test (org.junit.Test): 95 uses
Row (org.apache.flink.types.Row): 58 uses
StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment): 38 uses
Table (org.apache.flink.table.api.Table): 27 uses
ObjectPath (org.apache.flink.table.catalog.ObjectPath): 19 uses
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 14 uses
ArrayList (java.util.ArrayList): 13 uses
CatalogTable (org.apache.flink.table.catalog.CatalogTable): 12 uses
HashMap (java.util.HashMap): 11 uses
EnvironmentSettings (org.apache.flink.table.api.EnvironmentSettings): 10 uses
CatalogBaseTable (org.apache.flink.table.catalog.CatalogBaseTable): 10 uses
TableResult (org.apache.flink.table.api.TableResult): 8 uses
File (java.io.File): 7 uses
Constructor (java.lang.reflect.Constructor): 7 uses
TableImpl (org.apache.flink.table.api.internal.TableImpl): 7 uses
TableException (org.apache.flink.table.api.TableException): 5 uses
List (java.util.List): 4 uses
Configuration (org.apache.flink.configuration.Configuration): 4 uses
TableSchema (org.apache.flink.table.api.TableSchema): 4 uses