Example 6 with TableEnvironment

Use of org.apache.flink.table.api.TableEnvironment in project flink by apache.

Class HiveTableSourceITCase, method testPartitionFilter.

@Test
public void testPartitionFilter() throws Exception {
    TableEnvironment tableEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    TestPartitionFilterCatalog catalog = new TestPartitionFilterCatalog(hiveCatalog.getName(), hiveCatalog.getDefaultDatabase(), hiveCatalog.getHiveConf(), hiveCatalog.getHiveVersion());
    tableEnv.registerCatalog(catalog.getName(), catalog);
    tableEnv.useCatalog(catalog.getName());
    tableEnv.executeSql("create database db1");
    try {
        tableEnv.executeSql("create table db1.part(x int) partitioned by (p1 int,p2 string)");
        HiveTestUtils.createTextTableInserter(hiveCatalog, "db1", "part").addRow(new Object[] { 1 }).commit("p1=1,p2='a'");
        HiveTestUtils.createTextTableInserter(hiveCatalog, "db1", "part").addRow(new Object[] { 2 }).commit("p1=2,p2='b'");
        HiveTestUtils.createTextTableInserter(hiveCatalog, "db1", "part").addRow(new Object[] { 3 }).commit("p1=3,p2='c'");
        // test string partition columns with special characters
        HiveTestUtils.createTextTableInserter(hiveCatalog, "db1", "part").addRow(new Object[] { 4 }).commit("p1=4,p2='c:2'");
        Table query = tableEnv.sqlQuery("select x from db1.part where p1>1 or p2<>'a' order by x");
        String[] explain = query.explain().split("==.*==\n");
        assertFalse(catalog.fallback);
        String optimizedPlan = explain[2];
        assertTrue(optimizedPlan, optimizedPlan.contains("table=[[test-catalog, db1, part, partitions=[{p1=2, p2=b}, {p1=3, p2=c}, {p1=4, p2=c:2}]"));
        List<Row> results = CollectionUtil.iteratorToList(query.execute().collect());
        assertEquals("[+I[2], +I[3], +I[4]]", results.toString());
        query = tableEnv.sqlQuery("select x from db1.part where p1>2 and p2<='a' order by x");
        explain = query.explain().split("==.*==\n");
        assertFalse(catalog.fallback);
        optimizedPlan = explain[2];
        assertTrue(optimizedPlan, optimizedPlan.contains("table=[[test-catalog, db1, part, partitions=[], project=[x]]]"));
        results = CollectionUtil.iteratorToList(query.execute().collect());
        assertEquals("[]", results.toString());
        query = tableEnv.sqlQuery("select x from db1.part where p1 in (1,3,5) order by x");
        explain = query.explain().split("==.*==\n");
        assertFalse(catalog.fallback);
        optimizedPlan = explain[2];
        assertTrue(optimizedPlan, optimizedPlan.contains("table=[[test-catalog, db1, part, partitions=[{p1=1, p2=a}, {p1=3, p2=c}], project=[x]]]"));
        results = CollectionUtil.iteratorToList(query.execute().collect());
        assertEquals("[+I[1], +I[3]]", results.toString());
        query = tableEnv.sqlQuery("select x from db1.part where (p1=1 and p2='a') or ((p1=2 and p2='b') or p2='d') order by x");
        explain = query.explain().split("==.*==\n");
        assertFalse(catalog.fallback);
        optimizedPlan = explain[2];
        assertTrue(optimizedPlan, optimizedPlan.contains("table=[[test-catalog, db1, part, partitions=[{p1=1, p2=a}, {p1=2, p2=b}], project=[x]]]"));
        results = CollectionUtil.iteratorToList(query.execute().collect());
        assertEquals("[+I[1], +I[2]]", results.toString());
        query = tableEnv.sqlQuery("select x from db1.part where p2 = 'c:2' order by x");
        explain = query.explain().split("==.*==\n");
        assertFalse(catalog.fallback);
        optimizedPlan = explain[2];
        assertTrue(optimizedPlan, optimizedPlan.contains("table=[[test-catalog, db1, part, partitions=[{p1=4, p2=c:2}], project=[x]]]"));
        results = CollectionUtil.iteratorToList(query.execute().collect());
        assertEquals("[+I[4]]", results.toString());
        query = tableEnv.sqlQuery("select x from db1.part where '' = p2");
        explain = query.explain().split("==.*==\n");
        assertFalse(catalog.fallback);
        optimizedPlan = explain[2];
        assertTrue(optimizedPlan, optimizedPlan.contains("table=[[test-catalog, db1, part, partitions=[], project=[x]]]"));
        results = CollectionUtil.iteratorToList(query.execute().collect());
        assertEquals("[]", results.toString());
    } finally {
        tableEnv.executeSql("drop database db1 cascade");
    }
}
Also used: CatalogTable (org.apache.flink.table.catalog.CatalogTable), Table (org.apache.flink.table.api.Table), StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment), TableEnvironment (org.apache.flink.table.api.TableEnvironment), Row (org.apache.flink.types.Row), Test (org.junit.Test)
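
A note on the explain[2] indexing used throughout this test: Table#explain() returns a single string with headed sections (in this Flink version, "== Abstract Syntax Tree ==", "== Optimized Physical Plan ==" and "== Optimized Execution Plan =="), so splitting on the header pattern leaves the section bodies at indices 1 and up. A minimal sketch of the idiom, assuming that three-section layout:

import org.apache.flink.table.api.Table;

// Returns the optimized plan section that the assertions above inspect.
// Splitting on the "== ... ==" header lines yields:
//   sections[0] -> text before the first header (usually empty)
//   sections[1] -> abstract syntax tree
//   sections[2] -> optimized plan
static String optimizedPlan(Table query) {
    String[] sections = query.explain().split("==.*==\n");
    return sections[2];
}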

Example 7 with TableEnvironment

Use of org.apache.flink.table.api.TableEnvironment in project flink by apache.

Class HiveLookupJoinITCase, method testLookupJoinBoundedPartitionedTable.

@Test
public void testLookupJoinBoundedPartitionedTable() throws Exception {
    // constructs test data using dynamic partitions
    TableEnvironment batchEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    batchEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    batchEnv.useCatalog(hiveCatalog.getName());
    batchEnv.executeSql("insert overwrite bounded_partition_table values " + "(1,'a',08,2019,'08','01')," + "(1,'a',10,2020,'08','31')," + "(2,'a',21,2020,'08','31')," + "(2,'b',22,2020,'08','31')").await();
    TableImpl flinkTable = (TableImpl) tableEnv.sqlQuery("select p.x, p.y, b.z, b.pt_year, b.pt_mon, b.pt_day from " + " default_catalog.default_database.probe as p" + " join bounded_partition_table for system_time as of p.p as b on p.x=b.x and p.y=b.y");
    List<Row> results = CollectionUtil.iteratorToList(flinkTable.execute().collect());
    assertEquals("[+I[1, a, 8, 2019, 08, 01], +I[1, a, 10, 2020, 08, 31], +I[2, b, 22, 2020, 08, 31]]", results.toString());
}
Also used: TableImpl (org.apache.flink.table.api.internal.TableImpl), TableEnvironment (org.apache.flink.table.api.TableEnvironment), Row (org.apache.flink.types.Row), Test (org.junit.Test)
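
The join reads from a probe table in the default catalog that the test fixture creates elsewhere; only the lookup side appears in the snippet. Below is a minimal sketch of what such a probe source could look like, assuming the values connector and made-up rows (the real fixture, including the tableEnv it runs against, may differ):

import java.util.Arrays;
import java.util.List;
import org.apache.flink.table.planner.factories.TestValuesTableFactory;
import org.apache.flink.types.Row;

// Hypothetical probe-side setup: columns x and y to join on, plus a
// processing-time attribute p for the FOR SYSTEM_TIME AS OF clause.
List<Row> probeData = Arrays.asList(Row.of(1, "a"), Row.of(2, "b"), Row.of(3, "c"));
String probeDataId = TestValuesTableFactory.registerData(probeData);
tableEnv.executeSql(
        String.format(
                "create table default_catalog.default_database.probe ("
                        + " x int, y string, p as proctime()) with ("
                        + " 'connector' = 'values', 'data-id' = '%s', 'bounded' = 'true')",
                probeDataId));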

Example 8 with TableEnvironment

Use of org.apache.flink.table.api.TableEnvironment in project flink by apache.

Class HiveLookupJoinITCase, method testPartitionFetcherAndReader.

@Test
public void testPartitionFetcherAndReader() throws Exception {
    // constructs test data using dynamic partitions
    TableEnvironment batchEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    batchEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    batchEnv.useCatalog(hiveCatalog.getName());
    batchEnv.executeSql("insert overwrite partition_table values " + "(1,'a',08,2019,'08','01')," + "(1,'a',10,2020,'08','31')," + "(2,'a',21,2020,'08','31')," + "(2,'b',22,2020,'08','31')," + "(3,'c',33,2020,'09','31')").await();
    FileSystemLookupFunction<HiveTablePartition> lookupFunction = getLookupFunction("partition_table");
    lookupFunction.open(null);
    PartitionFetcher<HiveTablePartition> fetcher = lookupFunction.getPartitionFetcher();
    PartitionFetcher.Context<HiveTablePartition> context = lookupFunction.getFetcherContext();
    List<HiveTablePartition> partitions = fetcher.fetch(context);
    // fetch latest partition by partition-name
    assertEquals(1, partitions.size());
    PartitionReader<HiveTablePartition, RowData> reader = lookupFunction.getPartitionReader();
    reader.open(partitions);
    List<RowData> res = new ArrayList<>();
    ObjectIdentifier tableIdentifier = ObjectIdentifier.of(hiveCatalog.getName(), "default", "partition_table");
    CatalogTable catalogTable = (CatalogTable) hiveCatalog.getTable(tableIdentifier.toObjectPath());
    GenericRowData reuse = new GenericRowData(catalogTable.getSchema().getFieldCount());
    TypeSerializer<RowData> serializer = InternalSerializers.create(catalogTable.getSchema().toRowDataType().getLogicalType());
    RowData row;
    while ((row = reader.read(reuse)) != null) {
        res.add(serializer.copy(row));
    }
    res.sort(Comparator.comparingInt(o -> o.getInt(0)));
    assertEquals("[+I(3,c,33,2020,09,31)]", res.toString());
}
Also used: PartitionReader (org.apache.flink.connector.file.table.PartitionReader), PartitionFetcher (org.apache.flink.connector.file.table.PartitionFetcher), PARTITION_TIME_EXTRACTOR_TIMESTAMP_PATTERN (org.apache.flink.connector.file.table.FileSystemConnectorOptions.PARTITION_TIME_EXTRACTOR_TIMESTAMP_PATTERN), PARTITION_TIME_EXTRACTOR_KIND (org.apache.flink.connector.file.table.FileSystemConnectorOptions.PARTITION_TIME_EXTRACTOR_KIND), STREAMING_SOURCE_ENABLE (org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_ENABLE), STREAMING_SOURCE_PARTITION_INCLUDE (org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_PARTITION_INCLUDE), STREAMING_SOURCE_PARTITION_ORDER (org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_PARTITION_ORDER), STREAMING_SOURCE_MONITOR_INTERVAL (org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_MONITOR_INTERVAL), HiveCatalog (org.apache.flink.table.catalog.hive.HiveCatalog), HiveTestUtils (org.apache.flink.table.catalog.hive.HiveTestUtils), CatalogTable (org.apache.flink.table.catalog.CatalogTable), ObjectIdentifier (org.apache.flink.table.catalog.ObjectIdentifier), TableEnvironment (org.apache.flink.table.api.TableEnvironment), TableEnvironmentInternal (org.apache.flink.table.api.internal.TableEnvironmentInternal), TableImpl (org.apache.flink.table.api.internal.TableImpl), SqlDialect (org.apache.flink.table.api.SqlDialect), EnvironmentSettings (org.apache.flink.table.api.EnvironmentSettings), RowData (org.apache.flink.table.data.RowData), GenericRowData (org.apache.flink.table.data.GenericRowData), Row (org.apache.flink.types.Row), TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer), InternalSerializers (org.apache.flink.table.runtime.typeutils.InternalSerializers), DynamicTableSourceFactory (org.apache.flink.table.factories.DynamicTableSourceFactory), FactoryUtil (org.apache.flink.table.factories.FactoryUtil), TestValuesTableFactory (org.apache.flink.table.planner.factories.TestValuesTableFactory), TestCollectionTableFactory (org.apache.flink.table.planner.factories.utils.TestCollectionTableFactory), CollectionUtil (org.apache.flink.util.CollectionUtil), Arrays (java.util.Arrays), ArrayList (java.util.ArrayList), List (java.util.List), Comparator (java.util.Comparator), Duration (java.time.Duration), BeforeClass (org.junit.BeforeClass), AfterClass (org.junit.AfterClass), Test (org.junit.Test), Assert.assertTrue (org.junit.Assert.assertTrue), Assert.assertEquals (org.junit.Assert.assertEquals)
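
The single fetched partition follows from the lookup table's streaming-source properties (the HiveOptions imports above hint at them). A plausible DDL for partition_table in the Hive dialect, assuming 'latest' partition include mode; the fixture's actual properties may differ:

// With 'streaming-source.partition.include' = 'latest' and partition-name
// ordering, the fetcher keeps only the most recently named partition,
// here 2020/09/31, which is why partitions.size() == 1 above.
batchEnv.executeSql(
        "create table partition_table (x int, y string, z int)"
                + " partitioned by (pt_year int, pt_mon string, pt_day string)"
                + " tblproperties ("
                + " 'streaming-source.enable' = 'true',"
                + " 'streaming-source.partition.include' = 'latest',"
                + " 'streaming-source.monitor-interval' = '1h',"
                + " 'streaming-source.partition-order' = 'partition-name')");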

Example 9 with TableEnvironment

Use of org.apache.flink.table.api.TableEnvironment in project flink by apache.

Class HiveLookupJoinITCase, method testLookupJoinPartitionedTableWithCreateTime.

@Test
public void testLookupJoinPartitionedTableWithCreateTime() throws Exception {
    // constructs test data using dynamic partitions
    TableEnvironment batchEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    batchEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    batchEnv.useCatalog(hiveCatalog.getName());
    batchEnv.executeSql("insert overwrite partition_table_3 values " + "(1,'a',08,2020,'month1','01')," + "(1,'a',10,2020,'month2','02')," + "(2,'a',21,2020,'month1','02')," + "(2,'b',22,2020,'month3','20')," + "(3,'c',22,2020,'month3','20')," + "(3,'c',33,2017,'08','31')," + "(1,'a',101,2017,'09','01')," + "(2,'a',121,2019,'09','01')," + "(2,'b',122,2019,'09','01')").await();
    // insert a new partition
    batchEnv.executeSql("insert overwrite partition_table_3 values " + "(1,'a',101,2020,'08','01')," + "(2,'a',121,2020,'08','01')," + "(2,'b',122,2020,'08','01')").await();
    TableImpl flinkTable = (TableImpl) tableEnv.sqlQuery("select p.x, p.y, b.z, b.pt_year, b.pt_mon, b.pt_day from " + " default_catalog.default_database.probe as p" + " join partition_table_3 for system_time as of p.p as b on p.x=b.x and p.y=b.y");
    List<Row> results = CollectionUtil.iteratorToList(flinkTable.execute().collect());
    assertEquals("[+I[1, a, 101, 2020, 08, 01], +I[2, b, 122, 2020, 08, 01]]", results.toString());
}
Also used: TableImpl (org.apache.flink.table.api.internal.TableImpl), TableEnvironment (org.apache.flink.table.api.TableEnvironment), Row (org.apache.flink.types.Row), Test (org.junit.Test)
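
What distinguishes this variant is the partition order: with create-time ordering, the lookup source loads whichever partitions were created most recently, so the rows from the second insert win even though the fixture also contains later partition values. A sketch of the assumed DDL for partition_table_3 (the actual fixture may differ):

// 'create-time' ordering treats the most recently created partitions as
// the latest, regardless of their partition values; the second insert
// overwrite above therefore supplies the rows the join sees.
batchEnv.executeSql(
        "create table partition_table_3 (x int, y string, z int)"
                + " partitioned by (pt_year int, pt_mon string, pt_day string)"
                + " tblproperties ("
                + " 'streaming-source.enable' = 'true',"
                + " 'streaming-source.partition.include' = 'latest',"
                + " 'streaming-source.partition-order' = 'create-time')");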

Example 10 with TableEnvironment

Use of org.apache.flink.table.api.TableEnvironment in project flink by apache.

Class HiveLookupJoinITCase, method testLookupJoinTableWithColumnarStorage.

@Test
public void testLookupJoinTableWithColumnarStorage() throws Exception {
    // constructs test data; as the DEFAULT_SIZE of VectorizedColumnBatch is 2048, we must
    // write at least 2048 records to the test table.
    List<Row> testData = new ArrayList<>(4096);
    for (int i = 0; i < 4096; i++) {
        testData.add(Row.of(String.valueOf(i)));
    }
    // constructs test data using values table
    TableEnvironment batchEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.DEFAULT);
    batchEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    batchEnv.useCatalog(hiveCatalog.getName());
    String dataId = TestValuesTableFactory.registerData(testData);
    batchEnv.executeSql(String.format("create table value_source(x string, p as proctime()) with (" + "'connector' = 'values', 'data-id' = '%s', 'bounded'='true')", dataId));
    batchEnv.executeSql("insert overwrite columnar_table select x from value_source").await();
    TableImpl flinkTable = (TableImpl) tableEnv.sqlQuery("select t.x as x1, c.x as x2 from value_source t " + "left join columnar_table for system_time as of t.p c " + "on t.x = c.x where c.x is null");
    List<Row> results = CollectionUtil.iteratorToList(flinkTable.execute().collect());
    assertTrue("All records should be able to be joined, and the final results should be empty.", results.size() == 0);
}
Also used: ArrayList (java.util.ArrayList), TableImpl (org.apache.flink.table.api.internal.TableImpl), TableEnvironment (org.apache.flink.table.api.TableEnvironment), Row (org.apache.flink.types.Row), Test (org.junit.Test)
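
The columnar aspect of this test lives in the table's storage format: a format such as ORC or Parquet makes the Hive source read VectorizedColumnBatch chunks of 2048 rows, which is why the test writes 4096 records. A one-line sketch of the assumed DDL, run under the Hive dialect (the real fixture may choose a different columnar format):

// Hypothetical DDL: ORC is one columnar format that triggers the
// vectorized read path this test exercises.
batchEnv.executeSql("create table columnar_table (x string) stored as orc");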

Aggregations

TableEnvironment (org.apache.flink.table.api.TableEnvironment): 137
Test (org.junit.Test): 95
Row (org.apache.flink.types.Row): 58
StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment): 38
Table (org.apache.flink.table.api.Table): 27
ObjectPath (org.apache.flink.table.catalog.ObjectPath): 19
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 14
ArrayList (java.util.ArrayList): 13
CatalogTable (org.apache.flink.table.catalog.CatalogTable): 12
HashMap (java.util.HashMap): 11
EnvironmentSettings (org.apache.flink.table.api.EnvironmentSettings): 10
CatalogBaseTable (org.apache.flink.table.catalog.CatalogBaseTable): 10
TableResult (org.apache.flink.table.api.TableResult): 8
File (java.io.File): 7
Constructor (java.lang.reflect.Constructor): 7
TableImpl (org.apache.flink.table.api.internal.TableImpl): 7
TableException (org.apache.flink.table.api.TableException): 5
List (java.util.List): 4
Configuration (org.apache.flink.configuration.Configuration): 4
TableSchema (org.apache.flink.table.api.TableSchema): 4