use of org.apache.flink.table.api.TableEnvironment in project flink by apache.
the class HiveTableSourceITCase method testPartitionFilter.
/**
 * Runs a series of partition-filtering queries against a partitioned Hive table and, for each
 * one, checks three things: that {@code catalog.fallback} is still {@code false} after the query
 * has been explained, that the third section of the explain output contains the expected pruned
 * partition list, and that executing the query returns the expected rows.
 *
 * <p>NOTE(review): {@code fallback} is presumably set by {@code TestPartitionFilterCatalog} when
 * partition filtering is not delegated to the catalog; confirm against that class.
 */
@Test
public void testPartitionFilter() throws Exception {
    TableEnvironment tableEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    TestPartitionFilterCatalog catalog =
            new TestPartitionFilterCatalog(
                    hiveCatalog.getName(),
                    hiveCatalog.getDefaultDatabase(),
                    hiveCatalog.getHiveConf(),
                    hiveCatalog.getHiveVersion());
    tableEnv.registerCatalog(catalog.getName(), catalog);
    tableEnv.useCatalog(catalog.getName());
    tableEnv.executeSql("create database db1");
    try {
        tableEnv.executeSql("create table db1.part(x int) partitioned by (p1 int,p2 string)");

        // One row per partition; row i (1-based) carries the value i in column x.
        // The last spec exercises a string partition column with special characters.
        final String[] partitionSpecs = {
            "p1=1,p2='a'", "p1=2,p2='b'", "p1=3,p2='c'", "p1=4,p2='c:2'"
        };
        for (int i = 0; i < partitionSpecs.length; i++) {
            HiveTestUtils.createTextTableInserter(hiveCatalog, "db1", "part")
                    .addRow(new Object[] {i + 1})
                    .commit(partitionSpecs[i]);
        }

        // Each scenario: { SQL text, fragment expected in the optimized plan, expected rows }.
        final String[][] scenarios = {
            {
                "select x from db1.part where p1>1 or p2<>'a' order by x",
                "table=[[test-catalog, db1, part, partitions=[{p1=2, p2=b}, {p1=3, p2=c}, {p1=4, p2=c:2}]",
                "[+I[2], +I[3], +I[4]]"
            },
            {
                "select x from db1.part where p1>2 and p2<='a' order by x",
                "table=[[test-catalog, db1, part, partitions=[], project=[x]]]",
                "[]"
            },
            {
                "select x from db1.part where p1 in (1,3,5) order by x",
                "table=[[test-catalog, db1, part, partitions=[{p1=1, p2=a}, {p1=3, p2=c}], project=[x]]]",
                "[+I[1], +I[3]]"
            },
            {
                "select x from db1.part where (p1=1 and p2='a') or ((p1=2 and p2='b') or p2='d') order by x",
                "table=[[test-catalog, db1, part, partitions=[{p1=1, p2=a}, {p1=2, p2=b}], project=[x]]]",
                "[+I[1], +I[2]]"
            },
            {
                "select x from db1.part where p2 = 'c:2' order by x",
                "table=[[test-catalog, db1, part, partitions=[{p1=4, p2=c:2}], project=[x]]]",
                "[+I[4]]"
            },
            {
                "select x from db1.part where '' = p2",
                "table=[[test-catalog, db1, part, partitions=[], project=[x]]]",
                "[]"
            }
        };

        for (String[] scenario : scenarios) {
            final String sql = scenario[0];
            final String expectedPlanFragment = scenario[1];
            final String expectedRows = scenario[2];

            Table query = tableEnv.sqlQuery(sql);
            // The explain text is split on its "== ... ==" section headers; index 2 is the
            // section the assertions below treat as the optimized plan.
            String[] explainSections = query.explain().split("==.*==\n");
            assertFalse(catalog.fallback);
            String optimizedPlan = explainSections[2];
            assertTrue(optimizedPlan, optimizedPlan.contains(expectedPlanFragment));

            List<Row> rows = CollectionUtil.iteratorToList(query.execute().collect());
            assertEquals(expectedRows, rows.toString());
        }
    } finally {
        // Always remove the database so later tests start from a clean catalog.
        tableEnv.executeSql("drop database db1 cascade");
    }
}
use of org.apache.flink.table.api.TableEnvironment in project flink by apache.
the class HiveLookupJoinITCase method testLookupJoinBoundedPartitionedTable.
/**
 * Fills {@code bounded_partition_table} through a batch environment, then joins the
 * {@code probe} table against it with a {@code FOR SYSTEM_TIME AS OF} lookup join and checks
 * the joined rows.
 *
 * <p>The query itself runs on the {@code tableEnv} field of the test class, not on the local
 * batch environment used for loading data.
 */
@Test
public void testLookupJoinBoundedPartitionedTable() throws Exception {
    // Load the test data; the trailing three columns of each tuple become partition values.
    final TableEnvironment batchEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    batchEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    batchEnv.useCatalog(hiveCatalog.getName());

    final String insertSql =
            "insert overwrite bounded_partition_table values "
                    + "(1,'a',08,2019,'08','01'),"
                    + "(1,'a',10,2020,'08','31'),"
                    + "(2,'a',21,2020,'08','31'),"
                    + "(2,'b',22,2020,'08','31')";
    batchEnv.executeSql(insertSql).await();

    final String lookupJoinSql =
            "select p.x, p.y, b.z, b.pt_year, b.pt_mon, b.pt_day from "
                    + " default_catalog.default_database.probe as p"
                    + " join bounded_partition_table for system_time as of p.p as b on p.x=b.x and p.y=b.y";
    final TableImpl joined = (TableImpl) tableEnv.sqlQuery(lookupJoinSql);

    final List<Row> joinedRows = CollectionUtil.iteratorToList(joined.execute().collect());
    assertEquals(
            "[+I[1, a, 8, 2019, 08, 01], +I[1, a, 10, 2020, 08, 31], +I[2, b, 22, 2020, 08, 31]]",
            joinedRows.toString());
}
use of org.apache.flink.table.api.TableEnvironment in project flink by apache.
the class HiveLookupJoinITCase method testPartitionFetcherAndReader.
/**
 * Exercises the partition fetcher and partition reader of the lookup function built for
 * {@code partition_table}: after loading rows into several partitions, the fetcher is expected
 * to return exactly one partition, and reading that partition is expected to yield only the
 * {@code (3,c,33,2020,09,31)} row.
 *
 * <p>The reader is closed in a {@code finally} block so that a failed read or assertion does
 * not leave it open.
 */
@Test
public void testPartitionFetcherAndReader() throws Exception {
    // constructs test data using dynamic partition
    TableEnvironment batchEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    batchEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    batchEnv.useCatalog(hiveCatalog.getName());
    batchEnv.executeSql(
                    "insert overwrite partition_table values "
                            + "(1,'a',08,2019,'08','01'),"
                            + "(1,'a',10,2020,'08','31'),"
                            + "(2,'a',21,2020,'08','31'),"
                            + "(2,'b',22,2020,'08','31'),"
                            + "(3,'c',33,2020,'09','31')")
            .await();

    FileSystemLookupFunction<HiveTablePartition> lookupFunction =
            getLookupFunction("partition_table");
    lookupFunction.open(null);

    PartitionFetcher<HiveTablePartition> fetcher = lookupFunction.getPartitionFetcher();
    PartitionFetcher.Context<HiveTablePartition> context = lookupFunction.getFetcherContext();
    List<HiveTablePartition> partitions = fetcher.fetch(context);
    // fetch latest partition by partition-name
    assertEquals(1, partitions.size());

    // Resolve the table schema before opening the reader, so that nothing that can throw
    // sits between open() and the try/finally that guarantees close().
    ObjectIdentifier tableIdentifier =
            ObjectIdentifier.of(hiveCatalog.getName(), "default", "partition_table");
    CatalogTable catalogTable =
            (CatalogTable) hiveCatalog.getTable(tableIdentifier.toObjectPath());
    GenericRowData reuse = new GenericRowData(catalogTable.getSchema().getFieldCount());
    TypeSerializer<RowData> serializer =
            InternalSerializers.create(
                    catalogTable.getSchema().toRowDataType().getLogicalType());

    List<RowData> res = new ArrayList<>();
    PartitionReader<HiveTablePartition, RowData> reader = lookupFunction.getPartitionReader();
    reader.open(partitions);
    try {
        RowData row;
        while ((row = reader.read(reuse)) != null) {
            // The same reuse object is handed to every read() call, so each row is copied
            // before being kept in the result list.
            res.add(serializer.copy(row));
        }
    } finally {
        reader.close();
    }

    res.sort(Comparator.comparingInt(o -> o.getInt(0)));
    assertEquals("[+I(3,c,33,2020,09,31)]", res.toString());
}
use of org.apache.flink.table.api.TableEnvironment in project flink by apache.
the class HiveLookupJoinITCase method testLookupJoinPartitionedTableWithCreateTime.
/**
 * Writes two batches into {@code partition_table_3}, the second one creating the
 * {@code 2020/08/01} partition, and then checks that a lookup join from {@code probe} only
 * sees the rows of that most recently written partition.
 *
 * <p>NOTE(review): the method name suggests the table is configured to pick the latest
 * partition by creation time; that configuration is not visible here, so confirm against the
 * table definition.
 */
@Test
public void testLookupJoinPartitionedTableWithCreateTime() throws Exception {
    final TableEnvironment batchEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    batchEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    batchEnv.useCatalog(hiveCatalog.getName());

    // First batch: populate a number of partitions via dynamic partitioning.
    final String initialLoadSql =
            "insert overwrite partition_table_3 values "
                    + "(1,'a',08,2020,'month1','01'),"
                    + "(1,'a',10,2020,'month2','02'),"
                    + "(2,'a',21,2020,'month1','02'),"
                    + "(2,'b',22,2020,'month3','20'),"
                    + "(3,'c',22,2020,'month3','20'),"
                    + "(3,'c',33,2017,'08','31'),"
                    + "(1,'a',101,2017,'09','01'),"
                    + "(2,'a',121,2019,'09','01'),"
                    + "(2,'b',122,2019,'09','01')";
    batchEnv.executeSql(initialLoadSql).await();

    // Second batch: insert a new partition (2020/08/01) after the others exist.
    final String newPartitionSql =
            "insert overwrite partition_table_3 values "
                    + "(1,'a',101,2020,'08','01'),"
                    + "(2,'a',121,2020,'08','01'),"
                    + "(2,'b',122,2020,'08','01')";
    batchEnv.executeSql(newPartitionSql).await();

    final String lookupJoinSql =
            "select p.x, p.y, b.z, b.pt_year, b.pt_mon, b.pt_day from "
                    + " default_catalog.default_database.probe as p"
                    + " join partition_table_3 for system_time as of p.p as b on p.x=b.x and p.y=b.y";
    final TableImpl joined = (TableImpl) tableEnv.sqlQuery(lookupJoinSql);

    final List<Row> joinedRows = CollectionUtil.iteratorToList(joined.execute().collect());
    assertEquals(
            "[+I[1, a, 101, 2020, 08, 01], +I[2, b, 122, 2020, 08, 01]]",
            joinedRows.toString());
}
use of org.apache.flink.table.api.TableEnvironment in project flink by apache.
the class HiveLookupJoinITCase method testLookupJoinTableWithColumnarStorage.
/**
 * Copies 4096 single-column rows from a bounded values source into {@code columnar_table},
 * then left-joins the source against that table with a lookup join and keeps only the rows
 * that found no match. Every source row is expected to find its counterpart, so the result
 * must be empty.
 */
@Test
public void testLookupJoinTableWithColumnarStorage() throws Exception {
    // The DEFAULT_SIZE of VectorizedColumnBatch is 2048, so the test table needs at least
    // 2048 records; 4096 are written here.
    final int rowCount = 4096;
    final List<Row> sourceRows = new ArrayList<>(rowCount);
    for (int value = 0; value < rowCount; value++) {
        sourceRows.add(Row.of(String.valueOf(value)));
    }

    // Expose the generated rows through a bounded 'values' table in the Hive catalog.
    final TableEnvironment batchEnv =
            HiveTestUtils.createTableEnvInBatchMode(SqlDialect.DEFAULT);
    batchEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    batchEnv.useCatalog(hiveCatalog.getName());
    final String dataId = TestValuesTableFactory.registerData(sourceRows);
    final String createSourceSql =
            String.format(
                    "create table value_source(x string, p as proctime()) with ("
                            + "'connector' = 'values', 'data-id' = '%s', 'bounded'='true')",
                    dataId);
    batchEnv.executeSql(createSourceSql);
    batchEnv.executeSql("insert overwrite columnar_table select x from value_source").await();

    // Anti-join style check: rows with c.x null are source rows that failed to look up.
    final String unmatchedRowsSql =
            "select t.x as x1, c.x as x2 from value_source t "
                    + "left join columnar_table for system_time as of t.p c "
                    + "on t.x = c.x where c.x is null";
    final TableImpl unmatched = (TableImpl) tableEnv.sqlQuery(unmatchedRowsSql);

    final List<Row> unmatchedRows =
            CollectionUtil.iteratorToList(unmatched.execute().collect());
    assertTrue(
            "All records should be able to be joined, and the final results should be empty.",
            unmatchedRows.isEmpty());
}
Aggregations