Use of org.apache.flink.table.api.TableEnvironment in project flink by apache.
From the class HiveCatalogUdfITCase, method testUdf.
private void testUdf(boolean batch) throws Exception {
    StreamExecutionEnvironment env = null;
    TableEnvironment tEnv;
    EnvironmentSettings.Builder settingsBuilder = EnvironmentSettings.newInstance();
    if (batch) {
        settingsBuilder.inBatchMode();
    } else {
        settingsBuilder.inStreamingMode();
    }
    if (batch) {
        tEnv = TableEnvironment.create(settingsBuilder.build());
    } else {
        env = StreamExecutionEnvironment.getExecutionEnvironment();
        tEnv = StreamTableEnvironment.create(env, settingsBuilder.build());
    }
    BatchTestBase.configForMiniCluster(tEnv.getConfig());
    tEnv.registerCatalog("myhive", hiveCatalog);
    tEnv.useCatalog("myhive");
    String innerSql =
            format(
                    "select mygenericudf(myudf(name), 1) as a, mygenericudf(myudf(age), 1) as b,"
                            + " s from %s, lateral table(myudtf(name, 1)) as T(s)",
                    sourceTableName);
    String selectSql = format("select a, s, sum(b), myudaf(b) from (%s) group by a, s", innerSql);
    List<String> results;
    if (batch) {
        Path p = Paths.get(tempFolder.newFolder().getAbsolutePath(), "test.csv");
        final TableSchema sinkSchema =
                TableSchema.builder()
                        .field("name1", Types.STRING())
                        .field("name2", Types.STRING())
                        .field("sum1", Types.INT())
                        .field("sum2", Types.LONG())
                        .build();
        final Map<String, String> sinkOptions = new HashMap<>();
        sinkOptions.put("connector.type", "filesystem");
        sinkOptions.put("connector.path", p.toAbsolutePath().toString());
        sinkOptions.put("format.type", "csv");
        final CatalogTable sink = new CatalogTableImpl(sinkSchema, sinkOptions, "Comment.");
        hiveCatalog.createTable(new ObjectPath(HiveCatalog.DEFAULT_DB, sinkTableName), sink, false);
        tEnv.executeSql(format("insert into %s " + selectSql, sinkTableName)).await();
        // assert written result
        StringBuilder builder = new StringBuilder();
        try (Stream<Path> paths = Files.walk(Paths.get(p.toAbsolutePath().toString()))) {
            paths.filter(Files::isRegularFile).forEach(path -> {
                try {
                    String content = FileUtils.readFileUtf8(path.toFile());
                    if (content.isEmpty()) {
                        return;
                    }
                    builder.append(content);
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            });
        }
        results = Arrays.stream(builder.toString().split("\n"))
                .filter(s -> !s.isEmpty())
                .collect(Collectors.toList());
    } else {
        StreamTableEnvironment streamTEnv = (StreamTableEnvironment) tEnv;
        TestingRetractSink sink = new TestingRetractSink();
        streamTEnv.toRetractStream(tEnv.sqlQuery(selectSql), Row.class)
                .map(new JavaToScala())
                .addSink((SinkFunction) sink);
        env.execute("");
        results = JavaScalaConversionUtil.toJava(sink.getRetractResults());
    }
    results = new ArrayList<>(results);
    results.sort(String::compareTo);
    Assert.assertEquals(Arrays.asList("1,1,2,2", "2,2,4,4", "3,3,6,6"), results);
}
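The query above relies on the Hive functions myudf, mygenericudf, myudtf and myudaf already being registered in the catalog by the test setup. A minimal sketch of such a registration using the catalog API (ObjectPath, CatalogFunctionImpl); the UDF implementation classes MySimpleUdf, MyGenericUdf, MyUdtf and MyUdaf are placeholders, not the classes used by the real test:

// Sketch only: register the functions referenced by testUdf in the Hive catalog.
// MySimpleUdf, MyGenericUdf, MyUdtf and MyUdaf are hypothetical UDF classes.
private static void registerTestFunctions(HiveCatalog hiveCatalog) throws Exception {
    hiveCatalog.createFunction(
            new ObjectPath(HiveCatalog.DEFAULT_DB, "myudf"),
            new CatalogFunctionImpl(MySimpleUdf.class.getCanonicalName()),
            false);
    hiveCatalog.createFunction(
            new ObjectPath(HiveCatalog.DEFAULT_DB, "mygenericudf"),
            new CatalogFunctionImpl(MyGenericUdf.class.getCanonicalName()),
            false);
    hiveCatalog.createFunction(
            new ObjectPath(HiveCatalog.DEFAULT_DB, "myudtf"),
            new CatalogFunctionImpl(MyUdtf.class.getCanonicalName()),
            false);
    hiveCatalog.createFunction(
            new ObjectPath(HiveCatalog.DEFAULT_DB, "myudaf"),
            new CatalogFunctionImpl(MyUdaf.class.getCanonicalName()),
            false);
}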
Use of org.apache.flink.table.api.TableEnvironment in project flink by apache.
From the class HiveCatalogUdfITCase, method testDateUDF.
@Test
public void testDateUDF() throws Exception {
    TableEnvironment tableEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    tableEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    tableEnv.useCatalog(hiveCatalog.getName());
    tableEnv.executeSql(String.format("create function mymonth as '%s'", UDFMonth.class.getName()));
    tableEnv.executeSql("create table src(dt date)");
    try {
        HiveTestUtils.createTextTableInserter(hiveCatalog, "default", "src")
                .addRow(new Object[] { Date.valueOf("2019-01-19") })
                .addRow(new Object[] { Date.valueOf("2019-03-02") })
                .commit();
        List<Row> results =
                CollectionUtil.iteratorToList(
                        tableEnv.sqlQuery("select mymonth(dt) as m from src order by m")
                                .execute()
                                .collect());
        Assert.assertEquals(2, results.size());
        Assert.assertEquals("[+I[1], +I[3]]", results.toString());
    } finally {
        tableEnv.executeSql("drop table src");
    }
}
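HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE) is a test utility. Outside the test harness, a roughly equivalent environment can be built with the public Table API; a minimal sketch (the test utility may apply additional test-specific configuration):

// Sketch: batch TableEnvironment with the Hive SQL dialect, using only the public API.
EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build();
TableEnvironment tableEnv = TableEnvironment.create(settings);
// switch the parser to the Hive dialect, as the test does via SqlDialect.HIVE
tableEnv.getConfig().setSqlDialect(SqlDialect.HIVE);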
Use of org.apache.flink.table.api.TableEnvironment in project flink by apache.
From the class UpdatingTopCityExample, method main.
public static void main(String[] args) throws Exception {
    // prepare the session
    final EnvironmentSettings settings = EnvironmentSettings.newInstance().inStreamingMode().build();
    final TableEnvironment env = TableEnvironment.create(settings);
    // create an empty temporary CSV directory for this example
    final String populationDirPath = createTemporaryDirectory();
    // register a table in the catalog that points to the CSV directory
    env.executeSql(
            "CREATE TABLE PopulationUpdates ("
                    + " city STRING," + " state STRING," + " update_year INT," + " population_diff INT"
                    + ") WITH ("
                    + " 'connector' = 'filesystem'," + " 'path' = '" + populationDirPath + "'," + " 'format' = 'csv'"
                    + ")");
    // insert some example data into the table
    final TableResult insertionResult =
            env.executeSql(
                    "INSERT INTO PopulationUpdates VALUES"
                            + " ('Los Angeles', 'CA', 2013, 13106100), " + " ('Los Angeles', 'CA', 2014, 72600), " + " ('Los Angeles', 'CA', 2015, 72300), "
                            + " ('Chicago', 'IL', 2013, 9553270), " + " ('Chicago', 'IL', 2014, 11340), " + " ('Chicago', 'IL', 2015, -6730), "
                            + " ('Houston', 'TX', 2013, 6330660), " + " ('Houston', 'TX', 2014, 172960), " + " ('Houston', 'TX', 2015, 172940), "
                            + " ('Phoenix', 'AZ', 2013, 4404680), " + " ('Phoenix', 'AZ', 2014, 86740), " + " ('Phoenix', 'AZ', 2015, 89700), "
                            + " ('San Antonio', 'TX', 2013, 2280580), " + " ('San Antonio', 'TX', 2014, 49180), " + " ('San Antonio', 'TX', 2015, 50870), "
                            + " ('San Francisco', 'CA', 2013, 4521310), " + " ('San Francisco', 'CA', 2014, 65940), " + " ('San Francisco', 'CA', 2015, 62290), "
                            + " ('Dallas', 'TX', 2013, 6817520), " + " ('Dallas', 'TX', 2014, 137740), " + " ('Dallas', 'TX', 2015, 154020)");
    // since all cluster operations of the Table API are executed asynchronously,
    // we need to wait until the insertion has been completed;
    // an exception is thrown in case of an error
    insertionResult.await();
    // read from the table and aggregate the total population per city
    final Table currentPopulation =
            env.sqlQuery(
                    "SELECT city, state, MAX(update_year) AS latest_year, SUM(population_diff) AS population "
                            + "FROM PopulationUpdates "
                            + "GROUP BY city, state");
    // either define a nested SQL statement with sub-queries
    // or divide the problem into sub-views which will be optimized
    // as a whole during planning
    env.createTemporaryView("CurrentPopulation", currentPopulation);
    // find the top 2 cities with the highest population per state;
    // we use a sub-query that is correlated with every unique state,
    // and for every state we rank by population and return the top 2 cities
    final Table topCitiesPerState =
            env.sqlQuery(
                    "SELECT state, city, latest_year, population "
                            + "FROM "
                            + " (SELECT DISTINCT state FROM CurrentPopulation) States,"
                            + " LATERAL ("
                            + " SELECT city, latest_year, population"
                            + " FROM CurrentPopulation"
                            + " WHERE state = States.state"
                            + " ORDER BY population DESC, latest_year DESC"
                            + " LIMIT 2"
                            + " )");
    // consume the changelog via execute().collect() and materialize the updates in a List
    try (CloseableIterator<Row> iterator = topCitiesPerState.execute().collect()) {
        final List<Row> materializedUpdates = new ArrayList<>();
        iterator.forEachRemaining(row -> {
            final RowKind kind = row.getKind();
            switch (kind) {
                case INSERT:
                case UPDATE_AFTER:
                    // normalize the change flag so that rows compare fully equal
                    row.setKind(RowKind.INSERT);
                    materializedUpdates.add(row);
                    break;
                case UPDATE_BEFORE:
                case DELETE:
                    // normalize the change flag so that rows compare fully equal
                    row.setKind(RowKind.INSERT);
                    materializedUpdates.remove(row);
                    break;
            }
        });
        // show the final output table if the result is bounded;
        // the output should exclude San Antonio because it has a smaller population than
        // Houston or Dallas in Texas (TX)
        materializedUpdates.forEach(System.out::println);
    }
}
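The example calls a private helper createTemporaryDirectory() that is not shown here. A minimal sketch of what such a helper needs to do, namely return the path of an empty directory that the filesystem connector can write to:

// Sketch of the createTemporaryDirectory() helper referenced above.
private static String createTemporaryDirectory() throws IOException {
    final java.nio.file.Path tempDirectory = java.nio.file.Files.createTempDirectory("population");
    return tempDirectory.toString();
}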
Use of org.apache.flink.table.api.TableEnvironment in project flink by apache.
From the class WordCountSQLExample, method main.
public static void main(String[] args) throws Exception {
    // set up the Table API
    final EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build();
    final TableEnvironment tableEnv = TableEnvironment.create(settings);
    // execute a Flink SQL job and print the result locally
    tableEnv.executeSql(
                    // define the aggregation
                    "SELECT word, SUM(frequency) AS `count`\n"
                            // read from an artificial fixed-size table with rows and columns
                            + "FROM (\n"
                            + " VALUES ('Hello', 1), ('Ciao', 1), ('Hello', 2)\n"
                            + ")\n"
                            // name the table and its columns
                            + "AS WordTable(word, frequency)\n"
                            // group for aggregation
                            + "GROUP BY word")
            .print();
}
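For comparison, the same word count can be expressed with the Table API instead of a SQL string. A sketch, assuming the static imports Expressions.$ and Expressions.row from org.apache.flink.table.api:

// Sketch: Table API equivalent of the SQL word count above.
// Assumes: import static org.apache.flink.table.api.Expressions.$;
//          import static org.apache.flink.table.api.Expressions.row;
tableEnv.fromValues(row("Hello", 1), row("Ciao", 1), row("Hello", 2))
        .as("word", "frequency")
        .groupBy($("word"))
        .select($("word"), $("frequency").sum().as("count"))
        .execute()
        .print();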
Use of org.apache.flink.table.api.TableEnvironment in project flink by apache.
From the class TpcdsTestProgram, method prepareTableEnv.
/**
 * Prepare the TableEnvironment for a TPC-DS query.
 *
 * @param sourceTablePath path of the directory that contains the TPC-DS source data files
 * @param useTableStats whether to register table and column statistics for the optimizer
 * @return a TableEnvironment with all TPC-DS tables registered
 */
private static TableEnvironment prepareTableEnv(String sourceTablePath, Boolean useTableStats) {
    // init Table Env
    EnvironmentSettings environmentSettings = EnvironmentSettings.inBatchMode();
    TableEnvironment tEnv = TableEnvironment.create(environmentSettings);
    // config Optimizer parameters
    // TODO use the default shuffle mode of batch runtime mode once FLINK-23470 is implemented
    tEnv.getConfig()
            .getConfiguration()
            .setString(
                    ExecutionConfigOptions.TABLE_EXEC_SHUFFLE_MODE,
                    GlobalStreamExchangeMode.POINTWISE_EDGES_PIPELINED.toString());
    tEnv.getConfig()
            .getConfiguration()
            .setLong(OptimizerConfigOptions.TABLE_OPTIMIZER_BROADCAST_JOIN_THRESHOLD, 10 * 1024 * 1024);
    tEnv.getConfig()
            .getConfiguration()
            .setBoolean(OptimizerConfigOptions.TABLE_OPTIMIZER_JOIN_REORDER_ENABLED, true);
    // register TPC-DS tables
    TPCDS_TABLES.forEach(table -> {
        TpcdsSchema schema = TpcdsSchemaProvider.getTableSchema(table);
        CsvTableSource.Builder builder = CsvTableSource.builder();
        builder.path(sourceTablePath + FILE_SEPARATOR + table + DATA_SUFFIX);
        for (int i = 0; i < schema.getFieldNames().size(); i++) {
            builder.field(
                    schema.getFieldNames().get(i),
                    TypeConversions.fromDataTypeToLegacyInfo(schema.getFieldTypes().get(i)));
        }
        builder.fieldDelimiter(COL_DELIMITER);
        builder.emptyColumnAsNull();
        builder.lineDelimiter("\n");
        CsvTableSource tableSource = builder.build();
        ConnectorCatalogTable catalogTable = ConnectorCatalogTable.source(tableSource, true);
        tEnv.getCatalog(tEnv.getCurrentCatalog()).ifPresent(catalog -> {
            try {
                catalog.createTable(new ObjectPath(tEnv.getCurrentDatabase(), table), catalogTable, false);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        });
    });
    // register statistics info
    if (useTableStats) {
        TpcdsStatsProvider.registerTpcdsStats(tEnv);
    }
    return tEnv;
}
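CsvTableSource and ConnectorCatalogTable belong to the legacy source stack; with the unified filesystem connector the same registration could be expressed per table as DDL. A sketch for a single TPC-DS table, assuming a '|' column delimiter and a '.dat' file suffix (mirroring COL_DELIMITER and DATA_SUFFIX above) and an abbreviated column list:

// Sketch: DDL-based alternative to the legacy CsvTableSource registration above.
// The column list is abbreviated; the real call_center schema has many more fields.
tEnv.executeSql(
        "CREATE TABLE call_center ("
                + " cc_call_center_sk BIGINT,"
                + " cc_call_center_id STRING,"
                + " cc_name STRING"
                + ") WITH ("
                + " 'connector' = 'filesystem',"
                + " 'path' = '" + sourceTablePath + "/call_center.dat',"
                + " 'format' = 'csv',"
                + " 'csv.field-delimiter' = '|',"
                + " 'csv.null-literal' = ''"
                + ")");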