Use of org.apache.flink.table.api.EnvironmentSettings in project flink by apache.
The class HiveCatalogITCase, method prepareTable.
private TableEnvironment prepareTable(boolean isStreaming) {
    EnvironmentSettings settings;
    if (isStreaming) {
        settings = EnvironmentSettings.inStreamingMode();
    } else {
        settings = EnvironmentSettings.inBatchMode();
    }
    TableEnvironment tableEnv = TableEnvironment.create(settings);
    tableEnv.getConfig().getConfiguration().setInteger(TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 1);
    tableEnv.registerCatalog("myhive", hiveCatalog);
    tableEnv.useCatalog("myhive");
    String srcPath = this.getClass().getResource("/csv/test3.csv").getPath();
    tableEnv.executeSql(
            "CREATE TABLE proctime_src ("
                    + "price DECIMAL(10, 2),"
                    + "currency STRING,"
                    + "ts6 TIMESTAMP(6),"
                    + "ts AS CAST(ts6 AS TIMESTAMP(3)),"
                    + "WATERMARK FOR ts AS ts,"
                    + "l_proctime AS PROCTIME( )) " // test " " in proctime()
                    + String.format(
                            "WITH ("
                                    + "'connector.type' = 'filesystem',"
                                    + "'connector.path' = 'file://%s',"
                                    + "'format.type' = 'csv')",
                            srcPath));
    return tableEnv;
}
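The streaming/batch switch above is the whole job of EnvironmentSettings. A minimal standalone sketch of just that part, assuming only a Flink Table API dependency (the class name and the trivial query are illustrative, not taken from the test):

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class ModeSelectionSketch {
    public static void main(String[] args) {
        // choose the execution mode up front; the planner is selected accordingly
        boolean isStreaming = args.length > 0 && "streaming".equals(args[0]);
        EnvironmentSettings settings =
                isStreaming ? EnvironmentSettings.inStreamingMode() : EnvironmentSettings.inBatchMode();
        TableEnvironment tableEnv = TableEnvironment.create(settings);
        // run a trivial query to confirm the environment is usable
        tableEnv.executeSql("SELECT 'hello' AS greeting").print();
    }
}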
Use of org.apache.flink.table.api.EnvironmentSettings in project flink by apache.
The class UpdatingTopCityExample, method main.
public static void main(String[] args) throws Exception {
    // prepare the session
    final EnvironmentSettings settings = EnvironmentSettings.newInstance().inStreamingMode().build();
    final TableEnvironment env = TableEnvironment.create(settings);
    // create an empty temporary CSV directory for this example
    final String populationDirPath = createTemporaryDirectory();
    // register a table in the catalog that points to the CSV file
    env.executeSql(
            "CREATE TABLE PopulationUpdates ("
                    + "  city STRING,"
                    + "  state STRING,"
                    + "  update_year INT,"
                    + "  population_diff INT"
                    + ") WITH ("
                    + "  'connector' = 'filesystem',"
                    + "  'path' = '" + populationDirPath + "',"
                    + "  'format' = 'csv'"
                    + ")");
    // insert some example data into the table
    final TableResult insertionResult =
            env.executeSql(
                    "INSERT INTO PopulationUpdates VALUES"
                            + " ('Los Angeles', 'CA', 2013, 13106100), ('Los Angeles', 'CA', 2014, 72600), ('Los Angeles', 'CA', 2015, 72300),"
                            + " ('Chicago', 'IL', 2013, 9553270), ('Chicago', 'IL', 2014, 11340), ('Chicago', 'IL', 2015, -6730),"
                            + " ('Houston', 'TX', 2013, 6330660), ('Houston', 'TX', 2014, 172960), ('Houston', 'TX', 2015, 172940),"
                            + " ('Phoenix', 'AZ', 2013, 4404680), ('Phoenix', 'AZ', 2014, 86740), ('Phoenix', 'AZ', 2015, 89700),"
                            + " ('San Antonio', 'TX', 2013, 2280580), ('San Antonio', 'TX', 2014, 49180), ('San Antonio', 'TX', 2015, 50870),"
                            + " ('San Francisco', 'CA', 2013, 4521310), ('San Francisco', 'CA', 2014, 65940), ('San Francisco', 'CA', 2015, 62290),"
                            + " ('Dallas', 'TX', 2013, 6817520), ('Dallas', 'TX', 2014, 137740), ('Dallas', 'TX', 2015, 154020)");
    // since all cluster operations of the Table API are executed asynchronously,
    // we need to wait until the insertion has completed;
    // an exception is thrown in case of an error
    insertionResult.await();
    // read from the table and aggregate the total population per city
    final Table currentPopulation =
            env.sqlQuery(
                    "SELECT city, state, MAX(update_year) AS latest_year, SUM(population_diff) AS population "
                            + "FROM PopulationUpdates "
                            + "GROUP BY city, state");
    // either define a nested SQL statement with sub-queries
    // or divide the problem into sub-views which will be optimized
    // as a whole during planning
    env.createTemporaryView("CurrentPopulation", currentPopulation);
    // find the top 2 cities with the highest population per state;
    // we use a sub-query that is correlated with every unique state:
    // for every state we rank by population and return the top 2 cities
    final Table topCitiesPerState =
            env.sqlQuery(
                    "SELECT state, city, latest_year, population "
                            + "FROM"
                            + "  (SELECT DISTINCT state FROM CurrentPopulation) States,"
                            + "  LATERAL ("
                            + "    SELECT city, latest_year, population"
                            + "    FROM CurrentPopulation"
                            + "    WHERE state = States.state"
                            + "    ORDER BY population DESC, latest_year DESC"
                            + "    LIMIT 2"
                            + "  )");
    // the changelog of the query is piped into the local client
    // using execute().collect() and a List in which we materialize the updates
    try (CloseableIterator<Row> iterator = topCitiesPerState.execute().collect()) {
        final List<Row> materializedUpdates = new ArrayList<>();
        iterator.forEachRemaining(
                row -> {
                    final RowKind kind = row.getKind();
                    switch (kind) {
                        case INSERT:
                        case UPDATE_AFTER:
                            // reset the kind so Row#equals sees full equality later
                            row.setKind(RowKind.INSERT);
                            materializedUpdates.add(row);
                            break;
                        case UPDATE_BEFORE:
                        case DELETE:
                            // reset the kind so the retraction matches the stored row
                            row.setKind(RowKind.INSERT);
                            materializedUpdates.remove(row);
                            break;
                    }
                });
        // show the final output table if the result is bounded;
        // the output should exclude San Antonio because it has a smaller population than
        // Houston or Dallas in Texas (TX)
        materializedUpdates.forEach(System.out::println);
    }
}
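The kind-reset trick works because Row#equals compares the RowKind together with the field values, so a retraction row only matches the previously stored row once both carry the same kind. A tiny sketch to illustrate, assuming only the flink-core Row type (the field values are Houston's summed population from the example data):

import org.apache.flink.types.Row;
import org.apache.flink.types.RowKind;

public class RowKindEqualitySketch {
    public static void main(String[] args) {
        Row retraction = Row.ofKind(RowKind.UPDATE_BEFORE, "Houston", 6676560);
        Row stored = Row.ofKind(RowKind.INSERT, "Houston", 6676560);
        // same fields, different kinds: List.remove(retraction) would miss the stored row
        System.out.println(retraction.equals(stored)); // false
        // resetting the kind makes the two rows fully equal
        retraction.setKind(RowKind.INSERT);
        System.out.println(retraction.equals(stored)); // true
    }
}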
Use of org.apache.flink.table.api.EnvironmentSettings in project flink by apache.
The class WordCountSQLExample, method main.
public static void main(String[] args) throws Exception {
    // set up the Table API
    final EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build();
    final TableEnvironment tableEnv = TableEnvironment.create(settings);
    // execute a Flink SQL job and print the result locally
    tableEnv.executeSql(
                    // define the aggregation
                    "SELECT word, SUM(frequency) AS `count`\n"
                            // read from an artificial fixed-size table with rows and columns
                            + "FROM (\n"
                            + "  VALUES ('Hello', 1), ('Ciao', 1), ('Hello', 2)\n"
                            + ")\n"
                            // name the table and its columns
                            + "AS WordTable(word, frequency)\n"
                            // group for aggregation
                            + "GROUP BY word")
            .print();
}
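For comparison, the same fixed-size input can be expressed with the Table API instead of a SQL VALUES clause. A minimal sketch, not part of the original example (fromValues assigns default column names, which are renamed via as()):

import static org.apache.flink.table.api.Expressions.$;

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.types.Row;

public class WordCountTableApiSketch {
    public static void main(String[] args) {
        TableEnvironment tableEnv =
                TableEnvironment.create(EnvironmentSettings.newInstance().inBatchMode().build());
        // build the artificial input table from literal rows
        Table words =
                tableEnv.fromValues(Row.of("Hello", 1), Row.of("Ciao", 1), Row.of("Hello", 2))
                        .as("word", "frequency");
        // group and sum, equivalent to the SQL aggregation above
        words.groupBy($("word"))
                .select($("word"), $("frequency").sum().as("cnt"))
                .execute()
                .print();
    }
}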
Use of org.apache.flink.table.api.EnvironmentSettings in project flink by apache.
The class TpcdsTestProgram, method prepareTableEnv.
/**
 * Prepares a TableEnvironment for running the TPC-DS queries.
 *
 * @param sourceTablePath directory containing the generated TPC-DS table data files
 * @param useTableStats whether to register table and column statistics for the optimizer
 * @return a TableEnvironment with all TPC-DS tables registered
 */
private static TableEnvironment prepareTableEnv(String sourceTablePath, Boolean useTableStats) {
    // init Table Env
    EnvironmentSettings environmentSettings = EnvironmentSettings.inBatchMode();
    TableEnvironment tEnv = TableEnvironment.create(environmentSettings);
    // config Optimizer parameters
    // TODO use the default shuffle mode of batch runtime mode once FLINK-23470 is implemented
    tEnv.getConfig()
            .getConfiguration()
            .setString(
                    ExecutionConfigOptions.TABLE_EXEC_SHUFFLE_MODE,
                    GlobalStreamExchangeMode.POINTWISE_EDGES_PIPELINED.toString());
    tEnv.getConfig()
            .getConfiguration()
            .setLong(OptimizerConfigOptions.TABLE_OPTIMIZER_BROADCAST_JOIN_THRESHOLD, 10 * 1024 * 1024);
    tEnv.getConfig()
            .getConfiguration()
            .setBoolean(OptimizerConfigOptions.TABLE_OPTIMIZER_JOIN_REORDER_ENABLED, true);
    // register TPC-DS tables
    TPCDS_TABLES.forEach(
            table -> {
                TpcdsSchema schema = TpcdsSchemaProvider.getTableSchema(table);
                CsvTableSource.Builder builder = CsvTableSource.builder();
                builder.path(sourceTablePath + FILE_SEPARATOR + table + DATA_SUFFIX);
                for (int i = 0; i < schema.getFieldNames().size(); i++) {
                    builder.field(
                            schema.getFieldNames().get(i),
                            TypeConversions.fromDataTypeToLegacyInfo(schema.getFieldTypes().get(i)));
                }
                builder.fieldDelimiter(COL_DELIMITER);
                builder.emptyColumnAsNull();
                builder.lineDelimiter("\n");
                CsvTableSource tableSource = builder.build();
                ConnectorCatalogTable catalogTable = ConnectorCatalogTable.source(tableSource, true);
                tEnv.getCatalog(tEnv.getCurrentCatalog())
                        .ifPresent(
                                catalog -> {
                                    try {
                                        catalog.createTable(
                                                new ObjectPath(tEnv.getCurrentDatabase(), table),
                                                catalogTable,
                                                false);
                                    } catch (Exception e) {
                                        throw new RuntimeException(e);
                                    }
                                });
            });
    // register statistics info
    if (useTableStats) {
        TpcdsStatsProvider.registerTpcdsStats(tEnv);
    }
    return tEnv;
}
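A hypothetical call site for this helper, from inside the same class (the data path is a placeholder, and the sanity-check query is illustrative; store_sales is one of the TPC-DS tables the loop registers):

TableEnvironment tEnv = prepareTableEnv("/data/tpcds/1g", true);
// any TPC-DS query can now run against the registered tables, e.g. a quick row count
tEnv.executeSql("SELECT COUNT(*) AS cnt FROM store_sales").print();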
Use of org.apache.flink.table.api.EnvironmentSettings in project flink by apache.
The class SessionContext, method create.
// --------------------------------------------------------------------------------------------
// Helper method to create a SessionContext
// --------------------------------------------------------------------------------------------
public static SessionContext create(DefaultContext defaultContext, String sessionId) {
    // --------------------------------------------------------------------------------------------------------------
    // Init config
    // --------------------------------------------------------------------------------------------------------------
    Configuration configuration = defaultContext.getFlinkConfig().clone();
    // --------------------------------------------------------------------------------------------------------------
    // Init classloader
    // --------------------------------------------------------------------------------------------------------------
    URLClassLoader classLoader =
            ClientUtils.buildUserCodeClassLoader(
                    defaultContext.getDependencies(),
                    Collections.emptyList(),
                    SessionContext.class.getClassLoader(),
                    configuration);
    // --------------------------------------------------------------------------------------------------------------
    // Init session state
    // --------------------------------------------------------------------------------------------------------------
    ModuleManager moduleManager = new ModuleManager();
    final EnvironmentSettings settings = EnvironmentSettings.fromConfiguration(configuration);
    CatalogManager catalogManager =
            CatalogManager.newBuilder()
                    .classLoader(classLoader)
                    .config(configuration)
                    .defaultCatalog(
                            settings.getBuiltInCatalogName(),
                            new GenericInMemoryCatalog(
                                    settings.getBuiltInCatalogName(),
                                    settings.getBuiltInDatabaseName()))
                    .build();
    FunctionCatalog functionCatalog = new FunctionCatalog(configuration, catalogManager, moduleManager);
    SessionState sessionState = new SessionState(catalogManager, moduleManager, functionCatalog);
    // --------------------------------------------------------------------------------------------------------------
    // Init ExecutionContext
    // --------------------------------------------------------------------------------------------------------------
    ExecutionContext executionContext = new ExecutionContext(configuration, classLoader, sessionState);
    return new SessionContext(defaultContext, sessionId, configuration, classLoader, sessionState, executionContext);
}
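The fromConfiguration call above derives the settings, including the built-in catalog and database names used when building the CatalogManager, from a plain Flink Configuration. A minimal sketch of that derivation in isolation (the runtime-mode override is illustrative; the printed names are the Flink defaults):

import org.apache.flink.configuration.Configuration;
import org.apache.flink.table.api.EnvironmentSettings;

public class SettingsFromConfigSketch {
    public static void main(String[] args) {
        Configuration configuration = new Configuration();
        configuration.setString("execution.runtime-mode", "batch"); // illustrative override
        EnvironmentSettings settings = EnvironmentSettings.fromConfiguration(configuration);
        // with no overrides, the built-in names fall back to the defaults
        System.out.println(settings.getBuiltInCatalogName());  // default_catalog
        System.out.println(settings.getBuiltInDatabaseName()); // default_database
    }
}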