Search in sources :

Example 1 with EnvironmentSettings

use of org.apache.flink.table.api.EnvironmentSettings in project flink by apache.

the class HiveCatalogITCase method prepareTable.

/**
 * Creates a {@link TableEnvironment} in streaming or batch mode, registers {@code hiveCatalog}
 * under the name {@code myhive}, makes it the current catalog and creates the
 * {@code proctime_src} table backed by the {@code /csv/test3.csv} classpath resource.
 *
 * @param isStreaming {@code true} to use streaming-mode settings, {@code false} for batch mode
 * @return the table environment after the {@code proctime_src} DDL has been executed
 */
private TableEnvironment prepareTable(boolean isStreaming) {
    final EnvironmentSettings settings =
            isStreaming
                    ? EnvironmentSettings.inStreamingMode()
                    : EnvironmentSettings.inBatchMode();
    final TableEnvironment tableEnv = TableEnvironment.create(settings);
    tableEnv.getConfig().getConfiguration().setInteger(TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 1);
    tableEnv.registerCatalog("myhive", hiveCatalog);
    tableEnv.useCatalog("myhive");

    final String csvPath = this.getClass().getResource("/csv/test3.csv").getPath();

    // The blank inside "PROCTIME( )" is deliberate: it tests " " in proctime().
    final String schemaClause =
            "CREATE TABLE proctime_src ("
                    + "price DECIMAL(10, 2),"
                    + "currency STRING,"
                    + "ts6 TIMESTAMP(6),"
                    + "ts AS CAST(ts6 AS TIMESTAMP(3)),"
                    + "WATERMARK FOR ts AS ts,"
                    + "l_proctime AS PROCTIME( )) ";
    final String withClause =
            String.format(
                    "WITH ("
                            + "'connector.type' = 'filesystem',"
                            + "'connector.path' = 'file://%s',"
                            + "'format.type' = 'csv')",
                    csvPath);

    tableEnv.executeSql(schemaClause + withClause);
    return tableEnv;
}
Also used : EnvironmentSettings(org.apache.flink.table.api.EnvironmentSettings) TableEnvironment(org.apache.flink.table.api.TableEnvironment)

Example 2 with EnvironmentSettings

use of org.apache.flink.table.api.EnvironmentSettings in project flink by apache.

the class UpdatingTopCityExample method main.

/**
 * Runs the example end to end: creates a filesystem-backed CSV table, inserts population
 * updates, aggregates them per city, selects the top 2 cities per state and prints the
 * materialized result.
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception if any of the executed statements or the result iteration fails
 */
public static void main(String[] args) throws Exception {
    // prepare the session in streaming mode
    final TableEnvironment env =
            TableEnvironment.create(EnvironmentSettings.newInstance().inStreamingMode().build());

    // create an empty temporary CSV directory for this example
    final String populationDirPath = createTemporaryDirectory();

    // register a table in the catalog that points to the CSV file
    final String createTableStatement =
            "CREATE TABLE PopulationUpdates ("
                    + "  city STRING,"
                    + "  state STRING,"
                    + "  update_year INT,"
                    + "  population_diff INT"
                    + ") WITH ("
                    + "  'connector' = 'filesystem',"
                    + "  'path' = '"
                    + populationDirPath
                    + "',"
                    + "  'format' = 'csv'"
                    + ")";
    env.executeSql(createTableStatement);

    // insert some example data into the table
    final String insertStatement =
            "INSERT INTO PopulationUpdates VALUES"
                    + "  ('Los Angeles', 'CA', 2013, 13106100), "
                    + "  ('Los Angeles', 'CA', 2014, 72600), "
                    + "  ('Los Angeles', 'CA', 2015, 72300), "
                    + "  ('Chicago', 'IL', 2013, 9553270), "
                    + "  ('Chicago', 'IL', 2014, 11340), "
                    + "  ('Chicago', 'IL', 2015, -6730), "
                    + "  ('Houston', 'TX', 2013, 6330660), "
                    + "  ('Houston', 'TX', 2014, 172960), "
                    + "  ('Houston', 'TX', 2015, 172940), "
                    + "  ('Phoenix', 'AZ', 2013, 4404680), "
                    + "  ('Phoenix', 'AZ', 2014, 86740), "
                    + "  ('Phoenix', 'AZ', 2015, 89700), "
                    + "  ('San Antonio', 'TX', 2013, 2280580), "
                    + "  ('San Antonio', 'TX', 2014, 49180), "
                    + "  ('San Antonio', 'TX', 2015, 50870), "
                    + "  ('San Francisco', 'CA', 2013, 4521310), "
                    + "  ('San Francisco', 'CA', 2014, 65940), "
                    + "  ('San Francisco', 'CA', 2015, 62290), "
                    + "  ('Dallas', 'TX', 2013, 6817520), "
                    + "  ('Dallas', 'TX', 2014, 137740), "
                    + "  ('Dallas', 'TX', 2015, 154020)";
    final TableResult insertionResult = env.executeSql(insertStatement);

    // cluster operations of the Table API are executed asynchronously, so block until
    // the insertion has completed; an exception is thrown in case of an error
    insertionResult.await();

    // read from table and aggregate the total population per city
    final String perCityQuery =
            "SELECT city, state, MAX(update_year) AS latest_year, SUM(population_diff) AS population "
                    + "FROM PopulationUpdates "
                    + "GROUP BY city, state";
    final Table currentPopulation = env.sqlQuery(perCityQuery);

    // expose the aggregation as a sub-view instead of nesting it as a sub-query;
    // the views are optimized as a whole during planning
    env.createTemporaryView("CurrentPopulation", currentPopulation);

    // find the top 2 cities with the highest population per state: the sub-query is
    // correlated with every unique state, ranks by population and keeps the first 2 rows
    final String topCitiesQuery =
            "SELECT state, city, latest_year, population "
                    + "FROM "
                    + "  (SELECT DISTINCT state FROM CurrentPopulation) States,"
                    + "  LATERAL ("
                    + "    SELECT city, latest_year, population"
                    + "    FROM CurrentPopulation"
                    + "    WHERE state = States.state"
                    + "    ORDER BY population DESC, latest_year DESC"
                    + "    LIMIT 2"
                    + "  )";
    final Table topCitiesPerState = env.sqlQuery(topCitiesQuery);

    // execute().collect() and a List where we maintain updates
    final TableResult queryResult = topCitiesPerState.execute();
    try (CloseableIterator<Row> changes = queryResult.collect()) {
        final List<Row> materialized = new ArrayList<>();
        while (changes.hasNext()) {
            final Row change = changes.next();
            switch (change.getKind()) {
                case INSERT:
                case UPDATE_AFTER:
                    // normalize the kind so that list membership uses full equality
                    change.setKind(RowKind.INSERT);
                    materialized.add(change);
                    break;
                case UPDATE_BEFORE:
                case DELETE:
                    // normalize the kind so the row equals the previously stored entry
                    change.setKind(RowKind.INSERT);
                    materialized.remove(change);
                    break;
            }
        }
        // show the final output table if the result is bounded,
        // the output should exclude San Antonio because it has a smaller population than
        // Houston or Dallas in Texas (TX)
        for (Row row : materialized) {
            System.out.println(row);
        }
    }
}
Also used : EnvironmentSettings(org.apache.flink.table.api.EnvironmentSettings) TableResult(org.apache.flink.table.api.TableResult) Table(org.apache.flink.table.api.Table) RowKind(org.apache.flink.types.RowKind) ArrayList(java.util.ArrayList) TableEnvironment(org.apache.flink.table.api.TableEnvironment) Row(org.apache.flink.types.Row)

Example 3 with EnvironmentSettings

use of org.apache.flink.table.api.EnvironmentSettings in project flink by apache.

the class WordCountSQLExample method main.

/**
 * Runs a word-count aggregation over a fixed inline table in batch mode and prints the
 * result locally.
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception if executing the SQL statement fails
 */
public static void main(String[] args) throws Exception {
    // set up the Table API in batch mode
    final TableEnvironment tableEnv =
            TableEnvironment.create(EnvironmentSettings.newInstance().inBatchMode().build());

    final String wordCountQuery =
            // define the aggregation
            "SELECT word, SUM(frequency) AS `count`\n"
                    // read from an artificial fixed-size table with rows and columns
                    + "FROM (\n"
                    + "  VALUES ('Hello', 1), ('Ciao', 1), ('Hello', 2)\n"
                    + ")\n"
                    // name the table and its columns
                    + "AS WordTable(word, frequency)\n"
                    // group for aggregation
                    + "GROUP BY word";

    // execute a Flink SQL job and print the result locally
    tableEnv.executeSql(wordCountQuery).print();
}
Also used : EnvironmentSettings(org.apache.flink.table.api.EnvironmentSettings) TableEnvironment(org.apache.flink.table.api.TableEnvironment)

Example 4 with EnvironmentSettings

use of org.apache.flink.table.api.EnvironmentSettings in project flink by apache.

the class TpcdsTestProgram method prepareTableEnv.

/**
 * Prepares a batch-mode {@link TableEnvironment} for running a query: sets the shuffle mode,
 * broadcast-join threshold and join-reorder options, then creates one CSV-backed catalog table
 * per entry of {@code TPCDS_TABLES} in the current catalog and database.
 *
 * @param sourceTablePath path prefix of the data files; each table is read from
 *     {@code sourceTablePath + FILE_SEPARATOR + table + DATA_SUFFIX}
 * @param useTableStats when true, {@code TpcdsStatsProvider.registerTpcdsStats} is invoked on
 *     the environment after the tables have been created
 * @return the configured table environment
 */
private static TableEnvironment prepareTableEnv(String sourceTablePath, Boolean useTableStats) {
    // init Table Env
    final TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.inBatchMode());

    // config Optimizer parameters
    // TODO use the default shuffle mode of batch runtime mode once FLINK-23470 is implemented
    tableEnv.getConfig()
            .getConfiguration()
            .setString(
                    ExecutionConfigOptions.TABLE_EXEC_SHUFFLE_MODE,
                    GlobalStreamExchangeMode.POINTWISE_EDGES_PIPELINED.toString());
    tableEnv.getConfig()
            .getConfiguration()
            .setLong(
                    OptimizerConfigOptions.TABLE_OPTIMIZER_BROADCAST_JOIN_THRESHOLD,
                    10 * 1024 * 1024);
    tableEnv.getConfig()
            .getConfiguration()
            .setBoolean(OptimizerConfigOptions.TABLE_OPTIMIZER_JOIN_REORDER_ENABLED, true);

    // register TPC-DS tables
    TPCDS_TABLES.forEach(
            table -> {
                final TpcdsSchema schema = TpcdsSchemaProvider.getTableSchema(table);

                // describe the CSV file of this table: location, columns and delimiters
                final CsvTableSource.Builder sourceBuilder = CsvTableSource.builder();
                sourceBuilder.path(sourceTablePath + FILE_SEPARATOR + table + DATA_SUFFIX);
                for (int fieldIndex = 0;
                        fieldIndex < schema.getFieldNames().size();
                        fieldIndex++) {
                    sourceBuilder.field(
                            schema.getFieldNames().get(fieldIndex),
                            TypeConversions.fromDataTypeToLegacyInfo(
                                    schema.getFieldTypes().get(fieldIndex)));
                }
                sourceBuilder.fieldDelimiter(COL_DELIMITER);
                sourceBuilder.emptyColumnAsNull();
                sourceBuilder.lineDelimiter("\n");

                final CsvTableSource tableSource = sourceBuilder.build();
                final ConnectorCatalogTable catalogTable =
                        ConnectorCatalogTable.source(tableSource, true);

                // nothing is created when the current catalog cannot be resolved
                tableEnv.getCatalog(tableEnv.getCurrentCatalog())
                        .ifPresent(
                                catalog -> {
                                    try {
                                        final ObjectPath tablePath =
                                                new ObjectPath(
                                                        tableEnv.getCurrentDatabase(), table);
                                        catalog.createTable(tablePath, catalogTable, false);
                                    } catch (Exception cause) {
                                        // rethrow unchecked, keeping the original exception as cause
                                        throw new RuntimeException(cause);
                                    }
                                });
            });

    // register statistics info
    if (useTableStats) {
        TpcdsStatsProvider.registerTpcdsStats(tableEnv);
    }
    return tableEnv;
}
Also used : EnvironmentSettings(org.apache.flink.table.api.EnvironmentSettings) ObjectPath(org.apache.flink.table.catalog.ObjectPath) ConnectorCatalogTable(org.apache.flink.table.catalog.ConnectorCatalogTable) CsvTableSource(org.apache.flink.table.sources.CsvTableSource) TableEnvironment(org.apache.flink.table.api.TableEnvironment) TpcdsSchema(org.apache.flink.table.tpcds.schema.TpcdsSchema)

Example 5 with EnvironmentSettings

use of org.apache.flink.table.api.EnvironmentSettings in project flink by apache.

the class SessionContext method create.

// --------------------------------------------------------------------------------------------
// Helper method to create
// --------------------------------------------------------------------------------------------
/**
 * Creates a {@link SessionContext} for the given session id. The session gets its own clone of
 * the default Flink configuration, a user-code class loader built from the default context's
 * dependencies, and fresh catalog, module and function state.
 *
 * @param defaultContext source of the Flink configuration and dependencies
 * @param sessionId identifier passed through to the new session context
 * @return the newly created session context
 */
public static SessionContext create(DefaultContext defaultContext, String sessionId) {
    // Init config: work on a clone of the default Flink configuration
    final Configuration sessionConfig = defaultContext.getFlinkConfig().clone();

    // Init classloader
    final URLClassLoader userClassLoader =
            ClientUtils.buildUserCodeClassLoader(
                    defaultContext.getDependencies(),
                    Collections.emptyList(),
                    SessionContext.class.getClassLoader(),
                    sessionConfig);

    // Init session state
    final ModuleManager moduleManager = new ModuleManager();
    final EnvironmentSettings settings = EnvironmentSettings.fromConfiguration(sessionConfig);
    final GenericInMemoryCatalog builtInCatalog =
            new GenericInMemoryCatalog(
                    settings.getBuiltInCatalogName(), settings.getBuiltInDatabaseName());
    final CatalogManager catalogManager =
            CatalogManager.newBuilder()
                    .classLoader(userClassLoader)
                    .config(sessionConfig)
                    .defaultCatalog(settings.getBuiltInCatalogName(), builtInCatalog)
                    .build();
    final FunctionCatalog functionCatalog =
            new FunctionCatalog(sessionConfig, catalogManager, moduleManager);
    final SessionState sessionState =
            new SessionState(catalogManager, moduleManager, functionCatalog);

    // Init ExecutionContext
    final ExecutionContext executionContext =
            new ExecutionContext(sessionConfig, userClassLoader, sessionState);

    return new SessionContext(
            defaultContext,
            sessionId,
            sessionConfig,
            userClassLoader,
            sessionState,
            executionContext);
}
Also used : FunctionCatalog(org.apache.flink.table.catalog.FunctionCatalog) EnvironmentSettings(org.apache.flink.table.api.EnvironmentSettings) Configuration(org.apache.flink.configuration.Configuration) URLClassLoader(java.net.URLClassLoader) ModuleManager(org.apache.flink.table.module.ModuleManager) CatalogManager(org.apache.flink.table.catalog.CatalogManager) GenericInMemoryCatalog(org.apache.flink.table.catalog.GenericInMemoryCatalog)

Aggregations

EnvironmentSettings (org.apache.flink.table.api.EnvironmentSettings): 16, TableEnvironment (org.apache.flink.table.api.TableEnvironment): 6, Executor (org.apache.flink.table.delegation.Executor): 5, AttributedString (org.jline.utils.AttributedString): 4, StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 3, TableException (org.apache.flink.table.api.TableException): 3, Planner (org.apache.flink.table.delegation.Planner): 3, IOException (java.io.IOException): 2, Method (java.lang.reflect.Method): 2, Table (org.apache.flink.table.api.Table): 2, ExecutorFactory (org.apache.flink.table.delegation.ExecutorFactory): 2, PlannerFactory (org.apache.flink.table.delegation.PlannerFactory): 2, Row (org.apache.flink.types.Row): 2, FlinkException (org.apache.flink.util.FlinkException): 2, Before (org.junit.Before): 2, URLClassLoader (java.net.URLClassLoader): 1, ArrayList (java.util.ArrayList): 1, HashSet (java.util.HashSet): 1, Configuration (org.apache.flink.configuration.Configuration): 1, TableConfig (org.apache.flink.table.api.TableConfig): 1