Example 36 with TableEnvironment

Use of org.apache.flink.table.api.TableEnvironment in project flink by apache.

From the class HiveCatalogUdfITCase, method testUdf:

private void testUdf(boolean batch) throws Exception {
    StreamExecutionEnvironment env = null;
    TableEnvironment tEnv;
    EnvironmentSettings.Builder settingsBuilder = EnvironmentSettings.newInstance();
    if (batch) {
        settingsBuilder.inBatchMode();
        tEnv = TableEnvironment.create(settingsBuilder.build());
    } else {
        settingsBuilder.inStreamingMode();
        env = StreamExecutionEnvironment.getExecutionEnvironment();
        tEnv = StreamTableEnvironment.create(env, settingsBuilder.build());
    }
    BatchTestBase.configForMiniCluster(tEnv.getConfig());
    tEnv.registerCatalog("myhive", hiveCatalog);
    tEnv.useCatalog("myhive");
    String innerSql = format("select mygenericudf(myudf(name), 1) as a, mygenericudf(myudf(age), 1) as b," + " s from %s, lateral table(myudtf(name, 1)) as T(s)", sourceTableName);
    String selectSql = format("select a, s, sum(b), myudaf(b) from (%s) group by a, s", innerSql);
    List<String> results;
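    // batch mode: write the query result to a temporary CSV filesystem sink and read the files back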
    if (batch) {
        Path p = Paths.get(tempFolder.newFolder().getAbsolutePath(), "test.csv");
        final TableSchema sinkSchema = TableSchema.builder().field("name1", Types.STRING()).field("name2", Types.STRING()).field("sum1", Types.INT()).field("sum2", Types.LONG()).build();
        final Map<String, String> sinkOptions = new HashMap<>();
        sinkOptions.put("connector.type", "filesystem");
        sinkOptions.put("connector.path", p.toAbsolutePath().toString());
        sinkOptions.put("format.type", "csv");
        final CatalogTable sink = new CatalogTableImpl(sinkSchema, sinkOptions, "Comment.");
        hiveCatalog.createTable(new ObjectPath(HiveCatalog.DEFAULT_DB, sinkTableName), sink, false);
        tEnv.executeSql(format("insert into %s " + selectSql, sinkTableName)).await();
        // assert written result
        StringBuilder builder = new StringBuilder();
        try (Stream<Path> paths = Files.walk(Paths.get(p.toAbsolutePath().toString()))) {
            paths.filter(Files::isRegularFile).forEach(path -> {
                try {
                    String content = FileUtils.readFileUtf8(path.toFile());
                    if (content.isEmpty()) {
                        return;
                    }
                    builder.append(content);
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            });
        }
        results = Arrays.stream(builder.toString().split("\n")).filter(s -> !s.isEmpty()).collect(Collectors.toList());
    } else {
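        // streaming mode: collect the retract stream through a testing sink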
        StreamTableEnvironment streamTEnv = (StreamTableEnvironment) tEnv;
        TestingRetractSink sink = new TestingRetractSink();
        streamTEnv.toRetractStream(tEnv.sqlQuery(selectSql), Row.class).map(new JavaToScala()).addSink((SinkFunction) sink);
        env.execute("");
        results = JavaScalaConversionUtil.toJava(sink.getRetractResults());
    }
    results = new ArrayList<>(results);
    results.sort(String::compareTo);
    Assert.assertEquals(Arrays.asList("1,1,2,2", "2,2,4,4", "3,3,6,6"), results);
}
Also used : Path(java.nio.file.Path) ObjectPath(org.apache.flink.table.catalog.ObjectPath) EnvironmentSettings(org.apache.flink.table.api.EnvironmentSettings) TableSchema(org.apache.flink.table.api.TableSchema) HashMap(java.util.HashMap) TestingRetractSink(org.apache.flink.table.planner.runtime.utils.TestingRetractSink) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) TableEnvironment(org.apache.flink.table.api.TableEnvironment) CatalogTable(org.apache.flink.table.catalog.CatalogTable) IOException(java.io.IOException) CatalogTableImpl(org.apache.flink.table.catalog.CatalogTableImpl) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
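
The SQL above calls UDFs (myudf, mygenericudf, myudtf, myudaf) that the test class registers with the Hive catalog elsewhere. A minimal sketch of that registration pattern, where MyUdf is a hypothetical UDF implementation class standing in for the test's actual UDF classes:

// Register a function in the Hive catalog so SQL can refer to it as "myudf".
// MyUdf is a placeholder, not a class from the snippet above.
hiveCatalog.createFunction(
        new ObjectPath(HiveCatalog.DEFAULT_DB, "myudf"),
        new CatalogFunctionImpl(MyUdf.class.getCanonicalName()),
        false); // ignoreIfExists = false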

Example 37 with TableEnvironment

Use of org.apache.flink.table.api.TableEnvironment in project flink by apache.

From the class HiveCatalogUdfITCase, method testDateUDF:

@Test
public void testDateUDF() throws Exception {
    TableEnvironment tableEnv = HiveTestUtils.createTableEnvInBatchMode(SqlDialect.HIVE);
    tableEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    tableEnv.useCatalog(hiveCatalog.getName());
    tableEnv.executeSql(String.format("create function mymonth as '%s'", UDFMonth.class.getName()));
    tableEnv.executeSql("create table src(dt date)");
    try {
        HiveTestUtils.createTextTableInserter(hiveCatalog, "default", "src").addRow(new Object[] { Date.valueOf("2019-01-19") }).addRow(new Object[] { Date.valueOf("2019-03-02") }).commit();
        List<Row> results = CollectionUtil.iteratorToList(tableEnv.sqlQuery("select mymonth(dt) as m from src order by m").execute().collect());
        Assert.assertEquals(2, results.size());
        Assert.assertEquals("[+I[1], +I[3]]", results.toString());
    } finally {
        tableEnv.executeSql("drop table src");
    }
}
Also used : StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) TableEnvironment(org.apache.flink.table.api.TableEnvironment) Row(org.apache.flink.types.Row) Test(org.junit.Test)
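
HiveTestUtils.createTableEnvInBatchMode is a test helper. Outside the test harness, a roughly equivalent environment can be assembled from the public API; a sketch, assuming the helper applies no further test-specific configuration:

EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build();
TableEnvironment tableEnv = TableEnvironment.create(settings);
// switch the SQL parser to Hive syntax so DDL like "create function ... as '...'" is accepted
tableEnv.getConfig().setSqlDialect(SqlDialect.HIVE);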

Example 38 with TableEnvironment

Use of org.apache.flink.table.api.TableEnvironment in project flink by apache.

From the class UpdatingTopCityExample, method main:

public static void main(String[] args) throws Exception {
    // prepare the session
    final EnvironmentSettings settings = EnvironmentSettings.newInstance().inStreamingMode().build();
    final TableEnvironment env = TableEnvironment.create(settings);
    // create an empty temporary CSV directory for this example
    final String populationDirPath = createTemporaryDirectory();
    // register a table in the catalog that points to the CSV file
    env.executeSql("CREATE TABLE PopulationUpdates (" + "  city STRING," + "  state STRING," + "  update_year INT," + "  population_diff INT" + ") WITH (" + "  'connector' = 'filesystem'," + "  'path' = '" + populationDirPath + "'," + "  'format' = 'csv'" + ")");
    // insert some example data into the table
    final TableResult insertionResult =
            env.executeSql(
                    "INSERT INTO PopulationUpdates VALUES"
                            + "  ('Los Angeles', 'CA', 2013, 13106100), "
                            + "  ('Los Angeles', 'CA', 2014, 72600), "
                            + "  ('Los Angeles', 'CA', 2015, 72300), "
                            + "  ('Chicago', 'IL', 2013, 9553270), "
                            + "  ('Chicago', 'IL', 2014, 11340), "
                            + "  ('Chicago', 'IL', 2015, -6730), "
                            + "  ('Houston', 'TX', 2013, 6330660), "
                            + "  ('Houston', 'TX', 2014, 172960), "
                            + "  ('Houston', 'TX', 2015, 172940), "
                            + "  ('Phoenix', 'AZ', 2013, 4404680), "
                            + "  ('Phoenix', 'AZ', 2014, 86740), "
                            + "  ('Phoenix', 'AZ', 2015, 89700), "
                            + "  ('San Antonio', 'TX', 2013, 2280580), "
                            + "  ('San Antonio', 'TX', 2014, 49180), "
                            + "  ('San Antonio', 'TX', 2015, 50870), "
                            + "  ('San Francisco', 'CA', 2013, 4521310), "
                            + "  ('San Francisco', 'CA', 2014, 65940), "
                            + "  ('San Francisco', 'CA', 2015, 62290), "
                            + "  ('Dallas', 'TX', 2013, 6817520), "
                            + "  ('Dallas', 'TX', 2014, 137740), "
                            + "  ('Dallas', 'TX', 2015, 154020)");
    // since all cluster operations of the Table API are executed asynchronously,
    // we need to wait until the insertion has been completed,
    // an exception is thrown in case of an error
    insertionResult.await();
    // read from table and aggregate the total population per city
    final Table currentPopulation =
            env.sqlQuery(
                    "SELECT city, state, MAX(update_year) AS latest_year, SUM(population_diff) AS population "
                            + "FROM PopulationUpdates "
                            + "GROUP BY city, state");
    // either define a nested SQL statement with sub-queries
    // or divide the problem into sub-views which will be optimized
    // as a whole during planning
    env.createTemporaryView("CurrentPopulation", currentPopulation);
    // find the top 2 cities with the highest population per state,
    // we use a sub-query that is correlated with every unique state,
    // for every state we rank by population and return the top 2 cities
    final Table topCitiesPerState =
            env.sqlQuery(
                    "SELECT state, city, latest_year, population "
                            + "FROM "
                            + "  (SELECT DISTINCT state FROM CurrentPopulation) States,"
                            + "  LATERAL ("
                            + "    SELECT city, latest_year, population"
                            + "    FROM CurrentPopulation"
                            + "    WHERE state = States.state"
                            + "    ORDER BY population DESC, latest_year DESC"
                            + "    LIMIT 2"
                            + "  )");
    // execute the pipeline and materialize the updating result locally
    // using execute().collect() and a List in which we maintain the updates
    try (CloseableIterator<Row> iterator = topCitiesPerState.execute().collect()) {
        final List<Row> materializedUpdates = new ArrayList<>();
        iterator.forEachRemaining(row -> {
            final RowKind kind = row.getKind();
            switch (kind) {
                case INSERT:
                case UPDATE_AFTER:
                    // reset the change flag so that rows compare fully equal
                    row.setKind(RowKind.INSERT);
                    materializedUpdates.add(row);
                    break;
                case UPDATE_BEFORE:
                case DELETE:
                    // reset the change flag so that rows compare fully equal
                    row.setKind(RowKind.INSERT);
                    materializedUpdates.remove(row);
                    break;
            }
        });
        // show the final output table if the result is bounded,
        // the output should exclude San Antonio because it has a smaller population than
        // Houston or Dallas in Texas (TX)
        materializedUpdates.forEach(System.out::println);
    }
}
Also used : EnvironmentSettings(org.apache.flink.table.api.EnvironmentSettings) TableResult(org.apache.flink.table.api.TableResult) Table(org.apache.flink.table.api.Table) RowKind(org.apache.flink.types.RowKind) ArrayList(java.util.ArrayList) TableEnvironment(org.apache.flink.table.api.TableEnvironment) Row(org.apache.flink.types.Row)
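
The correlated LATERAL join above is one way to express a Top-N query; Flink SQL also documents a ROW_NUMBER()-based pattern. As a sketch (not part of the original example), the same ranking could be written as:

final Table topCitiesViaRowNumber =
        env.sqlQuery(
                "SELECT state, city, latest_year, population "
                        + "FROM ("
                        + "  SELECT *, ROW_NUMBER() OVER ("
                        + "    PARTITION BY state ORDER BY population DESC, latest_year DESC) AS row_num "
                        + "  FROM CurrentPopulation) "
                        + "WHERE row_num <= 2");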

Example 39 with TableEnvironment

Use of org.apache.flink.table.api.TableEnvironment in project flink by apache.

From the class WordCountSQLExample, method main:

public static void main(String[] args) throws Exception {
    // set up the Table API
    final EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build();
    final TableEnvironment tableEnv = TableEnvironment.create(settings);
    // execute a Flink SQL job and print the result locally
    tableEnv.executeSql(
            // define the aggregation
            "SELECT word, SUM(frequency) AS `count`\n"
                    // read from an artificial fixed-size table with rows and columns
                    + "FROM (\n"
                    + "  VALUES ('Hello', 1), ('Ciao', 1), ('Hello', 2)\n"
                    + ")\n"
                    // name the table and its columns
                    + "AS WordTable(word, frequency)\n"
                    // group for aggregation
                    + "GROUP BY word")
            .print();
}
Also used : EnvironmentSettings(org.apache.flink.table.api.EnvironmentSettings) TableEnvironment(org.apache.flink.table.api.TableEnvironment)
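
The same word count can also be expressed with the Table API instead of a SQL string. A sketch, assuming static imports of org.apache.flink.table.api.Expressions.$ and org.apache.flink.table.api.Expressions.row:

tableEnv.fromValues(row("Hello", 1), row("Ciao", 1), row("Hello", 2))
        .as("word", "frequency")
        .groupBy($("word"))
        .select($("word"), $("frequency").sum().as("count"))
        .execute()
        .print();

Both forms express the same aggregation; the Table API variant is easier to compose programmatically, while the SQL form keeps the whole query in one place.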

Example 40 with TableEnvironment

Use of org.apache.flink.table.api.TableEnvironment in project flink by apache.

From the class TpcdsTestProgram, method prepareTableEnv:

/**
 * Prepares a TableEnvironment for running the TPC-DS queries.
 *
 * @param sourceTablePath path to the directory containing the generated TPC-DS data files
 * @param useTableStats whether to register table and column statistics for the optimizer
 * @return a batch-mode TableEnvironment with all TPC-DS tables registered
 */
private static TableEnvironment prepareTableEnv(String sourceTablePath, Boolean useTableStats) {
    // init Table Env
    EnvironmentSettings environmentSettings = EnvironmentSettings.inBatchMode();
    TableEnvironment tEnv = TableEnvironment.create(environmentSettings);
    // config Optimizer parameters
    // TODO use the default shuffle mode of batch runtime mode once FLINK-23470 is implemented
    tEnv.getConfig().getConfiguration().setString(ExecutionConfigOptions.TABLE_EXEC_SHUFFLE_MODE, GlobalStreamExchangeMode.POINTWISE_EDGES_PIPELINED.toString());
    tEnv.getConfig().getConfiguration().setLong(OptimizerConfigOptions.TABLE_OPTIMIZER_BROADCAST_JOIN_THRESHOLD, 10 * 1024 * 1024);
    tEnv.getConfig().getConfiguration().setBoolean(OptimizerConfigOptions.TABLE_OPTIMIZER_JOIN_REORDER_ENABLED, true);
    // register TPC-DS tables
    TPCDS_TABLES.forEach(table -> {
        TpcdsSchema schema = TpcdsSchemaProvider.getTableSchema(table);
        CsvTableSource.Builder builder = CsvTableSource.builder();
        builder.path(sourceTablePath + FILE_SEPARATOR + table + DATA_SUFFIX);
        for (int i = 0; i < schema.getFieldNames().size(); i++) {
            builder.field(schema.getFieldNames().get(i), TypeConversions.fromDataTypeToLegacyInfo(schema.getFieldTypes().get(i)));
        }
        builder.fieldDelimiter(COL_DELIMITER);
        builder.emptyColumnAsNull();
        builder.lineDelimiter("\n");
        CsvTableSource tableSource = builder.build();
        ConnectorCatalogTable catalogTable = ConnectorCatalogTable.source(tableSource, true);
        tEnv.getCatalog(tEnv.getCurrentCatalog()).ifPresent(catalog -> {
            try {
                catalog.createTable(new ObjectPath(tEnv.getCurrentDatabase(), table), catalogTable, false);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        });
    });
    // register statistics info
    if (useTableStats) {
        TpcdsStatsProvider.registerTpcdsStats(tEnv);
    }
    return tEnv;
}
Also used : EnvironmentSettings(org.apache.flink.table.api.EnvironmentSettings) ObjectPath(org.apache.flink.table.catalog.ObjectPath) ConnectorCatalogTable(org.apache.flink.table.catalog.ConnectorCatalogTable) CsvTableSource(org.apache.flink.table.sources.CsvTableSource) TableEnvironment(org.apache.flink.table.api.TableEnvironment) TpcdsSchema(org.apache.flink.table.tpcds.schema.TpcdsSchema)
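
The snippet above only prepares the environment; the surrounding program (not shown here) executes the TPC-DS queries against it. A hypothetical usage sketch, where the data path and queryString are placeholders rather than values from the original source:

TableEnvironment tEnv = prepareTableEnv("/path/to/tpcds/data", true);
String queryString = "..."; // stands in for the contents of a TPC-DS query file
TableResult result = tEnv.executeSql(queryString);
result.collect().forEachRemaining(System.out::println);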

Aggregations

TableEnvironment (org.apache.flink.table.api.TableEnvironment): 137
Test (org.junit.Test): 95
Row (org.apache.flink.types.Row): 58
StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment): 38
Table (org.apache.flink.table.api.Table): 27
ObjectPath (org.apache.flink.table.catalog.ObjectPath): 19
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 14
ArrayList (java.util.ArrayList): 13
CatalogTable (org.apache.flink.table.catalog.CatalogTable): 12
HashMap (java.util.HashMap): 11
EnvironmentSettings (org.apache.flink.table.api.EnvironmentSettings): 10
CatalogBaseTable (org.apache.flink.table.catalog.CatalogBaseTable): 10
TableResult (org.apache.flink.table.api.TableResult): 8
File (java.io.File): 7
Constructor (java.lang.reflect.Constructor): 7
TableImpl (org.apache.flink.table.api.internal.TableImpl): 7
TableException (org.apache.flink.table.api.TableException): 5
List (java.util.List): 4
Configuration (org.apache.flink.configuration.Configuration): 4
TableSchema (org.apache.flink.table.api.TableSchema): 4