
Example 6 with TableResult

use of org.apache.flink.table.api.TableResult in project flink by apache.

the class UpdatingTopCityExample method main.

public static void main(String[] args) throws Exception {
    // prepare the session
    final EnvironmentSettings settings = EnvironmentSettings.newInstance().inStreamingMode().build();
    final TableEnvironment env = TableEnvironment.create(settings);
    // create an empty temporary CSV directory for this example
    final String populationDirPath = createTemporaryDirectory();
    // register a table in the catalog that points to the CSV file
    env.executeSql("CREATE TABLE PopulationUpdates (" + "  city STRING," + "  state STRING," + "  update_year INT," + "  population_diff INT" + ") WITH (" + "  'connector' = 'filesystem'," + "  'path' = '" + populationDirPath + "'," + "  'format' = 'csv'" + ")");
    // insert some example data into the table
    final TableResult insertionResult = env.executeSql(
            "INSERT INTO PopulationUpdates VALUES"
                    + "  ('Los Angeles', 'CA', 2013, 13106100), "
                    + "  ('Los Angeles', 'CA', 2014, 72600), "
                    + "  ('Los Angeles', 'CA', 2015, 72300), "
                    + "  ('Chicago', 'IL', 2013, 9553270), "
                    + "  ('Chicago', 'IL', 2014, 11340), "
                    + "  ('Chicago', 'IL', 2015, -6730), "
                    + "  ('Houston', 'TX', 2013, 6330660), "
                    + "  ('Houston', 'TX', 2014, 172960), "
                    + "  ('Houston', 'TX', 2015, 172940), "
                    + "  ('Phoenix', 'AZ', 2013, 4404680), "
                    + "  ('Phoenix', 'AZ', 2014, 86740), "
                    + "  ('Phoenix', 'AZ', 2015, 89700), "
                    + "  ('San Antonio', 'TX', 2013, 2280580), "
                    + "  ('San Antonio', 'TX', 2014, 49180), "
                    + "  ('San Antonio', 'TX', 2015, 50870), "
                    + "  ('San Francisco', 'CA', 2013, 4521310), "
                    + "  ('San Francisco', 'CA', 2014, 65940), "
                    + "  ('San Francisco', 'CA', 2015, 62290), "
                    + "  ('Dallas', 'TX', 2013, 6817520), "
                    + "  ('Dallas', 'TX', 2014, 137740), "
                    + "  ('Dallas', 'TX', 2015, 154020)");
    // all cluster operations of the Table API are executed asynchronously,
    // so we need to wait until the insertion has completed;
    // an exception is thrown in case of an error
    insertionResult.await();
    // read from table and aggregate the total population per city
    final Table currentPopulation = env.sqlQuery(
            "SELECT city, state, MAX(update_year) AS latest_year, SUM(population_diff) AS population "
                    + "FROM PopulationUpdates "
                    + "GROUP BY city, state");
    // either define a nested SQL statement with sub-queries
    // or divide the problem into sub-views which will be optimized
    // as a whole during planning
    env.createTemporaryView("CurrentPopulation", currentPopulation);
    // find the top 2 cities with the highest population per state;
    // we use a sub-query that is correlated with every unique state:
    // for every state we rank by population and return the top 2 cities
    final Table topCitiesPerState = env.sqlQuery(
            "SELECT state, city, latest_year, population "
                    + "FROM "
                    + "  (SELECT DISTINCT state FROM CurrentPopulation) States,"
                    + "  LATERAL ("
                    + "    SELECT city, latest_year, population"
                    + "    FROM CurrentPopulation"
                    + "    WHERE state = States.state"
                    + "    ORDER BY population DESC, latest_year DESC"
                    + "    LIMIT 2"
                    + "  )");
    // materialize the changelog in memory using execute().collect() and a List
    // in which we maintain the updates
    try (CloseableIterator<Row> iterator = topCitiesPerState.execute().collect()) {
        final List<Row> materializedUpdates = new ArrayList<>();
        iterator.forEachRemaining(row -> {
            final RowKind kind = row.getKind();
            switch(kind) {
                case INSERT:
                case UPDATE_AFTER:
                    // normalize the kind so that Row.equals() ignores the changelog flag
                    row.setKind(RowKind.INSERT);
                    materializedUpdates.add(row);
                    break;
                case UPDATE_BEFORE:
                case DELETE:
                    // normalize the kind so the row matches its earlier INSERT in the list
                    row.setKind(RowKind.INSERT);
                    materializedUpdates.remove(row);
                    break;
            }
        });
        // show the final output table if the result is bounded;
        // the output should exclude San Antonio because it has a smaller population than
        // Houston or Dallas in Texas (TX)
        materializedUpdates.forEach(System.out::println);
    }
}
Also used: EnvironmentSettings (org.apache.flink.table.api.EnvironmentSettings), TableResult (org.apache.flink.table.api.TableResult), Table (org.apache.flink.table.api.Table), RowKind (org.apache.flink.types.RowKind), ArrayList (java.util.ArrayList), TableEnvironment (org.apache.flink.table.api.TableEnvironment), Row (org.apache.flink.types.Row)
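
Besides the indefinite await() used above, TableResult also offers an await(timeout, unit) overload that throws a TimeoutException if the job does not finish in time. A minimal sketch, assuming the env and PopulationUpdates table from the example above (the inserted row is made up for illustration; requires java.util.concurrent.TimeUnit and TimeoutException):

    final TableResult update = env.executeSql(
            "INSERT INTO PopulationUpdates VALUES ('Austin', 'TX', 2016, 931830)");
    try {
        // bounded wait; the plain await() above blocks indefinitely
        update.await(1, TimeUnit.MINUTES);
    } catch (TimeoutException e) {
        // the timeout does not cancel the job; it keeps running on the cluster
    }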

Example 7 with TableResult

use of org.apache.flink.table.api.TableResult in project flink by apache.

the class TpcdsTestProgram method main.

public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    String sourceTablePath = params.getRequired("sourceTablePath");
    String queryPath = params.getRequired("queryPath");
    String sinkTablePath = params.getRequired("sinkTablePath");
    Boolean useTableStats = params.getBoolean("useTableStats");
    TableEnvironment tableEnvironment = prepareTableEnv(sourceTablePath, useTableStats);
    // execute TPC-DS queries
    for (String queryId : TPCDS_QUERIES) {
        System.out.println("[INFO]Run TPC-DS query " + queryId + " ...");
        String queryName = QUERY_PREFIX + queryId + QUERY_SUFFIX;
        String queryFilePath = queryPath + FILE_SEPARATOR + queryName;
        String queryString = loadFile2String(queryFilePath);
        Table resultTable = tableEnvironment.sqlQuery(queryString);
        // register sink table
        String sinkTableName = QUERY_PREFIX + queryId + "_sinkTable";
        ((TableEnvironmentInternal) tableEnvironment).registerTableSinkInternal(
                sinkTableName,
                new CsvTableSink(
                        sinkTablePath + FILE_SEPARATOR + queryId + RESULT_SUFFIX,
                        COL_DELIMITER,
                        1,
                        FileSystem.WriteMode.OVERWRITE,
                        resultTable.getSchema().getFieldNames(),
                        resultTable.getSchema().getFieldDataTypes()));
        TableResult tableResult = resultTable.executeInsert(sinkTableName);
        // wait for the job to finish
        tableResult.getJobClient().get().getJobExecutionResult().get();
        System.out.println("[INFO]Run TPC-DS query " + queryId + " success.");
    }
}
Also used: ParameterTool (org.apache.flink.api.java.utils.ParameterTool), ConnectorCatalogTable (org.apache.flink.table.catalog.ConnectorCatalogTable), Table (org.apache.flink.table.api.Table), TableEnvironmentInternal (org.apache.flink.table.api.internal.TableEnvironmentInternal), TableResult (org.apache.flink.table.api.TableResult), CsvTableSink (org.apache.flink.table.sinks.CsvTableSink), TableEnvironment (org.apache.flink.table.api.TableEnvironment)
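
The getJobClient().get().getJobExecutionResult().get() chain above blocks until the submitted job completes and rethrows job failures. A shorter, equivalent wait is available directly on TableResult; a minimal sketch, assuming the resultTable and sinkTableName from the loop above:

    TableResult tableResult = resultTable.executeInsert(sinkTableName);
    // await() blocks until the insert job finishes and surfaces failures as exceptions
    tableResult.await();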

Example 8 with TableResult

use of org.apache.flink.table.api.TableResult in project flink by apache.

the class BatchSQLTestProgram method main.

public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    String outputPath = params.getRequired("outputPath");
    String sqlStatement = params.getRequired("sqlStatement");
    TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inBatchMode());
    ((TableEnvironmentInternal) tEnv).registerTableSourceInternal("table1", new GeneratorTableSource(10, 100, 60, 0));
    ((TableEnvironmentInternal) tEnv).registerTableSourceInternal("table2", new GeneratorTableSource(5, 0.2f, 60, 5));
    ((TableEnvironmentInternal) tEnv).registerTableSinkInternal("sinkTable", new CsvTableSink(outputPath).configure(new String[] { "f0", "f1" }, new TypeInformation[] { Types.INT, Types.SQL_TIMESTAMP }));
    TableResult result = tEnv.executeSql(sqlStatement);
    // wait for the job to finish
    result.getJobClient().get().getJobExecutionResult().get();
}
Also used: ParameterTool (org.apache.flink.api.java.utils.ParameterTool), TableEnvironmentInternal (org.apache.flink.table.api.internal.TableEnvironmentInternal), TableResult (org.apache.flink.table.api.TableResult), CsvTableSink (org.apache.flink.table.sinks.CsvTableSink), TableEnvironment (org.apache.flink.table.api.TableEnvironment), TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)
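
Besides waiting on the job, a TableResult reports what it carries: getResultKind() returns SUCCESS for statements without a result set (e.g. DDL) and SUCCESS_WITH_CONTENT for queries and DML. A small sketch against the tEnv and result above (the view name v1 is made up):

    // DDL produces a finished result with no rows to fetch; prints SUCCESS
    TableResult ddl = tEnv.executeSql("CREATE VIEW v1 AS SELECT * FROM table1");
    System.out.println(ddl.getResultKind());
    // a query or INSERT statement yields SUCCESS_WITH_CONTENT
    System.out.println(result.getResultKind());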

Example 9 with TableResult

use of org.apache.flink.table.api.TableResult in project flink by apache.

the class OrcFileSystemITCase method testLimitableBulkFormat.

@Test
public void testLimitableBulkFormat() throws ExecutionException, InterruptedException {
    super.tableEnv().executeSql("insert into orcLimitTable select x, y, " + "1 as a " + "from originalT").await();
    TableResult tableResult1 = super.tableEnv().executeSql("SELECT * FROM orcLimitTable limit 5");
    List<Row> rows1 = CollectionUtil.iteratorToList(tableResult1.collect());
    assertEquals(5, rows1.size());
    check("select a from orcLimitTable limit 5", Arrays.asList(Row.of(1), Row.of(1), Row.of(1), Row.of(1), Row.of(1)));
}
Also used: TableResult (org.apache.flink.table.api.TableResult), Row (org.apache.flink.types.Row), Test (org.junit.Test)
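
Note that collect() returns a CloseableIterator whose close() also releases the result-fetching resources (and cancels the job if it is still running). The bounded LIMIT query above terminates on its own, but the try-with-resources pattern from Example 6 is the safer default; a sketch of the same assertion (inside a method that declares throws Exception):

    try (CloseableIterator<Row> it = tableResult1.collect()) {
        List<Row> rows1 = CollectionUtil.iteratorToList(it);
        assertEquals(5, rows1.size());
    }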

Example 10 with TableResult

use of org.apache.flink.table.api.TableResult in project flink by apache.

the class OrcFileSystemITCase method testNestedTypes.

@Test
public void testNestedTypes() throws Exception {
    String path = initNestedTypesFile(initNestedTypesData());
    super.tableEnv()
            .executeSql(
                    String.format(
                            "create table orcNestedTypesTable ("
                                    + "_col0 string,"
                                    + "_col1 int,"
                                    + "_col2 ARRAY<ROW<_col2_col0 string>>,"
                                    + "_col3 MAP<string,ROW<_col3_col0 string,_col3_col1 timestamp>>"
                                    + ") with ("
                                    + "'connector' = 'filesystem',"
                                    + "'format' = 'orc',"
                                    + "'path' = '%s')",
                            path));
    TableResult tableResult = super.tableEnv().executeSql("SELECT * FROM orcNestedTypesTable");
    List<Row> rows = CollectionUtil.iteratorToList(tableResult.collect());
    assertEquals(4, rows.size());
    assertEquals("+I[_col_0_string_1, 1, [+I[_col_2_row_0_string_1], +I[_col_2_row_1_string_1]], {_col_3_map_key_1=+I[_col_3_map_value_string_1, " + new Timestamp(3600000).toLocalDateTime() + "]}]", rows.get(0).toString());
    assertEquals("+I[_col_0_string_2, 2, null, null]", rows.get(1).toString());
    assertEquals("+I[_col_0_string_3, 3, [], {}]", rows.get(2).toString());
    assertEquals("+I[_col_0_string_4, 4, [], {null=null}]", rows.get(3).toString());
}
Also used: TableResult (org.apache.flink.table.api.TableResult), Row (org.apache.flink.types.Row), Timestamp (java.sql.Timestamp), Test (org.junit.Test)
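
The +I prefix in the expected strings is the row's RowKind: on recent Flink versions Row.toString() prepends the kind's short string (+I insert, -U update-before, +U update-after, -D delete) to the field list. A tiny sketch:

    // Row.of(...) creates a row with kind INSERT by default
    Row row = Row.of("_col_0_string_2", 2, null, null);
    // prints: +I[_col_0_string_2, 2, null, null]
    System.out.println(row);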

Aggregations

TableResult (org.apache.flink.table.api.TableResult): 39
Test (org.junit.Test): 26
Row (org.apache.flink.types.Row): 20
StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment): 15
ArrayList (java.util.ArrayList): 7
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 7
Table (org.apache.flink.table.api.Table): 7
TableEnvironment (org.apache.flink.table.api.TableEnvironment): 7
JobClient (org.apache.flink.core.execution.JobClient): 4
Configuration (org.apache.flink.configuration.Configuration): 3
ParameterTool (org.apache.flink.api.java.utils.ParameterTool): 2
TableDescriptor (org.apache.flink.table.api.TableDescriptor): 2
TableEnvironmentInternal (org.apache.flink.table.api.internal.TableEnvironmentInternal): 2
CsvTableSink (org.apache.flink.table.sinks.CsvTableSink): 2
DataType (org.apache.flink.table.types.DataType): 2
File (java.io.File): 1
IOException (java.io.IOException): 1
BigDecimal (java.math.BigDecimal): 1
Timestamp (java.sql.Timestamp): 1
Random (java.util.Random): 1