
Example 31 with RowTypeInfo

Use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.

From the class RowCsvInputFormatSplitTest, the method test:

private void test(String content, long offset, long length, char escapeChar, List<Row> expected, TypeInformation[] fieldTypes) throws Exception {
    FileInputSplit split = createTempFile(content, offset, length);
    RowCsvInputFormat.Builder builder =
            RowCsvInputFormat.builder(new RowTypeInfo(fieldTypes), PATH).setEscapeCharacter(escapeChar);
    RowCsvInputFormat format = builder.build();
    format.configure(new Configuration());
    format.open(split);
    // Drain the split: a null record from nextRecord also signals the end of input.
    List<Row> rows = new ArrayList<>();
    while (!format.reachedEnd()) {
        Row result = format.nextRecord(new Row(3));
        if (result == null) {
            break;
        }
        rows.add(result);
    }
    assertEquals(expected, rows);
}
Also used: FileInputSplit (org.apache.flink.core.fs.FileInputSplit), Configuration (org.apache.flink.configuration.Configuration), ArrayList (java.util.ArrayList), Row (org.apache.flink.types.Row), RowTypeInfo (org.apache.flink.api.java.typeutils.RowTypeInfo)
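
A note on the constructor used above: RowTypeInfo describes Row fields positionally and can optionally carry field names. A minimal, self-contained sketch (the field names id, name, and score are illustrative, not taken from the test):

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.typeutils.RowTypeInfo;

public class RowTypeInfoSketch {
    public static void main(String[] args) {
        // Positional field types, plus optional human-readable field names.
        TypeInformation<?>[] fieldTypes = {Types.INT, Types.STRING, Types.DOUBLE};
        String[] fieldNames = {"id", "name", "score"};
        RowTypeInfo typeInfo = new RowTypeInfo(fieldTypes, fieldNames);

        System.out.println(typeInfo.getArity());            // 3
        System.out.println(typeInfo.getFieldIndex("name")); // 1
        System.out.println(typeInfo.getTypeAt(2));          // Double
    }
}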

Example 32 with RowTypeInfo

Use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.

From the class HiveTableSinkITCase, the method testStreamingWrite:

private void testStreamingWrite(boolean part, boolean useMr, String format, Consumer<String> pathConsumer) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.enableCheckpointing(100);
    StreamTableEnvironment tEnv = HiveTestUtils.createTableEnvInStreamingMode(env);
    tEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    tEnv.useCatalog(hiveCatalog.getName());
    tEnv.getConfig().setSqlDialect(SqlDialect.HIVE);
    // Choose between Hive's MapReduce writer and Flink's native writer.
    tEnv.getConfig()
            .getConfiguration()
            .set(HiveOptions.TABLE_EXEC_HIVE_FALLBACK_MAPRED_WRITER, useMr);
    try {
        tEnv.executeSql("create database db1");
        tEnv.useDatabase("db1");
        // prepare source
        List<Row> data = Arrays.asList(
                Row.of(1, "a", "b", "2020-05-03", "7"),
                Row.of(2, "p", "q", "2020-05-03", "8"),
                Row.of(3, "x", "y", "2020-05-03", "9"),
                Row.of(4, "x", "y", "2020-05-03", "10"),
                Row.of(5, "x", "y", "2020-05-03", "11"));
        DataStream<Row> stream = env.addSource(
                new FiniteTestSource<>(data),
                new RowTypeInfo(Types.INT, Types.STRING, Types.STRING, Types.STRING, Types.STRING));
        tEnv.createTemporaryView("my_table", stream, $("a"), $("b"), $("c"), $("d"), $("e"));
        // DDL
        tEnv.executeSql("create external table sink_table (a int,b string,c string" + (part ? "" : ",d string,e string") + ") " + (part ? "partitioned by (d string,e string) " : "") + " stored as " + format + " TBLPROPERTIES (" + "'" + PARTITION_TIME_EXTRACTOR_TIMESTAMP_PATTERN.key() + "'='$d $e:00:00'," + "'" + SINK_PARTITION_COMMIT_DELAY.key() + "'='1h'," + "'" + SINK_PARTITION_COMMIT_POLICY_KIND.key() + "'='metastore,success-file'," + "'" + SINK_PARTITION_COMMIT_SUCCESS_FILE_NAME.key() + "'='_MY_SUCCESS'" + ")");
        // hive dialect only works with hive tables at the moment, switch to default dialect
        tEnv.getConfig().setSqlDialect(SqlDialect.DEFAULT);
        tEnv.sqlQuery("select * from my_table").executeInsert("sink_table").await();
        assertBatch("db1.sink_table", Arrays.asList("+I[1, a, b, 2020-05-03, 7]", "+I[1, a, b, 2020-05-03, 7]", "+I[2, p, q, 2020-05-03, 8]", "+I[2, p, q, 2020-05-03, 8]", "+I[3, x, y, 2020-05-03, 9]", "+I[3, x, y, 2020-05-03, 9]", "+I[4, x, y, 2020-05-03, 10]", "+I[4, x, y, 2020-05-03, 10]", "+I[5, x, y, 2020-05-03, 11]", "+I[5, x, y, 2020-05-03, 11]"));
        pathConsumer.accept(URI.create(hiveCatalog.getHiveTable(ObjectPath.fromString("db1.sink_table")).getSd().getLocation()).getPath());
    } finally {
        tEnv.executeSql("drop database db1 cascade");
    }
}
Also used: StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment), Row (org.apache.flink.types.Row), RowTypeInfo (org.apache.flink.api.java.typeutils.RowTypeInfo)
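
The explicit RowTypeInfo in the addSource call above matters because Row itself carries no field type information; without it Flink would fall back to generic serialization. A stripped-down sketch of the same idea outside the Hive setup (the collection source and job name are illustrative):

import java.util.Arrays;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.types.Row;

public class RowStreamSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Row is not generic-reifiable, so the element type is supplied explicitly.
        DataStream<Row> stream = env.fromCollection(
                Arrays.asList(Row.of(1, "a"), Row.of(2, "b")),
                new RowTypeInfo(Types.INT, Types.STRING));
        stream.print();
        env.execute("row-stream-sketch");
    }
}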

Example 33 with RowTypeInfo

Use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.

From the class LegacyRowSerializerTest, the method testRowSerializer:

@Test
public void testRowSerializer() {
    RowTypeInfo typeInfo = new RowTypeInfo(BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
    Row row1 = new Row(2);
    row1.setField(0, 1);
    row1.setField(1, "a");
    Row row2 = new Row(2);
    row2.setField(0, 2);
    row2.setField(1, null);
    TypeSerializer<Row> serializer = typeInfo.createLegacySerializer(new ExecutionConfig());
    RowSerializerTestInstance instance = new RowSerializerTestInstance(serializer, row1, row2);
    instance.testAll();
}
Also used: Row (org.apache.flink.types.Row), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), RowTypeInfo (org.apache.flink.api.java.typeutils.RowTypeInfo), Test (org.junit.Test)
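
In essence the test checks that rows, including null fields, survive a serializer round trip. A standalone sketch of such a round trip using the current createSerializer rather than createLegacySerializer (buffer size and field values are arbitrary):

import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.core.memory.DataInputDeserializer;
import org.apache.flink.core.memory.DataOutputSerializer;
import org.apache.flink.types.Row;

public class RowRoundTripSketch {
    public static void main(String[] args) throws Exception {
        RowTypeInfo typeInfo = new RowTypeInfo(Types.INT, Types.STRING);
        TypeSerializer<Row> serializer = typeInfo.createSerializer(new ExecutionConfig());

        Row row = Row.of(2, null); // null fields are legal and must survive the trip
        DataOutputSerializer out = new DataOutputSerializer(64);
        serializer.serialize(row, out);

        DataInputDeserializer in = new DataInputDeserializer(out.getCopyOfBuffer());
        Row copy = serializer.deserialize(in);
        System.out.println(copy); // e.g. +I[2, null]
    }
}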

Example 34 with RowTypeInfo

Use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.

From the class LegacyRowSerializerTest, the method testRowSerializerWithComplexTypes:

@Test
public void testRowSerializerWithComplexTypes() {
    RowTypeInfo typeInfo = new RowTypeInfo(
            BasicTypeInfo.INT_TYPE_INFO,
            BasicTypeInfo.DOUBLE_TYPE_INFO,
            BasicTypeInfo.STRING_TYPE_INFO,
            new TupleTypeInfo<Tuple3<Integer, Boolean, Short>>(
                    BasicTypeInfo.INT_TYPE_INFO,
                    BasicTypeInfo.BOOLEAN_TYPE_INFO,
                    BasicTypeInfo.SHORT_TYPE_INFO),
            TypeExtractor.createTypeInfo(MyPojo.class));
    MyPojo testPojo1 = new MyPojo();
    testPojo1.name = null;
    MyPojo testPojo2 = new MyPojo();
    testPojo2.name = "Test1";
    MyPojo testPojo3 = new MyPojo();
    testPojo3.name = "Test2";
    Row[] data = new Row[] {
        createRow(null, null, null, null, null),
        createRow(0, null, null, null, null),
        createRow(0, 0.0, null, null, null),
        createRow(0, 0.0, "a", null, null),
        createRow(1, 0.0, "a", null, null),
        createRow(1, 1.0, "a", null, null),
        createRow(1, 1.0, "b", null, null),
        createRow(1, 1.0, "b", new Tuple3<>(1, false, (short) 2), null),
        createRow(1, 1.0, "b", new Tuple3<>(2, false, (short) 2), null),
        createRow(1, 1.0, "b", new Tuple3<>(2, true, (short) 2), null),
        createRow(1, 1.0, "b", new Tuple3<>(2, true, (short) 3), null),
        createRow(1, 1.0, "b", new Tuple3<>(2, true, (short) 3), testPojo1),
        createRow(1, 1.0, "b", new Tuple3<>(2, true, (short) 3), testPojo2),
        createRow(1, 1.0, "b", new Tuple3<>(2, true, (short) 3), testPojo3)
    };
    TypeSerializer<Row> serializer = typeInfo.createLegacySerializer(new ExecutionConfig());
    RowSerializerTestInstance testInstance = new RowSerializerTestInstance(serializer, data);
    testInstance.testAll();
}
Also used: Tuple3 (org.apache.flink.api.java.tuple.Tuple3), Row (org.apache.flink.types.Row), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), RowTypeInfo (org.apache.flink.api.java.typeutils.RowTypeInfo), Test (org.junit.Test)
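
The same composition works with any TypeInformation, not just tuples and POJOs; a RowTypeInfo can even nest inside another. A small sketch with an invented nested layout:

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.types.Row;

public class NestedRowSketch {
    public static void main(String[] args) {
        // A RowTypeInfo is itself a TypeInformation, so rows can nest.
        RowTypeInfo address = new RowTypeInfo(Types.STRING, Types.STRING);
        RowTypeInfo person = new RowTypeInfo(Types.INT, Types.STRING, address);

        Row row = Row.of(1, "alice", Row.of("Main St", "Springfield"));
        System.out.println(person.getArity()); // 3
        System.out.println(row);
    }
}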

Example 35 with RowTypeInfo

Use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.

From the class LegacyRowSerializerTest, the method testLargeRowSerializer:

@Test
public void testLargeRowSerializer() {
    RowTypeInfo typeInfo = new RowTypeInfo(
            BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO,
            BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO,
            BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO,
            BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO,
            BasicTypeInfo.STRING_TYPE_INFO);
    Row row = new Row(13);
    row.setField(0, 2);
    row.setField(1, null);
    row.setField(3, null);
    row.setField(4, null);
    row.setField(5, null);
    row.setField(6, null);
    row.setField(7, null);
    row.setField(8, null);
    row.setField(9, null);
    row.setField(10, null);
    row.setField(11, null);
    row.setField(12, "Test");
    TypeSerializer<Row> serializer = typeInfo.createLegacySerializer(new ExecutionConfig());
    RowSerializerTestInstance testInstance = new RowSerializerTestInstance(serializer, row);
    testInstance.testAll();
}
Also used: Row (org.apache.flink.types.Row), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), RowTypeInfo (org.apache.flink.api.java.typeutils.RowTypeInfo), Test (org.junit.Test)
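
Note that field 2 is never set in the test above, which is deliberate: every field of a freshly constructed Row starts out as null. A tiny sketch of that default:

import org.apache.flink.types.Row;

public class SparseRowSketch {
    public static void main(String[] args) {
        // Fields of a new Row default to null, which is why the test
        // can skip setField(2, ...) and still serialize the row.
        Row row = new Row(3);
        row.setField(0, 42);
        System.out.println(row.getField(0)); // 42
        System.out.println(row.getField(1)); // null
        System.out.println(row.getArity());  // 3
    }
}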

Aggregations

RowTypeInfo (org.apache.flink.api.java.typeutils.RowTypeInfo): 50
Test (org.junit.Test): 34
Row (org.apache.flink.types.Row): 32
TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 26
Configuration (org.apache.flink.configuration.Configuration): 16
FileInputSplit (org.apache.flink.core.fs.FileInputSplit): 15
ArrayList (java.util.ArrayList): 10
Transformation (org.apache.flink.api.dag.Transformation): 8
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 8
SourceTransformation (org.apache.flink.streaming.api.transformations.SourceTransformation): 8
TwoInputTransformation (org.apache.flink.streaming.api.transformations.TwoInputTransformation): 8
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 6
PythonKeyedProcessOperator (org.apache.flink.streaming.api.operators.python.PythonKeyedProcessOperator): 6
IOException (java.io.IOException): 4
MapTypeInfo (org.apache.flink.api.java.typeutils.MapTypeInfo): 4
File (java.io.File): 3
FileOutputStream (java.io.FileOutputStream): 3
OutputStreamWriter (java.io.OutputStreamWriter): 3
LocalDateTime (java.time.LocalDateTime): 3
PrimitiveArrayTypeInfo (org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo): 3