Example 11 with RowType

Use of org.apache.flink.table.types.logical.RowType in project flink by apache.

The class AvroRowDataDeSerializationSchemaTest, method testSerializeDeserialize.

@Test
public void testSerializeDeserialize() throws Exception {
    final DataType dataType = ROW(
            FIELD("bool", BOOLEAN()),
            FIELD("tinyint", TINYINT()),
            FIELD("smallint", SMALLINT()),
            FIELD("int", INT()),
            FIELD("bigint", BIGINT()),
            FIELD("float", FLOAT()),
            FIELD("double", DOUBLE()),
            FIELD("name", STRING()),
            FIELD("bytes", BYTES()),
            FIELD("decimal", DECIMAL(19, 6)),
            FIELD("doubles", ARRAY(DOUBLE())),
            FIELD("time", TIME(0)),
            FIELD("date", DATE()),
            FIELD("timestamp3", TIMESTAMP(3)),
            FIELD("timestamp3_2", TIMESTAMP(3)),
            FIELD("map", MAP(STRING(), BIGINT())),
            FIELD("map2map", MAP(STRING(), MAP(STRING(), INT()))),
            FIELD("map2array", MAP(STRING(), ARRAY(INT()))),
            FIELD("nullEntryMap", MAP(STRING(), STRING())))
            .notNull();
    final RowType rowType = (RowType) dataType.getLogicalType();
    final Schema schema = AvroSchemaConverter.convertToSchema(rowType);
    final GenericRecord record = new GenericData.Record(schema);
    record.put(0, true);
    record.put(1, (int) Byte.MAX_VALUE); // TINYINT is written as an Avro int
    record.put(2, (int) Short.MAX_VALUE); // SMALLINT is written as an Avro int
    record.put(3, 33);
    record.put(4, 44L);
    record.put(5, 12.34F);
    record.put(6, 23.45);
    record.put(7, "hello avro");
    record.put(8, ByteBuffer.wrap(new byte[] { 1, 2, 4, 5, 6, 7, 8, 12 }));
    // DECIMAL(19, 6): the Avro decimal logical type stores the unscaled value's two's-complement bytes
    record.put(9, ByteBuffer.wrap(BigDecimal.valueOf(123456789, 6).unscaledValue().toByteArray()));
    List<Double> doubles = new ArrayList<>();
    doubles.add(1.2);
    doubles.add(3.4);
    doubles.add(567.8901);
    record.put(10, doubles);
    record.put(11, 18397); // TIME: Avro time-millis (int, milliseconds of the day)
    record.put(12, 10087); // DATE: Avro date (int, days since the epoch)
    record.put(13, 1589530213123L); // TIMESTAMP(3): Avro timestamp-millis (long)
    record.put(14, 1589530213122L);
    Map<String, Long> map = new HashMap<>();
    map.put("flink", 12L);
    map.put("avro", 23L);
    record.put(15, map);
    Map<String, Map<String, Integer>> map2map = new HashMap<>();
    Map<String, Integer> innerMap = new HashMap<>();
    innerMap.put("inner_key1", 123);
    innerMap.put("inner_key2", 234);
    map2map.put("outer_key", innerMap);
    record.put(16, map2map);
    List<Integer> list1 = Arrays.asList(1, 2, 3, 4, 5, 6);
    List<Integer> list2 = Arrays.asList(11, 22, 33, 44, 55);
    Map<String, List<Integer>> map2list = new HashMap<>();
    map2list.put("list1", list1);
    map2list.put("list2", list2);
    record.put(17, map2list);
    Map<String, String> map2 = new HashMap<>();
    map2.put("key1", null);
    record.put(18, map2);
    AvroRowDataSerializationSchema serializationSchema = createSerializationSchema(dataType);
    AvroRowDataDeserializationSchema deserializationSchema = createDeserializationSchema(dataType);
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    GenericDatumWriter<IndexedRecord> datumWriter = new GenericDatumWriter<>(schema);
    Encoder encoder = EncoderFactory.get().binaryEncoder(byteArrayOutputStream, null);
    datumWriter.write(record, encoder);
    encoder.flush();
    byte[] input = byteArrayOutputStream.toByteArray();
    RowData rowData = deserializationSchema.deserialize(input);
    byte[] output = serializationSchema.serialize(rowData);
    assertArrayEquals(input, output);
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) RowType(org.apache.flink.table.types.logical.RowType) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) Encoder(org.apache.avro.io.Encoder) DataType(org.apache.flink.table.types.DataType) GenericRecord(org.apache.avro.generic.GenericRecord) LogicalTimeRecord(org.apache.flink.formats.avro.generated.LogicalTimeRecord) List(java.util.List) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) Map(java.util.Map) Test(org.junit.Test)
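
The helper methods createSerializationSchema and createDeserializationSchema are not part of this snippet. A minimal sketch of what they might look like, assuming the public constructors of the two schema classes and InternalTypeInfo.of(...) for the produced type information; passing null to open() is a test-only shortcut that assumes the schemas do not use the initialization context:

private static AvroRowDataSerializationSchema createSerializationSchema(DataType dataType) throws Exception {
    final RowType rowType = (RowType) dataType.getLogicalType();
    // The serializer derives its Avro writer schema from the logical row type.
    final AvroRowDataSerializationSchema serializationSchema = new AvroRowDataSerializationSchema(rowType);
    // Test-only shortcut: assumes open() does not dereference the context.
    serializationSchema.open(null);
    return serializationSchema;
}

private static AvroRowDataDeserializationSchema createDeserializationSchema(DataType dataType) throws Exception {
    final RowType rowType = (RowType) dataType.getLogicalType();
    final AvroRowDataDeserializationSchema deserializationSchema =
            new AvroRowDataDeserializationSchema(rowType, InternalTypeInfo.of(rowType));
    deserializationSchema.open(null);
    return deserializationSchema;
}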

Example 12 with RowType

Use of org.apache.flink.table.types.logical.RowType in project flink by apache.

The class CsvFileFormatFactory, method buildCsvSchema.

private static CsvSchema buildCsvSchema(RowType rowType, ReadableConfig options) {
    final CsvSchema csvSchema = CsvRowSchemaConverter.convert(rowType);
    final CsvSchema.Builder csvBuilder = csvSchema.rebuild();
    // format properties
    options.getOptional(FIELD_DELIMITER).map(s -> StringEscapeUtils.unescapeJava(s).charAt(0)).ifPresent(csvBuilder::setColumnSeparator);
    if (options.get(DISABLE_QUOTE_CHARACTER)) {
        csvBuilder.disableQuoteChar();
    } else {
        options.getOptional(QUOTE_CHARACTER).map(s -> s.charAt(0)).ifPresent(csvBuilder::setQuoteChar);
    }
    options.getOptional(ALLOW_COMMENTS).ifPresent(csvBuilder::setAllowComments);
    options.getOptional(ARRAY_ELEMENT_DELIMITER).ifPresent(csvBuilder::setArrayElementSeparator);
    options.getOptional(ESCAPE_CHARACTER).map(s -> s.charAt(0)).ifPresent(csvBuilder::setEscapeChar);
    options.getOptional(NULL_LITERAL).ifPresent(csvBuilder::setNullValue);
    return csvBuilder.build();
}
Also used : DynamicTableFactory(org.apache.flink.table.factories.DynamicTableFactory) DataType(org.apache.flink.table.types.DataType) EncodingFormat(org.apache.flink.table.connector.format.EncodingFormat) ChangelogMode(org.apache.flink.table.connector.ChangelogMode) FIELD_DELIMITER(org.apache.flink.formats.csv.CsvFormatOptions.FIELD_DELIMITER) BulkWriterFormatFactory(org.apache.flink.connector.file.table.factories.BulkWriterFormatFactory) CsvSchema(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvSchema) Context(org.apache.flink.table.connector.source.DynamicTableSource.Context) JsonNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode) RowType(org.apache.flink.table.types.logical.RowType) ALLOW_COMMENTS(org.apache.flink.formats.csv.CsvFormatOptions.ALLOW_COMMENTS) Factory(org.apache.flink.api.common.serialization.BulkWriter.Factory) ReadableConfig(org.apache.flink.configuration.ReadableConfig) FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) IGNORE_PARSE_ERRORS(org.apache.flink.formats.csv.CsvFormatOptions.IGNORE_PARSE_ERRORS) QUOTE_CHARACTER(org.apache.flink.formats.csv.CsvFormatOptions.QUOTE_CHARACTER) RowDataToCsvConverter(org.apache.flink.formats.csv.RowDataToCsvConverters.RowDataToCsvConverter) ESCAPE_CHARACTER(org.apache.flink.formats.csv.CsvFormatOptions.ESCAPE_CHARACTER) StreamFormatAdapter(org.apache.flink.connector.file.src.impl.StreamFormatAdapter) ConfigOption(org.apache.flink.configuration.ConfigOption) StringEscapeUtils(org.apache.commons.lang3.StringEscapeUtils) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) BulkDecodingFormat(org.apache.flink.connector.file.table.format.BulkDecodingFormat) Projection(org.apache.flink.table.connector.Projection) BulkReaderFormatFactory(org.apache.flink.connector.file.table.factories.BulkReaderFormatFactory) RowData(org.apache.flink.table.data.RowData) DynamicTableSink(org.apache.flink.table.connector.sink.DynamicTableSink) BulkWriter(org.apache.flink.api.common.serialization.BulkWriter) ObjectNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode) Set(java.util.Set) ProjectableDecodingFormat(org.apache.flink.table.connector.format.ProjectableDecodingFormat) DISABLE_QUOTE_CHARACTER(org.apache.flink.formats.csv.CsvFormatOptions.DISABLE_QUOTE_CHARACTER) ARRAY_ELEMENT_DELIMITER(org.apache.flink.formats.csv.CsvFormatOptions.ARRAY_ELEMENT_DELIMITER) Converter(org.apache.flink.formats.common.Converter) NULL_LITERAL(org.apache.flink.formats.csv.CsvFormatOptions.NULL_LITERAL) Internal(org.apache.flink.annotation.Internal) BulkFormat(org.apache.flink.connector.file.src.reader.BulkFormat) Collections(java.util.Collections) CsvMapper(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvMapper)
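
buildCsvSchema is private to the factory, but a small hypothetical driver shows how the options flow into the resulting CsvSchema. Configuration implements ReadableConfig, and RowType.of(...) yields default field names f0, f1; the row type and option values below are invented for illustration:

// Hypothetical driver for buildCsvSchema (all values invented for illustration).
RowType rowType = RowType.of(new VarCharType(VarCharType.MAX_LENGTH), new IntType());
Configuration options = new Configuration();
options.set(CsvFormatOptions.FIELD_DELIMITER, ";"); // overrides the default ','
options.set(CsvFormatOptions.NULL_LITERAL, "N/A"); // literal written for SQL NULL
CsvSchema schema = buildCsvSchema(rowType, options);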

Example 13 with RowType

Use of org.apache.flink.table.types.logical.RowType in project flink by apache.

The class CsvFileFormatFactory, method createEncodingFormat.

@Override
public EncodingFormat<Factory<RowData>> createEncodingFormat(DynamicTableFactory.Context context, ReadableConfig formatOptions) {
    return new EncodingFormat<BulkWriter.Factory<RowData>>() {

        @Override
        public BulkWriter.Factory<RowData> createRuntimeEncoder(DynamicTableSink.Context context, DataType physicalDataType) {
            final RowType rowType = (RowType) physicalDataType.getLogicalType();
            final CsvSchema schema = buildCsvSchema(rowType, formatOptions);
            final RowDataToCsvConverter converter = RowDataToCsvConverters.createRowConverter(rowType);
            final CsvMapper mapper = new CsvMapper();
            final ObjectNode container = mapper.createObjectNode();
            final RowDataToCsvConverter.RowDataToCsvFormatConverterContext converterContext = new RowDataToCsvConverter.RowDataToCsvFormatConverterContext(mapper, container);
            return out -> CsvBulkWriter.forSchema(mapper, schema, converter, converterContext, out);
        }

        @Override
        public ChangelogMode getChangelogMode() {
            return ChangelogMode.insertOnly();
        }
    };
}
Also used : Context(org.apache.flink.table.connector.source.DynamicTableSource.Context) DynamicTableFactory(org.apache.flink.table.factories.DynamicTableFactory) DataType(org.apache.flink.table.types.DataType) EncodingFormat(org.apache.flink.table.connector.format.EncodingFormat) ChangelogMode(org.apache.flink.table.connector.ChangelogMode) FIELD_DELIMITER(org.apache.flink.formats.csv.CsvFormatOptions.FIELD_DELIMITER) BulkWriterFormatFactory(org.apache.flink.connector.file.table.factories.BulkWriterFormatFactory) CsvSchema(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvSchema) JsonNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode) RowType(org.apache.flink.table.types.logical.RowType) ALLOW_COMMENTS(org.apache.flink.formats.csv.CsvFormatOptions.ALLOW_COMMENTS) Factory(org.apache.flink.api.common.serialization.BulkWriter.Factory) ReadableConfig(org.apache.flink.configuration.ReadableConfig) FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) IGNORE_PARSE_ERRORS(org.apache.flink.formats.csv.CsvFormatOptions.IGNORE_PARSE_ERRORS) QUOTE_CHARACTER(org.apache.flink.formats.csv.CsvFormatOptions.QUOTE_CHARACTER) RowDataToCsvConverter(org.apache.flink.formats.csv.RowDataToCsvConverters.RowDataToCsvConverter) ESCAPE_CHARACTER(org.apache.flink.formats.csv.CsvFormatOptions.ESCAPE_CHARACTER) StreamFormatAdapter(org.apache.flink.connector.file.src.impl.StreamFormatAdapter) ConfigOption(org.apache.flink.configuration.ConfigOption) StringEscapeUtils(org.apache.commons.lang3.StringEscapeUtils) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) BulkDecodingFormat(org.apache.flink.connector.file.table.format.BulkDecodingFormat) Projection(org.apache.flink.table.connector.Projection) BulkReaderFormatFactory(org.apache.flink.connector.file.table.factories.BulkReaderFormatFactory) RowData(org.apache.flink.table.data.RowData) DynamicTableSink(org.apache.flink.table.connector.sink.DynamicTableSink) BulkWriter(org.apache.flink.api.common.serialization.BulkWriter) ObjectNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode) Set(java.util.Set) ProjectableDecodingFormat(org.apache.flink.table.connector.format.ProjectableDecodingFormat) DISABLE_QUOTE_CHARACTER(org.apache.flink.formats.csv.CsvFormatOptions.DISABLE_QUOTE_CHARACTER) ARRAY_ELEMENT_DELIMITER(org.apache.flink.formats.csv.CsvFormatOptions.ARRAY_ELEMENT_DELIMITER) Converter(org.apache.flink.formats.common.Converter) NULL_LITERAL(org.apache.flink.formats.csv.CsvFormatOptions.NULL_LITERAL) Internal(org.apache.flink.annotation.Internal) BulkFormat(org.apache.flink.connector.file.src.reader.BulkFormat) Collections(java.util.Collections) CsvMapper(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvMapper)
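
A hedged sketch of how the returned EncodingFormat could be exercised. Here factoryContext, sinkContext, physicalDataType, and out are assumed to be supplied by the planner and filesystem (or mocked in a test); none of them appear in the snippet above:

// Hypothetical usage of the encoding format (contexts assumed/mocked).
EncodingFormat<BulkWriter.Factory<RowData>> format =
        new CsvFileFormatFactory().createEncodingFormat(factoryContext, formatOptions);
BulkWriter.Factory<RowData> writerFactory =
        format.createRuntimeEncoder(sinkContext, physicalDataType);
// `out` is an FSDataOutputStream; finish() finalizes the CSV output.
BulkWriter<RowData> writer = writerFactory.create(out);
writer.addElement(GenericRowData.of(StringData.fromString("hello"), 1));
writer.flush();
writer.finish();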

Example 14 with RowType

Use of org.apache.flink.table.types.logical.RowType in project flink by apache.

The class AvroSchemaConverterTest, method testRowTypeAvroSchemaConversion.

@Test
public void testRowTypeAvroSchemaConversion() {
    RowType rowType = (RowType) TableSchema.builder()
            .field("row1", DataTypes.ROW(DataTypes.FIELD("a", DataTypes.STRING())))
            .field("row2", DataTypes.ROW(DataTypes.FIELD("b", DataTypes.STRING())))
            .field("row3", DataTypes.ROW(DataTypes.FIELD("row3", DataTypes.ROW(DataTypes.FIELD("c", DataTypes.STRING())))))
            .build()
            .toRowDataType()
            .getLogicalType();
    Schema schema = AvroSchemaConverter.convertToSchema(rowType);
    assertEquals(
            "{\n"
                    + "  \"type\" : \"record\",\n"
                    + "  \"name\" : \"record\",\n"
                    + "  \"fields\" : [ {\n"
                    + "    \"name\" : \"row1\",\n"
                    + "    \"type\" : [ \"null\", {\n"
                    + "      \"type\" : \"record\",\n"
                    + "      \"name\" : \"record_row1\",\n"
                    + "      \"fields\" : [ {\n"
                    + "        \"name\" : \"a\",\n"
                    + "        \"type\" : [ \"null\", \"string\" ],\n"
                    + "        \"default\" : null\n"
                    + "      } ]\n"
                    + "    } ],\n"
                    + "    \"default\" : null\n"
                    + "  }, {\n"
                    + "    \"name\" : \"row2\",\n"
                    + "    \"type\" : [ \"null\", {\n"
                    + "      \"type\" : \"record\",\n"
                    + "      \"name\" : \"record_row2\",\n"
                    + "      \"fields\" : [ {\n"
                    + "        \"name\" : \"b\",\n"
                    + "        \"type\" : [ \"null\", \"string\" ],\n"
                    + "        \"default\" : null\n"
                    + "      } ]\n"
                    + "    } ],\n"
                    + "    \"default\" : null\n"
                    + "  }, {\n"
                    + "    \"name\" : \"row3\",\n"
                    + "    \"type\" : [ \"null\", {\n"
                    + "      \"type\" : \"record\",\n"
                    + "      \"name\" : \"record_row3\",\n"
                    + "      \"fields\" : [ {\n"
                    + "        \"name\" : \"row3\",\n"
                    + "        \"type\" : [ \"null\", {\n"
                    + "          \"type\" : \"record\",\n"
                    + "          \"name\" : \"record_row3_row3\",\n"
                    + "          \"fields\" : [ {\n"
                    + "            \"name\" : \"c\",\n"
                    + "            \"type\" : [ \"null\", \"string\" ],\n"
                    + "            \"default\" : null\n"
                    + "          } ]\n"
                    + "        } ],\n"
                    + "        \"default\" : null\n"
                    + "      } ]\n"
                    + "    } ],\n"
                    + "    \"default\" : null\n"
                    + "  } ]\n"
                    + "}",
            schema.toString(true));
}
Also used : Schema(org.apache.avro.Schema) TableSchema(org.apache.flink.table.api.TableSchema) RowType(org.apache.flink.table.types.logical.RowType) Test(org.junit.Test)
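
For the reverse direction, AvroSchemaConverter also exposes convertToDataType. A brief hedged sketch of a round trip; note that generated record names and nullability details may not be preserved exactly:

// Round-trip sketch (not part of the original test).
Schema avroSchema = AvroSchemaConverter.convertToSchema(rowType);
DataType converted = AvroSchemaConverter.convertToDataType(avroSchema.toString());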

Example 15 with RowType

Use of org.apache.flink.table.types.logical.RowType in project flink by apache.

The class AvroSchemaConverterTest, method testInvalidRawTypeAvroSchemaConversion.

@Test
public void testInvalidRawTypeAvroSchemaConversion() {
    RowType rowType = (RowType) TableSchema.builder()
            .field("a", DataTypes.STRING())
            .field("b", DataTypes.RAW(Void.class, VoidSerializer.INSTANCE))
            .build()
            .toRowDataType()
            .getLogicalType();
    thrown.expect(UnsupportedOperationException.class);
    thrown.expectMessage("Unsupported to derive Schema for type: RAW");
    AvroSchemaConverter.convertToSchema(rowType);
}
Also used : RowType(org.apache.flink.table.types.logical.RowType) Test(org.junit.Test)
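
The thrown field used above is a JUnit 4 ExpectedException rule declared on the test class, roughly as follows (sketch; the field name matches the usage in the snippet):

// Declared at class level; lets the test assert on the expected exception.
@Rule
public ExpectedException thrown = ExpectedException.none();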

Aggregations

RowType (org.apache.flink.table.types.logical.RowType): 212
RowData (org.apache.flink.table.data.RowData): 108
LogicalType (org.apache.flink.table.types.logical.LogicalType): 59
DataType (org.apache.flink.table.types.DataType): 57
Transformation (org.apache.flink.api.dag.Transformation): 50
ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge): 46
TableException (org.apache.flink.table.api.TableException): 37
Test (org.junit.Test): 36
GenericRowData (org.apache.flink.table.data.GenericRowData): 33
ArrayList (java.util.ArrayList): 28
List (java.util.List): 28
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 26
RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector): 25
CodeGeneratorContext (org.apache.flink.table.planner.codegen.CodeGeneratorContext): 22
TableConfig (org.apache.flink.table.api.TableConfig): 19
ArrayType (org.apache.flink.table.types.logical.ArrayType): 19
TimestampType (org.apache.flink.table.types.logical.TimestampType): 19
DecimalType (org.apache.flink.table.types.logical.DecimalType): 17
Collections (java.util.Collections): 16
AggregateInfoList (org.apache.flink.table.planner.plan.utils.AggregateInfoList): 16