
Example 1 with GenericRecord

Use of org.apache.pulsar.shade.org.apache.avro.generic.GenericRecord in the pulsar-flink project by StreamNative.
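
For context, an Avro GenericRecord is a schema-driven container of field values. A minimal sketch of constructing one against the shaded Avro classes used here (the User schema, field names, and values are illustrative, not from the project):

Schema schema = SchemaBuilder.record("User").fields()
        .requiredString("name")
        .requiredInt("age")
        .endRecord();
// GenericData.Record is the standard GenericRecord implementation.
GenericRecord record = new GenericData.Record(schema);
record.put("name", "alice");
record.put("age", 30);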

From the class PulsarDeserializer, the getRecordWriter method:

private BinFunction<RowUpdater, GenericRecord> getRecordWriter(Schema avroType, FieldsDataType sqlType, List<String> path) throws IncompatibleSchemaException {
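    // For each SQL row field, look up the matching Avro field by name and build a
    // per-field writer; a missing Avro field is only legal if the SQL field is nullable.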
    List<Integer> validFieldIndexes = new ArrayList<>();
    List<BinFunction<RowUpdater, Object>> fieldWriters = new ArrayList<>();
    int length = sqlType.getChildren().size();
    RowType rowType = (RowType) sqlType.getLogicalType();
    List<RowType.RowField> fields = rowType.getFields();
    for (int i = 0; i < length; i++) {
        RowType.RowField sqlField = fields.get(i);
        org.apache.flink.table.types.logical.LogicalType logicalType = rowType.getTypeAt(i);
        Schema.Field avroField = avroType.getField(sqlField.getName());
        if (avroField != null) {
            validFieldIndexes.add(avroField.pos());
            TriFunction<FlinkDataUpdater, Integer, Object> baseWriter = newWriter(avroField.schema(), TypeConversions.fromLogicalToDataType(logicalType), Stream.concat(path.stream(), Stream.of(sqlField.getName())).collect(Collectors.toList()));
            int ordinal = i;
            BinFunction<RowUpdater, Object> fieldWriter = (updater, value) -> {
                if (value == null) {
                    updater.setNullAt(ordinal);
                } else {
                    baseWriter.apply(updater, ordinal, value);
                }
            };
            fieldWriters.add(fieldWriter);
        } else if (!sqlField.getType().isNullable()) {
            throw new IncompatibleSchemaException(String.format("Cannot find non-nullable field in avro schema %s", avroType));
        }
    }
    return (rowUpdater, record) -> {
        for (int i = 0; i < validFieldIndexes.size(); i++) {
            fieldWriters.get(i).apply(rowUpdater, record.get(validFieldIndexes.get(i)));
        }
    };
}
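
A hedged usage sketch of the writer this method returns (avroSchema, rowDataType, and avroRecord are hypothetical inputs, and java.util.Collections is assumed for the empty path; the RowUpdater.setRow pattern mirrors the RECORD branch of newWriter below):

// Build the writer once per schema pair, then reuse it for every record.
BinFunction<RowUpdater, GenericRecord> writer =
        getRecordWriter(avroSchema, rowDataType, Collections.emptyList());
Row row = new Row(rowDataType.getChildren().size()); // one slot per SQL field
RowUpdater updater = new RowUpdater();
updater.setRow(row);               // bind the updater to the target Flink Row
writer.apply(updater, avroRecord); // copy each matched Avro field into the Row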
Also used (deduplicated, grouped by package):
java.io: IOException, Serializable
java.math: BigDecimal
java.nio: ByteBuffer
java.nio.charset: StandardCharsets
java.time: Instant, LocalDate, LocalDateTime, ZoneId
java.util: ArrayList, HashMap, Iterator, List, Map
java.util.function: BiFunction
java.util.stream: Collectors, Stream
lombok.extern.slf4j: Slf4j
org.apache.flink.api.common.serialization: DeserializationSchema
org.apache.flink.api.common.typeinfo: TypeInformation
org.apache.flink.streaming.connectors.pulsar.internal: PulsarOptions.META_FIELD_NAMES
org.apache.flink.streaming.connectors.pulsar.serialization: PulsarDeserializationSchema
org.apache.flink.table.api: DataTypes
org.apache.flink.table.types: CollectionDataType, DataType, FieldsDataType, KeyValueDataType
org.apache.flink.table.types.logical: DecimalType, LogicalTypeRoot, RowType
org.apache.flink.table.types.utils: TypeConversions
org.apache.flink.types: Row
org.apache.flink.util: ExceptionUtils
org.apache.pulsar.client.api: Message
org.apache.pulsar.client.impl.schema.generic: GenericAvroRecord
org.apache.pulsar.common.schema: SchemaInfo
org.apache.pulsar.shade.com.fasterxml.jackson.core: JsonFactory, JsonParser
org.apache.pulsar.shade.com.google.common.collect: ImmutableSet
org.apache.pulsar.shade.org.apache.avro: Conversions, LogicalType, LogicalTypes, Schema, SchemaBuilder
org.apache.pulsar.shade.org.apache.avro.generic: GenericData, GenericFixed, GenericRecord
org.apache.pulsar.shade.org.apache.avro.util: Utf8

Example 2 with GenericRecord

Use of org.apache.pulsar.shade.org.apache.avro.generic.GenericRecord in the pulsar-flink project by StreamNative.

From the class PulsarDeserializer, the newWriter method:

private TriFunction<FlinkDataUpdater, Integer, Object> newWriter(Schema avroType, DataType flinkType, List<String> path) throws IncompatibleSchemaException {
    LogicalTypeRoot tpe = flinkType.getLogicalType().getTypeRoot();
    Schema.Type atpe = avroType.getType();
    if (atpe == Schema.Type.NULL && tpe == LogicalTypeRoot.NULL) {
        return (rowUpdater, ordinal, value) -> rowUpdater.setNullAt(ordinal);
    } else if (atpe == Schema.Type.BOOLEAN && tpe == LogicalTypeRoot.BOOLEAN || atpe == Schema.Type.INT && tpe == LogicalTypeRoot.INTEGER || atpe == Schema.Type.LONG && tpe == LogicalTypeRoot.BIGINT || atpe == Schema.Type.FLOAT && tpe == LogicalTypeRoot.FLOAT || atpe == Schema.Type.DOUBLE && tpe == LogicalTypeRoot.DOUBLE) {
        return (rowUpdater, ordinal, value) -> rowUpdater.set(ordinal, value);
    } else if (atpe == Schema.Type.INT && tpe == LogicalTypeRoot.DATE) {
        return (rowUpdater, ordinal, value) -> rowUpdater.set(ordinal, LocalDate.ofEpochDay(((Integer) value).longValue()));
    } else if (atpe == Schema.Type.LONG && tpe == LogicalTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE) {
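        // The Avro long carries a logical type; DateTimeUtils.toJavaTimestamp is fed
        // microseconds here, so timestamp-millis values are scaled by 1000 first.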
        LogicalType altpe = avroType.getLogicalType();
        if (altpe instanceof LogicalTypes.TimestampMillis) {
            return (rowUpdater, ordinal, value) -> rowUpdater.set(ordinal, DateTimeUtils.toJavaTimestamp(((Long) value) * 1000).toLocalDateTime());
        } else if (altpe instanceof LogicalTypes.TimestampMicros) {
            return (rowUpdater, ordinal, value) -> rowUpdater.set(ordinal, DateTimeUtils.toJavaTimestamp((Long) value).toLocalDateTime());
        } else {
            throw new IncompatibleSchemaException(String.format("Cannot convert Avro logical type %s to flink timestamp type", altpe.toString()));
        }
    } else if (atpe == Schema.Type.STRING && tpe == LogicalTypeRoot.VARCHAR) {
        return (rowUpdater, ordinal, value) -> {
            String s = null;
            if (value instanceof String) {
                s = (String) value;
            } else if (value instanceof Utf8) {
                Utf8 u8 = (Utf8) value;
                byte[] bytes = new byte[u8.getByteLength()];
                System.arraycopy(u8.getBytes(), 0, bytes, 0, u8.getByteLength());
                s = new String(bytes, StandardCharsets.UTF_8);
            }
            rowUpdater.set(ordinal, s);
        };
    } else if (atpe == Schema.Type.ENUM && tpe == LogicalTypeRoot.VARCHAR) {
        return (rowUpdater, ordinal, value) -> rowUpdater.set(ordinal, value.toString());
    } else if (atpe == Schema.Type.FIXED && tpe == LogicalTypeRoot.BINARY) {
        return (rowUpdater, ordinal, value) -> rowUpdater.set(ordinal, ((GenericFixed) value).bytes().clone());
    } else if (atpe == Schema.Type.BYTES && tpe == LogicalTypeRoot.VARBINARY) {
        return (rowUpdater, ordinal, value) -> {
            byte[] bytes = null;
            if (value instanceof ByteBuffer) {
                ByteBuffer bb = (ByteBuffer) value;
                bytes = new byte[bb.remaining()];
                bb.get(bytes);
            } else if (value instanceof byte[]) {
                bytes = (byte[]) value;
            } else {
                throw new IllegalStateException(value.toString() + " is not a valid avro binary");
            }
            rowUpdater.set(ordinal, bytes);
        };
    } else if (atpe == Schema.Type.FIXED && tpe == LogicalTypeRoot.DECIMAL) {
        DecimalType d = (DecimalType) flinkType.getLogicalType();
        return (rowUpdater, ordinal, value) -> {
            BigDecimal bigDecimal = decimalConversions.fromFixed((GenericFixed) value, avroType, LogicalTypes.decimal(d.getPrecision(), d.getScale()));
            rowUpdater.set(ordinal, bigDecimal);
        };
    } else if (atpe == Schema.Type.BYTES && tpe == LogicalTypeRoot.DECIMAL) {
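        // Avro decimal bytes hold the two's-complement unscaled value; the conversion
        // applies the precision and scale declared on the Flink DecimalType.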
        DecimalType d = (DecimalType) flinkType.getLogicalType();
        return (rowUpdater, ordinal, value) -> {
            BigDecimal bigDecimal = decimalConversions.fromBytes((ByteBuffer) value, avroType, LogicalTypes.decimal(d.getPrecision(), d.getScale()));
            rowUpdater.set(ordinal, bigDecimal);
        };
    } else if (atpe == Schema.Type.RECORD && tpe == LogicalTypeRoot.ROW) {
        FieldsDataType fieldsDataType = (FieldsDataType) flinkType;
        BinFunction<RowUpdater, GenericRecord> writeRecord = getRecordWriter(avroType, fieldsDataType, path);
        return (rowUpdater, ordinal, value) -> {
            Row row = new Row(fieldsDataType.getChildren().size());
            RowUpdater ru = new RowUpdater();
            ru.setRow(row);
            writeRecord.apply(ru, (GenericRecord) value);
            rowUpdater.set(ordinal, row);
        };
    } else if (tpe == LogicalTypeRoot.ARRAY && atpe == Schema.Type.ARRAY && flinkType instanceof CollectionDataType) {
        DataType et = ((CollectionDataType) flinkType).getElementDataType();
        boolean containsNull = et.getLogicalType().isNullable();
        TriFunction<FlinkDataUpdater, Integer, Object> elementWriter = newWriter(avroType.getElementType(), et, path);
        return (rowUpdater, ordinal, value) -> {
            List array = (List) value;
            int len = array.size();
            Object[] result = new Object[len];
            ArrayDataUpdater elementUpdater = new ArrayDataUpdater(result);
            for (int i = 0; i < len; i++) {
                Object element = array.get(i);
                if (element == null) {
                    if (!containsNull) {
                        throw new IllegalArgumentException(String.format("Array value at path %s is not allowed to be null", path.toString()));
                    } else {
                        elementUpdater.setNullAt(i);
                    }
                } else {
                    elementWriter.apply(elementUpdater, i, element);
                }
            }
            rowUpdater.set(ordinal, result);
        };
    } else if (tpe == LogicalTypeRoot.MAP && atpe == Schema.Type.MAP && ((KeyValueDataType) flinkType).getKeyDataType().getLogicalType().getTypeRoot() == LogicalTypeRoot.VARCHAR) {
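        // Avro map keys are always strings (hence the VARCHAR key check above); keys and
        // values are staged in parallel arrays before the result map is assembled.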
        KeyValueDataType kvt = (KeyValueDataType) flinkType;
        DataType kt = kvt.getKeyDataType();
        TriFunction<FlinkDataUpdater, Integer, Object> keyWriter = newWriter(SchemaBuilder.builder().stringType(), kt, path);
        DataType vt = kvt.getValueDataType();
        TriFunction<FlinkDataUpdater, Integer, Object> valueWriter = newWriter(avroType.getValueType(), vt, path);
        boolean valueContainsNull = vt.getLogicalType().isNullable();
        return (rowUpdater, ordinal, value) -> {
            Map<Object, Object> map = (Map<Object, Object>) value;
            String[] keys = new String[map.size()];
            Object[] values = new Object[map.size()];
            ArrayDataUpdater keyUpdater = new ArrayDataUpdater(keys);
            ArrayDataUpdater valueUpdater = new ArrayDataUpdater(values);
            Iterator<Map.Entry<Object, Object>> iterator = map.entrySet().iterator();
            int i = 0;
            while (iterator.hasNext()) {
                Map.Entry entry = iterator.next();
                assert entry.getKey() != null;
                keyWriter.apply(keyUpdater, i, entry.getKey());
                if (entry.getValue() == null) {
                    if (!valueContainsNull) {
                        throw new IllegalArgumentException(String.format("Map value at path %s is not allowed to be null", path.toString()));
                    } else {
                        valueUpdater.setNullAt(i);
                    }
                } else {
                    valueWriter.apply(valueUpdater, i, entry.getValue());
                }
                i += 1;
            }
            Map<String, Object> result = new HashMap<>(map.size());
            for (int j = 0; j < map.size(); j++) {
                result.put(keys[j], values[j]);
            }
            rowUpdater.set(ordinal, result);
        };
    } else if (atpe == Schema.Type.UNION) {
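        // Union handling: NULL branches are stripped first. A single remaining branch maps
        // directly; INT|LONG and FLOAT|DOUBLE pairs widen to BIGINT/DOUBLE; any other
        // multi-branch union must match a ROW type with one field per branch.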
        List<Schema> allTypes = avroType.getTypes();
        List<Schema> nonNullTypes = allTypes.stream().filter(t -> t.getType() != Schema.Type.NULL).collect(Collectors.toList());
        if (!nonNullTypes.isEmpty()) {
            if (nonNullTypes.size() == 1) {
                return newWriter(nonNullTypes.get(0), flinkType, path);
            } else {
                if (nonNullTypes.size() == 2) {
                    Schema.Type tp1 = nonNullTypes.get(0).getType();
                    Schema.Type tp2 = nonNullTypes.get(1).getType();
                    if (ImmutableSet.of(tp1, tp2).equals(ImmutableSet.of(Schema.Type.INT, Schema.Type.LONG)) && DataTypes.BIGINT().equals(flinkType)) {
                        return (updater, ordinal, value) -> {
                            if (value == null) {
                                updater.setNullAt(ordinal);
                            } else if (value instanceof Long) {
                                updater.set(ordinal, value);
                            } else if (value instanceof Integer) {
                                updater.set(ordinal, ((Integer) value).longValue());
                            }
                        };
                    } else if (ImmutableSet.of(tp1, tp2).equals(ImmutableSet.of(Schema.Type.FLOAT, Schema.Type.DOUBLE)) && DataTypes.DOUBLE().equals(flinkType)) {
                        return (updater, ordinal, value) -> {
                            if (value == null) {
                                updater.setNullAt(ordinal);
                            } else if (value instanceof Double) {
                                updater.set(ordinal, value);
                            } else if (value instanceof Float) {
                                updater.set(ordinal, ((Float) value).doubleValue());
                            }
                        };
                    } else {
                        throw new IncompatibleSchemaException(String.format("Cannot convert %s %s together to %s", tp1.toString(), tp2.toString(), flinkType));
                    }
                } else if (tpe == LogicalTypeRoot.ROW && ((RowType) flinkType.getLogicalType()).getFieldCount() == nonNullTypes.size()) {
                    RowType rt = (RowType) flinkType.getLogicalType();
                    List<TriFunction<FlinkDataUpdater, Integer, Object>> fieldWriters = new ArrayList<>();
                    for (int i = 0; i < nonNullTypes.size(); i++) {
                        Schema schema = nonNullTypes.get(i);
                        String field = rt.getFieldNames().get(i);
                        org.apache.flink.table.types.logical.LogicalType logicalType = rt.getTypeAt(i);
                        fieldWriters.add(newWriter(schema, TypeConversions.fromLogicalToDataType(logicalType), Stream.concat(path.stream(), Stream.of(field)).collect(Collectors.toList())));
                    }
                    return (updater, ordinal, value) -> {
                        Row row = new Row(rt.getFieldCount());
                        RowUpdater fieldUpdater = new RowUpdater();
                        fieldUpdater.setRow(row);
                        int i = GenericData.get().resolveUnion(avroType, value);
                        fieldWriters.get(i).apply(fieldUpdater, i, value);
                        updater.set(ordinal, row);
                    };
                } else {
                    throw new IncompatibleSchemaException(String.format("Cannot convert avro to flink because schema at %s is not compatible (avroType = %s, sqlType = %s)", path.toString(), avroType, flinkType.toString()));
                }
            }
        } else {
            return (updater, ordinal, value) -> updater.setNullAt(ordinal);
        }
    } else {
        throw new IncompatibleSchemaException(String.format("Cannot convert avro to flink because schema at path %s is not compatible (avroType = %s, sqlType = %s)", path.toString(), avroType.toString(), flinkType.toString()));
    }
}
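
To make the union rules concrete, a minimal sketch of the single-non-null-branch case (rowUpdater is assumed to be already bound to a target Row as in getRecordWriter, and java.util.Collections is assumed for the empty path):

// ["null","long"] collapses to its one non-null branch, so the resulting writer
// behaves like the plain LONG -> BIGINT writer; nulls are handled by the caller.
Schema nullableLong = SchemaBuilder.unionOf().nullType().and().longType().endUnion();
TriFunction<FlinkDataUpdater, Integer, Object> writer =
        newWriter(nullableLong, DataTypes.BIGINT(), Collections.emptyList());
writer.apply(rowUpdater, 0, 42L); // writes 42L into column 0 of the bound Row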
Also used : the same classes as listed under Example 1.

Aggregations

Each of the following classes is used in both examples (2 uses each): java.io.IOException, java.io.Serializable, java.math.BigDecimal, java.nio.ByteBuffer, java.nio.charset.StandardCharsets, java.time.Instant, java.time.LocalDate, java.time.LocalDateTime, java.time.ZoneId, java.util.ArrayList, java.util.HashMap, java.util.Iterator, java.util.List, java.util.Map, java.util.function.BiFunction, java.util.stream.Collectors, java.util.stream.Stream, lombok.extern.slf4j.Slf4j, org.apache.flink.api.common.serialization.DeserializationSchema, org.apache.flink.api.common.typeinfo.TypeInformation.