Search in sources :

Example 1 with ObjectArrayTypeInfo

use of org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo in project flink by apache.

From the class AvroRowDeserializationSchema, method convertAvroType.

/**
 * Converts a single Avro value into the Java object used for the given type information.
 *
 * <p>The conversion is driven by {@code schema.getType()}; {@code info} selects between
 * alternative target representations (for example decimal vs. raw bytes, or SQL date/time
 * types vs. plain numbers). Records, arrays, maps and unions are handled recursively or via
 * helper methods.
 *
 * @param schema Avro schema describing {@code object}
 * @param info type information describing the desired result
 * @param object the value to convert, may be {@code null}
 * @return the converted value, or {@code null} if {@code object} is {@code null}
 * @throws IllegalStateException if the schema is a record but the value is not an
 *     {@link IndexedRecord}
 * @throws RuntimeException if the schema type is not handled by any case
 */
private Object convertAvroType(Schema schema, TypeInformation<?> info, Object object) {
    // Nothing to convert; null is passed through for every schema type.
    if (object == null) {
        return null;
    }

    switch (schema.getType()) {
        case RECORD: {
            if (!(object instanceof IndexedRecord)) {
                throw new IllegalStateException("IndexedRecord expected but was: " + object.getClass());
            }
            return convertAvroRecordToRow(schema, (RowTypeInfo) info, (IndexedRecord) object);
        }
        case ENUM:
        case STRING:
            // Both are represented by their string form.
            return object.toString();
        case ARRAY: {
            // Only the way the component type is looked up differs between the two array infos.
            final TypeInformation<?> componentInfo = info instanceof BasicArrayTypeInfo
                    ? ((BasicArrayTypeInfo<?, ?>) info).getComponentInfo()
                    : ((ObjectArrayTypeInfo<?, ?>) info).getComponentInfo();
            return convertToObjectArray(schema.getElementType(), componentInfo, object);
        }
        case MAP: {
            final MapTypeInfo<?, ?> mapInfo = (MapTypeInfo<?, ?>) info;
            final Map<String, Object> result = new HashMap<>();
            final Map<?, ?> source = (Map<?, ?>) object;
            for (Map.Entry<?, ?> e : source.entrySet()) {
                // Keys are stringified; values are converted recursively.
                final String key = e.getKey().toString();
                final Object value = convertAvroType(schema.getValueType(), mapInfo.getValueTypeInfo(), e.getValue());
                result.put(key, value);
            }
            return result;
        }
        case UNION: {
            final List<Schema> members = schema.getTypes();
            final int memberCount = members.size();
            final Schema resolved;
            if (memberCount == 1) {
                resolved = members.get(0);
            } else if (memberCount == 2) {
                // A two-member union with NULL on either side resolves to the other member.
                if (members.get(0).getType() == Schema.Type.NULL) {
                    resolved = members.get(1);
                } else if (members.get(1).getType() == Schema.Type.NULL) {
                    resolved = members.get(0);
                } else {
                    // generic type
                    return object;
                }
            } else {
                // generic type
                return object;
            }
            return convertAvroType(resolved, info, object);
        }
        case FIXED: {
            final byte[] raw = ((GenericFixed) object).bytes();
            if (info != Types.BIG_DEC) {
                return raw;
            }
            return convertToDecimal(schema, raw);
        }
        case BYTES: {
            final ByteBuffer buffer = (ByteBuffer) object;
            // Copy the remaining content of the buffer into a fresh array.
            final byte[] raw = new byte[buffer.remaining()];
            buffer.get(raw);
            if (info != Types.BIG_DEC) {
                return raw;
            }
            return convertToDecimal(schema, raw);
        }
        case INT: {
            if (info == Types.SQL_DATE) {
                return convertToDate(object);
            }
            if (info == Types.SQL_TIME) {
                return convertToTime(object);
            }
            return object;
        }
        case LONG: {
            if (info == Types.SQL_TIMESTAMP) {
                final boolean isMicros = schema.getLogicalType() == LogicalTypes.timestampMicros();
                return convertToTimestamp(object, isMicros);
            }
            if (info == Types.SQL_TIME) {
                return convertToTime(object);
            }
            return object;
        }
        case FLOAT:
        case DOUBLE:
        case BOOLEAN:
            // Already in the target representation.
            return object;
    }
    throw new RuntimeException("Unsupported Avro type:" + schema);
}
Also used : GenericFixed(org.apache.avro.generic.GenericFixed) IndexedRecord(org.apache.avro.generic.IndexedRecord) HashMap(java.util.HashMap) AbstractDeserializationSchema(org.apache.flink.api.common.serialization.AbstractDeserializationSchema) Schema(org.apache.avro.Schema) DeserializationSchema(org.apache.flink.api.common.serialization.DeserializationSchema) ByteBuffer(java.nio.ByteBuffer) ObjectArrayTypeInfo(org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo) MapTypeInfo(org.apache.flink.api.java.typeutils.MapTypeInfo) HashMap(java.util.HashMap) Map(java.util.Map) BasicArrayTypeInfo(org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo)

Example 2 with ObjectArrayTypeInfo

use of org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo in project flink by apache.

From the class PythonTableUtils, method converter.

/**
 * Creates a function that coerces an incoming object into the Java representation expected
 * for {@code dataType}.
 *
 * <p>For the explicitly handled types (booleans, numeric types, decimal, SQL date/time types,
 * instant, interval, string, arrays, maps, rows and tuples) a value that cannot be coerced,
 * including {@code null}, is mapped to {@code null}. Converters for composite types are built
 * recursively from the converters of their element/field types.
 *
 * <p>For any other type the value is returned unchanged unless it is a {@code byte[]} and the
 * type is not a {@link PickledByteArrayTypeInfo}; in that case the bytes are deserialized with
 * the serializer created from {@code dataType}.
 *
 * @param dataType the target type information
 * @param config execution config used to create serializers for the fallback case
 * @return a converter function; it may throw {@link IllegalStateException} when a row or tuple
 *     has an unexpected number of values, when two map keys convert to the same key, or when
 *     deserialization fails
 */
private static Function<Object, Object> converter(final TypeInformation<?> dataType, final ExecutionConfig config) {
    if (dataType.equals(Types.BOOLEAN())) {
        return b -> b instanceof Boolean ? b : null;
    }
    if (dataType.equals(Types.BYTE())) {
        return c -> {
            if (c instanceof Byte) {
                return c;
            }
            if (c instanceof Short) {
                return ((Short) c).byteValue();
            }
            if (c instanceof Integer) {
                return ((Integer) c).byteValue();
            }
            if (c instanceof Long) {
                return ((Long) c).byteValue();
            }
            return null;
        };
    }
    if (dataType.equals(Types.SHORT())) {
        return c -> {
            if (c instanceof Byte) {
                return ((Byte) c).shortValue();
            }
            if (c instanceof Short) {
                return c;
            }
            if (c instanceof Integer) {
                return ((Integer) c).shortValue();
            }
            if (c instanceof Long) {
                return ((Long) c).shortValue();
            }
            return null;
        };
    }
    if (dataType.equals(Types.INT())) {
        return c -> {
            if (c instanceof Byte) {
                return ((Byte) c).intValue();
            }
            if (c instanceof Short) {
                return ((Short) c).intValue();
            }
            if (c instanceof Integer) {
                return c;
            }
            if (c instanceof Long) {
                return ((Long) c).intValue();
            }
            return null;
        };
    }
    if (dataType.equals(Types.LONG())) {
        return c -> {
            if (c instanceof Byte) {
                return ((Byte) c).longValue();
            }
            if (c instanceof Short) {
                return ((Short) c).longValue();
            }
            if (c instanceof Integer) {
                return ((Integer) c).longValue();
            }
            if (c instanceof Long) {
                return c;
            }
            return null;
        };
    }
    if (dataType.equals(Types.FLOAT())) {
        return c -> {
            if (c instanceof Float) {
                return c;
            }
            if (c instanceof Double) {
                return ((Double) c).floatValue();
            }
            return null;
        };
    }
    if (dataType.equals(Types.DOUBLE())) {
        return c -> {
            if (c instanceof Float) {
                return ((Float) c).doubleValue();
            }
            if (c instanceof Double) {
                return c;
            }
            return null;
        };
    }
    if (dataType.equals(Types.DECIMAL())) {
        return c -> c instanceof BigDecimal ? c : null;
    }
    if (dataType.equals(Types.SQL_DATE())) {
        return c -> {
            if (c instanceof Integer) {
                // 86400000 ms per day; the multiplication is done in long arithmetic.
                // NOTE(review): presumably the input is a day count since the epoch - confirm.
                long millisLocal = ((Integer) c).longValue() * 86400000;
                long millisUtc = millisLocal - PythonTableUtils.getOffsetFromLocalMillis(millisLocal);
                return new Date(millisUtc);
            }
            return null;
        };
    }
    // The following time-based branches divide the incoming number by 1000.
    // NOTE(review): presumably a microsecond-to-millisecond conversion - confirm against caller.
    if (dataType.equals(Types.SQL_TIME())) {
        return c -> c instanceof Integer || c instanceof Long ? new Time(((Number) c).longValue() / 1000) : null;
    }
    if (dataType.equals(Types.SQL_TIMESTAMP())) {
        return c -> c instanceof Integer || c instanceof Long ? new Timestamp(((Number) c).longValue() / 1000) : null;
    }
    if (dataType.equals(org.apache.flink.api.common.typeinfo.Types.INSTANT)) {
        return c -> c instanceof Integer || c instanceof Long ? Instant.ofEpochMilli(((Number) c).longValue() / 1000) : null;
    }
    if (dataType.equals(Types.INTERVAL_MILLIS())) {
        return c -> c instanceof Integer || c instanceof Long ? ((Number) c).longValue() / 1000 : null;
    }
    if (dataType.equals(Types.STRING())) {
        return c -> c != null ? c.toString() : null;
    }
    if (dataType.equals(PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO)) {
        return c -> {
            if (c instanceof String) {
                return ((String) c).getBytes(StandardCharsets.UTF_8);
            }
            if (c instanceof byte[]) {
                return c;
            }
            return null;
        };
    }
    if (dataType instanceof PrimitiveArrayTypeInfo || dataType instanceof BasicArrayTypeInfo || dataType instanceof ObjectArrayTypeInfo) {
        TypeInformation<?> elementType = dataType instanceof PrimitiveArrayTypeInfo
                ? ((PrimitiveArrayTypeInfo<?>) dataType).getComponentType()
                : dataType instanceof BasicArrayTypeInfo
                        ? ((BasicArrayTypeInfo<?, ?>) dataType).getComponentInfo()
                        : ((ObjectArrayTypeInfo<?, ?>) dataType).getComponentInfo();
        boolean primitive = dataType instanceof PrimitiveArrayTypeInfo;
        Function<Object, Object> elementConverter = converter(elementType, config);
        BiFunction<Integer, Function<Integer, Object>, Object> arrayConstructor = arrayConstructor(elementType, primitive);
        return c -> {
            int length = -1;
            Function<Integer, Object> elementGetter = null;
            // Accept either a List or any Java array as the element source.
            if (c instanceof List) {
                length = ((List<?>) c).size();
                elementGetter = i -> elementConverter.apply(((List<?>) c).get(i));
            }
            if (c != null && c.getClass().isArray()) {
                length = Array.getLength(c);
                elementGetter = i -> elementConverter.apply(Array.get(c, i));
            }
            if (elementGetter != null) {
                return arrayConstructor.apply(length, elementGetter);
            }
            return null;
        };
    }
    if (dataType instanceof MapTypeInfo) {
        Function<Object, Object> keyConverter = converter(((MapTypeInfo<?, ?>) dataType).getKeyTypeInfo(), config);
        Function<Object, Object> valueConverter = converter(((MapTypeInfo<?, ?>) dataType).getValueTypeInfo(), config);
        return c -> {
            if (!(c instanceof Map)) {
                return null;
            }
            // Filled by hand instead of Collectors.toMap: toMap throws NullPointerException
            // when a converted value is null, which the value converter legitimately produces
            // for null or non-coercible values.
            Map<Object, Object> converted = new java.util.HashMap<>();
            for (Map.Entry<?, ?> entry : ((Map<?, ?>) c).entrySet()) {
                Object key = keyConverter.apply(entry.getKey());
                Object value = valueConverter.apply(entry.getValue());
                // Keep failing on colliding converted keys, as Collectors.toMap did.
                if (converted.containsKey(key)) {
                    throw new IllegalStateException("Duplicate key " + key);
                }
                converted.put(key, value);
            }
            return converted;
        };
    }
    if (dataType instanceof RowTypeInfo) {
        TypeInformation<?>[] fieldTypes = ((RowTypeInfo) dataType).getFieldTypes();
        List<Function<Object, Object>> fieldConverters = Arrays.stream(fieldTypes).map(x -> PythonTableUtils.converter(x, config)).collect(Collectors.toList());
        return c -> {
            if (c != null && c.getClass().isArray()) {
                // Element 0 carries the row kind; the remaining elements are the field values.
                int length = Array.getLength(c);
                if (length - 1 != fieldTypes.length) {
                    throw new IllegalStateException("Input row doesn't have expected number of values required by the schema. " + fieldTypes.length + " fields are required while " + (length - 1) + " values are provided.");
                }
                Row row = new Row(length - 1);
                row.setKind(RowKind.fromByteValue(((Number) Array.get(c, 0)).byteValue()));
                for (int i = 0; i < row.getArity(); i++) {
                    row.setField(i, fieldConverters.get(i).apply(Array.get(c, i + 1)));
                }
                return row;
            }
            return null;
        };
    }
    if (dataType instanceof TupleTypeInfo) {
        TypeInformation<?>[] fieldTypes = ((TupleTypeInfo<?>) dataType).getFieldTypes();
        List<Function<Object, Object>> fieldConverters = Arrays.stream(fieldTypes).map(x -> PythonTableUtils.converter(x, config)).collect(Collectors.toList());
        return c -> {
            if (c != null && c.getClass().isArray()) {
                int length = Array.getLength(c);
                if (length != fieldTypes.length) {
                    throw new IllegalStateException("Input tuple doesn't have expected number of values required by the schema. " + fieldTypes.length + " fields are required while " + length + " values are provided.");
                }
                Tuple tuple = Tuple.newInstance(length);
                for (int i = 0; i < tuple.getArity(); i++) {
                    tuple.setField(fieldConverters.get(i).apply(Array.get(c, i)), i);
                }
                return tuple;
            }
            return null;
        };
    }
    return c -> {
        // Null is passed through like in every other branch; calling getClass() on it would
        // otherwise fail with a NullPointerException.
        if (c == null || c.getClass() != byte[].class || dataType instanceof PickledByteArrayTypeInfo) {
            return c;
        }
        // other typeinfos will use the corresponding serializer to deserialize data.
        byte[] b = (byte[]) c;
        TypeSerializer<?> dataSerializer = dataType.createSerializer(config);
        ByteArrayInputStreamWithPos bais = new ByteArrayInputStreamWithPos();
        DataInputViewStreamWrapper baisWrapper = new DataInputViewStreamWrapper(bais);
        bais.setBuffer(b, 0, b.length);
        try {
            return dataSerializer.deserialize(baisWrapper);
        } catch (IOException e) {
            throw new IllegalStateException("Failed to deserialize the object with datatype " + dataType, e);
        }
    };
}
Also used : CollectionInputFormat(org.apache.flink.api.java.io.CollectionInputFormat) Arrays(java.util.Arrays) Array(java.lang.reflect.Array) Time(java.sql.Time) BasicArrayTypeInfo(org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo) BiFunction(java.util.function.BiFunction) LocalDateTime(java.time.LocalDateTime) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) Types(org.apache.flink.table.api.Types) ObjectArrayTypeInfo(org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) Function(java.util.function.Function) ByteArrayInputStreamWithPos(org.apache.flink.core.memory.ByteArrayInputStreamWithPos) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) BigDecimal(java.math.BigDecimal) Map(java.util.Map) LocalTime(java.time.LocalTime) MapTypeInfo(org.apache.flink.api.java.typeutils.MapTypeInfo) InputFormat(org.apache.flink.api.common.io.InputFormat) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Tuple(org.apache.flink.api.java.tuple.Tuple) DataInputViewStreamWrapper(org.apache.flink.core.memory.DataInputViewStreamWrapper) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) PickledByteArrayTypeInfo(org.apache.flink.streaming.api.typeinfo.python.PickledByteArrayTypeInfo) TimeZone(java.util.TimeZone) Timestamp(java.sql.Timestamp) IOException(java.io.IOException) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) Date(java.sql.Date) PrimitiveArrayTypeInfo(org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo) List(java.util.List) RowKind(org.apache.flink.types.RowKind) LocalDate(java.time.LocalDate) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Internal(org.apache.flink.annotation.Internal) Row(org.apache.flink.types.Row) PickledByteArrayTypeInfo(org.apache.flink.streaming.api.typeinfo.python.PickledByteArrayTypeInfo) Time(java.sql.Time) 
LocalDateTime(java.time.LocalDateTime) LocalTime(java.time.LocalTime) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) Timestamp(java.sql.Timestamp) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) BiFunction(java.util.function.BiFunction) Function(java.util.function.Function) ByteArrayInputStreamWithPos(org.apache.flink.core.memory.ByteArrayInputStreamWithPos) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) List(java.util.List) IOException(java.io.IOException) DataInputViewStreamWrapper(org.apache.flink.core.memory.DataInputViewStreamWrapper) BigDecimal(java.math.BigDecimal) Date(java.sql.Date) LocalDate(java.time.LocalDate) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) ObjectArrayTypeInfo(org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo) MapTypeInfo(org.apache.flink.api.java.typeutils.MapTypeInfo) PrimitiveArrayTypeInfo(org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo) Row(org.apache.flink.types.Row) Map(java.util.Map) BasicArrayTypeInfo(org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo) Tuple(org.apache.flink.api.java.tuple.Tuple)

Example 3 with ObjectArrayTypeInfo

use of org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo in project flink by apache.

From the class TypeStringUtils, method writeTypeInfo.

/**
 * Renders the given type information as a type string.
 *
 * <p>Basic types map to fixed keywords; row, array, multiset and map types are rendered
 * recursively as {@code KEYWORD<...>}. Generic and simple POJO types are rendered with their
 * class name, and any remaining type is rendered as an {@code ANY} entry that also carries
 * the encoded type information object.
 *
 * @param typeInfo the type information to render
 * @return the string representation
 * @throws TableException if {@code typeInfo} is a POJO type that differs from the type
 *     information extracted from its class
 */
public static String writeTypeInfo(TypeInformation<?> typeInfo) {
    // --- basic types -------------------------------------------------------------------
    if (typeInfo.equals(Types.STRING)) {
        return VARCHAR;
    }
    if (typeInfo.equals(Types.BOOLEAN)) {
        return BOOLEAN;
    }
    if (typeInfo.equals(Types.BYTE)) {
        return TINYINT;
    }
    if (typeInfo.equals(Types.SHORT)) {
        return SMALLINT;
    }
    if (typeInfo.equals(Types.INT)) {
        return INT;
    }
    if (typeInfo.equals(Types.LONG)) {
        return BIGINT;
    }
    if (typeInfo.equals(Types.FLOAT)) {
        return FLOAT;
    }
    if (typeInfo.equals(Types.DOUBLE)) {
        return DOUBLE;
    }
    if (typeInfo.equals(Types.BIG_DEC)) {
        return DECIMAL;
    }

    // --- temporal types: the java.time variants share the keyword of the SQL variants ---
    if (typeInfo.equals(Types.SQL_DATE) || typeInfo.equals(Types.LOCAL_DATE)) {
        // write LOCAL_DATE as "DATE" to keep compatible when using new types
        return DATE;
    }
    if (typeInfo.equals(Types.SQL_TIME) || typeInfo.equals(Types.LOCAL_TIME)) {
        // write LOCAL_TIME as "TIME" to keep compatible when using new types
        return TIME;
    }
    if (typeInfo.equals(Types.SQL_TIMESTAMP) || typeInfo.equals(Types.LOCAL_DATE_TIME)) {
        // write LOCAL_DATE_TIME as "TIMESTAMP" to keep compatible when using new types
        return TIMESTAMP;
    }

    // --- composite types ---------------------------------------------------------------
    if (typeInfo instanceof RowTypeInfo) {
        final RowTypeInfo rowInfo = (RowTypeInfo) typeInfo;
        final String[] names = rowInfo.getFieldNames();
        final TypeInformation<?>[] types = rowInfo.getFieldTypes();
        final StringBuilder sb = new StringBuilder();
        sb.append(ROW);
        sb.append('<');
        for (int idx = 0; idx < names.length; idx++) {
            if (idx > 0) {
                sb.append(", ");
            }
            final String name = names[idx];
            // escape field name if it contains delimiters
            if (containsDelimiter(name)) {
                sb.append('`').append(name.replace("`", "``")).append('`');
            } else {
                sb.append(name);
            }
            sb.append(' ');
            sb.append(writeTypeInfo(types[idx]));
        }
        sb.append('>');
        return sb.toString();
    }
    if (typeInfo instanceof GenericTypeInfo) {
        return ANY + '<' + typeInfo.getTypeClass().getName() + '>';
    }
    if (typeInfo instanceof PojoTypeInfo) {
        // we only support very simple POJOs that only contain extracted fields
        // (not manually specified)
        TypeInformation<?> extracted;
        try {
            extracted = TypeExtractor.createTypeInfo(typeInfo.getTypeClass());
        } catch (InvalidTypesException e) {
            extracted = null;
        }
        final boolean matchesExtracted = extracted != null && typeInfo.equals(extracted);
        if (!matchesExtracted) {
            throw new TableException("A string representation for custom POJO types is not supported yet.");
        }
        return POJO + '<' + typeInfo.getTypeClass().getName() + '>';
    }
    if (typeInfo instanceof PrimitiveArrayTypeInfo) {
        final PrimitiveArrayTypeInfo<?> primitiveArray = (PrimitiveArrayTypeInfo<?>) typeInfo;
        return PRIMITIVE_ARRAY + '<' + writeTypeInfo(primitiveArray.getComponentType()) + '>';
    }
    if (typeInfo instanceof ObjectArrayTypeInfo) {
        final ObjectArrayTypeInfo<?, ?> objectArray = (ObjectArrayTypeInfo<?, ?>) typeInfo;
        return OBJECT_ARRAY + '<' + writeTypeInfo(objectArray.getComponentInfo()) + '>';
    }
    // Multiset is tested before map, as in the original ordering of the checks.
    if (typeInfo instanceof MultisetTypeInfo) {
        final MultisetTypeInfo<?> multiset = (MultisetTypeInfo<?>) typeInfo;
        return MULTISET + '<' + writeTypeInfo(multiset.getElementTypeInfo()) + '>';
    }
    if (typeInfo instanceof MapTypeInfo) {
        final MapTypeInfo<?, ?> mapInfo = (MapTypeInfo<?, ?>) typeInfo;
        final String keyString = writeTypeInfo(mapInfo.getKeyTypeInfo());
        final String valueString = writeTypeInfo(mapInfo.getValueTypeInfo());
        return MAP + '<' + keyString + ", " + valueString + '>';
    }

    // --- fallback: class name plus the encoded type information object --------------------
    return ANY + '<' + typeInfo.getTypeClass().getName() + ", " + EncodingUtils.encodeObjectToString(typeInfo) + '>';
}
Also used : MultisetTypeInfo(org.apache.flink.api.java.typeutils.MultisetTypeInfo) TableException(org.apache.flink.table.api.TableException) PojoTypeInfo(org.apache.flink.api.java.typeutils.PojoTypeInfo) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) GenericTypeInfo(org.apache.flink.api.java.typeutils.GenericTypeInfo) ObjectArrayTypeInfo(org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo) MapTypeInfo(org.apache.flink.api.java.typeutils.MapTypeInfo) PrimitiveArrayTypeInfo(org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo) InvalidTypesException(org.apache.flink.api.common.functions.InvalidTypesException)

Aggregations

MapTypeInfo (org.apache.flink.api.java.typeutils.MapTypeInfo)3 ObjectArrayTypeInfo (org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo)3 Map (java.util.Map)2 BasicArrayTypeInfo (org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo)2 PrimitiveArrayTypeInfo (org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo)2 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)2 RowTypeInfo (org.apache.flink.api.java.typeutils.RowTypeInfo)2 IOException (java.io.IOException)1 Array (java.lang.reflect.Array)1 BigDecimal (java.math.BigDecimal)1 ByteBuffer (java.nio.ByteBuffer)1 StandardCharsets (java.nio.charset.StandardCharsets)1 Date (java.sql.Date)1 Time (java.sql.Time)1 Timestamp (java.sql.Timestamp)1 Instant (java.time.Instant)1 LocalDate (java.time.LocalDate)1 LocalDateTime (java.time.LocalDateTime)1 LocalTime (java.time.LocalTime)1 Arrays (java.util.Arrays)1