use of org.apache.flink.streaming.api.typeinfo.python.PickledByteArrayTypeInfo in project flink by apache.
the class PythonTableUtils method converter.
private static Function<Object, Object> converter(final TypeInformation<?> dataType, final ExecutionConfig config) {
if (dataType.equals(Types.BOOLEAN())) {
return b -> b instanceof Boolean ? b : null;
}
if (dataType.equals(Types.BYTE())) {
return c -> {
if (c instanceof Byte) {
return c;
}
if (c instanceof Short) {
return ((Short) c).byteValue();
}
if (c instanceof Integer) {
return ((Integer) c).byteValue();
}
if (c instanceof Long) {
return ((Long) c).byteValue();
}
return null;
};
}
if (dataType.equals(Types.SHORT())) {
return c -> {
if (c instanceof Byte) {
return ((Byte) c).shortValue();
}
if (c instanceof Short) {
return c;
}
if (c instanceof Integer) {
return ((Integer) c).shortValue();
}
if (c instanceof Long) {
return ((Long) c).shortValue();
}
return null;
};
}
if (dataType.equals(Types.INT())) {
return c -> {
if (c instanceof Byte) {
return ((Byte) c).intValue();
}
if (c instanceof Short) {
return ((Short) c).intValue();
}
if (c instanceof Integer) {
return c;
}
if (c instanceof Long) {
return ((Long) c).intValue();
}
return null;
};
}
if (dataType.equals(Types.LONG())) {
return c -> {
if (c instanceof Byte) {
return ((Byte) c).longValue();
}
if (c instanceof Short) {
return ((Short) c).longValue();
}
if (c instanceof Integer) {
return ((Integer) c).longValue();
}
if (c instanceof Long) {
return c;
}
return null;
};
}
if (dataType.equals(Types.FLOAT())) {
return c -> {
if (c instanceof Float) {
return c;
}
if (c instanceof Double) {
return ((Double) c).floatValue();
}
return null;
};
}
if (dataType.equals(Types.DOUBLE())) {
return c -> {
if (c instanceof Float) {
return ((Float) c).doubleValue();
}
if (c instanceof Double) {
return c;
}
return null;
};
}
if (dataType.equals(Types.DECIMAL())) {
return c -> c instanceof BigDecimal ? c : null;
}
if (dataType.equals(Types.SQL_DATE())) {
return c -> {
if (c instanceof Integer) {
long millisLocal = ((Integer) c).longValue() * 86400000;
long millisUtc = millisLocal - PythonTableUtils.getOffsetFromLocalMillis(millisLocal);
return new Date(millisUtc);
}
return null;
};
}
if (dataType.equals(Types.SQL_TIME())) {
return c -> c instanceof Integer || c instanceof Long ? new Time(((Number) c).longValue() / 1000) : null;
}
if (dataType.equals(Types.SQL_TIMESTAMP())) {
return c -> c instanceof Integer || c instanceof Long ? new Timestamp(((Number) c).longValue() / 1000) : null;
}
if (dataType.equals(org.apache.flink.api.common.typeinfo.Types.INSTANT)) {
return c -> c instanceof Integer || c instanceof Long ? Instant.ofEpochMilli(((Number) c).longValue() / 1000) : null;
}
if (dataType.equals(Types.INTERVAL_MILLIS())) {
return c -> c instanceof Integer || c instanceof Long ? ((Number) c).longValue() / 1000 : null;
}
if (dataType.equals(Types.STRING())) {
return c -> c != null ? c.toString() : null;
}
if (dataType.equals(PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO)) {
return c -> {
if (c instanceof String) {
return ((String) c).getBytes(StandardCharsets.UTF_8);
}
if (c instanceof byte[]) {
return c;
}
return null;
};
}
if (dataType instanceof PrimitiveArrayTypeInfo || dataType instanceof BasicArrayTypeInfo || dataType instanceof ObjectArrayTypeInfo) {
TypeInformation<?> elementType = dataType instanceof PrimitiveArrayTypeInfo ? ((PrimitiveArrayTypeInfo<?>) dataType).getComponentType() : dataType instanceof BasicArrayTypeInfo ? ((BasicArrayTypeInfo<?, ?>) dataType).getComponentInfo() : ((ObjectArrayTypeInfo<?, ?>) dataType).getComponentInfo();
boolean primitive = dataType instanceof PrimitiveArrayTypeInfo;
Function<Object, Object> elementConverter = converter(elementType, config);
BiFunction<Integer, Function<Integer, Object>, Object> arrayConstructor = arrayConstructor(elementType, primitive);
return c -> {
int length = -1;
Function<Integer, Object> elementGetter = null;
if (c instanceof List) {
length = ((List<?>) c).size();
elementGetter = i -> elementConverter.apply(((List<?>) c).get(i));
}
if (c != null && c.getClass().isArray()) {
length = Array.getLength(c);
elementGetter = i -> elementConverter.apply(Array.get(c, i));
}
if (elementGetter != null) {
return arrayConstructor.apply(length, elementGetter);
}
return null;
};
}
if (dataType instanceof MapTypeInfo) {
Function<Object, Object> keyConverter = converter(((MapTypeInfo<?, ?>) dataType).getKeyTypeInfo(), config);
Function<Object, Object> valueConverter = converter(((MapTypeInfo<?, ?>) dataType).getValueTypeInfo(), config);
return c -> c instanceof Map ? ((Map<?, ?>) c).entrySet().stream().collect(Collectors.toMap(e -> keyConverter.apply(e.getKey()), e -> valueConverter.apply(e.getValue()))) : null;
}
if (dataType instanceof RowTypeInfo) {
TypeInformation<?>[] fieldTypes = ((RowTypeInfo) dataType).getFieldTypes();
List<Function<Object, Object>> fieldConverters = Arrays.stream(fieldTypes).map(x -> PythonTableUtils.converter(x, config)).collect(Collectors.toList());
return c -> {
if (c != null && c.getClass().isArray()) {
int length = Array.getLength(c);
if (length - 1 != fieldTypes.length) {
throw new IllegalStateException("Input row doesn't have expected number of values required by the schema. " + fieldTypes.length + " fields are required while " + (length - 1) + " values are provided.");
}
Row row = new Row(length - 1);
row.setKind(RowKind.fromByteValue(((Number) Array.get(c, 0)).byteValue()));
for (int i = 0; i < row.getArity(); i++) {
row.setField(i, fieldConverters.get(i).apply(Array.get(c, i + 1)));
}
return row;
}
return null;
};
}
if (dataType instanceof TupleTypeInfo) {
TypeInformation<?>[] fieldTypes = ((TupleTypeInfo<?>) dataType).getFieldTypes();
List<Function<Object, Object>> fieldConverters = Arrays.stream(fieldTypes).map(x -> PythonTableUtils.converter(x, config)).collect(Collectors.toList());
return c -> {
if (c != null && c.getClass().isArray()) {
int length = Array.getLength(c);
if (length != fieldTypes.length) {
throw new IllegalStateException("Input tuple doesn't have expected number of values required by the schema. " + fieldTypes.length + " fields are required while " + length + " values are provided.");
}
Tuple tuple = Tuple.newInstance(length);
for (int i = 0; i < tuple.getArity(); i++) {
tuple.setField(fieldConverters.get(i).apply(Array.get(c, i)), i);
}
return tuple;
}
return null;
};
}
return c -> {
if (c.getClass() != byte[].class || dataType instanceof PickledByteArrayTypeInfo) {
return c;
}
// other typeinfos will use the corresponding serializer to deserialize data.
byte[] b = (byte[]) c;
TypeSerializer<?> dataSerializer = dataType.createSerializer(config);
ByteArrayInputStreamWithPos bais = new ByteArrayInputStreamWithPos();
DataInputViewStreamWrapper baisWrapper = new DataInputViewStreamWrapper(bais);
bais.setBuffer(b, 0, b.length);
try {
return dataSerializer.deserialize(baisWrapper);
} catch (IOException e) {
throw new IllegalStateException("Failed to deserialize the object with datatype " + dataType, e);
}
};
}
use of org.apache.flink.streaming.api.typeinfo.python.PickledByteArrayTypeInfo in project flink by apache.
the class PythonBridgeUtils method getPickledBytesFromJavaObject.
public static Object getPickledBytesFromJavaObject(Object obj, TypeInformation<?> dataType) throws IOException {
Pickler pickler = new Pickler();
initialize();
if (obj == null) {
return new byte[0];
} else {
if (dataType instanceof SqlTimeTypeInfo) {
SqlTimeTypeInfo<?> sqlTimeTypeInfo = SqlTimeTypeInfo.getInfoFor(dataType.getTypeClass());
if (sqlTimeTypeInfo == DATE) {
return pickler.dumps(((Date) obj).toLocalDate().toEpochDay());
} else if (sqlTimeTypeInfo == TIME) {
return pickler.dumps(((Time) obj).toLocalTime().toNanoOfDay() / 1000);
}
} else if (dataType instanceof RowTypeInfo || dataType instanceof TupleTypeInfo) {
TypeInformation<?>[] fieldTypes = ((TupleTypeInfoBase<?>) dataType).getFieldTypes();
int arity = dataType instanceof RowTypeInfo ? ((Row) obj).getArity() : ((Tuple) obj).getArity();
List<Object> fieldBytes = new ArrayList<>(arity + 1);
if (dataType instanceof RowTypeInfo) {
fieldBytes.add(new byte[] { ((Row) obj).getKind().toByteValue() });
}
for (int i = 0; i < arity; i++) {
Object field = dataType instanceof RowTypeInfo ? ((Row) obj).getField(i) : ((Tuple) obj).getField(i);
fieldBytes.add(getPickledBytesFromJavaObject(field, fieldTypes[i]));
}
return fieldBytes;
} else if (dataType instanceof BasicArrayTypeInfo || dataType instanceof PrimitiveArrayTypeInfo) {
Object[] objects = (Object[]) obj;
List<Object> serializedElements = new ArrayList<>(objects.length);
TypeInformation<?> elementType = dataType instanceof BasicArrayTypeInfo ? ((BasicArrayTypeInfo<?, ?>) dataType).getComponentInfo() : ((PrimitiveArrayTypeInfo<?>) dataType).getComponentType();
for (Object object : objects) {
serializedElements.add(getPickledBytesFromJavaObject(object, elementType));
}
return pickler.dumps(serializedElements);
} else if (dataType instanceof MapTypeInfo) {
List<List<Object>> serializedMapKV = new ArrayList<>(2);
Map<Object, Object> mapObj = (Map) obj;
List<Object> keyBytesList = new ArrayList<>(mapObj.size());
List<Object> valueBytesList = new ArrayList<>(mapObj.size());
for (Map.Entry entry : mapObj.entrySet()) {
keyBytesList.add(getPickledBytesFromJavaObject(entry.getKey(), ((MapTypeInfo) dataType).getKeyTypeInfo()));
valueBytesList.add(getPickledBytesFromJavaObject(entry.getValue(), ((MapTypeInfo) dataType).getValueTypeInfo()));
}
serializedMapKV.add(keyBytesList);
serializedMapKV.add(valueBytesList);
return pickler.dumps(serializedMapKV);
} else if (dataType instanceof ListTypeInfo) {
List objects = (List) obj;
List<Object> serializedElements = new ArrayList<>(objects.size());
TypeInformation elementType = ((ListTypeInfo) dataType).getElementTypeInfo();
for (Object object : objects) {
serializedElements.add(getPickledBytesFromJavaObject(object, elementType));
}
return pickler.dumps(serializedElements);
}
if (dataType instanceof BasicTypeInfo && BasicTypeInfo.getInfoFor(dataType.getTypeClass()) == FLOAT_TYPE_INFO) {
// Serialization of float type with pickler loses precision.
return pickler.dumps(String.valueOf(obj));
} else if (dataType instanceof PickledByteArrayTypeInfo || dataType instanceof BasicTypeInfo) {
return pickler.dumps(obj);
} else {
// other typeinfos will use the corresponding serializer to serialize data.
TypeSerializer serializer = dataType.createSerializer(null);
ByteArrayOutputStreamWithPos baos = new ByteArrayOutputStreamWithPos();
DataOutputViewStreamWrapper baosWrapper = new DataOutputViewStreamWrapper(baos);
serializer.serialize(obj, baosWrapper);
return pickler.dumps(baos.toByteArray());
}
}
}
Aggregations