Use of org.apache.flink.table.types.logical.MapType in project flink by apache.
The class ParquetSchemaConverter, method convertToParquetType.
private static Type convertToParquetType(String name, LogicalType type, Type.Repetition repetition) {
    switch (type.getTypeRoot()) {
        case CHAR:
        case VARCHAR:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8).named(name);
        case BOOLEAN:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.BOOLEAN, repetition).named(name);
        case BINARY:
        case VARBINARY:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, repetition).named(name);
        case DECIMAL:
            // Decimals are stored as fixed-length byte arrays sized to the precision.
            int precision = ((DecimalType) type).getPrecision();
            int scale = ((DecimalType) type).getScale();
            int numBytes = computeMinBytesForDecimalPrecision(precision);
            return Types.primitive(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, repetition)
                    .precision(precision).scale(scale).length(numBytes).as(OriginalType.DECIMAL).named(name);
        case TINYINT:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition).as(OriginalType.INT_8).named(name);
        case SMALLINT:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition).as(OriginalType.INT_16).named(name);
        case INTEGER:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition).named(name);
        case BIGINT:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, repetition).named(name);
        case FLOAT:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.FLOAT, repetition).named(name);
        case DOUBLE:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE, repetition).named(name);
        case DATE:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named(name);
        case TIME_WITHOUT_TIME_ZONE:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition).as(OriginalType.TIME_MILLIS).named(name);
        case TIMESTAMP_WITHOUT_TIME_ZONE:
        case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
            // Timestamps use the legacy INT96 encoding.
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT96, repetition).named(name);
        case ARRAY:
            ArrayType arrayType = (ArrayType) type;
            // The two-argument overload recurses with a default repetition.
            return ConversionPatterns.listOfElements(repetition, name,
                    convertToParquetType(LIST_ELEMENT_NAME, arrayType.getElementType()));
        case MAP:
            // A MAP becomes a repeated key/value group via ConversionPatterns.
            MapType mapType = (MapType) type;
            return ConversionPatterns.mapType(repetition, name, MAP_REPEATED_NAME,
                    convertToParquetType("key", mapType.getKeyType()),
                    convertToParquetType("value", mapType.getValueType()));
        case ROW:
            RowType rowType = (RowType) type;
            return new GroupType(repetition, name, convertToParquetTypes(rowType));
        default:
            throw new UnsupportedOperationException("Unsupported type: " + type);
    }
}
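For orientation, here is a minimal sketch of how a MAP column flows through this converter. It assumes the class exposes a public entry point convertToParquetMessageType(String, RowType) that delegates to convertToParquetType per field; the sketch's class name, field name, and import path are illustrative and may differ from the actual layout.

import org.apache.flink.formats.parquet.utils.ParquetSchemaConverter; // assumed package path
import org.apache.flink.table.types.logical.IntType;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.MapType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.table.types.logical.VarCharType;
import org.apache.parquet.schema.MessageType;

public class MapToParquetSketch {
    public static void main(String[] args) {
        // ROW<attributes MAP<VARCHAR, INT>> (field name "attributes" is made up)
        RowType rowType = RowType.of(
                new LogicalType[] {new MapType(new VarCharType(), new IntType())},
                new String[] {"attributes"});
        // The MAP branch above yields a repeated key/value group in the message type.
        MessageType parquetSchema =
                ParquetSchemaConverter.convertToParquetMessageType("flink_schema", rowType);
        System.out.println(parquetSchema);
    }
}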
Use of org.apache.flink.table.types.logical.MapType in project flink by apache.
The class OrcFileSystemITCase, method initNestedTypesFile.
private String initNestedTypesFile(List<RowData> data) throws Exception {
    LogicalType[] fieldTypes = new LogicalType[4];
    fieldTypes[0] = new VarCharType();
    fieldTypes[1] = new IntType();
    List<RowType.RowField> arrayRowFieldList =
            Collections.singletonList(new RowType.RowField("_col2_col0", new VarCharType()));
    fieldTypes[2] = new ArrayType(new RowType(arrayRowFieldList));
    List<RowType.RowField> mapRowFieldList = Arrays.asList(
            new RowType.RowField("_col3_col0", new VarCharType()),
            new RowType.RowField("_col3_col1", new TimestampType()));
    // MAP<VARCHAR, ROW<_col3_col0 VARCHAR, _col3_col1 TIMESTAMP>>
    fieldTypes[3] = new MapType(new VarCharType(), new RowType(mapRowFieldList));
    // The ORC schema string must mirror the logical types declared above.
    String schema =
            "struct<_col0:string,_col1:int,_col2:array<struct<_col2_col0:string>>,"
                    + "_col3:map<string,struct<_col3_col0:string,_col3_col1:timestamp>>>";
    File outDir = TEMPORARY_FOLDER.newFolder();
    Properties writerProps = new Properties();
    writerProps.setProperty("orc.compress", "LZ4");
    final OrcBulkWriterFactory<RowData> writer =
            new OrcBulkWriterFactory<>(new RowDataVectorizer(schema, fieldTypes), writerProps, new Configuration());
    StreamingFileSink<RowData> sink = StreamingFileSink
            .forBulkFormat(new org.apache.flink.core.fs.Path(outDir.toURI()), writer)
            .withBucketCheckInterval(10000)
            .build();
    try (OneInputStreamOperatorTestHarness<RowData, Object> testHarness =
            new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink), 1, 1, 0)) {
        testHarness.setup();
        testHarness.open();
        int time = 0;
        for (final RowData record : data) {
            testHarness.processElement(record, ++time);
        }
        // Trigger and complete a checkpoint so in-progress files are committed.
        testHarness.snapshot(1, ++time);
        testHarness.notifyOfCompletedCheckpoint(1);
    }
    return outDir.getAbsolutePath();
}
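The MAP column above must line up with the map<...> fragment of the ORC schema string. A small self-contained sketch of that correspondence, built from the same types; the printed summary shown in the comment is approximate:

import java.util.Arrays;
import java.util.List;
import org.apache.flink.table.types.logical.MapType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.table.types.logical.TimestampType;
import org.apache.flink.table.types.logical.VarCharType;

public class OrcMapSchemaSketch {
    public static void main(String[] args) {
        List<RowType.RowField> mapRowFieldList = Arrays.asList(
                new RowType.RowField("_col3_col0", new VarCharType()),
                new RowType.RowField("_col3_col1", new TimestampType()));
        MapType col3 = new MapType(new VarCharType(), new RowType(mapRowFieldList));
        // Prints something like: MAP<VARCHAR(1), ROW<`_col3_col0` VARCHAR(1), `_col3_col1` TIMESTAMP(6)>>
        // matching the ORC fragment: map<string,struct<_col3_col0:string,_col3_col1:timestamp>>
        System.out.println(col3.asSummaryString());
    }
}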
Use of org.apache.flink.table.types.logical.MapType in project flink by apache.
The class PythonBridgeUtils, method getPickledBytesFromJavaObject.
private static Object getPickledBytesFromJavaObject(Object obj, LogicalType dataType) throws IOException {
    Pickler pickler = new Pickler();
    initialize();
    if (obj == null) {
        return new byte[0];
    } else {
        if (dataType instanceof DateType) {
            long time;
            if (obj instanceof LocalDate) {
                time = ((LocalDate) obj).toEpochDay();
            } else {
                time = ((Date) obj).toLocalDate().toEpochDay();
            }
            return pickler.dumps(time);
        } else if (dataType instanceof TimeType) {
            long time;
            if (obj instanceof LocalTime) {
                time = ((LocalTime) obj).toNanoOfDay();
            } else {
                time = ((Time) obj).toLocalTime().toNanoOfDay();
            }
            // Convert nanoseconds to microseconds for the Python side.
            time = time / 1000;
            return pickler.dumps(time);
        } else if (dataType instanceof TimestampType) {
            if (obj instanceof LocalDateTime) {
                return pickler.dumps(Timestamp.valueOf((LocalDateTime) obj));
            } else {
                return pickler.dumps(obj);
            }
        } else if (dataType instanceof RowType) {
            Row tmpRow = (Row) obj;
            LogicalType[] tmpRowFieldTypes = ((RowType) dataType).getChildren().toArray(new LogicalType[0]);
            // The first element carries the RowKind byte; fields follow in order.
            List<Object> rowFieldBytes = new ArrayList<>(tmpRow.getArity() + 1);
            rowFieldBytes.add(new byte[] {tmpRow.getKind().toByteValue()});
            for (int i = 0; i < tmpRow.getArity(); i++) {
                rowFieldBytes.add(getPickledBytesFromJavaObject(tmpRow.getField(i), tmpRowFieldTypes[i]));
            }
            return rowFieldBytes;
        } else if (dataType instanceof MapType) {
            // Maps are encoded as two parallel lists: [keys, values].
            List<List<Object>> serializedMapKV = new ArrayList<>(2);
            MapType mapType = (MapType) dataType;
            Map<Object, Object> mapObj = (Map) obj;
            List<Object> keyBytesList = new ArrayList<>(mapObj.size());
            List<Object> valueBytesList = new ArrayList<>(mapObj.size());
            for (Map.Entry entry : mapObj.entrySet()) {
                keyBytesList.add(getPickledBytesFromJavaObject(entry.getKey(), mapType.getKeyType()));
                valueBytesList.add(getPickledBytesFromJavaObject(entry.getValue(), mapType.getValueType()));
            }
            serializedMapKV.add(keyBytesList);
            serializedMapKV.add(valueBytesList);
            return pickler.dumps(serializedMapKV);
        } else if (dataType instanceof ArrayType) {
            Object[] objects = (Object[]) obj;
            List<Object> serializedElements = new ArrayList<>(objects.length);
            ArrayType arrayType = (ArrayType) dataType;
            LogicalType elementType = arrayType.getElementType();
            for (Object object : objects) {
                serializedElements.add(getPickledBytesFromJavaObject(object, elementType));
            }
            return pickler.dumps(serializedElements);
        }
        if (dataType instanceof FloatType) {
            // Floats round-trip through a string to avoid precision drift.
            return pickler.dumps(String.valueOf(obj));
        } else {
            return pickler.dumps(obj);
        }
    }
}
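The MAP branch above encodes a map as two parallel lists of individually pickled keys and values, then pickles the pair as a whole. A self-contained sketch of the same wire format using the pyrolite Pickler this class relies on; all names here are local to the sketch:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import net.razorvine.pickle.Pickler;

public class MapPickleSketch {
    public static void main(String[] args) throws Exception {
        Pickler pickler = new Pickler();
        Map<String, Integer> map = new LinkedHashMap<>();
        map.put("a", 1);
        map.put("b", 2);
        // Keys and values are pickled element by element ...
        List<Object> keyBytesList = new ArrayList<>(map.size());
        List<Object> valueBytesList = new ArrayList<>(map.size());
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            keyBytesList.add(pickler.dumps(entry.getKey()));
            valueBytesList.add(pickler.dumps(entry.getValue()));
        }
        // ... then shipped as the pair [keys, values], pickled as a whole.
        List<List<Object>> serializedMapKV = Arrays.asList(keyBytesList, valueBytesList);
        byte[] payload = pickler.dumps(serializedMapKV);
        System.out.println(payload.length + " bytes for the Python side");
    }
}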
Use of org.apache.flink.table.types.logical.MapType in project flink by apache.
The class DataTypes, method MAP.
/**
* Data type of an associative array that maps keys (including {@code NULL}) to values
* (including {@code NULL}). A map cannot contain duplicate keys; each key can map to at most
* one value.
*
* <p>There is no restriction of key types; it is the responsibility of the user to ensure
* uniqueness. The map type is an extension to the SQL standard.
*
* @see MapType
*/
public static DataType MAP(DataType keyDataType, DataType valueDataType) {
    Preconditions.checkNotNull(keyDataType, "Key data type must not be null.");
    Preconditions.checkNotNull(valueDataType, "Value data type must not be null.");
    return new KeyValueDataType(
            new MapType(keyDataType.getLogicalType(), valueDataType.getLogicalType()),
            keyDataType,
            valueDataType);
}
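A short usage sketch of this factory method; the printed summary in the comment is approximate:

import static org.apache.flink.table.api.DataTypes.*;

import org.apache.flink.table.types.DataType;

public class MapDataTypeSketch {
    public static void main(String[] args) {
        // MAP<STRING, INT>, built from key and value data types.
        DataType scores = MAP(STRING(), INT());
        // Prints roughly: MAP<VARCHAR(2147483647), INT>
        System.out.println(scores);
    }
}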
Use of org.apache.flink.table.types.logical.MapType in project flink by apache.
The class DataTypeJsonDeserializer, method deserializeClass.
private static DataType deserializeClass(LogicalType logicalType, @Nullable JsonNode parentNode, SerdeContext serdeContext) {
    if (parentNode == null) {
        return DataTypes.of(logicalType);
    }
    final DataType dataType;
    switch (logicalType.getTypeRoot()) {
        case ARRAY:
        case MULTISET:
            final DataType elementDataType = deserializeClass(
                    logicalType.getChildren().get(0), parentNode.get(FIELD_NAME_ELEMENT_CLASS), serdeContext);
            dataType = new CollectionDataType(logicalType, elementDataType);
            break;
        case MAP:
            // Key and value conversion classes are deserialized independently.
            final MapType mapType = (MapType) logicalType;
            final DataType keyDataType =
                    deserializeClass(mapType.getKeyType(), parentNode.get(FIELD_NAME_KEY_CLASS), serdeContext);
            final DataType valueDataType =
                    deserializeClass(mapType.getValueType(), parentNode.get(FIELD_NAME_VALUE_CLASS), serdeContext);
            dataType = new KeyValueDataType(mapType, keyDataType, valueDataType);
            break;
        case ROW:
        case STRUCTURED_TYPE:
            final List<String> fieldNames = LogicalTypeChecks.getFieldNames(logicalType);
            final List<LogicalType> fieldTypes = LogicalTypeChecks.getFieldTypes(logicalType);
            final ArrayNode fieldNodes = (ArrayNode) parentNode.get(FIELD_NAME_FIELDS);
            final Map<String, JsonNode> fieldNodesByName = new HashMap<>();
            if (fieldNodes != null) {
                fieldNodes.forEach(
                        fieldNode -> fieldNodesByName.put(fieldNode.get(FIELD_NAME_FIELD_NAME).asText(), fieldNode));
            }
            final List<DataType> fieldDataTypes =
                    IntStream.range(0, fieldNames.size())
                            .mapToObj(i -> {
                                final String fieldName = fieldNames.get(i);
                                final LogicalType fieldType = fieldTypes.get(i);
                                return deserializeClass(fieldType, fieldNodesByName.get(fieldName), serdeContext);
                            })
                            .collect(Collectors.toList());
            dataType = new FieldsDataType(logicalType, fieldDataTypes);
            break;
        case DISTINCT_TYPE:
            // Unwrap the distinct type and deserialize against its source type.
            final DistinctType distinctType = (DistinctType) logicalType;
            dataType = deserializeClass(distinctType.getSourceType(), parentNode, serdeContext);
            break;
        default:
            dataType = DataTypes.of(logicalType);
    }
    if (!parentNode.has(FIELD_NAME_CONVERSION_CLASS)) {
        return dataType;
    }
    final Class<?> conversionClass = loadClass(
            parentNode.get(FIELD_NAME_CONVERSION_CLASS).asText(),
            serdeContext,
            String.format("conversion class of data type '%s'", dataType));
    return dataType.bridgedTo(conversionClass);
}