Example 11 with MapType

use of org.apache.flink.table.types.logical.MapType in project flink by apache.

the class ParquetSchemaConverter method convertToParquetType.

private static Type convertToParquetType(String name, LogicalType type, Type.Repetition repetition) {
    switch(type.getTypeRoot()) {
        case CHAR:
        case VARCHAR:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8).named(name);
        case BOOLEAN:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.BOOLEAN, repetition).named(name);
        case BINARY:
        case VARBINARY:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, repetition).named(name);
        case DECIMAL:
            int precision = ((DecimalType) type).getPrecision();
            int scale = ((DecimalType) type).getScale();
            int numBytes = computeMinBytesForDecimalPrecision(precision);
            return Types.primitive(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, repetition)
                    .precision(precision)
                    .scale(scale)
                    .length(numBytes)
                    .as(OriginalType.DECIMAL)
                    .named(name);
        case TINYINT:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition).as(OriginalType.INT_8).named(name);
        case SMALLINT:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition).as(OriginalType.INT_16).named(name);
        case INTEGER:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition).named(name);
        case BIGINT:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, repetition).named(name);
        case FLOAT:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.FLOAT, repetition).named(name);
        case DOUBLE:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE, repetition).named(name);
        case DATE:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named(name);
        case TIME_WITHOUT_TIME_ZONE:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition).as(OriginalType.TIME_MILLIS).named(name);
        case TIMESTAMP_WITHOUT_TIME_ZONE:
        case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
            // both timestamp flavors are stored as INT96, Parquet's legacy timestamp encoding
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT96, repetition).named(name);
        case ARRAY:
            ArrayType arrayType = (ArrayType) type;
            // the two-argument overload of convertToParquetType (not shown) applies a default repetition
            return ConversionPatterns.listOfElements(
                    repetition, name, convertToParquetType(LIST_ELEMENT_NAME, arrayType.getElementType()));
        case MAP:
            MapType mapType = (MapType) type;
            return ConversionPatterns.mapType(
                    repetition,
                    name,
                    MAP_REPEATED_NAME,
                    convertToParquetType("key", mapType.getKeyType()),
                    convertToParquetType("value", mapType.getValueType()));
        case ROW:
            RowType rowType = (RowType) type;
            return new GroupType(repetition, name, convertToParquetTypes(rowType));
        default:
            throw new UnsupportedOperationException("Unsupported type: " + type);
    }
}
Also used : ArrayType(org.apache.flink.table.types.logical.ArrayType) GroupType(org.apache.parquet.schema.GroupType) DecimalType(org.apache.flink.table.types.logical.DecimalType) RowType(org.apache.flink.table.types.logical.RowType) MapType(org.apache.flink.table.types.logical.MapType)
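
For orientation, here is a minimal standalone sketch of the Parquet group the MAP branch above produces for a MAP&lt;VARCHAR, INT&gt; column, built directly against the parquet-column builder API. The repeated-group name "key_value" and the OPTIONAL repetitions are assumptions standing in for Flink's MAP_REPEATED_NAME constant and the caller-supplied repetition.

import org.apache.parquet.schema.ConversionPatterns;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Types;

public class MapToParquetSketch {
    public static void main(String[] args) {
        // key and value fields, built the same way as the CHAR/VARCHAR and INTEGER branches above
        Type key = Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, Type.Repetition.OPTIONAL)
                .as(OriginalType.UTF8)
                .named("key");
        Type value = Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, Type.Repetition.OPTIONAL)
                .named("value");
        // "key_value" is a placeholder for Flink's MAP_REPEATED_NAME constant
        GroupType map = ConversionPatterns.mapType(
                Type.Repetition.OPTIONAL, "props", "key_value", key, value);
        // prints the full group schema of the map column
        System.out.println(map);
    }
}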

Example 12 with MapType

use of org.apache.flink.table.types.logical.MapType in project flink by apache.

the class OrcFileSystemITCase method initNestedTypesFile.

private String initNestedTypesFile(List<RowData> data) throws Exception {
    LogicalType[] fieldTypes = new LogicalType[4];
    fieldTypes[0] = new VarCharType();
    fieldTypes[1] = new IntType();
    List<RowType.RowField> arrayRowFieldList = Collections.singletonList(new RowType.RowField("_col2_col0", new VarCharType()));
    fieldTypes[2] = new ArrayType(new RowType(arrayRowFieldList));
    List<RowType.RowField> mapRowFieldList = Arrays.asList(new RowType.RowField("_col3_col0", new VarCharType()), new RowType.RowField("_col3_col1", new TimestampType()));
    fieldTypes[3] = new MapType(new VarCharType(), new RowType(mapRowFieldList));
    String schema = "struct<_col0:string,_col1:int,_col2:array<struct<_col2_col0:string>>," + "_col3:map<string,struct<_col3_col0:string,_col3_col1:timestamp>>>";
    File outDir = TEMPORARY_FOLDER.newFolder();
    Properties writerProps = new Properties();
    writerProps.setProperty("orc.compress", "LZ4");
    final OrcBulkWriterFactory<RowData> writer = new OrcBulkWriterFactory<>(new RowDataVectorizer(schema, fieldTypes), writerProps, new Configuration());
    StreamingFileSink<RowData> sink = StreamingFileSink.forBulkFormat(new org.apache.flink.core.fs.Path(outDir.toURI()), writer).withBucketCheckInterval(10000).build();
    try (OneInputStreamOperatorTestHarness<RowData, Object> testHarness = new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink), 1, 1, 0)) {
        testHarness.setup();
        testHarness.open();
        int time = 0;
        for (final RowData record : data) {
            testHarness.processElement(record, ++time);
        }
        testHarness.snapshot(1, ++time);
        testHarness.notifyOfCompletedCheckpoint(1);
    }
    return outDir.getAbsolutePath();
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowType(org.apache.flink.table.types.logical.RowType) Properties(java.util.Properties) MapType(org.apache.flink.table.types.logical.MapType) IntType(org.apache.flink.table.types.logical.IntType) ArrayType(org.apache.flink.table.types.logical.ArrayType) OrcBulkWriterFactory(org.apache.flink.orc.writer.OrcBulkWriterFactory) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) RowDataVectorizer(org.apache.flink.orc.vector.RowDataVectorizer) TimestampType(org.apache.flink.table.types.logical.TimestampType) VarCharType(org.apache.flink.table.types.logical.VarCharType) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) OrcFile(org.apache.orc.OrcFile) File(java.io.File)
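
A quick way to sanity-check the file the harness wrote is to read its schema back with the plain ORC reader API. This helper is a sketch of ours, not part of the test, and assumes the orc-core and hadoop-common dependencies already on the test classpath.

import java.io.File;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;

public class OrcSchemaCheck {
    // opens one of the files written above and prints its schema, which should
    // match the struct<...> string handed to RowDataVectorizer
    static void printOrcSchema(File orcFile) throws Exception {
        Reader reader = OrcFile.createReader(
                new Path(orcFile.getAbsolutePath()),
                OrcFile.readerOptions(new Configuration()));
        System.out.println(reader.getSchema());
        System.out.println("rows: " + reader.getNumberOfRows());
    }
}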

Example 13 with MapType

use of org.apache.flink.table.types.logical.MapType in project flink by apache.

the class PythonBridgeUtils method getPickledBytesFromJavaObject.

private static Object getPickledBytesFromJavaObject(Object obj, LogicalType dataType) throws IOException {
    Pickler pickler = new Pickler();
    initialize();
    if (obj == null) {
        return new byte[0];
    } else {
        if (dataType instanceof DateType) {
            long time;
            if (obj instanceof LocalDate) {
                time = ((LocalDate) (obj)).toEpochDay();
            } else {
                time = ((Date) obj).toLocalDate().toEpochDay();
            }
            return pickler.dumps(time);
        } else if (dataType instanceof TimeType) {
            long time;
            if (obj instanceof LocalTime) {
                time = ((LocalTime) obj).toNanoOfDay();
            } else {
                time = ((Time) obj).toLocalTime().toNanoOfDay();
            }
            // LocalTime/Time carry nanoseconds of day; scale down to microseconds
            time = time / 1000;
            return pickler.dumps(time);
        } else if (dataType instanceof TimestampType) {
            if (obj instanceof LocalDateTime) {
                return pickler.dumps(Timestamp.valueOf((LocalDateTime) obj));
            } else {
                return pickler.dumps(obj);
            }
        } else if (dataType instanceof RowType) {
            Row tmpRow = (Row) obj;
            LogicalType[] tmpRowFieldTypes = ((RowType) dataType).getChildren().toArray(new LogicalType[0]);
            List<Object> rowFieldBytes = new ArrayList<>(tmpRow.getArity() + 1);
            rowFieldBytes.add(new byte[] { tmpRow.getKind().toByteValue() });
            for (int i = 0; i < tmpRow.getArity(); i++) {
                rowFieldBytes.add(getPickledBytesFromJavaObject(tmpRow.getField(i), tmpRowFieldTypes[i]));
            }
            return rowFieldBytes;
        } else if (dataType instanceof MapType) {
            List<List<Object>> serializedMapKV = new ArrayList<>(2);
            MapType mapType = (MapType) dataType;
            Map<Object, Object> mapObj = (Map) obj;
            List<Object> keyBytesList = new ArrayList<>(mapObj.size());
            List<Object> valueBytesList = new ArrayList<>(mapObj.size());
            for (Map.Entry entry : mapObj.entrySet()) {
                keyBytesList.add(getPickledBytesFromJavaObject(entry.getKey(), mapType.getKeyType()));
                valueBytesList.add(getPickledBytesFromJavaObject(entry.getValue(), mapType.getValueType()));
            }
            serializedMapKV.add(keyBytesList);
            serializedMapKV.add(valueBytesList);
            return pickler.dumps(serializedMapKV);
        } else if (dataType instanceof ArrayType) {
            Object[] objects = (Object[]) obj;
            List<Object> serializedElements = new ArrayList<>(objects.length);
            ArrayType arrayType = (ArrayType) dataType;
            LogicalType elementType = arrayType.getElementType();
            for (Object object : objects) {
                serializedElements.add(getPickledBytesFromJavaObject(object, elementType));
            }
            return pickler.dumps(serializedElements);
        }
        // every branch above returns, so only the remaining primitive types reach this point;
        // floats are pickled via their string representation
        if (dataType instanceof FloatType) {
            return pickler.dumps(String.valueOf(obj));
        } else {
            return pickler.dumps(obj);
        }
    }
}
Also used : LocalDateTime(java.time.LocalDateTime) LocalTime(java.time.LocalTime) ArrayList(java.util.ArrayList) RowType(org.apache.flink.table.types.logical.RowType) LogicalType(org.apache.flink.table.types.logical.LogicalType) LocalDate(java.time.LocalDate) Date(java.sql.Date) LocalDate(java.time.LocalDate) MapType(org.apache.flink.table.types.logical.MapType) TimeType(org.apache.flink.table.types.logical.TimeType) FloatType(org.apache.flink.table.types.logical.FloatType) ArrayType(org.apache.flink.table.types.logical.ArrayType) TimestampType(org.apache.flink.table.types.logical.TimestampType) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) Row(org.apache.flink.types.Row) Pickler(net.razorvine.pickle.Pickler) DateType(org.apache.flink.table.types.logical.DateType) Map(java.util.Map)
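
The MapType branch deserves a note: a Java map is not pickled as a Python dict but as a two-element list holding an index-aligned key list and value list. The sketch below shows just that layout and round-trips it with the same net.razorvine.pickle library; it is simplified in that the real method first pickles every key and value to bytes recursively.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import net.razorvine.pickle.Pickler;
import net.razorvine.pickle.Unpickler;

public class MapPickleLayout {
    public static void main(String[] args) throws Exception {
        List<List<Object>> serializedMapKV = new ArrayList<>(2);
        serializedMapKV.add(Arrays.<Object>asList("a", "b")); // keys
        serializedMapKV.add(Arrays.<Object>asList(1, 2));     // values, index-aligned with the keys
        byte[] pickled = new Pickler().dumps(serializedMapKV);
        Object roundTripped = new Unpickler().loads(pickled);
        System.out.println(roundTripped); // [[a, b], [1, 2]]
    }
}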

Example 14 with MapType

use of org.apache.flink.table.types.logical.MapType in project flink by apache.

the class DataTypes method MAP.

/**
 * Data type of an associative array that maps keys (including {@code NULL}) to values
 * (including {@code NULL}). A map cannot contain duplicate keys; each key can map to at most
 * one value.
 *
 * <p>There is no restriction of key types; it is the responsibility of the user to ensure
 * uniqueness. The map type is an extension to the SQL standard.
 *
 * @see MapType
 */
public static DataType MAP(DataType keyDataType, DataType valueDataType) {
    Preconditions.checkNotNull(keyDataType, "Key data type must not be null.");
    Preconditions.checkNotNull(valueDataType, "Value data type must not be null.");
    return new KeyValueDataType(new MapType(keyDataType.getLogicalType(), valueDataType.getLogicalType()), keyDataType, valueDataType);
}
Also used : KeyValueDataType(org.apache.flink.table.types.KeyValueDataType) MapType(org.apache.flink.table.types.logical.MapType)
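
A typical call site looks like the snippet below; the printed form is approximate, since DataType's string form renders the logical type's summary.

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.types.DataType;

// builds MAP<STRING, INT>; both arguments are non-null, so the Preconditions checks pass
DataType scores = DataTypes.MAP(DataTypes.STRING(), DataTypes.INT());
System.out.println(scores); // roughly: MAP<STRING, INT>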

Example 15 with MapType

use of org.apache.flink.table.types.logical.MapType in project flink by apache.

the class DataTypeJsonDeserializer method deserializeClass.

private static DataType deserializeClass(LogicalType logicalType, @Nullable JsonNode parentNode, SerdeContext serdeContext) {
    if (parentNode == null) {
        return DataTypes.of(logicalType);
    }
    final DataType dataType;
    switch(logicalType.getTypeRoot()) {
        case ARRAY:
        case MULTISET:
            final DataType elementDataType = deserializeClass(logicalType.getChildren().get(0), parentNode.get(FIELD_NAME_ELEMENT_CLASS), serdeContext);
            dataType = new CollectionDataType(logicalType, elementDataType);
            break;
        case MAP:
            final MapType mapType = (MapType) logicalType;
            final DataType keyDataType = deserializeClass(mapType.getKeyType(), parentNode.get(FIELD_NAME_KEY_CLASS), serdeContext);
            final DataType valueDataType = deserializeClass(mapType.getValueType(), parentNode.get(FIELD_NAME_VALUE_CLASS), serdeContext);
            dataType = new KeyValueDataType(mapType, keyDataType, valueDataType);
            break;
        case ROW:
        case STRUCTURED_TYPE:
            final List<String> fieldNames = LogicalTypeChecks.getFieldNames(logicalType);
            final List<LogicalType> fieldTypes = LogicalTypeChecks.getFieldTypes(logicalType);
            final ArrayNode fieldNodes = (ArrayNode) parentNode.get(FIELD_NAME_FIELDS);
            final Map<String, JsonNode> fieldNodesByName = new HashMap<>();
            if (fieldNodes != null) {
                fieldNodes.forEach(fieldNode -> fieldNodesByName.put(fieldNode.get(FIELD_NAME_FIELD_NAME).asText(), fieldNode));
            }
            final List<DataType> fieldDataTypes = IntStream.range(0, fieldNames.size()).mapToObj(i -> {
                final String fieldName = fieldNames.get(i);
                final LogicalType fieldType = fieldTypes.get(i);
                return deserializeClass(fieldType, fieldNodesByName.get(fieldName), serdeContext);
            }).collect(Collectors.toList());
            dataType = new FieldsDataType(logicalType, fieldDataTypes);
            break;
        case DISTINCT_TYPE:
            final DistinctType distinctType = (DistinctType) logicalType;
            dataType = deserializeClass(distinctType.getSourceType(), parentNode, serdeContext);
            break;
        default:
            dataType = DataTypes.of(logicalType);
    }
    if (!parentNode.has(FIELD_NAME_CONVERSION_CLASS)) {
        return dataType;
    }
    final Class<?> conversionClass = loadClass(parentNode.get(FIELD_NAME_CONVERSION_CLASS).asText(), serdeContext, String.format("conversion class of data type '%s'", dataType));
    return dataType.bridgedTo(conversionClass);
}
Also used : IntStream(java.util.stream.IntStream) FIELD_NAME_ELEMENT_CLASS(org.apache.flink.table.planner.plan.nodes.exec.serde.DataTypeJsonSerializer.FIELD_NAME_ELEMENT_CLASS) DataType(org.apache.flink.table.types.DataType) KeyValueDataType(org.apache.flink.table.types.KeyValueDataType) FIELD_NAME_VALUE_CLASS(org.apache.flink.table.planner.plan.nodes.exec.serde.DataTypeJsonSerializer.FIELD_NAME_VALUE_CLASS) JsonParser(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonParser) HashMap(java.util.HashMap) JsonNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode) MapType(org.apache.flink.table.types.logical.MapType) ArrayNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ArrayNode) DeserializationContext(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.DeserializationContext) FIELD_NAME_FIELD_NAME(org.apache.flink.table.planner.plan.nodes.exec.serde.DataTypeJsonSerializer.FIELD_NAME_FIELD_NAME) FIELD_NAME_CONVERSION_CLASS(org.apache.flink.table.planner.plan.nodes.exec.serde.DataTypeJsonSerializer.FIELD_NAME_CONVERSION_CLASS) FIELD_NAME_FIELDS(org.apache.flink.table.planner.plan.nodes.exec.serde.DataTypeJsonSerializer.FIELD_NAME_FIELDS) FieldsDataType(org.apache.flink.table.types.FieldsDataType) Map(java.util.Map) FIELD_NAME_KEY_CLASS(org.apache.flink.table.planner.plan.nodes.exec.serde.DataTypeJsonSerializer.FIELD_NAME_KEY_CLASS) Nullable(javax.annotation.Nullable) DataTypes(org.apache.flink.table.api.DataTypes) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) List(java.util.List) FIELD_NAME_TYPE(org.apache.flink.table.planner.plan.nodes.exec.serde.DataTypeJsonSerializer.FIELD_NAME_TYPE) CollectionDataType(org.apache.flink.table.types.CollectionDataType) DistinctType(org.apache.flink.table.types.logical.DistinctType) LogicalType(org.apache.flink.table.types.logical.LogicalType) JsonSerdeUtil.loadClass(org.apache.flink.table.planner.plan.nodes.exec.serde.JsonSerdeUtil.loadClass) Internal(org.apache.flink.annotation.Internal) StdDeserializer(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.deser.std.StdDeserializer) LogicalTypeChecks(org.apache.flink.table.types.logical.utils.LogicalTypeChecks) FieldsDataType(org.apache.flink.table.types.FieldsDataType) HashMap(java.util.HashMap) CollectionDataType(org.apache.flink.table.types.CollectionDataType) KeyValueDataType(org.apache.flink.table.types.KeyValueDataType) LogicalType(org.apache.flink.table.types.logical.LogicalType) JsonNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode) MapType(org.apache.flink.table.types.logical.MapType) DistinctType(org.apache.flink.table.types.logical.DistinctType) DataType(org.apache.flink.table.types.DataType) KeyValueDataType(org.apache.flink.table.types.KeyValueDataType) FieldsDataType(org.apache.flink.table.types.FieldsDataType) CollectionDataType(org.apache.flink.table.types.CollectionDataType) ArrayNode(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ArrayNode)
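
The conversion-class restoration at the end is the piece that matters for MapType: after rebuilding the KeyValueDataType, the deserializer re-applies the bridged Java class. A minimal sketch of the same bridging done by hand:

import java.util.Map;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.types.DataType;

// the logical type stays MAP<STRING, INT>; bridgedTo(...) only records the
// Java class used when values cross the table/runtime boundary
DataType mapType = DataTypes.MAP(DataTypes.STRING(), DataTypes.INT())
        .bridgedTo(Map.class);
System.out.println(mapType.getConversionClass()); // interface java.util.Map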

Aggregations

MapType (org.apache.flink.table.types.logical.MapType): 17 uses
ArrayType (org.apache.flink.table.types.logical.ArrayType): 9 uses
LogicalType (org.apache.flink.table.types.logical.LogicalType): 8 uses
IntType (org.apache.flink.table.types.logical.IntType): 7 uses
RowType (org.apache.flink.table.types.logical.RowType): 7 uses
TimestampType (org.apache.flink.table.types.logical.TimestampType): 5 uses
VarCharType (org.apache.flink.table.types.logical.VarCharType): 5 uses
Map (java.util.Map): 4 uses
GenericRowData (org.apache.flink.table.data.GenericRowData): 4 uses
DataType (org.apache.flink.table.types.DataType): 4 uses
ArrayList (java.util.ArrayList): 3 uses
HashMap (java.util.HashMap): 3 uses
List (java.util.List): 3 uses
RowData (org.apache.flink.table.data.RowData): 3 uses
KeyValueDataType (org.apache.flink.table.types.KeyValueDataType): 3 uses
DecimalType (org.apache.flink.table.types.logical.DecimalType): 3 uses
MultisetType (org.apache.flink.table.types.logical.MultisetType): 3 uses
LocalDateTime (java.time.LocalDateTime): 2 uses
Internal (org.apache.flink.annotation.Internal): 2 uses
JsonNode (org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode): 2 uses