
Example 31 with DataType

Use of org.apache.flink.table.types.DataType in project flink by apache.

Class DebeziumJsonDecodingFormat, method createRuntimeDecoder:

@Override
public DeserializationSchema<RowData> createRuntimeDecoder(DynamicTableSource.Context context, DataType physicalDataType, int[][] projections) {
    physicalDataType = Projection.of(projections).project(physicalDataType);
    final List<ReadableMetadata> readableMetadata =
            metadataKeys.stream()
                    .map(k ->
                            Stream.of(ReadableMetadata.values())
                                    .filter(rm -> rm.key.equals(k))
                                    .findFirst()
                                    .orElseThrow(IllegalStateException::new))
                    .collect(Collectors.toList());
    final List<DataTypes.Field> metadataFields =
            readableMetadata.stream()
                    .map(m -> DataTypes.FIELD(m.key, m.dataType))
                    .collect(Collectors.toList());
    final DataType producedDataType = DataTypeUtils.appendRowFields(physicalDataType, metadataFields);
    final TypeInformation<RowData> producedTypeInfo = context.createTypeInformation(producedDataType);
    return new DebeziumJsonDeserializationSchema(physicalDataType, readableMetadata, producedTypeInfo, schemaInclude, ignoreParseErrors, timestampFormat);
}
Also used: DataType(org.apache.flink.table.types.DataType) DynamicTableSource(org.apache.flink.table.connector.source.DynamicTableSource) RowData(org.apache.flink.table.data.RowData) TimestampData(org.apache.flink.table.data.TimestampData) ChangelogMode(org.apache.flink.table.connector.ChangelogMode) DataTypes(org.apache.flink.table.api.DataTypes) TimestampFormat(org.apache.flink.formats.common.TimestampFormat) ProjectableDecodingFormat(org.apache.flink.table.connector.format.ProjectableDecodingFormat) Collectors(java.util.stream.Collectors) DeserializationSchema(org.apache.flink.api.common.serialization.DeserializationSchema) LinkedHashMap(java.util.LinkedHashMap) DecodingFormat(org.apache.flink.table.connector.format.DecodingFormat) StringData(org.apache.flink.table.data.StringData) List(java.util.List) GenericRowData(org.apache.flink.table.data.GenericRowData) Stream(java.util.stream.Stream) RowKind(org.apache.flink.types.RowKind) GenericMapData(org.apache.flink.table.data.GenericMapData) Map(java.util.Map) MetadataConverter(org.apache.flink.formats.json.debezium.DebeziumJsonDeserializationSchema.MetadataConverter) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Collections(java.util.Collections) Projection(org.apache.flink.table.connector.Projection) DataTypeUtils(org.apache.flink.table.types.utils.DataTypeUtils)
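
For context, a minimal sketch of the projection step above, using a hypothetical three-column schema (the field names and indices are illustrative, not taken from the Flink sources):

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.connector.Projection;
import org.apache.flink.table.types.DataType;

// Hypothetical physical schema: ROW<id BIGINT, name STRING, ts TIMESTAMP(3)>
DataType physicalDataType =
        DataTypes.ROW(
                DataTypes.FIELD("id", DataTypes.BIGINT()),
                DataTypes.FIELD("name", DataTypes.STRING()),
                DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)));

// Keep only columns 0 and 2, yielding ROW<id BIGINT, ts TIMESTAMP(3)>;
// this mirrors the first statement of createRuntimeDecoder.
DataType projected = Projection.of(new int[][] {{0}, {2}}).project(physicalDataType);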

Example 32 with DataType

Use of org.apache.flink.table.types.DataType in project flink by apache.

Class DebeziumJsonFormatFactory, method createEncodingFormat:

@Override
public EncodingFormat<SerializationSchema<RowData>> createEncodingFormat(DynamicTableFactory.Context context, ReadableConfig formatOptions) {
    FactoryUtil.validateFactoryOptions(this, formatOptions);
    validateEncodingFormatOptions(formatOptions);
    TimestampFormat timestampFormat = JsonFormatOptionsUtil.getTimestampFormat(formatOptions);
    JsonFormatOptions.MapNullKeyMode mapNullKeyMode = JsonFormatOptionsUtil.getMapNullKeyMode(formatOptions);
    String mapNullKeyLiteral = formatOptions.get(JSON_MAP_NULL_KEY_LITERAL);
    final boolean encodeDecimalAsPlainNumber = formatOptions.get(ENCODE_DECIMAL_AS_PLAIN_NUMBER);
    return new EncodingFormat<SerializationSchema<RowData>>() {

        @Override
        public ChangelogMode getChangelogMode() {
            return ChangelogMode.newBuilder()
                    .addContainedKind(RowKind.INSERT)
                    .addContainedKind(RowKind.UPDATE_BEFORE)
                    .addContainedKind(RowKind.UPDATE_AFTER)
                    .addContainedKind(RowKind.DELETE)
                    .build();
        }

        @Override
        public SerializationSchema<RowData> createRuntimeEncoder(DynamicTableSink.Context context, DataType consumedDataType) {
            final RowType rowType = (RowType) consumedDataType.getLogicalType();
            return new DebeziumJsonSerializationSchema(rowType, timestampFormat, mapNullKeyMode, mapNullKeyLiteral, encodeDecimalAsPlainNumber);
        }
    };
}
Also used: EncodingFormat(org.apache.flink.table.connector.format.EncodingFormat) JsonFormatOptions(org.apache.flink.formats.json.JsonFormatOptions) RowData(org.apache.flink.table.data.RowData) DataType(org.apache.flink.table.types.DataType) RowType(org.apache.flink.table.types.logical.RowType) TimestampFormat(org.apache.flink.formats.common.TimestampFormat)
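
A hedged sketch of how this factory is typically reached: the format is selected by name in a table's WITH clause, and the options read in createEncodingFormat map to 'debezium-json.*' keys. The connector, topic, and option values below are illustrative:

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());

// Illustrative DDL; the 'debezium-json.*' keys correspond to the options
// validated and read in createEncodingFormat above.
tEnv.executeSql(
        "CREATE TABLE sink (id BIGINT, name STRING) WITH ("
                + " 'connector' = 'kafka',"
                + " 'topic' = 'out',"
                + " 'properties.bootstrap.servers' = 'localhost:9092',"
                + " 'format' = 'debezium-json',"
                + " 'debezium-json.timestamp-format.standard' = 'ISO-8601',"
                + " 'debezium-json.map-null-key.mode' = 'LITERAL',"
                + " 'debezium-json.map-null-key.literal' = 'null',"
                + " 'debezium-json.encode.decimal-as-plain-number' = 'true'"
                + ")");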

Example 33 with DataType

Use of org.apache.flink.table.types.DataType in project flink by apache.

Class MaxwellJsonDeserializationSchema, method createJsonRowType:

// --------------------------------------------------------------------------------------------
private static RowType createJsonRowType(DataType physicalDataType, List<ReadableMetadata> readableMetadata) {
    DataType root =
            DataTypes.ROW(
                    DataTypes.FIELD("data", physicalDataType),
                    DataTypes.FIELD("old", physicalDataType),
                    DataTypes.FIELD("type", DataTypes.STRING()));
    // append fields that are required for reading metadata in the root
    final List<DataTypes.Field> rootMetadataFields = readableMetadata.stream().map(m -> m.requiredJsonField).distinct().collect(Collectors.toList());
    return (RowType) DataTypeUtils.appendRowFields(root, rootMetadataFields).getLogicalType();
}
Also used: DataType(org.apache.flink.table.types.DataType) RowType(org.apache.flink.table.types.logical.RowType)
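
To make the wrapping concrete, a small sketch with a hypothetical one-column physical type and no requested metadata; the Maxwell envelope nests the physical row under "data" and "old" and adds a "type" discriminator:

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.RowType;

// Hypothetical physical schema: ROW<id BIGINT>
DataType physical = DataTypes.ROW(DataTypes.FIELD("id", DataTypes.BIGINT()));

// With no metadata fields appended, createJsonRowType yields the logical type
// of this wrapper: ROW<`data` ROW<id BIGINT>, `old` ROW<id BIGINT>, `type` STRING>
DataType wrapper =
        DataTypes.ROW(
                DataTypes.FIELD("data", physical),
                DataTypes.FIELD("old", physical),
                DataTypes.FIELD("type", DataTypes.STRING()));
RowType jsonRowType = (RowType) wrapper.getLogicalType();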

Example 34 with DataType

Use of org.apache.flink.table.types.DataType in project flink by apache.

Class FileInfoExtractorBulkFormat, method wrapReader:

private Reader<RowData> wrapReader(Reader<RowData> superReader, FileSourceSplit split) {
    // Fill the metadata + partition columns row
    final GenericRowData fileInfoRowData = new GenericRowData(metadataColumnsFunctions.size() + partitionColumnTypes.size());
    int fileInfoRowIndex = 0;
    for (; fileInfoRowIndex < metadataColumnsFunctions.size(); fileInfoRowIndex++) {
        fileInfoRowData.setField(fileInfoRowIndex, metadataColumnsFunctions.get(fileInfoRowIndex).getValue(split));
    }
    if (!partitionColumnTypes.isEmpty()) {
        final LinkedHashMap<String, String> partitionSpec = PartitionPathUtils.extractPartitionSpecFromPath(split.path());
        for (int partitionFieldIndex = 0; fileInfoRowIndex < fileInfoRowData.getArity(); fileInfoRowIndex++, partitionFieldIndex++) {
            final String fieldName = partitionColumnTypes.get(partitionFieldIndex).getKey();
            final DataType fieldType = partitionColumnTypes.get(partitionFieldIndex).getValue();
            if (!partitionSpec.containsKey(fieldName)) {
                throw new RuntimeException("Cannot find the partition value from path for partition: " + fieldName);
            }
            String valueStr = partitionSpec.get(fieldName);
            valueStr = valueStr.equals(defaultPartName) ? null : valueStr;
            fileInfoRowData.setField(fileInfoRowIndex, PartitionPathUtils.convertStringToInternalValue(valueStr, fieldType));
        }
    }
    // This row is going to be reused for every record
    final EnrichedRowData producedRowData = new EnrichedRowData(fileInfoRowData, this.extendedRowIndexMapping);
    return RecordMapperWrapperRecordIterator.wrapReader(superReader, physicalRowData -> {
        producedRowData.replaceMutableRow(physicalRowData);
        return producedRowData;
    });
}
Also used: GenericRowData(org.apache.flink.table.data.GenericRowData) DataType(org.apache.flink.table.types.DataType)
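
The partition handling above relies on parsing Hive-style key=value path segments. A minimal sketch with a hypothetical path (PartitionPathUtils is the flink-table utility used in wrapReader):

import java.util.LinkedHashMap;
import org.apache.flink.core.fs.Path;
import org.apache.flink.table.utils.PartitionPathUtils;

Path splitPath = new Path("/warehouse/mytable/dt=2022-01-01/hour=12/part-0.parquet");

// Extract the partition spec in path order, e.g. {dt=2022-01-01, hour=12};
// wrapReader then converts each value to its internal representation.
LinkedHashMap<String, String> partitionSpec =
        PartitionPathUtils.extractPartitionSpecFromPath(splitPath);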

Example 35 with DataType

Use of org.apache.flink.table.types.DataType in project flink by apache.

Class HiveWriterFactory, method checkInitialize:

private void checkInitialize() throws Exception {
    if (initialized) {
        return;
    }
    JobConf jobConf = confWrapper.conf();
    Object serdeLib = Class.forName(serDeInfo.deserializeValue().getSerializationLib()).newInstance();
    Preconditions.checkArgument(serdeLib instanceof Serializer && serdeLib instanceof Deserializer, "Expect a SerDe lib implementing both Serializer and Deserializer, but actually got " + serdeLib.getClass().getName());
    this.recordSerDe = (Serializer) serdeLib;
    ReflectionUtils.setConf(recordSerDe, jobConf);
    // TODO: support partition properties, for now assume they're same as table properties
    SerDeUtils.initializeSerDe((Deserializer) recordSerDe, jobConf, tableProperties, null);
    this.formatFields = allColumns.length - partitionColumns.length;
    this.hiveConversions = new HiveObjectConversion[formatFields];
    this.converters = new DataFormatConverter[formatFields];
    List<ObjectInspector> objectInspectors = new ArrayList<>(hiveConversions.length);
    for (int i = 0; i < formatFields; i++) {
        DataType type = allTypes[i];
        ObjectInspector objectInspector = HiveInspectors.getObjectInspector(type);
        objectInspectors.add(objectInspector);
        hiveConversions[i] = HiveInspectors.getConversion(objectInspector, type.getLogicalType(), hiveShim);
        converters[i] = DataFormatConverters.getConverterForDataType(type);
    }
    this.formatInspector = ObjectInspectorFactory.getStandardStructObjectInspector(Arrays.asList(allColumns).subList(0, formatFields), objectInspectors);
    this.initialized = true;
}
Also used: ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) ArrayList(java.util.ArrayList) DataType(org.apache.flink.table.types.DataType) JobConf(org.apache.hadoop.mapred.JobConf) Serializer(org.apache.hadoop.hive.serde2.Serializer)
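
For a single column, the per-field converter lookup in the loop above can be sketched as follows (the STRING column is illustrative, and DataFormatConverters is an internal flink-table utility, so treat this as a sketch rather than stable API):

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.data.util.DataFormatConverters;
import org.apache.flink.table.types.DataType;

DataType type = DataTypes.STRING();

// Look up the external <-> internal converter for this column type,
// as checkInitialize does once per non-partition field.
DataFormatConverters.DataFormatConverter<StringData, String> converter =
        DataFormatConverters.getConverterForDataType(type);

StringData internal = converter.toInternal("hello"); // external -> internal
String external = converter.toExternal(internal);    // internal -> external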

Aggregations

DataType (org.apache.flink.table.types.DataType): 260
Test (org.junit.Test): 72
RowType (org.apache.flink.table.types.logical.RowType): 59
LogicalType (org.apache.flink.table.types.logical.LogicalType): 58
RowData (org.apache.flink.table.data.RowData): 54
List (java.util.List): 38
FieldsDataType (org.apache.flink.table.types.FieldsDataType): 32
ValidationException (org.apache.flink.table.api.ValidationException): 31
ArrayList (java.util.ArrayList): 29
Collectors (java.util.stream.Collectors): 24
AtomicDataType (org.apache.flink.table.types.AtomicDataType): 24
Map (java.util.Map): 23
Internal (org.apache.flink.annotation.Internal): 23
TableException (org.apache.flink.table.api.TableException): 23
HashMap (java.util.HashMap): 22
GenericRowData (org.apache.flink.table.data.GenericRowData): 22
Row (org.apache.flink.types.Row): 22
TableSchema (org.apache.flink.table.api.TableSchema): 20
TypeConversions.fromLogicalToDataType (org.apache.flink.table.types.utils.TypeConversions.fromLogicalToDataType): 19
ResolvedSchema (org.apache.flink.table.catalog.ResolvedSchema): 18