
Example 6 with DeserializationSchema

Use of org.apache.flink.api.common.serialization.DeserializationSchema in project flink by apache.

From class KafkaDynamicTableFactory, method createDynamicTableSource:

@Override
public DynamicTableSource createDynamicTableSource(Context context) {
    final TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
    final Optional<DecodingFormat<DeserializationSchema<RowData>>> keyDecodingFormat = getKeyDecodingFormat(helper);
    final DecodingFormat<DeserializationSchema<RowData>> valueDecodingFormat = getValueDecodingFormat(helper);
    helper.validateExcept(PROPERTIES_PREFIX);
    final ReadableConfig tableOptions = helper.getOptions();
    validateTableSourceOptions(tableOptions);
    validatePKConstraints(context.getObjectIdentifier(), context.getPrimaryKeyIndexes(), context.getCatalogTable().getOptions(), valueDecodingFormat);
    final StartupOptions startupOptions = getStartupOptions(tableOptions);
    final Properties properties = getKafkaProperties(context.getCatalogTable().getOptions());
    // add topic-partition discovery
    final Optional<Long> partitionDiscoveryInterval = tableOptions.getOptional(SCAN_TOPIC_PARTITION_DISCOVERY).map(Duration::toMillis);
    properties.setProperty(KafkaSourceOptions.PARTITION_DISCOVERY_INTERVAL_MS.key(), partitionDiscoveryInterval.orElse(-1L).toString());
    final DataType physicalDataType = context.getPhysicalRowDataType();
    final int[] keyProjection = createKeyFormatProjection(tableOptions, physicalDataType);
    final int[] valueProjection = createValueFormatProjection(tableOptions, physicalDataType);
    final String keyPrefix = tableOptions.getOptional(KEY_FIELDS_PREFIX).orElse(null);
    return createKafkaTableSource(
            physicalDataType,
            keyDecodingFormat.orElse(null),
            valueDecodingFormat,
            keyProjection,
            valueProjection,
            keyPrefix,
            getSourceTopics(tableOptions),
            getSourceTopicPattern(tableOptions),
            properties,
            startupOptions.startupMode,
            startupOptions.specificOffsets,
            startupOptions.startupTimestampMillis,
            context.getObjectIdentifier().asSummaryString());
}
Also used:
DecodingFormat (org.apache.flink.table.connector.format.DecodingFormat)
TableFactoryHelper (org.apache.flink.table.factories.FactoryUtil.TableFactoryHelper)
Duration (java.time.Duration)
KafkaConnectorOptionsUtil.getKafkaProperties (org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptionsUtil.getKafkaProperties)
Properties (java.util.Properties)
DeserializationSchema (org.apache.flink.api.common.serialization.DeserializationSchema)
RowData (org.apache.flink.table.data.RowData)
ReadableConfig (org.apache.flink.configuration.ReadableConfig)
DataType (org.apache.flink.table.types.DataType)
KafkaConnectorOptionsUtil.getStartupOptions (org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptionsUtil.getStartupOptions)
StartupOptions (org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptionsUtil.StartupOptions)
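
The option keys read in this method map directly onto the connector's DDL options. As a hedged sketch of how a user would drive this factory path from the Table API (the table name, topic, and broker address below are illustrative, not taken from the Flink sources):

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class KafkaSourceDdlSketch {
    public static void main(String[] args) {
        TableEnvironment tEnv =
                TableEnvironment.create(EnvironmentSettings.inStreamingMode());
        // Each option corresponds to a key read in createDynamicTableSource:
        // 'value.format' feeds getValueDecodingFormat, 'scan.startup.mode' feeds
        // getStartupOptions, and 'scan.topic-partition-discovery.interval' feeds
        // the SCAN_TOPIC_PARTITION_DISCOVERY lookup above.
        tEnv.executeSql(
                "CREATE TABLE orders (\n"
                        + "  order_id STRING,\n"
                        + "  amount DOUBLE\n"
                        + ") WITH (\n"
                        + "  'connector' = 'kafka',\n"
                        + "  'topic' = 'orders',\n"
                        + "  'properties.bootstrap.servers' = 'localhost:9092',\n"
                        + "  'value.format' = 'json',\n"
                        + "  'scan.startup.mode' = 'earliest-offset',\n"
                        + "  'scan.topic-partition-discovery.interval' = '60 s'\n"
                        + ")");
    }
}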

Example 7 with DeserializationSchema

Use of org.apache.flink.api.common.serialization.DeserializationSchema in project flink by apache.

From class KafkaDynamicTableFactoryTest, method testTableSource:

@Test
public void testTableSource() {
    final DynamicTableSource actualSource = createTableSource(SCHEMA, getBasicSourceOptions());
    final KafkaDynamicSource actualKafkaSource = (KafkaDynamicSource) actualSource;
    final Map<KafkaTopicPartition, Long> specificOffsets = new HashMap<>();
    specificOffsets.put(new KafkaTopicPartition(TOPIC, PARTITION_0), OFFSET_0);
    specificOffsets.put(new KafkaTopicPartition(TOPIC, PARTITION_1), OFFSET_1);
    final DecodingFormat<DeserializationSchema<RowData>> valueDecodingFormat = new DecodingFormatMock(",", true);
    // Test scan source equals
    final KafkaDynamicSource expectedKafkaSource =
            createExpectedScanSource(
                    SCHEMA_DATA_TYPE,
                    null,
                    valueDecodingFormat,
                    new int[0],
                    new int[] {0, 1, 2},
                    null,
                    Collections.singletonList(TOPIC),
                    null,
                    KAFKA_SOURCE_PROPERTIES,
                    StartupMode.SPECIFIC_OFFSETS,
                    specificOffsets,
                    0);
    assertThat(actualKafkaSource).isEqualTo(expectedKafkaSource);
    ScanTableSource.ScanRuntimeProvider provider = actualKafkaSource.getScanRuntimeProvider(ScanRuntimeProviderContext.INSTANCE);
    assertKafkaSource(provider);
}
Also used:
ScanTableSource (org.apache.flink.table.connector.source.ScanTableSource)
HashMap (java.util.HashMap)
KafkaTopicPartition (org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition)
DecodingFormatMock (org.apache.flink.table.factories.TestFormatFactory.DecodingFormatMock)
DeserializationSchema (org.apache.flink.api.common.serialization.DeserializationSchema)
DynamicTableSource (org.apache.flink.table.connector.source.DynamicTableSource)
Test (org.junit.jupiter.api.Test)
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)
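
The specificOffsets map asserted above is the parsed form of the 'scan.startup.specific-offsets' option. A small sketch of that correspondence, with made-up partition numbers and offsets:

import java.util.HashMap;
import java.util.Map;

import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;

public class SpecificOffsetsSketch {
    public static void main(String[] args) {
        // DDL form (documented option syntax, values illustrative):
        //   'scan.startup.mode' = 'specific-offsets',
        //   'scan.startup.specific-offsets' = 'partition:0,offset:42;partition:1,offset:300'
        // Parsed form, as asserted in the test above:
        Map<KafkaTopicPartition, Long> specificOffsets = new HashMap<>();
        specificOffsets.put(new KafkaTopicPartition("topic", 0), 42L);
        specificOffsets.put(new KafkaTopicPartition("topic", 1), 300L);
        System.out.println(specificOffsets);
    }
}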

Example 8 with DeserializationSchema

Use of org.apache.flink.api.common.serialization.DeserializationSchema in project flink by apache.

From class KafkaDynamicTableFactoryTest, method testTableSourceWithPattern:

@Test
public void testTableSourceWithPattern() {
    final Map<String, String> modifiedOptions = getModifiedOptions(getBasicSourceOptions(), options -> {
        options.remove("topic");
        options.put("topic-pattern", TOPIC_REGEX);
        options.put("scan.startup.mode", ScanStartupMode.EARLIEST_OFFSET.toString());
        options.remove("scan.startup.specific-offsets");
    });
    final DynamicTableSource actualSource = createTableSource(SCHEMA, modifiedOptions);
    final Map<KafkaTopicPartition, Long> specificOffsets = new HashMap<>();
    DecodingFormat<DeserializationSchema<RowData>> valueDecodingFormat = new DecodingFormatMock(",", true);
    // Test scan source equals
    final KafkaDynamicSource expectedKafkaSource =
            createExpectedScanSource(
                    SCHEMA_DATA_TYPE,
                    null,
                    valueDecodingFormat,
                    new int[0],
                    new int[] {0, 1, 2},
                    null,
                    null,
                    Pattern.compile(TOPIC_REGEX),
                    KAFKA_SOURCE_PROPERTIES,
                    StartupMode.EARLIEST,
                    specificOffsets,
                    0);
    final KafkaDynamicSource actualKafkaSource = (KafkaDynamicSource) actualSource;
    assertThat(actualKafkaSource).isEqualTo(expectedKafkaSource);
    ScanTableSource.ScanRuntimeProvider provider = actualKafkaSource.getScanRuntimeProvider(ScanRuntimeProviderContext.INSTANCE);
    assertKafkaSource(provider);
}
Also used:
HashMap (java.util.HashMap)
KafkaTopicPartition (org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition)
DecodingFormatMock (org.apache.flink.table.factories.TestFormatFactory.DecodingFormatMock)
DeserializationSchema (org.apache.flink.api.common.serialization.DeserializationSchema)
ScanTableSource (org.apache.flink.table.connector.source.ScanTableSource)
DynamicTableSource (org.apache.flink.table.connector.source.DynamicTableSource)
Test (org.junit.jupiter.api.Test)
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)
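
Note that 'topic' and 'topic-pattern' are mutually exclusive, which is why the test removes one option before setting the other; the pattern is an ordinary Java regex compiled via Pattern.compile, as in the expected source above. A minimal illustration (regex and topic names are made up):

import java.util.regex.Pattern;

public class TopicPatternSketch {
    public static void main(String[] args) {
        // 'topic-pattern' is a plain Java regex matched against Kafka topic names.
        Pattern topicPattern = Pattern.compile("orders-.*");
        System.out.println(topicPattern.matcher("orders-2024").matches()); // true
        System.out.println(topicPattern.matcher("payments").matches());   // false
    }
}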

Example 9 with DeserializationSchema

Use of org.apache.flink.api.common.serialization.DeserializationSchema in project flink by apache.

From class MaxwellJsonDecodingFormat, method createRuntimeDecoder:

@Override
public DeserializationSchema<RowData> createRuntimeDecoder(DynamicTableSource.Context context, DataType physicalDataType, int[][] projections) {
    physicalDataType = Projection.of(projections).project(physicalDataType);
    // Resolve each requested metadata key to its ReadableMetadata definition,
    // failing fast if a key is unknown.
    final List<ReadableMetadata> readableMetadata =
            metadataKeys.stream()
                    .map(
                            k ->
                                    Stream.of(ReadableMetadata.values())
                                            .filter(rm -> rm.key.equals(k))
                                            .findFirst()
                                            .orElseThrow(
                                                    () ->
                                                            new IllegalStateException(
                                                                    String.format(
                                                                            "Could not find the requested metadata key: %s",
                                                                            k))))
                    .collect(Collectors.toList());
    // Each metadata key becomes an extra field appended to the physical row type.
    final List<DataTypes.Field> metadataFields =
            readableMetadata.stream()
                    .map(m -> DataTypes.FIELD(m.key, m.dataType))
                    .collect(Collectors.toList());
    final DataType producedDataType = DataTypeUtils.appendRowFields(physicalDataType, metadataFields);
    final TypeInformation<RowData> producedTypeInfo = context.createTypeInformation(producedDataType);
    return new MaxwellJsonDeserializationSchema(physicalDataType, readableMetadata, producedTypeInfo, ignoreParseErrors, timestampFormat);
}
Also used:
DataType (org.apache.flink.table.types.DataType)
DynamicTableSource (org.apache.flink.table.connector.source.DynamicTableSource)
RowData (org.apache.flink.table.data.RowData)
TimestampData (org.apache.flink.table.data.TimestampData)
ChangelogMode (org.apache.flink.table.connector.ChangelogMode)
DataTypes (org.apache.flink.table.api.DataTypes)
TimestampFormat (org.apache.flink.formats.common.TimestampFormat)
ProjectableDecodingFormat (org.apache.flink.table.connector.format.ProjectableDecodingFormat)
Collectors (java.util.stream.Collectors)
DeserializationSchema (org.apache.flink.api.common.serialization.DeserializationSchema)
LinkedHashMap (java.util.LinkedHashMap)
DecodingFormat (org.apache.flink.table.connector.format.DecodingFormat)
List (java.util.List)
GenericRowData (org.apache.flink.table.data.GenericRowData)
Stream (java.util.stream.Stream)
RowKind (org.apache.flink.types.RowKind)
Map (java.util.Map)
MetadataConverter (org.apache.flink.formats.json.maxwell.MaxwellJsonDeserializationSchema.MetadataConverter)
TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)
Collections (java.util.Collections)
Projection (org.apache.flink.table.connector.Projection)
DataTypeUtils (org.apache.flink.table.types.utils.DataTypeUtils)
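
The key step here is DataTypeUtils.appendRowFields, which extends the physical row type with one field per requested metadata key. A short sketch of the effect, using an illustrative schema and metadata field rather than the exact ReadableMetadata entries:

import java.util.Collections;

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.utils.DataTypeUtils;

public class AppendRowFieldsSketch {
    public static void main(String[] args) {
        DataType physical =
                DataTypes.ROW(
                        DataTypes.FIELD("id", DataTypes.BIGINT()),
                        DataTypes.FIELD("name", DataTypes.STRING()));
        // As in createRuntimeDecoder: metadata columns are appended after the
        // physical columns to form the produced row type that is handed to
        // context.createTypeInformation(..).
        DataType produced =
                DataTypeUtils.appendRowFields(
                        physical,
                        Collections.singletonList(
                                DataTypes.FIELD("ingestion-timestamp", DataTypes.TIMESTAMP(3))));
        // Prints a row type with id and name, then ingestion-timestamp appended.
        System.out.println(produced);
    }
}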

Example 10 with DeserializationSchema

Use of org.apache.flink.api.common.serialization.DeserializationSchema in project flink by apache.

From class CanalJsonDecodingFormat, method createRuntimeDecoder:

@Override
public DeserializationSchema<RowData> createRuntimeDecoder(DynamicTableSource.Context context, DataType physicalDataType, int[][] projections) {
    physicalDataType = Projection.of(projections).project(physicalDataType);
    final List<ReadableMetadata> readableMetadata =
            metadataKeys.stream()
                    .map(
                            k ->
                                    Stream.of(ReadableMetadata.values())
                                            .filter(rm -> rm.key.equals(k))
                                            .findFirst()
                                            .orElseThrow(IllegalStateException::new))
                    .collect(Collectors.toList());
    final List<DataTypes.Field> metadataFields =
            readableMetadata.stream()
                    .map(m -> DataTypes.FIELD(m.key, m.dataType))
                    .collect(Collectors.toList());
    final DataType producedDataType = DataTypeUtils.appendRowFields(physicalDataType, metadataFields);
    final TypeInformation<RowData> producedTypeInfo = context.createTypeInformation(producedDataType);
    return CanalJsonDeserializationSchema.builder(physicalDataType, readableMetadata, producedTypeInfo)
            .setDatabase(database)
            .setTable(table)
            .setIgnoreParseErrors(ignoreParseErrors)
            .setTimestampFormat(timestampFormat)
            .build();
}
Also used:
MetadataConverter (org.apache.flink.formats.json.canal.CanalJsonDeserializationSchema.MetadataConverter)
DataType (org.apache.flink.table.types.DataType)
DynamicTableSource (org.apache.flink.table.connector.source.DynamicTableSource)
RowData (org.apache.flink.table.data.RowData)
TimestampData (org.apache.flink.table.data.TimestampData)
ChangelogMode (org.apache.flink.table.connector.ChangelogMode)
DataTypes (org.apache.flink.table.api.DataTypes)
TimestampFormat (org.apache.flink.formats.common.TimestampFormat)
ProjectableDecodingFormat (org.apache.flink.table.connector.format.ProjectableDecodingFormat)
Collectors (java.util.stream.Collectors)
DeserializationSchema (org.apache.flink.api.common.serialization.DeserializationSchema)
LinkedHashMap (java.util.LinkedHashMap)
DecodingFormat (org.apache.flink.table.connector.format.DecodingFormat)
List (java.util.List)
GenericRowData (org.apache.flink.table.data.GenericRowData)
Stream (java.util.stream.Stream)
RowKind (org.apache.flink.types.RowKind)
Map (java.util.Map)
TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)
Collections (java.util.Collections)
Nullable (javax.annotation.Nullable)
Projection (org.apache.flink.table.connector.Projection)
DataTypeUtils (org.apache.flink.table.types.utils.DataTypeUtils)
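
For context, the database/table values passed to the builder come from the format's filtering options, which let one table consume only a subset of a shared binlog topic. A hedged DDL sketch (schema, topic, and filter values are illustrative):

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class CanalJsonDdlSketch {
    public static void main(String[] args) {
        TableEnvironment tEnv =
                TableEnvironment.create(EnvironmentSettings.inStreamingMode());
        // 'canal-json.database.include' / 'canal-json.table.include' feed the
        // setDatabase(..)/setTable(..) calls in createRuntimeDecoder above;
        // 'canal-json.ignore-parse-errors' feeds setIgnoreParseErrors(..).
        tEnv.executeSql(
                "CREATE TABLE products (\n"
                        + "  id BIGINT,\n"
                        + "  name STRING\n"
                        + ") WITH (\n"
                        + "  'connector' = 'kafka',\n"
                        + "  'topic' = 'products-binlog',\n"
                        + "  'properties.bootstrap.servers' = 'localhost:9092',\n"
                        + "  'format' = 'canal-json',\n"
                        + "  'canal-json.ignore-parse-errors' = 'true',\n"
                        + "  'canal-json.database.include' = 'mydb',\n"
                        + "  'canal-json.table.include' = 'products'\n"
                        + ")");
    }
}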

Aggregations

DeserializationSchema (org.apache.flink.api.common.serialization.DeserializationSchema): 17 usages
RowData (org.apache.flink.table.data.RowData): 9 usages
DataType (org.apache.flink.table.types.DataType): 9 usages
DecodingFormat (org.apache.flink.table.connector.format.DecodingFormat): 7 usages
DynamicTableSource (org.apache.flink.table.connector.source.DynamicTableSource): 7 usages
Collections (java.util.Collections): 6 usages
List (java.util.List): 6 usages
Map (java.util.Map): 6 usages
TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 6 usages
LinkedHashMap (java.util.LinkedHashMap): 5 usages
Properties (java.util.Properties): 5 usages
Collectors (java.util.stream.Collectors): 5 usages
Stream (java.util.stream.Stream): 5 usages
ReadableConfig (org.apache.flink.configuration.ReadableConfig): 5 usages
DataTypes (org.apache.flink.table.api.DataTypes): 5 usages
ChangelogMode (org.apache.flink.table.connector.ChangelogMode): 5 usages
RowKind (org.apache.flink.types.RowKind): 5 usages
FactoryUtil (org.apache.flink.table.factories.FactoryUtil): 4 usages
IOException (java.io.IOException): 3 usages
HashMap (java.util.HashMap): 3 usages