Example 1 with DeserializationSchema

use of org.apache.flink.api.common.serialization.DeserializationSchema in project flink by apache.

the class KafkaConsumerTestBase method runCollectingSchemaTest.

/**
 * Test that ensures that DeserializationSchema can emit multiple records via a Collector.
 *
 * @throws Exception
 */
public void runCollectingSchemaTest() throws Exception {
    final int elementCount = 20;
    final String topic = writeSequence("testCollectingSchema", elementCount, 1, 1);
    // read using custom schema
    final StreamExecutionEnvironment env1 = StreamExecutionEnvironment.getExecutionEnvironment();
    env1.setParallelism(1);
    env1.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    DataStream<Tuple2<Integer, String>> fromKafka =
            env1.addSource(
                    kafkaServer
                            .getConsumer(topic, new CollectingDeserializationSchema(elementCount), props)
                            .assignTimestampsAndWatermarks(
                                    new AscendingTimestampExtractor<Tuple2<Integer, String>>() {

                                        @Override
                                        public long extractAscendingTimestamp(
                                                Tuple2<Integer, String> element) {
                                            String string = element.f1;
                                            return Long.parseLong(
                                                    string.substring(0, string.length() - 1));
                                        }
                                    }));
    fromKafka.keyBy(t -> t.f0).process(new KeyedProcessFunction<Integer, Tuple2<Integer, String>, Void>() {

        private boolean registered = false;

        @Override
        public void processElement(Tuple2<Integer, String> value, Context ctx, Collector<Void> out) throws Exception {
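            // register a single event-time timer at timestamp elementCount - 2;
            // it fires once the watermark derived from the record timestamps passes it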
            if (!registered) {
                ctx.timerService().registerEventTimeTimer(elementCount - 2);
                registered = true;
            }
        }

        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<Void> out) throws Exception {
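            // the timer firing proves the expected records arrived and the watermark
            // advanced; SuccessException is how tryExecute signals a passing test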
            throw new SuccessException();
        }
    });
    tryExecute(env1, "Consume " + elementCount + " elements from Kafka");
    deleteTestTopic(topic);
}
Also used : RetryOnException(org.apache.flink.testutils.junit.RetryOnException) Tuple1(org.apache.flink.api.java.tuple.Tuple1) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Tuple2(org.apache.flink.api.java.tuple.Tuple2) OffsetResetStrategy(org.apache.kafka.clients.consumer.OffsetResetStrategy) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) DataOutputViewStreamWrapper(org.apache.flink.core.memory.DataOutputViewStreamWrapper) TestUtils.submitJobAndWaitForResult(org.apache.flink.test.util.TestUtils.submitJobAndWaitForResult) CheckpointListener(org.apache.flink.api.common.state.CheckpointListener) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KafkaSourceBuilder(org.apache.flink.connector.kafka.source.KafkaSourceBuilder) Map(java.util.Map) RichSourceFunction(org.apache.flink.streaming.api.functions.source.RichSourceFunction) ThrottledMapper(org.apache.flink.streaming.connectors.kafka.testutils.ThrottledMapper) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) KafkaDeserializationSchemaWrapper(org.apache.flink.streaming.connectors.kafka.internals.KafkaDeserializationSchemaWrapper) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) ByteArrayOutputStream(org.apache.commons.io.output.ByteArrayOutputStream) Set(java.util.Set) StreamingJobGraphGenerator(org.apache.flink.streaming.api.graph.StreamingJobGraphGenerator) KeyedSerializationSchema(org.apache.flink.streaming.util.serialization.KeyedSerializationSchema) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) ClusterClient(org.apache.flink.client.program.ClusterClient) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Assert.assertFalse(org.junit.Assert.assertFalse) SerializationSchema(org.apache.flink.api.common.serialization.SerializationSchema) JobStatus(org.apache.flink.api.common.JobStatus) DataOutputView(org.apache.flink.core.memory.DataOutputView) KeyedProcessFunction(org.apache.flink.streaming.api.functions.KeyedProcessFunction) ArrayList(java.util.ArrayList) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Collector(org.apache.flink.util.Collector) Iterables(org.apache.flink.shaded.guava30.com.google.common.collect.Iterables) ManagementFactory(java.lang.management.ManagementFactory) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) Nullable(javax.annotation.Nullable) Before(org.junit.Before) TestUtils.tryExecute(org.apache.flink.test.util.TestUtils.tryExecute) Properties(java.util.Properties) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) StartupMode(org.apache.flink.streaming.connectors.kafka.config.StartupMode) JobID(org.apache.flink.api.common.JobID) Assert.assertNull(org.junit.Assert.assertNull) Assert(org.junit.Assert) Assert.assertEquals(org.junit.Assert.assertEquals) NotLeaderForPartitionException(org.apache.kafka.common.errors.NotLeaderForPartitionException) AscendingTimestampExtractor(org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor) Date(java.util.Date) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) ExceptionUtils(org.apache.flink.util.ExceptionUtils) Random(java.util.Random) 
SuccessException(org.apache.flink.test.util.SuccessException) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) ByteArrayInputStream(java.io.ByteArrayInputStream) PartitionValidatingMapper(org.apache.flink.streaming.connectors.kafka.testutils.PartitionValidatingMapper) Assert.fail(org.junit.Assert.fail) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) TopicPartition(org.apache.kafka.common.TopicPartition) DataInputViewStreamWrapper(org.apache.flink.core.memory.DataInputViewStreamWrapper) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) ObjectName(javax.management.ObjectName) UUID(java.util.UUID) RichSinkFunction(org.apache.flink.streaming.api.functions.sink.RichSinkFunction) List(java.util.List) ClusterCommunicationUtils.waitUntilNoJobIsRunning(org.apache.flink.streaming.connectors.kafka.testutils.ClusterCommunicationUtils.waitUntilNoJobIsRunning) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Optional(java.util.Optional) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) OffsetsInitializer(org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer) ClusterCommunicationUtils.waitUntilJobIsRunning(org.apache.flink.streaming.connectors.kafka.testutils.ClusterCommunicationUtils.waitUntilJobIsRunning) HashMap(java.util.HashMap) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) AtomicReference(java.util.concurrent.atomic.AtomicReference) Tuple2FlinkPartitioner(org.apache.flink.streaming.connectors.kafka.testutils.Tuple2FlinkPartitioner) MBeanServer(javax.management.MBeanServer) RichParallelSourceFunction(org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction) DataInputView(org.apache.flink.core.memory.DataInputView) ProducerConfig(org.apache.kafka.clients.producer.ProducerConfig) DataGenerators(org.apache.flink.streaming.connectors.kafka.testutils.DataGenerators) TypeInformationSerializationSchema(org.apache.flink.api.common.serialization.TypeInformationSerializationSchema) TimeoutException(org.apache.kafka.common.errors.TimeoutException) DiscardingSink(org.apache.flink.streaming.api.functions.sink.DiscardingSink) Configuration(org.apache.flink.configuration.Configuration) ValidatingExactlyOnceSink(org.apache.flink.streaming.connectors.kafka.testutils.ValidatingExactlyOnceSink) JobCancellationException(org.apache.flink.runtime.client.JobCancellationException) DeserializationSchema(org.apache.flink.api.common.serialization.DeserializationSchema) DataStream(org.apache.flink.streaming.api.datastream.DataStream) KafkaSource(org.apache.flink.connector.kafka.source.KafkaSource) ListCheckpointed(org.apache.flink.streaming.api.checkpoint.ListCheckpointed) FailingIdentityMapper(org.apache.flink.streaming.connectors.kafka.testutils.FailingIdentityMapper) ClusterCommunicationUtils.getRunningJobs(org.apache.flink.streaming.connectors.kafka.testutils.ClusterCommunicationUtils.getRunningJobs) TypeInformationKeyValueSerializationSchema(org.apache.flink.streaming.util.serialization.TypeInformationKeyValueSerializationSchema) BitSet(java.util.BitSet) Collections(java.util.Collections) 
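
The CollectingDeserializationSchema used above is defined elsewhere in KafkaConsumerTestBase and is not part of this snippet. As a hedged sketch only - the real class may differ - here is what a schema that emits multiple records per Kafka message via the Collector-based deserialize variant could look like (assumes the imports listed under "Also used" plus java.nio.charset.StandardCharsets):

private static class CollectingDeserializationSchema
        implements DeserializationSchema<Tuple2<Integer, String>> {

    // Illustrative only; the actual test schema in KafkaConsumerTestBase may differ.
    @Override
    public Tuple2<Integer, String> deserialize(byte[] message) {
        // Never invoked here: callers use the overridden Collector variant below,
        // whose default implementation would otherwise delegate to this method.
        throw new UnsupportedOperationException("Use deserialize(byte[], Collector)");
    }

    @Override
    public void deserialize(byte[] message, Collector<Tuple2<Integer, String>> out) {
        // Fan one Kafka message out into two records - the behavior the test verifies.
        // The payload is assumed to be a numeric string; the trailing letter is the
        // suffix the test's timestamp extractor strips off again.
        String value = new String(message, StandardCharsets.UTF_8);
        out.collect(new Tuple2<>(0, value + "a"));
        out.collect(new Tuple2<>(0, value + "b"));
    }

    @Override
    public boolean isEndOfStream(Tuple2<Integer, String> nextElement) {
        return false;
    }

    @Override
    public TypeInformation<Tuple2<Integer, String>> getProducedType() {
        return TypeInformation.of(new TypeHint<Tuple2<Integer, String>>() {});
    }
}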

Example 2 with DeserializationSchema

use of org.apache.flink.api.common.serialization.DeserializationSchema in project flink by apache.

the class SocketDynamicTableFactory method createDynamicTableSource.

@Override
public DynamicTableSource createDynamicTableSource(Context context) {
    // either implement your custom validation logic here ...
    // or use the provided helper utility
    final FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
    // discover a suitable decoding format
    final DecodingFormat<DeserializationSchema<RowData>> decodingFormat = helper.discoverDecodingFormat(DeserializationFormatFactory.class, FactoryUtil.FORMAT);
    // validate all options
    helper.validate();
    // get the validated options
    final ReadableConfig options = helper.getOptions();
    final String hostname = options.get(HOSTNAME);
    final int port = options.get(PORT);
    final byte byteDelimiter = (byte) (int) options.get(BYTE_DELIMITER);
    // derive the produced data type (excluding computed columns) from the catalog table
    final DataType producedDataType = context.getCatalogTable().getResolvedSchema().toPhysicalRowDataType();
    // create and return dynamic table source
    return new SocketDynamicTableSource(hostname, port, byteDelimiter, decodingFormat, producedDataType);
}
Also used : ReadableConfig(org.apache.flink.configuration.ReadableConfig) FactoryUtil(org.apache.flink.table.factories.FactoryUtil) DataType(org.apache.flink.table.types.DataType) DeserializationSchema(org.apache.flink.api.common.serialization.DeserializationSchema)
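
For orientation, here is a sketch of how the companion SocketDynamicTableSource (not shown on this page) might consume the discovered format at runtime. It follows the socket-connector walkthrough in the Flink documentation, so treat the class and field names as illustrative:

@Override
public ScanRuntimeProvider getScanRuntimeProvider(ScanContext runtimeProviderContext) {
    // resolve the format against the physical row type this source produces
    final DeserializationSchema<RowData> deserializer =
            decodingFormat.createRuntimeDecoder(runtimeProviderContext, producedDataType);
    final SocketSourceFunction sourceFunction =
            new SocketSourceFunction(hostname, port, byteDelimiter, deserializer);
    // a socket stream never ends, so the source is unbounded
    return SourceFunctionProvider.of(sourceFunction, false);
}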

Example 3 with DeserializationSchema

use of org.apache.flink.api.common.serialization.DeserializationSchema in project flink by apache.

the class DebeziumJsonDecodingFormat method createRuntimeDecoder.

@Override
public DeserializationSchema<RowData> createRuntimeDecoder(DynamicTableSource.Context context, DataType physicalDataType, int[][] projections) {
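    // narrow the physical type to the fields the planner actually projected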
    physicalDataType = Projection.of(projections).project(physicalDataType);
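    // resolve each requested metadata key to its ReadableMetadata descriptor;
    // an unknown key would be a planner bug, hence IllegalStateException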
    final List<ReadableMetadata> readableMetadata =
            metadataKeys.stream()
                    .map(
                            k ->
                                    Stream.of(ReadableMetadata.values())
                                            .filter(rm -> rm.key.equals(k))
                                            .findFirst()
                                            .orElseThrow(IllegalStateException::new))
                    .collect(Collectors.toList());
    final List<DataTypes.Field> metadataFields =
            readableMetadata.stream()
                    .map(m -> DataTypes.FIELD(m.key, m.dataType))
                    .collect(Collectors.toList());
    final DataType producedDataType = DataTypeUtils.appendRowFields(physicalDataType, metadataFields);
    final TypeInformation<RowData> producedTypeInfo = context.createTypeInformation(producedDataType);
    return new DebeziumJsonDeserializationSchema(physicalDataType, readableMetadata, producedTypeInfo, schemaInclude, ignoreParseErrors, timestampFormat);
}
Also used : DataType(org.apache.flink.table.types.DataType) DynamicTableSource(org.apache.flink.table.connector.source.DynamicTableSource) RowData(org.apache.flink.table.data.RowData) TimestampData(org.apache.flink.table.data.TimestampData) ChangelogMode(org.apache.flink.table.connector.ChangelogMode) DataTypes(org.apache.flink.table.api.DataTypes) TimestampFormat(org.apache.flink.formats.common.TimestampFormat) ProjectableDecodingFormat(org.apache.flink.table.connector.format.ProjectableDecodingFormat) Collectors(java.util.stream.Collectors) DeserializationSchema(org.apache.flink.api.common.serialization.DeserializationSchema) LinkedHashMap(java.util.LinkedHashMap) DecodingFormat(org.apache.flink.table.connector.format.DecodingFormat) StringData(org.apache.flink.table.data.StringData) List(java.util.List) GenericRowData(org.apache.flink.table.data.GenericRowData) Stream(java.util.stream.Stream) RowKind(org.apache.flink.types.RowKind) GenericMapData(org.apache.flink.table.data.GenericMapData) Map(java.util.Map) MetadataConverter(org.apache.flink.formats.json.debezium.DebeziumJsonDeserializationSchema.MetadataConverter) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Collections(java.util.Collections) Projection(org.apache.flink.table.connector.Projection) DataTypeUtils(org.apache.flink.table.types.utils.DataTypeUtils) RowData(org.apache.flink.table.data.RowData) GenericRowData(org.apache.flink.table.data.GenericRowData) DataType(org.apache.flink.table.types.DataType)
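
The metadataKeys field consulted above is populated through the format's applyReadableMetadata hook before the planner asks for the decoder. A hedged illustration of that handshake - the surrounding variables (context, physicalDataType, schemaInclude, ignoreParseErrors, timestampFormat) are assumed to be in scope, the constructor shape is illustrative, and the hand-built projection assumes three physical columns:

DebeziumJsonDecodingFormat format =
        new DebeziumJsonDecodingFormat(schemaInclude, ignoreParseErrors, timestampFormat);
// declare which metadata columns the query selected; "ingestion-timestamp"
// is one of the keys defined in DebeziumJsonDecodingFormat.ReadableMetadata
format.applyReadableMetadata(Collections.singletonList("ingestion-timestamp"));
// identity projection (no pushdown), written out by hand for the illustration
int[][] projections = new int[][] {{0}, {1}, {2}};
DeserializationSchema<RowData> decoder =
        format.createRuntimeDecoder(context, physicalDataType, projections);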

Example 4 with DeserializationSchema

use of org.apache.flink.api.common.serialization.DeserializationSchema in project flink by apache.

the class DeserializationSchemaAdapter method createDeserialization.

private DeserializationSchema<RowData> createDeserialization() throws IOException {
    try {
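        // clone first so this reader gets its own deserializer instance;
        // schemas may keep per-instance state that must not be shared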
        DeserializationSchema<RowData> deserialization = InstantiationUtil.clone(deserializationSchema);
        deserialization.open(new DeserializationSchema.InitializationContext() {

            @Override
            public MetricGroup getMetricGroup() {
                throw new UnsupportedOperationException("MetricGroup is unsupported in BulkFormat.");
            }

            @Override
            public UserCodeClassLoader getUserCodeClassLoader() {
                return (UserCodeClassLoader) Thread.currentThread().getContextClassLoader();
            }
        });
        return deserialization;
    } catch (Exception e) {
        throw new IOException(e);
    }
}
Also used : UserCodeClassLoader(org.apache.flink.util.UserCodeClassLoader) RowData(org.apache.flink.table.data.RowData) MetricGroup(org.apache.flink.metrics.MetricGroup) IOException(java.io.IOException) DeserializationSchema(org.apache.flink.api.common.serialization.DeserializationSchema) IOException(java.io.IOException)
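
A brief usage sketch (assumed caller code, with rawBytes standing in for one record's serialized payload): every reader first obtains its own opened copy, then deserializes record by record.

DeserializationSchema<RowData> schema = createDeserialization();
// safe to call repeatedly: open() has already run on this private copy
RowData row = schema.deserialize(rawBytes);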

Example 5 with DeserializationSchema

use of org.apache.flink.api.common.serialization.DeserializationSchema in project flink by apache.

the class KinesisDynamicTableFactory method createDynamicTableSource.

@Override
public DynamicTableSource createDynamicTableSource(Context context) {
    FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
    ReadableConfig tableOptions = helper.getOptions();
    ResolvedCatalogTable catalogTable = context.getCatalogTable();
    DataType physicalDataType = catalogTable.getResolvedSchema().toPhysicalRowDataType();
    KinesisConnectorOptionsUtil optionsUtils = new KinesisConnectorOptionsUtil(catalogTable.getOptions(), tableOptions);
    // initialize the table format early in order to register its consumedOptionKeys
    // in the TableFactoryHelper, as those are needed for correct option validation
    DecodingFormat<DeserializationSchema<RowData>> decodingFormat = helper.discoverDecodingFormat(DeserializationFormatFactory.class, FORMAT);
    // validate the data types of the table options
    helper.validateExcept(optionsUtils.getNonValidatedPrefixes().toArray(new String[0]));
    Properties properties = optionsUtils.getValidatedSourceConfigurations();
    return new KinesisDynamicSource(physicalDataType, tableOptions.get(STREAM), properties, decodingFormat);
}
Also used : ReadableConfig(org.apache.flink.configuration.ReadableConfig) ResolvedCatalogTable(org.apache.flink.table.catalog.ResolvedCatalogTable) FactoryUtil(org.apache.flink.table.factories.FactoryUtil) DataType(org.apache.flink.table.types.DataType) Properties(java.util.Properties) DeserializationSchema(org.apache.flink.api.common.serialization.DeserializationSchema)
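
Note the validateExcept call: it behaves like validate() but skips every option whose key starts with one of the given prefixes, which is how free-form client settings reach the source untouched. A small illustration with an invented prefix (the real pass-through prefixes come from KinesisConnectorOptionsUtil):

FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
// an option such as 'client.max.connections' = '10' survives validation,
// because every key under the hypothetical "client." prefix is exempted
helper.validateExcept("client.");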

Aggregations

DeserializationSchema (org.apache.flink.api.common.serialization.DeserializationSchema): 17 uses
RowData (org.apache.flink.table.data.RowData): 9 uses
DataType (org.apache.flink.table.types.DataType): 9 uses
DecodingFormat (org.apache.flink.table.connector.format.DecodingFormat): 7 uses
DynamicTableSource (org.apache.flink.table.connector.source.DynamicTableSource): 7 uses
Collections (java.util.Collections): 6 uses
List (java.util.List): 6 uses
Map (java.util.Map): 6 uses
TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 6 uses
LinkedHashMap (java.util.LinkedHashMap): 5 uses
Properties (java.util.Properties): 5 uses
Collectors (java.util.stream.Collectors): 5 uses
Stream (java.util.stream.Stream): 5 uses
ReadableConfig (org.apache.flink.configuration.ReadableConfig): 5 uses
DataTypes (org.apache.flink.table.api.DataTypes): 5 uses
ChangelogMode (org.apache.flink.table.connector.ChangelogMode): 5 uses
RowKind (org.apache.flink.types.RowKind): 5 uses
FactoryUtil (org.apache.flink.table.factories.FactoryUtil): 4 uses
IOException (java.io.IOException): 3 uses
HashMap (java.util.HashMap): 3 uses