Example 1 with KeyedProcessFunction

Use of org.apache.flink.streaming.api.functions.KeyedProcessFunction in the Apache Flink project.

From the class KafkaConsumerTestBase, method runCollectingSchemaTest.

/**
 * Test that ensures that a DeserializationSchema can emit multiple records via a Collector.
 *
 * @throws Exception if writing the test sequence or executing the consuming job fails
 */
public void runCollectingSchemaTest() throws Exception {
    final int elementCount = 20;
    final String topic = writeSequence("testCollectingSchema", elementCount, 1, 1);
    // read using custom schema
    final StreamExecutionEnvironment env1 = StreamExecutionEnvironment.getExecutionEnvironment();
    env1.setParallelism(1);
    env1.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    DataStream<Tuple2<Integer, String>> fromKafka =
            env1.addSource(
                    kafkaServer
                            .getConsumer(topic, new CollectingDeserializationSchema(elementCount), props)
                            .assignTimestampsAndWatermarks(
                                    new AscendingTimestampExtractor<Tuple2<Integer, String>>() {

                                        @Override
                                        public long extractAscendingTimestamp(
                                                Tuple2<Integer, String> element) {
                                            String string = element.f1;
                                            return Long.parseLong(
                                                    string.substring(0, string.length() - 1));
                                        }
                                    }));
    fromKafka.keyBy(t -> t.f0).process(new KeyedProcessFunction<Integer, Tuple2<Integer, String>, Void>() {

        private boolean registered = false;

        @Override
        public void processElement(Tuple2<Integer, String> value, Context ctx, Collector<Void> out) throws Exception {
            if (!registered) {
                ctx.timerService().registerEventTimeTimer(elementCount - 2);
                registered = true;
            }
        }

        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<Void> out) throws Exception {
            throw new SuccessException();
        }
    });
    tryExecute(env1, "Consume " + elementCount + " elements from Kafka");
    deleteTestTopic(topic);
}
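
A note on the schema used above: the CollectingDeserializationSchema is defined elsewhere in the test base and is not shown in this excerpt. As a rough sketch of the pattern the test exercises (a hypothetical LineSplittingSchema, not the actual test class), a DeserializationSchema that fans one Kafka record out into several elements via the Collector-based deserialize() overload could look like this:

import java.nio.charset.StandardCharsets;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Hypothetical sketch, not the CollectingDeserializationSchema from the test:
// emits one Tuple2 per line of a Kafka record and ends the stream once
// `expectedCount` elements have been produced, so a bounded test job can finish.
// The instance counter assumes parallelism 1, as in the test above.
public class LineSplittingSchema implements DeserializationSchema<Tuple2<Integer, String>> {

    private final int expectedCount;
    private int emitted;

    public LineSplittingSchema(int expectedCount) {
        this.expectedCount = expectedCount;
    }

    @Override
    public void deserialize(byte[] message, Collector<Tuple2<Integer, String>> out) {
        // a single incoming Kafka record may expand into several downstream records
        for (String line : new String(message, StandardCharsets.UTF_8).split("\n")) {
            out.collect(Tuple2.of(emitted++, line));
        }
    }

    @Override
    public Tuple2<Integer, String> deserialize(byte[] message) {
        throw new UnsupportedOperationException("use the Collector-based variant");
    }

    @Override
    public boolean isEndOfStream(Tuple2<Integer, String> nextElement) {
        return emitted >= expectedCount;
    }

    @Override
    public TypeInformation<Tuple2<Integer, String>> getProducedType() {
        return TypeInformation.of(new TypeHint<Tuple2<Integer, String>>() {});
    }
}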

Example 2 with KeyedProcessFunction

Use of org.apache.flink.streaming.api.functions.KeyedProcessFunction in the Apache Flink project.

From the class SortingBoundedInputITCase, method testBatchExecutionWithTimersOneInput.

@Test
public void testBatchExecutionWithTimersOneInput() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // set parallelism to 1 to have consistent order of results
    env.setParallelism(1);
    Configuration config = new Configuration();
    config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
    env.configure(config, this.getClass().getClassLoader());
    WatermarkStrategy<Tuple2<Integer, Integer>> watermarkStrategy =
            WatermarkStrategy.forGenerator(ctx -> GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP)
                    .withTimestampAssigner((r, previousTimestamp) -> r.f1);
    SingleOutputStreamOperator<Tuple2<Integer, Integer>> elements = env.fromElements(
                    Tuple2.of(1, 3), Tuple2.of(1, 1), Tuple2.of(2, 1), Tuple2.of(1, 4),
                    // late element
                    Tuple2.of(2, 3),
                    // late element
                    Tuple2.of(1, 2), Tuple2.of(1, 13), Tuple2.of(1, 11), Tuple2.of(2, 14),
                    // late element
                    Tuple2.of(1, 11))
            .assignTimestampsAndWatermarks(watermarkStrategy);
    OutputTag<Integer> lateElements = new OutputTag<>("late_elements", BasicTypeInfo.INT_TYPE_INFO);
    SingleOutputStreamOperator<Tuple3<Long, Integer, Integer>> sums = elements.map(element -> element.f0).keyBy(element -> element).process(new KeyedProcessFunction<Integer, Integer, Tuple3<Long, Integer, Integer>>() {

        private MapState<Long, Integer> countState;

        private ValueState<Long> previousTimestampState;

        @Override
        public void open(Configuration parameters) {
            countState = getRuntimeContext().getMapState(new MapStateDescriptor<>("sum", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO));
            previousTimestampState = getRuntimeContext().getState(new ValueStateDescriptor<>("previousTimestamp", BasicTypeInfo.LONG_TYPE_INFO));
        }

        @Override
        public void processElement(Integer value, Context ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
            Long elementTimestamp = ctx.timestamp();
            long nextTen = ((elementTimestamp + 10) / 10) * 10;
            ctx.timerService().registerEventTimeTimer(nextTen);
            if (elementTimestamp < ctx.timerService().currentWatermark()) {
                ctx.output(lateElements, value);
            } else {
                Long previousTimestamp = Optional.ofNullable(previousTimestampState.value()).orElse(0L);
                assertThat(elementTimestamp, greaterThanOrEqualTo(previousTimestamp));
                previousTimestampState.update(elementTimestamp);
                Integer currentCount = Optional.ofNullable(countState.get(nextTen)).orElse(0);
                countState.put(nextTen, currentCount + 1);
            }
        }

        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
            out.collect(Tuple3.of(timestamp, ctx.getCurrentKey(), countState.get(timestamp)));
            countState.remove(timestamp);
            // this would go into an infinite loop if we did not quiesce the
            // timer service.
            ctx.timerService().registerEventTimeTimer(timestamp + 1);
        }
    });
    DataStream<Integer> lateStream = sums.getSideOutput(lateElements);
    List<Integer> lateRecordsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(lateStream));
    List<Tuple3<Long, Integer, Integer>> sumsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(sums));
    assertTrue(lateRecordsCollected.isEmpty());
    assertThat(sumsCollected, equalTo(Arrays.asList(Tuple3.of(10L, 1, 4), Tuple3.of(20L, 1, 3), Tuple3.of(10L, 2, 2), Tuple3.of(20L, 2, 1))));
}
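
The elements flagged as late above are late only with respect to the punctuated watermarks produced by GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP, whose definition is outside this excerpt. In BATCH execution mode the watermark stays at Long.MIN_VALUE until the input is exhausted, which is why the late-element side output is empty here. A hedged sketch of what such a generator might look like, assuming it emits a watermark right after the elements with timestamps 4 and 14:

// Hypothetical sketch of a punctuated generator; the actual constant may differ.
WatermarkGenerator<Tuple2<Integer, Integer>> generateAfter4And14 =
        new WatermarkGenerator<Tuple2<Integer, Integer>>() {

            @Override
            public void onEvent(
                    Tuple2<Integer, Integer> event, long eventTimestamp, WatermarkOutput output) {
                // advance the watermark only when the trigger timestamps pass by,
                // making smaller timestamps that arrive afterwards late
                if (eventTimestamp == 4 || eventTimestamp == 14) {
                    output.emitWatermark(new Watermark(eventTimestamp));
                }
            }

            @Override
            public void onPeriodicEmit(WatermarkOutput output) {
                // purely punctuated; nothing to emit periodically
            }
        };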

Example 3 with KeyedProcessFunction

Use of org.apache.flink.streaming.api.functions.KeyedProcessFunction in the Apache Flink project.

From the class DataStreamJavaITCase, method getComplexUnifiedPipeline.

// --------------------------------------------------------------------------------------------
// Helper methods
// --------------------------------------------------------------------------------------------
private Table getComplexUnifiedPipeline(StreamExecutionEnvironment env) {
    final DataStream<String> allowedNamesStream = env.fromElements("Bob", "Alice");
    final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
    tableEnv.createTemporaryView("AllowedNamesTable", tableEnv.fromDataStream(allowedNamesStream).as("allowedName"));
    final Table nameCountTable =
            tableEnv.sqlQuery(
                    "SELECT name, COUNT(*) AS c "
                            + "FROM (VALUES ('Bob'), ('Alice'), ('Greg'), ('Bob')) AS NameTable(name) "
                            + "WHERE name IN (SELECT allowedName FROM AllowedNamesTable) "
                            + "GROUP BY name");
    final DataStream<Row> nameCountStream = tableEnv.toChangelogStream(nameCountTable);
    final DataStream<Tuple2<String, Long>> updatesPerNameStream = nameCountStream.keyBy(r -> r.<String>getFieldAs("name")).process(new KeyedProcessFunction<String, Row, Tuple2<String, Long>>() {

        ValueState<Long> count;

        @Override
        public void open(Configuration parameters) {
            count = getRuntimeContext().getState(new ValueStateDescriptor<>("count", Long.class));
        }

        @Override
        public void processElement(Row r, Context ctx, Collector<Tuple2<String, Long>> out) throws IOException {
            Long currentCount = count.value();
            if (currentCount == null) {
                currentCount = 0L;
            }
            final long updatedCount = currentCount + 1;
            count.update(updatedCount);
            out.collect(Tuple2.of(ctx.getCurrentKey(), updatedCount));
        }
    });
    tableEnv.createTemporaryView("UpdatesPerName", updatesPerNameStream);
    return tableEnv.sqlQuery("SELECT DISTINCT f0, f1 FROM UpdatesPerName");
}
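
A possible way to exercise this helper (a hedged usage sketch, not part of the ITCase itself): Table.execute() compiles and runs the whole mixed SQL and DataStream pipeline, so the result can be printed directly.

// Hypothetical usage of the helper above.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
Table result = getComplexUnifiedPipeline(env);
// triggers both the SQL parts and the embedded KeyedProcessFunction and
// prints the changelog of distinct (f0, f1) rows
result.execute().print();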

Example 4 with KeyedProcessFunction

Use of org.apache.flink.streaming.api.functions.KeyedProcessFunction in the Apache Flink project.

From the class DataStreamTest, method testKeyedStreamKeyedProcessTranslation.

/**
 * Verify that a {@link KeyedStream#process(KeyedProcessFunction)} call is correctly translated
 * to an operator.
 */
@Test
public void testKeyedStreamKeyedProcessTranslation() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<Long> src = env.generateSequence(0, 0);
    KeyedProcessFunction<Long, Long, Integer> keyedProcessFunction = new KeyedProcessFunction<Long, Long, Integer>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void processElement(Long value, Context ctx, Collector<Integer> out) throws Exception {
        // Do nothing
        }

        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<Integer> out) throws Exception {
        // Do nothing
        }
    };
    DataStream<Integer> processed = src.keyBy(new IdentityKeySelector<Long>()).process(keyedProcessFunction);
    processed.addSink(new DiscardingSink<Integer>());
    assertEquals(keyedProcessFunction, getFunctionForDataStream(processed));
    assertTrue(getOperatorForDataStream(processed) instanceof KeyedProcessOperator);
}
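
The IdentityKeySelector used above is a private helper defined elsewhere in DataStreamTest; it is not part of the Flink API. A minimal equivalent simply keys each element by itself:

// Minimal sketch of an identity key selector
// (implements org.apache.flink.api.java.functions.KeySelector).
private static class IdentityKeySelector<T> implements KeySelector<T, T> {

    private static final long serialVersionUID = 1L;

    @Override
    public T getKey(T value) {
        return value;
    }
}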

Example 5 with KeyedProcessFunction

Use of org.apache.flink.streaming.api.functions.KeyedProcessFunction in the Apache Flink project.

From the class SavepointITCase, method testTriggerSavepointAndResumeWithNoClaim.

@Test
@Ignore("Disabling this test because it regularly fails on AZP. See FLINK-25427.")
public void testTriggerSavepointAndResumeWithNoClaim() throws Exception {
    final int numTaskManagers = 2;
    final int numSlotsPerTaskManager = 2;
    final int parallelism = numTaskManagers * numSlotsPerTaskManager;
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(new EmbeddedRocksDBStateBackend(true));
    env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
    env.getCheckpointConfig().setCheckpointStorage(folder.newFolder().toURI());
    env.setParallelism(parallelism);
    final SharedReference<CountDownLatch> counter = sharedObjects.add(new CountDownLatch(10_000));
    env.fromSequence(1, Long.MAX_VALUE).keyBy(i -> i % parallelism).process(new KeyedProcessFunction<Long, Long, Long>() {

        private ListState<Long> last;

        @Override
        public void open(Configuration parameters) {
            // we use list state here to create sst files of a significant size.
            // if sst files do not reach certain size thresholds, they are not stored
            // as separate files but inlined as a byte stream in the checkpoint metadata
            last = getRuntimeContext().getListState(new ListStateDescriptor<>("last", BasicTypeInfo.LONG_TYPE_INFO));
        }

        @Override
        public void processElement(Long value, KeyedProcessFunction<Long, Long, Long>.Context ctx, Collector<Long> out) throws Exception {
            last.add(value);
            out.collect(value);
        }
    }).addSink(new SinkFunction<Long>() {

        @Override
        public void invoke(Long value) {
            counter.consumeSync(CountDownLatch::countDown);
        }
    }).setParallelism(1);
    final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    MiniClusterWithClientResource cluster = new MiniClusterWithClientResource(new MiniClusterResourceConfiguration.Builder().setNumberTaskManagers(numTaskManagers).setNumberSlotsPerTaskManager(numSlotsPerTaskManager).build());
    cluster.before();
    try {
        final JobID jobID1 = new JobID();
        jobGraph.setJobID(jobID1);
        cluster.getClusterClient().submitJob(jobGraph).get();
        CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), jobID1, false);
        // wait for some records to be processed before taking the checkpoint
        counter.get().await();
        final String firstCheckpoint = cluster.getMiniCluster().triggerCheckpoint(jobID1).get();
        cluster.getClusterClient().cancel(jobID1).get();
        jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(firstCheckpoint, false, RestoreMode.NO_CLAIM));
        final JobID jobID2 = new JobID();
        jobGraph.setJobID(jobID2);
        cluster.getClusterClient().submitJob(jobGraph).get();
        CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), jobID2, false);
        String secondCheckpoint = cluster.getMiniCluster().triggerCheckpoint(jobID2).get();
        cluster.getClusterClient().cancel(jobID2).get();
        // delete the checkpoint we restored from
        FileUtils.deleteDirectory(Paths.get(new URI(firstCheckpoint)).getParent().toFile());
        // we should be able to restore from the second checkpoint even though it has been built
        // on top of the first checkpoint
        jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(secondCheckpoint, false, RestoreMode.NO_CLAIM));
        final JobID jobID3 = new JobID();
        jobGraph.setJobID(jobID3);
        cluster.getClusterClient().submitJob(jobGraph).get();
        CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), jobID3, false);
    } finally {
        cluster.after();
    }
}
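
The point of RestoreMode.NO_CLAIM, which the comments above allude to: Flink does not take ownership of the snapshot it restores from, so the first checkpoint taken after the restore must be a full checkpoint that no longer references the restored files. That is why deleting firstCheckpoint is safe once secondCheckpoint has been completed. The restore configuration used twice in the test reduces to this shape:

// "/path/to/checkpoint" is a placeholder for an externalized checkpoint
// or savepoint location.
jobGraph.setSavepointRestoreSettings(
        SavepointRestoreSettings.forPath(
                "/path/to/checkpoint",
                false, // allowNonRestoredState
                RestoreMode.NO_CLAIM));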
