Example 1 with RichSinkFunction

Use of org.apache.flink.streaming.api.functions.sink.RichSinkFunction in project flink by apache.

The class KafkaConsumerTestBase, method runSimpleConcurrentProducerConsumerTopology.

/**
 * Ensure Kafka is working on both producer and consumer side. This executes a job that contains
 * two Flink pipelines.
 *
 * <pre>
 * (generator source) --> (kafka sink)-[KAFKA-TOPIC]-(kafka source) --> (validating sink)
 * </pre>
 *
 * <p>We need to retry this test externally rather than through Flink's own retry
 * mechanism, because the Kafka producer does not guarantee exactly-once output;
 * a recovery would therefore introduce duplicates that cause the test to fail.
 *
 * <p>This test also ensures that FLINK-3156 doesn't happen again:
 *
 * <p>The following situation caused an NPE in the FlinkKafkaConsumer:
 *
 * <pre>
 * topic-1 <-- elements are produced only into topic-1
 * topic-2 <-- stays empty the whole time
 * </pre>
 *
 * <p>Therefore, this test also consumes from an empty topic.
 */
@RetryOnException(times = 2, exception = NotLeaderForPartitionException.class)
public void runSimpleConcurrentProducerConsumerTopology() throws Exception {
    final String topic = "concurrentProducerConsumerTopic_" + UUID.randomUUID().toString();
    final String additionalEmptyTopic = "additionalEmptyTopic_" + UUID.randomUUID().toString();
    final int parallelism = 3;
    final int elementsPerPartition = 100;
    final int totalElements = parallelism * elementsPerPartition;
    createTestTopic(topic, parallelism, 1);
    // create an additional topic which will remain empty the whole time
    createTestTopic(additionalEmptyTopic, parallelism, 1);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.enableCheckpointing(500);
    // fail immediately
    env.setRestartStrategy(RestartStrategies.noRestart());
    TypeInformation<Tuple2<Long, String>> longStringType =
            TypeInformation.of(new TypeHint<Tuple2<Long, String>>() {});
    TypeInformationSerializationSchema<Tuple2<Long, String>> sourceSchema = new TypeInformationSerializationSchema<>(longStringType, env.getConfig());
    TypeInformationSerializationSchema<Tuple2<Long, String>> sinkSchema = new TypeInformationSerializationSchema<>(longStringType, env.getConfig());
    // ----------- add producer dataflow ----------
    DataStream<Tuple2<Long, String>> stream = env.addSource(new RichParallelSourceFunction<Tuple2<Long, String>>() {

        // volatile: cancel() is called from a different thread than run()
        private volatile boolean running = true;

        @Override
        public void run(SourceContext<Tuple2<Long, String>> ctx) throws InterruptedException {
            int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition;
            int limit = cnt + elementsPerPartition;
            while (running && cnt < limit) {
                ctx.collect(new Tuple2<>(1000L + cnt, "kafka-" + cnt));
                cnt++;
                // delay data generation a bit so that we are sure that some
                // checkpoints are triggered (for FLINK-3156)
                Thread.sleep(50);
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    });
    Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
    producerProperties.setProperty("retries", "3");
    producerProperties.putAll(secureProps);
    kafkaServer.produceIntoKafka(stream, topic, sinkSchema, producerProperties, null);
    // ----------- add consumer dataflow ----------
    List<String> topics = new ArrayList<>();
    topics.add(topic);
    topics.add(additionalEmptyTopic);
    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    DataStreamSource<Tuple2<Long, String>> consuming = getStream(env, topics, sourceSchema, props);
    consuming.addSink(new RichSinkFunction<Tuple2<Long, String>>() {

        private int elCnt = 0;

        private BitSet validator = new BitSet(totalElements);

        @Override
        public void invoke(Tuple2<Long, String> value) throws Exception {
            String[] sp = value.f1.split("-");
            int v = Integer.parseInt(sp[1]);
            assertEquals(value.f0 - 1000, (long) v);
            assertFalse("Received tuple twice", validator.get(v));
            validator.set(v);
            elCnt++;
            if (elCnt == totalElements) {
                // check that every bit in [0, totalElements) is set
                int nc = validator.nextClearBit(0);
                if (nc != totalElements) {
                    fail("The bitset was not set to 1 on all elements. Next clear: " + nc + " Set: " + validator);
                }
                throw new SuccessException();
            }
        }

        @Override
        public void close() throws Exception {
            super.close();
        }
    }).setParallelism(1);
    try {
        tryExecutePropagateExceptions(env, "runSimpleConcurrentProducerConsumerTopology");
    } catch (ProgramInvocationException | JobExecutionException e) {
        // search the cause chain (bounded depth) for a NotLeaderForPartitionException,
        // which the @RetryOnException rule will catch and retry on
        Throwable cause = e.getCause();
        int depth = 0;
        while (cause != null && depth++ < 20) {
            if (cause instanceof NotLeaderForPartitionException) {
                throw (Exception) cause;
            }
            cause = cause.getCause();
        }
        throw e;
    }
    deleteTestTopic(topic);
    deleteTestTopic(additionalEmptyTopic);
}
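
The termination trick above is worth calling out: a Kafka streaming job never finishes on its own, so the validating sink throws a SuccessException once all elements have arrived, and the driver walks the cause chain to tell that deliberate failure apart from a real one. Below is a minimal, self-contained sketch of the same pattern; the CountingValidator class and the sequence source are illustrative assumptions, not part of the test above.

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.test.util.SuccessException;

public class SuccessExceptionSketch {

    // Hypothetical sink: throws SuccessException once the expected number of
    // elements has been seen, deliberately failing the job in order to end it.
    private static class CountingValidator extends RichSinkFunction<Long> {

        private final long expected;
        private long seen;

        CountingValidator(long expected) {
            this.expected = expected;
        }

        @Override
        public void invoke(Long value) throws Exception {
            if (++seen == expected) {
                throw new SuccessException();
            }
        }
    }

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.generateSequence(1, 1000).addSink(new CountingValidator(1000)).setParallelism(1);
        try {
            env.execute("success-exception-sketch");
        } catch (Exception e) {
            // unwrap the cause chain, just like the NotLeaderForPartitionException
            // search in the test: the SuccessException arrives wrapped in one or
            // more execution exceptions
            Throwable cause = e;
            int depth = 0;
            while (cause != null && depth++ < 20) {
                if (cause instanceof SuccessException) {
                    System.out.println("all elements validated, job ended on purpose");
                    return;
                }
                cause = cause.getCause();
            }
            // no SuccessException in the chain: a genuine failure
            throw e;
        }
    }
}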
Also used: ArrayList (java.util.ArrayList), CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString), Properties (java.util.Properties), JobExecutionException (org.apache.flink.runtime.client.JobExecutionException), RichSinkFunction (org.apache.flink.streaming.api.functions.sink.RichSinkFunction), NotLeaderForPartitionException (org.apache.kafka.common.errors.NotLeaderForPartitionException), BitSet (java.util.BitSet), TypeHint (org.apache.flink.api.common.typeinfo.TypeHint), TypeInformationSerializationSchema (org.apache.flink.api.common.serialization.TypeInformationSerializationSchema), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), SuccessException (org.apache.flink.test.util.SuccessException), ProgramInvocationException (org.apache.flink.client.program.ProgramInvocationException), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), RetryOnException (org.apache.flink.testutils.junit.RetryOnException)

Aggregations

ArrayList (java.util.ArrayList): 1
BitSet (java.util.BitSet): 1
Properties (java.util.Properties): 1
TypeInformationSerializationSchema (org.apache.flink.api.common.serialization.TypeInformationSerializationSchema): 1
TypeHint (org.apache.flink.api.common.typeinfo.TypeHint): 1
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 1
ProgramInvocationException (org.apache.flink.client.program.ProgramInvocationException): 1
JobExecutionException (org.apache.flink.runtime.client.JobExecutionException): 1
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 1
RichSinkFunction (org.apache.flink.streaming.api.functions.sink.RichSinkFunction): 1
SuccessException (org.apache.flink.test.util.SuccessException): 1
RetryOnException (org.apache.flink.testutils.junit.RetryOnException): 1
NotLeaderForPartitionException (org.apache.kafka.common.errors.NotLeaderForPartitionException): 1
CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString): 1
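
For reference, a standalone RichSinkFunction typically follows the lifecycle sketched below; the file-writing target and the FileLineSink name are illustrative assumptions. The reason to extend RichSinkFunction rather than implement the plain SinkFunction interface is access to open()/close() and getRuntimeContext(), which the code above relies on (the source uses the subtask index; the sink overrides close()).

import java.io.BufferedWriter;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

// Hypothetical sink that appends each record to one file per parallel subtask.
public class FileLineSink extends RichSinkFunction<String> {

    // transient: the function object is serialized when the job is deployed,
    // and the writer is only valid on the task manager that opened it
    private transient BufferedWriter writer;

    @Override
    public void open(Configuration parameters) throws Exception {
        // getRuntimeContext() is only available on Rich* functions; the subtask
        // index gives every parallel instance its own output file
        int subtask = getRuntimeContext().getIndexOfThisSubtask();
        writer = Files.newBufferedWriter(Paths.get("sink-output-" + subtask + ".txt"));
    }

    @Override
    public void invoke(String value) throws Exception {
        writer.write(value);
        writer.newLine();
    }

    @Override
    public void close() throws Exception {
        if (writer != null) {
            writer.close();
        }
    }
}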