Examples with RichParallelSourceFunction - org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction

Example 1 with RichParallelSourceFunction

Use of org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction in the project flink by Apache.

From the class KafkaConsumerTestBase, method writeAppendSequence.

/**
 * Appends a sequence of tuples to the given topic and then validates the topic's content.
 *
 * <p>A parallel source with {@code parallelism} subtasks is started; every subtask emits the
 * tuples {@code (subtaskIndex, n)} for {@code n} in
 * {@code [originalNumElements, originalNumElements + numElementsToAppend)}. The stream is handed
 * to {@code kafkaServer.produceIntoKafka} together with a {@link Tuple2FlinkPartitioner}. Once
 * the write job has finished and no job is running anymore, {@code validateSequence} is called
 * with {@code originalNumElements + numElementsToAppend} as the expected element count.
 *
 * @param topicName name of the topic to append to
 * @param originalNumElements first counter value to emit, i.e. the number of elements assumed to
 *     be in the topic already
 * @param numElementsToAppend number of counter values each source subtask emits
 * @param parallelism parallelism of the source and of the Kafka sink
 * @throws Exception if the write job fails or if {@code validateSequence} reports an invalid
 *     sequence
 */
protected void writeAppendSequence(String topicName, final int originalNumElements, final int numElementsToAppend, final int parallelism) throws Exception {
    LOG.info("\n===================================\n" + "== Appending sequence of " + numElementsToAppend + " into " + topicName + "\n" + "===================================");
    final TypeInformation<Tuple2<Integer, Integer>> resultType = TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {
    });
    final SerializationSchema<Tuple2<Integer, Integer>> serSchema = new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig());
    final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema = new KafkaDeserializationSchemaWrapper<>(new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));
    // -------- Write the append sequence --------
    StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    // a failed write must surface as a job failure instead of being retried by Flink
    writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

        // volatile: cancel() may be invoked from a different thread than the one executing
        // run(), and the loop below must observe the update
        private volatile boolean running = true;

        @Override
        public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
            int cnt = originalNumElements;
            int partition = getRuntimeContext().getIndexOfThisSubtask();
            while (running && cnt < numElementsToAppend + originalNumElements) {
                ctx.collect(new Tuple2<>(partition, cnt));
                cnt++;
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    }).setParallelism(parallelism);
    // the producer must not produce duplicates
    Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
    producerProperties.setProperty("retries", "0");
    producerProperties.putAll(secureProps);
    kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism)).setParallelism(parallelism);
    try {
        writeEnv.execute("Write sequence");
    } catch (Exception e) {
        // keep the original failure as the cause so the root problem stays visible
        throw new Exception("Failed to append sequence to Kafka; append job failed.", e);
    }
    LOG.info("Finished writing append sequence");
    // we need to validate the sequence, because kafka's producers are not exactly once
    LOG.info("Validating sequence");
    // poll until no job is reported as running before starting the validation
    while (!getRunningJobs(client).isEmpty()) {
        Thread.sleep(50);
    }
    if (!validateSequence(topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) {
        throw new Exception("Could not append a valid sequence to Kafka.");
    }
}

Also used : KafkaDeserializationSchemaWrapper(org.apache.flink.streaming.connectors.kafka.internals.KafkaDeserializationSchemaWrapper) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Properties(java.util.Properties) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) RetryOnException(org.apache.flink.testutils.junit.RetryOnException) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) IOException(java.io.IOException) NotLeaderForPartitionException(org.apache.kafka.common.errors.NotLeaderForPartitionException) SuccessException(org.apache.flink.test.util.SuccessException) TimeoutException(org.apache.kafka.common.errors.TimeoutException) JobCancellationException(org.apache.flink.runtime.client.JobCancellationException) TypeInformationSerializationSchema(org.apache.flink.api.common.serialization.TypeInformationSerializationSchema) Tuple2FlinkPartitioner(org.apache.flink.streaming.connectors.kafka.testutils.Tuple2FlinkPartitioner) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RichParallelSourceFunction(org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Example 2 with RichParallelSourceFunction

Use of org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction in the project flink by Apache.

From the class KafkaConsumerTestBase, method writeSequence.

/**
 * Writes a sequence of tuples into a freshly created topic and validates it, retrying with a
 * new topic when an attempt fails.
 *
 * <p>Up to 10 attempts are made. Attempt {@code i} uses the topic
 * {@code baseTopicName + '-' + i}, which is created via {@code createTestTopic}. A parallel
 * source with {@code parallelism} subtasks emits the tuples {@code (subtaskIndex, n)} for
 * {@code n} in {@code [0, numElements)}, and the stream is handed to
 * {@code kafkaServer.produceIntoKafka} together with a {@link Tuple2FlinkPartitioner}. If the
 * write job throws, or if {@code validateSequence} rejects the written data, the topic is
 * deleted and the next attempt starts.
 *
 * @param baseTopicName prefix of the topic name; the attempt number is appended to it
 * @param numElements number of counter values each source subtask emits
 * @param parallelism parallelism of the source and of the Kafka sink; also passed to
 *     {@code createTestTopic}
 * @param replicationFactor replication factor passed to {@code createTestTopic}
 * @return the name of the topic that holds the validated sequence
 * @throws Exception if no attempt produced a valid sequence
 */
protected String writeSequence(String baseTopicName, final int numElements, final int parallelism, final int replicationFactor) throws Exception {
    LOG.info("\n===================================\n" + "== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" + "===================================");
    final TypeInformation<Tuple2<Integer, Integer>> resultType = TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {
    });
    final SerializationSchema<Tuple2<Integer, Integer>> serSchema = new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig());
    final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema = new KafkaDeserializationSchemaWrapper<>(new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));
    final int maxNumAttempts = 10;
    for (int attempt = 1; attempt <= maxNumAttempts; attempt++) {
        // every attempt gets its own topic so that a failed attempt cannot pollute the next one
        final String topicName = baseTopicName + '-' + attempt;
        LOG.info("Writing attempt #" + attempt);
        // -------- Write the Sequence --------
        createTestTopic(topicName, parallelism, replicationFactor);
        StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
        // retries are handled by this loop, not by Flink's restart strategy
        writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
        DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

            // volatile: cancel() may be invoked from a different thread than the one
            // executing run(), and the loop below must observe the update
            private volatile boolean running = true;

            @Override
            public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
                int cnt = 0;
                int partition = getRuntimeContext().getIndexOfThisSubtask();
                while (running && cnt < numElements) {
                    ctx.collect(new Tuple2<>(partition, cnt));
                    cnt++;
                }
            }

            @Override
            public void cancel() {
                running = false;
            }
        }).setParallelism(parallelism);
        // the producer must not produce duplicates
        Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
        producerProperties.setProperty("retries", "0");
        producerProperties.putAll(secureProps);
        kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism)).setParallelism(parallelism);
        try {
            writeEnv.execute("Write sequence");
        } catch (Exception e) {
            // discard the partially written topic and start over with a fresh one
            LOG.error("Write attempt failed, trying again", e);
            deleteTestTopic(topicName);
            waitUntilNoJobIsRunning(client);
            continue;
        }
        LOG.info("Finished writing sequence");
        // -------- Validate the Sequence --------
        // we need to validate the sequence, because kafka's producers are not exactly once
        LOG.info("Validating sequence");
        waitUntilNoJobIsRunning(client);
        if (validateSequence(topicName, parallelism, deserSchema, numElements)) {
            // everything is good!
            return topicName;
        } else {
            deleteTestTopic(topicName);
        // fall through the loop
        }
    }
    throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts");
}

Also used : KafkaDeserializationSchemaWrapper(org.apache.flink.streaming.connectors.kafka.internals.KafkaDeserializationSchemaWrapper) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) Properties(java.util.Properties) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) RetryOnException(org.apache.flink.testutils.junit.RetryOnException) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) IOException(java.io.IOException) NotLeaderForPartitionException(org.apache.kafka.common.errors.NotLeaderForPartitionException) SuccessException(org.apache.flink.test.util.SuccessException) TimeoutException(org.apache.kafka.common.errors.TimeoutException) JobCancellationException(org.apache.flink.runtime.client.JobCancellationException) TypeInformationSerializationSchema(org.apache.flink.api.common.serialization.TypeInformationSerializationSchema) Tuple2FlinkPartitioner(org.apache.flink.streaming.connectors.kafka.testutils.Tuple2FlinkPartitioner) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RichParallelSourceFunction(org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Aggregations

IOException (java.io.IOException): 2, Properties (java.util.Properties): 2, ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 2, TypeInformationSerializationSchema (org.apache.flink.api.common.serialization.TypeInformationSerializationSchema): 2, TypeHint (org.apache.flink.api.common.typeinfo.TypeHint): 2, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 2, ProgramInvocationException (org.apache.flink.client.program.ProgramInvocationException): 2, JobCancellationException (org.apache.flink.runtime.client.JobCancellationException): 2, JobExecutionException (org.apache.flink.runtime.client.JobExecutionException): 2, StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 2, RichParallelSourceFunction (org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction): 2, KafkaDeserializationSchemaWrapper (org.apache.flink.streaming.connectors.kafka.internals.KafkaDeserializationSchemaWrapper): 2, Tuple2FlinkPartitioner (org.apache.flink.streaming.connectors.kafka.testutils.Tuple2FlinkPartitioner): 2, SuccessException (org.apache.flink.test.util.SuccessException): 2, RetryOnException (org.apache.flink.testutils.junit.RetryOnException): 2, NotLeaderForPartitionException (org.apache.kafka.common.errors.NotLeaderForPartitionException): 2, TimeoutException (org.apache.kafka.common.errors.TimeoutException): 2, CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString): 1