Use of org.apache.flink.streaming.connectors.kafka.testutils.Tuple2Partitioner in project flink by apache.

The example below is from the class KafkaConsumerTestBase, method writeSequence. The method writes a deterministic sequence of (partition, counter) tuples into a Kafka topic and then reads the topic back to verify that nothing was lost or duplicated, retrying on a fresh topic until one attempt produces a clean sequence.
protected String writeSequence(String baseTopicName, final int numElements, final int parallelism, final int replicationFactor) throws Exception {
    LOG.info("\n===================================\n" + "== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" + "===================================");

    final TypeInformation<Tuple2<Integer, Integer>> resultType =
        TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});

    final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
        new KeyedSerializationSchemaWrapper<>(new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

    final KeyedDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
        new KeyedDeserializationSchemaWrapper<>(new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));
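    // Kafka producers are not exactly-once, so the write/validate cycle below is
    // retried on a fresh topic until one attempt yields an exact sequence.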
    final int maxNumAttempts = 10;

    for (int attempt = 1; attempt <= maxNumAttempts; attempt++) {
        final String topicName = baseTopicName + '-' + attempt;
LOG.info("Writing attempt #1");
        // -------- Write the Sequence --------

        createTestTopic(topicName, parallelism, replicationFactor);

        StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
        writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
        writeEnv.getConfig().disableSysoutLogging();
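        // Each parallel subtask emits (subtaskIndex, 0 .. numElements-1); the
        // Tuple2Partitioner passed to the producer below routes every tuple to
        // the Kafka partition named by its first field, so each partition ends
        // up holding one contiguous sequence.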
        DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

            // volatile: cancel() is invoked from a different thread than run()
            private volatile boolean running = true;

            @Override
            public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
                int cnt = 0;
                int partition = getRuntimeContext().getIndexOfThisSubtask();

                while (running && cnt < numElements) {
                    ctx.collect(new Tuple2<>(partition, cnt));
                    cnt++;
                }
            }

            @Override
            public void cancel() {
                running = false;
            }
        }).setParallelism(parallelism);
        // the producer must not produce duplicates
        Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
        producerProperties.setProperty("retries", "0");
        producerProperties.putAll(secureProps);

        kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2Partitioner(parallelism))
            .setParallelism(parallelism);
        try {
            writeEnv.execute("Write sequence");
        }
        catch (Exception e) {
            LOG.error("Write attempt failed, trying again", e);
            deleteTestTopic(topicName);
            JobManagerCommunicationUtils.waitUntilNoJobIsRunning(flink.getLeaderGateway(timeout));
            continue;
        }

        LOG.info("Finished writing sequence");
        // -------- Validate the Sequence --------

        // we need to validate the sequence, because Kafka's producers are not exactly-once
        LOG.info("Validating sequence");

        JobManagerCommunicationUtils.waitUntilNoJobIsRunning(flink.getLeaderGateway(timeout));
        final StreamExecutionEnvironment readEnv = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
        readEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
        readEnv.getConfig().disableSysoutLogging();
        readEnv.setParallelism(parallelism);
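        // use a dedicated consumer group so the validator's committed offsets
        // do not interfere with other consumers sharing the standard properties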
        Properties readProps = (Properties) standardProps.clone();
        readProps.setProperty("group.id", "flink-tests-validator");
        readProps.putAll(secureProps);

        FlinkKafkaConsumerBase<Tuple2<Integer, Integer>> consumer = kafkaServer.getConsumer(topicName, deserSchema, readProps);
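        // Count all records on a single subtask; seeing exactly parallelism * numElements
        // records signals success via SuccessException. Duplicates are ruled out by
        // retries=0 above; lost records make the job hang until the timeout below.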
        readEnv
            .addSource(consumer)
            .map(new RichMapFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>>() {

                private final int totalCount = parallelism * numElements;
                private int count = 0;

                @Override
                public Tuple2<Integer, Integer> map(Tuple2<Integer, Integer> value) throws Exception {
                    if (++count == totalCount) {
                        throw new SuccessException();
                    } else {
                        return value;
                    }
                }
            }).setParallelism(1)
            .addSink(new DiscardingSink<Tuple2<Integer, Integer>>()).setParallelism(1);
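        // run the validating job in a background thread so the main thread can
        // bound its runtime with a deadline and cancel it if it hangs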
        final AtomicReference<Throwable> errorRef = new AtomicReference<>();

        Thread runner = new Thread() {
            @Override
            public void run() {
                try {
                    tryExecute(readEnv, "sequence validation");
                } catch (Throwable t) {
                    errorRef.set(t);
                }
            }
        };
        runner.start();
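        // wait at most 10 seconds (expressed in nanoseconds) for the validation
        // to finish; Thread.join() takes milliseconds, hence the division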
        final long deadline = System.nanoTime() + 10_000_000_000L;

        long delay;
        while (runner.isAlive() && (delay = deadline - System.nanoTime()) > 0) {
            runner.join(delay / 1_000_000L);
        }
        boolean success;

        if (runner.isAlive()) {
            // did not finish in time, maybe the producer dropped one or more records and
            // the validation did not reach the exit point
            success = false;
            JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));
        } else {
            Throwable error = errorRef.get();
            if (error != null) {
                success = false;
                LOG.info("Attempt " + attempt + " failed with exception", error);
            } else {
                success = true;
            }
        }

        JobManagerCommunicationUtils.waitUntilNoJobIsRunning(flink.getLeaderGateway(timeout));

        if (success) {
            // everything is good!
            return topicName;
        } else {
            deleteTestTopic(topicName);
            // fall through the loop
        }
    }

    throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts");
}
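The Tuple2Partitioner implementation itself is not shown on this page. Judging from its use above (constructed with the expected partition count and fed tuples whose first field is the producing subtask's index), its contract is to route each record to the Kafka partition named by the tuple's first field. Below is a minimal sketch of a partitioner with that contract; the KafkaPartitioner base class and its partition(...) signature are assumptions based on this connector generation, and the class name is hypothetical, not the actual testutils class:

import java.io.Serializable;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.connectors.kafka.partitioner.KafkaPartitioner;

// Hypothetical re-creation for illustration only; the real class lives in
// org.apache.flink.streaming.connectors.kafka.testutils.Tuple2Partitioner.
public class FirstFieldPartitioner extends KafkaPartitioner<Tuple2<Integer, Integer>> implements Serializable {

    private final int expectedPartitions;

    public FirstFieldPartitioner(int expectedPartitions) {
        this.expectedPartitions = expectedPartitions;
    }

    @Override
    public int partition(Tuple2<Integer, Integer> next, byte[] serializedKey, byte[] serializedValue, int numPartitions) {
        // fail fast if the topic was created with an unexpected partition count
        if (numPartitions != expectedPartitions) {
            throw new IllegalArgumentException(
                "Expected " + expectedPartitions + " partitions, but the topic has " + numPartitions);
        }
        // the first tuple field is the producing subtask's index; use it as the target partition
        return next.f0;
    }
}

With parallelism source subtasks and a topic of parallelism partitions, this mapping makes each partition carry exactly one subtask's 0..numElements-1 sequence, which is what the counting validation above relies on.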