
Example 1 with JobCancellationException

use of org.apache.flink.runtime.client.JobCancellationException in project flink by apache.

the class KafkaConsumerTestBase method runMetricsTest.

/**
	 * Tests metrics reporting for the consumer.
	 *
	 * @throws Throwable if the job fails or the expected metrics never appear
	 */
public void runMetricsTest() throws Throwable {
    // create a topic with 5 partitions
    final String topic = "metricsStream";
    createTestTopic(topic, 5, 1);
    final Tuple1<Throwable> error = new Tuple1<>(null);
    Runnable job = new Runnable() {

        @Override
        public void run() {
            try {
                // start job writing & reading data.
                final StreamExecutionEnvironment env1 = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
                env1.setParallelism(1);
                env1.getConfig().setRestartStrategy(RestartStrategies.noRestart());
                env1.getConfig().disableSysoutLogging();
                // let the source read everything into the network buffers
                env1.disableOperatorChaining();
                Properties props = new Properties();
                props.putAll(standardProps);
                props.putAll(secureProps);
                TypeInformationSerializationSchema<Tuple2<Integer, Integer>> schema = new TypeInformationSerializationSchema<>(TypeInfoParser.<Tuple2<Integer, Integer>>parse("Tuple2<Integer, Integer>"), env1.getConfig());
                DataStream<Tuple2<Integer, Integer>> fromKafka = env1.addSource(kafkaServer.getConsumer(topic, schema, props));
                fromKafka.flatMap(new FlatMapFunction<Tuple2<Integer, Integer>, Void>() {

                    @Override
                    public void flatMap(Tuple2<Integer, Integer> value, Collector<Void> out) throws Exception {
                        // no-op: the records only need to be consumed
                    }
                });
                DataStream<Tuple2<Integer, Integer>> fromGen = env1.addSource(new RichSourceFunction<Tuple2<Integer, Integer>>() {

                    boolean running = true;

                    @Override
                    public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
                        int i = 0;
                        while (running) {
                            ctx.collect(Tuple2.of(i++, getRuntimeContext().getIndexOfThisSubtask()));
                            Thread.sleep(1);
                        }
                    }

                    @Override
                    public void cancel() {
                        running = false;
                    }
                });
                kafkaServer.produceIntoKafka(fromGen, topic, new KeyedSerializationSchemaWrapper<>(schema), props, null);
                env1.execute("Metrics test job");
            } catch (Throwable t) {
                LOG.warn("Got exception during execution", t);
                // the job is cancelled at the end of the test, so an expected
                // JobCancellationException is ignored; anything else fails the test
                if (!(t.getCause() instanceof JobCancellationException)) {
                    error.f0 = t;
                }
            }
            }
        }
    };
    Thread jobThread = new Thread(job);
    jobThread.start();
    try {
        // connect to JMX
        MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer();
        // wait until we've found all 5 offset metrics
        Set<ObjectName> offsetMetrics = mBeanServer.queryNames(new ObjectName("*current-offsets*:*"), null);
        while (offsetMetrics.size() < 5) {
            // test will time out if metrics are not properly working
            if (error.f0 != null) {
                // fail test early
                throw error.f0;
            }
            offsetMetrics = mBeanServer.queryNames(new ObjectName("*current-offsets*:*"), null);
            Thread.sleep(50);
        }
        Assert.assertEquals(5, offsetMetrics.size());
        // the surrounding test timeout will fail the test if this condition is never met
        while (true) {
            int numPosOffsets = 0;
            // check that offsets are correctly reported
            for (ObjectName object : offsetMetrics) {
                Object offset = mBeanServer.getAttribute(object, "Value");
                if ((long) offset >= 0) {
                    numPosOffsets++;
                }
            }
            if (numPosOffsets == 5) {
                break;
            }
            // wait for the consumer to consume on all partitions
            Thread.sleep(50);
        }
        // check if producer metrics are also available.
        Set<ObjectName> producerMetrics = mBeanServer.queryNames(new ObjectName("*KafkaProducer*:*"), null);
        Assert.assertTrue("No producer metrics found", producerMetrics.size() > 30);
        LOG.info("Found all JMX metrics. Cancelling job.");
    } finally {
        // cancel the job in any case, so the runner thread can terminate
        JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));
    }
    while (jobThread.isAlive()) {
        Thread.sleep(50);
    }
    if (error.f0 != null) {
        throw error.f0;
    }
    deleteTestTopic(topic);
}
Also used : Properties (java.util.Properties), JobCancellationException (org.apache.flink.runtime.client.JobCancellationException), MBeanServer (javax.management.MBeanServer), RetryOnException (org.apache.flink.testutils.junit.RetryOnException), ProgramInvocationException (org.apache.flink.client.program.ProgramInvocationException), SuccessException (org.apache.flink.test.util.SuccessException), NoResourceAvailableException (org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException), JobExecutionException (org.apache.flink.runtime.client.JobExecutionException), TimeoutException (org.apache.kafka.common.errors.TimeoutException), IOException (java.io.IOException), TypeHint (org.apache.flink.api.common.typeinfo.TypeHint), ObjectName (javax.management.ObjectName), TypeInformationSerializationSchema (org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema), Tuple1 (org.apache.flink.api.java.tuple.Tuple1), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
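
The polling loop in this example is a reusable pattern: query the platform MBeanServer with a wildcard ObjectName until the expected number of metric MBeans shows up. Below is a minimal stand-alone sketch of that pattern; the class name JmxMetricPoller, the timeout handling, and the values in main() are illustrative additions, not part of the Flink test base.

import java.lang.management.ManagementFactory;
import java.util.Set;
import javax.management.MBeanServer;
import javax.management.ObjectName;

public class JmxMetricPoller {

    // polls the platform MBeanServer until at least `expected` MBeans match
    // the wildcard pattern, or the timeout elapses; returns whatever matched
    public static Set<ObjectName> awaitMetrics(String pattern, int expected, long timeoutMs)
            throws Exception {
        MBeanServer server = ManagementFactory.getPlatformMBeanServer();
        long deadline = System.currentTimeMillis() + timeoutMs;
        Set<ObjectName> found = server.queryNames(new ObjectName(pattern), null);
        while (found.size() < expected && System.currentTimeMillis() < deadline) {
            Thread.sleep(50); // same polling interval as the test above
            found = server.queryNames(new ObjectName(pattern), null);
        }
        return found;
    }

    public static void main(String[] args) throws Exception {
        // e.g. wait for the five per-partition offset gauges queried in the test above
        Set<ObjectName> offsets = awaitMetrics("*current-offsets*:*", 5, 30_000);
        System.out.println("Matched MBeans: " + offsets.size());
    }
}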

Example 2 with JobCancellationException

use of org.apache.flink.runtime.client.JobCancellationException in project flink by apache.

the class KafkaConsumerTestBase method runAutoOffsetRetrievalAndCommitToKafka.

/**
	 * This test ensures that when the consumer retrieves a start offset from Kafka (earliest, latest),
	 * that offset is committed back to Kafka, even if some partitions are never read.
	 *
	 * Test:
	 * - create 3 partitions
	 * - write 50 messages into each
	 * - start three consumers with auto.offset.reset='latest' and wait until they have committed offsets to Kafka
	 * - check that the offsets in Kafka are set to 50 for all three partitions
	 *
	 * See also FLINK-3440.
	 */
public void runAutoOffsetRetrievalAndCommitToKafka() throws Exception {
    // 3 partitions with 50 records each (0-49, so the expected commit offset of each partition should be 50)
    final int parallelism = 3;
    final int recordsInEachPartition = 50;
    final String topicName = writeSequence("testAutoOffsetRetrievalAndCommitToKafkaTopic", recordsInEachPartition, parallelism, 1);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.getConfig().disableSysoutLogging();
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.setParallelism(parallelism);
    env.enableCheckpointing(200);
    Properties readProps = new Properties();
    readProps.putAll(standardProps);
    // reset to 'latest' so that the partitions are not read initially
    readProps.setProperty("auto.offset.reset", "latest");
    DataStream<String> stream = env.addSource(kafkaServer.getConsumer(topicName, new SimpleStringSchema(), readProps));
    stream.addSink(new DiscardingSink<String>());
    final AtomicReference<Throwable> errorRef = new AtomicReference<>();
    final Thread runner = new Thread("runner") {

        @Override
        public void run() {
            try {
                env.execute();
            } catch (Throwable t) {
                if (!(t.getCause() instanceof JobCancellationException)) {
                    errorRef.set(t);
                }
            }
        }
    };
    runner.start();
    KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = kafkaServer.createOffsetHandler();
    // the final committed offset in Kafka should be 50
    final Long l50 = 50L;
    // poll for at most 30 seconds
    final long deadline = 30_000_000_000L + System.nanoTime();
    do {
        Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
        Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
        Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);
        if (l50.equals(o1) && l50.equals(o2) && l50.equals(o3)) {
            break;
        }
        Thread.sleep(100);
    } while (System.nanoTime() < deadline);
    // cancel the job
    JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));
    final Throwable t = errorRef.get();
    if (t != null) {
        throw new RuntimeException("Job failed with an exception", t);
    }
    // final check to see if offsets are correctly in Kafka
    Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
    Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
    Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);
    Assert.assertEquals(Long.valueOf(50L), o1);
    Assert.assertEquals(Long.valueOf(50L), o2);
    Assert.assertEquals(Long.valueOf(50L), o3);
    kafkaOffsetHandler.close();
    deleteTestTopic(topicName);
}
Also used : AtomicReference (java.util.concurrent.atomic.AtomicReference), Properties (java.util.Properties), TypeHint (org.apache.flink.api.common.typeinfo.TypeHint), JobCancellationException (org.apache.flink.runtime.client.JobCancellationException), SimpleStringSchema (org.apache.flink.streaming.util.serialization.SimpleStringSchema), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
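
The runner thread here (and in the next two examples) follows one idiom: execute the job on a background thread, treat the JobCancellationException caused by the expected external cancel as success, and record anything else. A hedged stand-alone version of that idiom; the class and thread names are illustrative:

import java.util.concurrent.atomic.AtomicReference;
import org.apache.flink.runtime.client.JobCancellationException;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class CancellableJobRunner {

    // starts env.execute() on a background thread; the caller is expected to
    // cancel the job externally, so a wrapped JobCancellationException is
    // swallowed and only unexpected failures land in errorRef
    public static Thread start(final StreamExecutionEnvironment env,
                               final AtomicReference<Throwable> errorRef) {
        Thread runner = new Thread("job-runner") {
            @Override
            public void run() {
                try {
                    env.execute();
                } catch (Throwable t) {
                    if (!(t.getCause() instanceof JobCancellationException)) {
                        errorRef.set(t);
                    }
                }
            }
        };
        runner.start();
        return runner;
    }
}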

Example 3 with JobCancellationException

use of org.apache.flink.runtime.client.JobCancellationException in project flink by apache.

the class KafkaConsumerTestBase method runCommitOffsetsToKafka.

/**
	 * Ensures that the offsets committed to Kafka are the offsets of "the next record to process".
	 */
public void runCommitOffsetsToKafka() throws Exception {
    // 3 partitions with 50 records each (0-49, so the expected commit offset of each partition should be 50)
    final int parallelism = 3;
    final int recordsInEachPartition = 50;
    final String topicName = writeSequence("testCommitOffsetsToKafkaTopic", recordsInEachPartition, parallelism, 1);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.getConfig().disableSysoutLogging();
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.setParallelism(parallelism);
    env.enableCheckpointing(200);
    DataStream<String> stream = env.addSource(kafkaServer.getConsumer(topicName, new SimpleStringSchema(), standardProps));
    stream.addSink(new DiscardingSink<String>());
    final AtomicReference<Throwable> errorRef = new AtomicReference<>();
    final Thread runner = new Thread("runner") {

        @Override
        public void run() {
            try {
                env.execute();
            } catch (Throwable t) {
                if (!(t.getCause() instanceof JobCancellationException)) {
                    errorRef.set(t);
                }
            }
        }
    };
    runner.start();
    // the final committed offset in Kafka should be 50
    final Long l50 = 50L;
    // poll for at most 30 seconds
    final long deadline = 30_000_000_000L + System.nanoTime();
    KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = kafkaServer.createOffsetHandler();
    do {
        Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
        Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
        Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);
        if (l50.equals(o1) && l50.equals(o2) && l50.equals(o3)) {
            break;
        }
        Thread.sleep(100);
    } while (System.nanoTime() < deadline);
    // cancel the job
    JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));
    final Throwable t = errorRef.get();
    if (t != null) {
        throw new RuntimeException("Job failed with an exception", t);
    }
    // final check to see if offsets are correctly in Kafka
    Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
    Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
    Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);
    Assert.assertEquals(Long.valueOf(50L), o1);
    Assert.assertEquals(Long.valueOf(50L), o2);
    Assert.assertEquals(Long.valueOf(50L), o3);
    kafkaOffsetHandler.close();
    deleteTestTopic(topicName);
}
Also used : AtomicReference (java.util.concurrent.atomic.AtomicReference), TypeHint (org.apache.flink.api.common.typeinfo.TypeHint), JobCancellationException (org.apache.flink.runtime.client.JobCancellationException), SimpleStringSchema (org.apache.flink.streaming.util.serialization.SimpleStringSchema), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
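
The do/while commit check in Examples 2 and 3 is a bounded busy-wait against System.nanoTime(). The sketch below isolates just that loop; the IntFunction stands in for KafkaOffsetHandler#getCommittedOffset (which may return null before the first commit), and the class and method names are illustrative assumptions:

import java.util.function.IntFunction;

public class OffsetAwait {

    // polls until every partition's committed offset equals `expected`,
    // or the nanoTime-based deadline passes; returns whether it succeeded
    public static boolean awaitCommittedOffsets(IntFunction<Long> committedOffset,
                                                int partitions,
                                                long expected,
                                                long timeoutNanos) throws InterruptedException {
        final long deadline = System.nanoTime() + timeoutNanos;
        do {
            int matched = 0;
            for (int partition = 0; partition < partitions; partition++) {
                Long offset = committedOffset.apply(partition);
                if (offset != null && offset == expected) {
                    matched++;
                }
            }
            if (matched == partitions) {
                return true;
            }
            Thread.sleep(100); // same polling interval as the tests above
        } while (System.nanoTime() < deadline);
        return false;
    }
}

With such a helper, the check in Example 3 would collapse to awaitCommittedOffsets(p -> kafkaOffsetHandler.getCommittedOffset(topicName, p), 3, 50L, 30_000_000_000L).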

Example 4 with JobCancellationException

use of org.apache.flink.runtime.client.JobCancellationException in project flink by apache.

the class KafkaConsumerTestBase method runStartFromLatestOffsets.

/**
	 * This test ensures that when explicitly set to start from latest record, the consumer
	 * ignores the "auto.offset.reset" behaviour as well as any committed group offsets in Kafka.
	 */
public void runStartFromLatestOffsets() throws Exception {
    // 50 records written to each of 3 partitions before launching a latest-starting consuming job
    final int parallelism = 3;
    final int recordsInEachPartition = 50;
    // an extra 200 records will be written to each partition
    final int extraRecordsInEachPartition = 200;
    // all already existing data in the topic, before the consuming topology has started, should be ignored
    final String topicName = writeSequence("testStartFromLatestOffsetsTopic", recordsInEachPartition, parallelism, 1);
    // the committed offsets should be ignored
    KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = kafkaServer.createOffsetHandler();
    kafkaOffsetHandler.setCommittedOffset(topicName, 0, 23);
    kafkaOffsetHandler.setCommittedOffset(topicName, 1, 31);
    kafkaOffsetHandler.setCommittedOffset(topicName, 2, 43);
    // job names for the topologies for writing and consuming the extra records
    final String consumeExtraRecordsJobName = "Consume Extra Records Job";
    final String writeExtraRecordsJobName = "Write Extra Records Job";
    // serialization / deserialization schemas for writing and consuming the extra records
    final TypeInformation<Tuple2<Integer, Integer>> resultType = TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {
    });
    final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema = new KeyedSerializationSchemaWrapper<>(new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));
    final KeyedDeserializationSchema<Tuple2<Integer, Integer>> deserSchema = new KeyedDeserializationSchemaWrapper<>(new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));
    // setup and run the latest-consuming job
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.getConfig().disableSysoutLogging();
    env.setParallelism(parallelism);
    final Properties readProps = new Properties();
    readProps.putAll(standardProps);
    // this should be ignored, since the consumer explicitly starts from the latest offsets
    readProps.setProperty("auto.offset.reset", "earliest");
    FlinkKafkaConsumerBase<Tuple2<Integer, Integer>> latestReadingConsumer = kafkaServer.getConsumer(topicName, deserSchema, readProps);
    latestReadingConsumer.setStartFromLatest();
    env.addSource(latestReadingConsumer).setParallelism(parallelism).flatMap(new FlatMapFunction<Tuple2<Integer, Integer>, Object>() {

        @Override
        public void flatMap(Tuple2<Integer, Integer> value, Collector<Object> out) throws Exception {
            // any record with a count below recordsInEachPartition was written
            // before this consumer started and therefore must not be seen
            if (value.f1 < recordsInEachPartition) {
                throw new RuntimeException("test failed; consumed a record that was previously written: " + value);
            }
        }
    }).setParallelism(1).addSink(new DiscardingSink<>());
    final AtomicReference<Throwable> error = new AtomicReference<>();
    Thread consumeThread = new Thread(new Runnable() {

        @Override
        public void run() {
            try {
                env.execute(consumeExtraRecordsJobName);
            } catch (Throwable t) {
                if (!(t.getCause() instanceof JobCancellationException)) {
                    error.set(t);
                }
            }
        }
    });
    consumeThread.start();
    // wait until the consuming job has started, to be extra safe
    JobManagerCommunicationUtils.waitUntilJobIsRunning(flink.getLeaderGateway(timeout), consumeExtraRecordsJobName);
    // setup the extra records writing job
    final StreamExecutionEnvironment env2 = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    DataStream<Tuple2<Integer, Integer>> extraRecordsStream = env2.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

        private boolean running = true;

        @Override
        public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
            // the extra records should start from the last written value
            int count = recordsInEachPartition;
            int partition = getRuntimeContext().getIndexOfThisSubtask();
            while (running && count < recordsInEachPartition + extraRecordsInEachPartition) {
                ctx.collect(new Tuple2<>(partition, count));
                count++;
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    }).setParallelism(parallelism);
    kafkaServer.produceIntoKafka(extraRecordsStream, topicName, serSchema, readProps, null);
    try {
        env2.execute(writeExtraRecordsJobName);
    } catch (Exception e) {
        throw new RuntimeException("Writing extra records failed", e);
    }
    // cancel the consume job after all extra records are written
    JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout), consumeExtraRecordsJobName);
    consumeThread.join();
    kafkaOffsetHandler.close();
    deleteTestTopic(topicName);
    // check whether the consuming thread threw any test errors;
    // test will fail here if the consume job had incorrectly read any records other than the extra records
    final Throwable consumerError = error.get();
    if (consumerError != null) {
        throw new Exception("Exception in the consuming thread", consumerError);
    }
}
Also used : KeyedSerializationSchemaWrapper (org.apache.flink.streaming.util.serialization.KeyedSerializationSchemaWrapper), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), Properties (java.util.Properties), JobCancellationException (org.apache.flink.runtime.client.JobCancellationException), KeyedDeserializationSchemaWrapper (org.apache.flink.streaming.util.serialization.KeyedDeserializationSchemaWrapper), AtomicReference (java.util.concurrent.atomic.AtomicReference), TypeHint (org.apache.flink.api.common.typeinfo.TypeHint), RetryOnException (org.apache.flink.testutils.junit.RetryOnException), ProgramInvocationException (org.apache.flink.client.program.ProgramInvocationException), SuccessException (org.apache.flink.test.util.SuccessException), NoResourceAvailableException (org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException), JobExecutionException (org.apache.flink.runtime.client.JobExecutionException), TimeoutException (org.apache.kafka.common.errors.TimeoutException), IOException (java.io.IOException), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), RichParallelSourceFunction (org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
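
The pivotal call in Example 4 is setStartFromLatest(), which overrides both the auto.offset.reset property and any committed group offsets. A minimal hedged fragment showing just that configuration, written against the same test harness (kafkaServer and standardProps come from KafkaConsumerTestBase; the topic name is a placeholder):

Properties props = new Properties();
props.putAll(standardProps);
// this property would normally decide where to start, but it is ignored
// once a start position is set explicitly on the consumer
props.setProperty("auto.offset.reset", "earliest");

FlinkKafkaConsumerBase<String> consumer =
        kafkaServer.getConsumer("demo-topic", new SimpleStringSchema(), props);
// start from the latest record, ignoring committed offsets and the reset policy
consumer.setStartFromLatest();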

Aggregations

TypeHint (org.apache.flink.api.common.typeinfo.TypeHint): 4 usages
JobCancellationException (org.apache.flink.runtime.client.JobCancellationException): 4 usages
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 4 usages
Properties (java.util.Properties): 3 usages
AtomicReference (java.util.concurrent.atomic.AtomicReference): 3 usages
IOException (java.io.IOException): 2 usages
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 2 usages
ProgramInvocationException (org.apache.flink.client.program.ProgramInvocationException): 2 usages
JobExecutionException (org.apache.flink.runtime.client.JobExecutionException): 2 usages
NoResourceAvailableException (org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException): 2 usages
SimpleStringSchema (org.apache.flink.streaming.util.serialization.SimpleStringSchema): 2 usages
SuccessException (org.apache.flink.test.util.SuccessException): 2 usages
RetryOnException (org.apache.flink.testutils.junit.RetryOnException): 2 usages
TimeoutException (org.apache.kafka.common.errors.TimeoutException): 2 usages
MBeanServer (javax.management.MBeanServer): 1 usage
ObjectName (javax.management.ObjectName): 1 usage
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 1 usage
Tuple1 (org.apache.flink.api.java.tuple.Tuple1): 1 usage
RichParallelSourceFunction (org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction): 1 usage
KeyedDeserializationSchemaWrapper (org.apache.flink.streaming.util.serialization.KeyedDeserializationSchemaWrapper): 1 usage