Use of org.apache.flink.runtime.client.JobCancellationException in project flink by apache.
The class KafkaConsumerTestBase, method runMetricsTest.
/**
 * Tests metrics reporting for the Kafka consumer and producer: a current-offset metric must be
 * exposed via JMX for each of the topic's 5 partitions, and the Kafka producer metrics must be
 * registered as well. (A standalone sketch of the JMX lookup follows the method.)
 *
 * @throws Throwable if the job fails for any reason other than being cancelled
 */
public void runMetricsTest() throws Throwable {
	// create a test topic with 5 partitions
	final String topic = "metricsStream";
	createTestTopic(topic, 5, 1);

	final Tuple1<Throwable> error = new Tuple1<>(null);

	Runnable job = new Runnable() {
		@Override
		public void run() {
			try {
				// start job writing & reading data.
				final StreamExecutionEnvironment env1 = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
				env1.setParallelism(1);
				env1.getConfig().setRestartStrategy(RestartStrategies.noRestart());
				env1.getConfig().disableSysoutLogging();
				// let the source read everything into the network buffers
				env1.disableOperatorChaining();

				Properties props = new Properties();
				props.putAll(standardProps);
				props.putAll(secureProps);

				TypeInformationSerializationSchema<Tuple2<Integer, Integer>> schema =
					new TypeInformationSerializationSchema<>(
						TypeInfoParser.<Tuple2<Integer, Integer>>parse("Tuple2<Integer, Integer>"), env1.getConfig());

				DataStream<Tuple2<Integer, Integer>> fromKafka = env1.addSource(kafkaServer.getConsumer(topic, schema, standardProps));
				fromKafka.flatMap(new FlatMapFunction<Tuple2<Integer, Integer>, Void>() {
					@Override
					public void flatMap(Tuple2<Integer, Integer> value, Collector<Void> out) throws Exception {
						// no op
					}
				});

				DataStream<Tuple2<Integer, Integer>> fromGen = env1.addSource(new RichSourceFunction<Tuple2<Integer, Integer>>() {
					boolean running = true;

					@Override
					public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
						int i = 0;
						while (running) {
							ctx.collect(Tuple2.of(i++, getRuntimeContext().getIndexOfThisSubtask()));
							Thread.sleep(1);
						}
					}

					@Override
					public void cancel() {
						running = false;
					}
				});

				kafkaServer.produceIntoKafka(fromGen, topic, new KeyedSerializationSchemaWrapper<>(schema), standardProps, null);

				env1.execute("Metrics test job");
			} catch (Throwable t) {
				LOG.warn("Got exception during execution", t);
				if (!(t.getCause() instanceof JobCancellationException)) {
					// the job is cancelled at the end of the test, so a JobCancellationException is expected;
					// anything else is a real failure
					error.f0 = t;
				}
			}
		}
	};

	Thread jobThread = new Thread(job);
	jobThread.start();

	try {
		// connect to JMX
		MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer();

		// wait until we've found all 5 offset metrics (one per partition)
		Set<ObjectName> offsetMetrics = mBeanServer.queryNames(new ObjectName("*current-offsets*:*"), null);
		while (offsetMetrics.size() < 5) {
			// test will time out if metrics are not properly working
			if (error.f0 != null) {
				// fail test early
				throw error.f0;
			}
			offsetMetrics = mBeanServer.queryNames(new ObjectName("*current-offsets*:*"), null);
			Thread.sleep(50);
		}
		Assert.assertEquals(5, offsetMetrics.size());

		// the test will time out (and fail) if this condition is never met
		while (true) {
			int numPosOffsets = 0;
			// check that offsets are correctly reported
			for (ObjectName object : offsetMetrics) {
				Object offset = mBeanServer.getAttribute(object, "Value");
				if ((long) offset >= 0) {
					numPosOffsets++;
				}
			}
			if (numPosOffsets == 5) {
				break;
			}
			// wait for the consumer to consume on all partitions
			Thread.sleep(50);
		}

		// check if producer metrics are also available.
		Set<ObjectName> producerMetrics = mBeanServer.queryNames(new ObjectName("*KafkaProducer*:*"), null);
		Assert.assertTrue("No producer metrics found", producerMetrics.size() > 30);

		LOG.info("Found all JMX metrics. Cancelling job.");
	} finally {
		// cancel the job
		JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));
	}

	while (jobThread.isAlive()) {
		Thread.sleep(50);
	}
	if (error.f0 != null) {
		throw error.f0;
	}

	deleteTestTopic(topic);
}
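For readers unfamiliar with the JMX pattern the test relies on, here is a minimal standalone sketch of the wildcard ObjectName lookup used above. It only uses the JDK's java.lang.management and javax.management APIs; the metric name pattern and the "Value" attribute are the same assumptions the test makes about how Flink's JMX reporter names its gauges, and the class name is chosen here for illustration.

import java.lang.management.ManagementFactory;
import java.util.Set;

import javax.management.MBeanServer;
import javax.management.ObjectName;

// Minimal sketch: query the platform MBean server for metrics whose domain contains
// "current-offsets", as the test above does.
public class JmxOffsetMetricsProbe {

	public static void main(String[] args) throws Exception {
		MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer();

		// "*current-offsets*:*" matches any domain containing "current-offsets", with any key properties
		Set<ObjectName> offsetMetrics = mBeanServer.queryNames(new ObjectName("*current-offsets*:*"), null);

		for (ObjectName name : offsetMetrics) {
			// the test reads the "Value" attribute of each matching gauge
			Object value = mBeanServer.getAttribute(name, "Value");
			System.out.println(name + " = " + value);
		}
	}
}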
Use of org.apache.flink.runtime.client.JobCancellationException in project flink by apache.
The class KafkaConsumerTestBase, method runAutoOffsetRetrievalAndCommitToKafka.
/**
 * This test ensures that when the consumers retrieve a start offset from Kafka (earliest, latest), this
 * offset is committed back to Kafka, even if some partitions are not read.
 *
 * Test:
 * - Create a topic with 3 partitions.
 * - Write 50 messages into each partition.
 * - Start three consumers with auto.offset.reset='latest' and wait until they have committed offsets to Kafka.
 * - Check that the committed offset in Kafka is 50 for each of the three partitions.
 *
 * See FLINK-3440 as well.
 * (A direct committed-offset check with the plain Kafka client is sketched after this method.)
 */
public void runAutoOffsetRetrievalAndCommitToKafka() throws Exception {
	// 3 partitions with 50 records each (0-49, so the expected commit offset of each partition should be 50)
	final int parallelism = 3;
	final int recordsInEachPartition = 50;

	final String topicName = writeSequence("testAutoOffsetRetrievalAndCommitToKafkaTopic", recordsInEachPartition, parallelism, 1);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
	env.getConfig().disableSysoutLogging();
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.setParallelism(parallelism);
	env.enableCheckpointing(200);

	Properties readProps = new Properties();
	readProps.putAll(standardProps);
	// set to reset to latest, so that partitions are initially not read
	readProps.setProperty("auto.offset.reset", "latest");

	DataStream<String> stream = env.addSource(kafkaServer.getConsumer(topicName, new SimpleStringSchema(), readProps));
	stream.addSink(new DiscardingSink<String>());

	final AtomicReference<Throwable> errorRef = new AtomicReference<>();
	final Thread runner = new Thread("runner") {
		@Override
		public void run() {
			try {
				env.execute();
			} catch (Throwable t) {
				if (!(t.getCause() instanceof JobCancellationException)) {
					errorRef.set(t);
				}
			}
		}
	};
	runner.start();

	KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = kafkaServer.createOffsetHandler();

	// the final committed offset in Kafka should be 50
	final Long l50 = 50L;

	final long deadline = 30_000_000_000L + System.nanoTime();
	do {
		Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
		Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
		Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);

		if (l50.equals(o1) && l50.equals(o2) && l50.equals(o3)) {
			break;
		}

		Thread.sleep(100);
	} while (System.nanoTime() < deadline);

	// cancel the job
	JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));

	final Throwable t = errorRef.get();
	if (t != null) {
		throw new RuntimeException("Job failed with an exception", t);
	}

	// final check to see if offsets are correctly in Kafka
	Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
	Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
	Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);
	Assert.assertEquals(Long.valueOf(50L), o1);
	Assert.assertEquals(Long.valueOf(50L), o2);
	Assert.assertEquals(Long.valueOf(50L), o3);

	kafkaOffsetHandler.close();
	deleteTestTopic(topicName);
}
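The kafkaOffsetHandler above hides the broker interaction behind the test harness. As a rough, hedged illustration of what a committed-offset check can look like with the plain Kafka consumer client, consider the sketch below; the broker address, group id, topic name, and class name are placeholders rather than values taken from the test environment.

import java.util.Properties;

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;

// Sketch of a direct committed-offset lookup for one partition of a topic.
// Broker address, group id, and topic name are placeholder assumptions.
public class CommittedOffsetProbe {

	public static void main(String[] args) {
		Properties props = new Properties();
		props.setProperty("bootstrap.servers", "localhost:9092");     // placeholder broker
		props.setProperty("group.id", "flink-kafka-consumer-test");   // placeholder group id
		props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
		props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");

		try (KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(props)) {
			TopicPartition partition = new TopicPartition("testAutoOffsetRetrievalAndCommitToKafkaTopic", 0);
			// committed(...) returns null if nothing has been committed for this group and partition
			OffsetAndMetadata committed = consumer.committed(partition);
			System.out.println("committed offset: " + (committed == null ? "none" : committed.offset()));
		}
	}
}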
Use of org.apache.flink.runtime.client.JobCancellationException in project flink by apache.
The class KafkaConsumerTestBase, method runCommitOffsetsToKafka.
/**
 * Ensures that the offsets committed to Kafka are the offsets of "the next record to process",
 * i.e. 50 after records 0-49 have been read. (The checkpointing wiring that triggers these
 * commits is sketched after this method.)
 */
public void runCommitOffsetsToKafka() throws Exception {
	// 3 partitions with 50 records each (0-49, so the expected commit offset of each partition should be 50)
	final int parallelism = 3;
	final int recordsInEachPartition = 50;

	final String topicName = writeSequence("testCommitOffsetsToKafkaTopic", recordsInEachPartition, parallelism, 1);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
	env.getConfig().disableSysoutLogging();
	env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env.setParallelism(parallelism);
	env.enableCheckpointing(200);

	DataStream<String> stream = env.addSource(kafkaServer.getConsumer(topicName, new SimpleStringSchema(), standardProps));
	stream.addSink(new DiscardingSink<String>());

	final AtomicReference<Throwable> errorRef = new AtomicReference<>();
	final Thread runner = new Thread("runner") {
		@Override
		public void run() {
			try {
				env.execute();
			} catch (Throwable t) {
				if (!(t.getCause() instanceof JobCancellationException)) {
					errorRef.set(t);
				}
			}
		}
	};
	runner.start();

	// the final committed offset in Kafka should be 50
	final Long l50 = 50L;

	final long deadline = 30_000_000_000L + System.nanoTime();

	KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = kafkaServer.createOffsetHandler();

	do {
		Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
		Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
		Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);

		if (l50.equals(o1) && l50.equals(o2) && l50.equals(o3)) {
			break;
		}

		Thread.sleep(100);
	} while (System.nanoTime() < deadline);

	// cancel the job
	JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout));

	final Throwable t = errorRef.get();
	if (t != null) {
		throw new RuntimeException("Job failed with an exception", t);
	}

	// final check to see if offsets are correctly in Kafka
	Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
	Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
	Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);
	Assert.assertEquals(Long.valueOf(50L), o1);
	Assert.assertEquals(Long.valueOf(50L), o2);
	Assert.assertEquals(Long.valueOf(50L), o3);

	kafkaOffsetHandler.close();
	deleteTestTopic(topicName);
}
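The committed value of 50 only appears in Kafka once a checkpoint completes, because the test enables checkpointing and the Flink Kafka consumer commits offsets on checkpoint completion in that mode. The fragment below is a minimal sketch of that wiring, not a standalone program: it reuses the harness fields seen above (kafkaServer, flinkPort, standardProps), the topic name is a placeholder, and the explicit setCommitOffsetsOnCheckpoints call is assumed to be available on the consumer base class of the Flink version in use.

// Sketch: enable checkpointing so the consumer commits its offsets back to Kafka
// whenever a checkpoint completes. "demo-topic" is a placeholder.
public void sketchCommitOnCheckpoints() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
	env.enableCheckpointing(200);  // the commit cadence follows the checkpoint interval

	FlinkKafkaConsumerBase<String> consumer =
		kafkaServer.getConsumer("demo-topic", new SimpleStringSchema(), standardProps);
	// explicit opt-in shown for clarity; with checkpointing enabled this is the default behaviour
	consumer.setCommitOffsetsOnCheckpoints(true);

	env.addSource(consumer).addSink(new DiscardingSink<String>());
	env.execute("commit offsets on checkpoints");
}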
Use of org.apache.flink.runtime.client.JobCancellationException in project flink by apache.
The class KafkaConsumerTestBase, method runStartFromLatestOffsets.
/**
 * This test ensures that when the consumer is explicitly set to start from the latest record, it
 * ignores the "auto.offset.reset" setting as well as any committed group offsets in Kafka.
 * (The available startup-mode setters are listed in a short sketch after this method.)
 */
public void runStartFromLatestOffsets() throws Exception {
	// 50 records written to each of 3 partitions before launching a latest-starting consuming job
	final int parallelism = 3;
	final int recordsInEachPartition = 50;

	// an extra 200 records will be written to each partition
	final int extraRecordsInEachPartition = 200;

	// all data already in the topic before the consuming topology starts should be ignored
	final String topicName = writeSequence("testStartFromLatestOffsetsTopic", recordsInEachPartition, parallelism, 1);

	// the committed offsets should be ignored
	KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = kafkaServer.createOffsetHandler();
	kafkaOffsetHandler.setCommittedOffset(topicName, 0, 23);
	kafkaOffsetHandler.setCommittedOffset(topicName, 1, 31);
	kafkaOffsetHandler.setCommittedOffset(topicName, 2, 43);

	// job names for the topologies that write and consume the extra records
	final String consumeExtraRecordsJobName = "Consume Extra Records Job";
	final String writeExtraRecordsJobName = "Write Extra Records Job";

	// serialization / deserialization schemas for writing and consuming the extra records
	final TypeInformation<Tuple2<Integer, Integer>> resultType = TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {
	});
	final KeyedSerializationSchema<Tuple2<Integer, Integer>> serSchema =
		new KeyedSerializationSchemaWrapper<>(new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));
	final KeyedDeserializationSchema<Tuple2<Integer, Integer>> deserSchema =
		new KeyedDeserializationSchemaWrapper<>(new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));

	// setup and run the latest-consuming job
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
	env.getConfig().disableSysoutLogging();
	env.setParallelism(parallelism);

	final Properties readProps = new Properties();
	readProps.putAll(standardProps);
	// this should be ignored
	readProps.setProperty("auto.offset.reset", "earliest");

	FlinkKafkaConsumerBase<Tuple2<Integer, Integer>> latestReadingConsumer = kafkaServer.getConsumer(topicName, deserSchema, readProps);
	latestReadingConsumer.setStartFromLatest();

	env.addSource(latestReadingConsumer).setParallelism(parallelism).flatMap(new FlatMapFunction<Tuple2<Integer, Integer>, Object>() {
		@Override
		public void flatMap(Tuple2<Integer, Integer> value, Collector<Object> out) throws Exception {
			if (value.f1 - recordsInEachPartition < 0) {
				throw new RuntimeException("test failed; consumed a record that was previously written: " + value);
			}
		}
	}).setParallelism(1).addSink(new DiscardingSink<>());

	final AtomicReference<Throwable> error = new AtomicReference<>();
	Thread consumeThread = new Thread(new Runnable() {
		@Override
		public void run() {
			try {
				env.execute(consumeExtraRecordsJobName);
			} catch (Throwable t) {
				if (!(t.getCause() instanceof JobCancellationException)) {
					error.set(t);
				}
			}
		}
	});
	consumeThread.start();

	// wait until the consuming job has started, to be extra safe
	JobManagerCommunicationUtils.waitUntilJobIsRunning(flink.getLeaderGateway(timeout), consumeExtraRecordsJobName);

	// setup the extra records writing job
	final StreamExecutionEnvironment env2 = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);

	DataStream<Tuple2<Integer, Integer>> extraRecordsStream = env2.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

		private boolean running = true;

		@Override
		public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
			// the extra records should start from the last written value
			int count = recordsInEachPartition;
			int partition = getRuntimeContext().getIndexOfThisSubtask();

			while (running && count < recordsInEachPartition + extraRecordsInEachPartition) {
				ctx.collect(new Tuple2<>(partition, count));
				count++;
			}
		}

		@Override
		public void cancel() {
			running = false;
		}
	}).setParallelism(parallelism);

	kafkaServer.produceIntoKafka(extraRecordsStream, topicName, serSchema, readProps, null);

	try {
		env2.execute(writeExtraRecordsJobName);
	} catch (Exception e) {
		throw new RuntimeException("Writing extra records failed", e);
	}

	// cancel the consume job after all extra records are written
	JobManagerCommunicationUtils.cancelCurrentJob(flink.getLeaderGateway(timeout), consumeExtraRecordsJobName);
	consumeThread.join();

	kafkaOffsetHandler.close();
	deleteTestTopic(topicName);

	// check whether the consuming thread threw any test errors;
	// the test will fail here if the consuming job incorrectly read any records other than the extra records
	final Throwable consumerError = error.get();
	if (consumerError != null) {
		throw new Exception("Exception in the consuming thread", consumerError);
	}
}
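The startup-position call exercised above, setStartFromLatest(), is one of several setters on the consumer. The fragment below is only a brief illustration of the options; it reuses the harness fields (kafkaServer, standardProps), the topic name is a placeholder, and the setters are listed together purely for comparison since each call overrides the previous one.

// Brief illustration of the startup-mode setters on the Flink Kafka consumer.
// Only the last call takes effect; "demo-topic" is a placeholder.
FlinkKafkaConsumerBase<String> consumer =
	kafkaServer.getConsumer("demo-topic", new SimpleStringSchema(), standardProps);

consumer.setStartFromEarliest();     // ignore committed group offsets, read from the earliest record
consumer.setStartFromLatest();       // ignore committed group offsets, read only records written from now on
consumer.setStartFromGroupOffsets(); // default: resume from the offsets committed for the group in Kafka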