Use of org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction in project flink by apache.
The class KafkaConsumerTestBase, method writeAppendSequence.
protected void writeAppendSequence(String topicName, final int originalNumElements, final int numElementsToAppend, final int parallelism) throws Exception {
    LOG.info("\n===================================\n" + "== Appending sequence of " + numElementsToAppend + " into " + topicName + "\n" + "===================================");
    final TypeInformation<Tuple2<Integer, Integer>> resultType = TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {
    });
    final SerializationSchema<Tuple2<Integer, Integer>> serSchema = new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig());
    final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema = new KafkaDeserializationSchemaWrapper<>(new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));
    // -------- Write the append sequence --------
    StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

        private boolean running = true;

        @Override
        public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
            int cnt = originalNumElements;
            int partition = getRuntimeContext().getIndexOfThisSubtask();
            while (running && cnt < numElementsToAppend + originalNumElements) {
                ctx.collect(new Tuple2<>(partition, cnt));
                cnt++;
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    }).setParallelism(parallelism);
    // the producer must not produce duplicates
    Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
    producerProperties.setProperty("retries", "0");
    producerProperties.putAll(secureProps);
    kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism)).setParallelism(parallelism);
    try {
        writeEnv.execute("Write sequence");
    } catch (Exception e) {
        throw new Exception("Failed to append sequence to Kafka; append job failed.", e);
    }
    LOG.info("Finished writing append sequence");
    // we need to validate the sequence, because kafka's producers are not exactly once
    LOG.info("Validating sequence");
    while (!getRunningJobs(client).isEmpty()) {
        Thread.sleep(50);
    }
    if (!validateSequence(topicName, parallelism, deserSchema, originalNumElements + numElementsToAppend)) {
        throw new Exception("Could not append a valid sequence to Kafka.");
    }
}
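The anonymous source above is the core RichParallelSourceFunction usage: each parallel subtask reads its index from the runtime context and emits (subtaskIndex, counter) tuples until it finishes its range or is cancelled. A minimal, self-contained sketch of the same pattern outside the Kafka test harness might look like the following; the class and job names are illustrative, not part of the Flink test base, and note that newer Flink releases deprecate SourceFunction-style sources in favor of the unified Source API.

    import org.apache.flink.api.java.tuple.Tuple2;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;

    public class ParallelCounterSourceExample {

        // Emits (subtaskIndex, counter) pairs, one bounded range per parallel subtask,
        // mirroring the anonymous source used in writeAppendSequence above.
        public static class ParallelCounterSource extends RichParallelSourceFunction<Tuple2<Integer, Integer>> {

            private final int numElements;
            private volatile boolean running = true;

            public ParallelCounterSource(int numElements) {
                this.numElements = numElements;
            }

            @Override
            public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
                int cnt = 0;
                int subtask = getRuntimeContext().getIndexOfThisSubtask();
                while (running && cnt < numElements) {
                    // emit one element per iteration; the loop ends when the range is done or cancel() flips the flag
                    ctx.collect(new Tuple2<>(subtask, cnt));
                    cnt++;
                }
            }

            @Override
            public void cancel() {
                running = false;
            }
        }

        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            env.addSource(new ParallelCounterSource(100))
                    .setParallelism(4)
                    .print();
            env.execute("Parallel counter source example");
        }
    }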
Use of org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction in project flink by apache.
The class KafkaConsumerTestBase, method writeSequence.
protected String writeSequence(String baseTopicName, final int numElements, final int parallelism, final int replicationFactor) throws Exception {
    LOG.info("\n===================================\n" + "== Writing sequence of " + numElements + " into " + baseTopicName + " with p=" + parallelism + "\n" + "===================================");
    final TypeInformation<Tuple2<Integer, Integer>> resultType = TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {
    });
    final SerializationSchema<Tuple2<Integer, Integer>> serSchema = new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig());
    final KafkaDeserializationSchema<Tuple2<Integer, Integer>> deserSchema = new KafkaDeserializationSchemaWrapper<>(new TypeInformationSerializationSchema<>(resultType, new ExecutionConfig()));
    final int maxNumAttempts = 10;
    for (int attempt = 1; attempt <= maxNumAttempts; attempt++) {
        final String topicName = baseTopicName + '-' + attempt;
        LOG.info("Writing attempt #" + attempt);
        // -------- Write the Sequence --------
        createTestTopic(topicName, parallelism, replicationFactor);
        StreamExecutionEnvironment writeEnv = StreamExecutionEnvironment.getExecutionEnvironment();
        writeEnv.getConfig().setRestartStrategy(RestartStrategies.noRestart());
        DataStream<Tuple2<Integer, Integer>> stream = writeEnv.addSource(new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

            private boolean running = true;

            @Override
            public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
                int cnt = 0;
                int partition = getRuntimeContext().getIndexOfThisSubtask();
                while (running && cnt < numElements) {
                    ctx.collect(new Tuple2<>(partition, cnt));
                    cnt++;
                }
            }

            @Override
            public void cancel() {
                running = false;
            }
        }).setParallelism(parallelism);
        // the producer must not produce duplicates
        Properties producerProperties = FlinkKafkaProducerBase.getPropertiesFromBrokerList(brokerConnectionStrings);
        producerProperties.setProperty("retries", "0");
        producerProperties.putAll(secureProps);
        kafkaServer.produceIntoKafka(stream, topicName, serSchema, producerProperties, new Tuple2FlinkPartitioner(parallelism)).setParallelism(parallelism);
        try {
            writeEnv.execute("Write sequence");
        } catch (Exception e) {
            LOG.error("Write attempt failed, trying again", e);
            deleteTestTopic(topicName);
            waitUntilNoJobIsRunning(client);
            continue;
        }
        LOG.info("Finished writing sequence");
        // -------- Validate the Sequence --------
        // we need to validate the sequence, because kafka's producers are not exactly once
        LOG.info("Validating sequence");
        waitUntilNoJobIsRunning(client);
        if (validateSequence(topicName, parallelism, deserSchema, numElements)) {
            // everything is good!
            return topicName;
        } else {
            deleteTestTopic(topicName);
            // fall through the loop
        }
    }
    throw new Exception("Could not write a valid sequence to Kafka after " + maxNumAttempts + " attempts");
}
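Because the producer runs with retries=0 and without exactly-once guarantees, a single write attempt can yield an incomplete or duplicated sequence; writeSequence therefore creates a fresh topic per attempt and only returns a topic name once validateSequence accepts it. The Tuple2FlinkPartitioner passed to produceIntoKafka routes each tuple to the Kafka partition given by its first field, so every source subtask writes into "its" partition. A hedged sketch in the spirit of that partitioner is shown below; the class and field names are illustrative, the actual test class lives in the Flink Kafka connector test code.

    import org.apache.flink.api.java.tuple.Tuple2;
    import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner;

    // Illustrative partitioner: the first tuple field selects the Kafka partition,
    // so the (subtaskIndex, counter) elements of one subtask land in one partition.
    public class FirstFieldPartitioner extends FlinkKafkaPartitioner<Tuple2<Integer, Integer>> {

        private final int expectedPartitions;

        public FirstFieldPartitioner(int expectedPartitions) {
            this.expectedPartitions = expectedPartitions;
        }

        @Override
        public int partition(Tuple2<Integer, Integer> record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
            if (partitions.length != expectedPartitions) {
                throw new IllegalStateException(
                        "Topic " + targetTopic + " has " + partitions.length
                                + " partitions, expected " + expectedPartitions);
            }
            return record.f0;
        }
    }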