Use of org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner in project flink by apache.
The class KafkaITCase, method testTimestamps.
/**
 * Kafka 20 specific test, ensuring Timestamps are properly written to and read from Kafka.
 */
@Test(timeout = 60000)
public void testTimestamps() throws Exception {
    final String topic = "tstopic";
    createTestTopic(topic, 3, 1);

    // ---------- Produce an event time stream into Kafka -------------------
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    DataStream<Long> streamWithTimestamps = env.addSource(new SourceFunction<Long>() {
        private static final long serialVersionUID = -2255115836471289626L;

        boolean running = true;

        @Override
        public void run(SourceContext<Long> ctx) throws Exception {
            long i = 0;
            while (running) {
                ctx.collectWithTimestamp(i, i * 2);
                if (i++ == 1110L) {
                    running = false;
                }
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    });

    final TypeInformationSerializationSchema<Long> longSer =
            new TypeInformationSerializationSchema<>(Types.LONG, env.getConfig());
    FlinkKafkaProducer<Long> prod = new FlinkKafkaProducer<>(
            topic,
            new KeyedSerializationSchemaWrapper<>(longSer),
            standardProps,
            Optional.of(new FlinkKafkaPartitioner<Long>() {
                private static final long serialVersionUID = -6730989584364230617L;

                @Override
                public int partition(Long next, byte[] key, byte[] value, String targetTopic, int[] partitions) {
                    return (int) (next % 3);
                }
            }));
    prod.setWriteTimestampToKafka(true);
    streamWithTimestamps.addSink(prod).setParallelism(3);
    env.execute("Produce some");

    // ---------- Consume stream from Kafka -------------------
    env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    FlinkKafkaConsumer<Long> kafkaSource =
            new FlinkKafkaConsumer<>(topic, new KafkaITCase.LimitedLongDeserializer(), standardProps);
    kafkaSource.assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks<Long>() {
        private static final long serialVersionUID = -4834111173247835189L;

        @Nullable
        @Override
        public Watermark checkAndGetNextWatermark(Long lastElement, long extractedTimestamp) {
            if (lastElement % 11 == 0) {
                return new Watermark(lastElement);
            }
            return null;
        }

        @Override
        public long extractTimestamp(Long element, long previousElementTimestamp) {
            return previousElementTimestamp;
        }
    });
    DataStream<Long> stream = env.addSource(kafkaSource);
    GenericTypeInfo<Object> objectTypeInfo = new GenericTypeInfo<>(Object.class);
    stream.transform("timestamp validating operator", objectTypeInfo, new TimestampValidatingOperator())
            .setParallelism(1);
    env.execute("Consume again");

    deleteTestTopic(topic);
}
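The anonymous FlinkKafkaPartitioner above returns a partition index computed directly from the record value. For reuse outside a single test, the same idea can be written as a named class. A minimal sketch, assuming non-negative record values; the class name LongModuloPartitioner is illustrative and not part of Flink:

import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner;

/** Illustrative partitioner: routes each Long to a partition derived from its value. */
public class LongModuloPartitioner extends FlinkKafkaPartitioner<Long> {

    private static final long serialVersionUID = 1L;

    @Override
    public int partition(Long record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
        // 'partitions' holds the partition ids of the target topic; pick one deterministically
        // (assumes non-negative values, as produced by the test source above).
        return partitions[(int) (record % partitions.length)];
    }
}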
Use of org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner in project flink by apache.
The class FlinkKafkaProducer, method open.
// ----------------------------------- Utilities --------------------------

/**
 * Initializes the connection to Kafka.
 */
@Override
public void open(Configuration configuration) throws Exception {
    if (logFailuresOnly) {
        callback = new Callback() {
            @Override
            public void onCompletion(RecordMetadata metadata, Exception e) {
                if (e != null) {
                    LOG.error("Error while sending record to Kafka: " + e.getMessage(), e);
                }
                acknowledgeMessage();
            }
        };
    } else {
        callback = new Callback() {
            @Override
            public void onCompletion(RecordMetadata metadata, Exception exception) {
                if (exception != null && asyncException == null) {
                    asyncException = exception;
                }
                acknowledgeMessage();
            }
        };
    }

    RuntimeContext ctx = getRuntimeContext();
    if (flinkKafkaPartitioner != null) {
        flinkKafkaPartitioner.open(ctx.getIndexOfThisSubtask(), ctx.getNumberOfParallelSubtasks());
    }
    if (kafkaSchema instanceof KafkaContextAware) {
        KafkaContextAware<IN> contextAwareSchema = (KafkaContextAware<IN>) kafkaSchema;
        contextAwareSchema.setParallelInstanceId(ctx.getIndexOfThisSubtask());
        contextAwareSchema.setNumParallelInstances(ctx.getNumberOfParallelSubtasks());
    }
    if (kafkaSchema != null) {
        kafkaSchema.open(
                RuntimeContextInitializationContextAdapters.serializationAdapter(
                        getRuntimeContext(), metricGroup -> metricGroup.addGroup("user")));
    }
    super.open(configuration);
}
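The flinkKafkaPartitioner.open(parallelInstanceId, parallelInstances) call made here is the hook a custom partitioner can use to learn about the producer's parallelism. A minimal sketch of a partitioner that pins each producer subtask to one partition, similar in spirit to Flink's FlinkFixedPartitioner; the class name SubtaskStickyPartitioner is illustrative:

import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner;
import org.apache.flink.util.Preconditions;

/** Illustrative partitioner: every producer subtask writes to a single partition chosen from its subtask index. */
public class SubtaskStickyPartitioner<T> extends FlinkKafkaPartitioner<T> {

    private static final long serialVersionUID = 1L;

    private int parallelInstanceId;

    @Override
    public void open(int parallelInstanceId, int parallelInstances) {
        // Invoked from the producer's open() above with the subtask index and the sink parallelism.
        Preconditions.checkArgument(parallelInstanceId >= 0 && parallelInstances > 0, "Invalid subtask metadata");
        this.parallelInstanceId = parallelInstanceId;
    }

    @Override
    public int partition(T record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
        return partitions[parallelInstanceId % partitions.length];
    }
}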
Use of org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner in project flink by apache.
The class FlinkKafkaProducerBase, method open.
// ----------------------------------- Utilities --------------------------

/**
 * Initializes the connection to Kafka.
 */
@Override
public void open(Configuration configuration) throws Exception {
    if (schema instanceof KeyedSerializationSchemaWrapper) {
        ((KeyedSerializationSchemaWrapper<IN>) schema)
                .getSerializationSchema()
                .open(
                        RuntimeContextInitializationContextAdapters.serializationAdapter(
                                getRuntimeContext(), metricGroup -> metricGroup.addGroup("user")));
    }
    producer = getKafkaProducer(this.producerConfig);

    RuntimeContext ctx = getRuntimeContext();
    if (null != flinkKafkaPartitioner) {
        flinkKafkaPartitioner.open(ctx.getIndexOfThisSubtask(), ctx.getNumberOfParallelSubtasks());
    }

    LOG.info(
            "Starting FlinkKafkaProducer ({}/{}) to produce into default topic {}",
            ctx.getIndexOfThisSubtask() + 1,
            ctx.getNumberOfParallelSubtasks(),
            defaultTopicId);

    // register Kafka metrics to Flink accumulators
    if (!Boolean.parseBoolean(producerConfig.getProperty(KEY_DISABLE_METRICS, "false"))) {
        Map<MetricName, ? extends Metric> metrics = this.producer.metrics();
        if (metrics == null) {
            // MapR's Kafka implementation returns null here.
            LOG.info("Producer implementation does not support metrics");
        } else {
            final MetricGroup kafkaMetricGroup = getRuntimeContext().getMetricGroup().addGroup("KafkaProducer");
            for (Map.Entry<MetricName, ? extends Metric> metric : metrics.entrySet()) {
                kafkaMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));
            }
        }
    }

    if (flushOnCheckpoint && !((StreamingRuntimeContext) this.getRuntimeContext()).isCheckpointingEnabled()) {
        LOG.warn("Flushing on checkpoint is enabled, but checkpointing is not enabled. Disabling flushing.");
        flushOnCheckpoint = false;
    }

    if (logFailuresOnly) {
        callback = new Callback() {
            @Override
            public void onCompletion(RecordMetadata metadata, Exception e) {
                if (e != null) {
                    LOG.error("Error while sending record to Kafka: " + e.getMessage(), e);
                }
                acknowledgeMessage();
            }
        };
    } else {
        callback = new Callback() {
            @Override
            public void onCompletion(RecordMetadata metadata, Exception exception) {
                if (exception != null && asyncException == null) {
                    asyncException = exception;
                }
                acknowledgeMessage();
            }
        };
    }
}
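The metric registration above is guarded by the KEY_DISABLE_METRICS property of the producer configuration. A minimal sketch of building such a configuration, assuming the constant is accessible as referenced in the method above; the helper class and method names are illustrative:

import java.util.Properties;

import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducerBase;

/** Illustrative helper: builds a producer config that opts out of the Kafka metric forwarding shown above. */
public final class ProducerConfigExample {

    private ProducerConfigExample() {}

    public static Properties withoutKafkaMetrics(Properties base) {
        Properties producerConfig = new Properties();
        producerConfig.putAll(base); // e.g. bootstrap.servers, as in the standardProps used by the tests
        // open() skips registering KafkaProducer metrics as Flink gauges when this property is "true".
        producerConfig.setProperty(FlinkKafkaProducerBase.KEY_DISABLE_METRICS, "true");
        return producerConfig;
    }
}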
Use of org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner in project flink by apache.
The class KafkaConnectorOptionsUtil, method initializePartitioner.
/**
 * Instantiates a FlinkKafkaPartitioner from the given class name.
 */
private static <T> FlinkKafkaPartitioner<T> initializePartitioner(String name, ClassLoader classLoader) {
    try {
        Class<?> clazz = Class.forName(name, true, classLoader);
        if (!FlinkKafkaPartitioner.class.isAssignableFrom(clazz)) {
            throw new ValidationException(
                    String.format(
                            "Sink partitioner class '%s' should extend from the required class %s",
                            name, FlinkKafkaPartitioner.class.getName()));
        }
        @SuppressWarnings("unchecked")
        final FlinkKafkaPartitioner<T> kafkaPartitioner =
                InstantiationUtil.instantiate(name, FlinkKafkaPartitioner.class, classLoader);
        return kafkaPartitioner;
    } catch (ClassNotFoundException | FlinkException e) {
        throw new ValidationException(
                String.format("Could not find and instantiate partitioner class '%s'", name), e);
    }
}
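initializePartitioner only checks that the class extends FlinkKafkaPartitioner; because the instance is created reflectively through InstantiationUtil, the class also needs an accessible no-arg constructor. A minimal sketch of such a class for the table connector, assuming RowData records; the class name is illustrative, and its fully qualified name is what a user would typically supply as the custom sink partitioner option of the Kafka SQL connector:

import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner;
import org.apache.flink.table.data.RowData;

/**
 * Illustrative custom partitioner loadable by class name through the method above.
 * It extends FlinkKafkaPartitioner and relies on the implicit public no-arg constructor.
 */
public class SimpleRoundRobinPartitioner extends FlinkKafkaPartitioner<RowData> {

    private static final long serialVersionUID = 1L;

    private int nextIndex;

    @Override
    public int partition(RowData record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
        // Cycle through the available partitions of the target topic.
        int chosen = partitions[nextIndex];
        nextIndex = (nextIndex + 1) % partitions.length;
        return chosen;
    }
}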
Use of org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner in project flink by apache.
The class KafkaProducerTestBase, method testExactlyOnce.
/**
 * This test sets up the KafkaProducer so that it automatically flushes the data and then fails the
 * broker, to check that records flushed since the last checkpoint are not duplicated.
 */
protected void testExactlyOnce(boolean regularSink, int sinksCount) throws Exception {
    final String topic = (regularSink ? "exactlyOnceTopicRegularSink" : "exactlyTopicCustomOperator") + sinksCount;
    final int partition = 0;
    final int numElements = 1000;
    final int failAfterElements = 333;

    for (int i = 0; i < sinksCount; i++) {
        createTestTopic(topic + i, 1, 1);
    }

    TypeInformationSerializationSchema<Integer> schema =
            new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(500);
    env.setParallelism(1);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));

    Properties properties = new Properties();
    properties.putAll(standardProps);
    properties.putAll(secureProps);

    // process exactly failAfterElements number of elements and then shutdown Kafka broker and
    // fail application
    List<Integer> expectedElements = getIntegersSequence(numElements);

    DataStream<Integer> inputStream =
            env.addSource(new IntegerSource(numElements))
                    .map(new FailingIdentityMapper<Integer>(failAfterElements));

    for (int i = 0; i < sinksCount; i++) {
        FlinkKafkaPartitioner<Integer> partitioner = new FlinkKafkaPartitioner<Integer>() {
            @Override
            public int partition(Integer record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
                return partition;
            }
        };

        if (regularSink) {
            StreamSink<Integer> kafkaSink = kafkaServer.getProducerSink(topic + i, schema, properties, partitioner);
            inputStream.addSink(kafkaSink.getUserFunction());
        } else {
            kafkaServer.produceIntoKafka(inputStream, topic + i, schema, properties, partitioner);
        }
    }

    FailingIdentityMapper.failedBefore = false;
    TestUtils.tryExecute(env, "Exactly once test");

    for (int i = 0; i < sinksCount; i++) {
        // assert that before failure we successfully snapshot/flushed all expected elements
        assertExactlyOnceForTopic(properties, topic + i, expectedElements);
        deleteTestTopic(topic + i);
    }
}
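The anonymous partitioner in this test pins every record to a single partition so that the contents of that partition can be compared against the expected sequence. Written as a reusable named class, a minimal sketch could look like the following; the class name is illustrative:

import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner;

/** Illustrative partitioner: sends every record to one fixed partition of the target topic. */
public class ConstantPartitionPartitioner<T> extends FlinkKafkaPartitioner<T> {

    private static final long serialVersionUID = 1L;

    private final int fixedPartition;

    public ConstantPartitionPartitioner(int fixedPartition) {
        this.fixedPartition = fixedPartition;
    }

    @Override
    public int partition(T record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
        // The test above returns the partition id directly; here it comes from the constructor instead.
        return fixedPartition;
    }
}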