Use of org.apache.flink.streaming.api.operators.StreamingRuntimeContext in project flink by apache.
From the class FlinkKafkaConsumerBase, method open().
// ------------------------------------------------------------------------
// Work methods
// ------------------------------------------------------------------------
@Override
public void open(Configuration configuration) throws Exception {
// determine the offset commit mode
this.offsetCommitMode = OffsetCommitModes.fromConfiguration(getIsAutoCommitEnabled(), enableCommitOnCheckpoints, ((StreamingRuntimeContext) getRuntimeContext()).isCheckpointingEnabled());
// create the partition discoverer
this.partitionDiscoverer = createPartitionDiscoverer(topicsDescriptor, getRuntimeContext().getIndexOfThisSubtask(), getRuntimeContext().getNumberOfParallelSubtasks());
this.partitionDiscoverer.open();
subscribedPartitionsToStartOffsets = new HashMap<>();
final List<KafkaTopicPartition> allPartitions = partitionDiscoverer.discoverPartitions();
if (restoredState != null) {
for (KafkaTopicPartition partition : allPartitions) {
if (!restoredState.containsKey(partition)) {
restoredState.put(partition, KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET);
}
}
for (Map.Entry<KafkaTopicPartition, Long> restoredStateEntry : restoredState.entrySet()) {
// filter out restored partitions that should not be subscribed by this subtask
if (KafkaTopicPartitionAssigner.assign(restoredStateEntry.getKey(), getRuntimeContext().getNumberOfParallelSubtasks()) == getRuntimeContext().getIndexOfThisSubtask()) {
subscribedPartitionsToStartOffsets.put(restoredStateEntry.getKey(), restoredStateEntry.getValue());
}
}
if (filterRestoredPartitionsWithCurrentTopicsDescriptor) {
subscribedPartitionsToStartOffsets.entrySet().removeIf(entry -> {
if (!topicsDescriptor.isMatchingTopic(entry.getKey().getTopic())) {
LOG.warn("{} is removed from subscribed partitions since it is no longer associated with topics descriptor of current execution.", entry.getKey());
return true;
}
return false;
});
}
LOG.info("Consumer subtask {} will start reading {} partitions with offsets in restored state: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), subscribedPartitionsToStartOffsets);
} else {
// use the discovered partitions as the seed partitions and set their start offsets according to the startup mode;
// for EARLIEST, LATEST, and GROUP_OFFSETS the sentinel offset is resolved lazily when the partition is actually read.
switch(startupMode) {
case SPECIFIC_OFFSETS:
if (specificStartupOffsets == null) {
throw new IllegalStateException("Startup mode for the consumer set to " + StartupMode.SPECIFIC_OFFSETS + ", but no specific offsets were specified.");
}
for (KafkaTopicPartition seedPartition : allPartitions) {
Long specificOffset = specificStartupOffsets.get(seedPartition);
if (specificOffset != null) {
// since the specified offsets represent the next record to read, we subtract it by one
// so that the initial state of the consumer will be correct
subscribedPartitionsToStartOffsets.put(seedPartition, specificOffset - 1);
} else {
// default to group offset behaviour if the user-provided specific offsets
// do not contain a value for this partition
subscribedPartitionsToStartOffsets.put(seedPartition, KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
}
}
break;
case TIMESTAMP:
if (startupOffsetsTimestamp == null) {
throw new IllegalStateException("Startup mode for the consumer set to " + StartupMode.TIMESTAMP + ", but no startup timestamp was specified.");
}
for (Map.Entry<KafkaTopicPartition, Long> partitionToOffset : fetchOffsetsWithTimestamp(allPartitions, startupOffsetsTimestamp).entrySet()) {
subscribedPartitionsToStartOffsets.put(partitionToOffset.getKey(), (partitionToOffset.getValue() == null) ? // if no offset could be fetched for the partition at the given timestamp, we default to using the latest offset for the partition
KafkaTopicPartitionStateSentinel.LATEST_OFFSET : // the fetched offset represents the next record to read, so we subtract it by one so that the initial state of the consumer will be correct
partitionToOffset.getValue() - 1);
}
break;
default:
for (KafkaTopicPartition seedPartition : allPartitions) {
subscribedPartitionsToStartOffsets.put(seedPartition, startupMode.getStateSentinel());
}
}
if (!subscribedPartitionsToStartOffsets.isEmpty()) {
switch(startupMode) {
case EARLIEST:
LOG.info("Consumer subtask {} will start reading the following {} partitions from the earliest offsets: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), subscribedPartitionsToStartOffsets.keySet());
break;
case LATEST:
LOG.info("Consumer subtask {} will start reading the following {} partitions from the latest offsets: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), subscribedPartitionsToStartOffsets.keySet());
break;
case TIMESTAMP:
LOG.info("Consumer subtask {} will start reading the following {} partitions from timestamp {}: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), startupOffsetsTimestamp, subscribedPartitionsToStartOffsets.keySet());
break;
case SPECIFIC_OFFSETS:
LOG.info("Consumer subtask {} will start reading the following {} partitions from the specified startup offsets {}: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), specificStartupOffsets, subscribedPartitionsToStartOffsets.keySet());
List<KafkaTopicPartition> partitionsDefaultedToGroupOffsets = new ArrayList<>(subscribedPartitionsToStartOffsets.size());
for (Map.Entry<KafkaTopicPartition, Long> subscribedPartition : subscribedPartitionsToStartOffsets.entrySet()) {
if (subscribedPartition.getValue() == KafkaTopicPartitionStateSentinel.GROUP_OFFSET) {
partitionsDefaultedToGroupOffsets.add(subscribedPartition.getKey());
}
}
if (partitionsDefaultedToGroupOffsets.size() > 0) {
LOG.warn("Consumer subtask {} cannot find offsets for the following {} partitions in the specified startup offsets: {}" + "; their startup offsets will be defaulted to their committed group offsets in Kafka.", getRuntimeContext().getIndexOfThisSubtask(), partitionsDefaultedToGroupOffsets.size(), partitionsDefaultedToGroupOffsets);
}
break;
case GROUP_OFFSETS:
LOG.info("Consumer subtask {} will start reading the following {} partitions from the committed group offsets in Kafka: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), subscribedPartitionsToStartOffsets.keySet());
}
} else {
LOG.info("Consumer subtask {} initially has no partitions to read from.", getRuntimeContext().getIndexOfThisSubtask());
}
}
this.deserializer.open(RuntimeContextInitializationContextAdapters.deserializationAdapter(getRuntimeContext(), metricGroup -> metricGroup.addGroup("user")));
}
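For comparison, here is a minimal, hypothetical sketch (not from the Flink codebase; the class name CheckpointAwareSource and all log messages are made up) of how a user-defined source could apply the same cast to StreamingRuntimeContext in open() to check whether checkpointing is enabled and to read its subtask index and parallelism:

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext;
import org.apache.flink.streaming.api.operators.StreamingRuntimeContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class CheckpointAwareSource extends RichParallelSourceFunction<String> {

    private static final Logger LOG = LoggerFactory.getLogger(CheckpointAwareSource.class);

    private volatile boolean running = true;
    private transient boolean checkpointingEnabled;

    @Override
    public void open(Configuration parameters) throws Exception {
        // same cast and check FlinkKafkaConsumerBase uses to pick its offset commit mode
        StreamingRuntimeContext ctx = (StreamingRuntimeContext) getRuntimeContext();
        this.checkpointingEnabled = ctx.isCheckpointingEnabled();
        // subtask index and parallelism could be used to pick this subtask's share of the
        // work, analogous to the consumer's per-subtask partition assignment
        int subtask = ctx.getIndexOfThisSubtask();
        int parallelism = ctx.getNumberOfParallelSubtasks();
        LOG.info("Subtask {}/{} opened, checkpointing enabled: {}", subtask, parallelism, checkpointingEnabled);
    }

    @Override
    public void run(SourceContext<String> sourceContext) throws Exception {
        while (running) {
            synchronized (sourceContext.getCheckpointLock()) {
                sourceContext.collect(checkpointingEnabled ? "exactly-once capable" : "best effort");
            }
            Thread.sleep(1_000L);
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}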
Use of org.apache.flink.streaming.api.operators.StreamingRuntimeContext in project flink by apache.
From the class FlinkKafkaConsumerBase, method run().
@Override
public void run(SourceContext<T> sourceContext) throws Exception {
if (subscribedPartitionsToStartOffsets == null) {
throw new Exception("The partitions were not set for the consumer");
}
// initialize commit metrics and default offset callback method
this.successfulCommits = this.getRuntimeContext().getMetricGroup().counter(COMMITS_SUCCEEDED_METRICS_COUNTER);
this.failedCommits = this.getRuntimeContext().getMetricGroup().counter(COMMITS_FAILED_METRICS_COUNTER);
final int subtaskIndex = this.getRuntimeContext().getIndexOfThisSubtask();
this.offsetCommitCallback = new KafkaCommitCallback() {
@Override
public void onSuccess() {
successfulCommits.inc();
}
@Override
public void onException(Throwable cause) {
LOG.warn(String.format("Consumer subtask %d failed async Kafka commit.", subtaskIndex), cause);
failedCommits.inc();
}
};
// mark the subtask as temporarily idle if there are no initial seed partitions; once this
// subtask discovers some partitions and starts collecting records, its status will automatically be triggered back to be active.
if (subscribedPartitionsToStartOffsets.isEmpty()) {
sourceContext.markAsTemporarilyIdle();
}
LOG.info("Consumer subtask {} creating fetcher with offsets {}.", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets);
// from this point forward:
// - 'snapshotState' will draw offsets from the fetcher,
// instead of being built from `subscribedPartitionsToStartOffsets`
// - 'notifyCheckpointComplete' will start to do work (i.e. commit offsets to
// Kafka through the fetcher, if configured to do so)
this.kafkaFetcher = createFetcher(sourceContext, subscribedPartitionsToStartOffsets, watermarkStrategy, (StreamingRuntimeContext) getRuntimeContext(), offsetCommitMode, getRuntimeContext().getMetricGroup().addGroup(KAFKA_CONSUMER_METRICS_GROUP), useMetrics);
if (!running) {
return;
}
// run only the main fetch loop if partition discovery is disabled; otherwise the periodic discovery loop is executed alongside it
if (discoveryIntervalMillis == PARTITION_DISCOVERY_DISABLED) {
kafkaFetcher.runFetchLoop();
} else {
runWithPartitionDiscovery();
}
}
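The run() method above obtains its commit counters from getRuntimeContext().getMetricGroup(). A small, hypothetical sketch of the same counter pattern in an ordinary user function (the class name CountingMapper and the counter names mySuccessfulOps and myFailedOps are made up):

import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.metrics.Counter;

public class CountingMapper extends RichMapFunction<String, String> {

    private transient Counter successes;
    private transient Counter failures;

    @Override
    public void open(Configuration parameters) {
        // counters registered on the subtask's metric group, like the consumer's
        // COMMITS_SUCCEEDED / COMMITS_FAILED counters
        successes = getRuntimeContext().getMetricGroup().counter("mySuccessfulOps");
        failures = getRuntimeContext().getMetricGroup().counter("myFailedOps");
    }

    @Override
    public String map(String value) {
        try {
            String result = value.trim();   // placeholder for the real work
            successes.inc();
            return result;
        } catch (RuntimeException e) {
            failures.inc();
            throw e;
        }
    }
}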
Use of org.apache.flink.streaming.api.operators.StreamingRuntimeContext in project flink by apache.
From the class FlinkKafkaProducerBase, method open().
// ----------------------------------- Utilities --------------------------
/**
* Initializes the connection to Kafka.
*/
@Override
public void open(Configuration configuration) throws Exception {
if (schema instanceof KeyedSerializationSchemaWrapper) {
((KeyedSerializationSchemaWrapper<IN>) schema).getSerializationSchema().open(RuntimeContextInitializationContextAdapters.serializationAdapter(getRuntimeContext(), metricGroup -> metricGroup.addGroup("user")));
}
producer = getKafkaProducer(this.producerConfig);
RuntimeContext ctx = getRuntimeContext();
if (null != flinkKafkaPartitioner) {
flinkKafkaPartitioner.open(ctx.getIndexOfThisSubtask(), ctx.getNumberOfParallelSubtasks());
}
LOG.info("Starting FlinkKafkaProducer ({}/{}) to produce into default topic {}", ctx.getIndexOfThisSubtask() + 1, ctx.getNumberOfParallelSubtasks(), defaultTopicId);
// register Kafka metrics to Flink accumulators
if (!Boolean.parseBoolean(producerConfig.getProperty(KEY_DISABLE_METRICS, "false"))) {
Map<MetricName, ? extends Metric> metrics = this.producer.metrics();
if (metrics == null) {
// MapR's Kafka implementation returns null here.
LOG.info("Producer implementation does not support metrics");
} else {
final MetricGroup kafkaMetricGroup = getRuntimeContext().getMetricGroup().addGroup("KafkaProducer");
for (Map.Entry<MetricName, ? extends Metric> metric : metrics.entrySet()) {
kafkaMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));
}
}
}
if (flushOnCheckpoint && !((StreamingRuntimeContext) this.getRuntimeContext()).isCheckpointingEnabled()) {
LOG.warn("Flushing on checkpoint is enabled, but checkpointing is not enabled. Disabling flushing.");
flushOnCheckpoint = false;
}
if (logFailuresOnly) {
callback = new Callback() {
@Override
public void onCompletion(RecordMetadata metadata, Exception e) {
if (e != null) {
LOG.error("Error while sending record to Kafka: " + e.getMessage(), e);
}
acknowledgeMessage();
}
};
} else {
callback = new Callback() {
@Override
public void onCompletion(RecordMetadata metadata, Exception exception) {
if (exception != null && asyncException == null) {
asyncException = exception;
}
acknowledgeMessage();
}
};
}
}
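The producer's open() combines two StreamingRuntimeContext idioms: downgrading a checkpoint-dependent feature when checkpointing is disabled, and exposing internal state through gauges in a dedicated metric subgroup. A hypothetical sketch of the same pattern in a custom sink (the class name BufferingSink, the group name, and the gauge name are made up):

import org.apache.flink.configuration.Configuration;
import org.apache.flink.metrics.Gauge;
import org.apache.flink.metrics.MetricGroup;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.api.operators.StreamingRuntimeContext;

import java.util.concurrent.atomic.AtomicLong;

public class BufferingSink extends RichSinkFunction<String> {

    private boolean flushOnCheckpoint = true;
    private final AtomicLong pendingRecords = new AtomicLong();

    @Override
    public void open(Configuration parameters) {
        // same guard FlinkKafkaProducerBase applies to flushOnCheckpoint
        if (flushOnCheckpoint
                && !((StreamingRuntimeContext) getRuntimeContext()).isCheckpointingEnabled()) {
            flushOnCheckpoint = false;
        }
        // register a gauge in a dedicated subgroup, like the "KafkaProducer" metric group
        MetricGroup group = getRuntimeContext().getMetricGroup().addGroup("BufferingSink");
        group.gauge("pendingRecords", (Gauge<Long>) pendingRecords::get);
    }

    @Override
    public void invoke(String value, Context context) {
        pendingRecords.incrementAndGet();
        // ... hand the record to an asynchronous client; decrement in its completion callback
    }
}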
Use of org.apache.flink.streaming.api.operators.StreamingRuntimeContext in project flink by apache.
From the class SinkWriterOperator, method emitCommittables().
private void emitCommittables(Long checkpointId) throws IOException, InterruptedException {
if (!emitDownstream) {
// prepareCommit still has to be called to flush the writer, although no committables are forwarded downstream
if (sinkWriter instanceof PrecommittingSinkWriter) {
((PrecommittingSinkWriter<?, ?>) sinkWriter).prepareCommit();
}
return;
}
Collection<CommT> committables = ((PrecommittingSinkWriter<?, CommT>) sinkWriter).prepareCommit();
StreamingRuntimeContext runtimeContext = getRuntimeContext();
int indexOfThisSubtask = runtimeContext.getIndexOfThisSubtask();
output.collect(new StreamRecord<>(new CommittableSummary<>(indexOfThisSubtask, runtimeContext.getNumberOfParallelSubtasks(), checkpointId, committables.size(), committables.size(), 0)));
for (CommT committable : committables) {
output.collect(new StreamRecord<>(new CommittableWithLineage<>(committable, checkpointId, indexOfThisSubtask)));
}
}
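Inside an operator such as SinkWriterOperator no cast is needed: AbstractStreamOperator.getRuntimeContext() is already typed as StreamingRuntimeContext. A hypothetical sketch (the class name TaggingOperator is made up) illustrating the same access pattern in a simple one-input operator:

import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
import org.apache.flink.streaming.api.operators.StreamingRuntimeContext;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;

public class TaggingOperator extends AbstractStreamOperator<String>
        implements OneInputStreamOperator<String, String> {

    @Override
    public void processElement(StreamRecord<String> element) {
        // no cast required, unlike in rich functions
        StreamingRuntimeContext runtimeContext = getRuntimeContext();
        int subtask = runtimeContext.getIndexOfThisSubtask();
        int parallelism = runtimeContext.getNumberOfParallelSubtasks();
        // tag each record with the producing subtask, similar to how the sink operator
        // stamps its CommittableSummary with subtask id and parallelism
        output.collect(element.replace(
                element.getValue() + " [" + subtask + "/" + parallelism + "]"));
    }
}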
Use of org.apache.flink.streaming.api.operators.StreamingRuntimeContext in project flink by apache.
From the class BufferDataOverWindowOperatorTest, method test().
private void test(OverWindowFrame[] frames, GenericRowData[] expect) throws Exception {
MockEnvironment env = new MockEnvironmentBuilder().setIOManager(ioManager).setMemoryManager(memoryManager).build();
StreamTask<Object, StreamOperator<Object>> task = new StreamTask<Object, StreamOperator<Object>>(env) {
@Override
protected void init() {
}
};
operator = new BufferDataOverWindowOperator(frames, comparator, true) {
{
output = new NonBufferOverWindowOperatorTest.ConsumerOutput(new Consumer<RowData>() {
@Override
public void accept(RowData r) {
collect.add(GenericRowData.of(r.getInt(0), r.getLong(1), r.getLong(2), r.getLong(3), r.getLong(4)));
}
});
}
@Override
public ClassLoader getUserCodeClassloader() {
return Thread.currentThread().getContextClassLoader();
}
@Override
public StreamConfig getOperatorConfig() {
StreamConfig conf = mock(StreamConfig.class);
when(conf.<RowData>getTypeSerializerIn1(getUserCodeClassloader())).thenReturn(inputSer);
when(conf.getManagedMemoryFractionOperatorUseCaseOfSlot(eq(ManagedMemoryUseCase.OPERATOR), any(Configuration.class), any(ClassLoader.class))).thenReturn(0.99);
return conf;
}
@Override
public StreamTask<?, ?> getContainingTask() {
return task;
}
@Override
public StreamingRuntimeContext getRuntimeContext() {
return mock(StreamingRuntimeContext.class);
}
};
operator.setProcessingTimeService(new TestProcessingTimeService());
operator.open();
addRow(0, 1L, 4L); /* 1 */
addRow(0, 1L, 1L); /* 2 */
addRow(0, 1L, 1L); /* 3 */
addRow(0, 1L, 1L); /* 4 */
addRow(1, 5L, 2L); /* 5 */
addRow(2, 5L, 4L); /* 6 */
addRow(2, 6L, 2L); /* 7 */
addRow(2, 6L, 2L); /* 8 */
addRow(2, 6L, 2L); /* 9 */
operator.endInput();
GenericRowData[] outputs = this.collect.toArray(new GenericRowData[0]);
Assert.assertArrayEquals(expect, outputs);
operator.close();
}
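The test above satisfies the operator's runtime-context requirement with mock(StreamingRuntimeContext.class). A hypothetical, stripped-down JUnit 4 test (class and method names are made up) showing the same Mockito stubbing in isolation:

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import org.apache.flink.streaming.api.operators.StreamingRuntimeContext;
import org.junit.Assert;
import org.junit.Test;

public class StreamingRuntimeContextMockTest {

    @Test
    public void stubbedContextReportsConfiguredSubtask() {
        // a mocked StreamingRuntimeContext with just enough behaviour stubbed for code
        // that only needs the subtask index and the parallelism
        StreamingRuntimeContext runtimeContext = mock(StreamingRuntimeContext.class);
        when(runtimeContext.getIndexOfThisSubtask()).thenReturn(2);
        when(runtimeContext.getNumberOfParallelSubtasks()).thenReturn(4);

        // the code under test would receive this context instead of a real one
        Assert.assertEquals(2, runtimeContext.getIndexOfThisSubtask());
        Assert.assertEquals(4, runtimeContext.getNumberOfParallelSubtasks());
    }
}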