use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
the class KafkaConsumerTestBase method runStartFromSpecificOffsets.
/**
* This test ensures that the consumer correctly uses user-supplied specific offsets when
* explicitly configured to start from specific offsets. For partitions which a specific offset
* can not be found for, the starting position for them should fallback to the group offsets
* behaviour.
*
* <p>4 partitions will have 50 records with offsets 0 to 49. The supplied specific offsets map
* is: partition 0 --> start from offset 19 partition 1 --> not set partition 2 --> start from
* offset 22 partition 3 --> not set partition 4 --> start from offset 26 (this should be
* ignored because the partition does not exist)
*
* <p>The partitions and their committed group offsets are setup as: partition 0 --> committed
* offset 23 partition 1 --> committed offset 31 partition 2 --> committed offset 43 partition 3
* --> no commit offset
*
* <p>When configured to start from these specific offsets, each partition should read:
* partition 0 --> start from offset 19, read to offset 49 (31 records) partition 1 --> fallback
* to group offsets, so start from offset 31, read to offset 49 (19 records) partition 2 -->
* start from offset 22, read to offset 49 (28 records) partition 3 --> fallback to group
* offsets, but since there is no group offset for this partition, will default to
* "auto.offset.reset" (set to "earliest"), so start from offset 0, read to offset 49 (50
* records)
*/
public void runStartFromSpecificOffsets() throws Exception {
// 4 partitions with 50 records each (offsets 0-49)
final int parallelism = 4;
final int recordsInEachPartition = 50;
final String topicName = writeSequence("testStartFromSpecificOffsetsTopic", recordsInEachPartition, parallelism, 1);
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(parallelism);
Properties readProps = new Properties();
readProps.putAll(standardProps);
readProps.setProperty("auto.offset.reset", // partition 3 should default back to this behaviour
"earliest");
Map<KafkaTopicPartition, Long> specificStartupOffsets = new HashMap<>();
specificStartupOffsets.put(new KafkaTopicPartition(topicName, 0), 19L);
specificStartupOffsets.put(new KafkaTopicPartition(topicName, 2), 22L);
specificStartupOffsets.put(new KafkaTopicPartition(topicName, 4), // non-existing partition, should be ignored
26L);
// only the committed offset for partition 1 should be used, because partition 1 has no
// entry in specific offset map
KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = kafkaServer.createOffsetHandler();
kafkaOffsetHandler.setCommittedOffset(topicName, 0, 23);
kafkaOffsetHandler.setCommittedOffset(topicName, 1, 31);
kafkaOffsetHandler.setCommittedOffset(topicName, 2, 43);
Map<Integer, Tuple2<Integer, Integer>> partitionsToValueCountAndStartOffsets = new HashMap<>();
partitionsToValueCountAndStartOffsets.put(0, // partition 0 should read offset 19-49
new Tuple2<>(31, 19));
partitionsToValueCountAndStartOffsets.put(1, // partition 1 should read offset 31-49
new Tuple2<>(19, 31));
partitionsToValueCountAndStartOffsets.put(2, // partition 2 should read offset 22-49
new Tuple2<>(28, 22));
partitionsToValueCountAndStartOffsets.put(3, // partition 3 should read offset 0-49
new Tuple2<>(50, 0));
readSequence(env, StartupMode.SPECIFIC_OFFSETS, specificStartupOffsets, null, readProps, topicName, partitionsToValueCountAndStartOffsets);
kafkaOffsetHandler.close();
deleteTestTopic(topicName);
}
use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
the class FlinkKafkaConsumerBaseMigrationTest method testRestoreFromEmptyStateWithPartitions.
/**
* Test restoring from an empty state taken using a previous Flink version, when some partitions
* could be found for topics.
*/
@Test
public void testRestoreFromEmptyStateWithPartitions() throws Exception {
final List<KafkaTopicPartition> partitions = new ArrayList<>(PARTITION_STATE.keySet());
final DummyFlinkKafkaConsumer<String> consumerFunction = new DummyFlinkKafkaConsumer<>(TOPICS, partitions, FlinkKafkaConsumerBase.PARTITION_DISCOVERY_DISABLED);
StreamSource<String, DummyFlinkKafkaConsumer<String>> consumerOperator = new StreamSource<>(consumerFunction);
final AbstractStreamOperatorTestHarness<String> testHarness = new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
testHarness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);
testHarness.setup();
// restore state from binary snapshot file
testHarness.initializeState(OperatorSnapshotUtil.getResourceFilename("kafka-consumer-migration-test-flink" + testMigrateVersion + "-empty-state-snapshot"));
testHarness.open();
// the expected state in "kafka-consumer-migration-test-flink1.x-snapshot-empty-state";
// all new partitions after the snapshot are considered as partitions that were created
// while the
// consumer wasn't running, and should start from the earliest offset.
final HashMap<KafkaTopicPartition, Long> expectedSubscribedPartitionsWithStartOffsets = new HashMap<>();
for (KafkaTopicPartition partition : PARTITION_STATE.keySet()) {
expectedSubscribedPartitionsWithStartOffsets.put(partition, KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET);
}
// assert that there are partitions and is identical to expected list
assertTrue(consumerFunction.getSubscribedPartitionsToStartOffsets() != null);
assertTrue(!consumerFunction.getSubscribedPartitionsToStartOffsets().isEmpty());
assertEquals(expectedSubscribedPartitionsWithStartOffsets, consumerFunction.getSubscribedPartitionsToStartOffsets());
// the new partitions should have been considered as restored state
assertTrue(consumerFunction.getRestoredState() != null);
assertTrue(!consumerFunction.getSubscribedPartitionsToStartOffsets().isEmpty());
for (Map.Entry<KafkaTopicPartition, Long> expectedEntry : expectedSubscribedPartitionsWithStartOffsets.entrySet()) {
assertEquals(expectedEntry.getValue(), consumerFunction.getRestoredState().get(expectedEntry.getKey()));
}
consumerOperator.close();
consumerOperator.cancel();
}
use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
the class FlinkKafkaConsumerBaseMigrationTest method writeSnapshot.
private void writeSnapshot(String path, HashMap<KafkaTopicPartition, Long> state) throws Exception {
final OneShotLatch latch = new OneShotLatch();
final AbstractFetcher<String, ?> fetcher = mock(AbstractFetcher.class);
doAnswer(new Answer<Void>() {
@Override
public Void answer(InvocationOnMock invocation) throws Throwable {
latch.trigger();
return null;
}
}).when(fetcher).runFetchLoop();
when(fetcher.snapshotCurrentState()).thenReturn(state);
final List<KafkaTopicPartition> partitions = new ArrayList<>(PARTITION_STATE.keySet());
final DummyFlinkKafkaConsumer<String> consumerFunction = new DummyFlinkKafkaConsumer<>(fetcher, TOPICS, partitions, FlinkKafkaConsumerBase.PARTITION_DISCOVERY_DISABLED);
StreamSource<String, DummyFlinkKafkaConsumer<String>> consumerOperator = new StreamSource<>(consumerFunction);
final AbstractStreamOperatorTestHarness<String> testHarness = new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
testHarness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);
testHarness.setup();
testHarness.open();
final Throwable[] error = new Throwable[1];
// run the source asynchronously
Thread runner = new Thread() {
@Override
public void run() {
try {
consumerFunction.run(new DummySourceContext() {
@Override
public void collect(String element) {
}
});
} catch (Throwable t) {
t.printStackTrace();
error[0] = t;
}
}
};
runner.start();
if (!latch.isTriggered()) {
latch.await();
}
final OperatorSubtaskState snapshot;
synchronized (testHarness.getCheckpointLock()) {
snapshot = testHarness.snapshot(0L, 0L);
}
OperatorSnapshotUtil.writeStateHandle(snapshot, path);
consumerOperator.close();
runner.join();
}
use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
the class FlinkKafkaConsumerBase method open.
// ------------------------------------------------------------------------
// Work methods
// ------------------------------------------------------------------------
@Override
public void open(Configuration configuration) throws Exception {
// determine the offset commit mode
this.offsetCommitMode = OffsetCommitModes.fromConfiguration(getIsAutoCommitEnabled(), enableCommitOnCheckpoints, ((StreamingRuntimeContext) getRuntimeContext()).isCheckpointingEnabled());
// create the partition discoverer
this.partitionDiscoverer = createPartitionDiscoverer(topicsDescriptor, getRuntimeContext().getIndexOfThisSubtask(), getRuntimeContext().getNumberOfParallelSubtasks());
this.partitionDiscoverer.open();
subscribedPartitionsToStartOffsets = new HashMap<>();
final List<KafkaTopicPartition> allPartitions = partitionDiscoverer.discoverPartitions();
if (restoredState != null) {
for (KafkaTopicPartition partition : allPartitions) {
if (!restoredState.containsKey(partition)) {
restoredState.put(partition, KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET);
}
}
for (Map.Entry<KafkaTopicPartition, Long> restoredStateEntry : restoredState.entrySet()) {
// restored partitions that should not be subscribed by this subtask
if (KafkaTopicPartitionAssigner.assign(restoredStateEntry.getKey(), getRuntimeContext().getNumberOfParallelSubtasks()) == getRuntimeContext().getIndexOfThisSubtask()) {
subscribedPartitionsToStartOffsets.put(restoredStateEntry.getKey(), restoredStateEntry.getValue());
}
}
if (filterRestoredPartitionsWithCurrentTopicsDescriptor) {
subscribedPartitionsToStartOffsets.entrySet().removeIf(entry -> {
if (!topicsDescriptor.isMatchingTopic(entry.getKey().getTopic())) {
LOG.warn("{} is removed from subscribed partitions since it is no longer associated with topics descriptor of current execution.", entry.getKey());
return true;
}
return false;
});
}
LOG.info("Consumer subtask {} will start reading {} partitions with offsets in restored state: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), subscribedPartitionsToStartOffsets);
} else {
// when the partition is actually read.
switch(startupMode) {
case SPECIFIC_OFFSETS:
if (specificStartupOffsets == null) {
throw new IllegalStateException("Startup mode for the consumer set to " + StartupMode.SPECIFIC_OFFSETS + ", but no specific offsets were specified.");
}
for (KafkaTopicPartition seedPartition : allPartitions) {
Long specificOffset = specificStartupOffsets.get(seedPartition);
if (specificOffset != null) {
// since the specified offsets represent the next record to read, we
// subtract
// it by one so that the initial state of the consumer will be correct
subscribedPartitionsToStartOffsets.put(seedPartition, specificOffset - 1);
} else {
// default to group offset behaviour if the user-provided specific
// offsets
// do not contain a value for this partition
subscribedPartitionsToStartOffsets.put(seedPartition, KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
}
}
break;
case TIMESTAMP:
if (startupOffsetsTimestamp == null) {
throw new IllegalStateException("Startup mode for the consumer set to " + StartupMode.TIMESTAMP + ", but no startup timestamp was specified.");
}
for (Map.Entry<KafkaTopicPartition, Long> partitionToOffset : fetchOffsetsWithTimestamp(allPartitions, startupOffsetsTimestamp).entrySet()) {
subscribedPartitionsToStartOffsets.put(partitionToOffset.getKey(), (partitionToOffset.getValue() == null) ? // we default to using the latest offset for the partition
KafkaTopicPartitionStateSentinel.LATEST_OFFSET : // be correct
partitionToOffset.getValue() - 1);
}
break;
default:
for (KafkaTopicPartition seedPartition : allPartitions) {
subscribedPartitionsToStartOffsets.put(seedPartition, startupMode.getStateSentinel());
}
}
if (!subscribedPartitionsToStartOffsets.isEmpty()) {
switch(startupMode) {
case EARLIEST:
LOG.info("Consumer subtask {} will start reading the following {} partitions from the earliest offsets: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), subscribedPartitionsToStartOffsets.keySet());
break;
case LATEST:
LOG.info("Consumer subtask {} will start reading the following {} partitions from the latest offsets: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), subscribedPartitionsToStartOffsets.keySet());
break;
case TIMESTAMP:
LOG.info("Consumer subtask {} will start reading the following {} partitions from timestamp {}: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), startupOffsetsTimestamp, subscribedPartitionsToStartOffsets.keySet());
break;
case SPECIFIC_OFFSETS:
LOG.info("Consumer subtask {} will start reading the following {} partitions from the specified startup offsets {}: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), specificStartupOffsets, subscribedPartitionsToStartOffsets.keySet());
List<KafkaTopicPartition> partitionsDefaultedToGroupOffsets = new ArrayList<>(subscribedPartitionsToStartOffsets.size());
for (Map.Entry<KafkaTopicPartition, Long> subscribedPartition : subscribedPartitionsToStartOffsets.entrySet()) {
if (subscribedPartition.getValue() == KafkaTopicPartitionStateSentinel.GROUP_OFFSET) {
partitionsDefaultedToGroupOffsets.add(subscribedPartition.getKey());
}
}
if (partitionsDefaultedToGroupOffsets.size() > 0) {
LOG.warn("Consumer subtask {} cannot find offsets for the following {} partitions in the specified startup offsets: {}" + "; their startup offsets will be defaulted to their committed group offsets in Kafka.", getRuntimeContext().getIndexOfThisSubtask(), partitionsDefaultedToGroupOffsets.size(), partitionsDefaultedToGroupOffsets);
}
break;
case GROUP_OFFSETS:
LOG.info("Consumer subtask {} will start reading the following {} partitions from the committed group offsets in Kafka: {}", getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), subscribedPartitionsToStartOffsets.keySet());
}
} else {
LOG.info("Consumer subtask {} initially has no partitions to read from.", getRuntimeContext().getIndexOfThisSubtask());
}
}
this.deserializer.open(RuntimeContextInitializationContextAdapters.deserializationAdapter(getRuntimeContext(), metricGroup -> metricGroup.addGroup("user")));
}
use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
the class FlinkKafkaConsumerBase method initializeState.
// ------------------------------------------------------------------------
// Checkpoint and restore
// ------------------------------------------------------------------------
@Override
public final void initializeState(FunctionInitializationContext context) throws Exception {
OperatorStateStore stateStore = context.getOperatorStateStore();
this.unionOffsetStates = stateStore.getUnionListState(new ListStateDescriptor<>(OFFSETS_STATE_NAME, createStateSerializer(getRuntimeContext().getExecutionConfig())));
if (context.isRestored()) {
restoredState = new TreeMap<>(new KafkaTopicPartition.Comparator());
// populate actual holder for restored state
for (Tuple2<KafkaTopicPartition, Long> kafkaOffset : unionOffsetStates.get()) {
restoredState.put(kafkaOffset.f0, kafkaOffset.f1);
}
LOG.info("Consumer subtask {} restored state: {}.", getRuntimeContext().getIndexOfThisSubtask(), restoredState);
} else {
LOG.info("Consumer subtask {} has no restore state.", getRuntimeContext().getIndexOfThisSubtask());
}
}
Aggregations