Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
The class FlinkKafkaConsumerBase, method logPartitionInfo.
/**
 * Logs the partition information at INFO level.
 *
 * @param logger The logger to log to.
 * @param partitionInfos List of subscribed partitions.
 */
protected static void logPartitionInfo(Logger logger, List<KafkaTopicPartition> partitionInfos) {
    Map<String, Integer> countPerTopic = new HashMap<>();
    for (KafkaTopicPartition partition : partitionInfos) {
        Integer count = countPerTopic.get(partition.getTopic());
        if (count == null) {
            count = 1;
        } else {
            count++;
        }
        countPerTopic.put(partition.getTopic(), count);
    }
    StringBuilder sb = new StringBuilder("Consumer is going to read the following topics (with number of partitions): ");
    for (Map.Entry<String, Integer> e : countPerTopic.entrySet()) {
        sb.append(e.getKey()).append(" (").append(e.getValue()).append("), ");
    }
    logger.info(sb.toString());
}
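For illustration, a minimal usage sketch (not from the Flink sources): the topics, partition numbers, and logger name are invented, and the call assumes code in a FlinkKafkaConsumerBase subclass or the same package, since the method is protected.

// Sketch only: assumes a FlinkKafkaConsumerBase subclass context.
Logger logger = LoggerFactory.getLogger("partition-info-example"); // org.slf4j
List<KafkaTopicPartition> subscribed =
        Arrays.asList(
                new KafkaTopicPartition("orders", 0),   // invented topics/partitions
                new KafkaTopicPartition("orders", 1),
                new KafkaTopicPartition("payments", 0));
logPartitionInfo(logger, subscribed);
// Logs something like (topic order may vary, since countPerTopic is a HashMap):
// Consumer is going to read the following topics (with number of partitions): orders (2), payments (1),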
Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
The class KafkaConsumerTestBase, method readSequence.
// ------------------------------------------------------------------------
// Reading / writing test data sets
// ------------------------------------------------------------------------
/**
 * Runs a job using the provided environment to read a sequence of records from a single Kafka
 * topic. The method lets the caller individually specify the expected starting offset and total
 * read value count of each partition. The job is considered successful only if all partition
 * read results match the start offset and value count criteria.
 */
protected void readSequence(
        final StreamExecutionEnvironment env,
        final StartupMode startupMode,
        final Map<KafkaTopicPartition, Long> specificStartupOffsets,
        final Long startupTimestamp,
        final Properties cc,
        final String topicName,
        final Map<Integer, Tuple2<Integer, Integer>> partitionsToValuesCountAndStartOffset)
        throws Exception {
    final int sourceParallelism = partitionsToValuesCountAndStartOffset.keySet().size();

    int finalCountTmp = 0;
    for (Map.Entry<Integer, Tuple2<Integer, Integer>> valuesCountAndStartOffset : partitionsToValuesCountAndStartOffset.entrySet()) {
        finalCountTmp += valuesCountAndStartOffset.getValue().f0;
    }
    final int finalCount = finalCountTmp;

    final TypeInformation<Tuple2<Integer, Integer>> intIntTupleType =
            TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});
    final TypeInformationSerializationSchema<Tuple2<Integer, Integer>> deser =
            new TypeInformationSerializationSchema<>(intIntTupleType, env.getConfig());

    // create the consumer
    cc.putAll(secureProps);
    DataStreamSource<Tuple2<Integer, Integer>> source;
    if (useNewSource) {
        KafkaSourceBuilder<Tuple2<Integer, Integer>> sourceBuilder = kafkaServer.getSourceBuilder(topicName, deser, cc);
        Map<TopicPartition, Long> startOffsets = new HashMap<>();
        if (specificStartupOffsets != null) {
            specificStartupOffsets.forEach(
                    (ktp, offset) -> startOffsets.put(new TopicPartition(ktp.getTopic(), ktp.getPartition()), offset));
        }
        setKafkaSourceOffset(startupMode, sourceBuilder, startOffsets, startupTimestamp);
        source = env.fromSource(sourceBuilder.build(), WatermarkStrategy.noWatermarks(), "KafkaSource");
    } else {
        FlinkKafkaConsumerBase<Tuple2<Integer, Integer>> consumer = kafkaServer.getConsumer(topicName, deser, cc);
        setKafkaConsumerOffset(startupMode, consumer, specificStartupOffsets, startupTimestamp);
        source = env.addSource(consumer);
    }

    source.setParallelism(sourceParallelism)
            .map(new ThrottledMapper<>(20))
            .setParallelism(sourceParallelism)
            .flatMap(new RichFlatMapFunction<Tuple2<Integer, Integer>, Integer>() {

                private HashMap<Integer, BitSet> partitionsToValueCheck;
                private int count = 0;

                @Override
                public void open(Configuration parameters) throws Exception {
                    partitionsToValueCheck = new HashMap<>();
                    for (Integer partition : partitionsToValuesCountAndStartOffset.keySet()) {
                        partitionsToValueCheck.put(partition, new BitSet());
                    }
                }

                @Override
                public void flatMap(Tuple2<Integer, Integer> value, Collector<Integer> out) throws Exception {
                    int partition = value.f0;
                    int val = value.f1;
                    BitSet bitSet = partitionsToValueCheck.get(partition);
                    if (bitSet == null) {
                        throw new RuntimeException("Got a record from an unknown partition");
                    } else {
                        bitSet.set(val - partitionsToValuesCountAndStartOffset.get(partition).f1);
                    }
                    count++;
                    LOG.info("Received message {}, total {} messages", value, count);

                    // verify if we've seen everything
                    if (count == finalCount) {
                        for (Map.Entry<Integer, BitSet> partitionsToValueCheck : this.partitionsToValueCheck.entrySet()) {
                            BitSet check = partitionsToValueCheck.getValue();
                            int expectedValueCount = partitionsToValuesCountAndStartOffset.get(partitionsToValueCheck.getKey()).f0;
                            if (check.cardinality() != expectedValueCount) {
                                throw new RuntimeException("Expected cardinality to be " + expectedValueCount + ", but was " + check.cardinality());
                            } else if (check.nextClearBit(0) != expectedValueCount) {
                                throw new RuntimeException("Expected next clear bit to be " + expectedValueCount + ", but was " + check.nextClearBit(0));
                            }
                        }
                        // test has passed
                        throw new SuccessException();
                    }
                }
            })
            .setParallelism(1);

    tryExecute(env, "Read data from Kafka");
    LOG.info("Successfully read sequence for verification");
}
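For illustration, a hedged sketch of how a concrete test might call this helper; the topic name, per-partition counts, and the consumerProps variable are invented, and consumerProps is assumed to already carry the broker connection settings that the test base normally supplies.

// Hypothetical invocation: 3 partitions, each expected to yield 50 values starting at offset 0.
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

final Map<Integer, Tuple2<Integer, Integer>> partitionInfo = new HashMap<>();
partitionInfo.put(0, new Tuple2<>(50, 0)); // partition -> (value count, start offset)
partitionInfo.put(1, new Tuple2<>(50, 0));
partitionInfo.put(2, new Tuple2<>(50, 0));

readSequence(
        env,
        StartupMode.EARLIEST,     // read from the earliest offset
        null,                     // no specific startup offsets
        null,                     // no startup timestamp
        consumerProps,            // assumed to contain bootstrap.servers etc.
        "readSequenceTestTopic",  // hypothetical topic
        partitionInfo);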
Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
The class KafkaConnectorOptionsUtil, method getStartupOptions.
public static StartupOptions getStartupOptions(ReadableConfig tableOptions) {
    final Map<KafkaTopicPartition, Long> specificOffsets = new HashMap<>();
    final StartupMode startupMode =
            tableOptions
                    .getOptional(SCAN_STARTUP_MODE)
                    .map(KafkaConnectorOptionsUtil::fromOption)
                    .orElse(StartupMode.GROUP_OFFSETS);
    if (startupMode == StartupMode.SPECIFIC_OFFSETS) {
        // This will be refactored once specific offsets for multiple topics are supported in
        // FLINK-18602. validateScanStartupMode() has already checked that tableOptions.get(TOPIC)
        // contains exactly one topic.
        buildSpecificOffsets(tableOptions, tableOptions.get(TOPIC).get(0), specificOffsets);
    }
    final StartupOptions options = new StartupOptions();
    options.startupMode = startupMode;
    options.specificOffsets = specificOffsets;
    if (startupMode == StartupMode.TIMESTAMP) {
        options.startupTimestampMillis = tableOptions.get(SCAN_STARTUP_TIMESTAMP_MILLIS);
    }
    return options;
}
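For illustration, a hedged sketch of calling this utility with a Configuration standing in for the ReadableConfig; the option keys follow the connector's documented scan.startup.* naming, and the timestamp value is arbitrary.

// Sketch only: drive getStartupOptions with string-valued table options.
Configuration tableOptions = new Configuration(); // implements ReadableConfig
tableOptions.setString("scan.startup.mode", "timestamp");
tableOptions.setString("scan.startup.timestamp-millis", "1640995200000"); // arbitrary value

StartupOptions options = KafkaConnectorOptionsUtil.getStartupOptions(tableOptions);
// options.startupMode            == StartupMode.TIMESTAMP
// options.startupTimestampMillis == 1640995200000L
// options.specificOffsets stays empty because the mode is not 'specific-offsets'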
Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
The class FlinkKafkaConsumerBaseTest, method testSnapshotStateWithCommitOnCheckpointsEnabled.
@Test
@SuppressWarnings("unchecked")
public void testSnapshotStateWithCommitOnCheckpointsEnabled() throws Exception {
    // --------------------------------------------------------------------
    //  prepare fake states
    // --------------------------------------------------------------------
    final HashMap<KafkaTopicPartition, Long> state1 = new HashMap<>();
    state1.put(new KafkaTopicPartition("abc", 13), 16768L);
    state1.put(new KafkaTopicPartition("def", 7), 987654321L);

    final HashMap<KafkaTopicPartition, Long> state2 = new HashMap<>();
    state2.put(new KafkaTopicPartition("abc", 13), 16770L);
    state2.put(new KafkaTopicPartition("def", 7), 987654329L);

    final HashMap<KafkaTopicPartition, Long> state3 = new HashMap<>();
    state3.put(new KafkaTopicPartition("abc", 13), 16780L);
    state3.put(new KafkaTopicPartition("def", 7), 987654377L);
    // --------------------------------------------------------------------

    final MockFetcher<String> fetcher = new MockFetcher<>(state1, state2, state3);
    final FlinkKafkaConsumerBase<String> consumer =
            new DummyFlinkKafkaConsumer<>(fetcher, mock(AbstractPartitionDiscoverer.class), false);

    final TestingListState<Serializable> listState = new TestingListState<>();

    // setup and run the consumer; wait until the consumer reaches the main fetch loop before
    // continuing test
    setupConsumer(consumer, false, listState, true, 0, 1);

    final CheckedThread runThread =
            new CheckedThread() {
                @Override
                public void go() throws Exception {
                    consumer.run(new TestSourceContext<>());
                }
            };
    runThread.start();
    fetcher.waitUntilRun();

    assertEquals(0, consumer.getPendingOffsetsToCommit().size());

    // checkpoint 1
    consumer.snapshotState(new StateSnapshotContextSynchronousImpl(138, 138));

    HashMap<KafkaTopicPartition, Long> snapshot1 = new HashMap<>();
    for (Serializable serializable : listState.get()) {
        Tuple2<KafkaTopicPartition, Long> kafkaTopicPartitionLongTuple2 =
                (Tuple2<KafkaTopicPartition, Long>) serializable;
        snapshot1.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1);
    }

    assertEquals(state1, snapshot1);
    assertEquals(1, consumer.getPendingOffsetsToCommit().size());
    assertEquals(state1, consumer.getPendingOffsetsToCommit().get(138L));

    // checkpoint 2
    consumer.snapshotState(new StateSnapshotContextSynchronousImpl(140, 140));

    HashMap<KafkaTopicPartition, Long> snapshot2 = new HashMap<>();
    for (Serializable serializable : listState.get()) {
        Tuple2<KafkaTopicPartition, Long> kafkaTopicPartitionLongTuple2 =
                (Tuple2<KafkaTopicPartition, Long>) serializable;
        snapshot2.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1);
    }

    assertEquals(state2, snapshot2);
    assertEquals(2, consumer.getPendingOffsetsToCommit().size());
    assertEquals(state2, consumer.getPendingOffsetsToCommit().get(140L));

    // ack checkpoint 1
    consumer.notifyCheckpointComplete(138L);
    assertEquals(1, consumer.getPendingOffsetsToCommit().size());
    assertTrue(consumer.getPendingOffsetsToCommit().containsKey(140L));
    assertEquals(state1, fetcher.getAndClearLastCommittedOffsets());
    assertEquals(1, fetcher.getCommitCount());

    // checkpoint 3
    consumer.snapshotState(new StateSnapshotContextSynchronousImpl(141, 141));

    HashMap<KafkaTopicPartition, Long> snapshot3 = new HashMap<>();
    for (Serializable serializable : listState.get()) {
        Tuple2<KafkaTopicPartition, Long> kafkaTopicPartitionLongTuple2 =
                (Tuple2<KafkaTopicPartition, Long>) serializable;
        snapshot3.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1);
    }

    assertEquals(state3, snapshot3);
    assertEquals(2, consumer.getPendingOffsetsToCommit().size());
    assertEquals(state3, consumer.getPendingOffsetsToCommit().get(141L));

    // ack checkpoint 3, subsumes number 2
    consumer.notifyCheckpointComplete(141L);
    assertEquals(0, consumer.getPendingOffsetsToCommit().size());
    assertEquals(state3, fetcher.getAndClearLastCommittedOffsets());
    assertEquals(2, fetcher.getCommitCount());

    // invalid checkpoint
    consumer.notifyCheckpointComplete(666);
    assertEquals(0, consumer.getPendingOffsetsToCommit().size());
    assertNull(fetcher.getAndClearLastCommittedOffsets());
    assertEquals(2, fetcher.getCommitCount());

    consumer.cancel();
    runThread.sync();
}
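The assertions above boil down to a map from checkpoint id to offsets that is pruned when a checkpoint completes. A simplified, self-contained stand-in for that bookkeeping (only an illustration; the actual FlinkKafkaConsumerBase uses a different structure and additionally caps the number of pending checkpoints):

import java.util.Map;
import java.util.TreeMap;

import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;

/** Simplified stand-in for the pending-offsets bookkeeping the test asserts on. */
final class PendingOffsets {

    // checkpoint id -> offsets captured by that checkpoint (oldest first)
    private final TreeMap<Long, Map<KafkaTopicPartition, Long>> pending = new TreeMap<>();

    /** Called from snapshotState: remember the offsets of this checkpoint. */
    void onSnapshot(long checkpointId, Map<KafkaTopicPartition, Long> offsets) {
        pending.put(checkpointId, offsets);
    }

    /** Called from notifyCheckpointComplete: commit this checkpoint and drop older ones. */
    Map<KafkaTopicPartition, Long> onCheckpointComplete(long checkpointId) {
        Map<KafkaTopicPartition, Long> toCommit = pending.remove(checkpointId);
        if (toCommit == null) {
            return null; // unknown or already subsumed checkpoint id: nothing to commit
        }
        // completing checkpoint N subsumes every earlier pending checkpoint
        pending.headMap(checkpointId).clear();
        return toCommit;
    }

    int size() {
        return pending.size();
    }
}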
Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.
The class FlinkKafkaConsumerBaseTest, method testSnapshotStateWithCommitOnCheckpointsDisabled.
@Test
@SuppressWarnings("unchecked")
public void testSnapshotStateWithCommitOnCheckpointsDisabled() throws Exception {
    // --------------------------------------------------------------------
    //  prepare fake states
    // --------------------------------------------------------------------
    final HashMap<KafkaTopicPartition, Long> state1 = new HashMap<>();
    state1.put(new KafkaTopicPartition("abc", 13), 16768L);
    state1.put(new KafkaTopicPartition("def", 7), 987654321L);

    final HashMap<KafkaTopicPartition, Long> state2 = new HashMap<>();
    state2.put(new KafkaTopicPartition("abc", 13), 16770L);
    state2.put(new KafkaTopicPartition("def", 7), 987654329L);

    final HashMap<KafkaTopicPartition, Long> state3 = new HashMap<>();
    state3.put(new KafkaTopicPartition("abc", 13), 16780L);
    state3.put(new KafkaTopicPartition("def", 7), 987654377L);
    // --------------------------------------------------------------------

    final MockFetcher<String> fetcher = new MockFetcher<>(state1, state2, state3);
    final FlinkKafkaConsumerBase<String> consumer =
            new DummyFlinkKafkaConsumer<>(fetcher, mock(AbstractPartitionDiscoverer.class), false);

    // disable offset committing
    consumer.setCommitOffsetsOnCheckpoints(false);

    final TestingListState<Serializable> listState = new TestingListState<>();

    // setup and run the consumer; wait until the consumer reaches the main fetch loop before
    // continuing test
    setupConsumer(consumer, false, listState, true, 0, 1);

    final CheckedThread runThread =
            new CheckedThread() {
                @Override
                public void go() throws Exception {
                    consumer.run(new TestSourceContext<>());
                }
            };
    runThread.start();
    fetcher.waitUntilRun();

    assertEquals(0, consumer.getPendingOffsetsToCommit().size());

    // checkpoint 1
    consumer.snapshotState(new StateSnapshotContextSynchronousImpl(138, 138));

    HashMap<KafkaTopicPartition, Long> snapshot1 = new HashMap<>();
    for (Serializable serializable : listState.get()) {
        Tuple2<KafkaTopicPartition, Long> kafkaTopicPartitionLongTuple2 =
                (Tuple2<KafkaTopicPartition, Long>) serializable;
        snapshot1.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1);
    }

    assertEquals(state1, snapshot1);
    assertEquals(0, consumer.getPendingOffsetsToCommit().size());

    // checkpoint 2
    consumer.snapshotState(new StateSnapshotContextSynchronousImpl(140, 140));

    HashMap<KafkaTopicPartition, Long> snapshot2 = new HashMap<>();
    for (Serializable serializable : listState.get()) {
        Tuple2<KafkaTopicPartition, Long> kafkaTopicPartitionLongTuple2 =
                (Tuple2<KafkaTopicPartition, Long>) serializable;
        snapshot2.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1);
    }

    assertEquals(state2, snapshot2);
    assertEquals(0, consumer.getPendingOffsetsToCommit().size());

    // ack checkpoint 1
    consumer.notifyCheckpointComplete(138L);
    assertEquals(0, fetcher.getCommitCount());
    // no offsets should be committed
    assertNull(fetcher.getAndClearLastCommittedOffsets());

    // checkpoint 3
    consumer.snapshotState(new StateSnapshotContextSynchronousImpl(141, 141));

    HashMap<KafkaTopicPartition, Long> snapshot3 = new HashMap<>();
    for (Serializable serializable : listState.get()) {
        Tuple2<KafkaTopicPartition, Long> kafkaTopicPartitionLongTuple2 =
                (Tuple2<KafkaTopicPartition, Long>) serializable;
        snapshot3.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1);
    }

    assertEquals(state3, snapshot3);
    assertEquals(0, consumer.getPendingOffsetsToCommit().size());

    // ack checkpoint 3, subsumes number 2
    consumer.notifyCheckpointComplete(141L);
    assertEquals(0, fetcher.getCommitCount());
    // no offsets should be committed
    assertNull(fetcher.getAndClearLastCommittedOffsets());

    // invalid checkpoint
    consumer.notifyCheckpointComplete(666);
    assertEquals(0, fetcher.getCommitCount());
    // no offsets should be committed
    assertNull(fetcher.getAndClearLastCommittedOffsets());

    consumer.cancel();
    runThread.sync();
}
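The difference between the two tests is how the commit behavior is resolved from setCommitOffsetsOnCheckpoints and the runtime's checkpointing flag. A simplified sketch of that decision (not Flink's actual OffsetCommitModes utility):

/** Simplified illustration of how the consumer decides when to commit offsets back to Kafka. */
final class CommitModeSketch {

    enum CommitMode {
        ON_CHECKPOINTS, // commit in notifyCheckpointComplete, as in the ...Enabled test above
        KAFKA_PERIODIC, // rely on the Kafka client's periodic auto-commit
        DISABLED        // never commit, as in the ...Disabled test above
    }

    static CommitMode resolve(
            boolean checkpointingEnabled, boolean commitOnCheckpoints, boolean autoCommitEnabled) {
        if (checkpointingEnabled) {
            // setCommitOffsetsOnCheckpoints(false) lands here and turns committing off entirely
            return commitOnCheckpoints ? CommitMode.ON_CHECKPOINTS : CommitMode.DISABLED;
        }
        // without checkpointing, committing falls back to the Kafka client's auto-commit setting
        return autoCommitEnabled ? CommitMode.KAFKA_PERIODIC : CommitMode.DISABLED;
    }
}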