Use of org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology in the Apache Kafka project.
Class StreamsPartitionAssignor, method assign.
/*
* This assigns tasks to consumer clients in the following steps.
*
* 0. decode the subscriptions to assemble the metadata for each client and check for version probing
*
* 1. check all repartition source topics and use internal topic manager to make sure
* they have been created with the right number of partitions. Also verify and/or create
* any changelog topics with the correct number of partitions.
*
* 2. use the partition grouper to generate tasks along with their assigned partitions, then use
* the configured TaskAssignor to construct the mapping of tasks to clients.
*
* 3. construct the global mapping of host to partitions to enable query routing.
*
* 4. within each client, assign tasks to consumer clients.
*
* Instead of a regular assignment, an error assignment carrying the matching AssignorError code is
* returned when any member's subscription requested a shutdown, or when a MissingSourceTopicException
* or TaskAssignmentException is thrown while the assignment is computed.
*
* NOTE(review): the changelog-topic part of step 1 is not visible in this method body; it presumably
* happens inside assignTasksToClients -- confirm.
*/
@Override
public GroupAssignment assign(final Cluster metadata, final GroupSubscription groupSubscription) {
final Map<String, Subscription> subscriptions = groupSubscription.groupSubscription();
// ---------------- Step Zero ---------------- //
// construct the client metadata from the decoded subscription info
final Map<UUID, ClientMetadata> clientMetadataMap = new HashMap<>();
final Set<TopicPartition> allOwnedPartitions = new HashSet<>();
// both minimums start at the latest version and are lowered while the subscriptions are decoded
int minReceivedMetadataVersion = LATEST_SUPPORTED_VERSION;
int minSupportedMetadataVersion = LATEST_SUPPORTED_VERSION;
boolean shutdownRequested = false;
boolean assignmentErrorFound = false;
// stays UNKNOWN unless some member encoded its subscription with a version newer than ours
int futureMetadataVersion = UNKNOWN;
for (final Map.Entry<String, Subscription> entry : subscriptions.entrySet()) {
final String consumerId = entry.getKey();
final Subscription subscription = entry.getValue();
final SubscriptionInfo info = SubscriptionInfo.decode(subscription.userData());
final int usedVersion = info.version();
// a single member asking for shutdown is enough to flag the whole group
if (info.errorCode() == AssignorError.SHUTDOWN_REQUESTED.code()) {
shutdownRequested = true;
}
minReceivedMetadataVersion = updateMinReceivedVersion(usedVersion, minReceivedMetadataVersion);
minSupportedMetadataVersion = updateMinSupportedVersion(info.latestSupportedVersion(), minSupportedMetadataVersion);
final UUID processId;
// all consumers that used a newer ("future") version are grouped under the single FUTURE_ID
// client, which is created without a user end point
if (usedVersion > LATEST_SUPPORTED_VERSION) {
futureMetadataVersion = usedVersion;
processId = FUTURE_ID;
if (!clientMetadataMap.containsKey(FUTURE_ID)) {
clientMetadataMap.put(FUTURE_ID, new ClientMetadata(null));
}
} else {
processId = info.processId();
}
ClientMetadata clientMetadata = clientMetadataMap.get(processId);
// create the new client metadata if necessary
// (only reachable on the non-future path, because FUTURE_ID was inserted above;
// hence info.processId() is the same key as processId here)
if (clientMetadata == null) {
clientMetadata = new ClientMetadata(info.userEndPoint());
clientMetadataMap.put(info.processId(), clientMetadata);
}
// add the consumer and any info in its subscription to the client
clientMetadata.addConsumer(consumerId, subscription.ownedPartitions());
// if the set grew by less than the number of partitions added, at least one of them
// was already present, i.e. it has been reported as owned more than once
final int prevSize = allOwnedPartitions.size();
allOwnedPartitions.addAll(subscription.ownedPartitions());
if (allOwnedPartitions.size() < prevSize + subscription.ownedPartitions().size()) {
assignmentErrorFound = true;
}
clientMetadata.addPreviousTasksAndOffsetSums(consumerId, info.taskOffsetSums());
}
// duplicate ownership is only reported; the assignment still proceeds
if (assignmentErrorFound) {
log.warn("The previous assignment contains a partition more than once. " + "\t Mapping: {}", subscriptions);
}
try {
final boolean versionProbing = checkMetadataVersions(minReceivedMetadataVersion, minSupportedMetadataVersion, futureMetadataVersion);
log.debug("Constructed client metadata {} from the member subscriptions.", clientMetadataMap);
// ---------------- Step One ---------------- //
// short-circuit with an error assignment before touching any topics if a shutdown was requested
if (shutdownRequested) {
return new GroupAssignment(errorAssignment(clientMetadataMap, AssignorError.SHUTDOWN_REQUESTED.code()));
}
// parse the topology to determine the repartition source topics,
// making sure they are created with the number of partitions as
// the maximum of the depending sub-topologies source topics' number of partitions
final RepartitionTopics repartitionTopics = prepareRepartitionTopics(metadata);
final Map<TopicPartition, PartitionInfo> allRepartitionTopicPartitions = repartitionTopics.topicPartitionsInfo();
// extend the cluster metadata passed in with the repartition topic partitions
final Cluster fullMetadata = metadata.withPartitions(allRepartitionTopicPartitions);
log.debug("Created repartition topics {} from the parsed topology.", allRepartitionTopicPartitions.values());
// ---------------- Step Two ---------------- //
// construct the assignment of tasks to clients
// topologies reported as having missing input topics are excluded from the topic groups
final Map<Subtopology, TopicsInfo> topicGroups = taskManager.topologyMetadata().subtopologyTopicsInfoMapExcluding(repartitionTopics.topologiesWithMissingInputTopics());
final Set<String> allSourceTopics = new HashSet<>();
final Map<Subtopology, Set<String>> sourceTopicsByGroup = new HashMap<>();
for (final Map.Entry<Subtopology, TopicsInfo> entry : topicGroups.entrySet()) {
allSourceTopics.addAll(entry.getValue().sourceTopics);
sourceTopicsByGroup.put(entry.getKey(), entry.getValue().sourceTopics);
}
// get the tasks as partition groups from the partition grouper
final Map<TaskId, Set<TopicPartition>> partitionsForTask = partitionGrouper.partitionGroups(sourceTopicsByGroup, fullMetadata);
// passed in empty; filled by assignTasksToClients
final Set<TaskId> statefulTasks = new HashSet<>();
final boolean probingRebalanceNeeded = assignTasksToClients(fullMetadata, allSourceTopics, topicGroups, clientMetadataMap, partitionsForTask, statefulTasks);
// ---------------- Step Three ---------------- //
// construct the global partition assignment per host map
final Map<HostInfo, Set<TopicPartition>> partitionsByHost = new HashMap<>();
final Map<HostInfo, Set<TopicPartition>> standbyPartitionsByHost = new HashMap<>();
// the host maps are left empty when some member used a metadata version below 2
if (minReceivedMetadataVersion >= 2) {
populatePartitionsByHostMaps(partitionsByHost, standbyPartitionsByHost, partitionsForTask, clientMetadataMap);
}
// invoked unconditionally, i.e. with empty maps when the version check above failed
streamsMetadataState.onChange(partitionsByHost, standbyPartitionsByHost, fullMetadata);
// ---------------- Step Four ---------------- //
// compute the assignment of tasks to threads within each client and build the final group assignment
final Map<String, Assignment> assignment = computeNewAssignment(statefulTasks, clientMetadataMap, partitionsForTask, partitionsByHost, standbyPartitionsByHost, allOwnedPartitions, minReceivedMetadataVersion, minSupportedMetadataVersion, versionProbing, probingRebalanceNeeded);
return new GroupAssignment(assignment);
} catch (final MissingSourceTopicException e) {
// translated into an error assignment rather than propagated to the caller
log.error("Caught an error in the task assignment. Returning an error assignment.", e);
return new GroupAssignment(errorAssignment(clientMetadataMap, AssignorError.INCOMPLETE_SOURCE_TOPIC_METADATA.code()));
} catch (final TaskAssignmentException e) {
// translated into an error assignment rather than propagated to the caller
log.error("Caught an error in the task assignment. Returning an error assignment.", e);
return new GroupAssignment(errorAssignment(clientMetadataMap, AssignorError.ASSIGNMENT_ERROR.code()));
}
}
Use of org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology in the Apache Kafka project.
Class StreamsPartitionAssignor, method assignTasksToClients.
/**
 * Distributes the given tasks over the clients (Streams instances) by delegating to the configured
 * task assignor. As a side effect, {@code statefulTasks} is filled with the ids of the stateful tasks
 * reported by the changelog topics.
 *
 * @param fullMetadata      cluster metadata handed to the task/partition lookup population
 * @param allSourceTopics   all source topics of the topology
 * @param topicGroups       topics info per subtopology
 * @param clientMetadataMap metadata of every participating client, keyed by process id
 * @param partitionsForTask input partitions per task; its key set is the full set of tasks to assign
 * @param statefulTasks     output parameter, must be empty on entry
 * @return true if a probing rebalance should be triggered
 * @throws TaskAssignmentException if {@code statefulTasks} is not empty on entry
 */
private boolean assignTasksToClients(final Cluster fullMetadata,
                                     final Set<String> allSourceTopics,
                                     final Map<Subtopology, TopicsInfo> topicGroups,
                                     final Map<UUID, ClientMetadata> clientMetadataMap,
                                     final Map<TaskId, Set<TopicPartition>> partitionsForTask,
                                     final Set<TaskId> statefulTasks) {
    // the caller must hand in an empty set; it is populated further below
    if (!statefulTasks.isEmpty()) {
        throw new TaskAssignmentException("The stateful tasks should not be populated before assigning tasks to clients");
    }

    // build the partition -> task and subtopology -> tasks lookups
    final Map<TopicPartition, TaskId> partitionToTask = new HashMap<>();
    final Map<Subtopology, Set<TaskId>> subtopologyToTasks = new HashMap<>();
    populateTasksForMaps(partitionToTask, subtopologyToTasks, allSourceTopics, partitionsForTask, fullMetadata);

    // set up the changelog topics before the client states are computed
    final ChangelogTopics changelogs = new ChangelogTopics(internalTopicManager, topicGroups, subtopologyToTasks, logPrefix);
    changelogs.setup();

    final Map<UUID, ClientState> statePerClient = new HashMap<>();
    final boolean lagsAvailable = populateClientStatesMap(statePerClient, clientMetadataMap, partitionToTask, changelogs);

    final String memberSummary = statePerClient.entrySet()
        .stream()
        .map(client -> client.getKey() + ": " + client.getValue().consumers())
        .collect(Collectors.joining(Utils.NL));
    log.info("All members participating in this rebalance: \n{}.", memberSummary);

    final Set<TaskId> everyTask = partitionsForTask.keySet();
    statefulTasks.addAll(changelogs.statefulTaskIds());

    log.debug("Assigning tasks {} including stateful {} to clients {} with number of replicas {}",
              everyTask, statefulTasks, statePerClient, numStandbyReplicas());

    // the assignor is chosen based on whether the lag computation succeeded
    final TaskAssignor assignor = createTaskAssignor(lagsAvailable);
    final boolean needsProbingRebalance = assignor.assign(statePerClient, everyTask, statefulTasks, assignmentConfigs);

    final String assignmentSummary = statePerClient.entrySet()
        .stream()
        .map(client -> client.getKey() + "=" + client.getValue().currentAssignment())
        .collect(Collectors.joining(Utils.NL));
    log.info("Assigned tasks {} including stateful {} to clients as: \n{}.", everyTask, statefulTasks, assignmentSummary);

    return needsProbingRebalance;
}
Use of org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology in the Apache Kafka project.
Class InternalTopologyBuilder, method subtopologyToTopicsInfo.
/**
 * Builds, for every node group, the topics it touches: sink topics, source topics, repartition
 * topics and state changelog topics. Node groups without any (non-global) source topic are omitted.
 * The node groups are computed lazily on first use.
 *
 * @return an unmodifiable, insertion-ordered map from subtopology to its topics info
 */
public synchronized Map<Subtopology, TopicsInfo> subtopologyToTopicsInfo() {
    if (nodeGroups == null) {
        nodeGroups = makeNodeGroups();
    }

    final Map<Subtopology, TopicsInfo> infoBySubtopology = new LinkedHashMap<>();

    for (final Map.Entry<Integer, Set<String>> nodeGroup : nodeGroups.entrySet()) {
        final Set<String> sinks = new HashSet<>();
        final Set<String> sources = new HashSet<>();
        final Map<String, InternalTopicConfig> repartitions = new HashMap<>();
        final Map<String, InternalTopicConfig> changelogs = new HashMap<>();

        for (final String nodeName : nodeGroup.getValue()) {
            // source nodes contribute their topics to the source topics
            final List<String> topicsOfSourceNode = nodeToSourceTopics.get(nodeName);
            if (topicsOfSourceNode != null) {
                for (final String sourceTopic : topicsOfSourceNode) {
                    // global topics need no partition assignment and are therefore ignored
                    if (!globalTopics.contains(sourceTopic)) {
                        if (!internalTopicNamesWithProperties.containsKey(sourceTopic)) {
                            sources.add(sourceTopic);
                        } else {
                            // internal topics are registered under their decorated (application-id prefixed) name
                            final String decorated = decorateTopic(sourceTopic);
                            final RepartitionTopicConfig repartitionConfig = buildRepartitionTopicConfig(
                                decorated,
                                internalTopicNamesWithProperties.get(sourceTopic).getNumberOfPartitions()
                            );
                            repartitions.put(repartitionConfig.name(), repartitionConfig);
                            sources.add(repartitionConfig.name());
                        }
                    }
                }
            }

            // sink nodes contribute their topic to the sink topics, decorated if it is internal
            final String sinkTopic = nodeToSinkTopic.get(nodeName);
            if (sinkTopic != null) {
                sinks.add(internalTopicNamesWithProperties.containsKey(sinkTopic) ? decorateTopic(sinkTopic) : sinkTopic);
            }

            // every store used by this node that has a changelog topic contributes that topic once
            for (final StateStoreFactory<?> factory : stateFactories.values()) {
                if (!factory.users().contains(nodeName) || !storeToChangelogTopic.containsKey(factory.name())) {
                    continue;
                }
                final String changelogName = storeToChangelogTopic.get(factory.name());
                if (changelogs.containsKey(changelogName)) {
                    continue;
                }
                final InternalTopicConfig changelogConfig = createChangelogTopicConfig(factory, changelogName);
                changelogs.put(changelogName, changelogConfig);
            }
        }

        if (!sources.isEmpty()) {
            final TopicsInfo topicsInfo = new TopicsInfo(
                Collections.unmodifiableSet(sinks),
                Collections.unmodifiableSet(sources),
                Collections.unmodifiableMap(repartitions),
                Collections.unmodifiableMap(changelogs)
            );
            infoBySubtopology.put(new Subtopology(nodeGroup.getKey(), topologyName), topicsInfo);
        }
    }

    return Collections.unmodifiableMap(infoBySubtopology);
}
Use of org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology in the Apache Kafka project.
Class ChangelogTopics, method setup.
/**
 * Derives the changelog partitions of every stateful task, makes the non-source changelog topics
 * ready through the internal topic manager, and then records which changelog partitions belong to
 * topics that were not newly created by that call.
 */
public void setup() {
    // configs of the non-source changelog topics that have to be made ready, keyed by topic name
    final Map<String, InternalTopicConfig> topicsToMakeReady = new HashMap<>();

    for (final Map.Entry<Subtopology, TopicsInfo> group : topicGroups.entrySet()) {
        final Subtopology subtopology = group.getKey();
        final TopicsInfo info = group.getValue();
        final Set<TaskId> tasksOfGroup = tasksForTopicGroup.get(subtopology);

        if (tasksOfGroup == null) {
            log.debug("No tasks found for subtopology {}", subtopology);
            continue;
        }
        if (info.stateChangelogTopics.isEmpty()) {
            // stateless subtopology, nothing to register
            continue;
        }

        // each task reads/writes the changelog partition with the same number as the task's partition
        for (final TaskId taskId : tasksOfGroup) {
            final Set<TopicPartition> changelogPartitions = new HashSet<>();
            for (final String changelogTopic : info.stateChangelogTopics.keySet()) {
                changelogPartitions.add(new TopicPartition(changelogTopic, taskId.partition()));
            }
            changelogPartitionsForStatefulTask.put(taskId, changelogPartitions);
        }

        for (final InternalTopicConfig changelogConfig : info.nonSourceChangelogTopics()) {
            // the expected number of partitions is the max value of TaskId.partition + 1
            int partitionCount = UNKNOWN;
            for (final TaskId taskId : tasksOfGroup) {
                partitionCount = Math.max(partitionCount, taskId.partition() + 1);
            }
            changelogConfig.setNumberOfPartitions(partitionCount);
            topicsToMakeReady.put(changelogConfig.name(), changelogConfig);
        }

        sourceTopicBasedChangelogTopics.addAll(info.sourceTopicChangelogs());
    }

    final Set<String> createdNow = internalTopicManager.makeReady(topicsToMakeReady);
    log.debug("Created state changelog topics {} from the parsed topology.", topicsToMakeReady.values());

    // classify every changelog partition whose topic was not reported as newly created
    for (final Map.Entry<TaskId, Set<TopicPartition>> taskEntry : changelogPartitionsForStatefulTask.entrySet()) {
        final TaskId owner = taskEntry.getKey();
        for (final TopicPartition changelogPartition : taskEntry.getValue()) {
            if (createdNow.contains(changelogPartition.topic())) {
                continue;
            }
            preExistingChangelogPartitionsForTask.computeIfAbsent(owner, task -> new HashSet<>()).add(changelogPartition);
            if (sourceTopicBasedChangelogTopics.contains(changelogPartition.topic())) {
                preExistingSourceTopicBasedChangelogPartitions.add(changelogPartition);
            } else {
                preExistingNonSourceTopicBasedChangelogPartitions.add(changelogPartition);
            }
        }
    }
}
Use of org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology in the Apache Kafka project.
Class InternalTopologyBuilderTest, method testTopicGroupsByStateStore.
/**
 * Sources whose processors share a state store must end up in the same topic group, and each
 * group must expose the changelog topic of its store.
 */
@Test
public void testTopicGroupsByStateStore() {
    builder.setApplicationId("X");

    // five source nodes; the first one reads from two topics
    builder.addSource(null, "source-1", null, null, null, "topic-1", "topic-1x");
    builder.addSource(null, "source-2", null, null, null, "topic-2");
    builder.addSource(null, "source-3", null, null, null, "topic-3");
    builder.addSource(null, "source-4", null, null, null, "topic-4");
    builder.addSource(null, "source-5", null, null, null, "topic-5");

    // processors 1 and 2 share store-1
    builder.addProcessor("processor-1", new MockApiProcessorSupplier<>(), "source-1");
    builder.addProcessor("processor-2", new MockApiProcessorSupplier<>(), "source-2");
    builder.addStateStore(new MockKeyValueStoreBuilder("store-1", false), "processor-1", "processor-2");

    // processors 3 and 4 share store-2
    builder.addProcessor("processor-3", new MockApiProcessorSupplier<>(), "source-3");
    builder.addProcessor("processor-4", new MockApiProcessorSupplier<>(), "source-4");
    builder.addStateStore(new MockKeyValueStoreBuilder("store-2", false), "processor-3", "processor-4");

    // processor 5 is connected to store-3 after the store has been added
    builder.addProcessor("processor-5", new MockApiProcessorSupplier<>(), "source-5");
    builder.addStateStore(new MockKeyValueStoreBuilder("store-3", false));
    builder.connectProcessorAndStateStores("processor-5", "store-3");

    builder.buildTopology();

    final Map<Subtopology, InternalTopologyBuilder.TopicsInfo> actualGroups = builder.subtopologyToTopicsInfo();

    final String changelog1 = ProcessorStateManager.storeChangelogTopic("X", "store-1", builder.topologyName());
    final String changelog2 = ProcessorStateManager.storeChangelogTopic("X", "store-2", builder.topologyName());
    final String changelog3 = ProcessorStateManager.storeChangelogTopic("X", "store-3", builder.topologyName());

    final InternalTopologyBuilder.TopicsInfo expectedGroup0 = new InternalTopologyBuilder.TopicsInfo(
        Collections.emptySet(),
        mkSet("topic-1", "topic-1x", "topic-2"),
        Collections.emptyMap(),
        Collections.singletonMap(changelog1, new UnwindowedChangelogTopicConfig(changelog1, Collections.emptyMap()))
    );
    final InternalTopologyBuilder.TopicsInfo expectedGroup1 = new InternalTopologyBuilder.TopicsInfo(
        Collections.emptySet(),
        mkSet("topic-3", "topic-4"),
        Collections.emptyMap(),
        Collections.singletonMap(changelog2, new UnwindowedChangelogTopicConfig(changelog2, Collections.emptyMap()))
    );
    final InternalTopologyBuilder.TopicsInfo expectedGroup2 = new InternalTopologyBuilder.TopicsInfo(
        Collections.emptySet(),
        mkSet("topic-5"),
        Collections.emptyMap(),
        Collections.singletonMap(changelog3, new UnwindowedChangelogTopicConfig(changelog3, Collections.emptyMap()))
    );

    final Map<Subtopology, InternalTopologyBuilder.TopicsInfo> expectedGroups = new HashMap<>();
    expectedGroups.put(SUBTOPOLOGY_0, expectedGroup0);
    expectedGroups.put(SUBTOPOLOGY_1, expectedGroup1);
    expectedGroups.put(SUBTOPOLOGY_2, expectedGroup2);

    assertEquals(3, actualGroups.size());
    assertEquals(expectedGroups, actualGroups);
}
Aggregations