Search in sources:

Example 6 with Subtopology

use of org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology in project kafka by apache.

The method assign of the class StreamsPartitionAssignor.

/*
 * This assigns tasks to consumer clients in the following steps.
 *
 * 0. decode the subscriptions to assemble the metadata for each client and check for version probing
 *
 * 1. check all repartition source topics and use internal topic manager to make sure
 *    they have been created with the right number of partitions. Also verify and/or create
 *    any changelog topics with the correct number of partitions.
 *
 * 2. use the partition grouper to generate tasks along with their assigned partitions, then use
 *    the configured TaskAssignor to construct the mapping of tasks to clients.
 *
 * 3. construct the global mapping of host to partitions to enable query routing.
 *
 * 4. within each client, assign tasks to consumer clients.
 *
 * Instead of a regular assignment, an error assignment built by errorAssignment(...) is returned when
 * any member's subscription carries the SHUTDOWN_REQUESTED error code, or when a
 * MissingSourceTopicException or TaskAssignmentException is thrown while computing the assignment.
 */
@Override
public GroupAssignment assign(final Cluster metadata, final GroupSubscription groupSubscription) {
    final Map<String, Subscription> subscriptions = groupSubscription.groupSubscription();
    // ---------------- Step Zero ---------------- //
    // construct the client metadata from the decoded subscription info
    final Map<UUID, ClientMetadata> clientMetadataMap = new HashMap<>();
    // union of the partitions owned by all members; also used below to detect doubly-owned partitions
    final Set<TopicPartition> allOwnedPartitions = new HashSet<>();
    // both minima start at the latest version and are narrowed by each subscription in the loop
    int minReceivedMetadataVersion = LATEST_SUPPORTED_VERSION;
    int minSupportedMetadataVersion = LATEST_SUPPORTED_VERSION;
    boolean shutdownRequested = false;
    boolean assignmentErrorFound = false;
    // stays UNKNOWN unless some member used a version newer than LATEST_SUPPORTED_VERSION
    int futureMetadataVersion = UNKNOWN;
    for (final Map.Entry<String, Subscription> entry : subscriptions.entrySet()) {
        final String consumerId = entry.getKey();
        final Subscription subscription = entry.getValue();
        // the subscription's user data holds the encoded SubscriptionInfo (version, process id, end point, ...)
        final SubscriptionInfo info = SubscriptionInfo.decode(subscription.userData());
        final int usedVersion = info.version();
        // one member asking for a shutdown is enough to flag the whole group
        if (info.errorCode() == AssignorError.SHUTDOWN_REQUESTED.code()) {
            shutdownRequested = true;
        }
        minReceivedMetadataVersion = updateMinReceivedVersion(usedVersion, minReceivedMetadataVersion);
        minSupportedMetadataVersion = updateMinSupportedVersion(info.latestSupportedVersion(), minSupportedMetadataVersion);
        final UUID processId;
        if (usedVersion > LATEST_SUPPORTED_VERSION) {
            // members using a newer version are all grouped under the shared FUTURE_ID entry,
            // which is created once with a null end point
            futureMetadataVersion = usedVersion;
            processId = FUTURE_ID;
            if (!clientMetadataMap.containsKey(FUTURE_ID)) {
                clientMetadataMap.put(FUTURE_ID, new ClientMetadata(null));
            }
        } else {
            processId = info.processId();
        }
        ClientMetadata clientMetadata = clientMetadataMap.get(processId);
        // create the new client metadata if necessary
        if (clientMetadata == null) {
            // only reachable when processId == info.processId(): the FUTURE_ID entry always exists
            // by the time of the lookup above, so using info.processId() as the key is equivalent here
            clientMetadata = new ClientMetadata(info.userEndPoint());
            clientMetadataMap.put(info.processId(), clientMetadata);
        }
        // add the consumer and any info in its subscription to the client
        clientMetadata.addConsumer(consumerId, subscription.ownedPartitions());
        // if the set grows by fewer elements than were added, at least one of these partitions was
        // already present, i.e. it is claimed more than once in the previous assignment
        final int prevSize = allOwnedPartitions.size();
        allOwnedPartitions.addAll(subscription.ownedPartitions());
        if (allOwnedPartitions.size() < prevSize + subscription.ownedPartitions().size()) {
            assignmentErrorFound = true;
        }
        clientMetadata.addPreviousTasksAndOffsetSums(consumerId, info.taskOffsetSums());
    }
    // a doubly-owned partition is only reported; the assignment still proceeds
    if (assignmentErrorFound) {
        log.warn("The previous assignment contains a partition more than once. " + "\t Mapping: {}", subscriptions);
    }
    try {
        // NOTE(review): checkMetadataVersions is defined elsewhere; within this method its result is
        // only forwarded to computeNewAssignment in step four
        final boolean versionProbing = checkMetadataVersions(minReceivedMetadataVersion, minSupportedMetadataVersion, futureMetadataVersion);
        log.debug("Constructed client metadata {} from the member subscriptions.", clientMetadataMap);
        // a requested shutdown short-circuits before any repartition topics are prepared or tasks assigned
        if (shutdownRequested) {
            return new GroupAssignment(errorAssignment(clientMetadataMap, AssignorError.SHUTDOWN_REQUESTED.code()));
        }
        // ---------------- Step One ---------------- //
        // parse the topology to determine the repartition source topics,
        // making sure they are created with the number of partitions as
        // the maximum of the depending sub-topologies source topics' number of partitions
        final RepartitionTopics repartitionTopics = prepareRepartitionTopics(metadata);
        final Map<TopicPartition, PartitionInfo> allRepartitionTopicPartitions = repartitionTopics.topicPartitionsInfo();
        // extend the cluster metadata passed in with the repartition topic partitions
        final Cluster fullMetadata = metadata.withPartitions(allRepartitionTopicPartitions);
        log.debug("Created repartition topics {} from the parsed topology.", allRepartitionTopicPartitions.values());
        // ---------------- Step Two ---------------- //
        // construct the assignment of tasks to clients
        // topologies reported as having missing input topics are left out of the topic groups
        final Map<Subtopology, TopicsInfo> topicGroups = taskManager.topologyMetadata().subtopologyTopicsInfoMapExcluding(repartitionTopics.topologiesWithMissingInputTopics());
        final Set<String> allSourceTopics = new HashSet<>();
        final Map<Subtopology, Set<String>> sourceTopicsByGroup = new HashMap<>();
        for (final Map.Entry<Subtopology, TopicsInfo> entry : topicGroups.entrySet()) {
            allSourceTopics.addAll(entry.getValue().sourceTopics);
            sourceTopicsByGroup.put(entry.getKey(), entry.getValue().sourceTopics);
        }
        // get the tasks as partition groups from the partition grouper
        final Map<TaskId, Set<TopicPartition>> partitionsForTask = partitionGrouper.partitionGroups(sourceTopicsByGroup, fullMetadata);
        // passed in empty; assignTasksToClients fills it with the stateful task ids
        final Set<TaskId> statefulTasks = new HashSet<>();
        final boolean probingRebalanceNeeded = assignTasksToClients(fullMetadata, allSourceTopics, topicGroups, clientMetadataMap, partitionsForTask, statefulTasks);
        // ---------------- Step Three ---------------- //
        // construct the global partition assignment per host map
        final Map<HostInfo, Set<TopicPartition>> partitionsByHost = new HashMap<>();
        final Map<HostInfo, Set<TopicPartition>> standbyPartitionsByHost = new HashMap<>();
        // NOTE(review): presumably subscriptions older than version 2 carry no host info - confirm;
        // in that case both maps stay empty but are still handed to onChange and step four
        if (minReceivedMetadataVersion >= 2) {
            populatePartitionsByHostMaps(partitionsByHost, standbyPartitionsByHost, partitionsForTask, clientMetadataMap);
        }
        streamsMetadataState.onChange(partitionsByHost, standbyPartitionsByHost, fullMetadata);
        // ---------------- Step Four ---------------- //
        // compute the assignment of tasks to threads within each client and build the final group assignment
        final Map<String, Assignment> assignment = computeNewAssignment(statefulTasks, clientMetadataMap, partitionsForTask, partitionsByHost, standbyPartitionsByHost, allOwnedPartitions, minReceivedMetadataVersion, minSupportedMetadataVersion, versionProbing, probingRebalanceNeeded);
        return new GroupAssignment(assignment);
    } catch (final MissingSourceTopicException e) {
        // reported to the members as INCOMPLETE_SOURCE_TOPIC_METADATA rather than propagated
        log.error("Caught an error in the task assignment. Returning an error assignment.", e);
        return new GroupAssignment(errorAssignment(clientMetadataMap, AssignorError.INCOMPLETE_SOURCE_TOPIC_METADATA.code()));
    } catch (final TaskAssignmentException e) {
        // reported to the members as ASSIGNMENT_ERROR rather than propagated
        log.error("Caught an error in the task assignment. Returning an error assignment.", e);
        return new GroupAssignment(errorAssignment(clientMetadataMap, AssignorError.ASSIGNMENT_ERROR.code()));
    }
}
Also used : SortedSet(java.util.SortedSet) Set(java.util.Set) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) TaskId(org.apache.kafka.streams.processor.TaskId) HashMap(java.util.HashMap) SubscriptionInfo(org.apache.kafka.streams.processor.internals.assignment.SubscriptionInfo) TopicsInfo(org.apache.kafka.streams.processor.internals.InternalTopologyBuilder.TopicsInfo) PartitionInfo(org.apache.kafka.common.PartitionInfo) UUID(java.util.UUID) UUID.randomUUID(java.util.UUID.randomUUID) HashSet(java.util.HashSet) TaskAssignmentException(org.apache.kafka.streams.errors.TaskAssignmentException) Cluster(org.apache.kafka.common.Cluster) MissingSourceTopicException(org.apache.kafka.streams.errors.MissingSourceTopicException) TopicPartition(org.apache.kafka.common.TopicPartition) Map(java.util.Map) Utils.filterMap(org.apache.kafka.common.utils.Utils.filterMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) Subtopology(org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology) HostInfo(org.apache.kafka.streams.state.HostInfo)

Example 7 with Subtopology

use of org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology in project kafka by apache.

The method assignTasksToClients of the class StreamsPartitionAssignor.

/**
 * Distributes all tasks over the clients (Streams instances) with the configured task assignor.
 * As a side effect, {@code statefulTasks} is filled with the ids of the stateful tasks and the
 * changelog topics are set up through {@link ChangelogTopics#setup()}.
 *
 * @param fullMetadata      cluster metadata handed to {@code populateTasksForMaps}
 * @param allSourceTopics   the source topics of all topic groups
 * @param topicGroups       the topics info per subtopology
 * @param clientMetadataMap the metadata of every client, keyed by process id
 * @param partitionsForTask the input partitions of every task
 * @param statefulTasks     output parameter; must be empty when this method is called
 * @return true if a probing rebalance should be triggered
 * @throws TaskAssignmentException if {@code statefulTasks} is not empty on entry
 */
private boolean assignTasksToClients(final Cluster fullMetadata, final Set<String> allSourceTopics, final Map<Subtopology, TopicsInfo> topicGroups, final Map<UUID, ClientMetadata> clientMetadataMap, final Map<TaskId, Set<TopicPartition>> partitionsForTask, final Set<TaskId> statefulTasks) {
    // this method owns the population of the stateful tasks, so it must start from an empty set
    if (!statefulTasks.isEmpty()) {
        throw new TaskAssignmentException("The stateful tasks should not be populated before assigning tasks to clients");
    }

    // index the tasks by partition and by subtopology
    final Map<TopicPartition, TaskId> partitionToTask = new HashMap<>();
    final Map<Subtopology, Set<TaskId>> subtopologyToTasks = new HashMap<>();
    populateTasksForMaps(partitionToTask, subtopologyToTasks, allSourceTopics, partitionsForTask, fullMetadata);

    final ChangelogTopics changelogs = new ChangelogTopics(internalTopicManager, topicGroups, subtopologyToTasks, logPrefix);
    changelogs.setup();

    // build the per-client state; the returned flag selects the task assignor further below
    final Map<UUID, ClientState> stateByClient = new HashMap<>();
    final boolean lagsComputed = populateClientStatesMap(stateByClient, clientMetadataMap, partitionToTask, changelogs);

    final String participatingMembers = stateByClient.entrySet().stream()
        .map(client -> client.getKey() + ": " + client.getValue().consumers())
        .collect(Collectors.joining(Utils.NL));
    log.info("All members participating in this rebalance: \n{}.", participatingMembers);

    final Set<TaskId> everyTask = partitionsForTask.keySet();
    statefulTasks.addAll(changelogs.statefulTaskIds());
    log.debug("Assigning tasks {} including stateful {} to clients {} with number of replicas {}", everyTask, statefulTasks, stateByClient, numStandbyReplicas());

    final TaskAssignor assignor = createTaskAssignor(lagsComputed);
    final boolean probingRebalanceNeeded = assignor.assign(stateByClient, everyTask, statefulTasks, assignmentConfigs);

    final String assignmentByClient = stateByClient.entrySet().stream()
        .map(client -> client.getKey() + "=" + client.getValue().currentAssignment())
        .collect(Collectors.joining(Utils.NL));
    log.info("Assigned tasks {} including stateful {} to clients as: \n{}.", everyTask, statefulTasks, assignmentByClient);

    return probingRebalanceNeeded;
}
Also used : TaskAssignmentException(org.apache.kafka.streams.errors.TaskAssignmentException) ClientState(org.apache.kafka.streams.processor.internals.assignment.ClientState) TaskId(org.apache.kafka.streams.processor.TaskId) SortedSet(java.util.SortedSet) Set(java.util.Set) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) HashMap(java.util.HashMap) FallbackPriorTaskAssignor(org.apache.kafka.streams.processor.internals.assignment.FallbackPriorTaskAssignor) StickyTaskAssignor(org.apache.kafka.streams.processor.internals.assignment.StickyTaskAssignor) TaskAssignor(org.apache.kafka.streams.processor.internals.assignment.TaskAssignor) TopicPartition(org.apache.kafka.common.TopicPartition) UUID(java.util.UUID) UUID.randomUUID(java.util.UUID.randomUUID) Subtopology(org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology)

Example 8 with Subtopology

use of org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology in project kafka by apache.

The method subtopologyToTopicsInfo of the class InternalTopologyBuilder.

/**
 * Collects, for every node group, the sink topics, source topics, repartition topics and state
 * changelog topics used by its nodes. Global topics are never counted as source topics, and a
 * node group that ends up without any source topic is left out of the result.
 *
 * @return an unmodifiable map from subtopology to its topics info, in node-group iteration order
 */
public synchronized Map<Subtopology, TopicsInfo> subtopologyToTopicsInfo() {
    // the node groups are computed lazily on first use
    if (nodeGroups == null) {
        nodeGroups = makeNodeGroups();
    }

    final Map<Subtopology, TopicsInfo> result = new LinkedHashMap<>();
    for (final Map.Entry<Integer, Set<String>> group : nodeGroups.entrySet()) {
        final Set<String> sinks = new HashSet<>();
        final Set<String> sources = new HashSet<>();
        final Map<String, InternalTopicConfig> repartitions = new HashMap<>();
        final Map<String, InternalTopicConfig> changelogs = new HashMap<>();

        for (final String nodeName : group.getValue()) {
            // source node: record each of its topics, except the global ones
            final List<String> nodeSourceTopics = nodeToSourceTopics.get(nodeName);
            if (nodeSourceTopics != null) {
                for (final String sourceTopic : nodeSourceTopics) {
                    // global topics don't need partition assignment
                    if (globalTopics.contains(sourceTopic)) {
                        continue;
                    }
                    // a plain (non-internal) topic is used under its own name
                    if (!internalTopicNamesWithProperties.containsKey(sourceTopic)) {
                        sources.add(sourceTopic);
                        continue;
                    }
                    // an internal topic is a repartition topic and is registered under its decorated name
                    final RepartitionTopicConfig repartitionConfig = buildRepartitionTopicConfig(decorateTopic(sourceTopic), internalTopicNamesWithProperties.get(sourceTopic).getNumberOfPartitions());
                    repartitions.put(repartitionConfig.name(), repartitionConfig);
                    sources.add(repartitionConfig.name());
                }
            }

            // sink node: record its topic, decorated when it is an internal topic
            final String sinkTopic = nodeToSinkTopic.get(nodeName);
            if (sinkTopic != null) {
                sinks.add(internalTopicNamesWithProperties.containsKey(sinkTopic) ? decorateTopic(sinkTopic) : sinkTopic);
            }

            // record the changelog topic of every store that this node uses, once per topic name
            for (final StateStoreFactory<?> factory : stateFactories.values()) {
                if (!factory.users().contains(nodeName) || !storeToChangelogTopic.containsKey(factory.name())) {
                    continue;
                }
                final String changelogName = storeToChangelogTopic.get(factory.name());
                if (!changelogs.containsKey(changelogName)) {
                    changelogs.put(changelogName, createChangelogTopicConfig(factory, changelogName));
                }
            }
        }

        // node groups without any source topic are not reported
        if (sources.isEmpty()) {
            continue;
        }
        final Subtopology subtopology = new Subtopology(group.getKey(), topologyName);
        final TopicsInfo topicsInfo = new TopicsInfo(Collections.unmodifiableSet(sinks), Collections.unmodifiableSet(sources), Collections.unmodifiableMap(repartitions), Collections.unmodifiableMap(changelogs));
        result.put(subtopology, topicsInfo);
    }
    return Collections.unmodifiableMap(result);
}
Also used : TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) LinkedHashMap(java.util.LinkedHashMap) Subtopology(org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) HashSet(java.util.HashSet)

Example 9 with Subtopology

use of org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology in project kafka by apache.

The method setup of the class ChangelogTopics.

/**
 * Registers the changelog partitions of every task whose subtopology has state changelog topics,
 * passes the non-source changelog topics (with their expected partition counts) to
 * {@code internalTopicManager.makeReady}, and finally records the changelog partitions whose
 * topic is not among the names returned by that call as pre-existing.
 */
public void setup() {
    // add tasks to state change log topic subscribers
    final Map<String, InternalTopicConfig> topicsToPrepare = new HashMap<>();
    for (final Map.Entry<Subtopology, TopicsInfo> group : topicGroups.entrySet()) {
        final Subtopology subtopology = group.getKey();
        final TopicsInfo info = group.getValue();
        final Set<TaskId> tasks = tasksForTopicGroup.get(subtopology);
        if (tasks == null) {
            log.debug("No tasks found for subtopology {}", subtopology);
            continue;
        }
        // nothing to register for a subtopology without state changelog topics
        if (info.stateChangelogTopics.isEmpty()) {
            continue;
        }

        // every task gets one changelog partition per changelog topic, at the task's own partition
        for (final TaskId task : tasks) {
            final Set<TopicPartition> partitions = new HashSet<>();
            for (final String topic : info.stateChangelogTopics.keySet()) {
                partitions.add(new TopicPartition(topic, task.partition()));
            }
            changelogPartitionsForStatefulTask.put(task, partitions);
        }

        for (final InternalTopicConfig config : info.nonSourceChangelogTopics()) {
            // the expected number of partitions is the max value of TaskId.partition + 1
            final int numPartitions = tasks.stream().mapToInt(t -> t.partition() + 1).reduce(UNKNOWN, Math::max);
            config.setNumberOfPartitions(numPartitions);
            topicsToPrepare.put(config.name(), config);
        }
        sourceTopicBasedChangelogTopics.addAll(info.sourceTopicChangelogs());
    }

    final Set<String> createdTopics = internalTopicManager.makeReady(topicsToPrepare);
    log.debug("Created state changelog topics {} from the parsed topology.", topicsToPrepare.values());

    // classify every changelog partition whose topic was not returned by makeReady
    for (final Map.Entry<TaskId, Set<TopicPartition>> taskPartitions : changelogPartitionsForStatefulTask.entrySet()) {
        final TaskId taskId = taskPartitions.getKey();
        for (final TopicPartition partition : taskPartitions.getValue()) {
            final String topic = partition.topic();
            if (createdTopics.contains(topic)) {
                continue;
            }
            preExistingChangelogPartitionsForTask.computeIfAbsent(taskId, key -> new HashSet<>()).add(partition);
            if (sourceTopicBasedChangelogTopics.contains(topic)) {
                preExistingSourceTopicBasedChangelogPartitions.add(partition);
            } else {
                preExistingNonSourceTopicBasedChangelogPartitions.add(partition);
            }
        }
    }
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) Logger(org.slf4j.Logger) TaskId(org.apache.kafka.streams.processor.TaskId) Set(java.util.Set) HashMap(java.util.HashMap) Collectors(java.util.stream.Collectors) TopicsInfo(org.apache.kafka.streams.processor.internals.InternalTopologyBuilder.TopicsInfo) HashSet(java.util.HashSet) UNKNOWN(org.apache.kafka.streams.processor.internals.assignment.StreamsAssignmentProtocolVersions.UNKNOWN) LogContext(org.apache.kafka.common.utils.LogContext) Map(java.util.Map) Subtopology(org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology) Collections(java.util.Collections) TaskId(org.apache.kafka.streams.processor.TaskId) Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) TopicsInfo(org.apache.kafka.streams.processor.internals.InternalTopologyBuilder.TopicsInfo) TopicPartition(org.apache.kafka.common.TopicPartition) HashMap(java.util.HashMap) Map(java.util.Map) Subtopology(org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology) HashSet(java.util.HashSet)

Example 10 with Subtopology

use of org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology in project kafka by apache.

The method testTopicGroupsByStateStore of the class InternalTopologyBuilderTest.

/**
 * Connects five sources to three state stores and checks that
 * {@code subtopologyToTopicsInfo()} reports exactly three topic groups: the sources that share a
 * store are grouped together, and each group carries that store's changelog topic.
 */
@Test
public void testTopicGroupsByStateStore() {
    builder.setApplicationId("X");

    // five sources; the first one reads from two topics
    builder.addSource(null, "source-1", null, null, null, "topic-1", "topic-1x");
    builder.addSource(null, "source-2", null, null, null, "topic-2");
    builder.addSource(null, "source-3", null, null, null, "topic-3");
    builder.addSource(null, "source-4", null, null, null, "topic-4");
    builder.addSource(null, "source-5", null, null, null, "topic-5");

    // store-1 is shared by processor-1 and processor-2
    builder.addProcessor("processor-1", new MockApiProcessorSupplier<>(), "source-1");
    builder.addProcessor("processor-2", new MockApiProcessorSupplier<>(), "source-2");
    builder.addStateStore(new MockKeyValueStoreBuilder("store-1", false), "processor-1", "processor-2");

    // store-2 is shared by processor-3 and processor-4
    builder.addProcessor("processor-3", new MockApiProcessorSupplier<>(), "source-3");
    builder.addProcessor("processor-4", new MockApiProcessorSupplier<>(), "source-4");
    builder.addStateStore(new MockKeyValueStoreBuilder("store-2", false), "processor-3", "processor-4");

    // store-3 is added on its own and connected to processor-5 afterwards
    builder.addProcessor("processor-5", new MockApiProcessorSupplier<>(), "source-5");
    builder.addStateStore(new MockKeyValueStoreBuilder("store-3", false));
    builder.connectProcessorAndStateStores("processor-5", "store-3");

    builder.buildTopology();
    final Map<Subtopology, InternalTopologyBuilder.TopicsInfo> actual = builder.subtopologyToTopicsInfo();

    final String changelog1 = ProcessorStateManager.storeChangelogTopic("X", "store-1", builder.topologyName());
    final String changelog2 = ProcessorStateManager.storeChangelogTopic("X", "store-2", builder.topologyName());
    final String changelog3 = ProcessorStateManager.storeChangelogTopic("X", "store-3", builder.topologyName());

    // no sink or repartition topics are expected, only source topics and one changelog per group
    final InternalTopologyBuilder.TopicsInfo group0 = new InternalTopologyBuilder.TopicsInfo(
        Collections.emptySet(),
        mkSet("topic-1", "topic-1x", "topic-2"),
        Collections.emptyMap(),
        Collections.singletonMap(changelog1, new UnwindowedChangelogTopicConfig(changelog1, Collections.emptyMap())));
    final InternalTopologyBuilder.TopicsInfo group1 = new InternalTopologyBuilder.TopicsInfo(
        Collections.emptySet(),
        mkSet("topic-3", "topic-4"),
        Collections.emptyMap(),
        Collections.singletonMap(changelog2, new UnwindowedChangelogTopicConfig(changelog2, Collections.emptyMap())));
    final InternalTopologyBuilder.TopicsInfo group2 = new InternalTopologyBuilder.TopicsInfo(
        Collections.emptySet(),
        mkSet("topic-5"),
        Collections.emptyMap(),
        Collections.singletonMap(changelog3, new UnwindowedChangelogTopicConfig(changelog3, Collections.emptyMap())));

    final Map<Subtopology, InternalTopologyBuilder.TopicsInfo> expected = new HashMap<>();
    expected.put(SUBTOPOLOGY_0, group0);
    expected.put(SUBTOPOLOGY_1, group1);
    expected.put(SUBTOPOLOGY_2, group2);

    assertEquals(3, actual.size());
    assertEquals(expected, actual);
}
Also used : HashMap(java.util.HashMap) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) Subtopology(org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology) MockKeyValueStoreBuilder(org.apache.kafka.test.MockKeyValueStoreBuilder) Test(org.junit.Test)

Aggregations

Subtopology (org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology)16 TaskId (org.apache.kafka.streams.processor.TaskId)13 HashMap (java.util.HashMap)12 Set (java.util.Set)12 TopicPartition (org.apache.kafka.common.TopicPartition)11 HashSet (java.util.HashSet)10 Map (java.util.Map)8 TopicsInfo (org.apache.kafka.streams.processor.internals.InternalTopologyBuilder.TopicsInfo)7 Test (org.junit.Test)7 TreeSet (java.util.TreeSet)5 Utils.mkSet (org.apache.kafka.common.utils.Utils.mkSet)5 TreeMap (java.util.TreeMap)4 UUID (java.util.UUID)4 SortedSet (java.util.SortedSet)3 UUID.randomUUID (java.util.UUID.randomUUID)3 Collectors (java.util.stream.Collectors)3 Collection (java.util.Collection)2 Collections (java.util.Collections)2 Collections.emptyMap (java.util.Collections.emptyMap)2 Objects (java.util.Objects)2