Search in sources:

Example 96 with PartitionInfo

use of org.apache.kafka.common.PartitionInfo in project kafka by apache.

The send method of the RecordCollectorImpl class.

/**
 * Resolves the target partition for a record (when a custom partitioner is supplied) and then
 * delegates to the partition-aware {@code send} overload.
 *
 * <p>With no partitioner the partition is passed on as {@code null}. With a partitioner, the
 * topic's partition list is fetched from the producer and its size is handed to the partitioner.
 *
 * @throws StreamsException fatal error that should cause the thread to die
 * @throws TaskMigratedException recoverable error that would cause the task to be removed
 */
@Override
public <K, V> void send(final String topic, final K key, final V value, final Headers headers, final Long timestamp, final Serializer<K> keySerializer, final Serializer<V> valueSerializer, final StreamPartitioner<? super K, ? super V> partitioner) {
    final Integer targetPartition;
    if (partitioner == null) {
        // no custom partitioner: hand a null partition to the delegate
        targetPartition = null;
    } else {
        final List<PartitionInfo> topicPartitions;
        try {
            topicPartitions = streamsProducer.partitionsFor(topic);
        } catch (final TimeoutException timeoutException) {
            log.warn("Could not get partitions for topic {}, will retry", topic);
            // propagate unchanged so that `task.timeout.ms` handling is triggered
            throw timeoutException;
        } catch (final KafkaException fatal) {
            // every other Kafka failure here is wrapped and reported as fatal
            throw new StreamsException("Could not determine the number of partitions for topic '" + topic + "' for task " + taskId + " due to " + fatal.toString(), fatal);
        }
        final int partitionCount = topicPartitions.size();
        if (partitionCount <= 0) {
            throw new StreamsException("Could not get partition information for topic " + topic + " for task " + taskId + ". This can happen if the topic does not exist.");
        }
        targetPartition = partitioner.partition(topic, key, value, partitionCount);
    }
    send(topic, key, value, headers, targetPartition, timestamp, keySerializer, valueSerializer);
}
Also used : StreamsException(org.apache.kafka.streams.errors.StreamsException) KafkaException(org.apache.kafka.common.KafkaException) PartitionInfo(org.apache.kafka.common.PartitionInfo) TimeoutException(org.apache.kafka.common.errors.TimeoutException)

Example 97 with PartitionInfo

use of org.apache.kafka.common.PartitionInfo in project kafka by apache.

The setup method of the RepartitionTopics class.

/**
 * Computes the repartition topic configuration from the cluster metadata and, when there is
 * at least one repartition topic, validates co-partitioning, asks the internal topic manager
 * to make the topics ready, and records a {@link PartitionInfo} for every partition of every
 * repartition topic in {@code topicPartitionInfos}.
 *
 * <p>When no repartition topics are computed, the method only logs why validation is skipped.
 */
public void setup() {
    final Map<String, InternalTopicConfig> repartitionTopicMetadata = computeRepartitionTopicConfig(clusterMetadata);
    if (repartitionTopicMetadata.isEmpty()) {
        if (missingInputTopicsBySubtopology.isEmpty()) {
            log.info("Skipping the repartition topic validation since there are no repartition topics.");
        } else {
            // fixed: the two halves of this message were previously joined without a space ("repartitiontopics")
            log.info("Skipping the repartition topic validation since all topologies containing repartition " + "topics are missing external user source topics and cannot be processed.");
        }
    } else {
        // ensure the co-partitioning topics within the group have the same number of partitions,
        // and enforce the number of partitions for those repartition topics to be the same if they
        // are co-partitioned as well.
        ensureCopartitioning(topologyMetadata.copartitionGroups(), repartitionTopicMetadata, clusterMetadata);
        // make sure the repartition source topics exist with the right number of partitions,
        // create these topics if necessary
        internalTopicManager.makeReady(repartitionTopicMetadata);
        // register one entry per partition of each repartition source topic
        for (final Map.Entry<String, InternalTopicConfig> entry : repartitionTopicMetadata.entrySet()) {
            final String topic = entry.getKey();
            // a config without a partition count yields -1, so the loop below adds nothing for it
            final int numPartitions = entry.getValue().numberOfPartitions().orElse(-1);
            for (int partition = 0; partition < numPartitions; partition++) {
                // the entry is created with a null third argument and two empty Node arrays
                topicPartitionInfos.put(new TopicPartition(topic, partition), new PartitionInfo(topic, partition, null, new Node[0], new Node[0]));
            }
        }
    }
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) Node(org.apache.kafka.common.Node) PartitionInfo(org.apache.kafka.common.PartitionInfo) HashMap(java.util.HashMap) Map(java.util.Map)

Example 98 with PartitionInfo

use of org.apache.kafka.common.PartitionInfo in project kafka by apache.

The partitionGroups method of the PartitionGrouper class.

/**
 * Builds the tasks for every subtopology together with the topic partitions each task reads.
 *
 * <p>For each subtopology, one task is created per partition index below the value returned by
 * {@code maxNumPartitions} for its topics. A task with index {@code i} receives partition
 * {@code i} of every topic in the group that has more than {@code i} partitions.
 *
 * @param topicGroups   group of topics that need to be joined together
 * @param metadata      metadata of the consuming cluster
 * @return an unmodifiable map from generated task ids to their (unmodifiable) assigned partitions
 */
public Map<TaskId, Set<TopicPartition>> partitionGroups(final Map<Subtopology, Set<String>> topicGroups, final Cluster metadata) {
    final Map<TaskId, Set<TopicPartition>> partitionsByTask = new HashMap<>();
    for (final Map.Entry<Subtopology, Set<String>> groupEntry : topicGroups.entrySet()) {
        final Subtopology subtopology = groupEntry.getKey();
        final Set<String> topics = groupEntry.getValue();
        final int taskCount = maxNumPartitions(metadata, topics);
        for (int partitionId = 0; partitionId < taskCount; partitionId++) {
            final Set<TopicPartition> taskPartitions = new HashSet<>(topics.size());
            for (final String topic : topics) {
                // topics with fewer partitions simply do not contribute to the higher-numbered tasks
                final List<PartitionInfo> topicPartitions = metadata.partitionsForTopic(topic);
                final int partitionCount = topicPartitions.size();
                if (partitionCount > partitionId) {
                    taskPartitions.add(new TopicPartition(topic, partitionId));
                }
            }
            final TaskId taskId = new TaskId(subtopology.nodeGroupId, partitionId, subtopology.namedTopology);
            partitionsByTask.put(taskId, Collections.unmodifiableSet(taskPartitions));
        }
    }
    return Collections.unmodifiableMap(partitionsByTask);
}
Also used : TaskId(org.apache.kafka.streams.processor.TaskId) Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) TopicPartition(org.apache.kafka.common.TopicPartition) PartitionInfo(org.apache.kafka.common.PartitionInfo) HashMap(java.util.HashMap) Map(java.util.Map) Subtopology(org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology) HashSet(java.util.HashSet)

Example 99 with PartitionInfo

use of org.apache.kafka.common.PartitionInfo in project kafka by apache.

The assign method of the StreamsPartitionAssignor class.

/*
 * This assigns tasks to consumer clients in the following steps.
 *
 * 0. decode the subscriptions to assemble the metadata for each client and check for version probing
 *
 * 1. check all repartition source topics and use internal topic manager to make sure
 *    they have been created with the right number of partitions. Also verify and/or create
 *    any changelog topics with the correct number of partitions.
 *
 * 2. use the partition grouper to generate tasks along with their assigned partitions, then use
 *    the configured TaskAssignor to construct the mapping of tasks to clients.
 *
 * 3. construct the global mapping of host to partitions to enable query routing.
 *
 * 4. within each client, assign tasks to consumer clients.
 *
 * Returns the computed group assignment. If any member requested a shutdown, or if a
 * MissingSourceTopicException / TaskAssignmentException is thrown while assigning, an error
 * assignment carrying the corresponding AssignorError code is returned instead.
 */
@Override
public GroupAssignment assign(final Cluster metadata, final GroupSubscription groupSubscription) {
    final Map<String, Subscription> subscriptions = groupSubscription.groupSubscription();
    // ---------------- Step Zero ---------------- //
    // construct the client metadata from the decoded subscription info
    final Map<UUID, ClientMetadata> clientMetadataMap = new HashMap<>();
    final Set<TopicPartition> allOwnedPartitions = new HashSet<>();
    // both minimums start at the latest version and are lowered per subscription in the loop below
    int minReceivedMetadataVersion = LATEST_SUPPORTED_VERSION;
    int minSupportedMetadataVersion = LATEST_SUPPORTED_VERSION;
    // set when any subscription carries the SHUTDOWN_REQUESTED error code
    boolean shutdownRequested = false;
    // set when the owned partitions of the subscriptions overlap (see the size check below)
    boolean assignmentErrorFound = false;
    // version used by a member that is newer than LATEST_SUPPORTED_VERSION, if there is one
    int futureMetadataVersion = UNKNOWN;
    for (final Map.Entry<String, Subscription> entry : subscriptions.entrySet()) {
        final String consumerId = entry.getKey();
        final Subscription subscription = entry.getValue();
        final SubscriptionInfo info = SubscriptionInfo.decode(subscription.userData());
        final int usedVersion = info.version();
        if (info.errorCode() == AssignorError.SHUTDOWN_REQUESTED.code()) {
            shutdownRequested = true;
        }
        minReceivedMetadataVersion = updateMinReceivedVersion(usedVersion, minReceivedMetadataVersion);
        minSupportedMetadataVersion = updateMinSupportedVersion(info.latestSupportedVersion(), minSupportedMetadataVersion);
        final UUID processId;
        if (usedVersion > LATEST_SUPPORTED_VERSION) {
            // subscription uses a newer version than this assignor supports: group all such
            // consumers under the shared FUTURE_ID client, created with a null endpoint
            futureMetadataVersion = usedVersion;
            processId = FUTURE_ID;
            if (!clientMetadataMap.containsKey(FUTURE_ID)) {
                clientMetadataMap.put(FUTURE_ID, new ClientMetadata(null));
            }
        } else {
            processId = info.processId();
        }
        ClientMetadata clientMetadata = clientMetadataMap.get(processId);
        // create the new client metadata if necessary
        // (the FUTURE_ID entry always exists by now, so this branch is only taken when
        // processId == info.processId())
        if (clientMetadata == null) {
            clientMetadata = new ClientMetadata(info.userEndPoint());
            clientMetadataMap.put(info.processId(), clientMetadata);
        }
        // add the consumer and any info in its subscription to the client
        clientMetadata.addConsumer(consumerId, subscription.ownedPartitions());
        // if the set grows by fewer elements than were offered, at least one of this consumer's
        // owned partitions was already present, i.e. a partition appears more than once
        final int prevSize = allOwnedPartitions.size();
        allOwnedPartitions.addAll(subscription.ownedPartitions());
        if (allOwnedPartitions.size() < prevSize + subscription.ownedPartitions().size()) {
            assignmentErrorFound = true;
        }
        clientMetadata.addPreviousTasksAndOffsetSums(consumerId, info.taskOffsetSums());
    }
    // a duplicated partition is only logged here; the assignment still proceeds
    if (assignmentErrorFound) {
        log.warn("The previous assignment contains a partition more than once. " + "\t Mapping: {}", subscriptions);
    }
    try {
        final boolean versionProbing = checkMetadataVersions(minReceivedMetadataVersion, minSupportedMetadataVersion, futureMetadataVersion);
        log.debug("Constructed client metadata {} from the member subscriptions.", clientMetadataMap);
        // a shutdown request short-circuits the assignment: every client gets the error code
        if (shutdownRequested) {
            return new GroupAssignment(errorAssignment(clientMetadataMap, AssignorError.SHUTDOWN_REQUESTED.code()));
        }
        // ---------------- Step One ---------------- //
        // parse the topology to determine the repartition source topics,
        // making sure they are created with the number of partitions as
        // the maximum of the depending sub-topologies source topics' number of partitions
        final RepartitionTopics repartitionTopics = prepareRepartitionTopics(metadata);
        final Map<TopicPartition, PartitionInfo> allRepartitionTopicPartitions = repartitionTopics.topicPartitionsInfo();
        // the cluster metadata extended with the repartition topic partitions; used for all later steps
        final Cluster fullMetadata = metadata.withPartitions(allRepartitionTopicPartitions);
        log.debug("Created repartition topics {} from the parsed topology.", allRepartitionTopicPartitions.values());
        // ---------------- Step Two ---------------- //
        // construct the assignment of tasks to clients
        // (topologies reported as having missing input topics are excluded)
        final Map<Subtopology, TopicsInfo> topicGroups = taskManager.topologyMetadata().subtopologyTopicsInfoMapExcluding(repartitionTopics.topologiesWithMissingInputTopics());
        final Set<String> allSourceTopics = new HashSet<>();
        final Map<Subtopology, Set<String>> sourceTopicsByGroup = new HashMap<>();
        for (final Map.Entry<Subtopology, TopicsInfo> entry : topicGroups.entrySet()) {
            allSourceTopics.addAll(entry.getValue().sourceTopics);
            sourceTopicsByGroup.put(entry.getKey(), entry.getValue().sourceTopics);
        }
        // get the tasks as partition groups from the partition grouper
        final Map<TaskId, Set<TopicPartition>> partitionsForTask = partitionGrouper.partitionGroups(sourceTopicsByGroup, fullMetadata);
        // passed empty to assignTasksToClients and reused in step four
        // NOTE(review): presumably filled in by assignTasksToClients; confirm against that method
        final Set<TaskId> statefulTasks = new HashSet<>();
        final boolean probingRebalanceNeeded = assignTasksToClients(fullMetadata, allSourceTopics, topicGroups, clientMetadataMap, partitionsForTask, statefulTasks);
        // ---------------- Step Three ---------------- //
        // construct the global partition assignment per host map
        final Map<HostInfo, Set<TopicPartition>> partitionsByHost = new HashMap<>();
        final Map<HostInfo, Set<TopicPartition>> standbyPartitionsByHost = new HashMap<>();
        // the host maps stay empty unless every member used at least metadata version 2
        if (minReceivedMetadataVersion >= 2) {
            populatePartitionsByHostMaps(partitionsByHost, standbyPartitionsByHost, partitionsForTask, clientMetadataMap);
        }
        streamsMetadataState.onChange(partitionsByHost, standbyPartitionsByHost, fullMetadata);
        // ---------------- Step Four ---------------- //
        // compute the assignment of tasks to threads within each client and build the final group assignment
        final Map<String, Assignment> assignment = computeNewAssignment(statefulTasks, clientMetadataMap, partitionsForTask, partitionsByHost, standbyPartitionsByHost, allOwnedPartitions, minReceivedMetadataVersion, minSupportedMetadataVersion, versionProbing, probingRebalanceNeeded);
        return new GroupAssignment(assignment);
    } catch (final MissingSourceTopicException e) {
        // reported to all clients as INCOMPLETE_SOURCE_TOPIC_METADATA rather than rethrown
        log.error("Caught an error in the task assignment. Returning an error assignment.", e);
        return new GroupAssignment(errorAssignment(clientMetadataMap, AssignorError.INCOMPLETE_SOURCE_TOPIC_METADATA.code()));
    } catch (final TaskAssignmentException e) {
        // reported to all clients as ASSIGNMENT_ERROR rather than rethrown
        log.error("Caught an error in the task assignment. Returning an error assignment.", e);
        return new GroupAssignment(errorAssignment(clientMetadataMap, AssignorError.ASSIGNMENT_ERROR.code()));
    }
}
Also used : SortedSet(java.util.SortedSet) Set(java.util.Set) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) TaskId(org.apache.kafka.streams.processor.TaskId) HashMap(java.util.HashMap) SubscriptionInfo(org.apache.kafka.streams.processor.internals.assignment.SubscriptionInfo) TopicsInfo(org.apache.kafka.streams.processor.internals.InternalTopologyBuilder.TopicsInfo) PartitionInfo(org.apache.kafka.common.PartitionInfo) UUID(java.util.UUID) UUID.randomUUID(java.util.UUID.randomUUID) HashSet(java.util.HashSet) TaskAssignmentException(org.apache.kafka.streams.errors.TaskAssignmentException) Cluster(org.apache.kafka.common.Cluster) MissingSourceTopicException(org.apache.kafka.streams.errors.MissingSourceTopicException) TopicPartition(org.apache.kafka.common.TopicPartition) Map(java.util.Map) Utils.filterMap(org.apache.kafka.common.utils.Utils.filterMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) Subtopology(org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology) HostInfo(org.apache.kafka.streams.state.HostInfo)

Example 100 with PartitionInfo

use of org.apache.kafka.common.PartitionInfo in project kafka by apache.

The initializeConsumer method of the GlobalStateManagerImplTest class.

/**
 * Test helper that assigns the given partitions to the test consumer and preloads each of them
 * with {@code numRecords} records at consecutive offsets starting at {@code startOffset}.
 *
 * <p>For every partition the consumer's partition metadata is updated, and after all records are
 * added the beginning offsets are set to {@code startOffset} and the end offsets to
 * {@code startOffset + numRecords}.
 *
 * @param numRecords      number of records to add per partition
 * @param startOffset     offset of the first record in each partition
 * @param topicPartitions partitions to assign and populate
 */
private void initializeConsumer(final long numRecords, final long startOffset, final TopicPartition... topicPartitions) {
    consumer.assign(Arrays.asList(topicPartitions));
    final Map<TopicPartition, Long> beginningOffsets = new HashMap<>();
    final Map<TopicPartition, Long> endOffsets = new HashMap<>();
    for (final TopicPartition tp : topicPartitions) {
        final String topic = tp.topic();
        final int partition = tp.partition();
        beginningOffsets.put(tp, startOffset);
        endOffsets.put(tp, startOffset + numRecords);
        final PartitionInfo partitionInfo = new PartitionInfo(topic, partition, null, null, null);
        consumer.updatePartitions(topic, Collections.singletonList(partitionInfo));
        for (int i = 0; i < numRecords; i++) {
            final long offset = startOffset + i;
            consumer.addRecord(new ConsumerRecord<>(topic, partition, offset, "key".getBytes(), "value".getBytes()));
        }
    }
    consumer.updateEndOffsets(endOffsets);
    consumer.updateBeginningOffsets(beginningOffsets);
}
Also used : HashMap(java.util.HashMap) TopicPartition(org.apache.kafka.common.TopicPartition) PartitionInfo(org.apache.kafka.common.PartitionInfo) OffsetCheckpoint(org.apache.kafka.streams.state.internals.OffsetCheckpoint)

Aggregations

PartitionInfo (org.apache.kafka.common.PartitionInfo): 227; TopicPartition (org.apache.kafka.common.TopicPartition): 142; HashMap (java.util.HashMap): 87; Node (org.apache.kafka.common.Node): 85; Test (org.junit.Test): 82; Cluster (org.apache.kafka.common.Cluster): 80; ArrayList (java.util.ArrayList): 73; HashSet (java.util.HashSet): 67; Set (java.util.Set): 38; Map (java.util.Map): 34; Test (org.junit.jupiter.api.Test): 31; List (java.util.List): 30; TaskId (org.apache.kafka.streams.processor.TaskId): 25; StreamsConfig (org.apache.kafka.streams.StreamsConfig): 16; ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 16; MockConsumer (org.apache.kafka.clients.consumer.MockConsumer): 15; Properties (java.util.Properties): 13; MockTime (org.apache.kafka.common.utils.MockTime): 13; OffsetAndMetadata (org.apache.kafka.clients.consumer.OffsetAndMetadata): 11; HostInfo (org.apache.kafka.streams.state.HostInfo): 11