Search in sources :

Example 46 with PartitionInfo

use of org.apache.kafka.common.PartitionInfo in project kafka by apache.

The example below is the method assign of the class StreamPartitionAssignor.

/*
     * This assigns tasks to consumer clients in the following steps.
     *
     * 0. check all repartition source topics and use internal topic manager to make sure
     *    they have been created with the right number of partitions.
     *
     * 1. using user customized partition grouper to generate tasks along with their
     *    assigned partitions; also make sure that the task's corresponding changelog topics
     *    have been created with the right number of partitions.
     *
     * 2. using TaskAssignor to assign tasks to consumer clients.
     *    - Assign a task to a client which was running it previously.
     *      If there is no such client, assign a task to a client which has its valid local state.
     *    - A client may have more than one stream threads.
     *      The assignor tries to assign tasks to a client proportionally to the number of threads.
     *    - We try not to assign the same set of tasks to two different clients
     *    We do the assignment in one-pass. The result may not satisfy above all.
     *
     * 3. within each client, tasks are assigned to consumer clients in round-robin manner.
     *
     * Besides returning the assignment, this method also overwrites the fields
     * metadataWithInternalTopics and partitionsByHostState.
     *
     * @param metadata      cluster metadata, used to look up the partition counts of the source topics
     * @param subscriptions member subscriptions keyed by consumer id; the user data of each one is
     *                      decoded as a SubscriptionInfo
     * @return the assignment per consumer id; each one carries the active partitions plus an encoded
     *         AssignmentInfo (active task ids, standby tasks, partitions-by-host map)
     */
@Override
public Map<String, Assignment> assign(Cluster metadata, Map<String, Subscription> subscriptions) {
    // construct the client metadata from the decoded subscription info;
    // consumers that report the same processId are grouped under one ClientMetadata
    Map<UUID, ClientMetadata> clientsMetadata = new HashMap<>();
    for (Map.Entry<String, Subscription> entry : subscriptions.entrySet()) {
        String consumerId = entry.getKey();
        Subscription subscription = entry.getValue();
        SubscriptionInfo info = SubscriptionInfo.decode(subscription.userData());
        // create the new client metadata if necessary
        ClientMetadata clientMetadata = clientsMetadata.get(info.processId);
        if (clientMetadata == null) {
            clientMetadata = new ClientMetadata(info.userEndPoint);
            clientsMetadata.put(info.processId, clientMetadata);
        }
        // add the consumer to the client
        clientMetadata.addConsumer(consumerId, info);
    }
    log.info("stream-thread [{}] Constructed client metadata {} from the member subscriptions.", streamThread.getName(), clientsMetadata);
    // ---------------- Step Zero ---------------- //
    // parse the topology to determine the repartition source topics,
    // making sure they are created with the number of partitions as
    // the maximum of the depending sub-topologies source topics' number of partitions
    Map<Integer, TopologyBuilder.TopicsInfo> topicGroups = streamThread.builder.topicGroups();
    Map<String, InternalTopicMetadata> repartitionTopicMetadata = new HashMap<>();
    for (TopologyBuilder.TopicsInfo topicsInfo : topicGroups.values()) {
        for (InternalTopicConfig topic : topicsInfo.repartitionSourceTopics.values()) {
            repartitionTopicMetadata.put(topic.name(), new InternalTopicMetadata(topic));
        }
    }
    // Fixed-point iteration: a repartition topic's partition count may depend on another
    // repartition topic whose count has not been resolved yet, so keep sweeping over all
    // topic groups until a full pass leaves no repartition topic at UNKNOWN.
    boolean numPartitionsNeeded;
    do {
        numPartitionsNeeded = false;
        for (TopologyBuilder.TopicsInfo topicsInfo : topicGroups.values()) {
            for (String topicName : topicsInfo.repartitionSourceTopics.keySet()) {
                int numPartitions = repartitionTopicMetadata.get(topicName).numPartitions;
                // try set the number of partitions for this repartition topic if it is not set yet
                if (numPartitions == UNKNOWN) {
                    // find every topic group that writes to this repartition topic
                    for (TopologyBuilder.TopicsInfo otherTopicsInfo : topicGroups.values()) {
                        Set<String> otherSinkTopics = otherTopicsInfo.sinkTopics;
                        if (otherSinkTopics.contains(topicName)) {
                            // use the maximum of all its source topic partitions as the number of partitions
                            for (String sourceTopicName : otherTopicsInfo.sourceTopics) {
                                Integer numPartitionsCandidate;
                                // map().join().join(map())
                                // (the source topic may itself be a repartition topic: take its
                                // possibly still unresolved count from our own bookkeeping)
                                if (repartitionTopicMetadata.containsKey(sourceTopicName)) {
                                    numPartitionsCandidate = repartitionTopicMetadata.get(sourceTopicName).numPartitions;
                                } else {
                                    numPartitionsCandidate = metadata.partitionCountForTopic(sourceTopicName);
                                    // the cluster metadata does not know this source topic:
                                    // mark the repartition topic as NOT_AVAILABLE
                                    if (numPartitionsCandidate == null) {
                                        repartitionTopicMetadata.get(topicName).numPartitions = NOT_AVAILABLE;
                                    }
                                }
                                // NOTE(review): this max-comparison presumably relies on UNKNOWN being
                                // smaller than any real partition count — confirm against the constants.
                                if (numPartitionsCandidate != null && numPartitionsCandidate > numPartitions) {
                                    numPartitions = numPartitionsCandidate;
                                }
                            }
                        }
                    }
                    // another iteration is needed
                    // NOTE(review): when some other source topic did yield a count, the else branch
                    // below overwrites a NOT_AVAILABLE marker set above — confirm this is intended.
                    // When no count was found at all, the marker stays, so on the next pass the topic
                    // is no longer UNKNOWN and is skipped (assuming NOT_AVAILABLE != UNKNOWN).
                    if (numPartitions == UNKNOWN)
                        numPartitionsNeeded = true;
                    else
                        repartitionTopicMetadata.get(topicName).numPartitions = numPartitions;
                }
            }
        }
    } while (numPartitionsNeeded);
    // augment the metadata with the newly computed number of partitions for all the
    // repartition source topics
    // (placeholder PartitionInfo entries: null and empty Node arrays for the node-related arguments)
    // NOTE(review): this map is built from the partition counts as they are BEFORE
    // ensureCopartitioning(...) below runs; if that call adjusts numPartitions, the map and
    // metadataWithInternalTopics would not reflect the adjusted values — confirm.
    Map<TopicPartition, PartitionInfo> allRepartitionTopicPartitions = new HashMap<>();
    for (Map.Entry<String, InternalTopicMetadata> entry : repartitionTopicMetadata.entrySet()) {
        String topic = entry.getKey();
        Integer numPartitions = entry.getValue().numPartitions;
        for (int partition = 0; partition < numPartitions; partition++) {
            allRepartitionTopicPartitions.put(new TopicPartition(topic, partition), new PartitionInfo(topic, partition, null, new Node[0], new Node[0]));
        }
    }
    // ensure the co-partitioning topics within the group have the same number of partitions,
    // and enforce the number of partitions for those repartition topics to be the same if they
    // are co-partitioned as well.
    ensureCopartitioning(streamThread.builder.copartitionGroups(), repartitionTopicMetadata, metadata);
    // make sure the repartition source topics exist with the right number of partitions,
    // create these topics if necessary
    prepareTopic(repartitionTopicMetadata);
    // field assignment: cluster metadata extended with the repartition topic partitions
    metadataWithInternalTopics = metadata.withPartitions(allRepartitionTopicPartitions);
    log.debug("stream-thread [{}] Created repartition topics {} from the parsed topology.", streamThread.getName(), allRepartitionTopicPartitions.values());
    // ---------------- Step One ---------------- //
    // get the tasks as partition groups from the partition grouper
    Set<String> allSourceTopics = new HashSet<>();
    Map<Integer, Set<String>> sourceTopicsByGroup = new HashMap<>();
    for (Map.Entry<Integer, TopologyBuilder.TopicsInfo> entry : topicGroups.entrySet()) {
        allSourceTopics.addAll(entry.getValue().sourceTopics);
        sourceTopicsByGroup.put(entry.getKey(), entry.getValue().sourceTopics);
    }
    Map<TaskId, Set<TopicPartition>> partitionsForTask = streamThread.partitionGrouper.partitionGroups(sourceTopicsByGroup, metadataWithInternalTopics);
    // check if all partitions are assigned, and there are no duplicates of partitions in multiple tasks
    // (violations are only logged as warnings; the assignment continues regardless)
    Set<TopicPartition> allAssignedPartitions = new HashSet<>();
    Map<Integer, Set<TaskId>> tasksByTopicGroup = new HashMap<>();
    for (Map.Entry<TaskId, Set<TopicPartition>> entry : partitionsForTask.entrySet()) {
        Set<TopicPartition> partitions = entry.getValue();
        for (TopicPartition partition : partitions) {
            if (allAssignedPartitions.contains(partition)) {
                log.warn("stream-thread [{}] Partition {} is assigned to more than one tasks: {}", streamThread.getName(), partition, partitionsForTask);
            }
        }
        allAssignedPartitions.addAll(partitions);
        // index the task ids by the topic group they belong to
        TaskId id = entry.getKey();
        Set<TaskId> ids = tasksByTopicGroup.get(id.topicGroupId);
        if (ids == null) {
            ids = new HashSet<>();
            tasksByTopicGroup.put(id.topicGroupId, ids);
        }
        ids.add(id);
    }
    // warn about source topic partitions that did not end up in any task
    // NOTE(review): partitionInfoList is dereferenced without a null check — confirm that
    // partitionsForTopic never returns null for an unknown topic in the targeted Kafka version.
    for (String topic : allSourceTopics) {
        List<PartitionInfo> partitionInfoList = metadataWithInternalTopics.partitionsForTopic(topic);
        if (!partitionInfoList.isEmpty()) {
            for (PartitionInfo partitionInfo : partitionInfoList) {
                TopicPartition partition = new TopicPartition(partitionInfo.topic(), partitionInfo.partition());
                if (!allAssignedPartitions.contains(partition)) {
                    log.warn("stream-thread [{}] Partition {} is not assigned to any tasks: {}", streamThread.getName(), partition, partitionsForTask);
                }
            }
        } else {
            log.warn("stream-thread [{}] No partitions found for topic {}", streamThread.getName(), topic);
        }
    }
    // add tasks to state change log topic subscribers
    Map<String, InternalTopicMetadata> changelogTopicMetadata = new HashMap<>();
    for (Map.Entry<Integer, TopologyBuilder.TopicsInfo> entry : topicGroups.entrySet()) {
        final int topicGroupId = entry.getKey();
        final Map<String, InternalTopicConfig> stateChangelogTopics = entry.getValue().stateChangelogTopics;
        for (InternalTopicConfig topicConfig : stateChangelogTopics.values()) {
            // the expected number of partitions is the max value of TaskId.partition + 1
            int numPartitions = UNKNOWN;
            if (tasksByTopicGroup.get(topicGroupId) != null) {
                for (TaskId task : tasksByTopicGroup.get(topicGroupId)) {
                    if (numPartitions < task.partition + 1)
                        numPartitions = task.partition + 1;
                }
                InternalTopicMetadata topicMetadata = new InternalTopicMetadata(topicConfig);
                topicMetadata.numPartitions = numPartitions;
                changelogTopicMetadata.put(topicConfig.name(), topicMetadata);
            } else {
                // a topic group without tasks gets no changelog topic metadata
                log.debug("stream-thread [{}] No tasks found for topic group {}", streamThread.getName(), topicGroupId);
            }
        }
    }
    prepareTopic(changelogTopicMetadata);
    log.debug("stream-thread [{}] Created state changelog topics {} from the parsed topology.", streamThread.getName(), changelogTopicMetadata);
    // ---------------- Step Two ---------------- //
    // assign tasks to clients
    // (the ClientState objects are handed to the assignor and read back below via clientsMetadata)
    Map<UUID, ClientState> states = new HashMap<>();
    for (Map.Entry<UUID, ClientMetadata> entry : clientsMetadata.entrySet()) {
        states.put(entry.getKey(), entry.getValue().state);
    }
    log.debug("stream-thread [{}] Assigning tasks {} to clients {} with number of replicas {}", streamThread.getName(), partitionsForTask.keySet(), states, numStandbyReplicas);
    final StickyTaskAssignor<UUID> taskAssignor = new StickyTaskAssignor<>(states, partitionsForTask.keySet());
    taskAssignor.assign(numStandbyReplicas);
    log.info("stream-thread [{}] Assigned tasks to clients as {}.", streamThread.getName(), states);
    // ---------------- Step Three ---------------- //
    // construct the global partition assignment per host map
    // (only clients with a non-null hostInfo are included; only active tasks are counted)
    partitionsByHostState = new HashMap<>();
    for (Map.Entry<UUID, ClientMetadata> entry : clientsMetadata.entrySet()) {
        HostInfo hostInfo = entry.getValue().hostInfo;
        if (hostInfo != null) {
            final Set<TopicPartition> topicPartitions = new HashSet<>();
            final ClientState state = entry.getValue().state;
            for (final TaskId id : state.activeTasks()) {
                topicPartitions.addAll(partitionsForTask.get(id));
            }
            partitionsByHostState.put(hostInfo, topicPartitions);
        }
    }
    // within the client, distribute tasks to its owned consumers
    Map<String, Assignment> assignment = new HashMap<>();
    for (Map.Entry<UUID, ClientMetadata> entry : clientsMetadata.entrySet()) {
        final Set<String> consumers = entry.getValue().consumers;
        final ClientState state = entry.getValue().state;
        // active tasks are appended first, so list indices below numActiveTasks are active
        // tasks and the remaining indices are standby tasks
        final ArrayList<TaskId> taskIds = new ArrayList<>(state.assignedTaskCount());
        final int numActiveTasks = state.activeTaskCount();
        taskIds.addAll(state.activeTasks());
        taskIds.addAll(state.standbyTasks());
        final int numConsumers = consumers.size();
        // i is the position of the current consumer within this client
        int i = 0;
        for (String consumer : consumers) {
            Map<TaskId, Set<TopicPartition>> standby = new HashMap<>();
            ArrayList<AssignedPartition> assignedPartitions = new ArrayList<>();
            final int numTaskIds = taskIds.size();
            // round-robin: the i-th consumer takes tasks i, i + numConsumers, i + 2 * numConsumers, ...
            for (int j = i; j < numTaskIds; j += numConsumers) {
                TaskId taskId = taskIds.get(j);
                if (j < numActiveTasks) {
                    for (TopicPartition partition : partitionsForTask.get(taskId)) {
                        assignedPartitions.add(new AssignedPartition(taskId, partition));
                    }
                } else {
                    Set<TopicPartition> standbyPartitions = standby.get(taskId);
                    if (standbyPartitions == null) {
                        standbyPartitions = new HashSet<>();
                        standby.put(taskId, standbyPartitions);
                    }
                    standbyPartitions.addAll(partitionsForTask.get(taskId));
                }
            }
            // sort by AssignedPartition's own ordering, then split into two parallel lists:
            // active.get(k) is the task that owns activePartitions.get(k)
            Collections.sort(assignedPartitions);
            List<TaskId> active = new ArrayList<>();
            List<TopicPartition> activePartitions = new ArrayList<>();
            for (AssignedPartition partition : assignedPartitions) {
                active.add(partition.taskId);
                activePartitions.add(partition.partition);
            }
            // finally, encode the assignment before sending back to coordinator
            assignment.put(consumer, new Assignment(activePartitions, new AssignmentInfo(active, standby, partitionsByHostState).encode()));
            i++;
        }
    }
    return assignment;
}
Also used : ClientState(org.apache.kafka.streams.processor.internals.assignment.ClientState) HashMap(java.util.HashMap) Node(org.apache.kafka.common.Node) ArrayList(java.util.ArrayList) SubscriptionInfo(org.apache.kafka.streams.processor.internals.assignment.SubscriptionInfo) StickyTaskAssignor(org.apache.kafka.streams.processor.internals.assignment.StickyTaskAssignor) AssignmentInfo(org.apache.kafka.streams.processor.internals.assignment.AssignmentInfo) PartitionInfo(org.apache.kafka.common.PartitionInfo) UUID(java.util.UUID) HashSet(java.util.HashSet) TopicPartition(org.apache.kafka.common.TopicPartition) HashMap(java.util.HashMap) Map(java.util.Map) HashSet(java.util.HashSet) Set(java.util.Set) TaskId(org.apache.kafka.streams.processor.TaskId) TopologyBuilder(org.apache.kafka.streams.processor.TopologyBuilder) HostInfo(org.apache.kafka.streams.state.HostInfo)

Example 47 with PartitionInfo

use of org.apache.kafka.common.PartitionInfo in project kafka by apache.

The example below is the method topicPartitionsForStore of the class GlobalStateManagerImpl.

/**
 * Resolves every partition of the changelog topic that backs the given store.
 *
 * @param store the state store whose backing topic is looked up by store name
 * @return one TopicPartition per partition the consumer reports for that topic
 * @throws StreamsException if the consumer reports no partitions (null or empty) for the topic
 */
private List<TopicPartition> topicPartitionsForStore(final StateStore store) {
    final String changelogTopic = topology.storeToChangelogTopic().get(store.name());
    final List<PartitionInfo> reported = consumer.partitionsFor(changelogTopic);
    final boolean nothingReported = reported == null || reported.isEmpty();
    if (nothingReported) {
        final String message = String.format("There are no partitions available for topic %s when initializing global store %s", changelogTopic, store.name());
        throw new StreamsException(message);
    }
    // translate the consumer's PartitionInfo entries into plain topic/partition pairs
    final List<TopicPartition> resolved = new ArrayList<>();
    for (PartitionInfo info : reported) {
        final TopicPartition topicPartition = new TopicPartition(info.topic(), info.partition());
        resolved.add(topicPartition);
    }
    return resolved;
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) StreamsException(org.apache.kafka.streams.errors.StreamsException) ArrayList(java.util.ArrayList) PartitionInfo(org.apache.kafka.common.PartitionInfo)

Example 48 with PartitionInfo

use of org.apache.kafka.common.PartitionInfo in project kafka by apache.

The example below is the method partitionGroups of the class DefaultPartitionGrouper.

/**
 * Builds the tasks of every topic group: one task per partition index, holding the
 * partition with that index from each topic of the group that has such a partition.
 *
 * @param topicGroups   group of topics that need to be joined together, keyed by topic group id
 * @param metadata      metadata of the consuming cluster
 * @return an unmodifiable map from generated task ids to their (unmodifiable) assigned partitions
 */
public Map<TaskId, Set<TopicPartition>> partitionGroups(Map<Integer, Set<String>> topicGroups, Cluster metadata) {
    Map<TaskId, Set<TopicPartition>> tasks = new HashMap<>();
    for (Map.Entry<Integer, Set<String>> groupEntry : topicGroups.entrySet()) {
        Integer groupId = groupEntry.getKey();
        Set<String> topics = groupEntry.getValue();
        // the widest topic of the group decides how many tasks the group gets
        int taskCount = maxNumPartitions(metadata, topics);
        int index = 0;
        while (index < taskCount) {
            Set<TopicPartition> members = new HashSet<>(topics.size());
            for (String topicName : topics) {
                // narrower topics simply do not contribute to the higher-numbered tasks
                int available = metadata.partitionsForTopic(topicName).size();
                if (index < available) {
                    members.add(new TopicPartition(topicName, index));
                }
            }
            Set<TopicPartition> frozenMembers = Collections.unmodifiableSet(members);
            tasks.put(new TaskId(groupId, index), frozenMembers);
            index++;
        }
    }
    return Collections.unmodifiableMap(tasks);
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) TopicPartition(org.apache.kafka.common.TopicPartition) PartitionInfo(org.apache.kafka.common.PartitionInfo) HashMap(java.util.HashMap) Map(java.util.Map) HashSet(java.util.HashSet)

Example 49 with PartitionInfo

use of org.apache.kafka.common.PartitionInfo in project flink by apache.

The example below is the method testPropagateExceptions of the class KafkaProducerTest.

/**
 * Checks both error modes of the producer sink against a mocked KafkaProducer whose
 * send callback always reports a failure: by default the failure must surface as an
 * exception from the test harness, and with log-failures-only enabled it must not.
 */
@Test
@SuppressWarnings("unchecked")
public void testPropagateExceptions() {
    try {
        // answer that completes every send with an error through the supplied callback
        final Answer<Future<RecordMetadata>> failingSend = new Answer<Future<RecordMetadata>>() {

            @Override
            public Future<RecordMetadata> answer(InvocationOnMock invocation) throws Throwable {
                Object[] arguments = invocation.getArguments();
                Callback completion = (Callback) arguments[1];
                completion.onCompletion(null, new Exception("Test error"));
                return null;
            }
        };

        // mock kafka producer
        KafkaProducer<?, ?> mockedProducer = mock(KafkaProducer.class);
        // partition setup: an unmodifiable list mimics KafkaProducer#partitionsFor() behaviour
        when(mockedProducer.partitionsFor(anyString()))
            .thenReturn(Collections.singletonList(new PartitionInfo("mock_topic", 42, null, null, null)));
        // failure when trying to send an element
        when(mockedProducer.send(any(ProducerRecord.class), any(Callback.class))).thenAnswer(failingSend);
        // make sure the FlinkKafkaProducer instantiates our mock producer
        whenNew(KafkaProducer.class).withAnyArguments().thenReturn(mockedProducer);

        // (1) producer that propagates errors
        FlinkKafkaProducer09<String> producerPropagating = new FlinkKafkaProducer09<>("mock_topic", new SimpleStringSchema(), FakeStandardProducerConfig.get(), null);
        OneInputStreamOperatorTestHarness<String, Object> propagatingHarness = new OneInputStreamOperatorTestHarness<>(new StreamSink(producerPropagating));
        propagatingHarness.open();
        try {
            propagatingHarness.processElement(new StreamRecord<>("value"));
            propagatingHarness.processElement(new StreamRecord<>("value"));
            fail("This should fail with an exception");
        } catch (Exception e) {
            // the callback's error is expected as the cause of the surfaced exception
            Throwable cause = e.getCause();
            assertNotNull(cause);
            String causeMessage = cause.getMessage();
            assertNotNull(causeMessage);
            assertTrue(causeMessage.contains("Test error"));
        }

        // (2) producer that only logs errors
        FlinkKafkaProducer09<String> producerLogging = new FlinkKafkaProducer09<>("mock_topic", new SimpleStringSchema(), FakeStandardProducerConfig.get(), null);
        producerLogging.setLogFailuresOnly(true);
        OneInputStreamOperatorTestHarness<String, Object> loggingHarness = new OneInputStreamOperatorTestHarness<>(new StreamSink(producerLogging));
        loggingHarness.open();
        loggingHarness.processElement(new StreamRecord<>("value"));
        loggingHarness.processElement(new StreamRecord<>("value"));
        loggingHarness.close();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : StreamSink(org.apache.flink.streaming.api.operators.StreamSink) Mockito.anyString(org.mockito.Mockito.anyString) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) Callback(org.apache.kafka.clients.producer.Callback) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) Future(java.util.concurrent.Future) SimpleStringSchema(org.apache.flink.streaming.util.serialization.SimpleStringSchema) PartitionInfo(org.apache.kafka.common.PartitionInfo) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Aggregations

PartitionInfo (org.apache.kafka.common.PartitionInfo)49 TopicPartition (org.apache.kafka.common.TopicPartition)30 Test (org.junit.Test)23 HashMap (java.util.HashMap)17 ArrayList (java.util.ArrayList)15 Node (org.apache.kafka.common.Node)12 Map (java.util.Map)11 Cluster (org.apache.kafka.common.Cluster)11 HashSet (java.util.HashSet)10 Set (java.util.Set)7 TaskId (org.apache.kafka.streams.processor.TaskId)7 StreamsConfig (org.apache.kafka.streams.StreamsConfig)6 MockTime (org.apache.kafka.common.utils.MockTime)5 List (java.util.List)4 Properties (java.util.Properties)4 KStreamBuilder (org.apache.kafka.streams.kstream.KStreamBuilder)4 HostInfo (org.apache.kafka.streams.state.HostInfo)4 StreamsMetadata (org.apache.kafka.streams.state.StreamsMetadata)4 File (java.io.File)3 MockConsumer (org.apache.kafka.clients.consumer.MockConsumer)3