
Example 1 with Task

Use of org.apache.kafka.streams.processor.internals.Task in the Apache Kafka project.

From the class KafkaStreams, method query().

/**
 * Run an interactive query against a state store.
 * <p>
 * This method allows callers outside of the Streams runtime to access the internal state of
 * stateful processors. See https://kafka.apache.org/documentation/streams/developer-guide/interactive-queries.html
 * for more information.
 *
 * @param <R> The result type specified by the query.
 * @throws StreamsNotStartedException If Streams has not yet been started. Just call {@link
 *                                    KafkaStreams#start()} and then retry this call.
 * @throws StreamsStoppedException    If Streams is in a terminal state like PENDING_SHUTDOWN,
 *                                    NOT_RUNNING, PENDING_ERROR, or ERROR. The caller should
 *                                    discover a new instance to query.
 * @throws UnknownStateStoreException If the specified store name does not exist in the
 *                                    topology.
 */
@Evolving
public <R> StateQueryResult<R> query(final StateQueryRequest<R> request) {
    final String storeName = request.getStoreName();
    if (!topologyMetadata.hasStore(storeName)) {
        throw new UnknownStateStoreException("Cannot get state store " + storeName + " because no such store is registered in the topology.");
    }
    if (state().hasNotStarted()) {
        throw new StreamsNotStartedException("KafkaStreams has not been started, you can retry after calling start().");
    }
    if (state().isShuttingDown() || state().hasCompletedShutdown()) {
        throw new StreamsStoppedException("KafkaStreams has been stopped (" + state + "). This instance can no longer serve queries.");
    }
    final StateQueryResult<R> result = new StateQueryResult<>();
    final Map<String, StateStore> globalStateStores = topologyMetadata.globalStateStores();
    if (globalStateStores.containsKey(storeName)) {
        // See KAFKA-13523
        result.setGlobalResult(QueryResult.forFailure(FailureReason.UNKNOWN_QUERY_TYPE, "Global stores do not yet support the KafkaStreams#query API. Use KafkaStreams#store instead."));
    } else {
        for (final StreamThread thread : threads) {
            final Map<TaskId, Task> tasks = thread.allTasks();
            for (final Entry<TaskId, Task> entry : tasks.entrySet()) {
                final TaskId taskId = entry.getKey();
                final int partition = taskId.partition();
                if (request.isAllPartitions() || request.getPartitions().contains(partition)) {
                    final Task task = entry.getValue();
                    final StateStore store = task.getStore(storeName);
                    if (store != null) {
                        final StreamThread.State state = thread.state();
                        final boolean active = task.isActive();
                        if (request.isRequireActive() && (state != StreamThread.State.RUNNING || !active)) {
                            result.addResult(partition, QueryResult.forFailure(FailureReason.NOT_ACTIVE, "Query requires a running active task," + " but partition was in state " + state + " and was " + (active ? "active" : "not active") + "."));
                        } else {
                            final QueryResult<R> r = store.query(request.getQuery(), request.isRequireActive() ? PositionBound.unbounded() : request.getPositionBound(), new QueryConfig(request.executionInfoEnabled()));
                            result.addResult(partition, r);
                        }
                        // If we already have results for all requested partitions, we can return right away.
                        if (!request.isAllPartitions() && result.getPartitionResults().keySet().containsAll(request.getPartitions())) {
                            return result;
                        }
                    }
                }
            }
        }
    }
    if (!request.isAllPartitions()) {
        for (final Integer partition : request.getPartitions()) {
            if (!result.getPartitionResults().containsKey(partition)) {
                result.addResult(partition, QueryResult.forFailure(FailureReason.NOT_PRESENT, "The requested partition was not present at the time of the query."));
            }
        }
    }
    return result;
}
Also used : State(org.apache.kafka.streams.processor.internals.GlobalStreamThread.State) Task(org.apache.kafka.streams.processor.internals.Task) TaskId(org.apache.kafka.streams.processor.TaskId) QueryConfig(org.apache.kafka.streams.query.QueryConfig) GlobalStreamThread(org.apache.kafka.streams.processor.internals.GlobalStreamThread) StreamThread(org.apache.kafka.streams.processor.internals.StreamThread) StateStore(org.apache.kafka.streams.processor.StateStore) StreamsNotStartedException(org.apache.kafka.streams.errors.StreamsNotStartedException) StreamsStoppedException(org.apache.kafka.streams.errors.StreamsStoppedException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) UnknownStateStoreException(org.apache.kafka.streams.errors.UnknownStateStoreException) StateQueryResult(org.apache.kafka.streams.query.StateQueryResult) Evolving(org.apache.kafka.common.annotation.InterfaceStability.Evolving)
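For context, here is a minimal caller-side sketch of this IQv2 API (KIP-796). It assumes a started KafkaStreams instance named kafkaStreams; the store name "my-store", the key, and the String/Long types are hypothetical.

import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.query.KeyQuery;
import org.apache.kafka.streams.query.QueryResult;
import org.apache.kafka.streams.query.StateQueryRequest;
import org.apache.kafka.streams.query.StateQueryResult;

// Point lookup against a (hypothetical) key-value store named "my-store".
final KeyQuery<String, Long> query = KeyQuery.withKey("some-key");
final StateQueryRequest<Long> request = StateQueryRequest.inStore("my-store").withQuery(query);
// Per the method above, this throws if Streams has not started or has already stopped.
final StateQueryResult<Long> result = kafkaStreams.query(request);
final QueryResult<Long> partitionResult = result.getOnlyPartitionResult();
if (partitionResult.isSuccess()) {
    System.out.println("value = " + partitionResult.getResult());
}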

Example 2 with Task

Use of org.apache.kafka.streams.processor.internals.Task in the Apache Kafka project.

From the class KafkaStreams, method allLocalStorePartitionLags().

protected Map<String, Map<Integer, LagInfo>> allLocalStorePartitionLags(final List<Task> tasksToCollectLagFor) {
    final Map<String, Map<Integer, LagInfo>> localStorePartitionLags = new TreeMap<>();
    final Collection<TopicPartition> allPartitions = new LinkedList<>();
    final Map<TopicPartition, Long> allChangelogPositions = new HashMap<>();
    // Obtain the current positions of all the active (restoring) and standby tasks
    for (final Task task : tasksToCollectLagFor) {
        allPartitions.addAll(task.changelogPartitions());
        // Note that not all changelog partitions will have positions, since some may not have started yet
        allChangelogPositions.putAll(task.changelogOffsets());
    }
    log.debug("Current changelog positions: {}", allChangelogPositions);
    final Map<TopicPartition, ListOffsetsResultInfo> allEndOffsets = fetchEndOffsets(allPartitions, adminClient);
    log.debug("Current end offsets: {}", allEndOffsets);
    for (final Map.Entry<TopicPartition, ListOffsetsResultInfo> entry : allEndOffsets.entrySet()) {
        // Avoiding an extra admin API lookup by computing lags for not-yet-started restorations
        // from zero instead of the real "earliest offset" for the changelog.
        // This will yield the correct relative order of lagginess for the tasks in the cluster,
        // but it is an over-estimate of how much work remains to restore the task from scratch.
        final long earliestOffset = 0L;
        final long changelogPosition = allChangelogPositions.getOrDefault(entry.getKey(), earliestOffset);
        final long latestOffset = entry.getValue().offset();
        final LagInfo lagInfo = new LagInfo(changelogPosition == Task.LATEST_OFFSET ? latestOffset : changelogPosition, latestOffset);
        final String storeName = streamsMetadataState.getStoreForChangelogTopic(entry.getKey().topic());
        localStorePartitionLags.computeIfAbsent(storeName, ignored -> new TreeMap<>()).put(entry.getKey().partition(), lagInfo);
    }
    return Collections.unmodifiableMap(localStorePartitionLags);
}
Also used : SHUTDOWN_CLIENT(org.apache.kafka.streams.errors.StreamsUncaughtExceptionHandler.StreamThreadExceptionResponse.SHUTDOWN_CLIENT) RecordingLevel(org.apache.kafka.common.metrics.Sensor.RecordingLevel) DefaultKafkaClientSupplier(org.apache.kafka.streams.processor.internals.DefaultKafkaClientSupplier) Arrays(java.util.Arrays) TopologyMetadata(org.apache.kafka.streams.processor.internals.TopologyMetadata) StreamsException(org.apache.kafka.streams.errors.StreamsException) StateDirectory(org.apache.kafka.streams.processor.internals.StateDirectory) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) LogContext(org.apache.kafka.common.utils.LogContext) Duration(java.time.Duration) Map(java.util.Map) Metric(org.apache.kafka.common.Metric) MetricName(org.apache.kafka.common.MetricName) StreamsMetricsImpl(org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl) CommonClientConfigs(org.apache.kafka.clients.CommonClientConfigs) TopicPartition(org.apache.kafka.common.TopicPartition) Sensor(org.apache.kafka.common.metrics.Sensor) QueryableStoreProvider(org.apache.kafka.streams.state.internals.QueryableStoreProvider) Evolving(org.apache.kafka.common.annotation.InterfaceStability.Evolving) QueryConfig(org.apache.kafka.streams.query.QueryConfig) MemberToRemove(org.apache.kafka.clients.admin.MemberToRemove) StreamsMetadataState(org.apache.kafka.streams.processor.internals.StreamsMetadataState) Time(org.apache.kafka.common.utils.Time) Collection(java.util.Collection) MetricConfig(org.apache.kafka.common.metrics.MetricConfig) FailureReason(org.apache.kafka.streams.query.FailureReason) ProcessorStateException(org.apache.kafka.streams.errors.ProcessorStateException) Set(java.util.Set) UUID(java.util.UUID) StreamsNotStartedException(org.apache.kafka.streams.errors.StreamsNotStartedException) StateRestoreListener(org.apache.kafka.streams.processor.StateRestoreListener) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) Objects(java.util.Objects) ListOffsetsResultInfo(org.apache.kafka.clients.admin.ListOffsetsResult.ListOffsetsResultInfo) List(java.util.List) Metrics(org.apache.kafka.common.metrics.Metrics) ClientUtils(org.apache.kafka.streams.processor.internals.ClientUtils) StateQueryResult(org.apache.kafka.streams.query.StateQueryResult) GlobalStreamThread(org.apache.kafka.streams.processor.internals.GlobalStreamThread) StreamThreadStateStoreProvider(org.apache.kafka.streams.state.internals.StreamThreadStateStoreProvider) ApiUtils.validateMillisecondDuration(org.apache.kafka.streams.internals.ApiUtils.validateMillisecondDuration) MetricsReporter(org.apache.kafka.common.metrics.MetricsReporter) Entry(java.util.Map.Entry) Optional(java.util.Optional) InvalidStateStoreException(org.apache.kafka.streams.errors.InvalidStateStoreException) ClientMetrics(org.apache.kafka.streams.internals.metrics.ClientMetrics) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) TaskId(org.apache.kafka.streams.processor.TaskId) KafkaMetricsContext(org.apache.kafka.common.metrics.KafkaMetricsContext) HostInfo(org.apache.kafka.streams.state.HostInfo) PositionBound(org.apache.kafka.streams.query.PositionBound) HashMap(java.util.HashMap) RemoveMembersFromConsumerGroupResult(org.apache.kafka.clients.admin.RemoveMembersFromConsumerGroupResult) UnknownStateStoreException(org.apache.kafka.streams.errors.UnknownStateStoreException) ArrayList(java.util.ArrayList) MetricsContext(org.apache.kafka.common.metrics.MetricsContext) HashSet(java.util.HashSet) LinkedHashMap(java.util.LinkedHashMap) StreamsStoppedException(org.apache.kafka.streams.errors.StreamsStoppedException) StateQueryRequest(org.apache.kafka.streams.query.StateQueryRequest) KafkaProducer(org.apache.kafka.clients.producer.KafkaProducer) State(org.apache.kafka.streams.processor.internals.GlobalStreamThread.State) BiConsumer(java.util.function.BiConsumer) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) Admin(org.apache.kafka.clients.admin.Admin) LinkedList(java.util.LinkedList) QueryResult(org.apache.kafka.streams.query.QueryResult) JmxReporter(org.apache.kafka.common.metrics.JmxReporter) TimeoutException(org.apache.kafka.common.errors.TimeoutException) StreamPartitioner(org.apache.kafka.streams.processor.StreamPartitioner) Logger(org.slf4j.Logger) Properties(java.util.Properties) StreamsUncaughtExceptionHandler(org.apache.kafka.streams.errors.StreamsUncaughtExceptionHandler) StreamThread(org.apache.kafka.streams.processor.internals.StreamThread) InvalidStateStorePartitionException(org.apache.kafka.streams.errors.InvalidStateStorePartitionException) ThreadStateTransitionValidator(org.apache.kafka.streams.processor.internals.ThreadStateTransitionValidator) RemoveMembersFromConsumerGroupOptions(org.apache.kafka.clients.admin.RemoveMembersFromConsumerGroupOptions) ClientUtils.fetchEndOffsets(org.apache.kafka.streams.processor.internals.ClientUtils.fetchEndOffsets) GlobalStateStoreProvider(org.apache.kafka.streams.state.internals.GlobalStateStoreProvider) Task(org.apache.kafka.streams.processor.internals.Task) AssignorError(org.apache.kafka.streams.processor.internals.assignment.AssignorError) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) ApiUtils.prepareMillisCheckFailMsgPrefix(org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix) StateStore(org.apache.kafka.streams.processor.StateStore) TreeMap(java.util.TreeMap) Serializer(org.apache.kafka.common.serialization.Serializer) Collections(java.util.Collections) METRICS_RECORDING_LEVEL_CONFIG(org.apache.kafka.streams.StreamsConfig.METRICS_RECORDING_LEVEL_CONFIG)
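The protected method above backs the public KafkaStreams#allLocalStorePartitionLags() API (KIP-535). A minimal sketch of reading the computed lags, assuming a running KafkaStreams instance named kafkaStreams:

import java.util.Map;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.LagInfo;

// Store name -> partition -> lag, as computed by the method above.
final Map<String, Map<Integer, LagInfo>> lags = kafkaStreams.allLocalStorePartitionLags();
lags.forEach((storeName, partitionLags) ->
    partitionLags.forEach((partition, lag) ->
        System.out.printf("%s[%d]: position=%d end=%d lag=%d%n",
            storeName, partition,
            lag.currentOffsetPosition(), lag.endOffsetPosition(), lag.offsetLag())));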

Example 3 with Task

Use of org.apache.kafka.streams.processor.internals.Task in the Apache Kafka project.

From the class StreamThreadStateStoreProvider, method stores().

@SuppressWarnings("unchecked")
public <T> List<T> stores(final StoreQueryParameters storeQueryParams) {
    final StreamThread.State state = streamThread.state();
    if (state == StreamThread.State.DEAD) {
        return Collections.emptyList();
    }
    final String storeName = storeQueryParams.storeName();
    final QueryableStoreType<T> queryableStoreType = storeQueryParams.queryableStoreType();
    final String topologyName = storeQueryParams instanceof NamedTopologyStoreQueryParameters ? ((NamedTopologyStoreQueryParameters) storeQueryParams).topologyName() : null;
    if (storeQueryParams.staleStoresEnabled() ? state.isAlive() : state == StreamThread.State.RUNNING) {
        final Collection<Task> tasks = storeQueryParams.staleStoresEnabled() ? streamThread.allTasks().values() : streamThread.activeTasks();
        if (storeQueryParams.partition() != null) {
            for (final Task task : tasks) {
                if (task.id().partition() == storeQueryParams.partition() && (topologyName == null || topologyName.equals(task.id().topologyName())) && task.getStore(storeName) != null && storeName.equals(task.getStore(storeName).name())) {
                    final T typedStore = validateAndCastStores(task.getStore(storeName), queryableStoreType, storeName, task.id());
                    return Collections.singletonList(typedStore);
                }
            }
            return Collections.emptyList();
        } else {
            final List<T> list = new ArrayList<>();
            for (final Task task : tasks) {
                final StateStore store = task.getStore(storeName);
                // Tasks that don't have this store are simply skipped.
                if (store != null) {
                    final T typedStore = validateAndCastStores(store, queryableStoreType, storeName, task.id());
                    list.add(typedStore);
                }
            }
            return list;
        }
    } else {
        throw new InvalidStateStoreException("Cannot get state store " + storeName + " because the stream thread is " + state + ", not RUNNING" + (storeQueryParams.staleStoresEnabled() ? " or REBALANCING" : ""));
    }
}
Also used : Task(org.apache.kafka.streams.processor.internals.Task) InvalidStateStoreException(org.apache.kafka.streams.errors.InvalidStateStoreException) StreamThread(org.apache.kafka.streams.processor.internals.StreamThread) ArrayList(java.util.ArrayList) StateStore(org.apache.kafka.streams.processor.StateStore) NamedTopologyStoreQueryParameters(org.apache.kafka.streams.processor.internals.namedtopology.NamedTopologyStoreQueryParameters)
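This provider sits behind the public KafkaStreams#store(StoreQueryParameters) API. A minimal sketch, assuming a running KafkaStreams instance named kafkaStreams and a hypothetical key-value store "my-store"; enableStaleStores() corresponds to the staleStoresEnabled() branch above:

import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.state.QueryableStoreTypes;
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;

// Also accept standby and restoring stores, matching the staleStoresEnabled() path above.
final StoreQueryParameters<ReadOnlyKeyValueStore<String, Long>> params =
    StoreQueryParameters.fromNameAndType("my-store", QueryableStoreTypes.<String, Long>keyValueStore())
        .enableStaleStores();
final ReadOnlyKeyValueStore<String, Long> store = kafkaStreams.store(params);
final Long value = store.get("some-key");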

Example 4 with Task

Use of org.apache.kafka.streams.processor.internals.Task in the Apache Kafka project.

From the class HighAvailabilityTaskAssignor, method assignStandbyReplicaTasks().

private static void assignStandbyReplicaTasks(final TreeMap<UUID, ClientState> clientStates, final Set<TaskId> statefulTasks, final int numStandbyReplicas) {
    final Map<TaskId, Integer> tasksToRemainingStandbys = statefulTasks.stream().collect(Collectors.toMap(task -> task, t -> numStandbyReplicas));
    final ConstrainedPrioritySet standbyTaskClientsByTaskLoad = new ConstrainedPrioritySet((client, task) -> !clientStates.get(client).hasAssignedTask(task), client -> clientStates.get(client).assignedTaskLoad());
    standbyTaskClientsByTaskLoad.offerAll(clientStates.keySet());
    for (final TaskId task : statefulTasks) {
        int numRemainingStandbys = tasksToRemainingStandbys.get(task);
        while (numRemainingStandbys > 0) {
            final UUID client = standbyTaskClientsByTaskLoad.poll(task);
            if (client == null) {
                break;
            }
            clientStates.get(client).assignStandby(task);
            numRemainingStandbys--;
            standbyTaskClientsByTaskLoad.offer(client);
        }
        if (numRemainingStandbys > 0) {
            log.warn("Unable to assign {} of {} standby tasks for task [{}]. " + "There is not enough available capacity. You should " + "increase the number of application instances " + "to maintain the requested number of standby replicas.", numRemainingStandbys, numStandbyReplicas, task);
        }
    }
    balanceTasksOverThreads(clientStates, ClientState::standbyTasks, ClientState::unassignStandby, ClientState::assignStandby);
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Logger(org.slf4j.Logger) TaskId(org.apache.kafka.streams.processor.TaskId) Iterator(java.util.Iterator) SortedSet(java.util.SortedSet) AssignmentConfigs(org.apache.kafka.streams.processor.internals.assignment.AssignorConfiguration.AssignmentConfigs) TaskMovement.assignStandbyTaskMovements(org.apache.kafka.streams.processor.internals.assignment.TaskMovement.assignStandbyTaskMovements) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) Utils.diff(org.apache.kafka.common.utils.Utils.diff) HashMap(java.util.HashMap) TaskMovement.assignActiveTaskMovements(org.apache.kafka.streams.processor.internals.assignment.TaskMovement.assignActiveTaskMovements) UUID(java.util.UUID) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) TreeSet(java.util.TreeSet) Task(org.apache.kafka.streams.processor.internals.Task) TreeMap(java.util.TreeMap) Map(java.util.Map) BiConsumer(java.util.function.BiConsumer) SortedMap(java.util.SortedMap)
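The numStandbyReplicas argument this assignor receives ultimately comes from the num.standby.replicas Streams config. A minimal sketch of enabling standby replicas so the assignor has something to place (the application id and bootstrap servers are placeholders):

import java.util.Properties;
import org.apache.kafka.streams.StreamsConfig;

final Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "my-app");            // placeholder
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder
// Request two standby replicas per stateful task; the assignor above logs a
// warning when there is not enough capacity to place all of them.
props.put(StreamsConfig.NUM_STANDBY_REPLICAS_CONFIG, 2);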

Example 5 with Task

Use of org.apache.kafka.streams.processor.internals.Task in the Apache Kafka project.

From the class SubscriptionInfo, method setTaskOffsetSumDataFromTaskOffsetSumMap().

// For MIN_NAMED_TOPOLOGY_VERSION > version > MIN_VERSION_OFFSET_SUM_SUBSCRIPTION
private void setTaskOffsetSumDataFromTaskOffsetSumMap(final Map<TaskId, Long> taskOffsetSums) {
    final Map<Integer, List<SubscriptionInfoData.PartitionToOffsetSum>> topicGroupIdToPartitionOffsetSum = new HashMap<>();
    for (final Map.Entry<TaskId, Long> taskEntry : taskOffsetSums.entrySet()) {
        final TaskId task = taskEntry.getKey();
        if (task.topologyName() != null) {
            throw new TaskAssignmentException("Named topologies are not compatible with older protocol versions");
        }
        topicGroupIdToPartitionOffsetSum.computeIfAbsent(task.subtopology(), t -> new ArrayList<>()).add(new SubscriptionInfoData.PartitionToOffsetSum().setPartition(task.partition()).setOffsetSum(taskEntry.getValue()));
    }
    data.setTaskOffsetSums(topicGroupIdToPartitionOffsetSum.entrySet().stream().map(t -> {
        final SubscriptionInfoData.TaskOffsetSum taskOffsetSum = new SubscriptionInfoData.TaskOffsetSum();
        taskOffsetSum.setTopicGroupId(t.getKey());
        taskOffsetSum.setPartitionToOffsetSum(t.getValue());
        return taskOffsetSum;
    }).collect(Collectors.toList()));
}
Also used : TaskAssignmentException(org.apache.kafka.streams.errors.TaskAssignmentException) Uuid(org.apache.kafka.common.Uuid) MessageUtil(org.apache.kafka.common.protocol.MessageUtil) MIN_NAMED_TOPOLOGY_VERSION(org.apache.kafka.streams.processor.internals.assignment.StreamsAssignmentProtocolVersions.MIN_NAMED_TOPOLOGY_VERSION) Logger(org.slf4j.Logger) PartitionToOffsetSum(org.apache.kafka.streams.internals.generated.SubscriptionInfoData.PartitionToOffsetSum) TaskId(org.apache.kafka.streams.processor.TaskId) LATEST_SUPPORTED_VERSION(org.apache.kafka.streams.processor.internals.assignment.StreamsAssignmentProtocolVersions.LATEST_SUPPORTED_VERSION) ByteBufferAccessor(org.apache.kafka.common.protocol.ByteBufferAccessor) LoggerFactory(org.slf4j.LoggerFactory) SubscriptionInfoData(org.apache.kafka.streams.internals.generated.SubscriptionInfoData) Set(java.util.Set) HashMap(java.util.HashMap) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) ByteBuffer(java.nio.ByteBuffer) StandardCharsets(java.nio.charset.StandardCharsets) ArrayList(java.util.ArrayList) Task(org.apache.kafka.streams.processor.internals.Task) HashSet(java.util.HashSet) List(java.util.List) TaskOffsetSum(org.apache.kafka.streams.internals.generated.SubscriptionInfoData.TaskOffsetSum) Map(java.util.Map) Collections(java.util.Collections)
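For illustration, the bucketing this method performs can be reproduced in isolation: a TaskId carries a subtopology (topic group) id and a partition, and the method groups offset sums by subtopology. A minimal, self-contained sketch of that grouping (the offset-sum values are made up):

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.kafka.streams.processor.TaskId;

final Map<TaskId, Long> taskOffsetSums = new HashMap<>();
taskOffsetSums.put(new TaskId(0, 0), 42L); // subtopology 0, partition 0
taskOffsetSums.put(new TaskId(0, 1), 17L);
taskOffsetSums.put(new TaskId(1, 0), 99L);
// Same bucketing as the method above: subtopology id -> its partitions.
final Map<Integer, List<Integer>> partitionsBySubtopology = taskOffsetSums.keySet().stream()
    .collect(Collectors.groupingBy(TaskId::subtopology,
        Collectors.mapping(TaskId::partition, Collectors.toList())));
System.out.println(partitionsBySubtopology); // e.g. {0=[0, 1], 1=[0]}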

Aggregations

Task (org.apache.kafka.streams.processor.internals.Task): 7
TaskId (org.apache.kafka.streams.processor.TaskId): 6
Map (java.util.Map): 5
Set (java.util.Set): 5
UUID (java.util.UUID): 5
Collectors (java.util.stream.Collectors): 5
HashMap (java.util.HashMap): 4
HashSet (java.util.HashSet): 4
TreeMap (java.util.TreeMap): 4
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 4
Logger (org.slf4j.Logger): 4
ArrayList (java.util.ArrayList): 3
Collection (java.util.Collection): 3
TreeSet (java.util.TreeSet): 3
TopicPartition (org.apache.kafka.common.TopicPartition): 3
StateStore (org.apache.kafka.streams.processor.StateStore): 3
StreamThread (org.apache.kafka.streams.processor.internals.StreamThread): 3
LoggerFactory (org.slf4j.LoggerFactory): 3
Collections (java.util.Collections): 2
Collections.emptyMap (java.util.Collections.emptyMap): 2