Use of org.apache.kafka.streams.processor.internals.Task in project kafka by apache.
The class KafkaStreams, method query:
/**
* Run an interactive query against a state store.
* <p>
* This method allows callers outside of the Streams runtime to access the internal state of
* stateful processors. See https://kafka.apache.org/documentation/streams/developer-guide/interactive-queries.html
* for more information.
*
* @param <R> The result type specified by the query.
* @throws StreamsNotStartedException If Streams has not yet been started. Just call {@link
* KafkaStreams#start()} and then retry this call.
* @throws StreamsStoppedException If Streams is in a terminal state like PENDING_SHUTDOWN,
* NOT_RUNNING, PENDING_ERROR, or ERROR. The caller should
* discover a new instance to query.
* @throws UnknownStateStoreException If the specified store name does not exist in the
* topology.
*/
@Evolving
public <R> StateQueryResult<R> query(final StateQueryRequest<R> request) {
    final String storeName = request.getStoreName();
    if (!topologyMetadata.hasStore(storeName)) {
        throw new UnknownStateStoreException(
            "Cannot get state store " + storeName
                + " because no such store is registered in the topology."
        );
    }
    if (state().hasNotStarted()) {
        throw new StreamsNotStartedException(
            "KafkaStreams has not been started, you can retry after calling start()."
        );
    }
    if (state().isShuttingDown() || state.hasCompletedShutdown()) {
        throw new StreamsStoppedException(
            "KafkaStreams has been stopped (" + state + ")."
                + " This instance can no longer serve queries."
        );
    }
    final StateQueryResult<R> result = new StateQueryResult<>();
    final Map<String, StateStore> globalStateStores = topologyMetadata.globalStateStores();
    if (globalStateStores.containsKey(storeName)) {
        // See KAFKA-13523
        result.setGlobalResult(QueryResult.forFailure(
            FailureReason.UNKNOWN_QUERY_TYPE,
            "Global stores do not yet support the KafkaStreams#query API. Use KafkaStreams#store instead."
        ));
    } else {
        for (final StreamThread thread : threads) {
            final Map<TaskId, Task> tasks = thread.allTasks();
            for (final Entry<TaskId, Task> entry : tasks.entrySet()) {
                final TaskId taskId = entry.getKey();
                final int partition = taskId.partition();
                if (request.isAllPartitions() || request.getPartitions().contains(partition)) {
                    final Task task = entry.getValue();
                    final StateStore store = task.getStore(storeName);
                    if (store != null) {
                        final StreamThread.State state = thread.state();
                        final boolean active = task.isActive();
                        if (request.isRequireActive()
                            && (state != StreamThread.State.RUNNING || !active)) {
                            result.addResult(partition, QueryResult.forFailure(
                                FailureReason.NOT_ACTIVE,
                                "Query requires a running active task,"
                                    + " but partition was in state " + state + " and was "
                                    + (active ? "active" : "not active") + "."
                            ));
                        } else {
                            final QueryResult<R> r = store.query(
                                request.getQuery(),
                                request.isRequireActive()
                                    ? PositionBound.unbounded()
                                    : request.getPositionBound(),
                                new QueryConfig(request.executionInfoEnabled())
                            );
                            result.addResult(partition, r);
                        }
                        // Optimization: once all the requested partitions have been handled,
                        // we can return right away.
                        if (!request.isAllPartitions()
                            && result.getPartitionResults().keySet().containsAll(request.getPartitions())) {
                            return result;
                        }
                    }
                }
            }
        }
    }
    if (!request.isAllPartitions()) {
        for (final Integer partition : request.getPartitions()) {
            if (!result.getPartitionResults().containsKey(partition)) {
                result.addResult(partition, QueryResult.forFailure(
                    FailureReason.NOT_PRESENT,
                    "The requested partition was not present at the time of the query."
                ));
            }
        }
    }
    return result;
}
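For context, here is how a caller outside the runtime might exercise this method through the public IQv2 types. This is a minimal sketch, not code from the Kafka project: it assumes a started KafkaStreams instance named streams and a key-value store named "counts" with String keys and Long values.

import org.apache.kafka.streams.query.KeyQuery;
import org.apache.kafka.streams.query.StateQueryRequest;
import org.apache.kafka.streams.query.StateQueryResult;

// Query one key in the (hypothetical) "counts" store across all local partitions.
final StateQueryRequest<Long> request =
    StateQueryRequest.inStore("counts").withQuery(KeyQuery.<String, Long>withKey("some-key"));
final StateQueryResult<Long> result = streams.query(request);

// Each partition reports either a value or a FailureReason, as assembled above.
result.getPartitionResults().forEach((partition, queryResult) -> {
    if (queryResult.isSuccess()) {
        System.out.println("partition " + partition + " -> " + queryResult.getResult());
    } else {
        System.out.println("partition " + partition + " failed: " + queryResult.getFailureReason());
    }
});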
Use of org.apache.kafka.streams.processor.internals.Task in project kafka by apache.
The class KafkaStreams, method allLocalStorePartitionLags:
protected Map<String, Map<Integer, LagInfo>> allLocalStorePartitionLags(final List<Task> tasksToCollectLagFor) {
    final Map<String, Map<Integer, LagInfo>> localStorePartitionLags = new TreeMap<>();
    final Collection<TopicPartition> allPartitions = new LinkedList<>();
    final Map<TopicPartition, Long> allChangelogPositions = new HashMap<>();
    // Obtain the current positions of all the active-restoring and standby tasks
    for (final Task task : tasksToCollectLagFor) {
        allPartitions.addAll(task.changelogPartitions());
        // Note that not all changelog partitions will have positions, since some may not have started
        allChangelogPositions.putAll(task.changelogOffsets());
    }
    log.debug("Current changelog positions: {}", allChangelogPositions);
    final Map<TopicPartition, ListOffsetsResultInfo> allEndOffsets =
        fetchEndOffsets(allPartitions, adminClient);
    log.debug("Current end offsets: {}", allEndOffsets);
    for (final Map.Entry<TopicPartition, ListOffsetsResultInfo> entry : allEndOffsets.entrySet()) {
        // Avoid an extra admin API lookup by computing lags for not-yet-started restorations
        // from zero instead of the real "earliest offset" for the changelog.
        // This yields the correct relative order of lagginess for the tasks in the cluster,
        // but it is an over-estimate of how much work remains to restore the task from scratch.
        final long earliestOffset = 0L;
        final long changelogPosition = allChangelogPositions.getOrDefault(entry.getKey(), earliestOffset);
        final long latestOffset = entry.getValue().offset();
        final LagInfo lagInfo = new LagInfo(
            changelogPosition == Task.LATEST_OFFSET ? latestOffset : changelogPosition,
            latestOffset
        );
        final String storeName = streamsMetadataState.getStoreForChangelogTopic(entry.getKey().topic());
        localStorePartitionLags
            .computeIfAbsent(storeName, ignored -> new TreeMap<>())
            .put(entry.getKey().partition(), lagInfo);
    }
    return Collections.unmodifiableMap(localStorePartitionLags);
}
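Applications reach this information through the public no-argument KafkaStreams#allLocalStorePartitionLags() overload. A minimal sketch of inspecting the lags, assuming a running instance named streams:

import java.util.Map;
import org.apache.kafka.streams.LagInfo;

final Map<String, Map<Integer, LagInfo>> lags = streams.allLocalStorePartitionLags();
lags.forEach((storeName, partitionLags) ->
    partitionLags.forEach((partition, lag) ->
        // offsetLag() is endOffsetPosition() - currentOffsetPosition(); as noted above,
        // it may over-estimate the remaining work for not-yet-started restorations.
        System.out.printf("%s[%d]: position=%d end=%d lag=%d%n",
            storeName, partition,
            lag.currentOffsetPosition(), lag.endOffsetPosition(), lag.offsetLag())));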
Use of org.apache.kafka.streams.processor.internals.Task in project kafka by apache.
The class StreamThreadStateStoreProvider, method stores:
@SuppressWarnings("unchecked")
public <T> List<T> stores(final StoreQueryParameters storeQueryParams) {
    final StreamThread.State state = streamThread.state();
    if (state == StreamThread.State.DEAD) {
        return Collections.emptyList();
    }
    final String storeName = storeQueryParams.storeName();
    final QueryableStoreType<T> queryableStoreType = storeQueryParams.queryableStoreType();
    final String topologyName = storeQueryParams instanceof NamedTopologyStoreQueryParameters
        ? ((NamedTopologyStoreQueryParameters) storeQueryParams).topologyName()
        : null;
    if (storeQueryParams.staleStoresEnabled() ? state.isAlive() : state == StreamThread.State.RUNNING) {
        final Collection<Task> tasks = storeQueryParams.staleStoresEnabled()
            ? streamThread.allTasks().values()
            : streamThread.activeTasks();
        if (storeQueryParams.partition() != null) {
            for (final Task task : tasks) {
                if (task.id().partition() == storeQueryParams.partition()
                    && (topologyName == null || topologyName.equals(task.id().topologyName()))
                    && task.getStore(storeName) != null
                    && storeName.equals(task.getStore(storeName).name())) {
                    final T typedStore = validateAndCastStores(task.getStore(storeName), queryableStoreType, storeName, task.id());
                    return Collections.singletonList(typedStore);
                }
            }
            return Collections.emptyList();
        } else {
            final List<T> list = new ArrayList<>();
            for (final Task task : tasks) {
                final StateStore store = task.getStore(storeName);
                if (store == null) {
                    // then this task doesn't have that store
                } else {
                    final T typedStore = validateAndCastStores(store, queryableStoreType, storeName, task.id());
                    list.add(typedStore);
                }
            }
            return list;
        }
    } else {
        throw new InvalidStateStoreException(
            "Cannot get state store " + storeName + " because the stream thread is "
                + state + ", not RUNNING"
                + (storeQueryParams.staleStoresEnabled() ? " or REBALANCING" : "")
        );
    }
}
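Callers reach this provider through the public KafkaStreams#store method. A minimal sketch, assuming a key-value store named "counts" with String keys and Long values; enableStaleStores() corresponds to the staleStoresEnabled() branch above and permits serving from standby or restoring tasks:

import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.state.QueryableStoreTypes;
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;

final ReadOnlyKeyValueStore<String, Long> store = streams.store(
    StoreQueryParameters
        .fromNameAndType("counts", QueryableStoreTypes.<String, Long>keyValueStore())
        .enableStaleStores()); // also accept stores from threads that are merely alive, not RUNNING
final Long count = store.get("some-key");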
Use of org.apache.kafka.streams.processor.internals.Task in project kafka by apache.
The class HighAvailabilityTaskAssignor, method assignStandbyReplicaTasks:
private static void assignStandbyReplicaTasks(final TreeMap<UUID, ClientState> clientStates,
                                              final Set<TaskId> statefulTasks,
                                              final int numStandbyReplicas) {
    final Map<TaskId, Integer> tasksToRemainingStandbys =
        statefulTasks.stream().collect(Collectors.toMap(task -> task, t -> numStandbyReplicas));
    final ConstrainedPrioritySet standbyTaskClientsByTaskLoad = new ConstrainedPrioritySet(
        (client, task) -> !clientStates.get(client).hasAssignedTask(task),
        client -> clientStates.get(client).assignedTaskLoad()
    );
    standbyTaskClientsByTaskLoad.offerAll(clientStates.keySet());
    for (final TaskId task : statefulTasks) {
        int numRemainingStandbys = tasksToRemainingStandbys.get(task);
        while (numRemainingStandbys > 0) {
            final UUID client = standbyTaskClientsByTaskLoad.poll(task);
            if (client == null) {
                break;
            }
            clientStates.get(client).assignStandby(task);
            numRemainingStandbys--;
            standbyTaskClientsByTaskLoad.offer(client);
        }
        if (numRemainingStandbys > 0) {
            log.warn("Unable to assign {} of {} standby tasks for task [{}]. "
                    + "There is not enough available capacity. You should "
                    + "increase the number of application instances "
                    + "to maintain the requested number of standby replicas.",
                numRemainingStandbys, numStandbyReplicas, task);
        }
    }
    balanceTasksOverThreads(clientStates, ClientState::standbyTasks, ClientState::unassignStandby, ClientState::assignStandby);
}
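The numStandbyReplicas argument ultimately comes from the num.standby.replicas streams configuration. A minimal sketch of requesting standbys, assuming ordinary application bootstrap code; the application id and bootstrap address are illustrative:

import java.util.Properties;
import org.apache.kafka.streams.StreamsConfig;

final Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "my-app");            // illustrative id
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // illustrative address
// Ask the assignor to place one standby replica per stateful task,
// capacity permitting (otherwise the warning above is logged).
props.put(StreamsConfig.NUM_STANDBY_REPLICAS_CONFIG, 1);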
Use of org.apache.kafka.streams.processor.internals.Task in project kafka by apache.
The class SubscriptionInfo, method setTaskOffsetSumDataFromTaskOffsetSumMap:
// For MIN_NAMED_TOPOLOGY_VERSION > version > MIN_VERSION_OFFSET_SUM_SUBSCRIPTION
private void setTaskOffsetSumDataFromTaskOffsetSumMap(final Map<TaskId, Long> taskOffsetSums) {
    final Map<Integer, List<SubscriptionInfoData.PartitionToOffsetSum>> topicGroupIdToPartitionOffsetSum = new HashMap<>();
    for (final Map.Entry<TaskId, Long> taskEntry : taskOffsetSums.entrySet()) {
        final TaskId task = taskEntry.getKey();
        if (task.topologyName() != null) {
            throw new TaskAssignmentException("Named topologies are not compatible with older protocol versions");
        }
        topicGroupIdToPartitionOffsetSum
            .computeIfAbsent(task.subtopology(), t -> new ArrayList<>())
            .add(new SubscriptionInfoData.PartitionToOffsetSum()
                .setPartition(task.partition())
                .setOffsetSum(taskEntry.getValue()));
    }
    data.setTaskOffsetSums(topicGroupIdToPartitionOffsetSum.entrySet().stream().map(t -> {
        final SubscriptionInfoData.TaskOffsetSum taskOffsetSum = new SubscriptionInfoData.TaskOffsetSum();
        taskOffsetSum.setTopicGroupId(t.getKey());
        taskOffsetSum.setPartitionToOffsetSum(t.getValue());
        return taskOffsetSum;
    }).collect(Collectors.toList()));
}
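To make the encoding concrete, here is a small hypothetical input and the grouping it produces; task ids are written subtopology_partition, following TaskId's convention:

import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.streams.processor.TaskId;

// Hypothetical offset sums for tasks 0_0, 0_1, and 1_0.
final Map<TaskId, Long> taskOffsetSums = new HashMap<>();
taskOffsetSums.put(new TaskId(0, 0), 100L);
taskOffsetSums.put(new TaskId(0, 1), 250L);
taskOffsetSums.put(new TaskId(1, 0), 42L);
// The method above would encode topic group 0 with two PartitionToOffsetSum
// entries (partitions 0 and 1) and topic group 1 with one (partition 0),
// since none of these tasks carries a topology name.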