Example 56 with HostInfo

Use of org.apache.kafka.streams.state.HostInfo in project ksql by confluentinc.

From the class KsLocator, the method getMetadataForKeys:

/**
 * Gets the metadata when looking up a list of keys. This is used when the set of keys is known.
 * @param keys The non-empty set of keys to look up metadata for
 * @param filterPartitions The partitions to limit lookups to, if non-empty. Partitions which
 *                         exist but are not listed here are omitted. If empty, no filtering is
 *                         done.
 * @return The metadata associated with the keys
 */
private List<PartitionMetadata> getMetadataForKeys(final List<KsqlKey> keys, final Set<Integer> filterPartitions) {
    // Use a LinkedHashMap to preserve request order for reproducibility, even though
    // ordering is not a guarantee of the API.
    final Map<Integer, KeyQueryMetadata> metadataByPartition = new LinkedHashMap<>();
    final Map<Integer, Set<KsqlKey>> keysByPartition = new HashMap<>();
    for (KsqlKey key : keys) {
        final KeyQueryMetadata metadata = getKeyQueryMetadata(key);
        // Fail fast if Streams not ready. Let client handle it
        if (metadata.equals(KeyQueryMetadata.NOT_AVAILABLE)) {
            LOG.debug("KeyQueryMetadata not available for state store '{}' and key {}", storeName, key);
            throw new MaterializationException(String.format("Materialized data for key %s is not available yet. " + "Please try again later.", key));
        }
        LOG.debug("Handling pull query for key {} in partition {} of state store {}.", key, metadata.partition(), storeName);
        if (!filterPartitions.isEmpty() && !filterPartitions.contains(metadata.partition())) {
            LOG.debug("Ignoring key {} in partition {} because partition is not included in lookup.", key, metadata.partition());
            continue;
        }
        keysByPartition.computeIfAbsent(metadata.partition(), k -> new LinkedHashSet<>()).add(key);
        metadataByPartition.putIfAbsent(metadata.partition(), metadata);
    }
    return metadataByPartition.values().stream().map(metadata -> {
        final HostInfo activeHost = metadata.activeHost();
        final Set<HostInfo> standByHosts = metadata.standbyHosts();
        return new PartitionMetadata(activeHost, standByHosts, metadata.partition(), Optional.of(keysByPartition.get(metadata.partition())));
    }).collect(Collectors.toList());
}
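The helper getKeyQueryMetadata is not shown in this snippet. A minimal sketch of what it plausibly looks like, assuming the locator holds a kafkaStreams handle, the storeName, and a keySerializer (these field names, and the KsqlKey.getKey() accessor, are assumptions for illustration, not the actual ksql code):

// Hypothetical sketch: resolve a key to its KeyQueryMetadata through the
// standard Kafka Streams API.
private KeyQueryMetadata getKeyQueryMetadata(final KsqlKey key) {
    // queryMetadataForKey returns KeyQueryMetadata.NOT_AVAILABLE while the
    // Streams instance is rebalancing or has not yet fetched metadata, which
    // is exactly the case getMetadataForKeys fails fast on above.
    return kafkaStreams.queryMetadataForKey(storeName, key.getKey(), keySerializer);
}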

Example 57 with HostInfo

Use of org.apache.kafka.streams.state.HostInfo in project ksql by confluentinc.

From the class KsLocator, the method getMetadataForAllPartitions:

/**
 * Gets the metadata for all partitions associated with the state store.
 * @param filterPartitions The partitions to limit lookups to, if non-empty. Partitions which
 *                         exist but are not listed here are omitted. If empty, no filtering is
 *                         done.
 * @return The metadata associated with all partitions
 */
private List<PartitionMetadata> getMetadataForAllPartitions(final Set<Integer> filterPartitions, final Optional<Set<KsqlKey>> keys) {
    // It's important that we consider only the source topics for the subtopology that contains the
    // state store. Otherwise, we'll be given the wrong partition -> host mappings.
    // The underlying state store has a number of partitions that is the MAX of the number of
    // partitions of all source topics of the subtopology.  Since partition X of all source topics
    // of the particular subtopology will map to the same host, we can collect partition -> host
    // for these topics to find the locations of each partition of the state store.
    final Set<String> sourceTopicSuffixes = findSubtopologySourceTopicSuffixes();
    final Map<Integer, HostInfo> activeHostByPartition = new HashMap<>();
    final Map<Integer, Set<HostInfo>> standbyHostsByPartition = new HashMap<>();
    final Collection<StreamsMetadata> streamsMetadataCollection = getStreamsMetadata();
    for (final StreamsMetadata streamsMetadata : streamsMetadataCollection) {
        streamsMetadata.topicPartitions().forEach(tp -> {
            if (sourceTopicSuffixes.stream().anyMatch(suffix -> tp.topic().endsWith(suffix))) {
                activeHostByPartition.compute(tp.partition(), (partition, hostInfo) -> {
                    if (hostInfo != null && !streamsMetadata.hostInfo().equals(hostInfo)) {
                        throw new IllegalStateException("Should only be one active host per partition");
                    }
                    return streamsMetadata.hostInfo();
                });
            }
        });
        streamsMetadata.standbyTopicPartitions().forEach(tp -> {
            // With multiple standby hosts per partition, we can't easily sanity-check as above.
            if (sourceTopicSuffixes.stream().anyMatch(suffix -> tp.topic().endsWith(suffix))) {
                standbyHostsByPartition.computeIfAbsent(tp.partition(), p -> new HashSet<>()).add(streamsMetadata.hostInfo());
            }
        });
    }
    final Set<Integer> partitions = Streams.concat(activeHostByPartition.keySet().stream(), standbyHostsByPartition.keySet().stream()).collect(Collectors.toSet());
    final List<PartitionMetadata> metadataList = new ArrayList<>();
    for (Integer partition : partitions) {
        if (!filterPartitions.isEmpty() && !filterPartitions.contains(partition)) {
            LOG.debug("Ignoring partition {} because partition is not included in lookup.", partition);
            continue;
        }
        final HostInfo activeHost = activeHostByPartition.getOrDefault(partition, UNKNOWN_HOST);
        final Set<HostInfo> standbyHosts = standbyHostsByPartition.getOrDefault(partition, Collections.emptySet());
        metadataList.add(new PartitionMetadata(activeHost, standbyHosts, partition, keys));
    }
    return metadataList;
}
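The getStreamsMetadata helper presumably delegates to the Kafka Streams client. A minimal sketch under the same assumptions (a kafkaStreams field and the storeName; not the actual ksql implementation):

// Hypothetical sketch: fetch metadata for every Streams client that hosts a
// partition of the given state store, whether as the active replica or a standby.
private Collection<StreamsMetadata> getStreamsMetadata() {
    // Each StreamsMetadata exposes hostInfo(), topicPartitions() and
    // standbyTopicPartitions(), which the loop above uses to build the
    // partition -> host mappings.
    return kafkaStreams.streamsMetadataForStore(storeName);
}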
