Search in sources:

Example 6 with GraphNode

use of org.apache.kafka.streams.kstream.internals.graph.GraphNode in project kafka by apache.

In class InternalStreamsBuilder, method maybeOptimizeRepartitionOperations.

/**
 * Replaces the optimizable repartition nodes registered for each key-changing node with a
 * single repartition node attached directly below that key-changing node.
 *
 * <p>For every entry of {@code keyChangingOperationsToOptimizableRepartitionNodes} whose set of
 * repartition nodes is non-empty, this method:
 * <ol>
 *   <li>creates one repartition node, named after the first repartition topic of the set and
 *       using the serdes returned by {@code getRepartitionSerdes};</li>
 *   <li>moves the relevant child of the key-changing node underneath that new node;</li>
 *   <li>splices every replaced repartition node out of the graph by re-attaching its children
 *       to its parents;</li>
 *   <li>updates the co-partition source groups to refer to the new node name;</li>
 *   <li>attaches the new node to the key-changing node and removes the entry from the map.</li>
 * </ol>
 * Entries whose set is empty are skipped and stay in the map.
 *
 * @throws StreamsException if, for some repartition node, no node having the key-changing node
 *                          as a parent can be found
 */
private void maybeOptimizeRepartitionOperations() {
    // presumably brings the key-changer -> repartition-nodes map up to date before it is walked
    // (the helper is not visible here) -- TODO confirm
    maybeUpdateKeyChangingRepartitionNodeMap();
    // an explicit iterator is used so that processed entries can be removed while iterating
    final Iterator<Entry<GraphNode, LinkedHashSet<OptimizableRepartitionNode<?, ?>>>> entryIterator = keyChangingOperationsToOptimizableRepartitionNodes.entrySet().iterator();
    while (entryIterator.hasNext()) {
        final Map.Entry<GraphNode, LinkedHashSet<OptimizableRepartitionNode<?, ?>>> entry = entryIterator.next();
        final GraphNode keyChangingNode = entry.getKey();
        // nothing to merge for this key-changing node; note the entry is NOT removed from the map
        if (entry.getValue().isEmpty()) {
            continue;
        }
        // the key/value serdes of the merged repartition node are taken from the nodes being replaced
        final GroupedInternal<?, ?> groupedInternal = new GroupedInternal<>(getRepartitionSerdes(entry.getValue()));
        final String repartitionTopicName = getFirstRepartitionTopicName(entry.getValue());
        // passing in the name of the first repartition topic, re-used to create the optimized repartition topic
        final GraphNode optimizedSingleRepartition = createRepartitionNode(repartitionTopicName, groupedInternal.keySerde(), groupedInternal.valueSerde());
        // re-use parent buildPriority to make sure the single repartition graph node is evaluated before downstream nodes
        optimizedSingleRepartition.setBuildPriority(keyChangingNode.buildPriority());
        for (final OptimizableRepartitionNode<?, ?> repartitionNodeToBeReplaced : entry.getValue()) {
            // look for the node whose parents include the key-changing node, starting from the
            // repartition node being replaced (search direction is implied by the helper's name only)
            final GraphNode keyChangingNodeChild = findParentNodeMatching(repartitionNodeToBeReplaced, gn -> gn.parentNodes().contains(keyChangingNode));
            if (keyChangingNodeChild == null) {
                throw new StreamsException(String.format("Found a null keyChangingChild node for %s", repartitionNodeToBeReplaced));
            }
            LOG.debug("Found the child node of the key changer {} from the repartition {}.", keyChangingNodeChild, repartitionNodeToBeReplaced);
            // need to add children of key-changing node as children of optimized repartition
            // in order to process records from re-partitioning
            optimizedSingleRepartition.addChild(keyChangingNodeChild);
            LOG.debug("Removing {} from {}  children {}", keyChangingNodeChild, keyChangingNode, keyChangingNode.children());
            // now remove children from key-changing node
            keyChangingNode.removeChild(keyChangingNodeChild);
            // now need to get children of repartition node so we can remove repartition node
            // NOTE(review): whether children()/parentNodes() return copies or live views is not
            // visible here; the statement order below should be preserved unless that is confirmed
            final Collection<GraphNode> repartitionNodeToBeReplacedChildren = repartitionNodeToBeReplaced.children();
            final Collection<GraphNode> parentsOfRepartitionNodeToBeReplaced = repartitionNodeToBeReplaced.parentNodes();
            // bypass the repartition node: each of its children becomes a child of each of its parents
            for (final GraphNode repartitionNodeToBeReplacedChild : repartitionNodeToBeReplacedChildren) {
                for (final GraphNode parentNode : parentsOfRepartitionNodeToBeReplaced) {
                    parentNode.addChild(repartitionNodeToBeReplacedChild);
                }
            }
            // detach the repartition node from every parent ...
            for (final GraphNode parentNode : parentsOfRepartitionNodeToBeReplaced) {
                parentNode.removeChild(repartitionNodeToBeReplaced);
            }
            // ... and drop its own child links so nothing is reachable through it any more
            repartitionNodeToBeReplaced.clearChildren();
            // if replaced repartition node is part of any copartition group,
            // we need to update it with the new node name so that co-partitioning won't break.
            internalTopologyBuilder.maybeUpdateCopartitionSourceGroups(repartitionNodeToBeReplaced.nodeName(), optimizedSingleRepartition.nodeName());
            LOG.debug("Updated node {} children {}", optimizedSingleRepartition, optimizedSingleRepartition.children());
        }
        // hook the merged repartition node below the key-changing node only after all replacements are done
        keyChangingNode.addChild(optimizedSingleRepartition);
        // this key-changing node is fully handled; remove it through the iterator
        entryIterator.remove();
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) StreamsException(org.apache.kafka.streams.errors.StreamsException) GraphNode(org.apache.kafka.streams.kstream.internals.graph.GraphNode) OptimizableRepartitionNode(org.apache.kafka.streams.kstream.internals.graph.OptimizableRepartitionNode) Entry(java.util.Map.Entry) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) TreeMap(java.util.TreeMap)

Example 7 with GraphNode

use of org.apache.kafka.streams.kstream.internals.graph.GraphNode in project kafka by apache.

In class InternalStreamsBuilder, method addGlobalStore.

/**
 * Adds a global store to the logical graph as a {@link GlobalStoreNode} directly below the root.
 *
 * <p>Logging is explicitly disabled on the given store builder. The source and processor names
 * are derived from the name carried by {@code consumed}, falling back to generated names.
 *
 * @param storeBuilder        builder of the global store; logging is disabled on it
 * @param topic               topic passed to the created {@code GlobalStoreNode}
 * @param consumed            consumption settings; its name seeds the node names
 * @param stateUpdateSupplier supplier of the processor passed to the created {@code GlobalStoreNode}
 * @param <KIn>               key type of the consumed records
 * @param <VIn>               value type of the consumed records
 */
public synchronized <KIn, VIn> void addGlobalStore(final StoreBuilder<?> storeBuilder,
                                                   final String topic,
                                                   final ConsumedInternal<KIn, VIn> consumed,
                                                   final org.apache.kafka.streams.processor.api.ProcessorSupplier<KIn, VIn, Void, Void> stateUpdateSupplier) {
    // logging is explicitly turned off for global stores
    storeBuilder.withLoggingDisabled();

    final NamedInternal consumedName = new NamedInternal(consumed.name());

    // resolve the source name first and the processor name second, exactly in this order
    final String globalSourceName =
        consumedName.suffixWithOrElseGet(TABLE_SOURCE_SUFFIX, this, KStreamImpl.SOURCE_NAME);
    final String globalProcessorName =
        consumedName.orElseGenerateWithPrefix(this, KTableImpl.SOURCE_NAME);

    final GraphNode storeNode = new GlobalStoreNode<>(
        storeBuilder,
        globalSourceName,
        topic,
        consumed,
        globalProcessorName,
        stateUpdateSupplier
    );
    addGraphNode(root, storeNode);
}
Also used : GlobalStoreNode(org.apache.kafka.streams.kstream.internals.graph.GlobalStoreNode) GraphNode(org.apache.kafka.streams.kstream.internals.graph.GraphNode)

Example 8 with GraphNode

use of org.apache.kafka.streams.kstream.internals.graph.GraphNode in project kafka by apache.

In class InternalStreamsBuilder, method buildAndOptimizeTopology.

/**
 * Writes the logical graph into {@code internalTopologyBuilder}, optionally optimizing it first.
 *
 * <p>Duplicate source nodes are always merged. When {@code optimizeTopology} is set, the KTable
 * source-topic and repartition optimizations run as well. The graph is then walked from the
 * root in {@code buildPriority} order: a node is written once all of its parents have been
 * written and it has not been written itself, and its children are queued after it is visited.
 * Co-partitioning is validated at the end.
 *
 * @param optimizeTopology whether to run the graph optimizations before building
 */
public void buildAndOptimizeTopology(final boolean optimizeTopology) {
    mergeDuplicateSourceNodes();

    if (optimizeTopology) {
        LOG.debug("Optimizing the Kafka Streams graph for repartition nodes");
        optimizeKTableSourceTopics();
        maybeOptimizeRepartitionOperations();
    }

    // nodes waiting to be visited, ordered by their build priority
    final Comparator<GraphNode> byBuildPriority = Comparator.comparing(GraphNode::buildPriority);
    final PriorityQueue<GraphNode> pendingNodes = new PriorityQueue<>(5, byBuildPriority);
    pendingNodes.offer(root);

    while (!pendingNodes.isEmpty()) {
        final GraphNode current = pendingNodes.poll();

        if (LOG.isDebugEnabled()) {
            LOG.debug("Adding nodes to topology {} child nodes {}", current, current.children());
        }

        // a node may be queued more than once; it is only written when every parent is done
        // and it has not been written yet
        final boolean readyToWrite = current.allParentsWrittenToTopology() && !current.hasWrittenToTopology();
        if (readyToWrite) {
            current.writeToTopology(internalTopologyBuilder);
            current.setHasWrittenToTopology(true);
        }

        // children are queued regardless of whether this visit wrote the node
        pendingNodes.addAll(current.children());
    }

    internalTopologyBuilder.validateCopartition();
}
Also used : GraphNode(org.apache.kafka.streams.kstream.internals.graph.GraphNode) PriorityQueue(java.util.PriorityQueue)

Example 9 with GraphNode

use of org.apache.kafka.streams.kstream.internals.graph.GraphNode in project kafka by apache.

In class InternalStreamsBuilder, method addGraphNode.

/**
 * Attaches {@code child} to every node in {@code parents} by delegating to the
 * single-parent {@code addGraphNode} overload once per parent, in iteration order.
 *
 * @param parents the parent nodes; must be non-null and non-empty
 * @param child   the node to attach; must be non-null
 * @throws NullPointerException if {@code parents} or {@code child} is {@code null}
 * @throws StreamsException     if {@code parents} is empty
 */
void addGraphNode(final Collection<GraphNode> parents, final GraphNode child) {
    // argument checks run in this order: parents, child, then emptiness
    Objects.requireNonNull(parents, "parent node can't be null");
    Objects.requireNonNull(child, "child node can't be null");

    if (parents.isEmpty()) {
        throw new StreamsException("Parent node collection can't be empty");
    }

    final Iterator<GraphNode> parentIterator = parents.iterator();
    while (parentIterator.hasNext()) {
        addGraphNode(parentIterator.next(), child);
    }
}
Also used : StreamsException(org.apache.kafka.streams.errors.StreamsException) GraphNode(org.apache.kafka.streams.kstream.internals.graph.GraphNode)

Example 10 with GraphNode

use of org.apache.kafka.streams.kstream.internals.graph.GraphNode in project kafka by apache.

In class KStreamImplJoin, method join.

/**
 * Builds the graph nodes for a windowed stream-stream join of {@code lhs} with {@code other}
 * and returns the resulting stream.
 *
 * <p>The method creates one windowed-stream processor node per input, a pair of
 * {@code KStreamKStreamJoin} processors (one per side), a pass-through merge processor, and a
 * {@code StreamStreamJoinNode} that is added below both input graph nodes. Whether each side
 * behaves as an outer join is controlled by the {@code leftOuter} / {@code rightOuter} values
 * referenced in the body (declared outside this method).
 *
 * @param lhs          left input stream; cast to {@code AbstractStream} / {@code KStreamImpl} internally
 * @param other        right input stream; cast to {@code AbstractStream} / {@code KStreamImpl} internally
 * @param joiner       joiner producing the output value; its reversed form is used for the other side
 * @param windows      join windows; used for the window stores and wrapped in {@code JoinWindowsInternal}
 * @param streamJoined user configuration supplying name, store name, store suppliers, serdes and logging settings
 * @param <K>          key type
 * @param <V1>         value type of {@code lhs}
 * @param <V2>         value type of {@code other}
 * @param <VOut>       value type of the join result
 * @return a new {@code KStreamImpl} named after the merge processor, with the key serde from
 *         {@code streamJoined}, a {@code null} value serde, and the union of both inputs'
 *         sub-topology source nodes
 */
public <K, V1, V2, VOut> KStream<K, VOut> join(final KStream<K, V1> lhs, final KStream<K, V2> other, final ValueJoinerWithKey<? super K, ? super V1, ? super V2, ? extends VOut> joiner, final JoinWindows windows, final StreamJoined<K, V1, V2> streamJoined) {
    final StreamJoinedInternal<K, V1, V2> streamJoinedInternal = new StreamJoinedInternal<>(streamJoined);
    final NamedInternal renamed = new NamedInternal(streamJoinedInternal.name());
    // note the cross-over: the "this" suffix depends on rightOuter and the "other" suffix on leftOuter
    final String joinThisSuffix = rightOuter ? "-outer-this-join" : "-this-join";
    final String joinOtherSuffix = leftOuter ? "-outer-other-join" : "-other-join";
    // NOTE(review): the name helpers below take `builder`, so they presumably draw generated names
    // from it; if so the order of these calls determines the generated names -- confirm before reordering
    final String thisWindowStreamProcessorName = renamed.suffixWithOrElseGet("-this-windowed", builder, KStreamImpl.WINDOWED_NAME);
    final String otherWindowStreamProcessorName = renamed.suffixWithOrElseGet("-other-windowed", builder, KStreamImpl.WINDOWED_NAME);
    final String joinThisGeneratedName = rightOuter ? builder.newProcessorName(KStreamImpl.OUTERTHIS_NAME) : builder.newProcessorName(KStreamImpl.JOINTHIS_NAME);
    final String joinOtherGeneratedName = leftOuter ? builder.newProcessorName(KStreamImpl.OUTEROTHER_NAME) : builder.newProcessorName(KStreamImpl.JOINOTHER_NAME);
    // the generated names are always created above; here they only serve as the fallback names
    final String joinThisName = renamed.suffixWithOrElseGet(joinThisSuffix, joinThisGeneratedName);
    final String joinOtherName = renamed.suffixWithOrElseGet(joinOtherSuffix, joinOtherGeneratedName);
    final String joinMergeName = renamed.suffixWithOrElseGet("-merge", builder, KStreamImpl.MERGE_NAME);
    final GraphNode thisGraphNode = ((AbstractStream<?, ?>) lhs).graphNode;
    final GraphNode otherGraphNode = ((AbstractStream<?, ?>) other).graphNode;
    final StoreBuilder<WindowStore<K, V1>> thisWindowStore;
    final StoreBuilder<WindowStore<K, V2>> otherWindowStore;
    final String userProvidedBaseStoreName = streamJoinedInternal.storeName();
    final WindowBytesStoreSupplier thisStoreSupplier = streamJoinedInternal.thisStoreSupplier();
    final WindowBytesStoreSupplier otherStoreSupplier = streamJoinedInternal.otherStoreSupplier();
    assertUniqueStoreNames(thisStoreSupplier, otherStoreSupplier);
    // window store for this side: build a default one unless the user supplied a store supplier
    if (thisStoreSupplier == null) {
        // store name: generated join name, or the user-provided base store name plus the join suffix
        final String thisJoinStoreName = userProvidedBaseStoreName == null ? joinThisGeneratedName : userProvidedBaseStoreName + joinThisSuffix;
        thisWindowStore = joinWindowStoreBuilder(thisJoinStoreName, windows, streamJoinedInternal.keySerde(), streamJoinedInternal.valueSerde(), streamJoinedInternal.loggingEnabled(), streamJoinedInternal.logConfig());
    } else {
        // a user-supplied store is checked against the join windows before use
        assertWindowSettings(thisStoreSupplier, windows);
        thisWindowStore = joinWindowStoreBuilderFromSupplier(thisStoreSupplier, streamJoinedInternal.keySerde(), streamJoinedInternal.valueSerde());
    }
    // window store for the other side: same logic, using the other value serde
    if (otherStoreSupplier == null) {
        final String otherJoinStoreName = userProvidedBaseStoreName == null ? joinOtherGeneratedName : userProvidedBaseStoreName + joinOtherSuffix;
        otherWindowStore = joinWindowStoreBuilder(otherJoinStoreName, windows, streamJoinedInternal.keySerde(), streamJoinedInternal.otherValueSerde(), streamJoinedInternal.loggingEnabled(), streamJoinedInternal.logConfig());
    } else {
        assertWindowSettings(otherStoreSupplier, windows);
        otherWindowStore = joinWindowStoreBuilderFromSupplier(otherStoreSupplier, streamJoinedInternal.keySerde(), streamJoinedInternal.otherValueSerde());
    }
    // windowed-stream processor for this side, keyed to this side's window store name, attached below lhs
    final KStreamJoinWindow<K, V1> thisWindowedStream = new KStreamJoinWindow<>(thisWindowStore.name());
    final ProcessorParameters<K, V1, ?, ?> thisWindowStreamProcessorParams = new ProcessorParameters<>(thisWindowedStream, thisWindowStreamProcessorName);
    final ProcessorGraphNode<K, V1> thisWindowedStreamsNode = new ProcessorGraphNode<>(thisWindowStreamProcessorName, thisWindowStreamProcessorParams);
    builder.addGraphNode(thisGraphNode, thisWindowedStreamsNode);
    // the same for the other side, attached below `other`
    final KStreamJoinWindow<K, V2> otherWindowedStream = new KStreamJoinWindow<>(otherWindowStore.name());
    final ProcessorParameters<K, V2, ?, ?> otherWindowStreamProcessorParams = new ProcessorParameters<>(otherWindowedStream, otherWindowStreamProcessorName);
    final ProcessorGraphNode<K, V2> otherWindowedStreamsNode = new ProcessorGraphNode<>(otherWindowStreamProcessorName, otherWindowStreamProcessorParams);
    builder.addGraphNode(otherGraphNode, otherWindowedStreamsNode);
    // the shared outer-join store is only created when leftOuter is set; otherwise it stays empty
    Optional<StoreBuilder<KeyValueStore<TimestampedKeyAndJoinSide<K>, LeftOrRightValue<V1, V2>>>> outerJoinWindowStore = Optional.empty();
    if (leftOuter) {
        outerJoinWindowStore = Optional.of(sharedOuterJoinWindowStoreBuilder(windows, streamJoinedInternal, joinThisGeneratedName));
    }
    // Time-shared between joins to keep track of the maximum stream time
    final TimeTracker sharedTimeTracker = new TimeTracker();
    final JoinWindowsInternal internalWindows = new JoinWindowsInternal(windows);
    // each join processor is given the window store name of the OPPOSITE side; the second one
    // uses the reversed joiner and rightOuter as its outer flag
    final KStreamKStreamJoin<K, V1, V2, VOut> joinThis = new KStreamKStreamJoin<>(true, otherWindowStore.name(), internalWindows, joiner, leftOuter, outerJoinWindowStore.map(StoreBuilder::name), sharedTimeTracker);
    final KStreamKStreamJoin<K, V2, V1, VOut> joinOther = new KStreamKStreamJoin<>(false, thisWindowStore.name(), internalWindows, AbstractStream.reverseJoinerWithKey(joiner), rightOuter, outerJoinWindowStore.map(StoreBuilder::name), sharedTimeTracker);
    final PassThrough<K, VOut> joinMerge = new PassThrough<>();
    final StreamStreamJoinNode.StreamStreamJoinNodeBuilder<K, V1, V2, VOut> joinBuilder = StreamStreamJoinNode.streamStreamJoinNodeBuilder();
    final ProcessorParameters<K, V1, ?, ?> joinThisProcessorParams = new ProcessorParameters<>(joinThis, joinThisName);
    final ProcessorParameters<K, V2, ?, ?> joinOtherProcessorParams = new ProcessorParameters<>(joinOther, joinOtherName);
    final ProcessorParameters<K, VOut, ?, ?> joinMergeProcessorParams = new ProcessorParameters<>(joinMerge, joinMergeName);
    // the join node carries all processors and stores and is named after the merge processor
    joinBuilder.withJoinMergeProcessorParameters(joinMergeProcessorParams).withJoinThisProcessorParameters(joinThisProcessorParams).withJoinOtherProcessorParameters(joinOtherProcessorParams).withThisWindowStoreBuilder(thisWindowStore).withOtherWindowStoreBuilder(otherWindowStore).withThisWindowedStreamProcessorParameters(thisWindowStreamProcessorParams).withOtherWindowedStreamProcessorParameters(otherWindowStreamProcessorParams).withOuterJoinWindowStoreBuilder(outerJoinWindowStore).withValueJoiner(joiner).withNodeName(joinMergeName);
    if (internalWindows.spuriousResultFixEnabled()) {
        joinBuilder.withSpuriousResultFixEnabled();
    }
    final GraphNode joinGraphNode = joinBuilder.build();
    // the join node becomes a child of both original input nodes
    builder.addGraphNode(Arrays.asList(thisGraphNode, otherGraphNode), joinGraphNode);
    // the result stream's source nodes are the union of both inputs' source nodes
    final Set<String> allSourceNodes = new HashSet<>(((KStreamImpl<K, V1>) lhs).subTopologySourceNodes);
    allSourceNodes.addAll(((KStreamImpl<K, V2>) other).subTopologySourceNodes);
    // the value serde of the joined result is passed as null;
    // also for key serde we do not inherit from either since we cannot tell if these two serdes are different
    // (the key serde passed below comes from streamJoined, not from lhs or other)
    // NOTE(review): the literal `false` argument is presumably the repartition-required flag -- confirm against the KStreamImpl constructor
    return new KStreamImpl<>(joinMergeName, streamJoinedInternal.keySerde(), null, allSourceNodes, false, joinGraphNode, builder);
}
Also used : ListValueStoreBuilder(org.apache.kafka.streams.state.internals.ListValueStoreBuilder) StoreBuilder(org.apache.kafka.streams.state.StoreBuilder) ProcessorGraphNode(org.apache.kafka.streams.kstream.internals.graph.ProcessorGraphNode) WindowStore(org.apache.kafka.streams.state.WindowStore) StreamStreamJoinNode(org.apache.kafka.streams.kstream.internals.graph.StreamStreamJoinNode) TimestampedKeyAndJoinSide(org.apache.kafka.streams.state.internals.TimestampedKeyAndJoinSide) HashSet(java.util.HashSet) LeftOrRightValue(org.apache.kafka.streams.state.internals.LeftOrRightValue) WindowBytesStoreSupplier(org.apache.kafka.streams.state.WindowBytesStoreSupplier) ProcessorParameters(org.apache.kafka.streams.kstream.internals.graph.ProcessorParameters) GraphNode(org.apache.kafka.streams.kstream.internals.graph.GraphNode) ProcessorGraphNode(org.apache.kafka.streams.kstream.internals.graph.ProcessorGraphNode)

Aggregations

GraphNode (org.apache.kafka.streams.kstream.internals.graph.GraphNode): 14; ProcessorGraphNode (org.apache.kafka.streams.kstream.internals.graph.ProcessorGraphNode): 6; HashMap (java.util.HashMap): 3; LinkedHashMap (java.util.LinkedHashMap): 3; TreeMap (java.util.TreeMap): 3; TableProcessorNode (org.apache.kafka.streams.kstream.internals.graph.TableProcessorNode): 3; TimestampedKeyValueStore (org.apache.kafka.streams.state.TimestampedKeyValueStore): 3; HashSet (java.util.HashSet): 2; LinkedHashSet (java.util.LinkedHashSet): 2; Map (java.util.Map): 2; Entry (java.util.Map.Entry): 2; StreamsException (org.apache.kafka.streams.errors.StreamsException): 2; OptimizableRepartitionNode (org.apache.kafka.streams.kstream.internals.graph.OptimizableRepartitionNode): 2; ProcessorParameters (org.apache.kafka.streams.kstream.internals.graph.ProcessorParameters): 2; ArrayList (java.util.ArrayList): 1; PriorityQueue (java.util.PriorityQueue): 1; Set (java.util.Set): 1; Pattern (java.util.regex.Pattern): 1; TopologyException (org.apache.kafka.streams.errors.TopologyException): 1; GlobalStoreNode (org.apache.kafka.streams.kstream.internals.graph.GlobalStoreNode): 1