Search in sources:

Example 16 with KGroupedStream

use of org.apache.kafka.streams.kstream.KGroupedStream in project ksql by confluentinc.

From the class SchemaKStream, method groupBy:

/**
 * Groups this stream by the given expressions.
 *
 * <p>If {@code rekeyRequired} reports that no re-keying is needed, the stream is grouped by its
 * current key and the current key field is carried over unchanged. Otherwise records with a
 * {@code null} value are dropped and every remaining record is re-keyed with the string values of
 * the group-by columns joined by {@code "|+|"}; the resulting key field is named after the
 * group-by expressions joined with the same separator.
 *
 * @param keySerde serde used for the String grouping key
 * @param valSerde serde used for the row values
 * @param groupByExpressions expressions to group by; each expression's {@code toString()} is
 *     looked up in the schema to find the column that contributes to the new key
 * @return the grouped stream, with this stream as its only source
 */
public SchemaKGroupedStream groupBy(final Serde<String> keySerde, final Serde<GenericRow> valSerde, final List<Expression> groupByExpressions) {
    if (!rekeyRequired(groupByExpressions)) {
        final KGroupedStream<String, GenericRow> groupedByExistingKey = kstream.groupByKey(Serialized.with(keySerde, valSerde));
        return new SchemaKGroupedStream(schema, groupedByExistingKey, keyField, Collections.singletonList(this), functionRegistry, schemaRegistryClient);
    }
    // Separator placed between the parts of both the key field name and every generated key:
    // <column1>|+|<column2>|+|...
    final String keySeparator = "|+|";
    // Collect the expression names and the matching column indexes in one pass.
    final List<String> groupByNames = new ArrayList<>(groupByExpressions.size());
    final List<Integer> newKeyIndexes = new ArrayList<>(groupByExpressions.size());
    for (final Expression groupByExpr : groupByExpressions) {
        final String expressionName = groupByExpr.toString();
        groupByNames.add(expressionName);
        newKeyIndexes.add(SchemaUtil.getIndexInSchema(expressionName, getSchema()));
    }
    final KGroupedStream<String, GenericRow> groupedByNewKey = kstream.filter((key, value) -> value != null).groupBy((key, value) -> {
        final List<String> keyParts = new ArrayList<>(newKeyIndexes.size());
        for (final int index : newKeyIndexes) {
            // String.valueOf renders a null column as the text "null" instead of failing.
            keyParts.add(String.valueOf(value.getColumns().get(index)));
        }
        return String.join(keySeparator, keyParts);
    }, Serialized.with(keySerde, valSerde));
    // TODO: if the key is a prefix of the grouping columns then we can
    // use the repartition reflection hack to tell streams not to
    // repartition.
    final Field newKeyField = new Field(String.join(keySeparator, groupByNames), -1, Schema.STRING_SCHEMA);
    return new SchemaKGroupedStream(schema, groupedByNewKey, newKeyField, Collections.singletonList(this), functionRegistry, schemaRegistryClient);
}
Also used : Arrays(java.util.Arrays) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) Produced(org.apache.kafka.streams.kstream.Produced) SchemaRegistryClient(io.confluent.kafka.schemaregistry.client.SchemaRegistryClient) Serialized(org.apache.kafka.streams.kstream.Serialized) KStream(org.apache.kafka.streams.kstream.KStream) Joined(org.apache.kafka.streams.kstream.Joined) Schema(org.apache.kafka.connect.data.Schema) ArrayList(java.util.ArrayList) Pair(io.confluent.ksql.util.Pair) Serde(org.apache.kafka.common.serialization.Serde) DereferenceExpression(io.confluent.ksql.parser.tree.DereferenceExpression) ExpressionMetadata(io.confluent.ksql.util.ExpressionMetadata) Serdes(org.apache.kafka.common.serialization.Serdes) CodeGenRunner(io.confluent.ksql.codegen.CodeGenRunner) SchemaUtil(io.confluent.ksql.util.SchemaUtil) OutputNode(io.confluent.ksql.planner.plan.OutputNode) Field(org.apache.kafka.connect.data.Field) FunctionRegistry(io.confluent.ksql.function.FunctionRegistry) Set(java.util.Set) KsqlConfig(io.confluent.ksql.util.KsqlConfig) Expression(io.confluent.ksql.parser.tree.Expression) List(java.util.List) ValueJoiner(org.apache.kafka.streams.kstream.ValueJoiner) GenericRow(io.confluent.ksql.GenericRow) Optional(java.util.Optional) KsqlException(io.confluent.ksql.util.KsqlException) SchemaBuilder(org.apache.kafka.connect.data.SchemaBuilder) KsqlTopicSerDe(io.confluent.ksql.serde.KsqlTopicSerDe) Collections(java.util.Collections) GenericRowValueTypeEnforcer(io.confluent.ksql.util.GenericRowValueTypeEnforcer) Field(org.apache.kafka.connect.data.Field) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) DereferenceExpression(io.confluent.ksql.parser.tree.DereferenceExpression) Expression(io.confluent.ksql.parser.tree.Expression) ArrayList(java.util.ArrayList)

Example 17 with KGroupedStream

use of org.apache.kafka.streams.kstream.KGroupedStream in project kafka by apache.

From the class CogroupedKStreamImplTest, method shouldInsertRepartitionsTopicForUpstreamKeyModification:

/**
 * Builds a cogroup over two grouped streams, the first of which has its key and value swapped
 * before grouping, and checks the described topology against the expected text, which routes
 * the swapped stream through the "store-repartition" topic.
 */
@Test
public void shouldInsertRepartitionsTopicForUpstreamKeyModification() {
    final StreamsBuilder builder = new StreamsBuilder();

    // Sources are registered in this order; the expected node names below are numbered.
    final KStream<String, String> sourceOne = builder.stream("one", stringConsumed);
    final KStream<String, String> sourceTwo = builder.stream("two", stringConsumed);

    // Swap key and value on the first stream before grouping it.
    final KStream<String, String> swappedOne = sourceOne.map((k, v) -> new KeyValue<>(v, k));
    final KGroupedStream<String, String> groupedOne = swappedOne.groupByKey();
    final KGroupedStream<String, String> groupedTwo = sourceTwo.groupByKey();

    final KTable<String, String> customers = groupedOne
        .cogroup(STRING_AGGREGATOR)
        .cogroup(groupedTwo, STRING_AGGREGATOR)
        .aggregate(STRING_INITIALIZER, Named.as("test"), Materialized.as("store"));
    customers.toStream().to(OUTPUT);

    final String expectedDescription = "Topologies:\n"
        + "   Sub-topology: 0\n"
        + "    Source: KSTREAM-SOURCE-0000000000 (topics: [one])\n"
        + "      --> KSTREAM-MAP-0000000002\n"
        + "    Processor: KSTREAM-MAP-0000000002 (stores: [])\n"
        + "      --> store-repartition-filter\n"
        + "      <-- KSTREAM-SOURCE-0000000000\n"
        + "    Processor: store-repartition-filter (stores: [])\n"
        + "      --> store-repartition-sink\n"
        + "      <-- KSTREAM-MAP-0000000002\n"
        + "    Sink: store-repartition-sink (topic: store-repartition)\n"
        + "      <-- store-repartition-filter\n\n"
        + "  Sub-topology: 1\n"
        + "    Source: KSTREAM-SOURCE-0000000001 (topics: [two])\n"
        + "      --> test-cogroup-agg-1\n"
        + "    Source: store-repartition-source (topics: [store-repartition])\n"
        + "      --> test-cogroup-agg-0\n"
        + "    Processor: test-cogroup-agg-0 (stores: [store])\n"
        + "      --> test-cogroup-merge\n"
        + "      <-- store-repartition-source\n"
        + "    Processor: test-cogroup-agg-1 (stores: [store])\n"
        + "      --> test-cogroup-merge\n"
        + "      <-- KSTREAM-SOURCE-0000000001\n"
        + "    Processor: test-cogroup-merge (stores: [])\n"
        + "      --> KTABLE-TOSTREAM-0000000009\n"
        + "      <-- test-cogroup-agg-0, test-cogroup-agg-1\n"
        + "    Processor: KTABLE-TOSTREAM-0000000009 (stores: [])\n"
        + "      --> KSTREAM-SINK-0000000010\n"
        + "      <-- test-cogroup-merge\n"
        + "    Sink: KSTREAM-SINK-0000000010 (topic: output)\n"
        + "      <-- KTABLE-TOSTREAM-0000000009\n\n";

    final String actualDescription = builder.build().describe().toString();
    assertThat(actualDescription, equalTo(expectedDescription));
}
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) StreamsConfig(org.apache.kafka.streams.StreamsConfig) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) MockInitializer(org.apache.kafka.test.MockInitializer) SessionWindows(org.apache.kafka.streams.kstream.SessionWindows) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) Assert.assertThrows(org.junit.Assert.assertThrows) Window(org.apache.kafka.streams.kstream.Window) KStream(org.apache.kafka.streams.kstream.KStream) Initializer(org.apache.kafka.streams.kstream.Initializer) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) Named(org.apache.kafka.streams.kstream.Named) KeyValueStore(org.apache.kafka.streams.state.KeyValueStore) IntegerSerializer(org.apache.kafka.common.serialization.IntegerSerializer) Serdes(org.apache.kafka.common.serialization.Serdes) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Aggregator(org.apache.kafka.streams.kstream.Aggregator) TestRecord(org.apache.kafka.streams.test.TestRecord) Before(org.junit.Before) TopologyTestDriver(org.apache.kafka.streams.TopologyTestDriver) MockValueJoiner(org.apache.kafka.test.MockValueJoiner) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) KTable(org.apache.kafka.streams.kstream.KTable) Properties(java.util.Properties) TestOutputTopic(org.apache.kafka.streams.TestOutputTopic) Consumed(org.apache.kafka.streams.kstream.Consumed) KeyValue(org.apache.kafka.streams.KeyValue) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) Grouped(org.apache.kafka.streams.kstream.Grouped) MockAggregator(org.apache.kafka.test.MockAggregator) Bytes(org.apache.kafka.common.utils.Bytes) SlidingWindows(org.apache.kafka.streams.kstream.SlidingWindows) Materialized(org.apache.kafka.streams.kstream.Materialized) IntegerDeserializer(org.apache.kafka.common.serialization.IntegerDeserializer) 
TestInputTopic(org.apache.kafka.streams.TestInputTopic) CogroupedKStream(org.apache.kafka.streams.kstream.CogroupedKStream) Windows(org.apache.kafka.streams.kstream.Windows) StreamsTestUtils(org.apache.kafka.test.StreamsTestUtils) KeyValue(org.apache.kafka.streams.KeyValue) Test(org.junit.Test)

Aggregations

KGroupedStream (org.apache.kafka.streams.kstream.KGroupedStream)17 Serdes (org.apache.kafka.common.serialization.Serdes)15 KStream (org.apache.kafka.streams.kstream.KStream)15 Properties (java.util.Properties)14 Bytes (org.apache.kafka.common.utils.Bytes)14 StreamsBuilder (org.apache.kafka.streams.StreamsBuilder)14 StreamsConfig (org.apache.kafka.streams.StreamsConfig)14 KeyValue (org.apache.kafka.streams.KeyValue)13 Materialized (org.apache.kafka.streams.kstream.Materialized)13 Test (org.junit.Test)13 StringDeserializer (org.apache.kafka.common.serialization.StringDeserializer)12 StringSerializer (org.apache.kafka.common.serialization.StringSerializer)12 Consumed (org.apache.kafka.streams.kstream.Consumed)12 KTable (org.apache.kafka.streams.kstream.KTable)12 KeyValueStore (org.apache.kafka.streams.state.KeyValueStore)12 TestInputTopic (org.apache.kafka.streams.TestInputTopic)11 TopologyTestDriver (org.apache.kafka.streams.TopologyTestDriver)11 Grouped (org.apache.kafka.streams.kstream.Grouped)11 Initializer (org.apache.kafka.streams.kstream.Initializer)11 MatcherAssert.assertThat (org.hamcrest.MatcherAssert.assertThat)11