Search in sources :

Example 1 with WindowStore

use of org.apache.kafka.streams.state.WindowStore in project ksql by confluentinc.

the class HoppingWindowExpressionTest method shouldCreateHoppingWindowAggregate.

@Test
public void shouldCreateHoppingWindowAggregate() {
    final KGroupedStream stream = EasyMock.createNiceMock(KGroupedStream.class);
    final TimeWindowedKStream windowedKStream = EasyMock.createNiceMock(TimeWindowedKStream.class);
    final UdafAggregator aggregator = EasyMock.createNiceMock(UdafAggregator.class);
    final HoppingWindowExpression windowExpression = new HoppingWindowExpression(10, TimeUnit.SECONDS, 4, TimeUnit.MILLISECONDS);
    final Initializer initializer = () -> 0;
    final Materialized<String, GenericRow, WindowStore<Bytes, byte[]>> store = Materialized.as("store");
    EasyMock.expect(stream.windowedBy(TimeWindows.of(10000L).advanceBy(4L))).andReturn(windowedKStream);
    EasyMock.expect(windowedKStream.aggregate(same(initializer), same(aggregator), same(store))).andReturn(null);
    EasyMock.replay(stream, windowedKStream);
    windowExpression.applyAggregate(stream, initializer, aggregator, store);
    EasyMock.verify(stream, windowedKStream);
}
Also used : GenericRow(io.confluent.ksql.GenericRow) WindowStore(org.apache.kafka.streams.state.WindowStore) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) Initializer(org.apache.kafka.streams.kstream.Initializer) TimeWindowedKStream(org.apache.kafka.streams.kstream.TimeWindowedKStream) UdafAggregator(io.confluent.ksql.function.UdafAggregator) Test(org.junit.Test)

Example 2 with WindowStore

use of org.apache.kafka.streams.state.WindowStore in project ksql by confluentinc.

the class TumblingWindowExpressionTest method shouldCreateTumblingWindowAggregate.

@Test
public void shouldCreateTumblingWindowAggregate() {
    final KGroupedStream stream = EasyMock.createNiceMock(KGroupedStream.class);
    final TimeWindowedKStream windowedKStream = EasyMock.createNiceMock(TimeWindowedKStream.class);
    final UdafAggregator aggregator = EasyMock.createNiceMock(UdafAggregator.class);
    final TumblingWindowExpression windowExpression = new TumblingWindowExpression(10, TimeUnit.SECONDS);
    final Initializer initializer = () -> 0;
    final Materialized<String, GenericRow, WindowStore<Bytes, byte[]>> store = Materialized.as("store");
    EasyMock.expect(stream.windowedBy(TimeWindows.of(10000L))).andReturn(windowedKStream);
    EasyMock.expect(windowedKStream.aggregate(same(initializer), same(aggregator), same(store))).andReturn(null);
    EasyMock.replay(stream, windowedKStream);
    windowExpression.applyAggregate(stream, initializer, aggregator, store);
    EasyMock.verify(stream, windowedKStream);
}
Also used : GenericRow(io.confluent.ksql.GenericRow) WindowStore(org.apache.kafka.streams.state.WindowStore) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) Initializer(org.apache.kafka.streams.kstream.Initializer) TimeWindowedKStream(org.apache.kafka.streams.kstream.TimeWindowedKStream) UdafAggregator(io.confluent.ksql.function.UdafAggregator) Test(org.junit.Test)

Example 3 with WindowStore

use of org.apache.kafka.streams.state.WindowStore in project ksql by confluentinc.

the class SchemaKGroupedStream method aggregate.

@SuppressWarnings("unchecked")
public SchemaKTable aggregate(final Initializer initializer, final UdafAggregator aggregator, final WindowExpression windowExpression, final Serde<GenericRow> topicValueSerDe) {
    final KTable aggKtable;
    if (windowExpression != null) {
        final Materialized<String, GenericRow, ?> materialized = Materialized.<String, GenericRow, WindowStore<Bytes, byte[]>>with(Serdes.String(), topicValueSerDe);
        final KsqlWindowExpression ksqlWindowExpression = windowExpression.getKsqlWindowExpression();
        aggKtable = ksqlWindowExpression.applyAggregate(kgroupedStream, initializer, aggregator, materialized);
    } else {
        aggKtable = kgroupedStream.aggregate(initializer, aggregator, Materialized.with(Serdes.String(), topicValueSerDe));
    }
    return new SchemaKTable(schema, aggKtable, keyField, sourceSchemaKStreams, windowExpression != null, SchemaKStream.Type.AGGREGATE, functionRegistry, schemaRegistryClient);
}
Also used : GenericRow(io.confluent.ksql.GenericRow) WindowStore(org.apache.kafka.streams.state.WindowStore) KsqlWindowExpression(io.confluent.ksql.parser.tree.KsqlWindowExpression) KTable(org.apache.kafka.streams.kstream.KTable)

Example 4 with WindowStore

use of org.apache.kafka.streams.state.WindowStore in project kafka-streams-examples by confluentinc.

the class WordCountInteractiveQueriesExample method createStreams.

static KafkaStreams createStreams(final Properties streamsConfiguration) {
    final Serde<String> stringSerde = Serdes.String();
    StreamsBuilder builder = new StreamsBuilder();
    KStream<String, String> textLines = builder.stream(TEXT_LINES_TOPIC, Consumed.with(Serdes.String(), Serdes.String()));
    final KGroupedStream<String, String> groupedByWord = textLines.flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+"))).groupBy((key, word) -> word, Serialized.with(stringSerde, stringSerde));
    // Create a State Store for with the all time word count
    groupedByWord.count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("word-count").withValueSerde(Serdes.Long()));
    // Create a Windowed State Store that contains the word count for every
    // 1 minute
    groupedByWord.windowedBy(TimeWindows.of(60000)).count(Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("windowed-word-count").withValueSerde(Serdes.Long()));
    return new KafkaStreams(builder.build(), streamsConfiguration);
}
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) StreamsConfig(org.apache.kafka.streams.StreamsConfig) Arrays(java.util.Arrays) Properties(java.util.Properties) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) Files(java.nio.file.Files) Serialized(org.apache.kafka.streams.kstream.Serialized) HostInfo(org.apache.kafka.streams.state.HostInfo) KStream(org.apache.kafka.streams.kstream.KStream) WindowStore(org.apache.kafka.streams.state.WindowStore) File(java.io.File) Bytes(org.apache.kafka.common.utils.Bytes) Consumed(org.apache.kafka.streams.Consumed) Serde(org.apache.kafka.common.serialization.Serde) TimeWindows(org.apache.kafka.streams.kstream.TimeWindows) KeyValueStore(org.apache.kafka.streams.state.KeyValueStore) Materialized(org.apache.kafka.streams.kstream.Materialized) Serdes(org.apache.kafka.common.serialization.Serdes) KafkaStreams(org.apache.kafka.streams.KafkaStreams) WindowStore(org.apache.kafka.streams.state.WindowStore) KafkaStreams(org.apache.kafka.streams.KafkaStreams) KeyValueStore(org.apache.kafka.streams.state.KeyValueStore)

Example 5 with WindowStore

use of org.apache.kafka.streams.state.WindowStore in project kafka-streams-examples by confluentinc.

the class EventDeduplicationLambdaIntegrationTest method shouldRemoveDuplicatesFromTheInput.

@Test
public void shouldRemoveDuplicatesFromTheInput() throws Exception {
    // e.g. "4ff3cb44-abcb-46e3-8f9a-afb7cc74fbb8"
    String firstId = UUID.randomUUID().toString();
    String secondId = UUID.randomUUID().toString();
    String thirdId = UUID.randomUUID().toString();
    List<String> inputValues = Arrays.asList(firstId, secondId, firstId, firstId, secondId, thirdId, thirdId, firstId, secondId);
    List<String> expectedValues = Arrays.asList(firstId, secondId, thirdId);
    // 
    // Step 1: Configure and start the processor topology.
    // 
    StreamsBuilder builder = new StreamsBuilder();
    Properties streamsConfiguration = new Properties();
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "deduplication-lambda-integration-test");
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.ByteArray().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    // The commit interval for flushing records to state stores and downstream must be lower than
    // this integration test's timeout (30 secs) to ensure we observe the expected processing results.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, TimeUnit.SECONDS.toMillis(10));
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Use a temporary directory for storing state, which will be automatically removed after the test.
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
    // How long we "remember" an event.  During this time, any incoming duplicates of the event
    // will be, well, dropped, thereby de-duplicating the input data.
    // 
    // The actual value depends on your use case.  To reduce memory and disk usage, you could
    // decrease the size to purge old windows more frequently at the cost of potentially missing out
    // on de-duplicating late-arriving records.
    long maintainDurationPerEventInMs = TimeUnit.MINUTES.toMillis(10);
    // The number of segments has no impact on "correctness".
    // Using more segments implies larger overhead but allows for more fined grained record expiration
    // Note: the specified retention time is a _minimum_ time span and no strict upper time bound
    int numberOfSegments = 3;
    // retention period must be at least window size -- for this use case, we don't need a longer retention period
    // and thus just use the window size as retention time
    long retentionPeriod = maintainDurationPerEventInMs;
    StoreBuilder<WindowStore<String, Long>> dedupStoreBuilder = Stores.windowStoreBuilder(Stores.persistentWindowStore(storeName, retentionPeriod, numberOfSegments, maintainDurationPerEventInMs, false), Serdes.String(), Serdes.Long());
    builder.addStateStore(dedupStoreBuilder);
    KStream<byte[], String> input = builder.stream(inputTopic);
    KStream<byte[], String> deduplicated = input.transform(// function as needed.
    () -> new DeduplicationTransformer<>(maintainDurationPerEventInMs, (key, value) -> value), storeName);
    deduplicated.to(outputTopic);
    KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    streams.start();
    // 
    // Step 2: Produce some input data to the input topic.
    // 
    Properties producerConfig = new Properties();
    producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class);
    producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig);
    // 
    // Step 3: Verify the application's output data.
    // 
    Properties consumerConfig = new Properties();
    consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "deduplication-integration-test-standard-consumer");
    consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class);
    consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    List<String> actualValues = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfig, outputTopic, expectedValues.size());
    streams.close();
    assertThat(actualValues).containsExactlyElementsOf(expectedValues);
}
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) WindowStore(org.apache.kafka.streams.state.WindowStore) StreamsConfig(org.apache.kafka.streams.StreamsConfig) Arrays(java.util.Arrays) BeforeClass(org.junit.BeforeClass) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Stores(org.apache.kafka.streams.state.Stores) KStream(org.apache.kafka.streams.kstream.KStream) WindowStore(org.apache.kafka.streams.state.WindowStore) ByteArraySerializer(org.apache.kafka.common.serialization.ByteArraySerializer) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) EmbeddedSingleNodeKafkaCluster(io.confluent.examples.streams.kafka.EmbeddedSingleNodeKafkaCluster) Serdes(org.apache.kafka.common.serialization.Serdes) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) ClassRule(org.junit.ClassRule) ProducerConfig(org.apache.kafka.clients.producer.ProducerConfig) ByteArrayDeserializer(org.apache.kafka.common.serialization.ByteArrayDeserializer) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) KeyValueMapper(org.apache.kafka.streams.kstream.KeyValueMapper) Properties(java.util.Properties) TestUtils(org.apache.kafka.test.TestUtils) Transformer(org.apache.kafka.streams.kstream.Transformer) KeyValue(org.apache.kafka.streams.KeyValue) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) Test(org.junit.Test) UUID(java.util.UUID) StoreBuilder(org.apache.kafka.streams.state.StoreBuilder) TimeUnit(java.util.concurrent.TimeUnit) ProcessorContext(org.apache.kafka.streams.processor.ProcessorContext) List(java.util.List) WindowStoreIterator(org.apache.kafka.streams.state.WindowStoreIterator) KafkaStreams(org.apache.kafka.streams.KafkaStreams) KafkaStreams(org.apache.kafka.streams.KafkaStreams) Properties(java.util.Properties) Test(org.junit.Test)

Aggregations

WindowStore (org.apache.kafka.streams.state.WindowStore)6 GenericRow (io.confluent.ksql.GenericRow)3 KGroupedStream (org.apache.kafka.streams.kstream.KGroupedStream)3 Test (org.junit.Test)3 UdafAggregator (io.confluent.ksql.function.UdafAggregator)2 Arrays (java.util.Arrays)2 Properties (java.util.Properties)2 Serdes (org.apache.kafka.common.serialization.Serdes)2 KafkaStreams (org.apache.kafka.streams.KafkaStreams)2 StreamsBuilder (org.apache.kafka.streams.StreamsBuilder)2 StreamsConfig (org.apache.kafka.streams.StreamsConfig)2 Initializer (org.apache.kafka.streams.kstream.Initializer)2 KStream (org.apache.kafka.streams.kstream.KStream)2 TimeWindowedKStream (org.apache.kafka.streams.kstream.TimeWindowedKStream)2 EmbeddedSingleNodeKafkaCluster (io.confluent.examples.streams.kafka.EmbeddedSingleNodeKafkaCluster)1 KsqlWindowExpression (io.confluent.ksql.parser.tree.KsqlWindowExpression)1 File (java.io.File)1 Files (java.nio.file.Files)1 List (java.util.List)1 UUID (java.util.UUID)1