Use of org.apache.kafka.streams.state.WindowStore in project ksql by confluentinc.
In the class HoppingWindowExpressionTest, the method shouldCreateHoppingWindowAggregate:
@Test
public void shouldCreateHoppingWindowAggregate() {
  final KGroupedStream stream = EasyMock.createNiceMock(KGroupedStream.class);
  final TimeWindowedKStream windowedKStream = EasyMock.createNiceMock(TimeWindowedKStream.class);
  final UdafAggregator aggregator = EasyMock.createNiceMock(UdafAggregator.class);
  final HoppingWindowExpression windowExpression = new HoppingWindowExpression(10, TimeUnit.SECONDS, 4, TimeUnit.MILLISECONDS);
  final Initializer initializer = () -> 0;
  final Materialized<String, GenericRow, WindowStore<Bytes, byte[]>> store = Materialized.as("store");
  EasyMock.expect(stream.windowedBy(TimeWindows.of(10000L).advanceBy(4L))).andReturn(windowedKStream);
  EasyMock.expect(windowedKStream.aggregate(same(initializer), same(aggregator), same(store))).andReturn(null);
  EasyMock.replay(stream, windowedKStream);
  windowExpression.applyAggregate(stream, initializer, aggregator, store);
  EasyMock.verify(stream, windowedKStream);
}
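For context, the call chain that the mocks verify corresponds to the following un-mocked Kafka Streams code. This is a minimal sketch, assuming a KGroupedStream<String, GenericRow> named groupedStream plus existing initializer and aggregator instances; the variable names are hypothetical.

// Sketch of the hopping-window aggregation the test above verifies:
// 10-second windows that advance every 4 milliseconds, i.e. heavily overlapping windows.
final KTable<Windowed<String>, GenericRow> aggregated = groupedStream
    .windowedBy(TimeWindows.of(10000L).advanceBy(4L))
    .aggregate(
        initializer,  // e.g. the Initializer from the test
        aggregator,   // the UdafAggregator
        Materialized.<String, GenericRow, WindowStore<Bytes, byte[]>>as("store"));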
Use of org.apache.kafka.streams.state.WindowStore in project ksql by confluentinc.
In the class TumblingWindowExpressionTest, the method shouldCreateTumblingWindowAggregate:
@Test
public void shouldCreateTumblingWindowAggregate() {
  final KGroupedStream stream = EasyMock.createNiceMock(KGroupedStream.class);
  final TimeWindowedKStream windowedKStream = EasyMock.createNiceMock(TimeWindowedKStream.class);
  final UdafAggregator aggregator = EasyMock.createNiceMock(UdafAggregator.class);
  final TumblingWindowExpression windowExpression = new TumblingWindowExpression(10, TimeUnit.SECONDS);
  final Initializer initializer = () -> 0;
  final Materialized<String, GenericRow, WindowStore<Bytes, byte[]>> store = Materialized.as("store");
  EasyMock.expect(stream.windowedBy(TimeWindows.of(10000L))).andReturn(windowedKStream);
  EasyMock.expect(windowedKStream.aggregate(same(initializer), same(aggregator), same(store))).andReturn(null);
  EasyMock.replay(stream, windowedKStream);
  windowExpression.applyAggregate(stream, initializer, aggregator, store);
  EasyMock.verify(stream, windowedKStream);
}
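The only difference from the hopping test above is the expected window definition: a tumbling window is a hopping window whose advance interval equals its size, so no advanceBy(...) call is expected here.

final TimeWindows tumbling = TimeWindows.of(10000L);               // contiguous, non-overlapping 10-second windows
final TimeWindows hopping  = TimeWindows.of(10000L).advanceBy(4L); // overlapping 10-second windows, one starting every 4 ms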
Use of org.apache.kafka.streams.state.WindowStore in project ksql by confluentinc.
In the class SchemaKGroupedStream, the method aggregate:
@SuppressWarnings("unchecked")
public SchemaKTable aggregate(
    final Initializer initializer,
    final UdafAggregator aggregator,
    final WindowExpression windowExpression,
    final Serde<GenericRow> topicValueSerDe) {
  final KTable aggKtable;
  if (windowExpression != null) {
    final Materialized<String, GenericRow, ?> materialized =
        Materialized.<String, GenericRow, WindowStore<Bytes, byte[]>>with(Serdes.String(), topicValueSerDe);
    final KsqlWindowExpression ksqlWindowExpression = windowExpression.getKsqlWindowExpression();
    aggKtable = ksqlWindowExpression.applyAggregate(kgroupedStream, initializer, aggregator, materialized);
  } else {
    aggKtable = kgroupedStream.aggregate(initializer, aggregator, Materialized.with(Serdes.String(), topicValueSerDe));
  }
  return new SchemaKTable(
      schema, aggKtable, keyField, sourceSchemaKStreams,
      windowExpression != null, SchemaKStream.Type.AGGREGATE,
      functionRegistry, schemaRegistryClient);
}
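Note the two Materialized factory methods in play: the tests above use Materialized.as(...), which pins a store name, while this method uses Materialized.with(...), which only pins the serdes and lets Streams generate a store name. A minimal sketch of the contrast, reusing topicValueSerDe from the method above (variable names are hypothetical):

// Named store: the name "store" is fixed and the store is queryable under it.
final Materialized<String, GenericRow, WindowStore<Bytes, byte[]>> named =
    Materialized.as("store");
// Anonymous store: the name is generated by Streams; only the serdes are specified.
final Materialized<String, GenericRow, WindowStore<Bytes, byte[]>> anonymous =
    Materialized.with(Serdes.String(), topicValueSerDe);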
Use of org.apache.kafka.streams.state.WindowStore in project kafka-streams-examples by confluentinc.
In the class WordCountInteractiveQueriesExample, the method createStreams:
static KafkaStreams createStreams(final Properties streamsConfiguration) {
  final Serde<String> stringSerde = Serdes.String();
  final StreamsBuilder builder = new StreamsBuilder();
  final KStream<String, String> textLines =
      builder.stream(TEXT_LINES_TOPIC, Consumed.with(Serdes.String(), Serdes.String()));
  final KGroupedStream<String, String> groupedByWord = textLines
      .flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+")))
      .groupBy((key, word) -> word, Serialized.with(stringSerde, stringSerde));
  // Create a state store for the all-time word count.
  groupedByWord.count(
      Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("word-count")
          .withValueSerde(Serdes.Long()));
  // Create a windowed state store that contains the word count for every 1-minute window.
  groupedByWord
      .windowedBy(TimeWindows.of(60000))
      .count(
          Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("windowed-word-count")
              .withValueSerde(Serdes.Long()));
  return new KafkaStreams(builder.build(), streamsConfiguration);
}
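Because both stores above are created with fixed names, they can be read via interactive queries while the application runs. A minimal sketch of querying the windowed store, assuming streams has been started and finished restoring state; the key and the time range are hypothetical:

// Look up the windowed word-count store by the name used in createStreams().
final ReadOnlyWindowStore<String, Long> windowedWordCount =
    streams.store("windowed-word-count", QueryableStoreTypes.<String, Long>windowStore());
final long now = System.currentTimeMillis();
// Fetch the counts for the word "kafka" over the last five minutes.
try (final WindowStoreIterator<Long> counts =
         windowedWordCount.fetch("kafka", now - TimeUnit.MINUTES.toMillis(5), now)) {
  while (counts.hasNext()) {
    final KeyValue<Long, Long> windowStartAndCount = counts.next();
    System.out.println("window starting at " + windowStartAndCount.key + ": " + windowStartAndCount.value);
  }
}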
Use of org.apache.kafka.streams.state.WindowStore in project kafka-streams-examples by confluentinc.
In the class EventDeduplicationLambdaIntegrationTest, the method shouldRemoveDuplicatesFromTheInput:
@Test
public void shouldRemoveDuplicatesFromTheInput() throws Exception {
  // e.g. "4ff3cb44-abcb-46e3-8f9a-afb7cc74fbb8"
  String firstId = UUID.randomUUID().toString();
  String secondId = UUID.randomUUID().toString();
  String thirdId = UUID.randomUUID().toString();
  List<String> inputValues = Arrays.asList(firstId, secondId, firstId, firstId, secondId, thirdId, thirdId, firstId, secondId);
  List<String> expectedValues = Arrays.asList(firstId, secondId, thirdId);

  //
  // Step 1: Configure and start the processor topology.
  //
  StreamsBuilder builder = new StreamsBuilder();
  Properties streamsConfiguration = new Properties();
  streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "deduplication-lambda-integration-test");
  streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
  streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.ByteArray().getClass().getName());
  streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
  // The commit interval for flushing records to state stores and downstream must be lower than
  // this integration test's timeout (30 secs) to ensure we observe the expected processing results.
  streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, TimeUnit.SECONDS.toMillis(10));
  streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
  // Use a temporary directory for storing state, which will be automatically removed after the test.
  streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
  // How long we "remember" an event. During this time, any incoming duplicates of the event
  // will be, well, dropped, thereby de-duplicating the input data.
  //
  // The actual value depends on your use case. To reduce memory and disk usage, you could
  // decrease the duration to purge old windows more frequently, at the cost of potentially
  // missing out on de-duplicating late-arriving records.
  long maintainDurationPerEventInMs = TimeUnit.MINUTES.toMillis(10);

  // The number of segments has no impact on correctness. Using more segments implies larger
  // overhead but allows for more fine-grained record expiration.
  // Note: the specified retention time is a _minimum_ time span, not a strict upper bound.
  int numberOfSegments = 3;

  // The retention period must be at least the window size. For this use case we don't need a
  // longer retention period, so we simply use the window size as the retention time.
  long retentionPeriod = maintainDurationPerEventInMs;
  StoreBuilder<WindowStore<String, Long>> dedupStoreBuilder = Stores.windowStoreBuilder(
      Stores.persistentWindowStore(storeName, retentionPeriod, numberOfSegments, maintainDurationPerEventInMs, false),
      Serdes.String(),
      Serdes.Long());
  builder.addStateStore(dedupStoreBuilder);

  KStream<byte[], String> input = builder.stream(inputTopic);
  KStream<byte[], String> deduplicated = input.transform(
      // In this example the record value itself serves as the event ID used for de-duplication;
      // adapt the ID-extractor function as needed for your records.
      () -> new DeduplicationTransformer<>(maintainDurationPerEventInMs, (key, value) -> value),
      storeName);
  deduplicated.to(outputTopic);

  KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
  streams.start();
  //
  // Step 2: Produce some input data to the input topic.
  //
  Properties producerConfig = new Properties();
  producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
  producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
  producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
  producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class);
  producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
  IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig);

  //
  // Step 3: Verify the application's output data.
  //
  Properties consumerConfig = new Properties();
  consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
  consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "deduplication-integration-test-standard-consumer");
  consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
  consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class);
  consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
  List<String> actualValues = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfig, outputTopic, expectedValues.size());
  streams.close();
  assertThat(actualValues).containsExactlyElementsOf(expectedValues);
}
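The DeduplicationTransformer referenced above is not part of this excerpt. A minimal sketch of how such a transformer can back de-duplication with the WindowStore registered earlier is shown below; it is simplified relative to the real class in kafka-streams-examples (which, for instance, also searches a window *around* the record's timestamp to handle out-of-order records).

// Simplified sketch of a de-duplication transformer. First sighting of an event ID
// is remembered in the window store and forwarded; repeats within the "remember"
// duration are dropped.
private static class DeduplicationTransformer<K, V, E> implements Transformer<K, V, KeyValue<K, V>> {

  private final long maintainDurationMs;
  private final KeyValueMapper<K, V, E> idExtractor;
  private ProcessorContext context;
  private WindowStore<E, Long> eventIdStore;

  DeduplicationTransformer(final long maintainDurationMs, final KeyValueMapper<K, V, E> idExtractor) {
    this.maintainDurationMs = maintainDurationMs;
    this.idExtractor = idExtractor;
  }

  @Override
  @SuppressWarnings("unchecked")
  public void init(final ProcessorContext context) {
    this.context = context;
    // storeName is a field of the enclosing test class (see the transform() call above).
    this.eventIdStore = (WindowStore<E, Long>) context.getStateStore(storeName);
  }

  @Override
  public KeyValue<K, V> transform(final K key, final V value) {
    final E eventId = idExtractor.apply(key, value);
    final long eventTime = context.timestamp();
    // Have we already seen this event ID within the "remember" duration?
    try (final WindowStoreIterator<Long> timestamps =
             eventIdStore.fetch(eventId, eventTime - maintainDurationMs, eventTime)) {
      if (timestamps.hasNext()) {
        return null; // duplicate: returning null drops the record
      }
    }
    // First sighting: remember the event ID and forward the record downstream.
    eventIdStore.put(eventId, eventTime, eventTime);
    return KeyValue.pair(key, value);
  }

  @Override
  public void close() {
    // The store is managed by Kafka Streams; nothing to close here.
  }
}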