Use of org.apache.kafka.streams.kstream.KStreamBuilder in project kafka by apache.
From the class SimpleBenchmark, method createKafkaStreams:
private KafkaStreams createKafkaStreams(String topic, final CountDownLatch latch) {
    Properties props = setStreamProperties("simple-benchmark-streams");
    KStreamBuilder builder = new KStreamBuilder();
    KStream<Integer, byte[]> source = builder.stream(INTEGER_SERDE, BYTE_SERDE, topic);
    source.process(new ProcessorSupplier<Integer, byte[]>() {
        @Override
        public Processor<Integer, byte[]> get() {
            return new AbstractProcessor<Integer, byte[]>() {
                @Override
                public void init(ProcessorContext context) {
                    // no initialization needed
                }

                @Override
                public void process(Integer key, byte[] value) {
                    processedRecords++;
                    // Integer.BYTES (4) is the key size in bytes; Integer.SIZE would
                    // count bits and overstate the byte throughput.
                    processedBytes += value.length + Integer.BYTES;
                    if (processedRecords == numRecords) {
                        latch.countDown();
                    }
                }

                @Override
                public void punctuate(long timestamp) {
                    // no scheduled work
                }

                @Override
                public void close() {
                    // nothing to clean up
                }
            };
        }
    });
    return createKafkaStreamsWithExceptionHandler(builder, props);
}
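For comparison, here is a minimal sketch of the same record-counting topology on the newer StreamsBuilder API (Kafka 1.0+), which replaced KStreamBuilder. The class name, method signature, and latch handling are assumptions carried over from the benchmark above, not code from the Kafka project.

import java.util.Properties;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;

public class CountingTopologySketch {

    public static KafkaStreams create(final String topic, final long numRecords,
                                      final CountDownLatch latch, final Properties props) {
        final StreamsBuilder builder = new StreamsBuilder();
        // Serdes are now passed via Consumed instead of positional arguments.
        final KStream<Integer, byte[]> source =
            builder.stream(topic, Consumed.with(Serdes.Integer(), Serdes.ByteArray()));
        final AtomicLong processed = new AtomicLong();
        // Count every consumed record and release the latch once all are seen.
        source.foreach((key, value) -> {
            if (processed.incrementAndGet() == numRecords) {
                latch.countDown();
            }
        });
        return new KafkaStreams(builder.build(), props);
    }
}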
Use of org.apache.kafka.streams.kstream.KStreamBuilder in project kafka by apache.
From the class SimpleBenchmark, method createKafkaStreamsKStreamKStreamJoin:
private KafkaStreams createKafkaStreamsKStreamKStreamJoin(Properties streamConfig, String kStreamTopic1,
                                                          String kStreamTopic2, final CountDownLatch latch) {
    final KStreamBuilder builder = new KStreamBuilder();
    final KStream<Long, byte[]> input1 = builder.stream(kStreamTopic1);
    final KStream<Long, byte[]> input2 = builder.stream(kStreamTopic2);
    final long timeDifferenceMs = 10000L;
    input1.leftJoin(input2, VALUE_JOINER, JoinWindows.of(timeDifferenceMs))
          .foreach(new CountDownAction(latch));
    return createKafkaStreamsWithExceptionHandler(builder, streamConfig);
}
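The VALUE_JOINER constant referenced above is not shown in this excerpt. One plausible shape for it, assumed here purely for illustration (the actual SimpleBenchmark implementation may differ), is a ValueJoiner that concatenates the two joined byte arrays:

// Hypothetical stand-in for the VALUE_JOINER constant referenced above.
static final ValueJoiner<byte[], byte[], byte[]> VALUE_JOINER =
    new ValueJoiner<byte[], byte[], byte[]>() {
        @Override
        public byte[] apply(final byte[] value1, final byte[] value2) {
            // On a left join, value2 is null when no right-side record falls within the window.
            if (value2 == null) {
                return value1;
            }
            final byte[] joined = new byte[value1.length + value2.length];
            System.arraycopy(value1, 0, joined, 0, value1.length);
            System.arraycopy(value2, 0, joined, value1.length, value2.length);
            return joined;
        }
    };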
Use of org.apache.kafka.streams.kstream.KStreamBuilder in project kafka by apache.
From the class SimpleBenchmark, method createCountStreams:
private KafkaStreams createCountStreams(Properties streamConfig, String topic, final CountDownLatch latch) {
    final KStreamBuilder builder = new KStreamBuilder();
    final KStream<Integer, byte[]> input = builder.stream(topic);
    input.groupByKey().count("tmpStoreName").foreach(new CountDownAction(latch));
    return new KafkaStreams(builder, streamConfig);
}
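CountDownAction is likewise referenced but not shown in this excerpt. A minimal sketch of what such a helper might look like, assuming it is an inner class of the benchmark that tracks the enclosing class's processedRecords/numRecords fields and releases the latch at the target count:

// Assumed sketch of the CountDownAction helper: a ForeachAction that releases
// the latch once the expected number of records has been observed.
private class CountDownAction<K, V> implements ForeachAction<K, V> {
    private final CountDownLatch latch;

    CountDownAction(final CountDownLatch latch) {
        this.latch = latch;
    }

    @Override
    public void apply(final K key, final V value) {
        // processedRecords and numRecords are assumed fields of the enclosing benchmark class.
        processedRecords++;
        if (processedRecords == numRecords) {
            latch.countDown();
        }
    }
}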
Use of org.apache.kafka.streams.kstream.KStreamBuilder in project kafka by apache.
From the class KStreamKTableJoinIntegrationTest, method shouldCountClicksPerRegion:
@Test
public void shouldCountClicksPerRegion() throws Exception {
    // Input 1: Clicks per user (multiple records allowed per user).
    final List<KeyValue<String, Long>> userClicks = Arrays.asList(
        new KeyValue<>("alice", 13L),
        new KeyValue<>("bob", 4L),
        new KeyValue<>("chao", 25L),
        new KeyValue<>("bob", 19L),
        new KeyValue<>("dave", 56L),
        new KeyValue<>("eve", 78L),
        new KeyValue<>("alice", 40L),
        new KeyValue<>("fang", 99L));

    // Input 2: Region per user (multiple records allowed per user).
    final List<KeyValue<String, String>> userRegions = Arrays.asList(
        new KeyValue<>("alice", "asia"),   /* Alice lived in Asia originally... */
        new KeyValue<>("bob", "americas"),
        new KeyValue<>("chao", "asia"),
        new KeyValue<>("dave", "europe"),
        new KeyValue<>("alice", "europe"), /* ...but moved to Europe some time later. */
        new KeyValue<>("eve", "americas"),
        new KeyValue<>("fang", "asia"));

    final List<KeyValue<String, Long>> expectedClicksPerRegion = (cacheSizeBytes == 0)
        ? Arrays.asList(
            new KeyValue<>("europe", 13L),
            new KeyValue<>("americas", 4L),
            new KeyValue<>("asia", 25L),
            new KeyValue<>("americas", 23L),
            new KeyValue<>("europe", 69L),
            new KeyValue<>("americas", 101L),
            new KeyValue<>("europe", 109L),
            new KeyValue<>("asia", 124L))
        : Arrays.asList(
            new KeyValue<>("americas", 101L),
            new KeyValue<>("europe", 109L),
            new KeyValue<>("asia", 124L));

    //
    // Step 1: Configure and start the processor topology.
    //
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    final KStreamBuilder builder = new KStreamBuilder();

    // This KStream contains information such as "alice" -> 13L.
    //
    // Because this is a KStream ("record stream"), multiple records for the same user will be
    // considered as separate click-count events, each of which will be added to the total count.
    final KStream<String, Long> userClicksStream = builder.stream(stringSerde, longSerde, userClicksTopic);

    // This KTable contains information such as "alice" -> "europe".
    //
    // Because this is a KTable ("changelog stream"), only the latest value (here: region) for a
    // record key will be considered at the time when a new user-click record (see above) is
    // received for the `leftJoin` below. Any previous region values are considered out of
    // date. This behavior is quite different from the KStream for user clicks above.
    //
    // For example, the user "alice" will be considered to live in "europe" (although originally she
    // lived in "asia") because, at the time her first user-click record is received and
    // subsequently processed in the `leftJoin`, the latest region update for "alice" is "europe"
    // (which overrides her previous region value of "asia").
    final KTable<String, String> userRegionsTable =
        builder.table(stringSerde, stringSerde, userRegionsTopic, userRegionsStoreName);

    // Compute the number of clicks per region, e.g. "europe" -> 13L.
    //
    // The resulting KTable is continuously being updated as new data records arrive in the
    // input KStream `userClicksStream` and input KTable `userRegionsTable`.
    final KTable<String, Long> clicksPerRegion = userClicksStream
        .leftJoin(userRegionsTable, new ValueJoiner<Long, String, RegionWithClicks>() {
            @Override
            public RegionWithClicks apply(final Long clicks, final String region) {
                return new RegionWithClicks(region == null ? "UNKNOWN" : region, clicks);
            }
        })
        .map(new KeyValueMapper<String, RegionWithClicks, KeyValue<String, Long>>() {
            @Override
            public KeyValue<String, Long> apply(final String key, final RegionWithClicks value) {
                return new KeyValue<>(value.getRegion(), value.getClicks());
            }
        })
        .groupByKey(stringSerde, longSerde)
        .reduce(new Reducer<Long>() {
            @Override
            public Long apply(final Long value1, final Long value2) {
                return value1 + value2;
            }
        }, "ClicksPerRegionUnwindowed");

    // Write the (continuously updating) results to the output topic.
    clicksPerRegion.to(stringSerde, longSerde, outputTopic);
    kafkaStreams = new KafkaStreams(builder, streamsConfiguration);
    kafkaStreams.start();

    //
    // Step 2: Publish user-region information.
    //
    // To keep this code example simple and easier to understand/reason about, we publish all
    // user-region records before any user-click records (cf. step 3). In practice though,
    // data records would typically be arriving concurrently in both input streams/topics.
    final Properties userRegionsProducerConfig = new Properties();
    userRegionsProducerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    userRegionsProducerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    userRegionsProducerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    userRegionsProducerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    userRegionsProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    IntegrationTestUtils.produceKeyValuesSynchronously(userRegionsTopic, userRegions, userRegionsProducerConfig, mockTime);

    //
    // Step 3: Publish some user click events.
    //
    final Properties userClicksProducerConfig = new Properties();
    userClicksProducerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    userClicksProducerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    userClicksProducerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    userClicksProducerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    userClicksProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, LongSerializer.class);
    IntegrationTestUtils.produceKeyValuesSynchronously(userClicksTopic, userClicks, userClicksProducerConfig, mockTime);

    //
    // Step 4: Verify the application's output data.
    //
    final Properties consumerConfig = new Properties();
    consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "join-integration-test-standard-consumer");
    consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class);
    final List<KeyValue<String, Long>> actualClicksPerRegion =
        IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, expectedClicksPerRegion.size());
    assertThat(actualClicksPerRegion, equalTo(expectedClicksPerRegion));
}
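Since Kafka Streams targets Java 8, the anonymous ValueJoiner, KeyValueMapper, and Reducer classes in step 1 can be collapsed into lambdas against the very same API. A sketch reusing the serdes and store name from the test above:

final KTable<String, Long> clicksPerRegion = userClicksStream
    // Enrich each click with the user's current region ("UNKNOWN" if none is known yet).
    .leftJoin(userRegionsTable,
        (clicks, region) -> new RegionWithClicks(region == null ? "UNKNOWN" : region, clicks))
    // Re-key from user to region so clicks can be summed per region.
    .map((user, regionWithClicks) ->
        new KeyValue<>(regionWithClicks.getRegion(), regionWithClicks.getClicks()))
    .groupByKey(stringSerde, longSerde)
    .reduce((v1, v2) -> v1 + v2, "ClicksPerRegionUnwindowed");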
Use of org.apache.kafka.streams.kstream.KStreamBuilder in project kafka by apache.
From the class KStreamsFineGrainedAutoResetIntegrationTest, method shouldThrowExceptionOverlappingPattern:
@Test(expected = TopologyBuilderException.class)
public void shouldThrowExceptionOverlappingPattern() throws Exception {
    final KStreamBuilder builder = new KStreamBuilder();
    // NOTE: this would realistically get caught when building the topology; the test is for completeness.
    final KStream<String, String> pattern1Stream =
        builder.stream(KStreamBuilder.AutoOffsetReset.EARLIEST, Pattern.compile("topic-[A-D]"));
    final KStream<String, String> pattern2Stream =
        builder.stream(KStreamBuilder.AutoOffsetReset.LATEST, Pattern.compile("topic-[A-D]"));
    final KStream<String, String> namedTopicsStream = builder.stream(TOPIC_Y, TOPIC_Z);
    builder.earliestResetTopicsPattern();
}
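For contrast, a brief sketch of what KStreamBuilder accepts: pattern subscriptions with per-source offset-reset policies are fine as long as the patterns cannot match the same topic. The second pattern here is an assumption chosen to avoid overlap with the first.

final KStreamBuilder validBuilder = new KStreamBuilder();
// Non-overlapping patterns: "topic-[A-D]" and "topic-[E-H]" can never match the same topic,
// so each source may carry its own AutoOffsetReset policy without conflict.
final KStream<String, String> earliestStream =
    validBuilder.stream(KStreamBuilder.AutoOffsetReset.EARLIEST, Pattern.compile("topic-[A-D]"));
final KStream<String, String> latestStream =
    validBuilder.stream(KStreamBuilder.AutoOffsetReset.LATEST, Pattern.compile("topic-[E-H]"));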