Use of org.apache.kafka.streams.StreamsBuilder in project apache-kafka-on-k8s by banzaicloud: class SimpleBenchmark, method createKafkaStreamsWithStateStore.
private KafkaStreams createKafkaStreamsWithStateStore(String topic, final CountDownLatch latch, boolean enableCaching) {
    setStreamProperties("simple-benchmark-streams-with-store" + enableCaching);
    StreamsBuilder builder = new StreamsBuilder();
    final StoreBuilder<KeyValueStore<Integer, byte[]>> storeBuilder = Stores.keyValueStoreBuilder(Stores.persistentKeyValueStore("store"), Serdes.Integer(), Serdes.ByteArray());
    if (enableCaching) {
        builder.addStateStore(storeBuilder.withCachingEnabled());
    } else {
        builder.addStateStore(storeBuilder);
    }
    KStream<Integer, byte[]> source = builder.stream(topic, Consumed.with(INTEGER_SERDE, BYTE_SERDE));
    source.process(new ProcessorSupplier<Integer, byte[]>() {
        @Override
        public Processor<Integer, byte[]> get() {
            return new AbstractProcessor<Integer, byte[]>() {
                KeyValueStore<Integer, byte[]> store;

                @SuppressWarnings("unchecked")
                @Override
                public void init(ProcessorContext context) {
                    store = (KeyValueStore<Integer, byte[]>) context.getStateStore("store");
                }

                @Override
                public void process(Integer key, byte[] value) {
                    store.put(key, value);
                    processedRecords.getAndIncrement();
                    processedBytes += value.length + Integer.SIZE;
                    if (processedRecords.get() == numRecords) {
                        latch.countDown();
                    }
                }

                @Override
                public void punctuate(long timestamp) {
                }

                @Override
                public void close() {
                }
            };
        }
    }, "store");
    return createKafkaStreamsWithExceptionHandler(builder, props);
}
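For orientation, the snippet below is a hypothetical driver showing how a latch-based factory like this is typically exercised. It is a sketch only: it assumes it lives inside SimpleBenchmark (so the private factory method and the numRecords field are visible), and the topic argument is illustrative rather than taken from the project.

// Hypothetical driver sketch (not project code): start the topology, wait for the
// processor above to count the latch down after numRecords records, then shut down.
private void runStateStoreBenchmark(final String topic) throws InterruptedException {
    final CountDownLatch latch = new CountDownLatch(1);
    final KafkaStreams streams = createKafkaStreamsWithStateStore(topic, latch, true);
    final long startMs = System.currentTimeMillis();
    streams.start();
    try {
        latch.await();
    } finally {
        streams.close();
    }
    System.out.println("Processed " + numRecords + " records in " + (System.currentTimeMillis() - startMs) + " ms");
}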
Use of org.apache.kafka.streams.StreamsBuilder in project apache-kafka-on-k8s by banzaicloud: class SimpleBenchmark, method createCountStreams.
private KafkaStreams createCountStreams(Properties streamConfig, String topic, final CountDownLatch latch) {
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<Integer, byte[]> input = builder.stream(topic);
    input.groupByKey().count("tmpStoreName").foreach(new CountDownAction(latch));
    return new KafkaStreams(builder.build(), streamConfig);
}
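Note that count(String) on KGroupedStream and foreach on KTable belong to the old-style API and were deprecated and later removed. On a 2.x-or-newer Kafka Streams, the same topology would be written roughly as in the sketch below; this is an assumption-laden sketch, with Materialized imported from org.apache.kafka.streams.kstream and the lambda standing in for the project's CountDownAction.

// Sketch against the newer API: the store name moves into Materialized, and the
// count table is turned into a stream before the terminal foreach.
private KafkaStreams createCountStreamsNewApi(final Properties streamConfig, final String topic, final CountDownLatch latch) {
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<Integer, byte[]> input = builder.stream(topic);
    input.groupByKey()
         .count(Materialized.as("tmpStoreName"))
         .toStream()
         .foreach((key, count) -> latch.countDown()); // simplified stand-in for CountDownAction
    return new KafkaStreams(builder.build(), streamConfig);
}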
Use of org.apache.kafka.streams.StreamsBuilder in project apache-kafka-on-k8s by banzaicloud: class YahooBenchmark, method createYahooBenchmarkStreams.
private KafkaStreams createYahooBenchmarkStreams(final Properties streamConfig, final String campaignsTopic, final String eventsTopic, final CountDownLatch latch, final int numRecords) {
    Map<String, Object> serdeProps = new HashMap<>();
    final Serializer<ProjectedEvent> projectedEventSerializer = new JsonPOJOSerializer<>();
    serdeProps.put("JsonPOJOClass", ProjectedEvent.class);
    projectedEventSerializer.configure(serdeProps, false);
    final Deserializer<ProjectedEvent> projectedEventDeserializer = new JsonPOJODeserializer<>();
    serdeProps.put("JsonPOJOClass", ProjectedEvent.class);
    projectedEventDeserializer.configure(serdeProps, false);
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, ProjectedEvent> kEvents = builder.stream(eventsTopic, Consumed.with(Serdes.String(), Serdes.serdeFrom(projectedEventSerializer, projectedEventDeserializer)));
    final KTable<String, String> kCampaigns = builder.table(campaignsTopic, Consumed.with(Serdes.String(), Serdes.String()));
    KStream<String, ProjectedEvent> filteredEvents = kEvents.peek(new ForeachAction<String, ProjectedEvent>() {
        @Override
        public void apply(String key, ProjectedEvent value) {
            parent.processedRecords.getAndIncrement();
            if (parent.processedRecords.get() % 1000000 == 0) {
                System.out.println("Processed " + parent.processedRecords.get());
            }
            if (parent.processedRecords.get() >= numRecords) {
                latch.countDown();
            }
        }
    }).filter(new Predicate<String, ProjectedEvent>() {
        @Override
        public boolean test(final String key, final ProjectedEvent value) {
            return value.eventType.equals("view");
        }
    }).mapValues(new ValueMapper<ProjectedEvent, ProjectedEvent>() {
        @Override
        public ProjectedEvent apply(ProjectedEvent value) {
            ProjectedEvent event = new ProjectedEvent();
            event.adID = value.adID;
            event.eventTime = value.eventTime;
            event.eventType = value.eventType;
            return event;
        }
    });
    // deserialize the ad ID and campaign ID from the stored value in Kafka
    KTable<String, CampaignAd> deserCampaigns = kCampaigns.mapValues(new ValueMapper<String, CampaignAd>() {
        @Override
        public CampaignAd apply(String value) {
            String[] parts = value.split(":");
            CampaignAd cAdd = new CampaignAd();
            cAdd.adID = parts[0];
            cAdd.campaignID = parts[1];
            return cAdd;
        }
    });
    // join the events with the campaigns
    KStream<String, String> joined = filteredEvents.join(deserCampaigns, new ValueJoiner<ProjectedEvent, CampaignAd, String>() {
        @Override
        public String apply(ProjectedEvent value1, CampaignAd value2) {
            return value2.campaignID;
        }
    }, Serdes.String(), Serdes.serdeFrom(projectedEventSerializer, projectedEventDeserializer));
    // key by campaign rather than by ad, as in the original Yahoo benchmark
    KStream<String, String> keyedByCampaign = joined.selectKey(new KeyValueMapper<String, String, String>() {
        @Override
        public String apply(String key, String value) {
            return value;
        }
    });
    // calculate windowed counts
    keyedByCampaign.groupByKey(Serialized.with(Serdes.String(), Serdes.String())).count(TimeWindows.of(10 * 1000), "time-windows");
    return new KafkaStreams(builder.build(), streamConfig);
}
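The overloads used in the last few lines (join with explicit serdes, groupByKey(Serialized.with(...)), and count(TimeWindows, String)) were deprecated in later Kafka Streams releases. Purely as an illustration, the tail of this topology could be expressed against a 2.x-era API roughly as in the sketch below; Joined, Grouped, Materialized, and java.time.Duration are assumed imports, and the variable names are reused from the method above.

// Sketch of the join plus windowed count with the newer (2.x-era) overloads.
KStream<String, String> joinedNew = filteredEvents.join(
    deserCampaigns,
    (event, campaignAd) -> campaignAd.campaignID,
    Joined.with(Serdes.String(), Serdes.serdeFrom(projectedEventSerializer, projectedEventDeserializer), null));
joinedNew.selectKey((key, value) -> value)
    .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
    .windowedBy(TimeWindows.of(Duration.ofSeconds(10)))
    .count(Materialized.as("time-windows"));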
Use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc: class PageViewRegionExample, method main.
public static void main(final String[] args) throws Exception {
    final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
    final String schemaRegistryUrl = args.length > 1 ? args[1] : "http://localhost:8081";
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name. The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "pageview-region-example");
    streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "pageview-region-example-client");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Where to find the Confluent schema registry instance(s)
    streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    // Specify default (de)serializers for record keys and for record values.
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, GenericAvroSerde.class);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Records should be flushed every 10 seconds. This is less than the default
    // in order to keep this example interactive.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    final StreamsBuilder builder = new StreamsBuilder();
    // Create a stream of page view events from the PageViews topic, where the key of
    // a record is assumed to be null and the value an Avro GenericRecord
    // that represents the full details of the page view event. See `pageview.avsc` under
    // `src/main/avro/` for the corresponding Avro schema.
    final KStream<String, GenericRecord> views = builder.stream("PageViews");
    // Create a keyed stream of page view events from the PageViews stream,
    // by extracting the user id (String) from the Avro value
    final KStream<String, GenericRecord> viewsByUser = views.map(new KeyValueMapper<String, GenericRecord, KeyValue<String, GenericRecord>>() {
        @Override
        public KeyValue<String, GenericRecord> apply(final String dummy, final GenericRecord record) {
            return new KeyValue<>(record.get("user").toString(), record);
        }
    });
    // Create a changelog stream for user profiles from the UserProfiles topic,
    // where the key of a record is assumed to be the user id (String) and its value
    // an Avro GenericRecord. See `userprofile.avsc` under `src/main/avro/` for the
    // corresponding Avro schema.
    final KTable<String, GenericRecord> userProfiles = builder.table("UserProfiles");
    // Create a changelog stream as a projection of the value to the region attribute only
    final KTable<String, String> userRegions = userProfiles.mapValues(new ValueMapper<GenericRecord, String>() {
        @Override
        public String apply(final GenericRecord record) {
            return record.get("region").toString();
        }
    });
    // We must specify the Avro schemas for all intermediate (Avro) classes, if any.
    // In this example, we want to create an intermediate GenericRecord to hold the view region
    // (see below).
    final InputStream pageViewRegionSchema = PageViewRegionLambdaExample.class.getClassLoader().getResourceAsStream("avro/io/confluent/examples/streams/pageviewregion.avsc");
    final Schema schema = new Schema.Parser().parse(pageViewRegionSchema);
    final KTable<Windowed<String>, Long> viewsByRegion = viewsByUser.leftJoin(userRegions, new ValueJoiner<GenericRecord, String, GenericRecord>() {
        @Override
        public GenericRecord apply(final GenericRecord view, final String region) {
            final GenericRecord viewRegion = new GenericData.Record(schema);
            viewRegion.put("user", view.get("user"));
            viewRegion.put("page", view.get("page"));
            viewRegion.put("region", region);
            return viewRegion;
        }
    }).map(new KeyValueMapper<String, GenericRecord, KeyValue<String, GenericRecord>>() {
        @Override
        public KeyValue<String, GenericRecord> apply(final String user, final GenericRecord viewRegion) {
            return new KeyValue<>(viewRegion.get("region").toString(), viewRegion);
        }
    }).groupByKey().windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(5)).advanceBy(TimeUnit.MINUTES.toMillis(1))).count();
    // Note: The following operations would NOT be needed for the actual pageview-by-region
    // computation, which would normally stop at `count` above. We use the operations
    // below only to "massage" the output data so it is easier to inspect on the console via
    // kafka-console-consumer.
    final KStream<String, Long> viewsByRegionForConsole = viewsByRegion.toStream(new KeyValueMapper<Windowed<String>, Long, String>() {
        @Override
        public String apply(final Windowed<String> windowedRegion, final Long count) {
            return windowedRegion.toString();
        }
    });
    // write to the result topic
    viewsByRegionForConsole.to("PageViewsByRegion", Produced.with(stringSerde, longSerde));
    final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    // Always (and unconditionally) clean local state prior to starting the processing topology.
    // We opt for this unconditional call here because this will make it easier for you to play around with the example
    // when resetting the application for a re-run (via the Application Reset Tool,
    // http://docs.confluent.io/current/streams/developer-guide.html#application-reset-tool).
    //
    // The drawback of cleaning up local state up front is that your app must rebuild its local state from scratch, which
    // will take time and will require reading all the state-relevant data from the Kafka cluster over the network.
    // Thus in a production scenario you typically do not want to clean up always, as we do here, but rather only when it
    // is truly needed, i.e., only under certain conditions (e.g., the presence of a command line flag for your app).
    // See `ApplicationResetExample.java` for a production-like example.
    streams.cleanUp();
    streams.start();
    // Add shutdown hook to respond to SIGTERM and gracefully close Kafka Streams
    Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
        @Override
        public void run() {
            streams.close();
        }
    }));
}
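The topology above only consumes; to see it do anything, Avro page-view records have to land on the PageViews topic first. The snippet below is a minimal, hypothetical producer sketch for that purpose, reusing the bootstrapServers and schemaRegistryUrl values from above; the reduced inline schema carries only the user and page fields this example reads (pageview.avsc has the authoritative definition), and KafkaAvroSerializer is assumed to be on the classpath via Confluent's kafka-avro-serializer dependency.

// Hypothetical producer sketch (not part of the example): writes one page-view
// GenericRecord with a null key to "PageViews", as the topology above expects.
final Properties producerProps = new Properties();
producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class);
producerProps.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
// Reduced, illustrative schema; the real one is pageview.avsc under src/main/avro/.
final Schema pageViewSchema = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"pageview\",\"fields\":["
        + "{\"name\":\"user\",\"type\":\"string\"},"
        + "{\"name\":\"page\",\"type\":\"string\"}]}");
try (final KafkaProducer<String, GenericRecord> producer = new KafkaProducer<>(producerProps)) {
    final GenericRecord pageView = new GenericData.Record(pageViewSchema);
    pageView.put("user", "alice");
    pageView.put("page", "index.html");
    producer.send(new ProducerRecord<>("PageViews", null, pageView));
}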
Use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc: class PageViewRegionLambdaExample, method main.
public static void main(final String[] args) throws Exception {
    final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
    final String schemaRegistryUrl = args.length > 1 ? args[1] : "http://localhost:8081";
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name. The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "pageview-region-lambda-example");
    streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "pageview-region-lambda-example-client");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Where to find the Confluent schema registry instance(s)
    streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    // Specify default (de)serializers for record keys and for record values.
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, GenericAvroSerde.class);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Records should be flushed every 10 seconds. This is less than the default
    // in order to keep this example interactive.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    final StreamsBuilder builder = new StreamsBuilder();
    // Create a stream of page view events from the PageViews topic, where the key of
    // a record is assumed to be null and the value an Avro GenericRecord
    // that represents the full details of the page view event. See `pageview.avsc` under
    // `src/main/avro/` for the corresponding Avro schema.
    final KStream<String, GenericRecord> views = builder.stream("PageViews");
    // Create a keyed stream of page view events from the PageViews stream,
    // by extracting the user id (String) from the Avro value
    final KStream<String, GenericRecord> viewsByUser = views.map((dummy, record) -> new KeyValue<>(record.get("user").toString(), record));
    // Create a changelog stream for user profiles from the UserProfiles topic,
    // where the key of a record is assumed to be the user id (String) and its value
    // an Avro GenericRecord. See `userprofile.avsc` under `src/main/avro/` for the
    // corresponding Avro schema.
    final KTable<String, GenericRecord> userProfiles = builder.table("UserProfiles");
    // Create a changelog stream as a projection of the value to the region attribute only
    final KTable<String, String> userRegions = userProfiles.mapValues(record -> record.get("region").toString());
    // We must specify the Avro schemas for all intermediate (Avro) classes, if any.
    // In this example, we want to create an intermediate GenericRecord to hold the view region.
    // See `pageviewregion.avsc` under `src/main/avro/`.
    final InputStream pageViewRegionSchema = PageViewRegionLambdaExample.class.getClassLoader().getResourceAsStream("avro/io/confluent/examples/streams/pageviewregion.avsc");
    final Schema schema = new Schema.Parser().parse(pageViewRegionSchema);
    final KTable<Windowed<String>, Long> viewsByRegion = viewsByUser.leftJoin(userRegions, (view, region) -> {
        GenericRecord viewRegion = new GenericData.Record(schema);
        viewRegion.put("user", view.get("user"));
        viewRegion.put("page", view.get("page"));
        viewRegion.put("region", region);
        return viewRegion;
    }).map((user, viewRegion) -> new KeyValue<>(viewRegion.get("region").toString(), viewRegion)).groupByKey().windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(5)).advanceBy(TimeUnit.MINUTES.toMillis(1))).count();
    // Note: The following operations would NOT be needed for the actual pageview-by-region
    // computation, which would normally stop at `count` above. We use the operations
    // below only to "massage" the output data so it is easier to inspect on the console via
    // kafka-console-consumer.
    final KStream<String, Long> viewsByRegionForConsole = viewsByRegion.toStream((windowedRegion, count) -> windowedRegion.toString());
    viewsByRegionForConsole.to("PageViewsByRegion", Produced.with(stringSerde, longSerde));
    final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    // Always (and unconditionally) clean local state prior to starting the processing topology.
    // We opt for this unconditional call here because this will make it easier for you to play around with the example
    // when resetting the application for a re-run (via the Application Reset Tool,
    // http://docs.confluent.io/current/streams/developer-guide.html#application-reset-tool).
    //
    // The drawback of cleaning up local state up front is that your app must rebuild its local state from scratch, which
    // will take time and will require reading all the state-relevant data from the Kafka cluster over the network.
    // Thus in a production scenario you typically do not want to clean up always, as we do here, but rather only when it
    // is truly needed, i.e., only under certain conditions (e.g., the presence of a command line flag for your app).
    // See `ApplicationResetExample.java` for a production-like example.
    streams.cleanUp();
    streams.start();
    // Add shutdown hook to respond to SIGTERM and gracefully close Kafka Streams
    Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
}
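The lambda variant writes the same String-keyed, Long-valued output to PageViewsByRegion. For completeness, the snippet below is a minimal, hypothetical consumer sketch for eyeballing that topic from Java, reusing the bootstrapServers value defined above; the group id and poll timeout are illustrative, and the kafka-console-consumer mentioned in the comments works just as well.

// Hypothetical inspection consumer for the PageViewsByRegion output topic.
final Properties consumerProps = new Properties();
consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, "pageview-region-inspector");
consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class);
try (final KafkaConsumer<String, Long> consumer = new KafkaConsumer<>(consumerProps)) {
    consumer.subscribe(Collections.singletonList("PageViewsByRegion"));
    for (final ConsumerRecord<String, Long> record : consumer.poll(Duration.ofSeconds(5))) {
        System.out.println(record.key() + " -> " + record.value());
    }
}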