Example 71 with StreamsBuilder

Use of org.apache.kafka.streams.StreamsBuilder in project apache-kafka-on-k8s by banzaicloud.

The class SimpleBenchmark, method createKafkaStreamsWithStateStore.

private KafkaStreams createKafkaStreamsWithStateStore(String topic, final CountDownLatch latch, boolean enableCaching) {
    setStreamProperties("simple-benchmark-streams-with-store" + enableCaching);
    StreamsBuilder builder = new StreamsBuilder();
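    // Persistent Integer -> byte[] key-value store, optionally wrapped with a cache below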
    final StoreBuilder<KeyValueStore<Integer, byte[]>> storeBuilder = Stores.keyValueStoreBuilder(Stores.persistentKeyValueStore("store"), Serdes.Integer(), Serdes.ByteArray());
    if (enableCaching) {
        builder.addStateStore(storeBuilder.withCachingEnabled());
    } else {
        builder.addStateStore(storeBuilder);
    }
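    // Pipe every record from the input topic into the store and count it toward the benchmark total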
    KStream<Integer, byte[]> source = builder.stream(topic, Consumed.with(INTEGER_SERDE, BYTE_SERDE));
    source.process(new ProcessorSupplier<Integer, byte[]>() {

        @Override
        public Processor<Integer, byte[]> get() {
            return new AbstractProcessor<Integer, byte[]>() {

                KeyValueStore<Integer, byte[]> store;

                @SuppressWarnings("unchecked")
                @Override
                public void init(ProcessorContext context) {
                    store = (KeyValueStore<Integer, byte[]>) context.getStateStore("store");
                }

                @Override
                public void process(Integer key, byte[] value) {
                    store.put(key, value);
                    processedRecords.getAndIncrement();
                    // count the payload bytes plus the 4-byte integer key (Integer.BYTES, not Integer.SIZE, which is bits)
                    processedBytes += value.length + Integer.BYTES;
                    if (processedRecords.get() == numRecords) {
                        latch.countDown();
                    }
                }

                @Override
                public void punctuate(long timestamp) {
                }

                @Override
                public void close() {
                }
            };
        }
    }, "store");
    return createKafkaStreamsWithExceptionHandler(builder, props);
}
Also used: Processor(org.apache.kafka.streams.processor.Processor) AbstractProcessor(org.apache.kafka.streams.processor.AbstractProcessor) KeyValueStore(org.apache.kafka.streams.state.KeyValueStore) ProcessorContext(org.apache.kafka.streams.processor.ProcessorContext) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) AtomicInteger(java.util.concurrent.atomic.AtomicInteger)
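The helper createKafkaStreamsWithExceptionHandler called on the last line is not part of this excerpt. A minimal sketch of what it plausibly looks like, assuming it only builds the topology and attaches an uncaught-exception handler (the handler body and the 30-second close timeout are illustrative, not taken from the source; assumes java.util.concurrent.TimeUnit is imported):

private KafkaStreams createKafkaStreamsWithExceptionHandler(final StreamsBuilder builder, final Properties props) {
    final KafkaStreams streams = new KafkaStreams(builder.build(), props);
    streams.setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {

        @Override
        public void uncaughtException(final Thread t, final Throwable e) {
            // Fail fast: report the dead stream thread and shut the client down
            System.err.println("FATAL: uncaught exception on thread " + t.getName() + ": " + e);
            streams.close(30, TimeUnit.SECONDS);
        }
    });
    return streams;
}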

Example 72 with StreamsBuilder

Use of org.apache.kafka.streams.StreamsBuilder in project apache-kafka-on-k8s by banzaicloud.

The class SimpleBenchmark, method createCountStreams.

private KafkaStreams createCountStreams(Properties streamConfig, String topic, final CountDownLatch latch) {
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<Integer, byte[]> input = builder.stream(topic);
    input.groupByKey().count("tmpStoreName").foreach(new CountDownAction(latch));
    return new KafkaStreams(builder.build(), streamConfig);
}
Also used: StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) KafkaStreams(org.apache.kafka.streams.KafkaStreams)
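The count(String) overload above was deprecated in Kafka 1.0, as was KTable#foreach. A sketch of the same count in the Materialized style (assumes Kafka Streams 1.0+ plus imports for Materialized, Bytes, and KeyValueStore; CountDownAction is the helper from the excerpt):

private KafkaStreams createCountStreams(final Properties streamConfig, final String topic, final CountDownLatch latch) {
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<Integer, byte[]> input = builder.stream(topic);
    // Name the count store via Materialized rather than the deprecated count(String),
    // and iterate the result as a stream since KTable#foreach is likewise deprecated
    input.groupByKey()
        .count(Materialized.<Integer, Long, KeyValueStore<Bytes, byte[]>>as("tmpStoreName"))
        .toStream()
        .foreach(new CountDownAction(latch));
    return new KafkaStreams(builder.build(), streamConfig);
}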

Example 73 with StreamsBuilder

Use of org.apache.kafka.streams.StreamsBuilder in project apache-kafka-on-k8s by banzaicloud.

The class YahooBenchmark, method createYahooBenchmarkStreams.

private KafkaStreams createYahooBenchmarkStreams(final Properties streamConfig, final String campaignsTopic, final String eventsTopic, final CountDownLatch latch, final int numRecords) {
    Map<String, Object> serdeProps = new HashMap<>();
    final Serializer<ProjectedEvent> projectedEventSerializer = new JsonPOJOSerializer<>();
    serdeProps.put("JsonPOJOClass", ProjectedEvent.class);
    projectedEventSerializer.configure(serdeProps, false);
    final Deserializer<ProjectedEvent> projectedEventDeserializer = new JsonPOJODeserializer<>();
    serdeProps.put("JsonPOJOClass", ProjectedEvent.class);
    projectedEventDeserializer.configure(serdeProps, false);
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, ProjectedEvent> kEvents = builder.stream(eventsTopic, Consumed.with(Serdes.String(), Serdes.serdeFrom(projectedEventSerializer, projectedEventDeserializer)));
    final KTable<String, String> kCampaigns = builder.table(campaignsTopic, Consumed.with(Serdes.String(), Serdes.String()));
    KStream<String, ProjectedEvent> filteredEvents = kEvents.peek(new ForeachAction<String, ProjectedEvent>() {

        @Override
        public void apply(String key, ProjectedEvent value) {
            parent.processedRecords.getAndIncrement();
            if (parent.processedRecords.get() % 1000000 == 0) {
                System.out.println("Processed " + parent.processedRecords.get());
            }
            if (parent.processedRecords.get() >= numRecords) {
                latch.countDown();
            }
        }
    }).filter(new Predicate<String, ProjectedEvent>() {

        @Override
        public boolean test(final String key, final ProjectedEvent value) {
            return value.eventType.equals("view");
        }
    }).mapValues(new ValueMapper<ProjectedEvent, ProjectedEvent>() {

        @Override
        public ProjectedEvent apply(ProjectedEvent value) {
            ProjectedEvent event = new ProjectedEvent();
            event.adID = value.adID;
            event.eventTime = value.eventTime;
            event.eventType = value.eventType;
            return event;
        }
    });
    // deserialize the ad ID and campaign ID from the stored value in Kafka
    KTable<String, CampaignAd> deserCampaigns = kCampaigns.mapValues(new ValueMapper<String, CampaignAd>() {

        @Override
        public CampaignAd apply(String value) {
            String[] parts = value.split(":");
            CampaignAd cAdd = new CampaignAd();
            cAdd.adID = parts[0];
            cAdd.campaignID = parts[1];
            return cAdd;
        }
    });
    // join the events with the campaigns
    KStream<String, String> joined = filteredEvents.join(deserCampaigns, new ValueJoiner<ProjectedEvent, CampaignAd, String>() {

        @Override
        public String apply(ProjectedEvent value1, CampaignAd value2) {
            return value2.campaignID;
        }
    }, Serdes.String(), Serdes.serdeFrom(projectedEventSerializer, projectedEventDeserializer));
    // key by campaign rather than by ad as original
    KStream<String, String> keyedByCampaign = joined.selectKey(new KeyValueMapper<String, String, String>() {

        @Override
        public String apply(String key, String value) {
            return value;
        }
    });
    // calculate windowed counts
    keyedByCampaign.groupByKey(Serialized.with(Serdes.String(), Serdes.String())).count(TimeWindows.of(10 * 1000), "time-windows");
    return new KafkaStreams(builder.build(), streamConfig);
}
Also used: KafkaStreams(org.apache.kafka.streams.KafkaStreams) HashMap(java.util.HashMap) Predicate(org.apache.kafka.streams.kstream.Predicate) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder)
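The windowed count at the end uses the deprecated count(TimeWindows, String) overload. The equivalent in the windowedBy() style introduced in Kafka 1.0 (assumes imports for Materialized, WindowStore, and Bytes) would be:

// calculate windowed counts, naming the window store through Materialized
keyedByCampaign.groupByKey(Serialized.with(Serdes.String(), Serdes.String()))
    .windowedBy(TimeWindows.of(10 * 1000))
    .count(Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("time-windows"));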

Example 74 with StreamsBuilder

Use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.

The class PageViewRegionExample, method main.

public static void main(final String[] args) throws Exception {
    final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
    final String schemaRegistryUrl = args.length > 1 ? args[1] : "http://localhost:8081";
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name.  The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "pageview-region-example");
    streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "pageview-region-example-client");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Where to find the Confluent schema registry instance(s)
    streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    // Specify default (de)serializers for record keys and for record values.
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, GenericAvroSerde.class);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Records should be flushed every 10 seconds. This is less than the default
    // in order to keep this example interactive.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    final StreamsBuilder builder = new StreamsBuilder();
    // Create a stream of page view events from the PageViews topic, where the key of
    // a record is assumed to be null and the value an Avro GenericRecord
    // that represents the full details of the page view event. See `pageview.avsc` under
    // `src/main/avro/` for the corresponding Avro schema.
    final KStream<String, GenericRecord> views = builder.stream("PageViews");
    // Create a keyed stream of page view events from the PageViews stream,
    // by extracting the user id (String) from the Avro value
    final KStream<String, GenericRecord> viewsByUser = views.map(new KeyValueMapper<String, GenericRecord, KeyValue<String, GenericRecord>>() {

        @Override
        public KeyValue<String, GenericRecord> apply(final String dummy, final GenericRecord record) {
            return new KeyValue<>(record.get("user").toString(), record);
        }
    });
    // Create a changelog stream for user profiles from the UserProfiles topic,
    // where the key of a record is assumed to be the user id (String) and its value
    // an Avro GenericRecord.  See `userprofile.avsc` under `src/main/avro/` for the
    // corresponding Avro schema.
    final KTable<String, GenericRecord> userProfiles = builder.table("UserProfiles");
    // Create a changelog stream as a projection of the value to the region attribute only
    final KTable<String, String> userRegions = userProfiles.mapValues(new ValueMapper<GenericRecord, String>() {

        @Override
        public String apply(final GenericRecord record) {
            return record.get("region").toString();
        }
    });
    // We must specify the Avro schemas for all intermediate (Avro) classes, if any.
    // In this example, we want to create an intermediate GenericRecord to hold the view region
    // (see below).
    final InputStream pageViewRegionSchema = PageViewRegionLambdaExample.class.getClassLoader().getResourceAsStream("avro/io/confluent/examples/streams/pageviewregion.avsc");
    final Schema schema = new Schema.Parser().parse(pageViewRegionSchema);
    final KTable<Windowed<String>, Long> viewsByRegion = viewsByUser.leftJoin(userRegions, new ValueJoiner<GenericRecord, String, GenericRecord>() {

        @Override
        public GenericRecord apply(final GenericRecord view, final String region) {
            final GenericRecord viewRegion = new GenericData.Record(schema);
            viewRegion.put("user", view.get("user"));
            viewRegion.put("page", view.get("page"));
            viewRegion.put("region", region);
            return viewRegion;
        }
    }).map(new KeyValueMapper<String, GenericRecord, KeyValue<String, GenericRecord>>() {

        @Override
        public KeyValue<String, GenericRecord> apply(final String user, final GenericRecord viewRegion) {
            return new KeyValue<>(viewRegion.get("region").toString(), viewRegion);
        }
    }).groupByKey().windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(5)).advanceBy(TimeUnit.MINUTES.toMillis(1))).count();
    // Note: The following operations would NOT be needed for the actual pageview-by-region
    // computation, which would normally stop at `count` above.  We use the operations
    // below only to "massage" the output data so it is easier to inspect on the console via
    // kafka-console-consumer.
    final KStream<String, Long> viewsByRegionForConsole = viewsByRegion.toStream(new KeyValueMapper<Windowed<String>, Long, String>() {

        @Override
        public String apply(final Windowed<String> windowedRegion, final Long count) {
            return windowedRegion.toString();
        }
    });
    // write to the result topic
    viewsByRegionForConsole.to("PageViewsByRegion", Produced.with(stringSerde, longSerde));
    final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    // Always (and unconditionally) clean local state prior to starting the processing topology.
    // We opt for this unconditional call here because this will make it easier for you to play around with the example
    // when resetting the application for doing a re-run (via the Application Reset Tool,
    // http://docs.confluent.io/current/streams/developer-guide.html#application-reset-tool).
    // 
    // The drawback of cleaning up local state prior to startup is that your app must rebuild its local state from scratch, which
    // will take time and will require reading all the state-relevant data from the Kafka cluster over the network.
    // Thus in a production scenario you typically do not want to clean up always as we do here but rather only when it
    // is truly needed, i.e., only under certain conditions (e.g., the presence of a command line flag for your app).
    // See `ApplicationResetExample.java` for a production-like example.
    streams.cleanUp();
    streams.start();
    // Add shutdown hook to respond to SIGTERM and gracefully close Kafka Streams
    Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {

        @Override
        public void run() {
            streams.close();
        }
    }));
}
Also used: KeyValue(org.apache.kafka.streams.KeyValue) Schema(org.apache.avro.Schema) KeyValueMapper(org.apache.kafka.streams.kstream.KeyValueMapper) Properties(java.util.Properties) GenericRecord(org.apache.avro.generic.GenericRecord) KafkaStreams(org.apache.kafka.streams.KafkaStreams) InputStream(java.io.InputStream) GenericData(org.apache.avro.generic.GenericData) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) Windowed(org.apache.kafka.streams.kstream.Windowed)

Example 75 with StreamsBuilder

Use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.

The class PageViewRegionLambdaExample, method main.

public static void main(final String[] args) throws Exception {
    final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
    final String schemaRegistryUrl = args.length > 1 ? args[1] : "http://localhost:8081";
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name.  The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "pageview-region-lambda-example");
    streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "pageview-region-lambda-example-client");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Where to find the Confluent schema registry instance(s)
    streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    // Specify default (de)serializers for record keys and for record values.
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, GenericAvroSerde.class);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Records should be flushed every 10 seconds. This is less than the default
    // in order to keep this example interactive.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    final StreamsBuilder builder = new StreamsBuilder();
    // Create a stream of page view events from the PageViews topic, where the key of
    // a record is assumed to be null and the value an Avro GenericRecord
    // that represents the full details of the page view event. See `pageview.avsc` under
    // `src/main/avro/` for the corresponding Avro schema.
    final KStream<String, GenericRecord> views = builder.stream("PageViews");
    // Create a keyed stream of page view events from the PageViews stream,
    // by extracting the user id (String) from the Avro value
    final KStream<String, GenericRecord> viewsByUser = views.map((dummy, record) -> new KeyValue<>(record.get("user").toString(), record));
    // Create a changelog stream for user profiles from the UserProfiles topic,
    // where the key of a record is assumed to be the user id (String) and its value
    // an Avro GenericRecord.  See `userprofile.avsc` under `src/main/avro/` for the
    // corresponding Avro schema.
    final KTable<String, GenericRecord> userProfiles = builder.table("UserProfiles");
    // Create a changelog stream as a projection of the value to the region attribute only
    final KTable<String, String> userRegions = userProfiles.mapValues(record -> record.get("region").toString());
    // We must specify the Avro schemas for all intermediate (Avro) classes, if any.
    // In this example, we want to create an intermediate GenericRecord to hold the view region.
    // See `pageviewregion.avsc` under `src/main/avro/`.
    final InputStream pageViewRegionSchema = PageViewRegionLambdaExample.class.getClassLoader().getResourceAsStream("avro/io/confluent/examples/streams/pageviewregion.avsc");
    final Schema schema = new Schema.Parser().parse(pageViewRegionSchema);
    final KTable<Windowed<String>, Long> viewsByRegion = viewsByUser.leftJoin(userRegions, (view, region) -> {
        GenericRecord viewRegion = new GenericData.Record(schema);
        viewRegion.put("user", view.get("user"));
        viewRegion.put("page", view.get("page"));
        viewRegion.put("region", region);
        return viewRegion;
    }).map((user, viewRegion) -> new KeyValue<>(viewRegion.get("region").toString(), viewRegion)).groupByKey().windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(5)).advanceBy(TimeUnit.MINUTES.toMillis(1))).count();
    // Note: The following operations would NOT be needed for the actual pageview-by-region
    // computation, which would normally stop at `count` above.  We use the operations
    // below only to "massage" the output data so it is easier to inspect on the console via
    // kafka-console-consumer.
    final KStream<String, Long> viewsByRegionForConsole = viewsByRegion.toStream((windowedRegion, count) -> windowedRegion.toString());
    viewsByRegionForConsole.to("PageViewsByRegion", Produced.with(stringSerde, longSerde));
    final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    // Always (and unconditionally) clean local state prior to starting the processing topology.
    // We opt for this unconditional call here because this will make it easier for you to play around with the example
    // when resetting the application for doing a re-run (via the Application Reset Tool,
    // http://docs.confluent.io/current/streams/developer-guide.html#application-reset-tool).
    // 
    // The drawback of cleaning up local state prior to startup is that your app must rebuild its local state from scratch, which
    // will take time and will require reading all the state-relevant data from the Kafka cluster over the network.
    // Thus in a production scenario you typically do not want to clean up always as we do here but rather only when it
    // is truly needed, i.e., only under certain conditions (e.g., the presence of a command line flag for your app).
    // See `ApplicationResetExample.java` for a production-like example.
    streams.cleanUp();
    streams.start();
    // Add shutdown hook to respond to SIGTERM and gracefully close Kafka Streams
    Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
}
Also used: StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) StreamsConfig(org.apache.kafka.streams.StreamsConfig) GenericRecord(org.apache.avro.generic.GenericRecord) KTable(org.apache.kafka.streams.kstream.KTable) Schema(org.apache.avro.Schema) Properties(java.util.Properties) Produced(org.apache.kafka.streams.kstream.Produced) KeyValue(org.apache.kafka.streams.KeyValue) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) KStream(org.apache.kafka.streams.kstream.KStream) AbstractKafkaAvroSerDeConfig(io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig) GenericAvroSerde(io.confluent.kafka.streams.serdes.avro.GenericAvroSerde) GenericData(org.apache.avro.generic.GenericData) TimeUnit(java.util.concurrent.TimeUnit) Windowed(org.apache.kafka.streams.kstream.Windowed) Serde(org.apache.kafka.common.serialization.Serde) TimeWindows(org.apache.kafka.streams.kstream.TimeWindows) Serdes(org.apache.kafka.common.serialization.Serdes) KafkaStreams(org.apache.kafka.streams.KafkaStreams) InputStream(java.io.InputStream)
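To inspect the PageViewsByRegion output produced by the two examples above, a minimal standalone consumer sketch (assumes Kafka clients 2.0+ for poll(Duration); the class name PageViewsByRegionReader is made up for illustration):

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.LongDeserializer;
import org.apache.kafka.common.serialization.StringDeserializer;

public class PageViewsByRegionReader {

    public static void main(final String[] args) {
        final Properties config = new Properties();
        config.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        config.put(ConsumerConfig.GROUP_ID_CONFIG, "pageviews-by-region-reader");
        config.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        // Keys are windowed-region strings and values are Long counts,
        // matching Produced.with(stringSerde, longSerde) in the examples above
        try (final KafkaConsumer<String, Long> consumer =
                 new KafkaConsumer<>(config, new StringDeserializer(), new LongDeserializer())) {
            consumer.subscribe(Collections.singleton("PageViewsByRegion"));
            while (true) {
                for (final ConsumerRecord<String, Long> record : consumer.poll(Duration.ofSeconds(1))) {
                    System.out.println(record.key() + " -> " + record.value());
                }
            }
        }
    }
}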

Aggregations

StreamsBuilder (org.apache.kafka.streams.StreamsBuilder): 189
Test (org.junit.Test): 121
KafkaStreams (org.apache.kafka.streams.KafkaStreams): 72
Properties (java.util.Properties): 61
KeyValue (org.apache.kafka.streams.KeyValue): 42
MockProcessorSupplier (org.apache.kafka.test.MockProcessorSupplier): 30
StreamsBuilderTest (org.apache.kafka.streams.StreamsBuilderTest): 27
Serdes (org.apache.kafka.common.serialization.Serdes): 21
KeyValueMapper (org.apache.kafka.streams.kstream.KeyValueMapper): 21
Before (org.junit.Before): 19
StreamsConfig (org.apache.kafka.streams.StreamsConfig): 18
KStream (org.apache.kafka.streams.kstream.KStream): 18
Predicate (org.apache.kafka.streams.kstream.Predicate): 18
IntegrationTest (org.apache.kafka.test.IntegrationTest): 18
Bytes (org.apache.kafka.common.utils.Bytes): 16
HashSet (java.util.HashSet): 15
ValueMapper (org.apache.kafka.streams.kstream.ValueMapper): 14
HashMap (java.util.HashMap): 13
KTable (org.apache.kafka.streams.kstream.KTable): 13
Produced (org.apache.kafka.streams.kstream.Produced): 13