use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.
the class SecureKafkaStreamsExample method main.
/**
 * Runs a pass-through Kafka Streams application that talks to the brokers over SSL.
 *
 * <p>Every record read from the topic {@code secure-input} is written unchanged to the topic
 * {@code secure-output}.
 *
 * @param args optional; {@code args[0]} is the bootstrap server list of the secure broker(s).
 *             When absent, {@code localhost:9093} is used.
 * @throws Exception declared by the signature; nothing in this method throws a checked exception
 *                   explicitly
 */
public static void main(final String[] args) throws Exception {
  // Where to find secure (!) Kafka broker(s). In the VM, the broker listens on port 9093 for
  // SSL connections.
  final String brokers;
  if (args.length > 0) {
    brokers = args[0];
  } else {
    brokers = "localhost:9093";
  }

  // Keys and values are passed through untouched, so both default serdes are byte arrays.
  final String byteArraySerdeName = Serdes.ByteArray().getClass().getName();

  final Properties settings = new Properties();
  // The application id must be unique in the Kafka cluster against which the application runs.
  settings.put(StreamsConfig.APPLICATION_ID_CONFIG, "secure-kafka-streams-app");
  settings.put(StreamsConfig.CLIENT_ID_CONFIG, "secure-kafka-streams-app-client");
  settings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
  settings.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, byteArraySerdeName);
  settings.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, byteArraySerdeName);

  // Security settings.
  // 1. They must match the security settings of the secure Kafka cluster.
  // 2. The SSL trust store and key store files must be readable by this application. Normally
  //    they would be installed on the client machine (or container) that runs the application.
  //    To keep this example simple, the files were generated and stored inside the VM that
  //    runs the secure broker, which is why the example must be run from within that VM.
  settings.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SSL");
  settings.put(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, "/etc/security/tls/kafka.client.truststore.jks");
  settings.put(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, "test1234");
  settings.put(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG, "/etc/security/tls/kafka.client.keystore.jks");
  settings.put(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG, "test1234");
  settings.put(SslConfigs.SSL_KEY_PASSWORD_CONFIG, "test1234");

  // Topology: copy the input data as-is to the output topic.
  final StreamsBuilder topologyBuilder = new StreamsBuilder();
  topologyBuilder.stream("secure-input").to("secure-output");

  final KafkaStreams app = new KafkaStreams(topologyBuilder.build(), settings);

  // Local state is wiped unconditionally before every start. This makes it easy to experiment
  // with the example when re-running it after a reset via the Application Reset Tool
  // (http://docs.confluent.io/current/streams/developer-guide.html#application-reset-tool).
  //
  // The price is that the app must rebuild its local state from scratch, which takes time and
  // requires reading all state-relevant data from the Kafka cluster over the network. In
  // production you would typically clean up only when truly needed, e.g. when a command line
  // flag is present. See `ApplicationResetExample.java` for a production-like example.
  app.cleanUp();
  app.start();

  // Close Kafka Streams gracefully when the JVM shuts down (e.g. on SIGTERM).
  final Thread shutdownHook = new Thread(app::close);
  Runtime.getRuntime().addShutdownHook(shutdownHook);
}
use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.
the class SumLambdaExample method main.
/**
 * Runs a Kafka Streams application that keeps a running sum of the odd numbers it reads.
 *
 * <p>Integer values are read from {@code NUMBERS_TOPIC}; even values are dropped, the remaining
 * records are all re-keyed to the constant key {@code 1}, and their values are summed. Every
 * update of that sum is written to {@code SUM_OF_ODD_NUMBERS_TOPIC}.
 *
 * @param args optional; {@code args[0]} is the bootstrap server list of the Kafka broker(s).
 *             When absent, {@code localhost:9092} is used.
 * @throws Exception declared by the signature; nothing in this method throws a checked exception
 *                   explicitly
 */
public static void main(final String[] args) throws Exception {
  // Where to find Kafka broker(s).
  final String brokers;
  if (args.length > 0) {
    brokers = args[0];
  } else {
    brokers = "localhost:9092";
  }

  // Both keys and values use the Integer serde by default.
  final String integerSerdeName = Serdes.Integer().getClass().getName();

  final Properties settings = new Properties();
  // The application id must be unique in the Kafka cluster against which the application runs.
  settings.put(StreamsConfig.APPLICATION_ID_CONFIG, "sum-lambda-example");
  settings.put(StreamsConfig.CLIENT_ID_CONFIG, "sum-lambda-example-client");
  settings.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
  settings.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, integerSerdeName);
  settings.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, integerSerdeName);
  settings.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
  settings.put(StreamsConfig.STATE_DIR_CONFIG, "/tmp/kafka-streams");
  // Flush records every 10 seconds. This is less than the default and keeps the example
  // interactive.
  settings.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);

  final StreamsBuilder topologyBuilder = new StreamsBuilder();

  // The input values are assumed to be Integers. The input keys do not matter because the
  // stream is re-keyed below; for simplicity they are assumed to be Integers as well, which is
  // also the type of the new key.
  final KStream<Integer, Integer> numbers = topologyBuilder.stream(NUMBERS_TOPIC);

  // Keep odd values only.
  final KStream<Integer, Integer> oddNumbers = numbers.filter((key, value) -> value % 2 != 0);
  // Give every record the same key so that all of them end up in a single group.
  final KStream<Integer, Integer> sameKeyOddNumbers = oddNumbers.selectKey((key, value) -> 1);
  // Add the values of that single group together.
  final KTable<Integer, Integer> sumOfOddNumbers = sameKeyOddNumbers.groupByKey().reduce(Integer::sum);

  sumOfOddNumbers.toStream().to(SUM_OF_ODD_NUMBERS_TOPIC);

  final KafkaStreams app = new KafkaStreams(topologyBuilder.build(), settings);

  // Local state is wiped unconditionally before every start. This makes it easy to experiment
  // with the example when re-running it after a reset via the Application Reset Tool
  // (http://docs.confluent.io/current/streams/developer-guide.html#application-reset-tool).
  //
  // The price is that the app must rebuild its local state from scratch, which takes time and
  // requires reading all state-relevant data from the Kafka cluster over the network. In
  // production you would typically clean up only when truly needed, e.g. when a command line
  // flag is present. See `ApplicationResetExample.java` for a production-like example.
  app.cleanUp();
  app.start();

  // Close Kafka Streams gracefully when the JVM shuts down (e.g. on SIGTERM).
  final Thread shutdownHook = new Thread(app::close);
  Runtime.getRuntime().addShutdownHook(shutdownHook);
}
use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.
the class GlobalKTablesExample method createStreams.
/**
 * Builds (but does not start) the Kafka Streams application that enriches orders with customer
 * and product information via two global tables.
 *
 * <p>Orders are read from {@code ORDER_TOPIC}, joined first against the customers global table
 * (by the order's customer id) and then against the products global table (by the product id),
 * and the resulting enriched orders are written to {@code ENRICHED_ORDER_TOPIC}.
 *
 * @param bootstrapServers  value for the Kafka Streams bootstrap servers setting
 * @param schemaRegistryUrl schema registry URL used to configure all Avro serdes of this example
 * @param stateDir          value for the Kafka Streams state directory setting
 * @return a new {@link KafkaStreams} instance; the caller is responsible for starting and
 *         closing it
 */
public static KafkaStreams createStreams(final String bootstrapServers, final String schemaRegistryUrl, final String stateDir) {
  final Properties streamsConfiguration = new Properties();
  // Give the Streams application a unique name. The name must be unique in the Kafka cluster
  // against which the application is run.
  streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "global-tables-example");
  streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "global-tables-example-client");
  // Where to find Kafka broker(s).
  streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
  streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
  // Set to earliest so we don't miss any data that arrived in the topics before the process
  // started.
  streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

  // Create and configure the SpecificAvroSerdes required in this example. The second argument
  // of configure() is false because every serde here is used for record values, not keys.
  final Map<String, String> serdeConfig =
      Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
  final SpecificAvroSerde<Order> orderSerde = new SpecificAvroSerde<>();
  orderSerde.configure(serdeConfig, false);
  final SpecificAvroSerde<Customer> customerSerde = new SpecificAvroSerde<>();
  customerSerde.configure(serdeConfig, false);
  final SpecificAvroSerde<Product> productSerde = new SpecificAvroSerde<>();
  productSerde.configure(serdeConfig, false);
  final SpecificAvroSerde<EnrichedOrder> enrichedOrdersSerde = new SpecificAvroSerde<>();
  enrichedOrdersSerde.configure(serdeConfig, false);

  final StreamsBuilder builder = new StreamsBuilder();

  // Get the stream of orders.
  final KStream<Long, Order> ordersStream = builder.stream(ORDER_TOPIC, Consumed.with(Serdes.Long(), orderSerde));

  // Create a global table for customers. The data from this global table
  // will be fully replicated on each instance of this application.
  final GlobalKTable<Long, Customer> customers = builder.globalTable(
      CUSTOMER_TOPIC,
      Materialized.<Long, Customer, KeyValueStore<Bytes, byte[]>>as(CUSTOMER_STORE)
          .withKeySerde(Serdes.Long())
          .withValueSerde(customerSerde));

  // Create a global table for products. The data from this global table
  // will be fully replicated on each instance of this application.
  final GlobalKTable<Long, Product> products = builder.globalTable(
      PRODUCT_TOPIC,
      Materialized.<Long, Product, KeyValueStore<Bytes, byte[]>>as(PRODUCT_STORE)
          .withKeySerde(Serdes.Long())
          .withValueSerde(productSerde));

  // Join the orders stream to the customer global table. As this is a global table
  // we can use a non-key based join without needing to repartition the input stream.
  final KStream<Long, CustomerOrder> customerOrdersStream = ordersStream.join(
      customers,
      (orderId, order) -> order.getCustomerId(),
      (order, customer) -> new CustomerOrder(customer, order));

  // Join the enriched customer order stream with the product global table. As this is a global
  // table we can use a non-key based join without needing to repartition the input stream.
  final KStream<Long, EnrichedOrder> enrichedOrdersStream = customerOrdersStream.join(
      products,
      (orderId, customerOrder) -> customerOrder.productId(),
      (customerOrder, product) -> new EnrichedOrder(product, customerOrder.customer, customerOrder.order));

  // Write the enriched order to the enriched-order topic.
  enrichedOrdersStream.to(ENRICHED_ORDER_TOPIC, Produced.with(Serdes.Long(), enrichedOrdersSerde));

  // Hand the Properties to KafkaStreams directly instead of wrapping them in a StreamsConfig:
  // this matches the other examples and avoids the StreamsConfig-based constructor.
  return new KafkaStreams(builder.build(), streamsConfiguration);
}
use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.
the class StreamToTableJoinIntegrationTest method shouldCountClicksPerRegion.
/**
 * End-to-end test of a KStream-KTable left join: user click events are joined with the latest
 * known region of each user, and the clicks are summed per region.
 *
 * <p>The Kafka Streams instance is closed in a {@code finally} block so that it does not keep
 * running when producing the input or waiting for the output fails.
 *
 * @throws Exception if producing the input or waiting for the output records fails
 */
@Test
public void shouldCountClicksPerRegion() throws Exception {
  // Input 1: Clicks per user (multiple records allowed per user).
  List<KeyValue<String, Long>> userClicks = Arrays.asList(
      new KeyValue<>("alice", 13L),
      new KeyValue<>("bob", 4L),
      new KeyValue<>("chao", 25L),
      new KeyValue<>("bob", 19L),
      new KeyValue<>("dave", 56L),
      new KeyValue<>("eve", 78L),
      new KeyValue<>("alice", 40L),
      new KeyValue<>("fang", 99L));

  // Input 2: Region per user (multiple records allowed per user).
  List<KeyValue<String, String>> userRegions = Arrays.asList(
      new KeyValue<>("alice", "asia"), /* Alice lived in Asia originally... */
      new KeyValue<>("bob", "americas"),
      new KeyValue<>("chao", "asia"),
      new KeyValue<>("dave", "europe"),
      new KeyValue<>("alice", "europe"), /* ...but moved to Europe some time later. */
      new KeyValue<>("eve", "americas"),
      new KeyValue<>("fang", "asia"));

  List<KeyValue<String, Long>> expectedClicksPerRegion = Arrays.asList(
      new KeyValue<>("americas", 101L),
      new KeyValue<>("europe", 109L),
      new KeyValue<>("asia", 124L));

  //
  // Step 1: Configure and start the processor topology.
  //
  final Serde<String> stringSerde = Serdes.String();
  final Serde<Long> longSerde = Serdes.Long();

  Properties streamsConfiguration = new Properties();
  streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "stream-table-join-lambda-integration-test");
  streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
  streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
  streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
  // The commit interval for flushing records to state stores and downstream must be lower than
  // this integration test's timeout (30 secs) to ensure we observe the expected processing results.
  streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
  streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
  // Use a temporary directory for storing state, which will be automatically removed after the test.
  streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());

  StreamsBuilder builder = new StreamsBuilder();

  // This KStream contains information such as "alice" -> 13L.
  //
  // Because this is a KStream ("record stream"), multiple records for the same user will be
  // considered as separate click-count events, each of which will be added to the total count.
  KStream<String, Long> userClicksStream = builder.stream(userClicksTopic, Consumed.with(stringSerde, longSerde));

  // This KTable contains information such as "alice" -> "europe".
  //
  // Because this is a KTable ("changelog stream"), only the latest value (here: region) for a
  // record key will be considered at the time when a new user-click record (see above) is
  // received for the `leftJoin` below. Any previous region values are being considered out of
  // date. This behavior is quite different to the KStream for user clicks above.
  //
  // For example, the user "alice" will be considered to live in "europe" (although originally she
  // lived in "asia") because, at the time her first user-click record is being received and
  // subsequently processed in the `leftJoin`, the latest region update for "alice" is "europe"
  // (which overrides her previous region value of "asia").
  KTable<String, String> userRegionsTable = builder.table(userRegionsTopic);

  // Compute the number of clicks per region, e.g. "europe" -> 13L.
  //
  // The resulting KTable is continuously being updated as new data records are arriving in the
  // input KStream `userClicksStream` and input KTable `userRegionsTable`.
  KTable<String, Long> clicksPerRegion = userClicksStream
      // Clicks of users without a known region are attributed to "UNKNOWN".
      .leftJoin(userRegionsTable, (clicks, region) -> new RegionWithClicks(region == null ? "UNKNOWN" : region, clicks))
      // Re-key from user to region.
      .map((user, regionWithClicks) -> new KeyValue<>(regionWithClicks.getRegion(), regionWithClicks.getClicks()))
      .groupByKey(Serialized.with(stringSerde, longSerde))
      .reduce((firstClicks, secondClicks) -> firstClicks + secondClicks);

  // Write the (continuously updating) results to the output topic.
  clicksPerRegion.toStream().to(outputTopic, Produced.with(stringSerde, longSerde));

  KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
  streams.start();

  final List<KeyValue<String, Long>> actualClicksPerRegion;
  try {
    //
    // Step 2: Publish user-region information.
    //
    // To keep this code example simple and easier to understand/reason about, we publish all
    // user-region records before any user-click records (cf. step 3). In practice though,
    // data records would typically be arriving concurrently in both input streams/topics.
    Properties userRegionsProducerConfig = new Properties();
    userRegionsProducerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    userRegionsProducerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    userRegionsProducerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    userRegionsProducerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    userRegionsProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    IntegrationTestUtils.produceKeyValuesSynchronously(userRegionsTopic, userRegions, userRegionsProducerConfig);

    //
    // Step 3: Publish some user click events.
    //
    Properties userClicksProducerConfig = new Properties();
    userClicksProducerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    userClicksProducerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    userClicksProducerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    userClicksProducerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    userClicksProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, LongSerializer.class);
    IntegrationTestUtils.produceKeyValuesSynchronously(userClicksTopic, userClicks, userClicksProducerConfig);

    //
    // Step 4: Verify the application's output data.
    //
    Properties consumerConfig = new Properties();
    consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "join-lambda-integration-test-standard-consumer");
    consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class);
    actualClicksPerRegion = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
        consumerConfig, outputTopic, expectedClicksPerRegion.size());
  } finally {
    // Always stop the topology, even when producing or consuming above failed.
    streams.close();
  }
  assertThat(actualClicksPerRegion).containsExactlyElementsOf(expectedClicksPerRegion);
}
use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.
the class TableToTableJoinIntegrationTest method shouldJoinTwoTables.
/**
 * End-to-end test of a KTable-KTable join: the latest region of each user is joined with the
 * user's latest login timestamp into a {@code "region/timestamp"} string.
 *
 * <p>Both the records written to the output topic and the content of the materialized join
 * state store are verified. The store iterator is closed via try-with-resources and the Kafka
 * Streams instance is closed in a {@code finally} block, so neither is left open when a step or
 * an assertion fails.
 *
 * @throws Exception if producing the input or waiting for the output records fails
 */
@Test
public void shouldJoinTwoTables() throws Exception {
  // Input 1: Region per user (multiple records allowed per user).
  List<KeyValue<String, String>> userRegionRecords = Arrays.asList(
      new KeyValue<>("alice", "asia"),
      new KeyValue<>("bob", "europe"),
      new KeyValue<>("alice", "europe"),
      new KeyValue<>("charlie", "europe"),
      new KeyValue<>("bob", "asia"));

  // Input 2: Timestamp of last login per user (multiple records allowed per user).
  List<KeyValue<String, Long>> userLastLoginRecords = Arrays.asList(
      new KeyValue<>("alice", 1485500000L),
      new KeyValue<>("bob", 1485520000L),
      new KeyValue<>("alice", 1485530000L),
      new KeyValue<>("bob", 1485560000L));

  List<KeyValue<String, String>> expectedResults = Arrays.asList(
      new KeyValue<>("alice", "europe/1485500000"),
      new KeyValue<>("bob", "asia/1485520000"),
      new KeyValue<>("alice", "europe/1485530000"),
      new KeyValue<>("bob", "asia/1485560000"));

  List<KeyValue<String, String>> expectedResultsForJoinStateStore = Arrays.asList(
      new KeyValue<>("alice", "europe/1485530000"),
      new KeyValue<>("bob", "asia/1485560000"));

  //
  // Step 1: Configure and start the processor topology.
  //
  final Serde<String> stringSerde = Serdes.String();
  final Serde<Long> longSerde = Serdes.Long();

  Properties streamsConfiguration = new Properties();
  streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "table-table-join-lambda-integration-test");
  streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
  streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
  streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
  // For didactic reasons: disable record caching so we can observe every individual update record being sent downstream
  streamsConfiguration.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
  // The commit interval for flushing records to state stores and downstream must be lower than
  // this integration test's timeout (30 secs) to ensure we observe the expected processing results.
  streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
  streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
  // Use a temporary directory for storing state, which will be automatically removed after the test.
  streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());

  StreamsBuilder builder = new StreamsBuilder();
  KTable<String, String> userRegions = builder.table(userRegionTopic);
  KTable<String, Long> userLastLogins = builder.table(userLastLoginTopic, Consumed.with(stringSerde, longSerde));

  // The join result is materialized under this name so that it can be queried below.
  String storeName = "joined-store";
  userRegions
      .join(userLastLogins, (regionValue, lastLoginValue) -> regionValue + "/" + lastLoginValue, Materialized.as(storeName))
      .toStream()
      .to(outputTopic, Produced.with(Serdes.String(), Serdes.String()));

  KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
  streams.start();

  final List<KeyValue<String, String>> actualResults;
  try {
    //
    // Step 2: Publish user regions.
    //
    Properties regionsProducerConfig = new Properties();
    regionsProducerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    regionsProducerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    regionsProducerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    regionsProducerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    regionsProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    IntegrationTestUtils.produceKeyValuesSynchronously(userRegionTopic, userRegionRecords, regionsProducerConfig);

    //
    // Step 3: Publish user's last login timestamps.
    //
    Properties lastLoginProducerConfig = new Properties();
    lastLoginProducerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    lastLoginProducerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    lastLoginProducerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    lastLoginProducerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    lastLoginProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, LongSerializer.class);
    IntegrationTestUtils.produceKeyValuesSynchronously(userLastLoginTopic, userLastLoginRecords, lastLoginProducerConfig);

    //
    // Step 4: Verify the application's output data.
    //
    Properties consumerConfig = new Properties();
    consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "stream-stream-join-lambda-integration-test-standard-consumer");
    consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    actualResults = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
        consumerConfig, outputTopic, expectedResults.size());

    // Verify the (local) state store of the joined table.
    // For a comprehensive demonstration of interactive queries please refer to KafkaMusicExample.
    ReadOnlyKeyValueStore<String, String> readOnlyKeyValueStore =
        streams.store(storeName, QueryableStoreTypes.keyValueStore());
    // The iterator holds store resources until closed, so release it even if the assertion fails.
    try (KeyValueIterator<String, String> keyValueIterator = readOnlyKeyValueStore.all()) {
      assertThat(keyValueIterator).containsExactlyElementsOf(expectedResultsForJoinStateStore);
    }
  } finally {
    // Always stop the topology, even when a step or the store assertion above failed.
    streams.close();
  }
  assertThat(actualResults).containsExactlyElementsOf(expectedResults);
}
Aggregations