use of org.apache.kafka.streams.kstream.KTable in project ksql by confluentinc.
the class StructuredDataSourceNode method buildStream.
@Override
public SchemaKStream buildStream(
    final StreamsBuilder builder,
    final KsqlConfig ksqlConfig,
    final KafkaTopicClient kafkaTopicClient,
    final FunctionRegistry functionRegistry,
    final Map<String, Object> props,
    final SchemaRegistryClient schemaRegistryClient
) {
  if (getTimestampField() != null) {
    int timestampColumnIndex = getTimeStampColumnIndex();
    ksqlConfig.put(KsqlConfig.KSQL_TIMESTAMP_COLUMN_INDEX, timestampColumnIndex);
  }
  KsqlTopicSerDe ksqlTopicSerDe = getStructuredDataSource().getKsqlTopic().getKsqlTopicSerDe();
  Serde<GenericRow> genericRowSerde = ksqlTopicSerDe.getGenericRowSerde(
      SchemaUtil.removeImplicitRowTimeRowKeyFromSchema(getSchema()),
      ksqlConfig, false, schemaRegistryClient);
  if (getDataSourceType() == StructuredDataSource.DataSourceType.KTABLE) {
    final KsqlTable table = (KsqlTable) getStructuredDataSource();
    final KTable kTable = createKTable(
        builder,
        getAutoOffsetReset(props),
        table,
        genericRowSerde,
        table.getKsqlTopic().getKsqlTopicSerDe()
            .getGenericRowSerde(getSchema(), ksqlConfig, true, schemaRegistryClient));
    return new SchemaKTable(
        getSchema(), kTable, getKeyField(), new ArrayList<>(), table.isWindowed(),
        SchemaKStream.Type.SOURCE, functionRegistry, schemaRegistryClient);
  }
  return new SchemaKStream(
      getSchema(),
      builder
          .stream(getStructuredDataSource().getKsqlTopic().getKafkaTopicName(),
              Consumed.with(Serdes.String(), genericRowSerde))
          .mapValues(nonWindowedValueMapper)
          .transformValues(new AddTimestampColumn()),
      getKeyField(), new ArrayList<>(),
      SchemaKStream.Type.SOURCE, functionRegistry, schemaRegistryClient);
}
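The branch on getDataSourceType() is the heart of this method: a ksql table is wired up as a KTable, while everything else becomes a KStream over the same kind of Kafka topic. For readers less familiar with that distinction, here is a minimal sketch in plain Kafka Streams rather than ksql internals; the topic names and String serdes are illustrative assumptions, not taken from the code above.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;

public class SourceTypeSketch {

  public static void main(final String[] args) {
    final StreamsBuilder builder = new StreamsBuilder();

    // Read a topic as an append-only event stream: every record is an independent fact.
    final KStream<String, String> asStream =
        builder.stream("source-topic", Consumed.with(Serdes.String(), Serdes.String()));

    // Read a topic as a changelog: each record upserts the latest value for its key.
    // This is the shape the KTABLE branch above builds for ksql tables.
    final KTable<String, String> asTable =
        builder.table("source-topic-compacted", Consumed.with(Serdes.String(), Serdes.String()));
  }
}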
use of org.apache.kafka.streams.kstream.KTable in project ksql by confluentinc.
the class SchemaKGroupedStream method aggregate.
@SuppressWarnings("unchecked")
public SchemaKTable aggregate(
    final Initializer initializer,
    final UdafAggregator aggregator,
    final WindowExpression windowExpression,
    final Serde<GenericRow> topicValueSerDe
) {
  final KTable aggKtable;
  if (windowExpression != null) {
    final Materialized<String, GenericRow, ?> materialized =
        Materialized.<String, GenericRow, WindowStore<Bytes, byte[]>>with(
            Serdes.String(), topicValueSerDe);
    final KsqlWindowExpression ksqlWindowExpression = windowExpression.getKsqlWindowExpression();
    aggKtable = ksqlWindowExpression.applyAggregate(
        kgroupedStream, initializer, aggregator, materialized);
  } else {
    aggKtable = kgroupedStream.aggregate(
        initializer, aggregator, Materialized.with(Serdes.String(), topicValueSerDe));
  }
  return new SchemaKTable(
      schema, aggKtable, keyField, sourceSchemaKStreams, windowExpression != null,
      SchemaKStream.Type.AGGREGATE, functionRegistry, schemaRegistryClient);
}
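Both branches materialize the aggregate with explicit String key and GenericRow value serdes; the only difference is whether the grouped stream is windowed first. A hedged sketch of the same choice against the public Kafka Streams API (Kafka 1.0-era calls, matching the other examples in this section; all names and types are illustrative):

import java.util.concurrent.TimeUnit;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.kstream.KGroupedStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.TimeWindows;
import org.apache.kafka.streams.kstream.Windowed;
import org.apache.kafka.streams.state.WindowStore;

public class AggregateSketch {

  // Windowed branch: count per key in five-minute windows, yielding a KTable keyed by Windowed<K>.
  static KTable<Windowed<String>, Long> windowedCounts(final KGroupedStream<String, Long> grouped) {
    return grouped
        .windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(5)))
        .count(Materialized.<String, Long, WindowStore<Bytes, byte[]>>with(
            Serdes.String(), Serdes.Long()));
  }

  // Unwindowed branch: a running sum per key, yielding an ordinary KTable keyed by K.
  static KTable<String, Long> runningSums(final KGroupedStream<String, Long> grouped) {
    return grouped.aggregate(
        () -> 0L,                          // initializer
        (key, value, agg) -> agg + value,  // aggregator
        Materialized.with(Serdes.String(), Serdes.Long()));
  }
}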
use of org.apache.kafka.streams.kstream.KTable in project apache-kafka-on-k8s by banzaicloud.
the class InternalStreamsBuilderTest method shouldStillMaterializeSourceKTableIfMaterializedIsntQueryable.
@Test
public void shouldStillMaterializeSourceKTableIfMaterializedIsntQueryable() throws Exception {
  KTable table1 = builder.table(
      "topic2",
      consumed,
      new MaterializedInternal<>(
          Materialized.<String, String, KeyValueStore<Bytes, byte[]>>with(null, null),
          builder, storePrefix));
  final ProcessorTopology topology = builder.internalTopologyBuilder.build(null);
  assertEquals(1, topology.stateStores().size());
  final String storeName = "prefix-STATE-STORE-0000000000";
  assertEquals(storeName, topology.stateStores().get(0).name());
  assertEquals(1, topology.storeToChangelogTopic().size());
  assertEquals("topic2", topology.storeToChangelogTopic().get(storeName));
  assertNull(table1.queryableStoreName());
}
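The test pins down both halves of the contract: the source table is still backed by an internal state store (with a generated prefix-STATE-STORE-... name and topic2 as its changelog), yet queryableStoreName() returns null because no queryable name was supplied. For contrast, a short sketch using the public API with a named store; the topic and store names are illustrative assumptions:

import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.state.KeyValueStore;

public class QueryableTableSketch {

  public static void main(final String[] args) {
    final StreamsBuilder builder = new StreamsBuilder();
    // Naming the store makes the source table queryable via interactive queries.
    final KTable<String, String> table = builder.table(
        "topic2",
        Materialized.<String, String, KeyValueStore<Bytes, byte[]>>as("my-named-store"));
    // Unlike the unnamed case in the test above, this prints "my-named-store", not null.
    System.out.println(table.queryableStoreName());
  }
}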
use of org.apache.kafka.streams.kstream.KTable in project kafka-streams-examples by confluentinc.
the class PageViewRegionLambdaExample method main.
public static void main(final String[] args) throws Exception {
final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
final String schemaRegistryUrl = args.length > 1 ? args[1] : "http://localhost:8081";
final Properties streamsConfiguration = new Properties();
// Give the Streams application a unique name. The name must be unique in the Kafka cluster
// against which the application is run.
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "pageview-region-lambda-example");
streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "pageview-region-lambda-example-client");
// Where to find Kafka broker(s).
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
// Where to find the Confluent schema registry instance(s)
streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
// Specify default (de)serializers for record keys and for record values.
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, GenericAvroSerde.class);
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
// Records should be flushed every 10 seconds. This is less than the default
// in order to keep this example interactive.
streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
final Serde<String> stringSerde = Serdes.String();
final Serde<Long> longSerde = Serdes.Long();
final StreamsBuilder builder = new StreamsBuilder();
// Create a stream of page view events from the PageViews topic, where the key of
// a record is assumed to be null and the value an Avro GenericRecord
// that represents the full details of the page view event. See `pageview.avsc` under
// `src/main/avro/` for the corresponding Avro schema.
final KStream<String, GenericRecord> views = builder.stream("PageViews");
// Create a keyed stream of page view events from the PageViews stream,
// by extracting the user id (String) from the Avro value
final KStream<String, GenericRecord> viewsByUser =
    views.map((dummy, record) -> new KeyValue<>(record.get("user").toString(), record));
// Create a changelog stream for user profiles from the UserProfiles topic,
// where the key of a record is assumed to be the user id (String) and its value
// an Avro GenericRecord. See `userprofile.avsc` under `src/main/avro/` for the
// corresponding Avro schema.
final KTable<String, GenericRecord> userProfiles = builder.table("UserProfiles");
// Create a changelog stream as a projection of the value to the region attribute only
final KTable<String, String> userRegions = userProfiles.mapValues(record -> record.get("region").toString());
// We must specify the Avro schemas for all intermediate (Avro) classes, if any.
// In this example, we want to create an intermediate GenericRecord to hold the view region.
// See `pageviewregion.avsc` under `src/main/avro/`.
final InputStream pageViewRegionSchema = PageViewRegionLambdaExample.class.getClassLoader()
    .getResourceAsStream("avro/io/confluent/examples/streams/pageviewregion.avsc");
final Schema schema = new Schema.Parser().parse(pageViewRegionSchema);
final KTable<Windowed<String>, Long> viewsByRegion = viewsByUser
    .leftJoin(userRegions, (view, region) -> {
      GenericRecord viewRegion = new GenericData.Record(schema);
      viewRegion.put("user", view.get("user"));
      viewRegion.put("page", view.get("page"));
      viewRegion.put("region", region);
      return viewRegion;
    })
    .map((user, viewRegion) -> new KeyValue<>(viewRegion.get("region").toString(), viewRegion))
    .groupByKey()
    .windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(5)).advanceBy(TimeUnit.MINUTES.toMillis(1)))
    .count();
// Note: The following operations would NOT be needed for the actual pageview-by-region
// computation, which would normally stop at `count` above. We use the operations
// below only to "massage" the output data so it is easier to inspect on the console via
// kafka-console-consumer.
final KStream<String, Long> viewsByRegionForConsole =
    viewsByRegion.toStream((windowedRegion, count) -> windowedRegion.toString());
viewsByRegionForConsole.to("PageViewsByRegion", Produced.with(stringSerde, longSerde));
final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
// Always (and unconditionally) clean local state prior to starting the processing topology.
// We opt for this unconditional call here because this will make it easier for you to play around with the example
// when resetting the application for doing a re-run (via the Application Reset Tool,
// http://docs.confluent.io/current/streams/developer-guide.html#application-reset-tool).
//
// The drawback of cleaning up local state prior to startup is that your app must rebuild its local
// state from scratch, which will take time and will require reading all the state-relevant data from
// the Kafka cluster over the network.
// Thus, in a production scenario you typically do not want to clean up unconditionally as we do here,
// but rather only when it is truly needed, i.e., only under certain conditions (e.g., the presence of
// a command line flag for your app).
// See `ApplicationResetExample.java` for a production-like example.
streams.cleanUp();
streams.start();
// Add shutdown hook to respond to SIGTERM and gracefully close Kafka Streams
Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
}
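Once the application is running, the windowed counts land in PageViewsByRegion as String keys (the stringified window) and Long values, per the Produced.with(stringSerde, longSerde) call above. A minimal plain-consumer sketch for inspecting that topic; the group id is an illustrative assumption:

import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.LongDeserializer;
import org.apache.kafka.common.serialization.StringDeserializer;

public class PageViewsByRegionInspector {

  public static void main(final String[] args) {
    final Properties config = new Properties();
    config.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    config.put(ConsumerConfig.GROUP_ID_CONFIG, "pageviews-by-region-inspector");
    config.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    try (final KafkaConsumer<String, Long> consumer =
             new KafkaConsumer<>(config, new StringDeserializer(), new LongDeserializer())) {
      consumer.subscribe(Collections.singleton("PageViewsByRegion"));
      while (true) {
        final ConsumerRecords<String, Long> records = consumer.poll(1000L);
        for (final ConsumerRecord<String, Long> record : records) {
          // Each record is the latest count for one (window, region) pair.
          System.out.println(record.key() + " -> " + record.value());
        }
      }
    }
  }
}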
use of org.apache.kafka.streams.kstream.KTable in project kafka-streams-examples by confluentinc.
the class SumLambdaExample method main.
public static void main(final String[] args) throws Exception {
final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
final Properties streamsConfiguration = new Properties();
// Give the Streams application a unique name. The name must be unique in the Kafka cluster
// against which the application is run.
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "sum-lambda-example");
streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "sum-lambda-example-client");
// Where to find Kafka broker(s).
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
// Specify default (de)serializers for record keys and for record values.
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.Integer().getClass().getName());
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.Integer().getClass().getName());
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, "/tmp/kafka-streams");
// Records should be flushed every 10 seconds. This is less than the default
// in order to keep this example interactive.
streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
final StreamsBuilder builder = new StreamsBuilder();
// We assume the input topic contains records where the values are Integers.
// We don't really care about the keys of the input records; for simplicity, we assume them
// to be Integers, too, because we will re-key the stream later on, and the new key will be
// of type Integer.
final KStream<Integer, Integer> input = builder.stream(NUMBERS_TOPIC);
final KTable<Integer, Integer> sumOfOddNumbers = input
    // We are only interested in odd numbers.
    .filter((k, v) -> v % 2 != 0)
    // We want to compute the total sum across ALL numbers, so we must re-key all records
    // to the same key.
    .selectKey((k, v) -> 1)
    .groupByKey()
    // Add the numbers to compute the running sum.
    .reduce((v1, v2) -> v1 + v2);
sumOfOddNumbers.toStream().to(SUM_OF_ODD_NUMBERS_TOPIC);
final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
// Always (and unconditionally) clean local state prior to starting the processing topology.
// We opt for this unconditional call here because this will make it easier for you to play around with the example
// when resetting the application for doing a re-run (via the Application Reset Tool,
// http://docs.confluent.io/current/streams/developer-guide.html#application-reset-tool).
//
// The drawback of cleaning up local state prior to startup is that your app must rebuild its local
// state from scratch, which will take time and will require reading all the state-relevant data from
// the Kafka cluster over the network.
// Thus, in a production scenario you typically do not want to clean up unconditionally as we do here,
// but rather only when it is truly needed, i.e., only under certain conditions (e.g., the presence of
// a command line flag for your app).
// See `ApplicationResetExample.java` for a production-like example.
streams.cleanUp();
streams.start();
// Add shutdown hook to respond to SIGTERM and gracefully close Kafka Streams
Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
}
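The pipeline itself (filter odds, re-key everything to 1, reduce) is easy to verify offline. A hedged sketch using TopologyTestDriver from the kafka-streams-test-utils artifact (available since Kafka 1.1); the topic literals are illustrative stand-ins for the NUMBERS_TOPIC and SUM_OF_ODD_NUMBERS_TOPIC constants referenced above:

import java.util.Properties;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.IntegerDeserializer;
import org.apache.kafka.common.serialization.IntegerSerializer;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.TopologyTestDriver;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.test.ConsumerRecordFactory;

public class SumLambdaTopologySketch {

  public static void main(final String[] args) {
    final Properties config = new Properties();
    config.put(StreamsConfig.APPLICATION_ID_CONFIG, "sum-lambda-sketch");
    config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234"); // never contacted by the test driver
    config.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.Integer().getClass().getName());
    config.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.Integer().getClass().getName());
    // Disable record caching so every input immediately produces an updated sum downstream.
    config.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);

    // Same topology shape as SumLambdaExample.main(), with illustrative topic names.
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<Integer, Integer> input = builder.stream("numbers-topic");
    input.filter((k, v) -> v % 2 != 0)
        .selectKey((k, v) -> 1)
        .groupByKey()
        .reduce((v1, v2) -> v1 + v2)
        .toStream()
        .to("sum-of-odd-numbers-topic");

    final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), config);
    final ConsumerRecordFactory<Integer, Integer> factory =
        new ConsumerRecordFactory<>(new IntegerSerializer(), new IntegerSerializer());
    for (int i = 1; i <= 5; i++) {
      driver.pipeInput(factory.create("numbers-topic", i, i));
    }
    ProducerRecord<Integer, Integer> record;
    while ((record = driver.readOutput(
        "sum-of-odd-numbers-topic", new IntegerDeserializer(), new IntegerDeserializer())) != null) {
      System.out.println("running sum = " + record.value()); // prints 1, 4, 9
    }
    driver.close();
  }
}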