Search in sources :

Example 16 with GenericRow

use of io.confluent.ksql.GenericRow in project ksql by confluentinc.

the class HoppingWindowExpressionTest method shouldCreateHoppingWindowAggregate.

/**
 * Checks that applying a hopping window expression (size 10 seconds, advance 4 milliseconds)
 * calls {@code windowedBy} on the grouped stream with the equivalent {@code TimeWindows} and
 * then calls {@code aggregate} on the windowed stream with the very same initializer,
 * aggregator and materialized store instances.
 */
@Test
public void shouldCreateHoppingWindowAggregate() {
    // Mocked collaborators and the arguments handed to the expression under test.
    final KGroupedStream groupedStream = EasyMock.createNiceMock(KGroupedStream.class);
    final TimeWindowedKStream timeWindowedStream = EasyMock.createNiceMock(TimeWindowedKStream.class);
    final UdafAggregator udafAggregator = EasyMock.createNiceMock(UdafAggregator.class);
    final Initializer zeroInitializer = () -> 0;
    final Materialized<String, GenericRow, WindowStore<Bytes, byte[]>> materialized = Materialized.as("store");

    // 10 seconds expressed in milliseconds, advancing by 4 milliseconds.
    final TimeWindows expectedWindows = TimeWindows.of(10000L).advanceBy(4L);
    EasyMock.expect(groupedStream.windowedBy(expectedWindows)).andReturn(timeWindowedStream);
    // Matchers stay inline so EasyMock records them in argument order.
    EasyMock.expect(timeWindowedStream.aggregate(same(zeroInitializer), same(udafAggregator), same(materialized))).andReturn(null);
    EasyMock.replay(groupedStream, timeWindowedStream);

    final HoppingWindowExpression hoppingWindow = new HoppingWindowExpression(10, TimeUnit.SECONDS, 4, TimeUnit.MILLISECONDS);
    hoppingWindow.applyAggregate(groupedStream, zeroInitializer, udafAggregator, materialized);

    EasyMock.verify(groupedStream, timeWindowedStream);
}
Also used : GenericRow(io.confluent.ksql.GenericRow) WindowStore(org.apache.kafka.streams.state.WindowStore) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) Initializer(org.apache.kafka.streams.kstream.Initializer) TimeWindowedKStream(org.apache.kafka.streams.kstream.TimeWindowedKStream) UdafAggregator(io.confluent.ksql.function.UdafAggregator) Test(org.junit.Test)

Example 17 with GenericRow

use of io.confluent.ksql.GenericRow in project ksql by confluentinc.

the class TumblingWindowExpressionTest method shouldCreateTumblingWindowAggregate.

/**
 * Checks that applying a tumbling window expression of 10 seconds calls {@code windowedBy}
 * on the grouped stream with the equivalent {@code TimeWindows} and then calls
 * {@code aggregate} on the windowed stream with the very same initializer, aggregator and
 * materialized store instances.
 */
@Test
public void shouldCreateTumblingWindowAggregate() {
    // Mocked collaborators and the arguments handed to the expression under test.
    final KGroupedStream groupedStream = EasyMock.createNiceMock(KGroupedStream.class);
    final TimeWindowedKStream timeWindowedStream = EasyMock.createNiceMock(TimeWindowedKStream.class);
    final UdafAggregator udafAggregator = EasyMock.createNiceMock(UdafAggregator.class);
    final Initializer zeroInitializer = () -> 0;
    final Materialized<String, GenericRow, WindowStore<Bytes, byte[]>> materialized = Materialized.as("store");

    // 10 seconds expressed in milliseconds.
    final TimeWindows expectedWindows = TimeWindows.of(10000L);
    EasyMock.expect(groupedStream.windowedBy(expectedWindows)).andReturn(timeWindowedStream);
    // Matchers stay inline so EasyMock records them in argument order.
    EasyMock.expect(timeWindowedStream.aggregate(same(zeroInitializer), same(udafAggregator), same(materialized))).andReturn(null);
    EasyMock.replay(groupedStream, timeWindowedStream);

    final TumblingWindowExpression tumblingWindow = new TumblingWindowExpression(10, TimeUnit.SECONDS);
    tumblingWindow.applyAggregate(groupedStream, zeroInitializer, udafAggregator, materialized);

    EasyMock.verify(groupedStream, timeWindowedStream);
}
Also used : GenericRow(io.confluent.ksql.GenericRow) WindowStore(org.apache.kafka.streams.state.WindowStore) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) Initializer(org.apache.kafka.streams.kstream.Initializer) TimeWindowedKStream(org.apache.kafka.streams.kstream.TimeWindowedKStream) UdafAggregator(io.confluent.ksql.function.UdafAggregator) Test(org.junit.Test)

Example 18 with GenericRow

use of io.confluent.ksql.GenericRow in project ksql by confluentinc.

the class TopicProducer method produceInputData.

/**
 * Publishes every entry of {@code recordsToPublish} to {@code topicName}, waiting for each
 * send to be acknowledged before sending the next one.
 *
 * <p>Topic topicName will be automatically created if it doesn't exist.
 *
 * <p>The producer is always closed before this method returns, including when a send fails
 * or times out.
 *
 * @param topicName the topic to publish to
 * @param recordsToPublish the records to publish; each map key is used as the record key
 * @param schema the schema handed to the JSON serializer used for record values
 * @return the metadata of each acknowledged record, keyed by record key
 * @throws InterruptedException if the thread is interrupted while waiting for an acknowledgement
 * @throws TimeoutException if an acknowledgement does not arrive within
 *     {@code TEST_RECORD_FUTURE_TIMEOUT_MS} milliseconds
 * @throws ExecutionException if a send fails
 */
public Map<String, RecordMetadata> produceInputData(String topicName, Map<String, GenericRow> recordsToPublish, Schema schema) throws InterruptedException, TimeoutException, ExecutionException {
    final Map<String, RecordMetadata> result = new HashMap<>();
    // try-with-resources guarantees the producer is closed even when a send fails or times out.
    try (KafkaProducer<String, GenericRow> producer = new KafkaProducer<>(producerConfig, new StringSerializer(), new KsqlJsonSerializer(schema))) {
        for (Map.Entry<String, GenericRow> recordEntry : recordsToPublish.entrySet()) {
            final String key = recordEntry.getKey();
            final ProducerRecord<String, GenericRow> producerRecord = new ProducerRecord<>(topicName, key, recordEntry.getValue());
            final Future<RecordMetadata> recordMetadataFuture = producer.send(producerRecord);
            // Block on each send so the returned metadata is complete.
            result.put(key, recordMetadataFuture.get(TEST_RECORD_FUTURE_TIMEOUT_MS, TimeUnit.MILLISECONDS));
        }
    }
    return result;
}
Also used : KafkaProducer(org.apache.kafka.clients.producer.KafkaProducer) HashMap(java.util.HashMap) KsqlJsonSerializer(io.confluent.ksql.serde.json.KsqlJsonSerializer) GenericRow(io.confluent.ksql.GenericRow) RecordMetadata(org.apache.kafka.clients.producer.RecordMetadata) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) Map(java.util.Map)

Example 19 with GenericRow

use of io.confluent.ksql.GenericRow in project ksql by confluentinc.

the class UserDataProvider method buildData.

/**
 * Builds the fixed set of five user records.
 *
 * <p>Each record is keyed by its user id and its value is
 * (creation_time, gender, region, user_id).
 *
 * @return a map from user id to the row for that user
 */
private Map<String, GenericRow> buildData() {
    // Column-wise fixture data; position i in each array belongs to the same record,
    // and i itself is the first field of that record.
    final String[] userIds = {"USER_0", "USER_1", "USER_2", "USER_3", "USER_4"};
    final String[] genders = {"FEMALE", "MALE", "FEMALE", "MALE", "MALE"};
    final String[] regions = {"REGION_0", "REGION_1", "REGION_1", "REGION_0", "REGION_4"};

    final Map<String, GenericRow> rowsByUserId = new HashMap<>();
    for (int i = 0; i < userIds.length; i++) {
        rowsByUserId.put(userIds[i], new GenericRow(Arrays.asList(i, genders[i], regions[i], userIds[i])));
    }
    return rowsByUserId;
}
Also used : GenericRow(io.confluent.ksql.GenericRow) HashMap(java.util.HashMap)

Example 20 with GenericRow

use of io.confluent.ksql.GenericRow in project ksql by confluentinc.

the class AggregateNode method buildStream.

/**
 * Builds the aggregation stage on top of this node's source.
 *
 * <p>The steps are: build the source stream, project the columns and aggregate-function
 * arguments needed by the aggregation, group the projected stream, aggregate it, optionally
 * apply the HAVING filter, and finally apply the final select expressions.
 *
 * @param builder forwarded to the source node's {@code buildStream}
 * @param ksqlConfig forwarded to the source node and used when creating the row serdes
 * @param kafkaTopicClient forwarded to the source node's {@code buildStream}
 * @param functionRegistry forwarded to the source node and used when building the aggregate
 *     schema, the aggregate function map and the resulting table
 * @param props forwarded to the source node's {@code buildStream}
 * @param schemaRegistryClient forwarded to the source node and used when creating the row
 *     serdes and the resulting table
 * @return the aggregated table after the optional HAVING filter and the final select
 * @throws KsqlException if the source node builds a {@code SchemaKTable}
 */
@Override
public SchemaKStream buildStream(final StreamsBuilder builder, final KsqlConfig ksqlConfig, final KafkaTopicClient kafkaTopicClient, final FunctionRegistry functionRegistry, final Map<String, Object> props, final SchemaRegistryClient schemaRegistryClient) {
    final StructuredDataSourceNode streamSourceNode = getTheSourceNode();
    final SchemaKStream sourceSchemaKStream = getSource().buildStream(builder, ksqlConfig, kafkaTopicClient, functionRegistry, props, schemaRegistryClient);
    // Aggregation over a table source is rejected up front.
    if (sourceSchemaKStream instanceof SchemaKTable) {
        throw new KsqlException("Unsupported aggregation. KSQL currently only supports aggregation on a Stream.");
    }
    // Pre aggregate computations
    // Both calls below share aggArgExpansionList and expressionNames, so the required columns
    // and the aggregate function arguments end up in one combined projection.
    final List<Pair<String, Expression>> aggArgExpansionList = new ArrayList<>();
    final Map<String, Integer> expressionNames = new HashMap<>();
    collectAggregateArgExpressions(getRequiredColumnList(), aggArgExpansionList, expressionNames);
    collectAggregateArgExpressions(getAggregateFunctionArguments(), aggArgExpansionList, expressionNames);
    final SchemaKStream aggregateArgExpanded = sourceSchemaKStream.select(aggArgExpansionList);
    // The serde format is taken from the topic of the underlying source node.
    KsqlTopicSerDe ksqlTopicSerDe = streamSourceNode.getStructuredDataSource().getKsqlTopic().getKsqlTopicSerDe();
    final Serde<GenericRow> genericRowSerde = ksqlTopicSerDe.getGenericRowSerde(aggregateArgExpanded.getSchema(), ksqlConfig, true, schemaRegistryClient);
    // Group with String keys and the row serde built for the projected schema.
    final SchemaKGroupedStream schemaKGroupedStream = aggregateArgExpanded.groupBy(Serdes.String(), genericRowSerde, getGroupByExpressions());
    // Aggregate computations
    // NOTE(review): aggregateSchema is handed to createAggregateValueToValueColumnMap and later to
    // createAggValToFunctionMap; presumably the first call populates it - confirm in those helpers.
    final SchemaBuilder aggregateSchema = SchemaBuilder.struct();
    final Map<Integer, Integer> aggValToValColumnMap = createAggregateValueToValueColumnMap(aggregateArgExpanded, aggregateSchema);
    final Schema aggStageSchema = buildAggregateSchema(aggregateArgExpanded.getSchema(), functionRegistry);
    final Serde<GenericRow> aggValueGenericRowSerde = ksqlTopicSerDe.getGenericRowSerde(aggStageSchema, ksqlConfig, true, schemaRegistryClient);
    // The initializer is sized by the number of entries in aggValToValColumnMap.
    final KudafInitializer initializer = new KudafInitializer(aggValToValColumnMap.size());
    final SchemaKTable schemaKTable = schemaKGroupedStream.aggregate(initializer, new KudafAggregator(createAggValToFunctionMap(expressionNames, aggregateArgExpanded, aggregateSchema, initializer, aggValToValColumnMap.size(), functionRegistry), aggValToValColumnMap), getWindowExpression(), aggValueGenericRowSerde);
    // Re-wrap the aggregated KTable so it carries aggStageSchema and the AGGREGATE type.
    SchemaKTable result = new SchemaKTable(aggStageSchema, schemaKTable.getKtable(), schemaKTable.getKeyField(), schemaKTable.getSourceSchemaKStreams(), schemaKTable.isWindowed(), SchemaKStream.Type.AGGREGATE, functionRegistry, schemaRegistryClient);
    // The HAVING filter is optional and only applied when expressions are present.
    if (getHavingExpressions() != null) {
        result = result.filter(getHavingExpressions());
    }
    return result.select(getFinalSelectExpressions());
}
Also used : SchemaKTable(io.confluent.ksql.structured.SchemaKTable) HashMap(java.util.HashMap) Schema(org.apache.kafka.connect.data.Schema) ArrayList(java.util.ArrayList) KudafInitializer(io.confluent.ksql.function.udaf.KudafInitializer) KudafAggregator(io.confluent.ksql.function.udaf.KudafAggregator) KsqlException(io.confluent.ksql.util.KsqlException) SchemaKGroupedStream(io.confluent.ksql.structured.SchemaKGroupedStream) GenericRow(io.confluent.ksql.GenericRow) KsqlTopicSerDe(io.confluent.ksql.serde.KsqlTopicSerDe) SchemaBuilder(org.apache.kafka.connect.data.SchemaBuilder) SchemaKStream(io.confluent.ksql.structured.SchemaKStream) Pair(io.confluent.ksql.util.Pair)

Aggregations

GenericRow (io.confluent.ksql.GenericRow): 65, Test (org.junit.Test): 38, HashMap (java.util.HashMap): 27, Schema (org.apache.kafka.connect.data.Schema): 19, List (java.util.List): 15, StringDeserializer (org.apache.kafka.common.serialization.StringDeserializer): 15, ArrayList (java.util.ArrayList): 11, MockSchemaRegistryClient (io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient): 9, IntegrationTest (io.confluent.common.utils.IntegrationTest): 8, SchemaRegistryClient (io.confluent.kafka.schemaregistry.client.SchemaRegistryClient): 8, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 5, KsqlTopicSerDe (io.confluent.ksql.serde.KsqlTopicSerDe): 5, KafkaTopicClient (io.confluent.ksql.util.KafkaTopicClient): 5, KafkaTopicClientImpl (io.confluent.ksql.util.KafkaTopicClientImpl): 5, KsqlConfig (io.confluent.ksql.util.KsqlConfig): 5, Map (java.util.Map): 5, GenericRecord (org.apache.avro.generic.GenericRecord): 4, Windowed (org.apache.kafka.streams.kstream.Windowed): 4, KafkaAvroDeserializer (io.confluent.kafka.serializers.KafkaAvroDeserializer): 3, DereferenceExpression (io.confluent.ksql.parser.tree.DereferenceExpression): 3