Search in sources :

Example 1 with KudafInitializer

Use of io.confluent.ksql.function.udaf.KudafInitializer in project ksql by confluentinc.

The method buildStream of the class AggregateNode.

/**
 * Builds the aggregation stage on top of the stream produced by this node's source.
 *
 * <p>The work is done in three steps: the source stream is projected onto the
 * required columns and aggregate-function arguments, the projected stream is
 * grouped by the group-by expressions and aggregated, and finally the optional
 * HAVING filter and the final select expressions are applied.
 *
 * @param builder forwarded unchanged to the source node's {@code buildStream}
 * @param ksqlConfig forwarded to the source node and used when creating the row serdes
 * @param kafkaTopicClient forwarded unchanged to the source node's {@code buildStream}
 * @param functionRegistry forwarded to the source node and used to build the aggregate
 *     schema, the aggregate function map and the resulting table
 * @param props forwarded unchanged to the source node's {@code buildStream}
 * @param schemaRegistryClient forwarded to the source node and used when creating the
 *     row serdes and the resulting table
 * @return the aggregated table after the final select expressions have been applied
 * @throws KsqlException if the source node builds a {@code SchemaKTable}
 */
@Override
public SchemaKStream buildStream(
    final StreamsBuilder builder,
    final KsqlConfig ksqlConfig,
    final KafkaTopicClient kafkaTopicClient,
    final FunctionRegistry functionRegistry,
    final Map<String, Object> props,
    final SchemaRegistryClient schemaRegistryClient) {
    final StructuredDataSourceNode sourceNode = getTheSourceNode();
    final SchemaKStream upstream = getSource().buildStream(
        builder, ksqlConfig, kafkaTopicClient, functionRegistry, props, schemaRegistryClient);
    if (upstream instanceof SchemaKTable) {
        throw new KsqlException("Unsupported aggregation. KSQL currently only supports aggregation on a Stream.");
    }

    // Step 1: project the source onto the required columns followed by the
    // aggregate-function arguments. Both collections are filled by the helper.
    final Map<String, Integer> expressionNames = new HashMap<>();
    final List<Pair<String, Expression>> projection = new ArrayList<>();
    collectAggregateArgExpressions(getRequiredColumnList(), projection, expressionNames);
    collectAggregateArgExpressions(getAggregateFunctionArguments(), projection, expressionNames);
    final SchemaKStream projected = upstream.select(projection);

    // Step 2: group the projected rows, using the serde of the source topic.
    final KsqlTopicSerDe topicSerDe = sourceNode
        .getStructuredDataSource()
        .getKsqlTopic()
        .getKsqlTopicSerDe();
    final Serde<GenericRow> projectedRowSerde = topicSerDe.getGenericRowSerde(
        projected.getSchema(), ksqlConfig, true, schemaRegistryClient);
    final SchemaKGroupedStream grouped = projected.groupBy(
        Serdes.String(), projectedRowSerde, getGroupByExpressions());

    // Step 3: aggregate. The column map is created first and is handed the same
    // schema builder that is later passed to createAggValToFunctionMap.
    final SchemaBuilder aggregateSchema = SchemaBuilder.struct();
    final Map<Integer, Integer> aggValToValColumnMap =
        createAggregateValueToValueColumnMap(projected, aggregateSchema);
    final Schema aggStageSchema = buildAggregateSchema(projected.getSchema(), functionRegistry);
    final Serde<GenericRow> aggRowSerde = topicSerDe.getGenericRowSerde(
        aggStageSchema, ksqlConfig, true, schemaRegistryClient);

    final int aggColumnCount = aggValToValColumnMap.size();
    final KudafInitializer initializer = new KudafInitializer(aggColumnCount);
    final KudafAggregator aggregator = new KudafAggregator(
        createAggValToFunctionMap(
            expressionNames, projected, aggregateSchema, initializer, aggColumnCount, functionRegistry),
        aggValToValColumnMap);
    final SchemaKTable rawAggregate = grouped.aggregate(
        initializer, aggregator, getWindowExpression(), aggRowSerde);

    // Re-wrap the aggregated KTable so it carries the aggregate-stage schema and
    // is typed as an AGGREGATE step.
    final SchemaKTable aggregated = new SchemaKTable(
        aggStageSchema,
        rawAggregate.getKtable(),
        rawAggregate.getKeyField(),
        rawAggregate.getSourceSchemaKStreams(),
        rawAggregate.isWindowed(),
        SchemaKStream.Type.AGGREGATE,
        functionRegistry,
        schemaRegistryClient);

    // Apply the HAVING filter only when one is present, then the final projection.
    final SchemaKTable filtered = getHavingExpressions() == null
        ? aggregated
        : aggregated.filter(getHavingExpressions());
    return filtered.select(getFinalSelectExpressions());
}
Also used : SchemaKTable(io.confluent.ksql.structured.SchemaKTable) HashMap(java.util.HashMap) Schema(org.apache.kafka.connect.data.Schema) ArrayList(java.util.ArrayList) KudafInitializer(io.confluent.ksql.function.udaf.KudafInitializer) KudafAggregator(io.confluent.ksql.function.udaf.KudafAggregator) KsqlException(io.confluent.ksql.util.KsqlException) SchemaKGroupedStream(io.confluent.ksql.structured.SchemaKGroupedStream) GenericRow(io.confluent.ksql.GenericRow) KsqlTopicSerDe(io.confluent.ksql.serde.KsqlTopicSerDe) SchemaBuilder(org.apache.kafka.connect.data.SchemaBuilder) SchemaKStream(io.confluent.ksql.structured.SchemaKStream) Pair(io.confluent.ksql.util.Pair)

Aggregations

GenericRow (io.confluent.ksql.GenericRow)1 KudafAggregator (io.confluent.ksql.function.udaf.KudafAggregator)1 KudafInitializer (io.confluent.ksql.function.udaf.KudafInitializer)1 KsqlTopicSerDe (io.confluent.ksql.serde.KsqlTopicSerDe)1 SchemaKGroupedStream (io.confluent.ksql.structured.SchemaKGroupedStream)1 SchemaKStream (io.confluent.ksql.structured.SchemaKStream)1 SchemaKTable (io.confluent.ksql.structured.SchemaKTable)1 KsqlException (io.confluent.ksql.util.KsqlException)1 Pair (io.confluent.ksql.util.Pair)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 Schema (org.apache.kafka.connect.data.Schema)1 SchemaBuilder (org.apache.kafka.connect.data.SchemaBuilder)1