Use of io.confluent.ksql.structured.SchemaKGroupedStream in the ksql project by confluentinc.
From the class AggregateNode, method buildStream.
/**
 * Builds the aggregation stage on top of the stream produced by this node's source.
 *
 * <p>In order, the method: builds the source stream; projects it onto the required columns
 * plus the aggregate function arguments; groups the projection by the group-by expressions;
 * runs the aggregation; re-wraps the outcome as an {@code AGGREGATE}-typed table using the
 * aggregate stage schema; applies the HAVING expressions when they are non-null; and finally
 * applies the final select expressions.
 *
 * @param builder forwarded to the source node's {@code buildStream}
 * @param ksqlConfig forwarded to the source node and used when obtaining the row serdes
 * @param kafkaTopicClient forwarded to the source node's {@code buildStream}
 * @param functionRegistry forwarded to the source node and used to build the aggregate
 *     schema, the aggregate function map and the resulting table
 * @param props forwarded to the source node's {@code buildStream}
 * @param schemaRegistryClient forwarded to the source node and used for the row serdes and
 *     the resulting table
 * @return the aggregated table after the optional HAVING filter and the final select
 * @throws KsqlException if the source node builds a {@code SchemaKTable}
 */
@Override
public SchemaKStream buildStream(
    final StreamsBuilder builder,
    final KsqlConfig ksqlConfig,
    final KafkaTopicClient kafkaTopicClient,
    final FunctionRegistry functionRegistry,
    final Map<String, Object> props,
    final SchemaRegistryClient schemaRegistryClient) {
  final StructuredDataSourceNode sourceNode = getTheSourceNode();
  final SchemaKStream sourceStream = getSource().buildStream(
      builder,
      ksqlConfig,
      kafkaTopicClient,
      functionRegistry,
      props,
      schemaRegistryClient);

  // Only stream sources may be aggregated; a table source is rejected up front.
  if (sourceStream instanceof SchemaKTable) {
    throw new KsqlException("Unsupported aggregation. KSQL currently only supports aggregation on a Stream.");
  }

  // --- Pre-aggregate stage: project onto required columns and aggregate arguments. ---
  final List<Pair<String, Expression>> preAggregateSelects = new ArrayList<>();
  final Map<String, Integer> collectedExpressionNames = new HashMap<>();
  // Both calls append to the same list and map, required columns first.
  collectAggregateArgExpressions(
      getRequiredColumnList(), preAggregateSelects, collectedExpressionNames);
  collectAggregateArgExpressions(
      getAggregateFunctionArguments(), preAggregateSelects, collectedExpressionNames);
  final SchemaKStream expandedStream = sourceStream.select(preAggregateSelects);

  // The serde factory comes from the topic of the underlying source node.
  final KsqlTopicSerDe topicSerDe = sourceNode
      .getStructuredDataSource()
      .getKsqlTopic()
      .getKsqlTopicSerDe();
  final Serde<GenericRow> groupingRowSerde = topicSerDe.getGenericRowSerde(
      expandedStream.getSchema(), ksqlConfig, true, schemaRegistryClient);
  final SchemaKGroupedStream groupedStream = expandedStream.groupBy(
      Serdes.String(), groupingRowSerde, getGroupByExpressions());

  // --- Aggregate stage. ---
  // The same schema builder is handed to the column-map helper and, below, to the
  // function-map helper.
  final SchemaBuilder aggregateSchemaBuilder = SchemaBuilder.struct();
  final Map<Integer, Integer> aggregateToValueColumn =
      createAggregateValueToValueColumnMap(expandedStream, aggregateSchemaBuilder);
  final Schema aggregateStageSchema =
      buildAggregateSchema(expandedStream.getSchema(), functionRegistry);
  final Serde<GenericRow> aggregateRowSerde = topicSerDe.getGenericRowSerde(
      aggregateStageSchema, ksqlConfig, true, schemaRegistryClient);

  final KudafInitializer kudafInitializer = new KudafInitializer(aggregateToValueColumn.size());
  final KudafAggregator kudafAggregator = new KudafAggregator(
      createAggValToFunctionMap(
          collectedExpressionNames,
          expandedStream,
          aggregateSchemaBuilder,
          kudafInitializer,
          aggregateToValueColumn.size(),
          functionRegistry),
      aggregateToValueColumn);
  final SchemaKTable rawAggregate = groupedStream.aggregate(
      kudafInitializer, kudafAggregator, getWindowExpression(), aggregateRowSerde);

  // Re-wrap the aggregation output with the aggregate stage schema and the AGGREGATE type.
  final SchemaKTable aggregateTable = new SchemaKTable(
      aggregateStageSchema,
      rawAggregate.getKtable(),
      rawAggregate.getKeyField(),
      rawAggregate.getSourceSchemaKStreams(),
      rawAggregate.isWindowed(),
      SchemaKStream.Type.AGGREGATE,
      functionRegistry,
      schemaRegistryClient);

  // --- Post-aggregate stage: optional HAVING filter, then the final projection. ---
  final SchemaKTable havingApplied = getHavingExpressions() != null
      ? aggregateTable.filter(getHavingExpressions())
      : aggregateTable;
  return havingApplied.select(getFinalSelectExpressions());
}
Aggregations