use of io.confluent.ksql.structured.SchemaKTable in project ksql by confluentinc.
the class AggregateNode method buildStream.
/**
 * Builds the aggregation stage on top of the stream produced by this node's source.
 *
 * <p>Steps, in order: build the source stream, reject it if it is a {@link SchemaKTable},
 * project the required columns and aggregate-function arguments, group by the group-by
 * expressions, aggregate, apply the HAVING expression when one is set, and finally project
 * the final select expressions.
 *
 * @return the result of selecting the final select expressions from the aggregated table
 * @throws KsqlException if the source builds into a {@link SchemaKTable}
 */
@Override
public SchemaKStream buildStream(final StreamsBuilder builder, final KsqlConfig ksqlConfig, final KafkaTopicClient kafkaTopicClient, final FunctionRegistry functionRegistry, final Map<String, Object> props, final SchemaRegistryClient schemaRegistryClient) {
    final StructuredDataSourceNode sourceNode = getTheSourceNode();
    final SchemaKStream upstream = getSource().buildStream(
        builder, ksqlConfig, kafkaTopicClient, functionRegistry, props, schemaRegistryClient);
    if (upstream instanceof SchemaKTable) {
        throw new KsqlException("Unsupported aggregation. KSQL currently only supports aggregation on a Stream.");
    }

    // Pre-aggregate projection: both helper calls append to the same list and name map,
    // required columns first, then aggregate-function arguments.
    final Map<String, Integer> expressionNames = new HashMap<>();
    final List<Pair<String, Expression>> preAggregateColumns = new ArrayList<>();
    collectAggregateArgExpressions(getRequiredColumnList(), preAggregateColumns, expressionNames);
    collectAggregateArgExpressions(getAggregateFunctionArguments(), preAggregateColumns, expressionNames);
    final SchemaKStream expanded = upstream.select(preAggregateColumns);

    // Grouping uses the serde of the source node's topic, applied to the projected schema.
    final KsqlTopicSerDe topicSerDe =
        sourceNode.getStructuredDataSource().getKsqlTopic().getKsqlTopicSerDe();
    final Serde<GenericRow> groupingSerde = topicSerDe.getGenericRowSerde(
        expanded.getSchema(), ksqlConfig, true, schemaRegistryClient);
    final SchemaKGroupedStream grouped =
        expanded.groupBy(Serdes.String(), groupingSerde, getGroupByExpressions());

    // Aggregate computations
    final SchemaBuilder aggregateSchema = SchemaBuilder.struct();
    final Map<Integer, Integer> aggValToValColumnMap =
        createAggregateValueToValueColumnMap(expanded, aggregateSchema);
    final Schema aggStageSchema = buildAggregateSchema(expanded.getSchema(), functionRegistry);
    final Serde<GenericRow> aggValueSerde = topicSerDe.getGenericRowSerde(
        aggStageSchema, ksqlConfig, true, schemaRegistryClient);
    final KudafInitializer initializer = new KudafInitializer(aggValToValColumnMap.size());
    final KudafAggregator aggregator = new KudafAggregator(
        createAggValToFunctionMap(
            expressionNames,
            expanded,
            aggregateSchema,
            initializer,
            aggValToValColumnMap.size(),
            functionRegistry),
        aggValToValColumnMap);
    final SchemaKTable aggregated =
        grouped.aggregate(initializer, aggregator, getWindowExpression(), aggValueSerde);

    // Re-wrap the aggregated KTable with the aggregate-stage schema and the AGGREGATE type.
    final SchemaKTable aggregateTable = new SchemaKTable(
        aggStageSchema,
        aggregated.getKtable(),
        aggregated.getKeyField(),
        aggregated.getSourceSchemaKStreams(),
        aggregated.isWindowed(),
        SchemaKStream.Type.AGGREGATE,
        functionRegistry,
        schemaRegistryClient);

    // HAVING is optional; without it the aggregate table is projected as-is.
    final SchemaKTable filtered = getHavingExpressions() == null
        ? aggregateTable
        : aggregateTable.filter(getHavingExpressions());
    return filtered.select(getFinalSelectExpressions());
}
use of io.confluent.ksql.structured.SchemaKTable in project ksql by confluentinc.
the class JoinNode method tableForJoin.
// package private for test
/**
 * Builds the right-hand side of the join and returns it as a table.
 *
 * <p>The right side is built with a copy of {@code props} in which
 * {@code ConsumerConfig.AUTO_OFFSET_RESET_CONFIG} is set to {@code "earliest"}; the
 * caller's map is not modified.
 *
 * @return the right side of the join, cast to {@link SchemaKTable}
 * @throws KsqlException if the right side does not build into a {@link SchemaKTable}
 */
SchemaKTable tableForJoin(final StreamsBuilder builder, final KsqlConfig ksqlConfig, final KafkaTopicClient kafkaTopicClient, final FunctionRegistry functionRegistry, final Map<String, Object> props, final SchemaRegistryClient schemaRegistryClient) {
    final Map<String, Object> tableProps = new HashMap<>(props);
    tableProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

    final SchemaKStream rightSide = right.buildStream(
        builder, ksqlConfig, kafkaTopicClient, functionRegistry, tableProps, schemaRegistryClient);
    if (rightSide instanceof SchemaKTable) {
        return (SchemaKTable) rightSide;
    }
    throw new KsqlException("Unsupported Join. Only stream-table joins are supported, but was " + getLeft() + "-" + getRight());
}
use of io.confluent.ksql.structured.SchemaKTable in project ksql by confluentinc.
the class KsqlStructuredDataOutputNode method createOutputStream.
/**
 * Wraps the given stream as the SINK stream of this output node.
 *
 * <p>A {@link SchemaKTable} is returned unchanged. Otherwise a new SINK-typed
 * {@link SchemaKStream} is created over the input's underlying stream using this node's
 * schema and key field. When {@code outputProperties} contains
 * {@code DdlConfig.PARTITION_BY_PROPERTY}, the named column is looked up in the result
 * schema, recorded on {@code outputNodeBuilder} as the key field, and used to re-key the
 * stream.
 *
 * @throws KsqlException if the PARTITION BY column is not found in the result schema
 */
private SchemaKStream createOutputStream(final SchemaKStream schemaKStream, final KsqlStructuredDataOutputNode.Builder outputNodeBuilder, final FunctionRegistry functionRegistry, final Map<String, Object> outputProperties, final SchemaRegistryClient schemaRegistryClient) {
    if (schemaKStream instanceof SchemaKTable) {
        return schemaKStream;
    }

    final SchemaKStream sinkStream = new SchemaKStream(
        getSchema(),
        schemaKStream.getKstream(),
        this.getKeyField(),
        Collections.singletonList(schemaKStream),
        SchemaKStream.Type.SINK,
        functionRegistry,
        schemaRegistryClient);

    // No PARTITION BY requested: the sink stream keeps its current key.
    if (!outputProperties.containsKey(DdlConfig.PARTITION_BY_PROPERTY)) {
        return sinkStream;
    }

    final String partitionByColumn =
        outputProperties.get(DdlConfig.PARTITION_BY_PROPERTY).toString();
    final Field partitionByField = SchemaUtil
        .getFieldByName(sinkStream.getSchema(), partitionByColumn)
        .orElseThrow(() -> new KsqlException(String.format(
            "Column %s does not exist in the result schema." + " Error in Partition By clause.",
            partitionByColumn)));
    outputNodeBuilder.withKeyField(partitionByField);
    return sinkStream.selectKey(partitionByField, false);
}
use of io.confluent.ksql.structured.SchemaKTable in project ksql by confluentinc.
the class StructuredDataSourceNode method buildStream.
/**
 * Builds the source stream or table for this node's structured data source.
 *
 * <p>If a timestamp field is set, its column index is first written into
 * {@code ksqlConfig} under {@code KsqlConfig.KSQL_TIMESTAMP_COLUMN_INDEX}. A data source
 * of type KTABLE yields a {@link SchemaKTable}; any other type yields a
 * {@link SchemaKStream} read from the source's Kafka topic.
 *
 * @return a SOURCE-typed {@link SchemaKTable} or {@link SchemaKStream} with no parent streams
 */
@Override
public SchemaKStream buildStream(final StreamsBuilder builder, final KsqlConfig ksqlConfig, final KafkaTopicClient kafkaTopicClient, final FunctionRegistry functionRegistry, final Map<String, Object> props, final SchemaRegistryClient schemaRegistryClient) {
    if (getTimestampField() != null) {
        ksqlConfig.put(KsqlConfig.KSQL_TIMESTAMP_COLUMN_INDEX, getTimeStampColumnIndex());
    }

    // Serde over the schema with the implicit ROWTIME/ROWKEY columns removed.
    final KsqlTopicSerDe topicSerDe = getStructuredDataSource().getKsqlTopic().getKsqlTopicSerDe();
    final Serde<GenericRow> rowSerde = topicSerDe.getGenericRowSerde(
        SchemaUtil.removeImplicitRowTimeRowKeyFromSchema(getSchema()),
        ksqlConfig,
        false,
        schemaRegistryClient);

    if (getDataSourceType() != StructuredDataSource.DataSourceType.KTABLE) {
        return new SchemaKStream(
            getSchema(),
            builder
                .stream(
                    getStructuredDataSource().getKsqlTopic().getKafkaTopicName(),
                    Consumed.with(Serdes.String(), rowSerde))
                .mapValues(nonWindowedValueMapper)
                .transformValues(new AddTimestampColumn()),
            getKeyField(),
            new ArrayList<>(),
            SchemaKStream.Type.SOURCE,
            functionRegistry,
            schemaRegistryClient);
    }

    final KsqlTable ksqlTable = (KsqlTable) getStructuredDataSource();
    // The second serde passed to createKTable is built over the full schema.
    final KTable sourceTable = createKTable(
        builder,
        getAutoOffsetReset(props),
        ksqlTable,
        rowSerde,
        ksqlTable.getKsqlTopic().getKsqlTopicSerDe().getGenericRowSerde(
            getSchema(), ksqlConfig, true, schemaRegistryClient));
    return new SchemaKTable(
        getSchema(),
        sourceTable,
        getKeyField(),
        new ArrayList<>(),
        ksqlTable.isWindowed(),
        SchemaKStream.Type.SOURCE,
        functionRegistry,
        schemaRegistryClient);
}
use of io.confluent.ksql.structured.SchemaKTable in project ksql by confluentinc.
the class PhysicalPlanBuilder method buildPlanForBareQuery.
/**
 * Creates the Kafka Streams application and query metadata for a bare (queued) query.
 *
 * <p>The application id is derived from {@code serviceId} and {@code transientQueryPrefix}
 * with a time suffix added. The reported data source type is KTABLE when the first source
 * of {@code schemaKStream} is a {@link SchemaKTable}, and KSTREAM otherwise.
 *
 * @return a {@link QueuedQueryMetadata} describing the built query
 */
private QueryMetadata buildPlanForBareQuery(final QueuedSchemaKStream schemaKStream, final KsqlBareOutputNode bareOutputNode, final String serviceId, final String transientQueryPrefix, final String statement) {
    final String applicationId =
        addTimeSuffix(getBareQueryApplicationId(serviceId, transientQueryPrefix));
    final KafkaStreams kafkaStreams =
        buildStreams(builder, applicationId, ksqlConfig, overriddenStreamsProperties);

    final SchemaKStream firstSource = schemaKStream.getSourceSchemaKStreams().get(0);
    final DataSource.DataSourceType sourceType = (firstSource instanceof SchemaKTable)
        ? DataSource.DataSourceType.KTABLE
        : DataSource.DataSourceType.KSTREAM;

    return new QueuedQueryMetadata(
        statement,
        kafkaStreams,
        bareOutputNode,
        schemaKStream.getExecutionPlan(""),
        schemaKStream.getQueue(),
        sourceType,
        applicationId,
        kafkaTopicClient,
        builder.build());
}
Aggregations