Use of io.confluent.ksql.serde.KsqlTopicSerDe in project ksql by confluentinc: class AggregateNode, method buildStream.
@Override
public SchemaKStream buildStream(
    final StreamsBuilder builder, final KsqlConfig ksqlConfig,
    final KafkaTopicClient kafkaTopicClient, final FunctionRegistry functionRegistry,
    final Map<String, Object> props, final SchemaRegistryClient schemaRegistryClient) {
  final StructuredDataSourceNode streamSourceNode = getTheSourceNode();
  final SchemaKStream sourceSchemaKStream = getSource().buildStream(
      builder, ksqlConfig, kafkaTopicClient, functionRegistry, props, schemaRegistryClient);
  if (sourceSchemaKStream instanceof SchemaKTable) {
    throw new KsqlException(
        "Unsupported aggregation. KSQL currently only supports aggregation on a Stream.");
  }

  // Pre-aggregate computations: project the required columns and the
  // aggregate function arguments out of the source stream.
  final List<Pair<String, Expression>> aggArgExpansionList = new ArrayList<>();
  final Map<String, Integer> expressionNames = new HashMap<>();
  collectAggregateArgExpressions(getRequiredColumnList(), aggArgExpansionList, expressionNames);
  collectAggregateArgExpressions(getAggregateFunctionArguments(), aggArgExpansionList, expressionNames);
  final SchemaKStream aggregateArgExpanded = sourceSchemaKStream.select(aggArgExpansionList);

  // The source topic's KsqlTopicSerDe supplies a serde for the expanded
  // schema, used when the stream is repartitioned for the group-by.
  final KsqlTopicSerDe ksqlTopicSerDe =
      streamSourceNode.getStructuredDataSource().getKsqlTopic().getKsqlTopicSerDe();
  final Serde<GenericRow> genericRowSerde = ksqlTopicSerDe.getGenericRowSerde(
      aggregateArgExpanded.getSchema(), ksqlConfig, true, schemaRegistryClient);
  final SchemaKGroupedStream schemaKGroupedStream =
      aggregateArgExpanded.groupBy(Serdes.String(), genericRowSerde, getGroupByExpressions());

  // Aggregate computations: the same KsqlTopicSerDe produces a second serde
  // for the aggregation-stage schema.
  final SchemaBuilder aggregateSchema = SchemaBuilder.struct();
  final Map<Integer, Integer> aggValToValColumnMap =
      createAggregateValueToValueColumnMap(aggregateArgExpanded, aggregateSchema);
  final Schema aggStageSchema = buildAggregateSchema(aggregateArgExpanded.getSchema(), functionRegistry);
  final Serde<GenericRow> aggValueGenericRowSerde = ksqlTopicSerDe.getGenericRowSerde(
      aggStageSchema, ksqlConfig, true, schemaRegistryClient);
  final KudafInitializer initializer = new KudafInitializer(aggValToValColumnMap.size());
  final SchemaKTable schemaKTable = schemaKGroupedStream.aggregate(
      initializer,
      new KudafAggregator(
          createAggValToFunctionMap(expressionNames, aggregateArgExpanded, aggregateSchema,
              initializer, aggValToValColumnMap.size(), functionRegistry),
          aggValToValColumnMap),
      getWindowExpression(),
      aggValueGenericRowSerde);

  SchemaKTable result = new SchemaKTable(
      aggStageSchema, schemaKTable.getKtable(), schemaKTable.getKeyField(),
      schemaKTable.getSourceSchemaKStreams(), schemaKTable.isWindowed(),
      SchemaKStream.Type.AGGREGATE, functionRegistry, schemaRegistryClient);
  if (getHavingExpressions() != null) {
    result = result.filter(getHavingExpressions());
  }
  return result.select(getFinalSelectExpressions());
}
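The serde's role in the group-by is easiest to see at the Kafka Streams level: SchemaKStream.groupBy ultimately hands the row serde down so records can be written to, and re-read from, the repartition topic. A minimal sketch of that handoff against the Kafka Streams API of this era (the class, method name, and trivial key extractor are invented for illustration, not taken from the project):

import io.confluent.ksql.GenericRow;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.kstream.KGroupedStream;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Serialized;

final class GroupBySketch {

  // Groups a row stream by a string key, carrying the format-specific row
  // serde so Kafka Streams can serialize rows into the repartition topic.
  static KGroupedStream<String, GenericRow> groupRows(
      final KStream<String, GenericRow> rows, final Serde<GenericRow> rowSerde) {
    return rows.groupBy(
        (key, row) -> key, // stand-in for the real group-by key extraction
        Serialized.with(Serdes.String(), rowSerde));
  }
}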
Use of io.confluent.ksql.serde.KsqlTopicSerDe in project ksql by confluentinc: class StructuredDataSourceNode, method buildStream.
@Override
public SchemaKStream buildStream(
    final StreamsBuilder builder, final KsqlConfig ksqlConfig,
    final KafkaTopicClient kafkaTopicClient, final FunctionRegistry functionRegistry,
    final Map<String, Object> props, final SchemaRegistryClient schemaRegistryClient) {
  if (getTimestampField() != null) {
    final int timestampColumnIndex = getTimeStampColumnIndex();
    ksqlConfig.put(KsqlConfig.KSQL_TIMESTAMP_COLUMN_INDEX, timestampColumnIndex);
  }
  // Value serde for the source schema, minus the implicit ROWTIME/ROWKEY columns.
  final KsqlTopicSerDe ksqlTopicSerDe = getStructuredDataSource().getKsqlTopic().getKsqlTopicSerDe();
  final Serde<GenericRow> genericRowSerde = ksqlTopicSerDe.getGenericRowSerde(
      SchemaUtil.removeImplicitRowTimeRowKeyFromSchema(getSchema()),
      ksqlConfig, false, schemaRegistryClient);
  if (getDataSourceType() == StructuredDataSource.DataSourceType.KTABLE) {
    final KsqlTable table = (KsqlTable) getStructuredDataSource();
    final KTable kTable = createKTable(
        builder, getAutoOffsetReset(props), table, genericRowSerde,
        table.getKsqlTopic().getKsqlTopicSerDe().getGenericRowSerde(
            getSchema(), ksqlConfig, true, schemaRegistryClient));
    return new SchemaKTable(
        getSchema(), kTable, getKeyField(), new ArrayList<>(), table.isWindowed(),
        SchemaKStream.Type.SOURCE, functionRegistry, schemaRegistryClient);
  }
  return new SchemaKStream(
      getSchema(),
      builder.stream(getStructuredDataSource().getKsqlTopic().getKafkaTopicName(),
              Consumed.with(Serdes.String(), genericRowSerde))
          .mapValues(nonWindowedValueMapper)
          .transformValues(new AddTimestampColumn()),
      getKeyField(), new ArrayList<>(),
      SchemaKStream.Type.SOURCE, functionRegistry, schemaRegistryClient);
}
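Isolated from the node plumbing, the stream branch boils down to handing the serde to Consumed.with so every record value on the source topic is decoded into a GenericRow. A minimal sketch of that wiring (the class and method names are invented; Consumed is imported from its Kafka 1.x location):

import io.confluent.ksql.GenericRow;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.Consumed;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.KStream;

final class SourceStreamSketch {

  // String keys; row values decoded by whichever format-specific serde the
  // source's KsqlTopicSerDe produced (JSON, Avro, or delimited).
  static KStream<String, GenericRow> sourceStream(
      final StreamsBuilder builder, final String kafkaTopicName,
      final Serde<GenericRow> rowSerde) {
    return builder.stream(kafkaTopicName, Consumed.with(Serdes.String(), rowSerde));
  }
}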
Use of io.confluent.ksql.serde.KsqlTopicSerDe in project ksql by confluentinc: class SchemaKStreamTest, method testGroupByKey.
@Test
public void testGroupByKey() {
  final String selectQuery = "SELECT col0, col1 FROM test1 WHERE col0 > 100;";
  final PlanNode logicalPlan = planBuilder.buildLogicalPlan(selectQuery);
  initialSchemaKStream = new SchemaKStream(
      logicalPlan.getTheSourceNode().getSchema(), kStream, ksqlStream.getKeyField(),
      new ArrayList<>(), SchemaKStream.Type.SOURCE, functionRegistry,
      new MockSchemaRegistryClient());
  // Group by the stream's existing key column, COL0.
  final Expression keyExpression = new DereferenceExpression(
      new QualifiedNameReference(QualifiedName.of("TEST1")), "COL0");
  final KsqlTopicSerDe ksqlTopicSerDe = new KsqlJsonTopicSerDe();
  final Serde<GenericRow> rowSerde = ksqlTopicSerDe.getGenericRowSerde(
      initialSchemaKStream.getSchema(), null, false, null);
  final List<Expression> groupByExpressions = Arrays.asList(keyExpression);
  final SchemaKGroupedStream groupedSchemaKStream =
      initialSchemaKStream.groupBy(Serdes.String(), rowSerde, groupByExpressions);
  Assert.assertEquals("COL0", groupedSchemaKStream.getKeyField().name());
}
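What the rowSerde built here actually does is easiest to see with a standalone round trip through the JSON serde. A sketch, assuming (as in KSQL of this era) that GenericRow exposes a constructor taking the column values as a List; the schema fields and topic name are invented for illustration:

import io.confluent.ksql.GenericRow;
import io.confluent.ksql.serde.json.KsqlJsonTopicSerDe;
import java.util.Arrays;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;

final class JsonRowRoundTrip {

  public static void main(final String[] args) {
    // Two columns mirroring the test query's projection.
    final Schema schema = SchemaBuilder.struct()
        .field("COL0", Schema.OPTIONAL_INT64_SCHEMA)
        .field("COL1", Schema.OPTIONAL_STRING_SCHEMA)
        .build();
    // Same call shape as the test: the JSON serde needs neither a
    // KsqlConfig nor a schema registry client, so null is accepted.
    final Serde<GenericRow> rowSerde =
        new KsqlJsonTopicSerDe().getGenericRowSerde(schema, null, false, null);

    final GenericRow row = new GenericRow(Arrays.asList(101L, "hello"));
    final byte[] bytes = rowSerde.serializer().serialize("test-topic", row);
    final GenericRow back = rowSerde.deserializer().deserialize("test-topic", bytes);
    System.out.println(back); // expect the same two column values back
  }
}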
Use of io.confluent.ksql.serde.KsqlTopicSerDe in project ksql by confluentinc: class SchemaKStreamTest, method testGroupByMultipleColumns.
@Test
public void testGroupByMultipleColumns() {
  final String selectQuery = "SELECT col0, col1 FROM test1 WHERE col0 > 100;";
  final PlanNode logicalPlan = planBuilder.buildLogicalPlan(selectQuery);
  initialSchemaKStream = new SchemaKStream(
      logicalPlan.getTheSourceNode().getSchema(), kStream, ksqlStream.getKeyField(),
      new ArrayList<>(), SchemaKStream.Type.SOURCE, functionRegistry,
      new MockSchemaRegistryClient());
  // Group by two columns; the grouped key becomes a composite of both.
  final Expression col0Expression = new DereferenceExpression(
      new QualifiedNameReference(QualifiedName.of("TEST1")), "COL0");
  final Expression col1Expression = new DereferenceExpression(
      new QualifiedNameReference(QualifiedName.of("TEST1")), "COL1");
  final KsqlTopicSerDe ksqlTopicSerDe = new KsqlJsonTopicSerDe();
  final Serde<GenericRow> rowSerde = ksqlTopicSerDe.getGenericRowSerde(
      initialSchemaKStream.getSchema(), null, false, null);
  final List<Expression> groupByExpressions = Arrays.asList(col1Expression, col0Expression);
  final SchemaKGroupedStream groupedSchemaKStream =
      initialSchemaKStream.groupBy(Serdes.String(), rowSerde, groupByExpressions);
  Assert.assertEquals("TEST1.COL1|+|TEST1.COL0", groupedSchemaKStream.getKeyField().name());
}
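The expected key name in the assertion suggests how composite group-by keys are named: the group-by expression strings are joined, in order, with a "|+|" separator. A trivial illustration of that naming rule (the helper is invented; this is an inference from the test's expectation, not code from the project):

import java.util.Arrays;
import java.util.List;

final class GroupByKeyName {

  // Joins the group-by expression strings with the separator the test expects.
  static String compositeKeyName(final List<String> expressions) {
    return String.join("|+|", expressions);
  }

  public static void main(final String[] args) {
    // Prints TEST1.COL1|+|TEST1.COL0, matching the assertion above.
    System.out.println(compositeKeyName(Arrays.asList("TEST1.COL1", "TEST1.COL0")));
  }
}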
Use of io.confluent.ksql.serde.KsqlTopicSerDe in project ksql by confluentinc: class Analyzer, method analyzeNonStdOutSink.
private void analyzeNonStdOutSink() {
  final List<Pair<StructuredDataSource, String>> fromDataSources = analysis.getFromDataSources();
  final StructuredDataSource intoStructuredDataSource = analysis.getInto();
  String intoKafkaTopicName = analysis.getIntoKafkaTopicName();
  if (intoKafkaTopicName == null) {
    intoKafkaTopicName = intoStructuredDataSource.getName();
  }

  // Default the sink serde to the first source's serde, then override it
  // when the query declares an explicit output format.
  KsqlTopicSerDe intoTopicSerde = fromDataSources.get(0).getLeft().getKsqlTopic().getKsqlTopicSerDe();
  if (analysis.getIntoFormat() != null) {
    switch (analysis.getIntoFormat().toUpperCase()) {
      case DataSource.AVRO_SERDE_NAME:
        intoTopicSerde = new KsqlAvroTopicSerDe();
        break;
      case DataSource.JSON_SERDE_NAME:
        intoTopicSerde = new KsqlJsonTopicSerDe();
        break;
      case DataSource.DELIMITED_SERDE_NAME:
        intoTopicSerde = new KsqlDelimitedTopicSerDe();
        break;
      default:
        throw new KsqlException(String.format("Unsupported format: %s", analysis.getIntoFormat()));
    }
  } else if (intoTopicSerde instanceof KsqlAvroTopicSerDe) {
    // No explicit format: an inherited Avro serde is replaced with a fresh instance.
    intoTopicSerde = new KsqlAvroTopicSerDe();
  }

  final KsqlTopic newIntoKsqlTopic = new KsqlTopic(intoKafkaTopicName, intoKafkaTopicName, intoTopicSerde);
  final KsqlStream intoKsqlStream = new KsqlStream(
      sqlExpression, intoStructuredDataSource.getName(), null, null, null, newIntoKsqlTopic);
  analysis.setInto(intoKsqlStream);
}
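The format dispatch in the middle of analyzeNonStdOutSink can be distilled into a small factory. A sketch (the class and method are invented, and the DataSource serde-name constants are assumed to equal the literal format names AVRO, JSON, and DELIMITED):

import io.confluent.ksql.serde.KsqlTopicSerDe;
import io.confluent.ksql.serde.avro.KsqlAvroTopicSerDe;
import io.confluent.ksql.serde.delimited.KsqlDelimitedTopicSerDe;
import io.confluent.ksql.serde.json.KsqlJsonTopicSerDe;
import io.confluent.ksql.util.KsqlException;

final class SinkSerdeFactory {

  // Maps a declared sink format name to a fresh KsqlTopicSerDe,
  // rejecting anything the engine does not support.
  static KsqlTopicSerDe forFormat(final String format) {
    switch (format.toUpperCase()) {
      case "AVRO":
        return new KsqlAvroTopicSerDe();
      case "JSON":
        return new KsqlJsonTopicSerDe();
      case "DELIMITED":
        return new KsqlDelimitedTopicSerDe();
      default:
        throw new KsqlException(String.format("Unsupported format: %s", format));
    }
  }
}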