
Example 1 with KsqlTopicSerDe

Use of io.confluent.ksql.serde.KsqlTopicSerDe in project ksql by confluentinc.

From class AggregateNode, the buildStream method:

@Override
public SchemaKStream buildStream(
        final StreamsBuilder builder,
        final KsqlConfig ksqlConfig,
        final KafkaTopicClient kafkaTopicClient,
        final FunctionRegistry functionRegistry,
        final Map<String, Object> props,
        final SchemaRegistryClient schemaRegistryClient) {
    final StructuredDataSourceNode streamSourceNode = getTheSourceNode();
    final SchemaKStream sourceSchemaKStream = getSource().buildStream(
        builder, ksqlConfig, kafkaTopicClient, functionRegistry, props, schemaRegistryClient);
    if (sourceSchemaKStream instanceof SchemaKTable) {
        throw new KsqlException("Unsupported aggregation. KSQL currently only supports aggregation on a Stream.");
    }
    // Pre aggregate computations
    final List<Pair<String, Expression>> aggArgExpansionList = new ArrayList<>();
    final Map<String, Integer> expressionNames = new HashMap<>();
    collectAggregateArgExpressions(getRequiredColumnList(), aggArgExpansionList, expressionNames);
    collectAggregateArgExpressions(getAggregateFunctionArguments(), aggArgExpansionList, expressionNames);
    final SchemaKStream aggregateArgExpanded = sourceSchemaKStream.select(aggArgExpansionList);
    final KsqlTopicSerDe ksqlTopicSerDe =
        streamSourceNode.getStructuredDataSource().getKsqlTopic().getKsqlTopicSerDe();
    final Serde<GenericRow> genericRowSerde = ksqlTopicSerDe.getGenericRowSerde(
        aggregateArgExpanded.getSchema(), ksqlConfig, true, schemaRegistryClient);
    final SchemaKGroupedStream schemaKGroupedStream = aggregateArgExpanded.groupBy(Serdes.String(), genericRowSerde, getGroupByExpressions());
    // Aggregate computations
    final SchemaBuilder aggregateSchema = SchemaBuilder.struct();
    final Map<Integer, Integer> aggValToValColumnMap =
        createAggregateValueToValueColumnMap(aggregateArgExpanded, aggregateSchema);
    final Schema aggStageSchema = buildAggregateSchema(aggregateArgExpanded.getSchema(), functionRegistry);
    final Serde<GenericRow> aggValueGenericRowSerde = ksqlTopicSerDe.getGenericRowSerde(
        aggStageSchema, ksqlConfig, true, schemaRegistryClient);
    final KudafInitializer initializer = new KudafInitializer(aggValToValColumnMap.size());
    final SchemaKTable schemaKTable = schemaKGroupedStream.aggregate(
        initializer,
        new KudafAggregator(
            createAggValToFunctionMap(
                expressionNames, aggregateArgExpanded, aggregateSchema,
                initializer, aggValToValColumnMap.size(), functionRegistry),
            aggValToValColumnMap),
        getWindowExpression(),
        aggValueGenericRowSerde);
    SchemaKTable result = new SchemaKTable(
        aggStageSchema, schemaKTable.getKtable(), schemaKTable.getKeyField(),
        schemaKTable.getSourceSchemaKStreams(), schemaKTable.isWindowed(),
        SchemaKStream.Type.AGGREGATE, functionRegistry, schemaRegistryClient);
    if (getHavingExpressions() != null) {
        result = result.filter(getHavingExpressions());
    }
    return result.select(getFinalSelectExpressions());
}
Also used : SchemaKTable(io.confluent.ksql.structured.SchemaKTable) HashMap(java.util.HashMap) Schema(org.apache.kafka.connect.data.Schema) ArrayList(java.util.ArrayList) KudafInitializer(io.confluent.ksql.function.udaf.KudafInitializer) KudafAggregator(io.confluent.ksql.function.udaf.KudafAggregator) KsqlException(io.confluent.ksql.util.KsqlException) SchemaKGroupedStream(io.confluent.ksql.structured.SchemaKGroupedStream) GenericRow(io.confluent.ksql.GenericRow) KsqlTopicSerDe(io.confluent.ksql.serde.KsqlTopicSerDe) SchemaBuilder(org.apache.kafka.connect.data.SchemaBuilder) SchemaKStream(io.confluent.ksql.structured.SchemaKStream) Pair(io.confluent.ksql.util.Pair)
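
The pattern at the heart of Example 1 is resolving the serde declared on the source topic and turning it into a Serde<GenericRow> for the repartition and aggregation stages. Below is a minimal sketch of that step in isolation; the class and method names are hypothetical (not KSQL API), and only the getGenericRowSerde call shown in buildStream above is assumed to exist.

import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
import io.confluent.ksql.GenericRow;
import io.confluent.ksql.serde.KsqlTopicSerDe;
import io.confluent.ksql.util.KsqlConfig;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.connect.data.Schema;

// Hypothetical helper, not KSQL API: builds the GenericRow serde used for the
// repartition and aggregation stages, exactly as the two getGenericRowSerde
// calls in buildStream above do (the 'true' flag marks an internal topic).
final class InternalRowSerdeFactory {

    static Serde<GenericRow> create(
            final KsqlTopicSerDe topicSerDe,
            final Schema stageSchema,
            final KsqlConfig ksqlConfig,
            final SchemaRegistryClient schemaRegistryClient) {
        return topicSerDe.getGenericRowSerde(stageSchema, ksqlConfig, true, schemaRegistryClient);
    }

    private InternalRowSerdeFactory() {
    }
}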

Example 2 with KsqlTopicSerDe

Use of io.confluent.ksql.serde.KsqlTopicSerDe in project ksql by confluentinc.

From class StructuredDataSourceNode, the buildStream method:

@Override
public SchemaKStream buildStream(
        final StreamsBuilder builder,
        final KsqlConfig ksqlConfig,
        final KafkaTopicClient kafkaTopicClient,
        final FunctionRegistry functionRegistry,
        final Map<String, Object> props,
        final SchemaRegistryClient schemaRegistryClient) {
    if (getTimestampField() != null) {
        int timestampColumnIndex = getTimeStampColumnIndex();
        ksqlConfig.put(KsqlConfig.KSQL_TIMESTAMP_COLUMN_INDEX, timestampColumnIndex);
    }
    KsqlTopicSerDe ksqlTopicSerDe = getStructuredDataSource().getKsqlTopic().getKsqlTopicSerDe();
    Serde<GenericRow> genericRowSerde = ksqlTopicSerDe.getGenericRowSerde(
        SchemaUtil.removeImplicitRowTimeRowKeyFromSchema(getSchema()),
        ksqlConfig, false, schemaRegistryClient);
    if (getDataSourceType() == StructuredDataSource.DataSourceType.KTABLE) {
        final KsqlTable table = (KsqlTable) getStructuredDataSource();
        final KTable kTable = createKTable(
            builder,
            getAutoOffsetReset(props),
            table,
            genericRowSerde,
            table.getKsqlTopic().getKsqlTopicSerDe().getGenericRowSerde(
                getSchema(), ksqlConfig, true, schemaRegistryClient));
        return new SchemaKTable(
            getSchema(), kTable, getKeyField(), new ArrayList<>(), table.isWindowed(),
            SchemaKStream.Type.SOURCE, functionRegistry, schemaRegistryClient);
    }
    return new SchemaKStream(
        getSchema(),
        builder.stream(
                getStructuredDataSource().getKsqlTopic().getKafkaTopicName(),
                Consumed.with(Serdes.String(), genericRowSerde))
            .mapValues(nonWindowedValueMapper)
            .transformValues(new AddTimestampColumn()),
        getKeyField(),
        new ArrayList<>(),
        SchemaKStream.Type.SOURCE,
        functionRegistry,
        schemaRegistryClient);
}
Also used : GenericRow(io.confluent.ksql.GenericRow) SchemaKTable(io.confluent.ksql.structured.SchemaKTable) KsqlTopicSerDe(io.confluent.ksql.serde.KsqlTopicSerDe) KsqlTable(io.confluent.ksql.metastore.KsqlTable) SchemaKStream(io.confluent.ksql.structured.SchemaKStream) SchemaKTable(io.confluent.ksql.structured.SchemaKTable) KTable(org.apache.kafka.streams.kstream.KTable) AddTimestampColumn(io.confluent.ksql.physical.AddTimestampColumn)
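
Example 2 builds two different GenericRow serdes for a KTABLE source: one for reading the Kafka topic, built against the schema with the implicit ROWTIME/ROWKEY columns removed (internal = false), and one for the table's materialized state, built against the full schema (internal = true). A minimal sketch of that pairing follows; the TableSerdes class is hypothetical and not part of KSQL, and only the getGenericRowSerde calls shown above are assumed.

import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
import io.confluent.ksql.GenericRow;
import io.confluent.ksql.serde.KsqlTopicSerDe;
import io.confluent.ksql.util.KsqlConfig;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.connect.data.Schema;

// Hypothetical holder, not KSQL API: the two serdes a KTABLE source needs in
// Example 2.
final class TableSerdes {

    final Serde<GenericRow> topicValueSerde;
    final Serde<GenericRow> stateStoreSerde;

    TableSerdes(
            final KsqlTopicSerDe topicSerDe,
            final Schema schemaWithoutImplicitColumns,
            final Schema fullSchema,
            final KsqlConfig ksqlConfig,
            final SchemaRegistryClient schemaRegistryClient) {
        // Topic-facing serde: implicit ROWTIME/ROWKEY already stripped, internal = false.
        this.topicValueSerde = topicSerDe.getGenericRowSerde(
                schemaWithoutImplicitColumns, ksqlConfig, false, schemaRegistryClient);
        // State-store serde: full schema, internal = true, as passed to createKTable above.
        this.stateStoreSerde = topicSerDe.getGenericRowSerde(
                fullSchema, ksqlConfig, true, schemaRegistryClient);
    }
}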

Example 3 with KsqlTopicSerDe

Use of io.confluent.ksql.serde.KsqlTopicSerDe in project ksql by confluentinc.

From class SchemaKStreamTest, the testGroupByKey method:

@Test
public void testGroupByKey() {
    String selectQuery = "SELECT col0, col1 FROM test1 WHERE col0 > 100;";
    PlanNode logicalPlan = planBuilder.buildLogicalPlan(selectQuery);
    initialSchemaKStream = new SchemaKStream(
        logicalPlan.getTheSourceNode().getSchema(), kStream, ksqlStream.getKeyField(),
        new ArrayList<>(), SchemaKStream.Type.SOURCE, functionRegistry,
        new MockSchemaRegistryClient());
    Expression keyExpression = new DereferenceExpression(
        new QualifiedNameReference(QualifiedName.of("TEST1")), "COL0");
    KsqlTopicSerDe ksqlTopicSerDe = new KsqlJsonTopicSerDe();
    Serde<GenericRow> rowSerde = ksqlTopicSerDe.getGenericRowSerde(
        initialSchemaKStream.getSchema(), null, false, null);
    List<Expression> groupByExpressions = Arrays.asList(keyExpression);
    SchemaKGroupedStream groupedSchemaKStream =
        initialSchemaKStream.groupBy(Serdes.String(), rowSerde, groupByExpressions);
    Assert.assertEquals(groupedSchemaKStream.getKeyField().name(), "COL0");
}
Also used : DereferenceExpression(io.confluent.ksql.parser.tree.DereferenceExpression) MockSchemaRegistryClient(io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient) ArrayList(java.util.ArrayList) GenericRow(io.confluent.ksql.GenericRow) PlanNode(io.confluent.ksql.planner.plan.PlanNode) DereferenceExpression(io.confluent.ksql.parser.tree.DereferenceExpression) Expression(io.confluent.ksql.parser.tree.Expression) KsqlTopicSerDe(io.confluent.ksql.serde.KsqlTopicSerDe) KsqlJsonTopicSerDe(io.confluent.ksql.serde.json.KsqlJsonTopicSerDe) QualifiedNameReference(io.confluent.ksql.parser.tree.QualifiedNameReference) Test(org.junit.Test)

Example 4 with KsqlTopicSerDe

Use of io.confluent.ksql.serde.KsqlTopicSerDe in project ksql by confluentinc.

From class SchemaKStreamTest, the testGroupByMultipleColumns method:

@Test
public void testGroupByMultipleColumns() {
    String selectQuery = "SELECT col0, col1 FROM test1 WHERE col0 > 100;";
    PlanNode logicalPlan = planBuilder.buildLogicalPlan(selectQuery);
    initialSchemaKStream = new SchemaKStream(
        logicalPlan.getTheSourceNode().getSchema(), kStream, ksqlStream.getKeyField(),
        new ArrayList<>(), SchemaKStream.Type.SOURCE, functionRegistry,
        new MockSchemaRegistryClient());
    Expression col0Expression = new DereferenceExpression(
        new QualifiedNameReference(QualifiedName.of("TEST1")), "COL0");
    Expression col1Expression = new DereferenceExpression(
        new QualifiedNameReference(QualifiedName.of("TEST1")), "COL1");
    KsqlTopicSerDe ksqlTopicSerDe = new KsqlJsonTopicSerDe();
    Serde<GenericRow> rowSerde = ksqlTopicSerDe.getGenericRowSerde(
        initialSchemaKStream.getSchema(), null, false, null);
    List<Expression> groupByExpressions = Arrays.asList(col1Expression, col0Expression);
    SchemaKGroupedStream groupedSchemaKStream =
        initialSchemaKStream.groupBy(Serdes.String(), rowSerde, groupByExpressions);
    Assert.assertEquals(groupedSchemaKStream.getKeyField().name(), "TEST1.COL1|+|TEST1.COL0");
}
Also used : DereferenceExpression(io.confluent.ksql.parser.tree.DereferenceExpression) MockSchemaRegistryClient(io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient) ArrayList(java.util.ArrayList) GenericRow(io.confluent.ksql.GenericRow) PlanNode(io.confluent.ksql.planner.plan.PlanNode) DereferenceExpression(io.confluent.ksql.parser.tree.DereferenceExpression) Expression(io.confluent.ksql.parser.tree.Expression) KsqlTopicSerDe(io.confluent.ksql.serde.KsqlTopicSerDe) KsqlJsonTopicSerDe(io.confluent.ksql.serde.json.KsqlJsonTopicSerDe) QualifiedNameReference(io.confluent.ksql.parser.tree.QualifiedNameReference) Test(org.junit.Test)
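
Taken together, the two tests pin down the group-by key naming: grouping by the existing key column keeps the plain column name (COL0), while grouping by several columns yields a composite key joined with "|+|" in group-by order. Below is a small test-side sketch of the composite case only; the helper is hypothetical (not KSQL API) and assumes the expressions render as TEST1.COL1 and TEST1.COL0, as the assertion above expects.

import io.confluent.ksql.parser.tree.Expression;
import java.util.List;
import java.util.stream.Collectors;

// Hypothetical test helper, not KSQL API: the composite key name expected for
// a multi-column group-by, e.g. "TEST1.COL1|+|TEST1.COL0" for the test above.
final class GroupByKeyNames {

    static String composite(final List<Expression> groupByExpressions) {
        return groupByExpressions.stream()
                .map(Expression::toString)
                .collect(Collectors.joining("|+|"));
    }

    private GroupByKeyNames() {
    }
}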

Example 5 with KsqlTopicSerDe

Use of io.confluent.ksql.serde.KsqlTopicSerDe in project ksql by confluentinc.

From class Analyzer, the analyzeNonStdOutSink method:

private void analyzeNonStdOutSink() {
    List<Pair<StructuredDataSource, String>> fromDataSources = analysis.getFromDataSources();
    StructuredDataSource intoStructuredDataSource = analysis.getInto();
    String intoKafkaTopicName = analysis.getIntoKafkaTopicName();
    if (intoKafkaTopicName == null) {
        intoKafkaTopicName = intoStructuredDataSource.getName();
    }
    KsqlTopicSerDe intoTopicSerde = fromDataSources.get(0).getLeft().getKsqlTopic().getKsqlTopicSerDe();
    if (analysis.getIntoFormat() != null) {
        switch(analysis.getIntoFormat().toUpperCase()) {
            case DataSource.AVRO_SERDE_NAME:
                intoTopicSerde = new KsqlAvroTopicSerDe();
                break;
            case DataSource.JSON_SERDE_NAME:
                intoTopicSerde = new KsqlJsonTopicSerDe();
                break;
            case DataSource.DELIMITED_SERDE_NAME:
                intoTopicSerde = new KsqlDelimitedTopicSerDe();
                break;
            default:
                throw new KsqlException(String.format("Unsupported format: %s", analysis.getIntoFormat()));
        }
    } else {
        if (intoTopicSerde instanceof KsqlAvroTopicSerDe) {
            intoTopicSerde = new KsqlAvroTopicSerDe();
        }
    }
    KsqlTopic newIntoKsqlTopic = new KsqlTopic(intoKafkaTopicName, intoKafkaTopicName, intoTopicSerde);
    KsqlStream intoKsqlStream = new KsqlStream(sqlExpression, intoStructuredDataSource.getName(), null, null, null, newIntoKsqlTopic);
    analysis.setInto(intoKsqlStream);
}
Also used : StructuredDataSource(io.confluent.ksql.metastore.StructuredDataSource) KsqlStream(io.confluent.ksql.metastore.KsqlStream) KsqlDelimitedTopicSerDe(io.confluent.ksql.serde.delimited.KsqlDelimitedTopicSerDe) KsqlAvroTopicSerDe(io.confluent.ksql.serde.avro.KsqlAvroTopicSerDe) KsqlTopicSerDe(io.confluent.ksql.serde.KsqlTopicSerDe) KsqlJsonTopicSerDe(io.confluent.ksql.serde.json.KsqlJsonTopicSerDe) KsqlException(io.confluent.ksql.util.KsqlException) Pair(io.confluent.ksql.util.Pair) KsqlTopic(io.confluent.ksql.metastore.KsqlTopic)
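
The switch above is the whole format-selection policy for a non-stdout sink: an explicit INTO format wins, otherwise the sink inherits the serde of the first source. A standalone sketch of that policy follows; the class name is hypothetical, the format strings assume the serde-name constants resolve to the literals AVRO, JSON and DELIMITED, and the branch that replaces an inherited Avro serde with a fresh instance is left out for brevity.

import io.confluent.ksql.serde.KsqlTopicSerDe;
import io.confluent.ksql.serde.avro.KsqlAvroTopicSerDe;
import io.confluent.ksql.serde.delimited.KsqlDelimitedTopicSerDe;
import io.confluent.ksql.serde.json.KsqlJsonTopicSerDe;
import io.confluent.ksql.util.KsqlException;

// Hypothetical helper, not KSQL API: maps an INTO format name to a sink serde,
// falling back to the first source's serde when no format was specified.
final class SinkSerdeResolver {

    static KsqlTopicSerDe resolve(final String intoFormat, final KsqlTopicSerDe sourceSerde) {
        if (intoFormat == null) {
            return sourceSerde;
        }
        switch (intoFormat.toUpperCase()) {
            case "AVRO":
                return new KsqlAvroTopicSerDe();
            case "JSON":
                return new KsqlJsonTopicSerDe();
            case "DELIMITED":
                return new KsqlDelimitedTopicSerDe();
            default:
                throw new KsqlException(String.format("Unsupported format: %s", intoFormat));
        }
    }

    private SinkSerdeResolver() {
    }
}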

Aggregations

KsqlTopicSerDe (io.confluent.ksql.serde.KsqlTopicSerDe): 6
GenericRow (io.confluent.ksql.GenericRow): 4
KsqlJsonTopicSerDe (io.confluent.ksql.serde.json.KsqlJsonTopicSerDe): 3
SchemaKStream (io.confluent.ksql.structured.SchemaKStream): 3
SchemaKTable (io.confluent.ksql.structured.SchemaKTable): 3
KsqlException (io.confluent.ksql.util.KsqlException): 3
ArrayList (java.util.ArrayList): 3
MockSchemaRegistryClient (io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient): 2
DereferenceExpression (io.confluent.ksql.parser.tree.DereferenceExpression): 2
Expression (io.confluent.ksql.parser.tree.Expression): 2
QualifiedNameReference (io.confluent.ksql.parser.tree.QualifiedNameReference): 2
PlanNode (io.confluent.ksql.planner.plan.PlanNode): 2
Pair (io.confluent.ksql.util.Pair): 2
Test (org.junit.Test): 2
KudafAggregator (io.confluent.ksql.function.udaf.KudafAggregator): 1
KudafInitializer (io.confluent.ksql.function.udaf.KudafInitializer): 1
KsqlStream (io.confluent.ksql.metastore.KsqlStream): 1
KsqlTable (io.confluent.ksql.metastore.KsqlTable): 1
KsqlTopic (io.confluent.ksql.metastore.KsqlTopic): 1
StructuredDataSource (io.confluent.ksql.metastore.StructuredDataSource): 1