
Example 51 with GenericRow

Use of io.confluent.ksql.GenericRow in project ksql by confluentinc.

Class UdfIntTest, method testApplyUdfsToColumns.

private void testApplyUdfsToColumns(String resultStreamName, String inputStreamName,
        DataSource.DataSourceSerDe dataSourceSerde) throws Exception {
    final String queryString = String.format("CREATE STREAM %s AS SELECT %s FROM %s WHERE %s;",
        resultStreamName,
        "ITEMID, ORDERUNITS*10, PRICEARRAY[0]+10, KEYVALUEMAP['key1']*KEYVALUEMAP['key2']+10, PRICEARRAY[1]>1000",
        inputStreamName, "ORDERUNITS > 20 AND ITEMID LIKE '%_8'");
    ksqlContext.sql(queryString);
    final Schema resultSchema = ksqlContext.getMetaStore().getSource(resultStreamName).getSchema();
    // Only ITEM_8 satisfies the WHERE clause; the two leading nulls correspond to
    // the implicit ROWTIME and ROWKEY columns.
    final Map<String, GenericRow> expectedResults = Collections.singletonMap("8",
        new GenericRow(Arrays.asList(null, null, "ITEM_8", 800.0, 1110.0, 12.0, true)));
    final Map<String, GenericRow> results = testHarness.consumeData(resultStreamName, resultSchema, 4,
        new StringDeserializer(), IntegrationTestHarness.RESULTS_POLL_MAX_TIME_MS, dataSourceSerde);
    assertThat(results, equalTo(expectedResults));
}
Also used : GenericRow(io.confluent.ksql.GenericRow) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) Schema(org.apache.kafka.connect.data.Schema)
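
For readers skimming the String.format call, the submitted statement expands to something like the following; RESULT_STREAM and INPUT_STREAM are illustrative placeholders for the names the caller passes in, not the test's actual values:

// Illustrative expansion only; the real names come from the method parameters.
final String expanded =
    "CREATE STREAM RESULT_STREAM AS SELECT "
        + "ITEMID, ORDERUNITS*10, PRICEARRAY[0]+10, "
        + "KEYVALUEMAP['key1']*KEYVALUEMAP['key2']+10, PRICEARRAY[1]>1000 "
        + "FROM INPUT_STREAM WHERE ORDERUNITS > 20 AND ITEMID LIKE '%_8';";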

Example 52 with GenericRow

Use of io.confluent.ksql.GenericRow in project ksql by confluentinc.

Class UdfIntTest, method testShouldCastSelectedColumns.

private void testShouldCastSelectedColumns(String resultStreamName, String inputStreamName,
        DataSource.DataSourceSerDe dataSourceSerde) throws Exception {
    final String selectColumns = "CAST(ORDERUNITS AS INTEGER), CAST(PRICEARRAY[1]>1000 AS STRING), "
        + "CAST(SUBSTRING(ITEMID, 5) AS DOUBLE), CAST(ORDERUNITS AS VARCHAR)";
    final String queryString = String.format("CREATE STREAM %s AS SELECT %s FROM %s WHERE %s;",
        resultStreamName, selectColumns, inputStreamName, "ORDERUNITS > 20 AND ITEMID LIKE '%_8'");
    ksqlContext.sql(queryString);
    final Schema resultSchema = ksqlContext.getMetaStore().getSource(resultStreamName).getSchema();
    final Map<String, GenericRow> expectedResults = Collections.singletonMap("8",
        new GenericRow(Arrays.asList(null, null, 80, "true", 8.0, "80.0")));
    final Map<String, GenericRow> results = testHarness.consumeData(resultStreamName, resultSchema, 4,
        new StringDeserializer(), IntegrationTestHarness.RESULTS_POLL_MAX_TIME_MS, dataSourceSerde);
    assertThat(results, equalTo(expectedResults));
}
Also used : GenericRow(io.confluent.ksql.GenericRow) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) Schema(org.apache.kafka.connect.data.Schema)
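
The expected row pins down how the casts behave. Below is a plain-Java mirror of the four casts, assuming ORDERUNITS = 80.0 and ITEMID = "ITEM_8" for the matching record (values implied by the expected results). Note that SUBSTRING evidently follows Java's zero-based substring semantics here, since SUBSTRING('ITEM_8', 5) must yield "8" to cast to the expected 8.0:

// Plain-Java sketch of the casts; the input values are inferred from the
// expected row, not taken from the test's data provider.
final double orderUnits = 80.0;
final String itemId = "ITEM_8";
final int castToInt = (int) orderUnits;                              // 80
final String castToString = String.valueOf(true);                    // "true" (PRICEARRAY[1] > 1000)
final double castToDouble = Double.parseDouble(itemId.substring(5)); // 8.0
final String castToVarchar = String.valueOf(orderUnits);             // "80.0"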

Example 53 with GenericRow

Use of io.confluent.ksql.GenericRow in project ksql by confluentinc.

Class UdfIntTest, method testApplyUdfsToColumnsDelimited.

@Test
public void testApplyUdfsToColumnsDelimited() throws Exception {
    final String testStreamName = "SelectUDFsStreamDelimited".toUpperCase();
    final String queryString = String.format("CREATE STREAM %s AS SELECT %s FROM %s WHERE %s;",
        testStreamName, "ID, DESCRIPTION", delimitedStreamName, "ID LIKE '%_1'");
    ksqlContext.sql(queryString);
    final Map<String, GenericRow> expectedResults = Collections.singletonMap("ITEM_1",
        new GenericRow(Arrays.asList("ITEM_1", "home cinema")));
    final Map<String, GenericRow> results = testHarness.consumeData(testStreamName,
        itemDataProvider.schema(), 1, new StringDeserializer(),
        IntegrationTestHarness.RESULTS_POLL_MAX_TIME_MS, DataSource.DataSourceSerDe.DELIMITED);
    assertThat(results, equalTo(expectedResults));
}
Also used : GenericRow(io.confluent.ksql.GenericRow) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) IntegrationTest(io.confluent.common.utils.IntegrationTest) Test(org.junit.Test)
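
Because this variant runs against the DELIMITED serde, the value of the matching Kafka record is just a comma-separated line rather than JSON or Avro. A hypothetical sketch of what the harness effectively puts on the wire (the topic name is a stand-in, not the harness's actual setup):

// Hypothetical wire format for the expected row; "item_topic" is illustrative.
final String delimitedValue = "ITEM_1,home cinema";
final ProducerRecord<String, String> record =
    new ProducerRecord<>("item_topic", "ITEM_1", delimitedValue);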

Example 54 with GenericRow

Use of io.confluent.ksql.GenericRow in project ksql by confluentinc.

Class WindowingIntTest, method shouldAggregateTumblingWindow.

@Test
public void shouldAggregateTumblingWindow() throws Exception {
    testHarness.publishTestData(topicName, dataProvider, now);
    final String streamName = "TUMBLING_AGGTEST";
    final String queryString = String.format(
        "CREATE TABLE %s AS SELECT %s FROM ORDERS WINDOW %s WHERE ITEMID = 'ITEM_1' GROUP BY ITEMID;",
        streamName, "ITEMID, COUNT(ITEMID), SUM(ORDERUNITS)", "TUMBLING ( SIZE 10 SECONDS)");
    ksqlContext.sql(queryString);
    Schema resultSchema = ksqlContext.getMetaStore().getSource(streamName).getSchema();
    final GenericRow expected = new GenericRow(
        Arrays.asList(null, null, "ITEM_1", 2, /* 2 x items */ 20.0));
    final Map<String, GenericRow> results = new HashMap<>();
    TestUtils.waitForCondition(() -> {
        final Map<Windowed<String>, GenericRow> windowedResults = testHarness.consumeData(streamName, resultSchema, 1, new TimeWindowedDeserializer<>(new StringDeserializer()), MAX_POLL_PER_ITERATION);
        updateResults(results, windowedResults);
        final GenericRow actual = results.get("ITEM_1");
        return expected.equals(actual);
    }, 60000, "didn't receive correct results within timeout");
    AdminClient adminClient = AdminClient.create(testHarness.ksqlConfig.getKsqlStreamConfigProps());
    KafkaTopicClient topicClient = new KafkaTopicClientImpl(adminClient);
    Set<String> topicBeforeCleanup = topicClient.listTopicNames();
    assertThat("Expected to have 5 topics instead have : " + topicBeforeCleanup.size(), topicBeforeCleanup.size(), equalTo(5));
    QueryMetadata queryMetadata = ksqlContext.getRunningQueries().iterator().next();
    queryMetadata.close();
    Set<String> topicsAfterCleanUp = topicClient.listTopicNames();
    assertThat("Expected to see 3 topics after clean up but seeing " + topicsAfterCleanUp.size(), topicsAfterCleanUp.size(), equalTo(3));
    assertThat(topicClient.getTopicCleanupPolicy(streamName), equalTo(KafkaTopicClient.TopicCleanupPolicy.DELETE));
}
Also used : QueryMetadata(io.confluent.ksql.util.QueryMetadata) HashMap(java.util.HashMap) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) Schema(org.apache.kafka.connect.data.Schema) GenericRow(io.confluent.ksql.GenericRow) Windowed(org.apache.kafka.streams.kstream.Windowed) KafkaTopicClient(io.confluent.ksql.util.KafkaTopicClient) KafkaTopicClientImpl(io.confluent.ksql.util.KafkaTopicClientImpl) AdminClient(org.apache.kafka.clients.admin.AdminClient) IntegrationTest(io.confluent.common.utils.IntegrationTest) Test(org.junit.Test)
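
The helper updateResults is referenced above but not shown in this excerpt. Given the types involved, a minimal sketch of what it presumably does is to strip the window from each key so rows can be looked up by plain item id; this is an assumption, not the project's actual implementation:

// Assumed shape of updateResults (not shown in this excerpt): flatten the
// windowed keys so the latest value per item id wins.
static void updateResults(final Map<String, GenericRow> results,
                          final Map<Windowed<String>, GenericRow> windowed) {
    // Windowed#key() drops the window bounds; later windows overwrite earlier ones.
    for (final Map.Entry<Windowed<String>, GenericRow> entry : windowed.entrySet()) {
        results.put(entry.getKey().key(), entry.getValue());
    }
}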

Example 55 with GenericRow

Use of io.confluent.ksql.GenericRow in project ksql by confluentinc.

Class SchemaKStream, method groupBy.

public SchemaKGroupedStream groupBy(final Serde<String> keySerde, final Serde<GenericRow> valSerde, final List<Expression> groupByExpressions) {
    boolean rekey = rekeyRequired(groupByExpressions);
    if (!rekey) {
        KGroupedStream kgroupedStream = kstream.groupByKey(Serialized.with(keySerde, valSerde));
        return new SchemaKGroupedStream(schema, kgroupedStream, keyField, Collections.singletonList(this), functionRegistry, schemaRegistryClient);
    }
    // Collect the column indexes, and build the new key as <column1>+<column2>+...
    StringBuilder aggregateKeyName = new StringBuilder();
    List<Integer> newKeyIndexes = new ArrayList<>();
    boolean addSeparator = false;
    for (Expression groupByExpr : groupByExpressions) {
        if (addSeparator) {
            aggregateKeyName.append("|+|");
        } else {
            addSeparator = true;
        }
        aggregateKeyName.append(groupByExpr.toString());
        newKeyIndexes.add(SchemaUtil.getIndexInSchema(groupByExpr.toString(), getSchema()));
    }
    KGroupedStream kgroupedStream = kstream.filter((key, value) -> value != null).groupBy((key, value) -> {
        StringBuilder newKey = new StringBuilder();
        boolean addSeparator1 = false;
        for (int index : newKeyIndexes) {
            if (addSeparator1) {
                newKey.append("|+|");
            } else {
                addSeparator1 = true;
            }
            newKey.append(String.valueOf(value.getColumns().get(index)));
        }
        return newKey.toString();
    }, Serialized.with(keySerde, valSerde));
    // TODO: if the key is a prefix of the grouping columns then we can
    // use the repartition reflection hack to tell streams not to
    // repartition.
    // The synthesized key is not a column in the value schema, hence the -1 index.
    Field newKeyField = new Field(aggregateKeyName.toString(), -1, Schema.STRING_SCHEMA);
    return new SchemaKGroupedStream(schema, kgroupedStream, newKeyField, Collections.singletonList(this), functionRegistry, schemaRegistryClient);
}
Also used : Arrays(java.util.Arrays) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) Produced(org.apache.kafka.streams.kstream.Produced) SchemaRegistryClient(io.confluent.kafka.schemaregistry.client.SchemaRegistryClient) Serialized(org.apache.kafka.streams.kstream.Serialized) KStream(org.apache.kafka.streams.kstream.KStream) Joined(org.apache.kafka.streams.kstream.Joined) Schema(org.apache.kafka.connect.data.Schema) ArrayList(java.util.ArrayList) Pair(io.confluent.ksql.util.Pair) Serde(org.apache.kafka.common.serialization.Serde) DereferenceExpression(io.confluent.ksql.parser.tree.DereferenceExpression) ExpressionMetadata(io.confluent.ksql.util.ExpressionMetadata) Serdes(org.apache.kafka.common.serialization.Serdes) CodeGenRunner(io.confluent.ksql.codegen.CodeGenRunner) SchemaUtil(io.confluent.ksql.util.SchemaUtil) OutputNode(io.confluent.ksql.planner.plan.OutputNode) Field(org.apache.kafka.connect.data.Field) FunctionRegistry(io.confluent.ksql.function.FunctionRegistry) Set(java.util.Set) KsqlConfig(io.confluent.ksql.util.KsqlConfig) Expression(io.confluent.ksql.parser.tree.Expression) List(java.util.List) ValueJoiner(org.apache.kafka.streams.kstream.ValueJoiner) GenericRow(io.confluent.ksql.GenericRow) Optional(java.util.Optional) KsqlException(io.confluent.ksql.util.KsqlException) SchemaBuilder(org.apache.kafka.connect.data.SchemaBuilder) KsqlTopicSerDe(io.confluent.ksql.serde.KsqlTopicSerDe) Collections(java.util.Collections) GenericRowValueTypeEnforcer(io.confluent.ksql.util.GenericRowValueTypeEnforcer) Field(org.apache.kafka.connect.data.Field) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) DereferenceExpression(io.confluent.ksql.parser.tree.DereferenceExpression) Expression(io.confluent.ksql.parser.tree.Expression) ArrayList(java.util.ArrayList)
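
As a usage sketch, grouping on multiple columns re-keys the stream with a synthetic string key joined by the "|+|" separator, and names the new key field from the expressions' toString() joined the same way. The call below is hypothetical; rowSerde and the two expressions are stand-ins for whatever the caller has in scope:

// Hypothetical call site for SchemaKStream#groupBy.
final SchemaKGroupedStream grouped = schemaKStream.groupBy(
    Serdes.String(),                             // Serde<String> for the new key
    rowSerde,                                    // Serde<GenericRow> for the values
    Arrays.asList(itemIdExpr, orderUnitsExpr));  // group-by expressions
// Assuming the expressions render as the bare column names, a row with
// ITEMID = "ITEM_8" and ORDERUNITS = 80.0 gets the key "ITEM_8|+|80.0",
// and the key field is named "ITEMID|+|ORDERUNITS".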

Aggregations

GenericRow (io.confluent.ksql.GenericRow) 65
Test (org.junit.Test) 38
HashMap (java.util.HashMap) 27
Schema (org.apache.kafka.connect.data.Schema) 19
List (java.util.List) 15
StringDeserializer (org.apache.kafka.common.serialization.StringDeserializer) 15
ArrayList (java.util.ArrayList) 11
MockSchemaRegistryClient (io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient) 9
IntegrationTest (io.confluent.common.utils.IntegrationTest) 8
SchemaRegistryClient (io.confluent.kafka.schemaregistry.client.SchemaRegistryClient) 8
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 5
KsqlTopicSerDe (io.confluent.ksql.serde.KsqlTopicSerDe) 5
KafkaTopicClient (io.confluent.ksql.util.KafkaTopicClient) 5
KafkaTopicClientImpl (io.confluent.ksql.util.KafkaTopicClientImpl) 5
KsqlConfig (io.confluent.ksql.util.KsqlConfig) 5
Map (java.util.Map) 5
GenericRecord (org.apache.avro.generic.GenericRecord) 4
Windowed (org.apache.kafka.streams.kstream.Windowed) 4
KafkaAvroDeserializer (io.confluent.kafka.serializers.KafkaAvroDeserializer) 3
DereferenceExpression (io.confluent.ksql.parser.tree.DereferenceExpression) 3