
Example 76 with LogicalSchema

Use of io.confluent.ksql.schema.ksql.LogicalSchema in project ksql by confluentinc.

The class KsMaterializationFunctionalTest, method shouldHandleHavingClause.

@Test
public void shouldHandleHavingClause() {
    // Note: HAVING clauses are handled centrally by KsqlMaterialization. This logic will have been
    // installed as part of building the statement below:
    // Given:
    final PersistentQueryMetadata query = executeQuery(
        "CREATE TABLE " + output + " AS"
            + " SELECT USERID, COUNT(*) AS COUNT FROM " + USER_TABLE
            + " GROUP BY USERID"
            + " HAVING SUM(REGISTERTIME) > 2;");
    final LogicalSchema schema = schema("COUNT", SqlTypes.BIGINT);
    final int matches = (int) USER_DATA_PROVIDER.data().values().stream()
        .filter(row -> ((Long) row.get(0)) > 2)
        .count();
    final Map<String, GenericRow> rows = waitForUniqueUserRows(matches, STRING_DESERIALIZER, schema);
    // When:
    final Materialization materialization = query.getMaterialization(queryId, contextStacker).get();
    // Then:
    final MaterializedTable table = materialization.nonWindowed();
    rows.forEach((rowKey, value) -> {
        // Rows passing the HAVING clause:
        final GenericKey key = genericKey(rowKey);
        final List<Row> rowList = withRetry(() -> Lists.newArrayList(table.get(key, PARTITION)));
        assertThat(rowList.size(), is(1));
        assertThat(rowList.get(0).schema(), is(schema));
        assertThat(rowList.get(0).key(), is(key));
        assertThat(rowList.get(0).value(), is(value));
    });
    USER_DATA_PROVIDER.data().entries().stream()
        .filter(e -> !rows.containsKey(e.getKey().get(0)))
        .forEach(e -> {
        // Rows filtered by the HAVING clause:
        final List<Row> rowList = withRetry(() -> Lists.newArrayList(table.get(e.getKey(), PARTITION)));
        assertThat(rowList.isEmpty(), is(true));
    });
}
Also used : Materialization(io.confluent.ksql.execution.streams.materialization.Materialization) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) GenericKey(io.confluent.ksql.GenericKey) Row(io.confluent.ksql.execution.streams.materialization.Row) WindowedRow(io.confluent.ksql.execution.streams.materialization.WindowedRow) GenericRow(io.confluent.ksql.GenericRow) MaterializedTable(io.confluent.ksql.execution.streams.materialization.MaterializedTable) PersistentQueryMetadata(io.confluent.ksql.util.PersistentQueryMetadata) IntegrationTest(org.apache.kafka.test.IntegrationTest) Test(org.junit.Test)
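
The helpers executeQuery, waitForUniqueUserRows, and withRetry are defined elsewhere in the test class. As a rough guide to what the retry wrapper does, here is a minimal sketch of a withRetry-style helper, assuming a fixed attempt limit and back-off; the real helper may differ.

// Hypothetical sketch of a retry helper similar to the test's withRetry;
// the attempt limit and back-off below are assumptions, not the real values.
import java.util.function.Supplier;

public final class RetrySketch {

    public static <T> T withRetry(final Supplier<T> supplier) {
        RuntimeException last = null;
        for (int attempt = 0; attempt < 10; attempt++) { // assumed attempt limit
            try {
                return supplier.get();
            } catch (final RuntimeException e) {
                last = e; // remember the failure and retry after a short pause
                try {
                    Thread.sleep(100L); // assumed fixed back-off
                } catch (final InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    throw new IllegalStateException(ie);
                }
            }
        }
        throw last;
    }

    public static void main(final String[] args) {
        System.out.println(withRetry(() -> "ok")); // succeeds on the first attempt
    }
}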

Example 77 with LogicalSchema

Use of io.confluent.ksql.schema.ksql.LogicalSchema in project ksql by confluentinc.

The class KsMaterializationFunctionalTest, method shouldQueryMaterializedTableForTumblingWindowed.

@Test
public void shouldQueryMaterializedTableForTumblingWindowed() {
    // Given:
    final PersistentQueryMetadata query = executeQuery(
        "CREATE TABLE " + output + " AS"
            + " SELECT USERID, COUNT(*) AS COUNT FROM " + USER_STREAM
            + " WINDOW TUMBLING (SIZE " + WINDOW_SIZE.getSeconds() + " SECONDS)"
            + " GROUP BY USERID;");
    final LogicalSchema schema = schema("COUNT", SqlTypes.BIGINT);
    final Map<Windowed<String>, GenericRow> rows = waitForUniqueUserRows(TIME_WINDOWED_DESERIALIZER, schema);
    // When:
    final Materialization materialization = query.getMaterialization(queryId, contextStacker).get();
    // Then:
    assertThat(materialization.windowType(), is(Optional.of(WindowType.TUMBLING)));
    final MaterializedWindowedTable table = materialization.windowed();
    rows.forEach((k, v) -> {
        final Window w = Window.of(k.window().startTime(), k.window().endTime());
        final GenericKey key = genericKey(k.key());
        final List<WindowedRow> resultAtWindowStart = withRetry(() -> Lists.newArrayList(
            table.get(key, PARTITION, Range.singleton(w.start()), Range.all())));
        assertThat("at exact window start", resultAtWindowStart, hasSize(1));
        assertThat(resultAtWindowStart.get(0).schema(), is(schema));
        assertThat(resultAtWindowStart.get(0).window(), is(Optional.of(w)));
        assertThat(resultAtWindowStart.get(0).key(), is(key));
        assertThat(resultAtWindowStart.get(0).value(), is(v));
        final List<WindowedRow> resultAtWindowEnd = withRetry(() -> Lists.newArrayList(
            table.get(key, PARTITION, Range.all(), Range.singleton(w.end()))));
        assertThat("at exact window end", resultAtWindowEnd, hasSize(1));
        final List<WindowedRow> resultFromRange = withRetry(() -> Lists.newArrayList(
            table.get(key, PARTITION,
                Range.closed(w.start().minusMillis(1), w.start().plusMillis(1)), Range.all())));
        assertThat("range including window start", resultFromRange, is(resultAtWindowStart));
        final List<WindowedRow> resultPast = withRetry(() -> Lists.newArrayList(
            table.get(key, PARTITION,
                Range.closed(w.start().plusMillis(1), w.start().plusMillis(1)), Range.all())));
        assertThat("past start", resultPast, is(empty()));
    });
}
Also used : Windowed(org.apache.kafka.streams.kstream.Windowed) GenericRow(io.confluent.ksql.GenericRow) Window(io.confluent.ksql.Window) Materialization(io.confluent.ksql.execution.streams.materialization.Materialization) MaterializedWindowedTable(io.confluent.ksql.execution.streams.materialization.MaterializedWindowedTable) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) GenericKey(io.confluent.ksql.GenericKey) WindowedRow(io.confluent.ksql.execution.streams.materialization.WindowedRow) PersistentQueryMetadata(io.confluent.ksql.util.PersistentQueryMetadata) IntegrationTest(org.apache.kafka.test.IntegrationTest) Test(org.junit.Test)
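
Tumbling windows in Kafka Streams are epoch-aligned and non-overlapping, which is why a lookup at the exact window start returns exactly one row while a start-range one millisecond past it returns nothing. A small standalone sketch of the window-boundary arithmetic (class and method names here are illustrative):

import java.time.Duration;
import java.time.Instant;

public final class TumblingWindowMath {

    // Tumbling windows are epoch-aligned: the window containing a timestamp
    // starts at the largest multiple of the window size not exceeding it.
    static Instant windowStart(final Instant recordTime, final Duration windowSize) {
        final long sizeMs = windowSize.toMillis();
        final long startMs = recordTime.toEpochMilli() - (recordTime.toEpochMilli() % sizeMs);
        return Instant.ofEpochMilli(startMs);
    }

    public static void main(final String[] args) {
        final Instant ts = Instant.ofEpochMilli(12_345L);
        final Duration size = Duration.ofSeconds(5);
        // A record at 12.345s falls in the 5s window starting at 10s.
        System.out.println(windowStart(ts, size)); // 1970-01-01T00:00:10Z
    }
}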

Example 78 with LogicalSchema

Use of io.confluent.ksql.schema.ksql.LogicalSchema in project ksql by confluentinc.

The class JsonFormatTest, method produceInitData.

private static void produceInitData() {
    TEST_HARNESS.produceRows(inputTopic, ORDER_DATA_PROVIDER, KAFKA, JSON);
    final LogicalSchema messageSchema = LogicalSchema.builder()
        .keyColumn(SystemColumns.ROWKEY_NAME, SqlTypes.STRING)
        .valueColumn(ColumnName.of("MESSAGE"), SqlTypes.STRING)
        .build();
    final GenericKey messageKey = genericKey("1");
    final GenericRow messageRow = genericRow(
        "{\"log\":{\"@timestamp\":\"2017-05-30T16:44:22.175Z\",\"@version\":\"1\","
            + "\"caasVersion\":\"0.0.2\",\"cloud\":\"aws\",\"logs\":[{\"entry\":\"first\"}],"
            + "\"clusterId\":\"cp99\",\"clusterName\":\"kafka\",\"cpComponentId\":\"kafka\","
            + "\"host\":\"kafka-1-wwl0p\",\"k8sId\":\"k8s13\",\"k8sName\":\"perf\",\"level\":\"ERROR\","
            + "\"logger\":\"kafka.server.ReplicaFetcherThread\","
            + "\"message\":\"Found invalid messages during fetch for partition [foo512,172] offset 0"
            + " error Record is corrupt (stored crc = 1321230880, computed crc = 1139143803)\","
            + "\"networkId\":\"vpc-d8c7a9bf\",\"region\":\"us-west-2\",\"serverId\":\"1\",\"skuId\":\"sku5\","
            + "\"source\":\"kafka\",\"tenantId\":\"t47\",\"tenantName\":\"perf-test\","
            + "\"thread\":\"ReplicaFetcherThread-0-2\",\"zone\":\"us-west-2a\"},\"stream\":\"stdout\",\"time\":2017}");
    final Map<GenericKey, GenericRow> records = new HashMap<>();
    records.put(messageKey, messageRow);
    final PhysicalSchema schema = PhysicalSchema.from(messageSchema, SerdeFeatures.of(), SerdeFeatures.of());
    TEST_HARNESS.produceRows(messageLogTopic, records.entrySet(), schema, KAFKA, JSON);
}
Also used : GenericRow(io.confluent.ksql.GenericRow) PhysicalSchema(io.confluent.ksql.schema.ksql.PhysicalSchema) HashMap(java.util.HashMap) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) GenericKey(io.confluent.ksql.GenericKey)
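
The same builder pattern generalizes to any mix of key and value columns. A minimal standalone sketch using only the builder calls shown above (the column names here are illustrative, not from the test):

import io.confluent.ksql.name.ColumnName;
import io.confluent.ksql.schema.ksql.LogicalSchema;
import io.confluent.ksql.schema.ksql.types.SqlTypes;

public final class SchemaSketch {
    public static void main(final String[] args) {
        // Illustrative schema: one STRING key column and two value columns.
        final LogicalSchema schema = LogicalSchema.builder()
            .keyColumn(ColumnName.of("ID"), SqlTypes.STRING)
            .valueColumn(ColumnName.of("NAME"), SqlTypes.STRING)
            .valueColumn(ColumnName.of("COUNT"), SqlTypes.BIGINT)
            .build();
        System.out.println(schema);
    }
}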

Example 79 with LogicalSchema

Use of io.confluent.ksql.schema.ksql.LogicalSchema in project ksql by confluentinc.

The class ProjectOperatorTest, method shouldProjectOnlyWindowStartWindowed.

@Test
public void shouldProjectOnlyWindowStartWindowed() {
    // Given:
    final LogicalSchema schema = LogicalSchema.builder()
        .valueColumn(SystemColumns.WINDOWSTART_NAME, SqlTypes.BIGINT)
        .build();
    when(logicalNode.getAddAdditionalColumnsToIntermediateSchema()).thenReturn(true);
    when(logicalNode.getSchema()).thenReturn(schema);
    when(logicalNode.getCompiledSelectExpressions()).thenReturn(Collections.emptyList());
    final ProjectOperator projectOperator = new ProjectOperator(logger, logicalNode, selectValueMapperFactorySupplier);
    projectOperator.addChild(child);
    final QueryRowImpl windowedRow = QueryRowImpl.of(
        WINDOWED_INTERMEDIATE_SCHEMA_WITH_PSEUDO,
        A_KEY,
        Optional.of(A_WINDOW),
        GenericRow.genericRow("a", "b", A_ROWTIME, "k", A_WINDOW.start().toEpochMilli(), A_WINDOW.end().toEpochMilli()),
        A_ROWTIME);
    when(child.next()).thenReturn(windowedRow);
    when(selectValueMapperFactorySupplier.create(any(), any())).thenReturn(selectValueMapper);
    when(selectValueMapper.getTransformer(logger)).thenReturn(transformer);
    when(transformer.transform(A_KEY, windowedRow.value(), new PullProcessingContext(12335L))).thenReturn(GenericRow.genericRow(A_WINDOW.start().toEpochMilli()));
    projectOperator.open();
    // When:
    QueryRow result = (QueryRow) projectOperator.next();
    // Then:
    assertThat(result.value().values(), is(ImmutableList.of(A_WINDOW.start().toEpochMilli())));
}
Also used : QueryRow(io.confluent.ksql.physical.common.QueryRow) QueryRowImpl(io.confluent.ksql.physical.common.QueryRowImpl) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) PullProcessingContext(io.confluent.ksql.execution.streams.materialization.PullProcessingContext) Test(org.junit.Test)
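
WINDOWED_INTERMEDIATE_SCHEMA_WITH_PSEUDO is a constant defined elsewhere in the test class. Judging by the GenericRow above (two value columns, then ROWTIME, the key, and the window bounds), a plausible reconstruction looks like the following sketch; the real constant may differ:

// Hypothetical reconstruction of WINDOWED_INTERMEDIATE_SCHEMA_WITH_PSEUDO,
// inferred from the row values in the test ("a", "b", A_ROWTIME, "k",
// window start, window end); not copied from the real constant.
import io.confluent.ksql.name.ColumnName;
import io.confluent.ksql.schema.ksql.LogicalSchema;
import io.confluent.ksql.schema.ksql.SystemColumns;
import io.confluent.ksql.schema.ksql.types.SqlTypes;

final class IntermediateSchemaSketch {
    static final LogicalSchema WINDOWED_INTERMEDIATE_SCHEMA_WITH_PSEUDO =
        LogicalSchema.builder()
            .valueColumn(ColumnName.of("v0"), SqlTypes.STRING)            // "a"
            .valueColumn(ColumnName.of("v1"), SqlTypes.STRING)            // "b"
            .valueColumn(SystemColumns.ROWTIME_NAME, SqlTypes.BIGINT)     // A_ROWTIME
            .valueColumn(ColumnName.of("k0"), SqlTypes.STRING)            // key copied into the value
            .valueColumn(SystemColumns.WINDOWSTART_NAME, SqlTypes.BIGINT) // window start millis
            .valueColumn(SystemColumns.WINDOWEND_NAME, SqlTypes.BIGINT)   // window end millis
            .build();
}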

Example 80 with LogicalSchema

Use of io.confluent.ksql.schema.ksql.LogicalSchema in project ksql by confluentinc.

The class ProjectOperatorTest, method shouldProjectKeyAndValueNonWindowed.

@Test
public void shouldProjectKeyAndValueNonWindowed() {
    // Given:
    final LogicalSchema schema = LogicalSchema.builder()
        .keyColumn(ColumnName.of("k0"), SqlTypes.STRING)
        .valueColumn(ColumnName.of("v1"), SqlTypes.STRING)
        .build();
    when(logicalNode.getAddAdditionalColumnsToIntermediateSchema()).thenReturn(true);
    when(logicalNode.getSchema()).thenReturn(schema);
    when(logicalNode.getCompiledSelectExpressions()).thenReturn(Collections.emptyList());
    final ProjectOperator projectOperator = new ProjectOperator(logger, logicalNode, selectValueMapperFactorySupplier);
    projectOperator.addChild(child);
    final QueryRowImpl row = QueryRowImpl.of(
        INTERMEDIATE_SCHEMA_WITH_PSEUDO,
        A_KEY,
        Optional.empty(),
        GenericRow.genericRow("a", "b", A_ROWTIME, "k"),
        A_ROWTIME);
    when(child.next()).thenReturn(row);
    when(selectValueMapperFactorySupplier.create(any(), any())).thenReturn(selectValueMapper);
    when(selectValueMapper.getTransformer(logger)).thenReturn(transformer);
    when(transformer.transform(A_KEY, row.value(), new PullProcessingContext(12335L))).thenReturn(GenericRow.genericRow("k", "b"));
    projectOperator.open();
    // When:
    QueryRow result = (QueryRow) projectOperator.next();
    // Then:
    final List<Object> expected = new ArrayList<>(row.key().values());
    expected.add(row.value().values().get(1));
    assertThat(result.value().values(), is(expected));
}
Also used : QueryRow(io.confluent.ksql.physical.common.QueryRow) QueryRowImpl(io.confluent.ksql.physical.common.QueryRowImpl) ArrayList(java.util.ArrayList) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) PullProcessingContext(io.confluent.ksql.execution.streams.materialization.PullProcessingContext) Test(org.junit.Test)
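
Both ProjectOperator tests follow the same lifecycle: stub the logical node and mapper factory, attach a child, open the operator, then pull one row with next(). A condensed sketch of the skeleton the two tests could share, assuming the test class's Mockito mocks (logicalNode, child, selectValueMapperFactorySupplier, selectValueMapper, transformer, logger) and its static imports of when()/any():

// Condensed sketch of the shared test skeleton; the surrounding mocks and
// imports are assumed to match the tests above.
private QueryRow runProjection(
    final LogicalSchema schema,
    final QueryRow input,
    final GenericRow projected
) {
    when(logicalNode.getAddAdditionalColumnsToIntermediateSchema()).thenReturn(true);
    when(logicalNode.getSchema()).thenReturn(schema);
    when(logicalNode.getCompiledSelectExpressions()).thenReturn(Collections.emptyList());
    when(child.next()).thenReturn(input);
    when(selectValueMapperFactorySupplier.create(any(), any())).thenReturn(selectValueMapper);
    when(selectValueMapper.getTransformer(logger)).thenReturn(transformer);
    when(transformer.transform(any(), any(), any())).thenReturn(projected);

    final ProjectOperator projectOperator =
        new ProjectOperator(logger, logicalNode, selectValueMapperFactorySupplier);
    projectOperator.addChild(child);
    projectOperator.open();
    return (QueryRow) projectOperator.next();
}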

Aggregations

LogicalSchema (io.confluent.ksql.schema.ksql.LogicalSchema): 223 uses
Test (org.junit.Test): 152 uses
Expression (io.confluent.ksql.execution.expression.tree.Expression): 44 uses
ColumnName (io.confluent.ksql.name.ColumnName): 31 uses
GenericRow (io.confluent.ksql.GenericRow): 30 uses
UnqualifiedColumnReferenceExp (io.confluent.ksql.execution.expression.tree.UnqualifiedColumnReferenceExp): 29 uses
KsqlException (io.confluent.ksql.util.KsqlException): 27 uses
GenericKey (io.confluent.ksql.GenericKey): 20 uses
StringLiteral (io.confluent.ksql.execution.expression.tree.StringLiteral): 19 uses
List (java.util.List): 16 uses
TimestampColumn (io.confluent.ksql.execution.timestamp.TimestampColumn): 14 uses
SqlType (io.confluent.ksql.schema.ksql.types.SqlType): 14 uses
Optional (java.util.Optional): 14 uses
Collectors (java.util.stream.Collectors): 14 uses
QueryContext (io.confluent.ksql.execution.context.QueryContext): 13 uses
ArithmeticBinaryExpression (io.confluent.ksql.execution.expression.tree.ArithmeticBinaryExpression): 12 uses
DereferenceExpression (io.confluent.ksql.execution.expression.tree.DereferenceExpression): 12 uses
SelectExpression (io.confluent.ksql.execution.plan.SelectExpression): 12 uses
Column (io.confluent.ksql.schema.ksql.Column): 12 uses
ComparisonExpression (io.confluent.ksql.execution.expression.tree.ComparisonExpression): 11 uses