Search in sources :

Example 6 with KeyFormat

use of io.confluent.ksql.serde.KeyFormat in project ksql by confluentinc.

the class JoinNode method resolveKeyFormats.

/**
 * Determines the key format of the join.
 *
 * <p>For now, the left key format is the preferred join key format unless the
 * right source is not already being repartitioned and the left source is.
 *
 * @see <a href="https://github.com/confluentinc/ksql/blob/master/design-proposals/klip-33-key-format.md">KLIP-33</a>
 */
public void resolveKeyFormats() {
    final KeyFormat joinKeyFormat = getPreferredKeyFormat().orElseGet(this::getDefaultSourceKeyFormat);
    setKeyFormat(joinKeyFormat);
}
Also used : KeyFormat(io.confluent.ksql.serde.KeyFormat)

Example 7 with KeyFormat

use of io.confluent.ksql.serde.KeyFormat in project ksql by confluentinc.

the class MetaStoreFixture method getNewMetaStore.

public static MutableMetaStore getNewMetaStore(final FunctionRegistry functionRegistry, final ValueFormat valueFormat) {
    final MutableMetaStore metaStore = new MetaStoreImpl(functionRegistry);
    final KeyFormat keyFormat = KeyFormat.nonWindowed(FormatInfo.of(FormatFactory.KAFKA.name()), SerdeFeatures.of());
    final LogicalSchema test1Schema = LogicalSchema.builder().keyColumn(ColumnName.of("COL0"), SqlTypes.BIGINT).valueColumn(ColumnName.of("COL1"), SqlTypes.STRING).valueColumn(ColumnName.of("COL2"), SqlTypes.STRING).valueColumn(ColumnName.of("COL3"), SqlTypes.DOUBLE).valueColumn(ColumnName.of("COL4"), SqlTypes.array(SqlTypes.DOUBLE)).valueColumn(ColumnName.of("COL5"), SqlTypes.map(SqlTypes.STRING, SqlTypes.DOUBLE)).headerColumn(ColumnName.of("HEAD"), Optional.empty()).build();
    final KsqlTopic ksqlTopic0 = new KsqlTopic("test0", keyFormat, valueFormat);
    final KsqlStream<?> ksqlStream0 = new KsqlStream<>("sqlexpression", SourceName.of("TEST0"), test1Schema, Optional.empty(), false, ksqlTopic0, false);
    metaStore.putSource(ksqlStream0, false);
    final KsqlTopic ksqlTopic1 = new KsqlTopic("test1", keyFormat, valueFormat);
    final KsqlStream<?> ksqlStream1 = new KsqlStream<>("sqlexpression", SourceName.of("TEST1"), test1Schema, Optional.empty(), false, ksqlTopic1, false);
    metaStore.putSource(ksqlStream1, false);
    final LogicalSchema test2Schema = LogicalSchema.builder().keyColumn(ColumnName.of("COL0"), SqlTypes.BIGINT).valueColumn(ColumnName.of("COL1"), SqlTypes.STRING).valueColumn(ColumnName.of("COL2"), SqlTypes.STRING).valueColumn(ColumnName.of("COL3"), SqlTypes.DOUBLE).valueColumn(ColumnName.of("COL4"), SqlTypes.BOOLEAN).build();
    final KsqlTopic ksqlTopic2 = new KsqlTopic("test2", keyFormat, valueFormat);
    final KsqlTable<String> ksqlTable = new KsqlTable<>("sqlexpression", SourceName.of("TEST2"), test2Schema, Optional.empty(), false, ksqlTopic2, false);
    metaStore.putSource(ksqlTable, false);
    final SqlType addressSchema = SqlTypes.struct().field("NUMBER", SqlTypes.BIGINT).field("STREET", SqlTypes.STRING).field("CITY", SqlTypes.STRING).field("STATE", SqlTypes.STRING).field("ZIPCODE", SqlTypes.BIGINT).build();
    final SqlType categorySchema = SqlTypes.struct().field("ID", SqlTypes.BIGINT).field("NAME", SqlTypes.STRING).build();
    final SqlType itemInfoSchema = SqlTypes.struct().field("ITEMID", SqlTypes.BIGINT).field("NAME", SqlTypes.STRING).field("CATEGORY", categorySchema).build();
    final LogicalSchema ordersSchema = LogicalSchema.builder().keyColumn(ColumnName.of("ORDERTIME"), SqlTypes.BIGINT).valueColumn(ColumnName.of("ORDERID"), SqlTypes.BIGINT).valueColumn(ColumnName.of("ITEMID"), SqlTypes.STRING).valueColumn(ColumnName.of("ITEMINFO"), itemInfoSchema).valueColumn(ColumnName.of("ORDERUNITS"), SqlTypes.INTEGER).valueColumn(ColumnName.of("ARRAYCOL"), SqlTypes.array(SqlTypes.DOUBLE)).valueColumn(ColumnName.of("MAPCOL"), SqlTypes.map(SqlTypes.STRING, SqlTypes.DOUBLE)).valueColumn(ColumnName.of("ADDRESS"), addressSchema).valueColumn(ColumnName.of("TIMESTAMPCOL"), SqlTypes.TIMESTAMP).valueColumn(ColumnName.of("TIMECOL"), SqlTypes.TIME).valueColumn(ColumnName.of("DATECOL"), SqlTypes.DATE).valueColumn(ColumnName.of("BYTESCOL"), SqlTypes.BYTES).build();
    final KsqlTopic ksqlTopicOrders = new KsqlTopic("orders_topic", keyFormat, valueFormat);
    final KsqlStream<?> ksqlStreamOrders = new KsqlStream<>("sqlexpression", SourceName.of("ORDERS"), ordersSchema, Optional.empty(), false, ksqlTopicOrders, false);
    metaStore.putSource(ksqlStreamOrders, false);
    final LogicalSchema testTable3 = LogicalSchema.builder().keyColumn(ColumnName.of("COL0"), SqlTypes.BIGINT).valueColumn(ColumnName.of("COL1"), SqlTypes.STRING).valueColumn(ColumnName.of("COL2"), SqlTypes.STRING).valueColumn(ColumnName.of("COL3"), SqlTypes.DOUBLE).valueColumn(ColumnName.of("COL4"), SqlTypes.BOOLEAN).build();
    final KsqlTopic ksqlTopic3 = new KsqlTopic("test3", keyFormat, valueFormat);
    final KsqlTable<String> ksqlTable3 = new KsqlTable<>("sqlexpression", SourceName.of("TEST3"), testTable3, Optional.empty(), false, ksqlTopic3, false);
    metaStore.putSource(ksqlTable3, false);
    final SqlType nestedOrdersSchema = SqlTypes.struct().field("ORDERTIME", SqlTypes.BIGINT).field("ORDERID", SqlTypes.BIGINT).field("ITEMID", SqlTypes.STRING).field("ITEMINFO", itemInfoSchema).field("ORDERUNITS", SqlTypes.INTEGER).field("ARRAYCOL", SqlTypes.array(SqlTypes.DOUBLE)).field("MAPCOL", SqlTypes.map(SqlTypes.STRING, SqlTypes.DOUBLE)).field("ADDRESS", addressSchema).build();
    final LogicalSchema nestedArrayStructMapSchema = LogicalSchema.builder().keyColumn(ColumnName.of("K"), SqlTypes.STRING).valueColumn(ColumnName.of("ARRAYCOL"), SqlTypes.array(itemInfoSchema)).valueColumn(ColumnName.of("MAPCOL"), SqlTypes.map(SqlTypes.STRING, itemInfoSchema)).valueColumn(ColumnName.of("NESTED_ORDER_COL"), nestedOrdersSchema).valueColumn(ColumnName.of("ITEM"), itemInfoSchema).build();
    final KsqlTopic nestedArrayStructMapTopic = new KsqlTopic("NestedArrayStructMap_topic", keyFormat, valueFormat);
    final KsqlStream<?> nestedArrayStructMapOrders = new KsqlStream<>("sqlexpression", SourceName.of("NESTED_STREAM"), nestedArrayStructMapSchema, Optional.empty(), false, nestedArrayStructMapTopic, false);
    metaStore.putSource(nestedArrayStructMapOrders, false);
    final KsqlTopic ksqlTopic4 = new KsqlTopic("test4", keyFormat, valueFormat);
    final KsqlStream<?> ksqlStream4 = new KsqlStream<>("sqlexpression4", SourceName.of("TEST4"), test1Schema, Optional.empty(), false, ksqlTopic4, false);
    metaStore.putSource(ksqlStream4, false);
    final LogicalSchema sensorReadingsSchema = LogicalSchema.builder().keyColumn(ColumnName.of("ID"), SqlTypes.BIGINT).valueColumn(ColumnName.of("SENSOR_NAME"), SqlTypes.STRING).valueColumn(ColumnName.of("ARR1"), SqlTypes.array(SqlTypes.BIGINT)).valueColumn(ColumnName.of("ARR2"), SqlTypes.array(SqlTypes.STRING)).build();
    final KsqlTopic ksqlTopicSensorReadings = new KsqlTopic("sensor_readings_topic", keyFormat, valueFormat);
    final KsqlStream<?> ksqlStreamSensorReadings = new KsqlStream<>("sqlexpression", SourceName.of("SENSOR_READINGS"), sensorReadingsSchema, Optional.empty(), false, ksqlTopicSensorReadings, false);
    metaStore.putSource(ksqlStreamSensorReadings, false);
    final LogicalSchema testTable5 = LogicalSchema.builder().keyColumn(ColumnName.of("A"), SqlTypes.BOOLEAN).valueColumn(ColumnName.of("B"), SqlTypes.BOOLEAN).valueColumn(ColumnName.of("C"), SqlTypes.BOOLEAN).valueColumn(ColumnName.of("D"), SqlTypes.BOOLEAN).valueColumn(ColumnName.of("E"), SqlTypes.BOOLEAN).valueColumn(ColumnName.of("F"), SqlTypes.BOOLEAN).valueColumn(ColumnName.of("G"), SqlTypes.BOOLEAN).build();
    final KsqlTopic ksqlTopic5 = new KsqlTopic("test5", keyFormat, valueFormat);
    final KsqlTable<String> ksqlTable5 = new KsqlTable<>("sqlexpression", SourceName.of("TEST5"), testTable5, Optional.empty(), false, ksqlTopic5, false);
    metaStore.putSource(ksqlTable5, false);
    return metaStore;
}
Also used : KsqlStream(io.confluent.ksql.metastore.model.KsqlStream) MetaStoreImpl(io.confluent.ksql.metastore.MetaStoreImpl) KsqlTable(io.confluent.ksql.metastore.model.KsqlTable) MutableMetaStore(io.confluent.ksql.metastore.MutableMetaStore) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) SqlType(io.confluent.ksql.schema.ksql.types.SqlType) KeyFormat(io.confluent.ksql.serde.KeyFormat) KsqlTopic(io.confluent.ksql.execution.ddl.commands.KsqlTopic)

Example 8 with KeyFormat

use of io.confluent.ksql.serde.KeyFormat in project ksql by confluentinc.

the class ScalablePushRegistryTest method setUp.

@Before
public void setUp() {
    when(ksqlTopic.getKafkaTopicName()).thenReturn(TOPIC);
    when(kafkaConsumerFactory.create(any(), any(), any(), any(), any(), any())).thenReturn(kafkaConsumer);
    catchupCoordinator = new TestCatchupCoordinator();
    latestConsumer = new TestLatestConsumer(TOPIC, false, SCHEMA, kafkaConsumer, catchupCoordinator, assignment -> {
    }, ksqlConfig, Clock.systemUTC());
    latestConsumer2 = new TestLatestConsumer(TOPIC, false, SCHEMA, kafkaConsumer, catchupCoordinator, assignment -> {
    }, ksqlConfig, Clock.systemUTC());
    catchupConsumer = new TestCatchupConsumer(TOPIC, false, SCHEMA, kafkaConsumer, () -> latestConsumer, catchupCoordinator, pushOffsetRange, Clock.systemUTC(), pq -> {
    });
    when(latestConsumerFactory.create(any(), anyBoolean(), any(), any(), any(), any(), any(), any())).thenReturn(latestConsumer, latestConsumer2);
    when(catchupConsumerFactory.create(any(), anyBoolean(), any(), any(), any(), any(), any(), any(), anyLong(), any())).thenReturn(catchupConsumer);
    when(ksqlTopic.getKeyFormat()).thenReturn(keyFormat);
    when(keyFormat.isWindowed()).thenReturn(false);
    realExecutorService = Executors.newFixedThreadPool(2);
    doAnswer(a -> {
        final Runnable runnable = a.getArgument(0);
        startLatestRunnable.set(runnable);
        realExecutorService.submit(runnable);
        return null;
    }).when(executorService).submit(any(Runnable.class));
    doAnswer(a -> {
        final Runnable runnable = a.getArgument(0);
        realExecutorService.submit(runnable);
        return null;
    }).when(catchupService).submit(any(Runnable.class));
    when(processingQueue.getQueryId()).thenReturn(new QueryId("q1"));
    when(processingQueue2.getQueryId()).thenReturn(new QueryId("q2"));
    registry = new ScalablePushRegistry(locator, SCHEMA, false, ImmutableMap.of(), ksqlTopic, serviceContext, ksqlConfig, SOURCE_APP_ID, kafkaConsumerFactory, latestConsumerFactory, catchupConsumerFactory, executorService, catchupService);
    when(ksqlConfig.getInt(KsqlConfig.KSQL_QUERY_PUSH_V2_MAX_CATCHUP_CONSUMERS)).thenReturn(10);
}
Also used : CatchupCoordinator(io.confluent.ksql.physical.scalablepush.consumer.CatchupCoordinator) ColumnName(io.confluent.ksql.name.ColumnName) AssertEventually.assertThatEventually(io.confluent.ksql.test.util.AssertEventually.assertThatEventually) ServiceContext(io.confluent.ksql.services.ServiceContext) ArgumentMatchers.contains(org.mockito.ArgumentMatchers.contains) PushLocator(io.confluent.ksql.physical.scalablepush.locator.PushLocator) Mockito.doThrow(org.mockito.Mockito.doThrow) CatchupConsumer(io.confluent.ksql.physical.scalablepush.consumer.CatchupConsumer) Mockito.doAnswer(org.mockito.Mockito.doAnswer) After(org.junit.After) QueryId(io.confluent.ksql.query.QueryId) Mockito.doReturn(org.mockito.Mockito.doReturn) TopicPartition(org.apache.kafka.common.TopicPartition) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) KsqlConfig(io.confluent.ksql.util.KsqlConfig) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) Executors(java.util.concurrent.Executors) CatchupMetadata(io.confluent.ksql.physical.scalablepush.ScalablePushRegistry.CatchupMetadata) Optional(java.util.Optional) Matchers.is(org.hamcrest.Matchers.is) Matchers.containsString(org.hamcrest.Matchers.containsString) MockitoJUnitRunner(org.mockito.junit.MockitoJUnitRunner) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) StreamsConfig(org.apache.kafka.streams.StreamsConfig) ArgumentMatchers.anyLong(org.mockito.ArgumentMatchers.anyLong) KeyFormat(io.confluent.ksql.serde.KeyFormat) Mock(org.mockito.Mock) Assert.assertThrows(org.junit.Assert.assertThrows) RunWith(org.junit.runner.RunWith) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ArgumentMatchers.anyBoolean(org.mockito.ArgumentMatchers.anyBoolean) AtomicReference(java.util.concurrent.atomic.AtomicReference) Function(java.util.function.Function) Supplier(java.util.function.Supplier) CatchupConsumerFactory(io.confluent.ksql.physical.scalablepush.consumer.CatchupConsumer.CatchupConsumerFactory) PushOffsetRange(io.confluent.ksql.util.PushOffsetRange) KafkaConsumerFactoryInterface(io.confluent.ksql.physical.scalablepush.consumer.KafkaConsumerFactory.KafkaConsumerFactoryInterface) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) ExecutorService(java.util.concurrent.ExecutorService) Before(org.junit.Before) LatestConsumer(io.confluent.ksql.physical.scalablepush.consumer.LatestConsumer) LatestConsumerFactory(io.confluent.ksql.physical.scalablepush.consumer.LatestConsumer.LatestConsumerFactory) Mockito.times(org.mockito.Mockito.times) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) Mockito.verify(org.mockito.Mockito.verify) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) Mockito.never(org.mockito.Mockito.never) GenericRow(io.confluent.ksql.GenericRow) KsqlTopic(io.confluent.ksql.execution.ddl.commands.KsqlTopic) Clock(java.time.Clock) SqlTypes(io.confluent.ksql.schema.ksql.types.SqlTypes) Collections(java.util.Collections) QueryId(io.confluent.ksql.query.QueryId) Before(org.junit.Before)

Example 9 with KeyFormat

use of io.confluent.ksql.serde.KeyFormat in project ksql by confluentinc.

the class SchemaKGroupedTable method aggregate.

@Override
public SchemaKTable<GenericKey> aggregate(final List<ColumnName> nonAggregateColumns, final List<FunctionCall> aggregations, final Optional<WindowExpression> windowExpression, final FormatInfo valueFormat, final Stacker contextStacker) {
    if (windowExpression.isPresent()) {
        throw new KsqlException("Windowing not supported for table aggregations.");
    }
    final List<String> unsupportedFunctionNames = aggregations.stream().map(call -> UdafUtil.resolveAggregateFunction(functionRegistry, call, schema, ksqlConfig)).filter(function -> !(function instanceof TableAggregationFunction)).map(KsqlAggregateFunction::name).map(FunctionName::text).distinct().collect(Collectors.toList());
    if (!unsupportedFunctionNames.isEmpty()) {
        final String postfix = unsupportedFunctionNames.size() == 1 ? "" : "s";
        throw new KsqlException("The aggregation function" + postfix + " " + GrammaticalJoiner.and().join(unsupportedFunctionNames) + " cannot be applied to a table source, only to a stream source.");
    }
    final TableAggregate step = ExecutionStepFactory.tableAggregate(contextStacker, sourceTableStep, InternalFormats.of(keyFormat, valueFormat), nonAggregateColumns, aggregations);
    return new SchemaKTable<>(step, resolveSchema(step), keyFormat, ksqlConfig, functionRegistry);
}
Also used : ColumnName(io.confluent.ksql.name.ColumnName) GrammaticalJoiner(io.confluent.ksql.util.GrammaticalJoiner) KeyFormat(io.confluent.ksql.serde.KeyFormat) KGroupedTableHolder(io.confluent.ksql.execution.plan.KGroupedTableHolder) KsqlAggregateFunction(io.confluent.ksql.function.KsqlAggregateFunction) WindowExpression(io.confluent.ksql.parser.tree.WindowExpression) ExecutionStepFactory(io.confluent.ksql.execution.streams.ExecutionStepFactory) ExecutionStep(io.confluent.ksql.execution.plan.ExecutionStep) FunctionName(io.confluent.ksql.name.FunctionName) FunctionRegistry(io.confluent.ksql.function.FunctionRegistry) TableAggregate(io.confluent.ksql.execution.plan.TableAggregate) KsqlConfig(io.confluent.ksql.util.KsqlConfig) InternalFormats(io.confluent.ksql.serde.InternalFormats) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) Collectors(java.util.stream.Collectors) UdafUtil(io.confluent.ksql.execution.function.UdafUtil) Objects(java.util.Objects) FunctionCall(io.confluent.ksql.execution.expression.tree.FunctionCall) List(java.util.List) Stacker(io.confluent.ksql.execution.context.QueryContext.Stacker) TableAggregationFunction(io.confluent.ksql.execution.function.TableAggregationFunction) KsqlException(io.confluent.ksql.util.KsqlException) Optional(java.util.Optional) GenericKey(io.confluent.ksql.GenericKey) FormatInfo(io.confluent.ksql.serde.FormatInfo) FunctionName(io.confluent.ksql.name.FunctionName) TableAggregate(io.confluent.ksql.execution.plan.TableAggregate) TableAggregationFunction(io.confluent.ksql.execution.function.TableAggregationFunction) KsqlException(io.confluent.ksql.util.KsqlException)

Example 10 with KeyFormat

use of io.confluent.ksql.serde.KeyFormat in project ksql by confluentinc.

the class SchemaKStream method groupBy.

public SchemaKGroupedStream groupBy(final FormatInfo valueFormat, final List<Expression> groupByExpressions, final Stacker contextStacker) {
    if (!repartitionNeeded(groupByExpressions)) {
        return groupByKey(keyFormat, valueFormat, contextStacker);
    }
    final KeyFormat rekeyedFormat;
    if (keyFormat.getFormatInfo().getFormat().equals(NoneFormat.NAME)) {
        final FormatInfo defaultFormat = FormatInfo.of(ksqlConfig.getString(KsqlConfig.KSQL_DEFAULT_KEY_FORMAT_CONFIG));
        rekeyedFormat = KeyFormat.nonWindowed(defaultFormat, SerdeFeatures.of());
    } else {
        rekeyedFormat = keyFormat;
    }
    final KeyFormat sanitizedKeyFormat = SerdeFeaturesFactory.sanitizeKeyFormat(rekeyedFormat, toSqlTypes(groupByExpressions), true);
    final StreamGroupBy<K> source = ExecutionStepFactory.streamGroupBy(contextStacker, sourceStep, InternalFormats.of(sanitizedKeyFormat, valueFormat), groupByExpressions);
    return new SchemaKGroupedStream(source, resolveSchema(source), sanitizedKeyFormat, ksqlConfig, functionRegistry);
}
Also used : KeyFormat(io.confluent.ksql.serde.KeyFormat) FormatInfo(io.confluent.ksql.serde.FormatInfo)

Aggregations

KeyFormat (io.confluent.ksql.serde.KeyFormat)16 LogicalSchema (io.confluent.ksql.schema.ksql.LogicalSchema)7 KsqlTopic (io.confluent.ksql.execution.ddl.commands.KsqlTopic)5 KsqlException (io.confluent.ksql.util.KsqlException)5 DataSource (io.confluent.ksql.metastore.model.DataSource)4 FormatInfo (io.confluent.ksql.serde.FormatInfo)4 ValueFormat (io.confluent.ksql.serde.ValueFormat)4 KsqlConfig (io.confluent.ksql.util.KsqlConfig)3 Optional (java.util.Optional)3 ImmutableMap (com.google.common.collect.ImmutableMap)2 GenericKey (io.confluent.ksql.GenericKey)2 Stacker (io.confluent.ksql.execution.context.QueryContext.Stacker)2 ExecutionStep (io.confluent.ksql.execution.plan.ExecutionStep)2 KStreamHolder (io.confluent.ksql.execution.plan.KStreamHolder)2 KTableHolder (io.confluent.ksql.execution.plan.KTableHolder)2 ExecutionStepFactory (io.confluent.ksql.execution.streams.ExecutionStepFactory)2 ColumnName (io.confluent.ksql.name.ColumnName)2 QueryId (io.confluent.ksql.query.QueryId)2 InternalFormats (io.confluent.ksql.serde.InternalFormats)2 WindowInfo (io.confluent.ksql.serde.WindowInfo)2