Search in sources :

Example 11 with KeyFormat

use of io.confluent.ksql.serde.KeyFormat in project ksql by confluentinc.

the class SchemaKStream method selectKey.

/**
 * Re-keys this stream on the supplied key expression(s).
 *
 * <p>If the requested key format equals the current one, no repartition is forced, and
 * {@code repartitionNeeded} reports that the key expression does not require one, this
 * stream is returned as-is and no new step is created.
 *
 * @param valueFormat value format used in constructing serdes. Unchanged by this step
 *                    (it is not read by this method).
 * @param keyExpression expression for the key being selected
 * @param forceInternalKeyFormat new key format to be used, if present; otherwise the
 *                               current key format is carried over
 * @param contextStacker context for this step
 * @param forceRepartition if true, this step will repartition even if there is no change in
 *                         either key format or value. Used to ensure co-partitioning for
 *                         joins on Schema-Registry-enabled key formats
 * @return result stream: repartitioned if needed or forced, else this stream unchanged
 * @throws KsqlException if the current key format is windowed, unless the repartition is
 *                       forced and the key expression itself does not require one
 */
public SchemaKStream<K> selectKey(final FormatInfo valueFormat, final List<Expression> keyExpression, final Optional<KeyFormat> forceInternalKeyFormat, final Stacker contextStacker, final boolean forceRepartition) {
    // A forced key format only counts as a change when it differs from the current one.
    final boolean formatDiffers = forceInternalKeyFormat.filter(forced -> !forced.equals(keyFormat)).isPresent();
    final boolean rekeyRequired = repartitionNeeded(keyExpression);
    if (!(formatDiffers || forceRepartition || rekeyRequired)) {
        return this;
    }

    // Only a purely forced repartition (same key, forced by the caller) is let through
    // for windowed sources; every other case is rejected.
    final boolean purelyForced = forceRepartition && !rekeyRequired;
    if (!purelyForced && keyFormat.isWindowed()) {
        throw new KsqlException("Implicit repartitioning of windowed sources is not supported. "
                + "See https://github.com/confluentinc/ksql/issues/4385.");
    }

    final ExecutionStep<KStreamHolder<K>> selectKeyStep = ExecutionStepFactory.streamSelectKey(contextStacker, sourceStep, keyExpression);
    final KeyFormat targetKeyFormat = forceInternalKeyFormat.orElse(keyFormat);
    return new SchemaKStream<>(
            selectKeyStep,
            resolveSchema(selectKeyStep),
            SerdeFeaturesFactory.sanitizeKeyFormat(targetKeyFormat, toSqlTypes(keyExpression), true),
            ksqlConfig,
            functionRegistry);
}
Also used : KStreamHolder(io.confluent.ksql.execution.plan.KStreamHolder) KsqlException(io.confluent.ksql.util.KsqlException) KeyFormat(io.confluent.ksql.serde.KeyFormat)

Example 12 with KeyFormat

use of io.confluent.ksql.serde.KeyFormat in project ksql by confluentinc.

the class SchemaKSourceFactory method buildTable.

/**
 * Builds the {@code SchemaKTable} for a non-windowed table source.
 *
 * <p>The newer table source step is used only when
 * {@code KsqlConfig.KSQL_ROWPARTITION_ROWOFFSET_ENABLED} is set and the existing plan (if
 * any) contains no {@code TableSourceV1} step; otherwise the V1 step is used.
 *
 * @param buildContext plan build context supplying the config and any existing plan info
 * @param dataSource the source whose topic, schema and timestamp column back the table
 * @param contextStacker context for the source step
 * @return the table wrapping the chosen source step
 * @throws IllegalArgumentException if the source's key format is windowed
 * @throws IllegalStateException if the existing plan uses a {@code TableSourceV1} step but
 *                               the pseudocolumn version to use is not the legacy one
 */
private static SchemaKTable<?> buildTable(final PlanBuildContext buildContext, final DataSource dataSource, final Stacker contextStacker) {
    final KeyFormat keyFormat = dataSource.getKsqlTopic().getKeyFormat();
    if (keyFormat.isWindowed()) {
        throw new IllegalArgumentException("windowed");
    }
    final int pseudoColumnVersionToUse = determinePseudoColumnVersionToUse(buildContext);
    // If the old query has a v1 table step, continue to use it.
    // See https://github.com/confluentinc/ksql/pull/7990
    final boolean useOldExecutionStepVersion = buildContext.getPlanInfo()
            .map(planInfo -> planInfo.getSources().stream().anyMatch(TableSourceV1.class::isInstance))
            .orElse(false);
    if (useOldExecutionStepVersion && pseudoColumnVersionToUse != SystemColumns.LEGACY_PSEUDOCOLUMN_VERSION_NUMBER) {
        // The two literals previously joined without a space ("pseudocolumnversion 1").
        throw new IllegalStateException("TableSourceV2 was released in conjunction with pseudocolumn " + "version 1. Something has gone very wrong");
    }
    // Both branches need the topic's formats; build them once.
    final Formats formats = Formats.from(dataSource.getKsqlTopic());
    final SourceStep<KTableHolder<GenericKey>> step;
    if (buildContext.getKsqlConfig().getBoolean(KsqlConfig.KSQL_ROWPARTITION_ROWOFFSET_ENABLED) && !useOldExecutionStepVersion) {
        step = ExecutionStepFactory.tableSource(contextStacker, dataSource.getSchema(), dataSource.getKafkaTopicName(), formats, dataSource.getTimestampColumn(), InternalFormats.of(keyFormat, formats.getValueFormat()), pseudoColumnVersionToUse);
    } else {
        step = ExecutionStepFactory.tableSourceV1(contextStacker, dataSource.getSchema(), dataSource.getKafkaTopicName(), formats, dataSource.getTimestampColumn(), pseudoColumnVersionToUse);
    }
    return schemaKTable(buildContext, resolveSchema(buildContext, step, dataSource), keyFormat, step);
}
Also used : ExecutionStep(io.confluent.ksql.execution.plan.ExecutionStep) DataSource(io.confluent.ksql.metastore.model.DataSource) ExecutionStep(io.confluent.ksql.execution.plan.ExecutionStep) SystemColumns(io.confluent.ksql.schema.ksql.SystemColumns) PlanBuildContext(io.confluent.ksql.planner.plan.PlanBuildContext) KeyFormat(io.confluent.ksql.serde.KeyFormat) QueryContext(io.confluent.ksql.execution.context.QueryContext) Set(java.util.Set) KsqlConfig(io.confluent.ksql.util.KsqlConfig) Formats(io.confluent.ksql.execution.plan.Formats) InternalFormats(io.confluent.ksql.serde.InternalFormats) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) StreamSource(io.confluent.ksql.execution.plan.StreamSource) StepSchemaResolver(io.confluent.ksql.execution.streams.StepSchemaResolver) WindowInfo(io.confluent.ksql.serde.WindowInfo) KStreamHolder(io.confluent.ksql.execution.plan.KStreamHolder) KTableHolder(io.confluent.ksql.execution.plan.KTableHolder) SourceStep(io.confluent.ksql.execution.plan.SourceStep) WindowedStreamSource(io.confluent.ksql.execution.plan.WindowedStreamSource) TableSourceV1(io.confluent.ksql.execution.plan.TableSourceV1) Windowed(org.apache.kafka.streams.kstream.Windowed) Stacker(io.confluent.ksql.execution.context.QueryContext.Stacker) GenericKey(io.confluent.ksql.GenericKey) ExecutionStepFactory(io.confluent.ksql.execution.streams.ExecutionStepFactory) TableSourceV1(io.confluent.ksql.execution.plan.TableSourceV1) KTableHolder(io.confluent.ksql.execution.plan.KTableHolder) KeyFormat(io.confluent.ksql.serde.KeyFormat)

Example 13 with KeyFormat

use of io.confluent.ksql.serde.KeyFormat in project ksql by confluentinc.

the class SchemaKTable method groupBy.

/**
 * Groups this table by the supplied expressions.
 *
 * @param valueFormat value format paired with the grouped key format for the step's
 *                    internal formats
 * @param groupByExpressions expressions to group by
 * @param contextStacker context for this step
 * @return the grouped table, keyed with a non-windowed, sanitized copy of this table's
 *         key format
 */
@Override
public SchemaKGroupedTable groupBy(final FormatInfo valueFormat, final List<Expression> groupByExpressions, final Stacker contextStacker) {
    // Since tables must have a key, we know that the keyFormat is both
    // not NONE and has at least one column; this allows us to inherit
    // the key format directly (as opposed to the logic in SchemaKStream)
    final KeyFormat inheritedKeyFormat = KeyFormat.nonWindowed(keyFormat.getFormatInfo(), keyFormat.getFeatures());
    final KeyFormat groupedKeyFormat = SerdeFeaturesFactory.sanitizeKeyFormat(inheritedKeyFormat, toSqlTypes(groupByExpressions), true);

    final InternalFormats groupedFormats = InternalFormats.of(groupedKeyFormat, valueFormat);
    final TableGroupBy<K> groupByStep = ExecutionStepFactory.tableGroupBy(contextStacker, sourceTableStep, groupedFormats, groupByExpressions);

    return new SchemaKGroupedTable(groupByStep, resolveSchema(groupByStep), groupedKeyFormat, ksqlConfig, functionRegistry);
}
Also used : KeyFormat(io.confluent.ksql.serde.KeyFormat)

Example 14 with KeyFormat

use of io.confluent.ksql.serde.KeyFormat in project ksql by confluentinc.

the class SchemaKTable method selectKey.

/**
 * Re-keys this table, which is only possible when the key itself does not change.
 *
 * <p>Returns this table unchanged when no repartition is forced, the requested key format
 * equals the current one, and the key expression does not require a repartition.
 *
 * @param valueFormat value format paired with the new key format for the step's
 *                    internal formats
 * @param keyExpression expression for the key being selected
 * @param forceInternalKeyFormat new key format to be used, if present
 * @param contextStacker context for this step
 * @param forceRepartition if true, the step is created even when neither key nor key
 *                         format changes
 * @return this table if nothing needs doing, else a new table over the select-key step
 * @throws UnsupportedOperationException if the table has more than one key column, or if
 *                                       the key expression would require a repartition
 * @throws KsqlException if the current key format is windowed
 */
@Override
public SchemaKTable<K> selectKey(final FormatInfo valueFormat, final List<Expression> keyExpression, final Optional<KeyFormat> forceInternalKeyFormat, final Stacker contextStacker, final boolean forceRepartition) {
    final boolean rekeyRequired = repartitionNeeded(keyExpression);
    final boolean formatDiffers = forceInternalKeyFormat.filter(forced -> !forced.equals(keyFormat)).isPresent();
    if (!(forceRepartition || formatDiffers || rekeyRequired)) {
        return this;
    }

    if (schema.key().size() > 1) {
        // let's throw a better error message in the case of multi-column tables
        throw new UnsupportedOperationException("Cannot repartition a TABLE source. If this is "
                + "a join, joins on tables with multiple columns is not yet supported.");
    }

    // A table may only pass through this step when its key stays the same and merely
    // ends up serialized differently (thus ensuring all keys are routed to the same
    // partitions); an actual change of key is rejected.
    if (rekeyRequired) {
        final String tableKeyColumn = Iterables.getOnlyElement(schema.key()).name().text();
        throw new UnsupportedOperationException("Cannot repartition a TABLE source. "
                + "If this is a join, make sure that the criteria uses the TABLE's key column "
                + tableKeyColumn + " instead of " + keyExpression);
    }

    if (keyFormat.isWindowed()) {
        final StringBuilder message = new StringBuilder("Implicit repartitioning of windowed sources is not supported. "
                + "See https://github.com/confluentinc/ksql/issues/4385.");
        if (forceRepartition) {
            message.append(" As a result, ksqlDB does not support joins on windowed sources with "
                    + "Schema-Registry-enabled key formats (AVRO, JSON_SR, PROTOBUF) at this time. "
                    + "Please repartition your sources to use a different key format before performing "
                    + "the join.");
        }
        throw new KsqlException(message.toString());
    }

    // logical schema changes are not supported, hence the trailing 'false'
    final KeyFormat targetKeyFormat = SerdeFeaturesFactory.sanitizeKeyFormat(forceInternalKeyFormat.orElse(keyFormat), toSqlTypes(keyExpression), false);
    final InternalFormats targetFormats = InternalFormats.of(targetKeyFormat, valueFormat);
    final ExecutionStep<KTableHolder<K>> selectKeyStep = ExecutionStepFactory.tableSelectKey(contextStacker, sourceTableStep, targetFormats, keyExpression);
    return new SchemaKTable<>(selectKeyStep, resolveSchema(selectKeyStep), targetKeyFormat, ksqlConfig, functionRegistry);
}
Also used : KTableHolder(io.confluent.ksql.execution.plan.KTableHolder) KsqlException(io.confluent.ksql.util.KsqlException) KeyFormat(io.confluent.ksql.serde.KeyFormat)

Example 15 with KeyFormat

use of io.confluent.ksql.serde.KeyFormat in project ksql by confluentinc.

the class InsertValuesExecutor method ensureKeySchemasMatch.

/**
 * Verifies that the key schema ksqlDB would generate is identical to the latest key
 * schema registered in Schema Registry for the source's topic, when one exists.
 *
 * <p>Without this check, messages could be published with a new schema ID, meaning that
 * logically identical keys might be routed to different partitions. Key formats that do
 * not support schema inference are skipped.
 *
 * @param keySchema the key schema ksqlDB will serialize with
 * @param dataSource the source being inserted into
 * @param serviceContext supplies the Schema Registry client
 * @throws KsqlException if the registered schema cannot be looked up, or if it differs
 *                       from the schema ksqlDB would generate
 */
private static void ensureKeySchemasMatch(final PersistenceSchema keySchema, final DataSource dataSource, final ServiceContext serviceContext) {
    final KeyFormat topicKeyFormat = dataSource.getKsqlTopic().getKeyFormat();
    final Format keySerdeFormat = FormatFactory.fromName(topicKeyFormat.getFormat());
    if (!keySerdeFormat.supportsFeature(SerdeFeature.SCHEMA_INFERENCE)) {
        return;
    }

    final ParsedSchema generated = keySerdeFormat
            .getSchemaTranslator(topicKeyFormat.getFormatInfo().getProperties())
            .toParsedSchema(keySchema);

    final Optional<SchemaMetadata> registered;
    try {
        registered = SchemaRegistryUtil.getLatestSchema(serviceContext.getSchemaRegistryClient(), dataSource.getKafkaTopicName(), true);
    } catch (final KsqlException e) {
        // Gives the auth-error helper the chance to throw first; otherwise fall through
        // to the generic failure below.
        maybeThrowSchemaRegistryAuthError(keySerdeFormat, dataSource.getKafkaTopicName(), true, AclOperation.READ, e);
        throw new KsqlException("Could not determine that insert values operations is safe; "
                + "operation potentially overrides existing key schema in schema registry.", e);
    }

    if (!registered.isPresent()) {
        return;
    }
    final String registeredSchema = registered.get().getSchema();
    final String generatedSchema = generated.canonicalString();
    if (registeredSchema.equals(generatedSchema)) {
        return;
    }
    throw new KsqlException("Cannot INSERT VALUES into data source " + dataSource.getName()
            + ". ksqlDB generated schema would overwrite existing key schema."
            + "\n\tExisting Schema: " + registeredSchema
            + "\n\tksqlDB Generated: " + generatedSchema);
}
Also used : KeyFormat(io.confluent.ksql.serde.KeyFormat) Format(io.confluent.ksql.serde.Format) SchemaMetadata(io.confluent.kafka.schemaregistry.client.SchemaMetadata) ParsedSchema(io.confluent.kafka.schemaregistry.ParsedSchema) KeyFormat(io.confluent.ksql.serde.KeyFormat) KsqlException(io.confluent.ksql.util.KsqlException)

Aggregations

KeyFormat (io.confluent.ksql.serde.KeyFormat) 16, LogicalSchema (io.confluent.ksql.schema.ksql.LogicalSchema) 7, KsqlTopic (io.confluent.ksql.execution.ddl.commands.KsqlTopic) 5, KsqlException (io.confluent.ksql.util.KsqlException) 5, DataSource (io.confluent.ksql.metastore.model.DataSource) 4, FormatInfo (io.confluent.ksql.serde.FormatInfo) 4, ValueFormat (io.confluent.ksql.serde.ValueFormat) 4, KsqlConfig (io.confluent.ksql.util.KsqlConfig) 3, Optional (java.util.Optional) 3, ImmutableMap (com.google.common.collect.ImmutableMap) 2, GenericKey (io.confluent.ksql.GenericKey) 2, Stacker (io.confluent.ksql.execution.context.QueryContext.Stacker) 2, ExecutionStep (io.confluent.ksql.execution.plan.ExecutionStep) 2, KStreamHolder (io.confluent.ksql.execution.plan.KStreamHolder) 2, KTableHolder (io.confluent.ksql.execution.plan.KTableHolder) 2, ExecutionStepFactory (io.confluent.ksql.execution.streams.ExecutionStepFactory) 2, ColumnName (io.confluent.ksql.name.ColumnName) 2, QueryId (io.confluent.ksql.query.QueryId) 2, InternalFormats (io.confluent.ksql.serde.InternalFormats) 2, WindowInfo (io.confluent.ksql.serde.WindowInfo) 2