Search in sources :

Example 6 with ColumnReferenceExp

use of io.confluent.ksql.execution.expression.tree.ColumnReferenceExp in project ksql by confluentinc.

the class Repartitioning method repartitionNeeded.

/**
 * Decide whether partitioning by the supplied expressions requires a repartition.
 *
 * <p>The only case in which no repartition is required is when the schema has exactly one key
 * column and the single partition-by expression is a column reference naming that same column.
 * Every other combination is reported as needing a repartition.
 *
 * @param schema the schema of the data before any repartition
 * @param partitionBy the expressions to partition by
 * @return {@code true} if a repartition is needed, {@code false} if the data is already
 *     partitioned by the requested column.
 */
public static boolean repartitionNeeded(final LogicalSchema schema, final List<Expression> partitionBy) {
    // Only a single-column key can be matched here: a missing key, or a key with several
    // columns, always results in a repartition. This logic would have to change if
    // matching against multi-column keys is ever supported.
    final boolean hasSingleKeyColumn = schema.key().size() == 1;
    if (!hasSingleKeyColumn || partitionBy.size() != 1) {
        // Either nothing to compare against, or a different number of expressions to keys.
        return true;
    }
    final Expression candidate = partitionBy.get(0);
    if (candidate instanceof ColumnReferenceExp) {
        // A plain column reference: no repartition only when it names the existing key column.
        final ColumnName requestedKeyName = ((ColumnReferenceExp) candidate).getColumnName();
        return !requestedKeyName.equals(schema.key().get(0).name());
    }
    // Any other kind of expression means the key will be changing.
    return true;
}
Also used : ColumnReferenceExp(io.confluent.ksql.execution.expression.tree.ColumnReferenceExp) ColumnName(io.confluent.ksql.name.ColumnName) Expression(io.confluent.ksql.execution.expression.tree.Expression)

Example 7 with ColumnReferenceExp

use of io.confluent.ksql.execution.expression.tree.ColumnReferenceExp in project ksql by confluentinc.

the class LogicalPlanner method joinOnNonKeyAttribute.

/**
 * Determine whether a join expression refers to anything other than the single key column of
 * the source it resolves to.
 *
 * <p>For a non-leaf node (an n-way join sub-tree) the source is located among the node's
 * data source nodes, either by the expression's qualifier (translated from the alias of
 * {@code aliasedDataSource} to its source name when they match) or, when unqualified, by
 * finding the one source whose schema contains the column. For a leaf node the single data
 * source of the node is used.
 *
 * @param joinExpression the join expression to inspect
 * @param node the plan node whose source nodes are searched
 * @param aliasedDataSource the aliased source used to translate an alias qualifier into the
 *     underlying source name
 * @return {@code true} if the expression is not a column reference, if the resolved source
 *     does not have exactly one key column, or if the referenced column is not that key
 *     column; {@code false} if the join is on the source's sole key column.
 * @throws KsqlException if, for a non-leaf node, the qualifier or column name does not
 *     resolve to exactly one source node
 */
private static boolean joinOnNonKeyAttribute(final Expression joinExpression, final PlanNode node, final AliasedDataSource aliasedDataSource) {
    if (!(joinExpression instanceof ColumnReferenceExp)) {
        // Anything other than a plain column reference cannot be the key column itself.
        return true;
    }
    final ColumnReferenceExp simpleJoinExpression = (ColumnReferenceExp) joinExpression;
    final ColumnName joinAttributeName = simpleJoinExpression.getColumnName();
    final List<DataSourceNode> dataSourceNodes = node.getSourceNodes().collect(Collectors.toList());
    final List<Column> keyColumns;
    // n-way join sub-tree (ie, not a leaf)
    if (isInnerNode(node)) {
        final DataSourceNode qualifiedNode;
        if (simpleJoinExpression.maybeQualifier().isPresent()) {
            final SourceName qualifierOrAlias = simpleJoinExpression.maybeQualifier().get();
            // The qualifier may be the alias of the aliased source: compare by source name.
            final SourceName qualifier;
            if (aliasedDataSource.getAlias().equals(qualifierOrAlias)) {
                qualifier = aliasedDataSource.getDataSource().getName();
            } else {
                qualifier = qualifierOrAlias;
            }
            final List<DataSourceNode> allNodes = dataSourceNodes.stream().filter(n -> n.getDataSource().getName().equals(qualifier)).collect(Collectors.toList());
            if (allNodes.size() != 1) {
                throw new KsqlException(String.format("Join qualifier '%s' could not be resolved (either not found or not unique).", qualifier));
            }
            qualifiedNode = Iterables.getOnlyElement(allNodes);
        } else {
            // Unqualified reference: exactly one source must expose a column with this name.
            final List<DataSourceNode> allNodes = dataSourceNodes.stream().filter(n -> n.getSchema().findColumn(joinAttributeName).isPresent()).collect(Collectors.toList());
            if (allNodes.size() != 1) {
                throw new KsqlException(String.format("Join identifier '%s' could not be resolved (either not found or not unique).", joinAttributeName));
            }
            qualifiedNode = Iterables.getOnlyElement(allNodes);
        }
        keyColumns = qualifiedNode.getSchema().key();
    } else {
        // leaf node: we know we have single data source
        keyColumns = Iterables.getOnlyElement(dataSourceNodes).getSchema().key();
    }
    // The join expression is a single column, so it can only match a single-column key:
    // - if the key has more than one column, the join is not on the key
    // - if the source has no key columns at all, the join cannot be on the key either
    //   (previously this case fell through and failed with a NoSuchElementException)
    if (keyColumns.size() != 1) {
        return true;
    }
    return !joinAttributeName.equals(keyColumns.get(0).name());
}
Also used : JoinInfo(io.confluent.ksql.analyzer.Analysis.JoinInfo) DataSource(io.confluent.ksql.metastore.model.DataSource) Leaf(io.confluent.ksql.planner.JoinTree.Leaf) AggregateAnalysisResult(io.confluent.ksql.analyzer.AggregateAnalysisResult) Into(io.confluent.ksql.analyzer.Analysis.Into) ColumnName(io.confluent.ksql.name.ColumnName) SourceName(io.confluent.ksql.name.SourceName) BiFunction(java.util.function.BiFunction) AggregateAnalyzer(io.confluent.ksql.analyzer.AggregateAnalyzer) FilterNode(io.confluent.ksql.planner.plan.FilterNode) SerdeFeaturesFactory(io.confluent.ksql.serde.SerdeFeaturesFactory) JoinKey(io.confluent.ksql.planner.plan.JoinNode.JoinKey) CodeGenRunner(io.confluent.ksql.execution.codegen.CodeGenRunner) WindowInfo(io.confluent.ksql.serde.WindowInfo) RewrittenAnalysis(io.confluent.ksql.analyzer.RewrittenAnalysis) QueryLimitNode(io.confluent.ksql.planner.plan.QueryLimitNode) AggregateNode(io.confluent.ksql.planner.plan.AggregateNode) AliasedDataSource(io.confluent.ksql.analyzer.Analysis.AliasedDataSource) TimestampExtractionPolicyFactory(io.confluent.ksql.execution.streams.timestamp.TimestampExtractionPolicyFactory) ExpressionTypeManager(io.confluent.ksql.execution.util.ExpressionTypeManager) KsqlBareOutputNode(io.confluent.ksql.planner.plan.KsqlBareOutputNode) SelectionUtil(io.confluent.ksql.planner.plan.SelectionUtil) PartitionBy(io.confluent.ksql.parser.tree.PartitionBy) ColumnReferenceExp(io.confluent.ksql.execution.expression.tree.ColumnReferenceExp) PreJoinProjectNode(io.confluent.ksql.planner.plan.PreJoinProjectNode) VisitParentExpressionVisitor(io.confluent.ksql.execution.expression.tree.VisitParentExpressionVisitor) FinalProjectNode(io.confluent.ksql.planner.plan.FinalProjectNode) ColumnNames(io.confluent.ksql.schema.ksql.ColumnNames) RefinementInfo(io.confluent.ksql.serde.RefinementInfo) ImmutableAnalysis(io.confluent.ksql.analyzer.ImmutableAnalysis) ExpressionEvaluator(io.confluent.ksql.execution.transform.ExpressionEvaluator) 
Expression(io.confluent.ksql.execution.expression.tree.Expression) JoinType(io.confluent.ksql.planner.plan.JoinNode.JoinType) Set(java.util.Set) QueryFilterNode(io.confluent.ksql.planner.plan.QueryFilterNode) KsqlConfig(io.confluent.ksql.util.KsqlConfig) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) Collectors(java.util.stream.Collectors) TimestampColumn(io.confluent.ksql.execution.timestamp.TimestampColumn) PlanNodeId(io.confluent.ksql.planner.plan.PlanNodeId) SingleSourcePlanNode(io.confluent.ksql.planner.plan.SingleSourcePlanNode) Builder(io.confluent.ksql.schema.ksql.LogicalSchema.Builder) Objects(java.util.Objects) Join(io.confluent.ksql.planner.JoinTree.Join) List(java.util.List) KsqlException(io.confluent.ksql.util.KsqlException) Optional(java.util.Optional) QueryProjectNode(io.confluent.ksql.planner.plan.QueryProjectNode) Column(io.confluent.ksql.schema.ksql.Column) FormatInfo(io.confluent.ksql.serde.FormatInfo) ProjectNode(io.confluent.ksql.planner.plan.ProjectNode) Iterables(com.google.common.collect.Iterables) FormatFactory(io.confluent.ksql.serde.FormatFactory) GrammaticalJoiner(io.confluent.ksql.util.GrammaticalJoiner) KeyFormat(io.confluent.ksql.serde.KeyFormat) JoinNode(io.confluent.ksql.planner.plan.JoinNode) SuppressNode(io.confluent.ksql.planner.plan.SuppressNode) UnqualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.UnqualifiedColumnReferenceExp) PartitionByParamsFactory(io.confluent.ksql.execution.streams.PartitionByParamsFactory) DataSourceType(io.confluent.ksql.metastore.model.DataSource.DataSourceType) Function(java.util.function.Function) NoneFormat(io.confluent.ksql.serde.none.NoneFormat) AsValue(io.confluent.ksql.function.udf.AsValue) QualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.QualifiedColumnReferenceExp) MetaStore(io.confluent.ksql.metastore.MetaStore) GroupBy(io.confluent.ksql.parser.tree.GroupBy) 
KsqlStructuredDataOutputNode(io.confluent.ksql.planner.plan.KsqlStructuredDataOutputNode) UserRepartitionNode(io.confluent.ksql.planner.plan.UserRepartitionNode) WindowExpression(io.confluent.ksql.parser.tree.WindowExpression) SqlType(io.confluent.ksql.schema.ksql.types.SqlType) SerdeFeatures(io.confluent.ksql.serde.SerdeFeatures) DataSourceNode(io.confluent.ksql.planner.plan.DataSourceNode) NewTopic(io.confluent.ksql.analyzer.Analysis.Into.NewTopic) KsqlWindowExpression(io.confluent.ksql.execution.windows.KsqlWindowExpression) OutputNode(io.confluent.ksql.planner.plan.OutputNode) FilterTypeValidator(io.confluent.ksql.analyzer.FilterTypeValidator) FlatMapNode(io.confluent.ksql.planner.plan.FlatMapNode) ValueFormat(io.confluent.ksql.serde.ValueFormat) SelectExpression(io.confluent.ksql.execution.plan.SelectExpression) NodeLocation(io.confluent.ksql.parser.NodeLocation) PreJoinRepartitionNode(io.confluent.ksql.planner.plan.PreJoinRepartitionNode) FunctionCall(io.confluent.ksql.execution.expression.tree.FunctionCall) KsqlTopic(io.confluent.ksql.execution.ddl.commands.KsqlTopic) PlanNode(io.confluent.ksql.planner.plan.PlanNode) Context(io.confluent.ksql.engine.rewrite.ExpressionTreeRewriter.Context) ExpressionTreeRewriter(io.confluent.ksql.engine.rewrite.ExpressionTreeRewriter) OutputRefinement(io.confluent.ksql.parser.OutputRefinement) FilterType(io.confluent.ksql.analyzer.FilterTypeValidator.FilterType) Collections(java.util.Collections) ColumnReferenceExp(io.confluent.ksql.execution.expression.tree.ColumnReferenceExp) UnqualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.UnqualifiedColumnReferenceExp) QualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.QualifiedColumnReferenceExp) ColumnName(io.confluent.ksql.name.ColumnName) DataSourceNode(io.confluent.ksql.planner.plan.DataSourceNode) TimestampColumn(io.confluent.ksql.execution.timestamp.TimestampColumn) Column(io.confluent.ksql.schema.ksql.Column) 
SourceName(io.confluent.ksql.name.SourceName) KsqlException(io.confluent.ksql.util.KsqlException)

Example 8 with ColumnReferenceExp

use of io.confluent.ksql.execution.expression.tree.ColumnReferenceExp in project ksql by confluentinc.

the class LogicalPlanner method buildAggregateSchema.

/**
 * Build the schema of an aggregation: one key column per grouping expression, followed by
 * the value columns taken from the projection.
 *
 * <p>Each key column is named after the alias of the projection item that matches the
 * grouping expression, if there is one; otherwise after the referenced column when the
 * grouping expression is a column reference; otherwise a unique alias is generated.
 *
 * <p>When the analysis has an INTO clause (persistent query) the projected key columns are
 * removed from the value columns; otherwise (transient query) all projected columns are kept
 * as value columns.
 *
 * @param sourcePlanNode the node supplying the source schema
 * @param groupBy the GROUP BY clause supplying the grouping expressions
 * @param projectionExpressions the projection of the query
 * @return the schema of the aggregate
 * @throws KsqlException if a grouping expression appears more than once in the projection,
 *     or if a persistent query's projection contains no value columns
 */
private LogicalSchema buildAggregateSchema(final PlanNode sourcePlanNode, final GroupBy groupBy, final List<SelectExpression> projectionExpressions) {
    final LogicalSchema sourceSchema = sourcePlanNode.getSchema();
    final boolean windowed = analysis.getWindowExpression().isPresent();
    final LogicalSchema projectionSchema = SelectionUtil.buildProjectionSchema(sourceSchema.withPseudoAndKeyColsInValue(windowed, ksqlConfig), projectionExpressions, metaStore);
    final List<Expression> groupByExps = groupBy.getGroupingExpressions();

    // Resolves an expression to the alias it is given in the projection, if it is projected.
    final Function<Expression, Optional<ColumnName>> selectResolver = expression -> {
        final List<ColumnName> aliasesInProjection = projectionExpressions.stream()
            .filter(e -> e.getExpression().equals(expression))
            .map(SelectExpression::getAlias)
            .collect(Collectors.toList());
        if (aliasesInProjection.isEmpty()) {
            return Optional.empty();
        }
        if (aliasesInProjection.size() == 1) {
            return Optional.of(aliasesInProjection.get(0));
        }
        // Projected more than once: ambiguous which alias should name the key.
        final String keys = GrammaticalJoiner.and().join(aliasesInProjection);
        throw new KsqlException("The projection contains a key column more than once: " + keys + "." + System.lineSeparator() + "Each key column must only be in the projection once. " + "If you intended to copy the key into the value, then consider using the " + AsValue.NAME + " function to indicate which key reference should be copied.");
    };

    final List<Column> valueColumns;
    if (!analysis.getInto().isPresent()) {
        // Transient query:
        // Transient queries only return value columns, so must have key columns in the value:
        valueColumns = projectionSchema.columns();
    } else {
        // Persistent query: projected key columns are not repeated in the value.
        final Set<ColumnName> keyColumnNames = groupBy.getGroupingExpressions().stream()
            .map(selectResolver)
            .filter(Optional::isPresent)
            .map(Optional::get)
            .collect(Collectors.toSet());
        valueColumns = projectionSchema.value().stream()
            .filter(col -> !keyColumnNames.contains(col.name()))
            .collect(Collectors.toList());
        if (valueColumns.isEmpty()) {
            throw new KsqlException("The projection contains no value columns.");
        }
    }

    final Builder builder = LogicalSchema.builder();
    final ExpressionTypeManager typeManager = new ExpressionTypeManager(sourceSchema, metaStore);
    for (final Expression groupingExp : groupByExps) {
        final SqlType keyType = typeManager.getExpressionSqlType(groupingExp);
        final Optional<ColumnName> projectedAlias = selectResolver.apply(groupingExp);
        final ColumnName keyName;
        if (projectedAlias.isPresent()) {
            keyName = projectedAlias.get();
        } else if (groupingExp instanceof ColumnReferenceExp) {
            keyName = ((ColumnReferenceExp) groupingExp).getColumnName();
        } else {
            keyName = ColumnNames.uniqueAliasFor(groupingExp, sourceSchema);
        }
        builder.keyColumn(keyName, keyType);
    }
    return builder.valueColumns(valueColumns).build();
}
Also used : JoinInfo(io.confluent.ksql.analyzer.Analysis.JoinInfo) DataSource(io.confluent.ksql.metastore.model.DataSource) Leaf(io.confluent.ksql.planner.JoinTree.Leaf) AggregateAnalysisResult(io.confluent.ksql.analyzer.AggregateAnalysisResult) Into(io.confluent.ksql.analyzer.Analysis.Into) ColumnName(io.confluent.ksql.name.ColumnName) SourceName(io.confluent.ksql.name.SourceName) BiFunction(java.util.function.BiFunction) AggregateAnalyzer(io.confluent.ksql.analyzer.AggregateAnalyzer) FilterNode(io.confluent.ksql.planner.plan.FilterNode) SerdeFeaturesFactory(io.confluent.ksql.serde.SerdeFeaturesFactory) JoinKey(io.confluent.ksql.planner.plan.JoinNode.JoinKey) CodeGenRunner(io.confluent.ksql.execution.codegen.CodeGenRunner) WindowInfo(io.confluent.ksql.serde.WindowInfo) RewrittenAnalysis(io.confluent.ksql.analyzer.RewrittenAnalysis) QueryLimitNode(io.confluent.ksql.planner.plan.QueryLimitNode) AggregateNode(io.confluent.ksql.planner.plan.AggregateNode) AliasedDataSource(io.confluent.ksql.analyzer.Analysis.AliasedDataSource) TimestampExtractionPolicyFactory(io.confluent.ksql.execution.streams.timestamp.TimestampExtractionPolicyFactory) ExpressionTypeManager(io.confluent.ksql.execution.util.ExpressionTypeManager) KsqlBareOutputNode(io.confluent.ksql.planner.plan.KsqlBareOutputNode) SelectionUtil(io.confluent.ksql.planner.plan.SelectionUtil) PartitionBy(io.confluent.ksql.parser.tree.PartitionBy) ColumnReferenceExp(io.confluent.ksql.execution.expression.tree.ColumnReferenceExp) PreJoinProjectNode(io.confluent.ksql.planner.plan.PreJoinProjectNode) VisitParentExpressionVisitor(io.confluent.ksql.execution.expression.tree.VisitParentExpressionVisitor) FinalProjectNode(io.confluent.ksql.planner.plan.FinalProjectNode) ColumnNames(io.confluent.ksql.schema.ksql.ColumnNames) RefinementInfo(io.confluent.ksql.serde.RefinementInfo) ImmutableAnalysis(io.confluent.ksql.analyzer.ImmutableAnalysis) ExpressionEvaluator(io.confluent.ksql.execution.transform.ExpressionEvaluator) 
Expression(io.confluent.ksql.execution.expression.tree.Expression) JoinType(io.confluent.ksql.planner.plan.JoinNode.JoinType) Set(java.util.Set) QueryFilterNode(io.confluent.ksql.planner.plan.QueryFilterNode) KsqlConfig(io.confluent.ksql.util.KsqlConfig) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) Collectors(java.util.stream.Collectors) TimestampColumn(io.confluent.ksql.execution.timestamp.TimestampColumn) PlanNodeId(io.confluent.ksql.planner.plan.PlanNodeId) SingleSourcePlanNode(io.confluent.ksql.planner.plan.SingleSourcePlanNode) Builder(io.confluent.ksql.schema.ksql.LogicalSchema.Builder) Objects(java.util.Objects) Join(io.confluent.ksql.planner.JoinTree.Join) List(java.util.List) KsqlException(io.confluent.ksql.util.KsqlException) Optional(java.util.Optional) QueryProjectNode(io.confluent.ksql.planner.plan.QueryProjectNode) Column(io.confluent.ksql.schema.ksql.Column) FormatInfo(io.confluent.ksql.serde.FormatInfo) ProjectNode(io.confluent.ksql.planner.plan.ProjectNode) Iterables(com.google.common.collect.Iterables) FormatFactory(io.confluent.ksql.serde.FormatFactory) GrammaticalJoiner(io.confluent.ksql.util.GrammaticalJoiner) KeyFormat(io.confluent.ksql.serde.KeyFormat) JoinNode(io.confluent.ksql.planner.plan.JoinNode) SuppressNode(io.confluent.ksql.planner.plan.SuppressNode) UnqualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.UnqualifiedColumnReferenceExp) PartitionByParamsFactory(io.confluent.ksql.execution.streams.PartitionByParamsFactory) DataSourceType(io.confluent.ksql.metastore.model.DataSource.DataSourceType) Function(java.util.function.Function) NoneFormat(io.confluent.ksql.serde.none.NoneFormat) AsValue(io.confluent.ksql.function.udf.AsValue) QualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.QualifiedColumnReferenceExp) MetaStore(io.confluent.ksql.metastore.MetaStore) GroupBy(io.confluent.ksql.parser.tree.GroupBy) 
KsqlStructuredDataOutputNode(io.confluent.ksql.planner.plan.KsqlStructuredDataOutputNode) UserRepartitionNode(io.confluent.ksql.planner.plan.UserRepartitionNode) WindowExpression(io.confluent.ksql.parser.tree.WindowExpression) SqlType(io.confluent.ksql.schema.ksql.types.SqlType) SerdeFeatures(io.confluent.ksql.serde.SerdeFeatures) DataSourceNode(io.confluent.ksql.planner.plan.DataSourceNode) NewTopic(io.confluent.ksql.analyzer.Analysis.Into.NewTopic) KsqlWindowExpression(io.confluent.ksql.execution.windows.KsqlWindowExpression) OutputNode(io.confluent.ksql.planner.plan.OutputNode) FilterTypeValidator(io.confluent.ksql.analyzer.FilterTypeValidator) FlatMapNode(io.confluent.ksql.planner.plan.FlatMapNode) ValueFormat(io.confluent.ksql.serde.ValueFormat) SelectExpression(io.confluent.ksql.execution.plan.SelectExpression) NodeLocation(io.confluent.ksql.parser.NodeLocation) PreJoinRepartitionNode(io.confluent.ksql.planner.plan.PreJoinRepartitionNode) FunctionCall(io.confluent.ksql.execution.expression.tree.FunctionCall) KsqlTopic(io.confluent.ksql.execution.ddl.commands.KsqlTopic) PlanNode(io.confluent.ksql.planner.plan.PlanNode) Context(io.confluent.ksql.engine.rewrite.ExpressionTreeRewriter.Context) ExpressionTreeRewriter(io.confluent.ksql.engine.rewrite.ExpressionTreeRewriter) OutputRefinement(io.confluent.ksql.parser.OutputRefinement) FilterType(io.confluent.ksql.analyzer.FilterTypeValidator.FilterType) Collections(java.util.Collections) ExpressionTypeManager(io.confluent.ksql.execution.util.ExpressionTypeManager) Optional(java.util.Optional) Builder(io.confluent.ksql.schema.ksql.LogicalSchema.Builder) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) SelectExpression(io.confluent.ksql.execution.plan.SelectExpression) KsqlException(io.confluent.ksql.util.KsqlException) ColumnName(io.confluent.ksql.name.ColumnName) ColumnReferenceExp(io.confluent.ksql.execution.expression.tree.ColumnReferenceExp) 
UnqualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.UnqualifiedColumnReferenceExp) QualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.QualifiedColumnReferenceExp) Expression(io.confluent.ksql.execution.expression.tree.Expression) WindowExpression(io.confluent.ksql.parser.tree.WindowExpression) KsqlWindowExpression(io.confluent.ksql.execution.windows.KsqlWindowExpression) SelectExpression(io.confluent.ksql.execution.plan.SelectExpression) TimestampColumn(io.confluent.ksql.execution.timestamp.TimestampColumn) Column(io.confluent.ksql.schema.ksql.Column) List(java.util.List) SqlType(io.confluent.ksql.schema.ksql.types.SqlType)

Example 9 with ColumnReferenceExp

use of io.confluent.ksql.execution.expression.tree.ColumnReferenceExp in project ksql by confluentinc.

the class PreJoinProjectNode method validateColumns.

/**
 * Validate the required columns after translating aliased references.
 *
 * <p>Every unqualified column reference whose name is a key of {@code aliases.inverse()} is
 * swapped for an unqualified reference to the name that inverse mapping yields (presumably
 * the column's name before aliasing; confirm against where {@code aliases} is built). The
 * adjusted set is then handed to the superclass implementation.
 *
 * @param requiredColumns the columns required from this node
 * @return the result of the superclass validation of the translated columns
 */
@Override
protected Set<ColumnReferenceExp> validateColumns(final RequiredColumns requiredColumns) {
    // Collect the references to translate up front, before touching the builder.
    final List<? extends ColumnReferenceExp> toTranslate = requiredColumns.get().stream()
        .filter(ref -> ref instanceof UnqualifiedColumnReferenceExp
            && aliases.inverse().containsKey(ref.getColumnName()))
        .collect(Collectors.toList());

    final Builder translated = requiredColumns.asBuilder();
    for (final ColumnReferenceExp ref : toTranslate) {
        translated.remove(ref);
        translated.add(new UnqualifiedColumnReferenceExp(ref.getLocation(), aliases.inverse().get(ref.getColumnName())));
    }
    return super.validateColumns(translated.build());
}
Also used : ColumnNames(io.confluent.ksql.schema.ksql.ColumnNames) ColumnName(io.confluent.ksql.name.ColumnName) SourceName(io.confluent.ksql.name.SourceName) KeyFormat(io.confluent.ksql.serde.KeyFormat) UnqualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.UnqualifiedColumnReferenceExp) Set(java.util.Set) Builder(io.confluent.ksql.planner.RequiredColumns.Builder) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) Collectors(java.util.stream.Collectors) Iterators(com.google.common.collect.Iterators) SelectExpression(io.confluent.ksql.execution.plan.SelectExpression) Namespace(io.confluent.ksql.schema.ksql.Column.Namespace) ImmutableBiMap(com.google.common.collect.ImmutableBiMap) List(java.util.List) Stream(java.util.stream.Stream) ImmutableList(com.google.common.collect.ImmutableList) Optional(java.util.Optional) ColumnReferenceExp(io.confluent.ksql.execution.expression.tree.ColumnReferenceExp) RequiredColumns(io.confluent.ksql.planner.RequiredColumns) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) Builder(io.confluent.ksql.planner.RequiredColumns.Builder) UnqualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.UnqualifiedColumnReferenceExp)

Example 10 with ColumnReferenceExp

use of io.confluent.ksql.execution.expression.tree.ColumnReferenceExp in project ksql by confluentinc.

the class SelectionUtil method buildProjectionSchema.

/*
   * Builds the logical schema produced by a projection over a parent schema.
   *
   * The algorithm is more involved than one would like, so here is what it does and why.
   *
   * The logical schema has to mirror the physical schema until
   * https://github.com/confluentinc/ksql/issues/6374 is addressed. Key columns must
   * therefore come out in the order of the parent schema: if the source schema is
   * K1 INT KEY, K2 INT KEY and the projection is SELECT K2, K1, the output schema is
   * K1, K2 regardless of the order used in the projection - see
   * https://github.com/confluentinc/ksql/pull/7477 for context on the bug.
   *
   * Emitting all keys first and then all values is not an option: transient queries
   * return every column to the user as a "value column", so the interleaving of keys and
   * values must be kept. For SELECT VALUE, * FROM FOO the VALUE column is expected
   * _before_ the key fields. The key columns are therefore reordered among themselves
   * only, leaving the positions of key slots and value slots in the projection untouched.
   *
   * In addition, the same key may be aliased more than once
   * (SELECT K1 AS X, K1 AS Y FROM ...). That is not supported, but it is rejected later
   * when the final projection is built, so every such alias is carried through here.
   *
   * Two passes are made over the projection:
   *
   * 1. Build, for each parent key column, the list of projection items that reference it.
   *    It is a list per key because of the multiple-alias case above.
   *
   * 2. Walk the projection again and build the schema. A value item is emitted as is. For
   *    a key item, the next not yet emitted key item - in parent schema order - is
   *    emitted in its place.
   */
public static LogicalSchema buildProjectionSchema(final LogicalSchema parentSchema, final List<SelectExpression> projection, final FunctionRegistry functionRegistry) {
    final ExpressionTypeManager expressionTypeManager = new ExpressionTypeManager(parentSchema, functionRegistry);

    // selectsByKey.get(i) holds the projection items that reference
    // the parent schema's key column at index i
    final int keyCount = parentSchema.key().size();
    final List<List<SelectExpression>> selectsByKey = new ArrayList<>(keyCount);
    for (int keyIdx = 0; keyIdx < keyCount; keyIdx++) {
        selectsByKey.add(new ArrayList<>());
    }

    // First pass: bucket the key references. keySelects records which projection
    // items are key references so the second pass need not repeat the lookup.
    final Set<SelectExpression> keySelects = new HashSet<>();
    for (final SelectExpression item : projection) {
        final Expression itemExpression = item.getExpression();
        if (!(itemExpression instanceof ColumnReferenceExp)) {
            continue;
        }
        final ColumnName referenced = ((ColumnReferenceExp) itemExpression).getColumnName();
        final Optional<Column> keyColumn = parentSchema.findColumn(referenced)
            .filter(column -> column.namespace() == Namespace.KEY);
        if (keyColumn.isPresent()) {
            selectsByKey.get(keyColumn.get().index()).add(item);
            keySelects.add(item);
        }
    }

    // Second pass: emit columns in projection order, but fill each key slot
    // from the buckets, in parent key order.
    final Builder schemaBuilder = LogicalSchema.builder();
    int nextKeyIdx = 0;
    for (final SelectExpression item : projection) {
        if (!keySelects.contains(item)) {
            final Expression valueExpression = item.getExpression();
            final SqlType valueType = expressionTypeManager.getExpressionSqlType(valueExpression);
            if (valueType == null) {
                throw new IllegalArgumentException("Can't infer a type of null. Please explicitly cast " + "it to a required type, e.g. CAST(null AS VARCHAR).");
            }
            schemaBuilder.valueColumn(item.getAlias(), valueType);
            continue;
        }
        // Skip buckets that are empty or already drained.
        while (selectsByKey.get(nextKeyIdx).isEmpty()) {
            nextKeyIdx++;
        }
        final SelectExpression orderedKey = selectsByKey.get(nextKeyIdx).remove(0);
        final SqlType keyType = expressionTypeManager.getExpressionSqlType(orderedKey.getExpression());
        schemaBuilder.keyColumn(orderedKey.getAlias(), keyType);
    }
    return schemaBuilder.build();
}
Also used : IntStream(java.util.stream.IntStream) Expression(io.confluent.ksql.execution.expression.tree.Expression) ColumnName(io.confluent.ksql.name.ColumnName) FunctionRegistry(io.confluent.ksql.function.FunctionRegistry) UnqualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.UnqualifiedColumnReferenceExp) Set(java.util.Set) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) Collectors(java.util.stream.Collectors) SelectExpression(io.confluent.ksql.execution.plan.SelectExpression) SelectItem(io.confluent.ksql.parser.tree.SelectItem) Namespace(io.confluent.ksql.schema.ksql.Column.Namespace) Builder(io.confluent.ksql.schema.ksql.LogicalSchema.Builder) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) List(java.util.List) SingleColumn(io.confluent.ksql.parser.tree.SingleColumn) Stream(java.util.stream.Stream) ExpressionTypeManager(io.confluent.ksql.execution.util.ExpressionTypeManager) Optional(java.util.Optional) AllColumns(io.confluent.ksql.parser.tree.AllColumns) ColumnReferenceExp(io.confluent.ksql.execution.expression.tree.ColumnReferenceExp) Column(io.confluent.ksql.schema.ksql.Column) SqlType(io.confluent.ksql.schema.ksql.types.SqlType) ExpressionTypeManager(io.confluent.ksql.execution.util.ExpressionTypeManager) Builder(io.confluent.ksql.schema.ksql.LogicalSchema.Builder) ArrayList(java.util.ArrayList) SelectExpression(io.confluent.ksql.execution.plan.SelectExpression) UnqualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.UnqualifiedColumnReferenceExp) ColumnReferenceExp(io.confluent.ksql.execution.expression.tree.ColumnReferenceExp) ColumnName(io.confluent.ksql.name.ColumnName) Expression(io.confluent.ksql.execution.expression.tree.Expression) SelectExpression(io.confluent.ksql.execution.plan.SelectExpression) ArrayList(java.util.ArrayList) List(java.util.List) SqlType(io.confluent.ksql.schema.ksql.types.SqlType) HashSet(java.util.HashSet)

Aggregations

ColumnReferenceExp (io.confluent.ksql.execution.expression.tree.ColumnReferenceExp)10 ColumnName (io.confluent.ksql.name.ColumnName)9 LogicalSchema (io.confluent.ksql.schema.ksql.LogicalSchema)8 Expression (io.confluent.ksql.execution.expression.tree.Expression)7 UnqualifiedColumnReferenceExp (io.confluent.ksql.execution.expression.tree.UnqualifiedColumnReferenceExp)7 List (java.util.List)7 Set (java.util.Set)7 Collectors (java.util.stream.Collectors)7 SelectExpression (io.confluent.ksql.execution.plan.SelectExpression)6 Builder (io.confluent.ksql.schema.ksql.LogicalSchema.Builder)6 Optional (java.util.Optional)6 ExpressionTypeManager (io.confluent.ksql.execution.util.ExpressionTypeManager)5 SourceName (io.confluent.ksql.name.SourceName)5 Column (io.confluent.ksql.schema.ksql.Column)5 ColumnNames (io.confluent.ksql.schema.ksql.ColumnNames)5 SqlType (io.confluent.ksql.schema.ksql.types.SqlType)5 CodeGenRunner (io.confluent.ksql.execution.codegen.CodeGenRunner)4 QualifiedColumnReferenceExp (io.confluent.ksql.execution.expression.tree.QualifiedColumnReferenceExp)4 Iterables (com.google.common.collect.Iterables)3 AggregateAnalysisResult (io.confluent.ksql.analyzer.AggregateAnalysisResult)3