Search in sources :

Example 1 with Join

use of io.confluent.ksql.planner.JoinTree.Join in project ksql by confluentinc.

the class LogicalPlanner method buildJoin.

/**
 * @param root    the root of the Join Tree
 * @param prefix  the prefix to uniquely identify the plan node
 * @return the PlanNode representing this Join Tree
 */
private JoinNode buildJoin(final Join root, final String prefix, final boolean isWindowed) {
    final PlanNode preRepartitionLeft;
    if (root.getLeft() instanceof JoinTree.Join) {
        preRepartitionLeft = buildJoin((Join) root.getLeft(), prefix + "L_", isWindowed);
    } else {
        final JoinTree.Leaf leaf = (Leaf) root.getLeft();
        preRepartitionLeft = new DataSourceNode(new PlanNodeId("KafkaTopic_" + prefix + "Left"), leaf.getSource().getDataSource(), leaf.getSource().getAlias(), isWindowed, ksqlConfig);
    }
    final PlanNode preRepartitionRight;
    if (root.getRight() instanceof JoinTree.Join) {
        preRepartitionRight = buildJoin((Join) root.getRight(), prefix + "R_", isWindowed);
    } else {
        final JoinTree.Leaf leaf = (Leaf) root.getRight();
        preRepartitionRight = new DataSourceNode(new PlanNodeId("KafkaTopic_" + prefix + "Right"), leaf.getSource().getDataSource(), leaf.getSource().getAlias(), isWindowed, ksqlConfig);
    }
    final Optional<Expression> fkExpression = verifyJoin(root.getInfo(), preRepartitionLeft, preRepartitionRight);
    final JoinKey joinKey = fkExpression.map(columnReferenceExp -> buildForeignJoinKey(root, fkExpression.get())).orElseGet(() -> buildJoinKey(root));
    final PlanNode left = prepareSourceForJoin(root.getLeft(), preRepartitionLeft, prefix + "Left", root.getInfo().getLeftJoinExpression(), fkExpression.isPresent());
    final PlanNode right = prepareSourceForJoin(root.getRight(), preRepartitionRight, prefix + "Right", root.getInfo().getRightJoinExpression(), fkExpression.isPresent());
    return new JoinNode(new PlanNodeId(prefix + "Join"), root.getInfo().getType(), joinKey.rewriteWith(refRewriter::process), prefix.isEmpty(), left, right, root.getInfo().getWithinExpression(), ksqlConfig.getString(KsqlConfig.KSQL_DEFAULT_KEY_FORMAT_CONFIG));
}
Also used : PlanNodeId(io.confluent.ksql.planner.plan.PlanNodeId) JoinInfo(io.confluent.ksql.analyzer.Analysis.JoinInfo) DataSource(io.confluent.ksql.metastore.model.DataSource) Leaf(io.confluent.ksql.planner.JoinTree.Leaf) AggregateAnalysisResult(io.confluent.ksql.analyzer.AggregateAnalysisResult) Into(io.confluent.ksql.analyzer.Analysis.Into) ColumnName(io.confluent.ksql.name.ColumnName) SourceName(io.confluent.ksql.name.SourceName) BiFunction(java.util.function.BiFunction) AggregateAnalyzer(io.confluent.ksql.analyzer.AggregateAnalyzer) FilterNode(io.confluent.ksql.planner.plan.FilterNode) SerdeFeaturesFactory(io.confluent.ksql.serde.SerdeFeaturesFactory) JoinKey(io.confluent.ksql.planner.plan.JoinNode.JoinKey) CodeGenRunner(io.confluent.ksql.execution.codegen.CodeGenRunner) WindowInfo(io.confluent.ksql.serde.WindowInfo) RewrittenAnalysis(io.confluent.ksql.analyzer.RewrittenAnalysis) QueryLimitNode(io.confluent.ksql.planner.plan.QueryLimitNode) AggregateNode(io.confluent.ksql.planner.plan.AggregateNode) AliasedDataSource(io.confluent.ksql.analyzer.Analysis.AliasedDataSource) TimestampExtractionPolicyFactory(io.confluent.ksql.execution.streams.timestamp.TimestampExtractionPolicyFactory) ExpressionTypeManager(io.confluent.ksql.execution.util.ExpressionTypeManager) KsqlBareOutputNode(io.confluent.ksql.planner.plan.KsqlBareOutputNode) SelectionUtil(io.confluent.ksql.planner.plan.SelectionUtil) PartitionBy(io.confluent.ksql.parser.tree.PartitionBy) ColumnReferenceExp(io.confluent.ksql.execution.expression.tree.ColumnReferenceExp) PreJoinProjectNode(io.confluent.ksql.planner.plan.PreJoinProjectNode) VisitParentExpressionVisitor(io.confluent.ksql.execution.expression.tree.VisitParentExpressionVisitor) FinalProjectNode(io.confluent.ksql.planner.plan.FinalProjectNode) ColumnNames(io.confluent.ksql.schema.ksql.ColumnNames) RefinementInfo(io.confluent.ksql.serde.RefinementInfo) ImmutableAnalysis(io.confluent.ksql.analyzer.ImmutableAnalysis) ExpressionEvaluator(io.confluent.ksql.execution.transform.ExpressionEvaluator) Expression(io.confluent.ksql.execution.expression.tree.Expression) JoinType(io.confluent.ksql.planner.plan.JoinNode.JoinType) Set(java.util.Set) QueryFilterNode(io.confluent.ksql.planner.plan.QueryFilterNode) KsqlConfig(io.confluent.ksql.util.KsqlConfig) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) Collectors(java.util.stream.Collectors) TimestampColumn(io.confluent.ksql.execution.timestamp.TimestampColumn) PlanNodeId(io.confluent.ksql.planner.plan.PlanNodeId) SingleSourcePlanNode(io.confluent.ksql.planner.plan.SingleSourcePlanNode) Builder(io.confluent.ksql.schema.ksql.LogicalSchema.Builder) Objects(java.util.Objects) Join(io.confluent.ksql.planner.JoinTree.Join) List(java.util.List) KsqlException(io.confluent.ksql.util.KsqlException) Optional(java.util.Optional) QueryProjectNode(io.confluent.ksql.planner.plan.QueryProjectNode) Column(io.confluent.ksql.schema.ksql.Column) FormatInfo(io.confluent.ksql.serde.FormatInfo) ProjectNode(io.confluent.ksql.planner.plan.ProjectNode) Iterables(com.google.common.collect.Iterables) FormatFactory(io.confluent.ksql.serde.FormatFactory) GrammaticalJoiner(io.confluent.ksql.util.GrammaticalJoiner) KeyFormat(io.confluent.ksql.serde.KeyFormat) JoinNode(io.confluent.ksql.planner.plan.JoinNode) SuppressNode(io.confluent.ksql.planner.plan.SuppressNode) UnqualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.UnqualifiedColumnReferenceExp) PartitionByParamsFactory(io.confluent.ksql.execution.streams.PartitionByParamsFactory) DataSourceType(io.confluent.ksql.metastore.model.DataSource.DataSourceType) Function(java.util.function.Function) NoneFormat(io.confluent.ksql.serde.none.NoneFormat) AsValue(io.confluent.ksql.function.udf.AsValue) QualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.QualifiedColumnReferenceExp) MetaStore(io.confluent.ksql.metastore.MetaStore) GroupBy(io.confluent.ksql.parser.tree.GroupBy) KsqlStructuredDataOutputNode(io.confluent.ksql.planner.plan.KsqlStructuredDataOutputNode) UserRepartitionNode(io.confluent.ksql.planner.plan.UserRepartitionNode) WindowExpression(io.confluent.ksql.parser.tree.WindowExpression) SqlType(io.confluent.ksql.schema.ksql.types.SqlType) SerdeFeatures(io.confluent.ksql.serde.SerdeFeatures) DataSourceNode(io.confluent.ksql.planner.plan.DataSourceNode) NewTopic(io.confluent.ksql.analyzer.Analysis.Into.NewTopic) KsqlWindowExpression(io.confluent.ksql.execution.windows.KsqlWindowExpression) OutputNode(io.confluent.ksql.planner.plan.OutputNode) FilterTypeValidator(io.confluent.ksql.analyzer.FilterTypeValidator) FlatMapNode(io.confluent.ksql.planner.plan.FlatMapNode) ValueFormat(io.confluent.ksql.serde.ValueFormat) SelectExpression(io.confluent.ksql.execution.plan.SelectExpression) NodeLocation(io.confluent.ksql.parser.NodeLocation) PreJoinRepartitionNode(io.confluent.ksql.planner.plan.PreJoinRepartitionNode) FunctionCall(io.confluent.ksql.execution.expression.tree.FunctionCall) KsqlTopic(io.confluent.ksql.execution.ddl.commands.KsqlTopic) PlanNode(io.confluent.ksql.planner.plan.PlanNode) Context(io.confluent.ksql.engine.rewrite.ExpressionTreeRewriter.Context) ExpressionTreeRewriter(io.confluent.ksql.engine.rewrite.ExpressionTreeRewriter) OutputRefinement(io.confluent.ksql.parser.OutputRefinement) FilterType(io.confluent.ksql.analyzer.FilterTypeValidator.FilterType) Collections(java.util.Collections) DataSourceNode(io.confluent.ksql.planner.plan.DataSourceNode) SingleSourcePlanNode(io.confluent.ksql.planner.plan.SingleSourcePlanNode) PlanNode(io.confluent.ksql.planner.plan.PlanNode) JoinKey(io.confluent.ksql.planner.plan.JoinNode.JoinKey) Expression(io.confluent.ksql.execution.expression.tree.Expression) WindowExpression(io.confluent.ksql.parser.tree.WindowExpression) KsqlWindowExpression(io.confluent.ksql.execution.windows.KsqlWindowExpression) SelectExpression(io.confluent.ksql.execution.plan.SelectExpression) JoinNode(io.confluent.ksql.planner.plan.JoinNode) Join(io.confluent.ksql.planner.JoinTree.Join) Leaf(io.confluent.ksql.planner.JoinTree.Leaf) Leaf(io.confluent.ksql.planner.JoinTree.Leaf)

Example 2 with Join

use of io.confluent.ksql.planner.JoinTree.Join in project ksql by confluentinc.

the class LogicalPlanner method buildForeignJoinKey.

private JoinKey buildForeignJoinKey(final Join join, final Expression foreignKeyExpression) {
    final AliasedDataSource leftSource = join.getInfo().getLeftSource();
    final SourceName alias = leftSource.getAlias();
    final List<QualifiedColumnReferenceExp> leftSourceKeys = leftSource.getDataSource().getSchema().key().stream().map(c -> new QualifiedColumnReferenceExp(alias, c.name())).collect(Collectors.toList());
    final VisitParentExpressionVisitor<Optional<Expression>, Context<Void>> aliasRewritter = new VisitParentExpressionVisitor<Optional<Expression>, Context<Void>>(Optional.empty()) {

        @Override
        public Optional<Expression> visitQualifiedColumnReference(final QualifiedColumnReferenceExp node, final Context<Void> ctx) {
            return Optional.of(new UnqualifiedColumnReferenceExp(ColumnNames.generatedJoinColumnAlias(node.getQualifier(), node.getColumnName())));
        }
    };
    final Expression aliasedForeignKeyExpression = ExpressionTreeRewriter.rewriteWith(aliasRewritter::process, foreignKeyExpression);
    return JoinKey.foreignKey(aliasedForeignKeyExpression, leftSourceKeys);
}
Also used : JoinInfo(io.confluent.ksql.analyzer.Analysis.JoinInfo) DataSource(io.confluent.ksql.metastore.model.DataSource) Leaf(io.confluent.ksql.planner.JoinTree.Leaf) AggregateAnalysisResult(io.confluent.ksql.analyzer.AggregateAnalysisResult) Into(io.confluent.ksql.analyzer.Analysis.Into) ColumnName(io.confluent.ksql.name.ColumnName) SourceName(io.confluent.ksql.name.SourceName) BiFunction(java.util.function.BiFunction) AggregateAnalyzer(io.confluent.ksql.analyzer.AggregateAnalyzer) FilterNode(io.confluent.ksql.planner.plan.FilterNode) SerdeFeaturesFactory(io.confluent.ksql.serde.SerdeFeaturesFactory) JoinKey(io.confluent.ksql.planner.plan.JoinNode.JoinKey) CodeGenRunner(io.confluent.ksql.execution.codegen.CodeGenRunner) WindowInfo(io.confluent.ksql.serde.WindowInfo) RewrittenAnalysis(io.confluent.ksql.analyzer.RewrittenAnalysis) QueryLimitNode(io.confluent.ksql.planner.plan.QueryLimitNode) AggregateNode(io.confluent.ksql.planner.plan.AggregateNode) AliasedDataSource(io.confluent.ksql.analyzer.Analysis.AliasedDataSource) TimestampExtractionPolicyFactory(io.confluent.ksql.execution.streams.timestamp.TimestampExtractionPolicyFactory) ExpressionTypeManager(io.confluent.ksql.execution.util.ExpressionTypeManager) KsqlBareOutputNode(io.confluent.ksql.planner.plan.KsqlBareOutputNode) SelectionUtil(io.confluent.ksql.planner.plan.SelectionUtil) PartitionBy(io.confluent.ksql.parser.tree.PartitionBy) ColumnReferenceExp(io.confluent.ksql.execution.expression.tree.ColumnReferenceExp) PreJoinProjectNode(io.confluent.ksql.planner.plan.PreJoinProjectNode) VisitParentExpressionVisitor(io.confluent.ksql.execution.expression.tree.VisitParentExpressionVisitor) FinalProjectNode(io.confluent.ksql.planner.plan.FinalProjectNode) ColumnNames(io.confluent.ksql.schema.ksql.ColumnNames) RefinementInfo(io.confluent.ksql.serde.RefinementInfo) ImmutableAnalysis(io.confluent.ksql.analyzer.ImmutableAnalysis) ExpressionEvaluator(io.confluent.ksql.execution.transform.ExpressionEvaluator) Expression(io.confluent.ksql.execution.expression.tree.Expression) JoinType(io.confluent.ksql.planner.plan.JoinNode.JoinType) Set(java.util.Set) QueryFilterNode(io.confluent.ksql.planner.plan.QueryFilterNode) KsqlConfig(io.confluent.ksql.util.KsqlConfig) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) Collectors(java.util.stream.Collectors) TimestampColumn(io.confluent.ksql.execution.timestamp.TimestampColumn) PlanNodeId(io.confluent.ksql.planner.plan.PlanNodeId) SingleSourcePlanNode(io.confluent.ksql.planner.plan.SingleSourcePlanNode) Builder(io.confluent.ksql.schema.ksql.LogicalSchema.Builder) Objects(java.util.Objects) Join(io.confluent.ksql.planner.JoinTree.Join) List(java.util.List) KsqlException(io.confluent.ksql.util.KsqlException) Optional(java.util.Optional) QueryProjectNode(io.confluent.ksql.planner.plan.QueryProjectNode) Column(io.confluent.ksql.schema.ksql.Column) FormatInfo(io.confluent.ksql.serde.FormatInfo) ProjectNode(io.confluent.ksql.planner.plan.ProjectNode) Iterables(com.google.common.collect.Iterables) FormatFactory(io.confluent.ksql.serde.FormatFactory) GrammaticalJoiner(io.confluent.ksql.util.GrammaticalJoiner) KeyFormat(io.confluent.ksql.serde.KeyFormat) JoinNode(io.confluent.ksql.planner.plan.JoinNode) SuppressNode(io.confluent.ksql.planner.plan.SuppressNode) UnqualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.UnqualifiedColumnReferenceExp) PartitionByParamsFactory(io.confluent.ksql.execution.streams.PartitionByParamsFactory) DataSourceType(io.confluent.ksql.metastore.model.DataSource.DataSourceType) Function(java.util.function.Function) NoneFormat(io.confluent.ksql.serde.none.NoneFormat) AsValue(io.confluent.ksql.function.udf.AsValue) QualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.QualifiedColumnReferenceExp) MetaStore(io.confluent.ksql.metastore.MetaStore) GroupBy(io.confluent.ksql.parser.tree.GroupBy) KsqlStructuredDataOutputNode(io.confluent.ksql.planner.plan.KsqlStructuredDataOutputNode) UserRepartitionNode(io.confluent.ksql.planner.plan.UserRepartitionNode) WindowExpression(io.confluent.ksql.parser.tree.WindowExpression) SqlType(io.confluent.ksql.schema.ksql.types.SqlType) SerdeFeatures(io.confluent.ksql.serde.SerdeFeatures) DataSourceNode(io.confluent.ksql.planner.plan.DataSourceNode) NewTopic(io.confluent.ksql.analyzer.Analysis.Into.NewTopic) KsqlWindowExpression(io.confluent.ksql.execution.windows.KsqlWindowExpression) OutputNode(io.confluent.ksql.planner.plan.OutputNode) FilterTypeValidator(io.confluent.ksql.analyzer.FilterTypeValidator) FlatMapNode(io.confluent.ksql.planner.plan.FlatMapNode) ValueFormat(io.confluent.ksql.serde.ValueFormat) SelectExpression(io.confluent.ksql.execution.plan.SelectExpression) NodeLocation(io.confluent.ksql.parser.NodeLocation) PreJoinRepartitionNode(io.confluent.ksql.planner.plan.PreJoinRepartitionNode) FunctionCall(io.confluent.ksql.execution.expression.tree.FunctionCall) KsqlTopic(io.confluent.ksql.execution.ddl.commands.KsqlTopic) PlanNode(io.confluent.ksql.planner.plan.PlanNode) Context(io.confluent.ksql.engine.rewrite.ExpressionTreeRewriter.Context) ExpressionTreeRewriter(io.confluent.ksql.engine.rewrite.ExpressionTreeRewriter) OutputRefinement(io.confluent.ksql.parser.OutputRefinement) FilterType(io.confluent.ksql.analyzer.FilterTypeValidator.FilterType) Collections(java.util.Collections) Context(io.confluent.ksql.engine.rewrite.ExpressionTreeRewriter.Context) Optional(java.util.Optional) QualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.QualifiedColumnReferenceExp) SourceName(io.confluent.ksql.name.SourceName) VisitParentExpressionVisitor(io.confluent.ksql.execution.expression.tree.VisitParentExpressionVisitor) UnqualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.UnqualifiedColumnReferenceExp) AliasedDataSource(io.confluent.ksql.analyzer.Analysis.AliasedDataSource) Expression(io.confluent.ksql.execution.expression.tree.Expression) WindowExpression(io.confluent.ksql.parser.tree.WindowExpression) KsqlWindowExpression(io.confluent.ksql.execution.windows.KsqlWindowExpression) SelectExpression(io.confluent.ksql.execution.plan.SelectExpression)

Example 3 with Join

use of io.confluent.ksql.planner.JoinTree.Join in project ksql by confluentinc.

the class JoinTreeTest method handlesRightThreeWayJoin.

@Test
public void handlesRightThreeWayJoin() {
    // Given:
    when(j1.getLeftSource()).thenReturn(a);
    when(j1.getRightSource()).thenReturn(b);
    when(j2.getLeftSource()).thenReturn(c);
    when(j2.getRightSource()).thenReturn(a);
    when(j2.flip()).thenReturn(j2);
    final List<JoinInfo> joins = ImmutableList.of(j1, j2);
    // When:
    final Node root = JoinTree.build(joins);
    // Then:
    assertThat(root, instanceOf(Join.class));
    assertThat(root, is(new Join(new Join(new Leaf(a), new Leaf(b), j1), new Leaf(c), j2)));
}
Also used : JoinInfo(io.confluent.ksql.analyzer.Analysis.JoinInfo) Node(io.confluent.ksql.planner.JoinTree.Node) Join(io.confluent.ksql.planner.JoinTree.Join) Leaf(io.confluent.ksql.planner.JoinTree.Leaf) Test(org.junit.Test)

Example 4 with Join

use of io.confluent.ksql.planner.JoinTree.Join in project ksql by confluentinc.

the class LogicalPlanner method joinOnNonKeyAttribute.

private static boolean joinOnNonKeyAttribute(final Expression joinExpression, final PlanNode node, final AliasedDataSource aliasedDataSource) {
    if (!(joinExpression instanceof ColumnReferenceExp)) {
        return true;
    }
    final ColumnReferenceExp simpleJoinExpression = (ColumnReferenceExp) joinExpression;
    final ColumnName joinAttributeName = simpleJoinExpression.getColumnName();
    final List<DataSourceNode> dataSourceNodes = node.getSourceNodes().collect(Collectors.toList());
    final List<Column> keyColumns;
    // n-way join sub-tree (ie, not a leaf)
    if (isInnerNode(node)) {
        final DataSourceNode qualifiedNode;
        if (simpleJoinExpression.maybeQualifier().isPresent()) {
            final SourceName qualifierOrAlias = simpleJoinExpression.maybeQualifier().get();
            final SourceName qualifier;
            if (aliasedDataSource.getAlias().equals(qualifierOrAlias)) {
                qualifier = aliasedDataSource.getDataSource().getName();
            } else {
                qualifier = qualifierOrAlias;
            }
            final List<DataSourceNode> allNodes = dataSourceNodes.stream().filter(n -> n.getDataSource().getName().equals(qualifier)).collect(Collectors.toList());
            if (allNodes.size() != 1) {
                throw new KsqlException(String.format("Join qualifier '%s' could not be resolved (either not found or not unique).", qualifier));
            }
            qualifiedNode = Iterables.getOnlyElement(allNodes);
        } else {
            final List<DataSourceNode> allNodes = dataSourceNodes.stream().filter(n -> n.getSchema().findColumn(simpleJoinExpression.getColumnName()).isPresent()).collect(Collectors.toList());
            if (allNodes.size() != 1) {
                throw new KsqlException(String.format("Join identifier '%s' could not be resolved (either not found or not unique).", joinAttributeName));
            }
            qualifiedNode = Iterables.getOnlyElement(allNodes);
        }
        keyColumns = qualifiedNode.getSchema().key();
    } else {
        // leaf node: we know we have single data source
        keyColumns = Iterables.getOnlyElement(dataSourceNodes).getSchema().key();
    }
    // - thus, if the key has more than one column, the join is not on the key
    if (keyColumns.size() > 1) {
        return true;
    }
    return !joinAttributeName.equals(Iterables.getOnlyElement(keyColumns).name());
}
Also used : JoinInfo(io.confluent.ksql.analyzer.Analysis.JoinInfo) DataSource(io.confluent.ksql.metastore.model.DataSource) Leaf(io.confluent.ksql.planner.JoinTree.Leaf) AggregateAnalysisResult(io.confluent.ksql.analyzer.AggregateAnalysisResult) Into(io.confluent.ksql.analyzer.Analysis.Into) ColumnName(io.confluent.ksql.name.ColumnName) SourceName(io.confluent.ksql.name.SourceName) BiFunction(java.util.function.BiFunction) AggregateAnalyzer(io.confluent.ksql.analyzer.AggregateAnalyzer) FilterNode(io.confluent.ksql.planner.plan.FilterNode) SerdeFeaturesFactory(io.confluent.ksql.serde.SerdeFeaturesFactory) JoinKey(io.confluent.ksql.planner.plan.JoinNode.JoinKey) CodeGenRunner(io.confluent.ksql.execution.codegen.CodeGenRunner) WindowInfo(io.confluent.ksql.serde.WindowInfo) RewrittenAnalysis(io.confluent.ksql.analyzer.RewrittenAnalysis) QueryLimitNode(io.confluent.ksql.planner.plan.QueryLimitNode) AggregateNode(io.confluent.ksql.planner.plan.AggregateNode) AliasedDataSource(io.confluent.ksql.analyzer.Analysis.AliasedDataSource) TimestampExtractionPolicyFactory(io.confluent.ksql.execution.streams.timestamp.TimestampExtractionPolicyFactory) ExpressionTypeManager(io.confluent.ksql.execution.util.ExpressionTypeManager) KsqlBareOutputNode(io.confluent.ksql.planner.plan.KsqlBareOutputNode) SelectionUtil(io.confluent.ksql.planner.plan.SelectionUtil) PartitionBy(io.confluent.ksql.parser.tree.PartitionBy) ColumnReferenceExp(io.confluent.ksql.execution.expression.tree.ColumnReferenceExp) PreJoinProjectNode(io.confluent.ksql.planner.plan.PreJoinProjectNode) VisitParentExpressionVisitor(io.confluent.ksql.execution.expression.tree.VisitParentExpressionVisitor) FinalProjectNode(io.confluent.ksql.planner.plan.FinalProjectNode) ColumnNames(io.confluent.ksql.schema.ksql.ColumnNames) RefinementInfo(io.confluent.ksql.serde.RefinementInfo) ImmutableAnalysis(io.confluent.ksql.analyzer.ImmutableAnalysis) ExpressionEvaluator(io.confluent.ksql.execution.transform.ExpressionEvaluator) Expression(io.confluent.ksql.execution.expression.tree.Expression) JoinType(io.confluent.ksql.planner.plan.JoinNode.JoinType) Set(java.util.Set) QueryFilterNode(io.confluent.ksql.planner.plan.QueryFilterNode) KsqlConfig(io.confluent.ksql.util.KsqlConfig) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) Collectors(java.util.stream.Collectors) TimestampColumn(io.confluent.ksql.execution.timestamp.TimestampColumn) PlanNodeId(io.confluent.ksql.planner.plan.PlanNodeId) SingleSourcePlanNode(io.confluent.ksql.planner.plan.SingleSourcePlanNode) Builder(io.confluent.ksql.schema.ksql.LogicalSchema.Builder) Objects(java.util.Objects) Join(io.confluent.ksql.planner.JoinTree.Join) List(java.util.List) KsqlException(io.confluent.ksql.util.KsqlException) Optional(java.util.Optional) QueryProjectNode(io.confluent.ksql.planner.plan.QueryProjectNode) Column(io.confluent.ksql.schema.ksql.Column) FormatInfo(io.confluent.ksql.serde.FormatInfo) ProjectNode(io.confluent.ksql.planner.plan.ProjectNode) Iterables(com.google.common.collect.Iterables) FormatFactory(io.confluent.ksql.serde.FormatFactory) GrammaticalJoiner(io.confluent.ksql.util.GrammaticalJoiner) KeyFormat(io.confluent.ksql.serde.KeyFormat) JoinNode(io.confluent.ksql.planner.plan.JoinNode) SuppressNode(io.confluent.ksql.planner.plan.SuppressNode) UnqualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.UnqualifiedColumnReferenceExp) PartitionByParamsFactory(io.confluent.ksql.execution.streams.PartitionByParamsFactory) DataSourceType(io.confluent.ksql.metastore.model.DataSource.DataSourceType) Function(java.util.function.Function) NoneFormat(io.confluent.ksql.serde.none.NoneFormat) AsValue(io.confluent.ksql.function.udf.AsValue) QualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.QualifiedColumnReferenceExp) MetaStore(io.confluent.ksql.metastore.MetaStore) GroupBy(io.confluent.ksql.parser.tree.GroupBy) KsqlStructuredDataOutputNode(io.confluent.ksql.planner.plan.KsqlStructuredDataOutputNode) UserRepartitionNode(io.confluent.ksql.planner.plan.UserRepartitionNode) WindowExpression(io.confluent.ksql.parser.tree.WindowExpression) SqlType(io.confluent.ksql.schema.ksql.types.SqlType) SerdeFeatures(io.confluent.ksql.serde.SerdeFeatures) DataSourceNode(io.confluent.ksql.planner.plan.DataSourceNode) NewTopic(io.confluent.ksql.analyzer.Analysis.Into.NewTopic) KsqlWindowExpression(io.confluent.ksql.execution.windows.KsqlWindowExpression) OutputNode(io.confluent.ksql.planner.plan.OutputNode) FilterTypeValidator(io.confluent.ksql.analyzer.FilterTypeValidator) FlatMapNode(io.confluent.ksql.planner.plan.FlatMapNode) ValueFormat(io.confluent.ksql.serde.ValueFormat) SelectExpression(io.confluent.ksql.execution.plan.SelectExpression) NodeLocation(io.confluent.ksql.parser.NodeLocation) PreJoinRepartitionNode(io.confluent.ksql.planner.plan.PreJoinRepartitionNode) FunctionCall(io.confluent.ksql.execution.expression.tree.FunctionCall) KsqlTopic(io.confluent.ksql.execution.ddl.commands.KsqlTopic) PlanNode(io.confluent.ksql.planner.plan.PlanNode) Context(io.confluent.ksql.engine.rewrite.ExpressionTreeRewriter.Context) ExpressionTreeRewriter(io.confluent.ksql.engine.rewrite.ExpressionTreeRewriter) OutputRefinement(io.confluent.ksql.parser.OutputRefinement) FilterType(io.confluent.ksql.analyzer.FilterTypeValidator.FilterType) Collections(java.util.Collections) ColumnReferenceExp(io.confluent.ksql.execution.expression.tree.ColumnReferenceExp) UnqualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.UnqualifiedColumnReferenceExp) QualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.QualifiedColumnReferenceExp) ColumnName(io.confluent.ksql.name.ColumnName) DataSourceNode(io.confluent.ksql.planner.plan.DataSourceNode) TimestampColumn(io.confluent.ksql.execution.timestamp.TimestampColumn) Column(io.confluent.ksql.schema.ksql.Column) SourceName(io.confluent.ksql.name.SourceName) KsqlException(io.confluent.ksql.util.KsqlException)

Example 5 with Join

use of io.confluent.ksql.planner.JoinTree.Join in project ksql by confluentinc.

the class LogicalPlanner method buildAggregateSchema.

private LogicalSchema buildAggregateSchema(final PlanNode sourcePlanNode, final GroupBy groupBy, final List<SelectExpression> projectionExpressions) {
    final LogicalSchema sourceSchema = sourcePlanNode.getSchema();
    final LogicalSchema projectionSchema = SelectionUtil.buildProjectionSchema(sourceSchema.withPseudoAndKeyColsInValue(analysis.getWindowExpression().isPresent(), ksqlConfig), projectionExpressions, metaStore);
    final List<Expression> groupByExps = groupBy.getGroupingExpressions();
    final Function<Expression, Optional<ColumnName>> selectResolver = expression -> {
        final List<ColumnName> foundInProjection = projectionExpressions.stream().filter(e -> e.getExpression().equals(expression)).map(SelectExpression::getAlias).collect(Collectors.toList());
        switch(foundInProjection.size()) {
            case 0:
                return Optional.empty();
            case 1:
                return Optional.of(foundInProjection.get(0));
            default:
                final String keys = GrammaticalJoiner.and().join(foundInProjection);
                throw new KsqlException("The projection contains a key column more than once: " + keys + "." + System.lineSeparator() + "Each key column must only be in the projection once. " + "If you intended to copy the key into the value, then consider using the " + AsValue.NAME + " function to indicate which key reference should be copied.");
        }
    };
    final List<Column> valueColumns;
    if (analysis.getInto().isPresent()) {
        // Persistent query:
        final Set<ColumnName> keyColumnNames = groupBy.getGroupingExpressions().stream().map(selectResolver).filter(Optional::isPresent).map(Optional::get).collect(Collectors.toSet());
        valueColumns = projectionSchema.value().stream().filter(col -> !keyColumnNames.contains(col.name())).collect(Collectors.toList());
        if (valueColumns.isEmpty()) {
            throw new KsqlException("The projection contains no value columns.");
        }
    } else {
        // Transient query:
        // Transient queries only return value columns, so must have key columns in the value:
        valueColumns = projectionSchema.columns();
    }
    final Builder builder = LogicalSchema.builder();
    final ExpressionTypeManager typeManager = new ExpressionTypeManager(sourceSchema, metaStore);
    for (final Expression expression : groupByExps) {
        final SqlType keyType = typeManager.getExpressionSqlType(expression);
        final ColumnName keyName = selectResolver.apply(expression).orElseGet(() -> expression instanceof ColumnReferenceExp ? ((ColumnReferenceExp) expression).getColumnName() : ColumnNames.uniqueAliasFor(expression, sourceSchema));
        builder.keyColumn(keyName, keyType);
    }
    return builder.valueColumns(valueColumns).build();
}
Also used : JoinInfo(io.confluent.ksql.analyzer.Analysis.JoinInfo) DataSource(io.confluent.ksql.metastore.model.DataSource) Leaf(io.confluent.ksql.planner.JoinTree.Leaf) AggregateAnalysisResult(io.confluent.ksql.analyzer.AggregateAnalysisResult) Into(io.confluent.ksql.analyzer.Analysis.Into) ColumnName(io.confluent.ksql.name.ColumnName) SourceName(io.confluent.ksql.name.SourceName) BiFunction(java.util.function.BiFunction) AggregateAnalyzer(io.confluent.ksql.analyzer.AggregateAnalyzer) FilterNode(io.confluent.ksql.planner.plan.FilterNode) SerdeFeaturesFactory(io.confluent.ksql.serde.SerdeFeaturesFactory) JoinKey(io.confluent.ksql.planner.plan.JoinNode.JoinKey) CodeGenRunner(io.confluent.ksql.execution.codegen.CodeGenRunner) WindowInfo(io.confluent.ksql.serde.WindowInfo) RewrittenAnalysis(io.confluent.ksql.analyzer.RewrittenAnalysis) QueryLimitNode(io.confluent.ksql.planner.plan.QueryLimitNode) AggregateNode(io.confluent.ksql.planner.plan.AggregateNode) AliasedDataSource(io.confluent.ksql.analyzer.Analysis.AliasedDataSource) TimestampExtractionPolicyFactory(io.confluent.ksql.execution.streams.timestamp.TimestampExtractionPolicyFactory) ExpressionTypeManager(io.confluent.ksql.execution.util.ExpressionTypeManager) KsqlBareOutputNode(io.confluent.ksql.planner.plan.KsqlBareOutputNode) SelectionUtil(io.confluent.ksql.planner.plan.SelectionUtil) PartitionBy(io.confluent.ksql.parser.tree.PartitionBy) ColumnReferenceExp(io.confluent.ksql.execution.expression.tree.ColumnReferenceExp) PreJoinProjectNode(io.confluent.ksql.planner.plan.PreJoinProjectNode) VisitParentExpressionVisitor(io.confluent.ksql.execution.expression.tree.VisitParentExpressionVisitor) FinalProjectNode(io.confluent.ksql.planner.plan.FinalProjectNode) ColumnNames(io.confluent.ksql.schema.ksql.ColumnNames) RefinementInfo(io.confluent.ksql.serde.RefinementInfo) ImmutableAnalysis(io.confluent.ksql.analyzer.ImmutableAnalysis) ExpressionEvaluator(io.confluent.ksql.execution.transform.ExpressionEvaluator) Expression(io.confluent.ksql.execution.expression.tree.Expression) JoinType(io.confluent.ksql.planner.plan.JoinNode.JoinType) Set(java.util.Set) QueryFilterNode(io.confluent.ksql.planner.plan.QueryFilterNode) KsqlConfig(io.confluent.ksql.util.KsqlConfig) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) Collectors(java.util.stream.Collectors) TimestampColumn(io.confluent.ksql.execution.timestamp.TimestampColumn) PlanNodeId(io.confluent.ksql.planner.plan.PlanNodeId) SingleSourcePlanNode(io.confluent.ksql.planner.plan.SingleSourcePlanNode) Builder(io.confluent.ksql.schema.ksql.LogicalSchema.Builder) Objects(java.util.Objects) Join(io.confluent.ksql.planner.JoinTree.Join) List(java.util.List) KsqlException(io.confluent.ksql.util.KsqlException) Optional(java.util.Optional) QueryProjectNode(io.confluent.ksql.planner.plan.QueryProjectNode) Column(io.confluent.ksql.schema.ksql.Column) FormatInfo(io.confluent.ksql.serde.FormatInfo) ProjectNode(io.confluent.ksql.planner.plan.ProjectNode) Iterables(com.google.common.collect.Iterables) FormatFactory(io.confluent.ksql.serde.FormatFactory) GrammaticalJoiner(io.confluent.ksql.util.GrammaticalJoiner) KeyFormat(io.confluent.ksql.serde.KeyFormat) JoinNode(io.confluent.ksql.planner.plan.JoinNode) SuppressNode(io.confluent.ksql.planner.plan.SuppressNode) UnqualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.UnqualifiedColumnReferenceExp) PartitionByParamsFactory(io.confluent.ksql.execution.streams.PartitionByParamsFactory) DataSourceType(io.confluent.ksql.metastore.model.DataSource.DataSourceType) Function(java.util.function.Function) NoneFormat(io.confluent.ksql.serde.none.NoneFormat) AsValue(io.confluent.ksql.function.udf.AsValue) QualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.QualifiedColumnReferenceExp) MetaStore(io.confluent.ksql.metastore.MetaStore) GroupBy(io.confluent.ksql.parser.tree.GroupBy) KsqlStructuredDataOutputNode(io.confluent.ksql.planner.plan.KsqlStructuredDataOutputNode) UserRepartitionNode(io.confluent.ksql.planner.plan.UserRepartitionNode) WindowExpression(io.confluent.ksql.parser.tree.WindowExpression) SqlType(io.confluent.ksql.schema.ksql.types.SqlType) SerdeFeatures(io.confluent.ksql.serde.SerdeFeatures) DataSourceNode(io.confluent.ksql.planner.plan.DataSourceNode) NewTopic(io.confluent.ksql.analyzer.Analysis.Into.NewTopic) KsqlWindowExpression(io.confluent.ksql.execution.windows.KsqlWindowExpression) OutputNode(io.confluent.ksql.planner.plan.OutputNode) FilterTypeValidator(io.confluent.ksql.analyzer.FilterTypeValidator) FlatMapNode(io.confluent.ksql.planner.plan.FlatMapNode) ValueFormat(io.confluent.ksql.serde.ValueFormat) SelectExpression(io.confluent.ksql.execution.plan.SelectExpression) NodeLocation(io.confluent.ksql.parser.NodeLocation) PreJoinRepartitionNode(io.confluent.ksql.planner.plan.PreJoinRepartitionNode) FunctionCall(io.confluent.ksql.execution.expression.tree.FunctionCall) KsqlTopic(io.confluent.ksql.execution.ddl.commands.KsqlTopic) PlanNode(io.confluent.ksql.planner.plan.PlanNode) Context(io.confluent.ksql.engine.rewrite.ExpressionTreeRewriter.Context) ExpressionTreeRewriter(io.confluent.ksql.engine.rewrite.ExpressionTreeRewriter) OutputRefinement(io.confluent.ksql.parser.OutputRefinement) FilterType(io.confluent.ksql.analyzer.FilterTypeValidator.FilterType) Collections(java.util.Collections) ExpressionTypeManager(io.confluent.ksql.execution.util.ExpressionTypeManager) Optional(java.util.Optional) Builder(io.confluent.ksql.schema.ksql.LogicalSchema.Builder) LogicalSchema(io.confluent.ksql.schema.ksql.LogicalSchema) SelectExpression(io.confluent.ksql.execution.plan.SelectExpression) KsqlException(io.confluent.ksql.util.KsqlException) ColumnName(io.confluent.ksql.name.ColumnName) ColumnReferenceExp(io.confluent.ksql.execution.expression.tree.ColumnReferenceExp) UnqualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.UnqualifiedColumnReferenceExp) QualifiedColumnReferenceExp(io.confluent.ksql.execution.expression.tree.QualifiedColumnReferenceExp) Expression(io.confluent.ksql.execution.expression.tree.Expression) WindowExpression(io.confluent.ksql.parser.tree.WindowExpression) KsqlWindowExpression(io.confluent.ksql.execution.windows.KsqlWindowExpression) SelectExpression(io.confluent.ksql.execution.plan.SelectExpression) TimestampColumn(io.confluent.ksql.execution.timestamp.TimestampColumn) Column(io.confluent.ksql.schema.ksql.Column) List(java.util.List) SqlType(io.confluent.ksql.schema.ksql.types.SqlType)

Aggregations

JoinInfo (io.confluent.ksql.analyzer.Analysis.JoinInfo)7 Join (io.confluent.ksql.planner.JoinTree.Join)7 Leaf (io.confluent.ksql.planner.JoinTree.Leaf)7 Iterables (com.google.common.collect.Iterables)4 AggregateAnalysisResult (io.confluent.ksql.analyzer.AggregateAnalysisResult)4 AggregateAnalyzer (io.confluent.ksql.analyzer.AggregateAnalyzer)4 AliasedDataSource (io.confluent.ksql.analyzer.Analysis.AliasedDataSource)4 Into (io.confluent.ksql.analyzer.Analysis.Into)4 NewTopic (io.confluent.ksql.analyzer.Analysis.Into.NewTopic)4 FilterTypeValidator (io.confluent.ksql.analyzer.FilterTypeValidator)4 FilterType (io.confluent.ksql.analyzer.FilterTypeValidator.FilterType)4 ImmutableAnalysis (io.confluent.ksql.analyzer.ImmutableAnalysis)4 RewrittenAnalysis (io.confluent.ksql.analyzer.RewrittenAnalysis)4 ExpressionTreeRewriter (io.confluent.ksql.engine.rewrite.ExpressionTreeRewriter)4 Context (io.confluent.ksql.engine.rewrite.ExpressionTreeRewriter.Context)4 CodeGenRunner (io.confluent.ksql.execution.codegen.CodeGenRunner)4 KsqlTopic (io.confluent.ksql.execution.ddl.commands.KsqlTopic)4 ColumnReferenceExp (io.confluent.ksql.execution.expression.tree.ColumnReferenceExp)4 Expression (io.confluent.ksql.execution.expression.tree.Expression)4 FunctionCall (io.confluent.ksql.execution.expression.tree.FunctionCall)4