Use of io.confluent.ksql.execution.expression.tree.ColumnReferenceExp in project ksql by confluentinc.
Class Repartitioning, method repartitionNeeded.
/**
 * Decides whether partitioning the data by {@code partitionBy} requires a repartition.
 *
 * <p>A repartition is avoided only when the schema has exactly one key column and
 * {@code partitionBy} consists of a single column reference naming that same key column.
 * Every other combination is reported as needing a repartition.
 *
 * @param schema the schema of the data before any repartition
 * @param partitionBy the expressions to partition by
 * @return {@code true} if a repartition is needed, {@code false} if the existing key is kept.
 */
public static boolean repartitionNeeded(final LogicalSchema schema, final List<Expression> partitionBy) {
  // Zero key columns: there is no current key to keep. More than one key column: always
  // reported as a repartition by this check (this logic would need revisiting if
  // partitioning by several key columns were to be recognised here).
  final boolean exactlyOneKey = schema.key().size() == 1;
  if (!exactlyOneKey || partitionBy.size() != 1) {
    // A differing number of expressions and key columns must be a repartition.
    return true;
  }

  final Expression candidate = partitionBy.get(0);
  if (candidate instanceof ColumnReferenceExp) {
    // Only a plain reference to the current key column leaves the key unchanged.
    final ColumnName requestedKey = ((ColumnReferenceExp) candidate).getColumnName();
    return !requestedKey.equals(schema.key().get(0).name());
  }

  // Any expression other than a column reference means the key is changing.
  return true;
}
Use of io.confluent.ksql.execution.expression.tree.ColumnReferenceExp in project ksql by confluentinc.
Class LogicalPlanner, method joinOnNonKeyAttribute.
/**
 * Determines whether a join expression is anything other than a reference to the single key
 * column of the source it resolves to.
 *
 * <p>For an inner node (an n-way join sub-tree) the expression is first resolved to exactly one
 * of the node's data sources: by qualifier if the reference has one (an alias matching
 * {@code aliasedDataSource} is translated to that source's name), otherwise by finding the one
 * source whose schema contains the referenced column. For a leaf node the single data source
 * is used directly.
 *
 * @param joinExpression the join criteria expression for one side of the join
 * @param node the plan node for that side of the join
 * @param aliasedDataSource the aliased source used to translate an alias qualifier into a
 *     source name
 * @return {@code false} only if {@code joinExpression} is a column reference whose name equals
 *     the name of the sole key column of the resolved source; {@code true} otherwise,
 *     including when the source has no key columns or more than one.
 * @throws KsqlException if, for an inner node, the reference does not resolve to exactly one
 *     data source
 */
private static boolean joinOnNonKeyAttribute(final Expression joinExpression, final PlanNode node, final AliasedDataSource aliasedDataSource) {
  if (!(joinExpression instanceof ColumnReferenceExp)) {
    // Anything but a plain column reference cannot be the key column itself.
    return true;
  }
  final ColumnReferenceExp simpleJoinExpression = (ColumnReferenceExp) joinExpression;
  final ColumnName joinAttributeName = simpleJoinExpression.getColumnName();
  final List<DataSourceNode> dataSourceNodes = node.getSourceNodes().collect(Collectors.toList());
  final List<Column> keyColumns;
  // n-way join sub-tree (ie, not a leaf)
  if (isInnerNode(node)) {
    final DataSourceNode qualifiedNode;
    if (simpleJoinExpression.maybeQualifier().isPresent()) {
      final SourceName qualifierOrAlias = simpleJoinExpression.maybeQualifier().get();
      // The qualifier may be the alias of the aliased source; if so, use the real source name.
      final SourceName qualifier;
      if (aliasedDataSource.getAlias().equals(qualifierOrAlias)) {
        qualifier = aliasedDataSource.getDataSource().getName();
      } else {
        qualifier = qualifierOrAlias;
      }
      final List<DataSourceNode> allNodes = dataSourceNodes.stream()
          .filter(n -> n.getDataSource().getName().equals(qualifier))
          .collect(Collectors.toList());
      if (allNodes.size() != 1) {
        throw new KsqlException(String.format("Join qualifier '%s' could not be resolved (either not found or not unique).", qualifier));
      }
      qualifiedNode = Iterables.getOnlyElement(allNodes);
    } else {
      // Unqualified reference: it must belong to exactly one of the sources.
      final List<DataSourceNode> allNodes = dataSourceNodes.stream()
          .filter(n -> n.getSchema().findColumn(joinAttributeName).isPresent())
          .collect(Collectors.toList());
      if (allNodes.size() != 1) {
        throw new KsqlException(String.format("Join identifier '%s' could not be resolved (either not found or not unique).", joinAttributeName));
      }
      qualifiedNode = Iterables.getOnlyElement(allNodes);
    }
    keyColumns = qualifiedNode.getSchema().key();
  } else {
    // leaf node: we know we have single data source
    keyColumns = Iterables.getOnlyElement(dataSourceNodes).getSchema().key();
  }
  // The join attribute is a single column:
  // - thus, if the key has more than one column, the join is not on the key
  // - and if the key has no columns at all, the join cannot be on the key either
  //   (previously this case fell through and failed with NoSuchElementException)
  if (keyColumns.size() != 1) {
    return true;
  }
  return !joinAttributeName.equals(keyColumns.get(0).name());
}
Use of io.confluent.ksql.execution.expression.tree.ColumnReferenceExp in project ksql by confluentinc.
Class LogicalPlanner, method buildAggregateSchema.
/**
 * Builds the schema produced by an aggregation.
 *
 * <p>One key column is added per grouping expression. Its name is the alias the expression has
 * in the projection if it appears there exactly once; otherwise the column name if the
 * expression is a column reference; otherwise a generated unique alias. For a query with an
 * INTO target the projected columns that correspond to grouping expressions are left out of
 * the value; otherwise every projected column is kept in the value.
 *
 * @param sourcePlanNode the node feeding the aggregation
 * @param groupBy the grouping expressions
 * @param projectionExpressions the select expressions of the projection
 * @return the schema of the aggregate
 * @throws KsqlException if a grouping expression is projected more than once, or if a query
 *     with an INTO target would end up with no value columns
 */
private LogicalSchema buildAggregateSchema(final PlanNode sourcePlanNode, final GroupBy groupBy, final List<SelectExpression> projectionExpressions) {
  final LogicalSchema sourceSchema = sourcePlanNode.getSchema();
  final boolean windowed = analysis.getWindowExpression().isPresent();
  final LogicalSchema projectionSchema = SelectionUtil.buildProjectionSchema(
      sourceSchema.withPseudoAndKeyColsInValue(windowed, ksqlConfig),
      projectionExpressions,
      metaStore);
  final List<Expression> groupByExps = groupBy.getGroupingExpressions();

  // Resolves a grouping expression to the alias it was given in the projection, if any.
  final Function<Expression, Optional<ColumnName>> selectResolver = groupingExp -> {
    final List<ColumnName> matchingAliases = projectionExpressions.stream()
        .filter(select -> select.getExpression().equals(groupingExp))
        .map(SelectExpression::getAlias)
        .collect(Collectors.toList());
    if (matchingAliases.isEmpty()) {
      return Optional.empty();
    }
    if (matchingAliases.size() == 1) {
      return Optional.of(matchingAliases.get(0));
    }
    final String keys = GrammaticalJoiner.and().join(matchingAliases);
    throw new KsqlException("The projection contains a key column more than once: " + keys + "."
        + System.lineSeparator()
        + "Each key column must only be in the projection once. "
        + "If you intended to copy the key into the value, then consider using the "
        + AsValue.NAME
        + " function to indicate which key reference should be copied.");
  };

  final List<Column> valueColumns;
  if (!analysis.getInto().isPresent()) {
    // Transient query:
    // only value columns are returned, so the key columns must stay in the value.
    valueColumns = projectionSchema.columns();
  } else {
    // Persistent query:
    // projected columns that are really keys are dropped from the value.
    final Set<ColumnName> keyColumnNames = groupByExps.stream()
        .map(selectResolver)
        .filter(Optional::isPresent)
        .map(Optional::get)
        .collect(Collectors.toSet());
    valueColumns = projectionSchema.value().stream()
        .filter(column -> !keyColumnNames.contains(column.name()))
        .collect(Collectors.toList());
    if (valueColumns.isEmpty()) {
      throw new KsqlException("The projection contains no value columns.");
    }
  }

  final Builder builder = LogicalSchema.builder();
  final ExpressionTypeManager typeManager = new ExpressionTypeManager(sourceSchema, metaStore);
  for (final Expression keyExpression : groupByExps) {
    final SqlType keyType = typeManager.getExpressionSqlType(keyExpression);
    final Optional<ColumnName> projectedAlias = selectResolver.apply(keyExpression);
    final ColumnName keyName;
    if (projectedAlias.isPresent()) {
      keyName = projectedAlias.get();
    } else if (keyExpression instanceof ColumnReferenceExp) {
      keyName = ((ColumnReferenceExp) keyExpression).getColumnName();
    } else {
      keyName = ColumnNames.uniqueAliasFor(keyExpression, sourceSchema);
    }
    builder.keyColumn(keyName, keyType);
  }
  return builder.valueColumns(valueColumns).build();
}
Use of io.confluent.ksql.execution.expression.tree.ColumnReferenceExp in project ksql by confluentinc.
Class PreJoinProjectNode, method validateColumns.
/**
 * Rewrites the required columns before delegating validation to the superclass.
 *
 * <p>Every required {@link UnqualifiedColumnReferenceExp} whose column name is a key of
 * {@code aliases.inverse()} is removed and replaced by an unqualified reference, at the same
 * location, to the name that the inverse alias mapping gives for it. All other required
 * columns are passed through unchanged.
 *
 * @param requiredColumns the columns required by downstream nodes
 * @return whatever the superclass validation returns for the rewritten set of columns
 */
@Override
protected Set<ColumnReferenceExp> validateColumns(final RequiredColumns requiredColumns) {
  // Snapshot the references to rewrite before touching the builder.
  final List<? extends ColumnReferenceExp> toRewrite = requiredColumns.get().stream()
      .filter(ref -> ref instanceof UnqualifiedColumnReferenceExp
          && aliases.inverse().containsKey(ref.getColumnName()))
      .collect(Collectors.toList());

  final Builder rewritten = requiredColumns.asBuilder();
  for (final ColumnReferenceExp ref : toRewrite) {
    rewritten.remove(ref);
    rewritten.add(new UnqualifiedColumnReferenceExp(
        ref.getLocation(),
        aliases.inverse().get(ref.getColumnName())));
  }
  return super.validateColumns(rewritten.build());
}
Use of io.confluent.ksql.execution.expression.tree.ColumnReferenceExp in project ksql by confluentinc.
Class SelectionUtil, method buildProjectionSchema.
/*
 * Builds the logical schema of a projection.
 *
 * The logical schema has to mirror the physical schema until
 * https://github.com/confluentinc/ksql/issues/6374 is addressed, so key columns must come out
 * in the order of the parent schema. For example, with a source of K1 INT KEY, K2 INT KEY and
 * a projection of SELECT K2, K1, the resulting schema has keys K1, K2 regardless of the order
 * written in the projection (see https://github.com/confluentinc/ksql/pull/7477 for the bug
 * that motivated this).
 *
 * Keys cannot simply be emitted first followed by the values: transient queries return every
 * column to the user as a "value column", so for SELECT VALUE, * FROM FOO the VALUE column is
 * expected _before_ the key fields. The positions in the projection that hold key columns
 * therefore stay where they are; only which key fills each such position changes.
 *
 * The same key may also be projected more than once under different aliases
 * (SELECT K1 AS X, K1 AS Y FROM ...). That is not supported, but it is rejected later when the
 * final projection is built, so all such projections are preserved here.
 *
 * The method makes two passes over the projection:
 *  1. Bucket every projection that is a reference to a parent key column under that key's
 *     index. Each bucket is a list so that a key with several aliases keeps all of them.
 *  2. Walk the projection again. A non-key projection becomes a value column in place. A key
 *     projection is replaced by the next not-yet-used entry from the buckets, taken in parent
 *     key order.
 */
public static LogicalSchema buildProjectionSchema(final LogicalSchema parentSchema, final List<SelectExpression> projection, final FunctionRegistry functionRegistry) {
  final ExpressionTypeManager typeManager = new ExpressionTypeManager(parentSchema, functionRegistry);

  // selectsByKeyIndex.get(i) holds the projections that reference the parent's i-th key column.
  final int parentKeyCount = parentSchema.key().size();
  final List<List<SelectExpression>> selectsByKeyIndex = new ArrayList<>(parentKeyCount);
  for (int slot = 0; slot < parentKeyCount; slot++) {
    selectsByKeyIndex.add(new ArrayList<>());
  }

  // Pass one: fill the buckets. keySelects remembers which projections are key references so
  // that pass two does not need to repeat the key check.
  final Set<SelectExpression> keySelects = new HashSet<>();
  for (final SelectExpression select : projection) {
    final Expression selected = select.getExpression();
    if (!(selected instanceof ColumnReferenceExp)) {
      continue;
    }
    final ColumnName referenced = ((ColumnReferenceExp) selected).getColumnName();
    parentSchema.findColumn(referenced)
        .filter(column -> column.namespace() == Namespace.KEY)
        .ifPresent(keyColumn -> {
          selectsByKeyIndex.get(keyColumn.index()).add(select);
          keySelects.add(select);
        });
  }

  // Pass two: emit columns in projection order, substituting keys in parent key order.
  final Builder schema = LogicalSchema.builder();
  int nextKeyIndex = 0;
  for (final SelectExpression select : projection) {
    if (!keySelects.contains(select)) {
      final SqlType valueType = typeManager.getExpressionSqlType(select.getExpression());
      if (valueType == null) {
        throw new IllegalArgumentException("Can't infer a type of null. Please explicitly cast "
            + "it to a required type, e.g. CAST(null AS VARCHAR).");
      }
      schema.valueColumn(select.getAlias(), valueType);
      continue;
    }

    // Skip buckets that are already drained (or were never filled).
    while (selectsByKeyIndex.get(nextKeyIndex).isEmpty()) {
      nextKeyIndex++;
    }
    final SelectExpression orderedKey = selectsByKeyIndex.get(nextKeyIndex).remove(0);
    final SqlType keyType = typeManager.getExpressionSqlType(orderedKey.getExpression());
    schema.keyColumn(orderedKey.getAlias(), keyType);
  }
  return schema.build();
}
Aggregations