Use of io.trino.util.DisjointSet in the project trino by trinodb.
From the class FilterStatsCalculator, method extractCorrelatedGroups.
/**
 * Splits the given terms into groups based on the symbols they reference.
 *
 * <p>Symbols that appear together in a term are unioned in a {@code DisjointSet}; every term is then
 * assigned to the partition that contains its first symbol. Terms that reference no symbols at all
 * are collected into one extra group of their own.
 *
 * @param terms the expressions to group
 * @param filterConjunctionIndependenceFactor when exactly {@code 1}, grouping is skipped and all terms
 *         are returned as a single group
 * @return the groups of terms, one list per non-empty partition
 */
private static List<List<Expression>> extractCorrelatedGroups(List<Expression> terms, double filterConjunctionIndependenceFactor) {
    // A factor of 1 lets the filters be estimated as if there is no correlation between any of the terms,
    // so everything is handed back as one group.
    if (filterConjunctionIndependenceFactor == 1) {
        return ImmutableList.of(terms);
    }

    ListMultimap<Expression, Symbol> symbolsByTerm = ArrayListMultimap.create();
    for (Expression term : terms) {
        symbolsByTerm.putAll(term, extractUnique(term));
    }

    // Build disjoint sets of symbols such that symbols from different sets never occur
    // together in any single term.
    DisjointSet<Symbol> symbolUnions = new DisjointSet<>();
    for (Expression term : terms) {
        List<Symbol> termSymbols = symbolsByTerm.get(term);
        if (!termSymbols.isEmpty()) {
            Symbol firstSymbol = termSymbols.get(0);
            // The explicit find registers the symbol even when it is the only one in the term
            symbolUnions.find(firstSymbol);
            for (Symbol otherSymbol : termSymbols.subList(1, termSymbols.size())) {
                symbolUnions.findAndUnion(firstSymbol, otherSymbol);
            }
        }
    }

    // The symbol partitions drive the partitioning of the terms themselves
    List<Set<Symbol>> partitions = ImmutableList.copyOf(symbolUnions.getEquivalentClasses());
    checkState(partitions.size() <= terms.size(), "symbolPartitions size exceeds number of expressions");

    // Terms without symbols share one id just past the last symbol partition
    int symbolFreePartitionId = partitions.size();
    ListMultimap<Integer, Expression> termsByPartition = ArrayListMultimap.create();
    for (Expression term : terms) {
        List<Symbol> termSymbols = symbolsByTerm.get(term);
        int partitionId = symbolFreePartitionId;
        if (!termSymbols.isEmpty()) {
            // Any symbol of the term identifies its partition; the first one is used
            Symbol representative = termSymbols.get(0);
            partitionId = IntStream.range(0, partitions.size())
                    .filter(index -> partitions.get(index).contains(representative))
                    .findFirst()
                    .orElseThrow();
        }
        termsByPartition.put(partitionId, term);
    }

    return termsByPartition.keySet().stream()
            .map(partitionId -> termsByPartition.get(partitionId))
            .collect(toImmutableList());
}
Use of io.trino.util.DisjointSet in the project trino by trinodb.
From the class EqualityInference, method newInstance.
/**
 * Builds an {@code EqualityInference} from the given expressions.
 *
 * <p>Each expression is split into its conjuncts; every conjunct accepted by
 * {@code isInferenceCandidate} is treated as a {@code ComparisonExpression} whose left and right
 * sides are unioned into one equivalence class. Additional equalities are then derived by replacing
 * sub-expressions with their known equivalents.
 *
 * @param metadata passed through to {@code isInferenceCandidate}
 * @param expressions the expressions whose conjuncts are examined
 * @return a new instance holding the equality sets, the expression-to-canonical mapping and the
 *         set of derived expressions
 */
public static EqualityInference newInstance(Metadata metadata, Collection<Expression> expressions) {
    DisjointSet<Expression> equalities = new DisjointSet<>();
    for (Expression expression : expressions) {
        for (Expression conjunct : extractConjuncts(expression)) {
            if (isInferenceCandidate(metadata, conjunct)) {
                ComparisonExpression comparison = (ComparisonExpression) conjunct;
                Expression left = comparison.getLeft();
                Expression right = comparison.getRight();
                equalities.findAndUnion(left, right);
            }
        }
    }

    // Index every expression by the equivalence class it belongs to
    Map<Expression, Set<Expression>> classByMember = new HashMap<>();
    for (Set<Expression> equivalenceClass : equalities.getEquivalentClasses()) {
        for (Expression member : equivalenceClass) {
            classByMember.put(member, equivalenceClass);
        }
    }

    // For each expression that was not itself derived, look at its proper sub-expressions and check
    // whether any can be swapped for an equivalent one. Every such rewrite is a new known equality.
    Set<Expression> derivedExpressions = new LinkedHashSet<>();
    for (Expression expression : classByMember.keySet()) {
        if (!derivedExpressions.contains(expression)) {
            SubExpressionExtractor.extract(expression)
                    .filter(candidate -> !candidate.equals(expression))
                    .forEach(subExpression -> {
                        for (Expression equivalent : classByMember.getOrDefault(subExpression, ImmutableSet.of())) {
                            if (!equivalent.equals(subExpression)) {
                                Expression rewritten = replaceExpression(expression, ImmutableMap.of(subExpression, equivalent));
                                equalities.findAndUnion(expression, rewritten);
                                derivedExpressions.add(rewritten);
                            }
                        }
                    });
        }
    }

    Multimap<Expression, Expression> equalitySets = makeEqualitySets(equalities);

    // Invert the multimap: each member expression points back at the key of its equality set
    ImmutableMap.Builder<Expression, Expression> canonicalMappings = ImmutableMap.builder();
    equalitySets.entries().forEach(entry -> canonicalMappings.put(entry.getValue(), entry.getKey()));

    return new EqualityInference(equalitySets, canonicalMappings.buildOrThrow(), derivedExpressions);
}
Aggregations