use of io.prestosql.sql.tree.ComparisonExpression in project hetu-core by openlookeng.
the class TestComparisonStatsCalculator method symbolToCastExpressionNotEqual.
@Test
public void symbolToCastExpressionNotEqual() {
double rowCount = 807.3;
assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("u"), new Cast(new SymbolReference("w"), BIGINT))).outputRowsCount(rowCount).symbolStats("u", equalTo(capNDV(zeroNullsFraction(uStats), rowCount))).symbolStats("w", equalTo(capNDV(wStats, rowCount))).symbolStats("z", equalTo(capNDV(zStats, rowCount)));
rowCount = 897.0;
assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("u"), new Cast(new LongLiteral("10"), BIGINT))).outputRowsCount(rowCount).symbolStats("u", equalTo(capNDV(updateNDV(zeroNullsFraction(uStats), -1), rowCount))).symbolStats("z", equalTo(capNDV(zStats, rowCount)));
}
use of io.prestosql.sql.tree.ComparisonExpression in project hetu-core by openlookeng.
the class TestComparisonStatsCalculator method symbolToLiteralGreaterThanStats.
@Test
public void symbolToLiteralGreaterThanStats() {
// Simple case
assertCalculate(new ComparisonExpression(GREATER_THAN, new SymbolReference("y"), new DoubleLiteral("2.5"))).outputRowsCount(// all rows minus nulls times range coverage (50%)
250.0).symbolStats("y", symbolAssert -> {
symbolAssert.averageRowSize(4.0).distinctValuesCount(10.0).lowValue(2.5).highValue(5.0).nullsFraction(0.0);
});
// Literal on the edge of symbol range (whole range included)
assertCalculate(new ComparisonExpression(GREATER_THAN, new SymbolReference("x"), new DoubleLiteral("-10.0"))).outputRowsCount(// all rows minus nulls times range coverage (100%)
750.0).symbolStats("x", symbolAssert -> {
symbolAssert.averageRowSize(4.0).distinctValuesCount(40.0).lowValue(-10.0).highValue(10.0).nullsFraction(0.0);
});
// Literal on the edge of symbol range (whole range excluded)
assertCalculate(new ComparisonExpression(GREATER_THAN, new SymbolReference("x"), new DoubleLiteral("10.0"))).outputRowsCount(// all rows minus nulls divided by NDV (one value from edge is included as approximation)
18.75).symbolStats("x", symbolAssert -> {
symbolAssert.averageRowSize(4.0).distinctValuesCount(1.0).lowValue(10.0).highValue(10.0).nullsFraction(0.0);
});
// Literal range out of symbol range
assertCalculate(new ComparisonExpression(GREATER_THAN, new SymbolReference("y"), new DoubleLiteral("10.0"))).outputRowsCount(// all rows minus nulls times range coverage (0%)
0.0).symbolStats("y", symbolAssert -> {
symbolAssert.averageRowSize(0.0).distinctValuesCount(0.0).emptyRange().nullsFraction(1.0);
});
// Literal in left open range
assertCalculate(new ComparisonExpression(GREATER_THAN, new SymbolReference("leftOpen"), new DoubleLiteral("0.0"))).outputRowsCount(// all rows minus nulls times range coverage (25% - heuristic)
225.0).symbolStats("leftOpen", symbolAssert -> {
symbolAssert.averageRowSize(4.0).distinctValuesCount(// (25% heuristic)
12.5).lowValue(0.0).highValue(15.0).nullsFraction(0.0);
});
// Literal in right open range
assertCalculate(new ComparisonExpression(GREATER_THAN, new SymbolReference("rightOpen"), new DoubleLiteral("0.0"))).outputRowsCount(// all rows minus nulls times range coverage (50% - heuristic)
450.0).symbolStats("rightOpen", symbolAssert -> {
symbolAssert.averageRowSize(4.0).distinctValuesCount(// (50% heuristic)
25.0).lowValue(0.0).highValueUnknown().nullsFraction(0.0);
});
// Literal in unknown range
assertCalculate(new ComparisonExpression(GREATER_THAN, new SymbolReference("unknownRange"), new DoubleLiteral("0.0"))).outputRowsCount(// all rows minus nulls times range coverage (50% - heuristic)
450.0).symbolStats("unknownRange", symbolAssert -> {
symbolAssert.averageRowSize(4.0).distinctValuesCount(// (50% heuristic)
25.0).lowValue(0.0).highValueUnknown().nullsFraction(0.0);
});
// Literal in empty range
assertCalculate(new ComparisonExpression(GREATER_THAN, new SymbolReference("emptyRange"), new DoubleLiteral("0.0"))).outputRowsCount(0.0).symbolStats("emptyRange", equalTo(emptyRangeStats));
}
use of io.prestosql.sql.tree.ComparisonExpression in project hetu-core by openlookeng.
the class TestComparisonStatsCalculator method symbolToSymbolNotEqual.
@Test
public void symbolToSymbolNotEqual() {
// Equal ranges
double rowCount = 807.3;
assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("u"), new SymbolReference("w"))).outputRowsCount(rowCount).symbolStats("u", equalTo(capNDV(zeroNullsFraction(uStats), rowCount))).symbolStats("w", equalTo(capNDV(zeroNullsFraction(wStats), rowCount))).symbolStats("z", equalTo(capNDV(zStats, rowCount)));
// One symbol's range is within the other's
rowCount = 365.625;
assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("x"), new SymbolReference("y"))).outputRowsCount(rowCount).symbolStats("x", equalTo(capNDV(zeroNullsFraction(xStats), rowCount))).symbolStats("y", equalTo(capNDV(zeroNullsFraction(yStats), rowCount))).symbolStats("z", equalTo(capNDV(zStats, rowCount)));
// Partially overlapping ranges
rowCount = 658.125;
assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("x"), new SymbolReference("w"))).outputRowsCount(rowCount).symbolStats("x", equalTo(capNDV(zeroNullsFraction(xStats), rowCount))).symbolStats("w", equalTo(capNDV(zeroNullsFraction(wStats), rowCount))).symbolStats("z", equalTo(capNDV(zStats, rowCount)));
// None of the ranges is included in the other, and one symbol has much higher cardinality, so that it has bigger NDV in intersect than the other in total
rowCount = 672.75;
assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("x"), new SymbolReference("u"))).outputRowsCount(rowCount).symbolStats("x", equalTo(capNDV(zeroNullsFraction(xStats), rowCount))).symbolStats("u", equalTo(capNDV(zeroNullsFraction(uStats), rowCount))).symbolStats("z", equalTo(capNDV(zStats, rowCount)));
}
use of io.prestosql.sql.tree.ComparisonExpression in project hetu-core by openlookeng.
the class TestComparisonStatsCalculator method symbolToLiteralNotEqualStats.
@Test
public void symbolToLiteralNotEqualStats() {
// Simple case
assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("y"), new DoubleLiteral("2.5"))).outputRowsCount(// all rows minus nulls multiplied by ((distinct values - 1) / distinct values)
475.0).symbolStats("y", symbolAssert -> {
symbolAssert.averageRowSize(4.0).distinctValuesCount(19.0).lowValue(0.0).highValue(5.0).nullsFraction(0.0);
});
// Literal on the edge of symbol range
assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("x"), new DoubleLiteral("10.0"))).outputRowsCount(// all rows minus nulls multiplied by ((distinct values - 1) / distinct values)
731.25).symbolStats("x", symbolAssert -> {
symbolAssert.averageRowSize(4.0).distinctValuesCount(39.0).lowValue(-10.0).highValue(10.0).nullsFraction(0.0);
});
// Literal out of symbol range
assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("y"), new DoubleLiteral("10.0"))).outputRowsCount(// all rows minus nulls
500.0).symbolStats("y", symbolAssert -> {
symbolAssert.averageRowSize(4.0).distinctValuesCount(19.0).lowValue(0.0).highValue(5.0).nullsFraction(0.0);
});
// Literal in left open range
assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("leftOpen"), new DoubleLiteral("2.5"))).outputRowsCount(// all rows minus nulls multiplied by ((distinct values - 1) / distinct values)
882.0).symbolStats("leftOpen", symbolAssert -> {
symbolAssert.averageRowSize(4.0).distinctValuesCount(49.0).lowValueUnknown().highValue(15.0).nullsFraction(0.0);
});
// Literal in right open range
assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("rightOpen"), new DoubleLiteral("-2.5"))).outputRowsCount(// all rows minus nulls divided by distinct values count
882.0).symbolStats("rightOpen", symbolAssert -> {
symbolAssert.averageRowSize(4.0).distinctValuesCount(49.0).lowValue(-15.0).highValueUnknown().nullsFraction(0.0);
});
// Literal in unknown range
assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("unknownRange"), new DoubleLiteral("0.0"))).outputRowsCount(// all rows minus nulls divided by distinct values count
882.0).symbolStats("unknownRange", symbolAssert -> {
symbolAssert.averageRowSize(4.0).distinctValuesCount(49.0).lowValueUnknown().highValueUnknown().nullsFraction(0.0);
});
// Literal in empty range
assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("emptyRange"), new DoubleLiteral("0.0"))).outputRowsCount(0.0).symbolStats("emptyRange", equalTo(emptyRangeStats));
// Column with values not representable as double (unknown range)
assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("varchar"), new StringLiteral("blah"))).outputRowsCount(// all rows minus nulls divided by distinct values count
882.0).symbolStats("varchar", symbolAssert -> {
symbolAssert.averageRowSize(4.0).distinctValuesCount(49.0).lowValueUnknown().highValueUnknown().nullsFraction(0.0);
});
}
use of io.prestosql.sql.tree.ComparisonExpression in project hetu-core by openlookeng.
the class TransformUnCorrelatedInPredicateSubQuerySelfJoinToAggregate method transformProjectNode.
private Optional<ProjectNode> transformProjectNode(Context context, ProjectNode projectNode) {
// IN predicate requires only one projection
if (projectNode.getOutputSymbols().size() > 1) {
return Optional.empty();
}
PlanNode source = context.getLookup().resolve(projectNode.getSource());
if (source instanceof CTEScanNode) {
source = getChildFilterNode(context, context.getLookup().resolve(((CTEScanNode) source).getSource()));
}
if (!(source instanceof FilterNode && context.getLookup().resolve(((FilterNode) source).getSource()) instanceof JoinNode)) {
return Optional.empty();
}
FilterNode filter = (FilterNode) source;
Expression predicate = OriginalExpressionUtils.castToExpression(filter.getPredicate());
List<SymbolReference> allPredicateSymbols = new ArrayList<>();
getAllSymbols(predicate, allPredicateSymbols);
JoinNode joinNode = (JoinNode) context.getLookup().resolve(((FilterNode) source).getSource());
if (!isSelfJoin(projectNode, predicate, joinNode, context.getLookup())) {
// Check next level for Self Join
PlanNode left = context.getLookup().resolve(joinNode.getLeft());
boolean changed = false;
if (left instanceof ProjectNode) {
Optional<ProjectNode> transformResult = transformProjectNode(context, (ProjectNode) left);
if (transformResult.isPresent()) {
joinNode = new JoinNode(joinNode.getId(), joinNode.getType(), transformResult.get(), joinNode.getRight(), joinNode.getCriteria(), joinNode.getOutputSymbols(), joinNode.getFilter(), joinNode.getLeftHashSymbol(), joinNode.getRightHashSymbol(), joinNode.getDistributionType(), joinNode.isSpillable(), joinNode.getDynamicFilters());
changed = true;
}
}
PlanNode right = context.getLookup().resolve(joinNode.getRight());
if (right instanceof ProjectNode) {
Optional<ProjectNode> transformResult = transformProjectNode(context, (ProjectNode) right);
if (transformResult.isPresent()) {
joinNode = new JoinNode(joinNode.getId(), joinNode.getType(), joinNode.getLeft(), transformResult.get(), joinNode.getCriteria(), joinNode.getOutputSymbols(), joinNode.getFilter(), joinNode.getLeftHashSymbol(), joinNode.getRightHashSymbol(), joinNode.getDistributionType(), joinNode.isSpillable(), joinNode.getDynamicFilters());
changed = true;
}
}
if (changed) {
FilterNode transformedFilter = new FilterNode(filter.getId(), joinNode, filter.getPredicate());
ProjectNode transformedProject = new ProjectNode(projectNode.getId(), transformedFilter, projectNode.getAssignments());
return Optional.of(transformedProject);
}
return Optional.empty();
}
// Choose the table to use based on projected output.
TableScanNode leftTable = (TableScanNode) context.getLookup().resolve(joinNode.getLeft());
TableScanNode rightTable = (TableScanNode) context.getLookup().resolve(joinNode.getRight());
TableScanNode tableToUse;
List<RowExpression> aggregationSymbols;
AggregationNode.GroupingSetDescriptor groupingSetDescriptor;
Assignments projectionsForCTE = null;
Assignments projectionsFromFilter = null;
// Use non-projected column for aggregation
if (context.getLookup().resolve(projectNode.getSource()) instanceof CTEScanNode) {
CTEScanNode cteScanNode = (CTEScanNode) context.getLookup().resolve(projectNode.getSource());
ProjectNode childProjectOfCte = (ProjectNode) context.getLookup().resolve(cteScanNode.getSource());
List<Symbol> completeOutputSymbols = new ArrayList<>();
rightTable.getOutputSymbols().forEach((s) -> completeOutputSymbols.add(s));
leftTable.getOutputSymbols().forEach((s) -> completeOutputSymbols.add(s));
List<Symbol> outputSymbols = new ArrayList<>();
for (int i = 0; i < completeOutputSymbols.size(); i++) {
Symbol outputSymbol = completeOutputSymbols.get(i);
for (Symbol symbol : projectNode.getOutputSymbols()) {
if (childProjectOfCte.getAssignments().getMap().containsKey(symbol)) {
if (((SymbolReference) OriginalExpressionUtils.castToExpression(childProjectOfCte.getAssignments().getMap().get(symbol))).getName().equals(outputSymbol.getName())) {
outputSymbols.add(outputSymbol);
}
}
}
}
Map<Symbol, RowExpression> projectionsForCTEMap = new HashMap<>();
Map<Symbol, RowExpression> projectionsFromFilterMap = new HashMap<>();
for (Map.Entry entry : childProjectOfCte.getAssignments().getMap().entrySet()) {
if (entry.getKey().equals(getOnlyElement(projectNode.getOutputSymbols()))) {
projectionsForCTEMap.put((Symbol) entry.getKey(), (RowExpression) entry.getValue());
}
if (entry.getKey().equals(getOnlyElement(projectNode.getOutputSymbols()))) {
projectionsFromFilterMap.put(getOnlyElement(outputSymbols), (RowExpression) entry.getValue());
}
}
projectionsForCTE = new Assignments(projectionsForCTEMap);
projectionsFromFilter = new Assignments(projectionsFromFilterMap);
tableToUse = leftTable.getOutputSymbols().contains(getOnlyElement(outputSymbols)) ? leftTable : rightTable;
aggregationSymbols = allPredicateSymbols.stream().filter(s -> tableToUse.getOutputSymbols().contains(SymbolUtils.from(s))).filter(s -> !outputSymbols.contains(SymbolUtils.from(s))).map(OriginalExpressionUtils::castToRowExpression).collect(Collectors.toList());
// Create aggregation
groupingSetDescriptor = new AggregationNode.GroupingSetDescriptor(ImmutableList.copyOf(outputSymbols), 1, ImmutableSet.of());
} else {
tableToUse = leftTable.getOutputSymbols().contains(getOnlyElement(projectNode.getOutputSymbols())) ? leftTable : rightTable;
aggregationSymbols = allPredicateSymbols.stream().filter(s -> tableToUse.getOutputSymbols().contains(SymbolUtils.from(s))).filter(s -> !projectNode.getOutputSymbols().contains(SymbolUtils.from(s))).map(OriginalExpressionUtils::castToRowExpression).collect(Collectors.toList());
// Create aggregation
groupingSetDescriptor = new AggregationNode.GroupingSetDescriptor(ImmutableList.copyOf(projectNode.getOutputSymbols()), 1, ImmutableSet.of());
}
AggregationNode.Aggregation aggregation = new AggregationNode.Aggregation(new CallExpression("count", functionResolution.countFunction(), BIGINT, aggregationSymbols), aggregationSymbols, // mark DISTINCT since NOT_EQUALS predicate
true, Optional.empty(), Optional.empty(), Optional.empty());
ImmutableMap.Builder<Symbol, AggregationNode.Aggregation> aggregationsBuilder = ImmutableMap.builder();
Symbol countSymbol = context.getSymbolAllocator().newSymbol(aggregation.getFunctionCall().getDisplayName(), BIGINT);
aggregationsBuilder.put(countSymbol, aggregation);
AggregationNode aggregationNode = new AggregationNode(context.getIdAllocator().getNextId(), tableToUse, aggregationsBuilder.build(), groupingSetDescriptor, ImmutableList.of(), AggregationNode.Step.SINGLE, Optional.empty(), Optional.empty(), AggregationNode.AggregationType.HASH, Optional.empty());
// Filter rows with count < 1 from aggregation results to match the NOT_EQUALS clause in original query.
FilterNode filterNode = new FilterNode(context.getIdAllocator().getNextId(), aggregationNode, OriginalExpressionUtils.castToRowExpression(new ComparisonExpression(ComparisonExpression.Operator.GREATER_THAN, SymbolUtils.toSymbolReference(countSymbol), new GenericLiteral("BIGINT", "1"))));
// Project the aggregated+filtered rows.
ProjectNode transformedSubquery = new ProjectNode(projectNode.getId(), filterNode, projectNode.getAssignments());
if (context.getLookup().resolve(projectNode.getSource()) instanceof CTEScanNode) {
CTEScanNode cteScanNode = (CTEScanNode) context.getLookup().resolve(projectNode.getSource());
PlanNode projectNodeForCTE = context.getLookup().resolve(cteScanNode.getSource());
PlanNode projectNodeFromFilter = context.getLookup().resolve(projectNodeForCTE);
projectNodeFromFilter = new ProjectNode(projectNodeFromFilter.getId(), filterNode, projectionsFromFilter);
projectNodeForCTE = new ProjectNode(projectNodeForCTE.getId(), projectNodeFromFilter, projectionsForCTE);
cteScanNode = (CTEScanNode) cteScanNode.replaceChildren(ImmutableList.of(projectNodeForCTE));
cteScanNode.setOutputSymbols(projectNode.getOutputSymbols());
transformedSubquery = new ProjectNode(projectNode.getId(), cteScanNode, projectNode.getAssignments());
}
return Optional.of(transformedSubquery);
}
Aggregations