use of org.voltdb.expressions.AggregateExpression in project voltdb by VoltDB.
the class AbstractParsedStmt method replaceExpressionsWithPve.
/**
 * Walks an expression tree and swaps every tuple value expression (TVE) and every
 * aggregate expression for a newly numbered parameter value expression (PVE).
 * Each replaced expression is recorded in {@code m_parameterTveMap} under the
 * parameter index assigned to its PVE.
 *
 * An aggregate expression is only accepted when every TVE inside it originates
 * from this statement or from its direct parent statement, e.g.
 * HAVING AGG(T1.C1) IN (SELECT T2.C2 ...).
 *
 * Nodes that are neither TVEs nor aggregates are kept, but their left child,
 * right child and argument list are rewritten in place.
 *
 * @param expr - expression to rewrite; must not be null
 * @return the replacement PVE when expr itself is a TVE or an aggregate,
 *         otherwise expr with its children rewritten
 * @throws PlanningErrorException if an aggregate references a column of a
 *         statement other than this one or its direct parent
 */
protected AbstractExpression replaceExpressionsWithPve(AbstractExpression expr) {
    assert (expr != null);

    final boolean isTupleValue = expr instanceof TupleValueExpression;
    if (isTupleValue || expr instanceof AggregateExpression) {
        // The index is claimed and the PVE built before any validation runs.
        final int parameterIndex = NEXT_PARAMETER_ID++;
        final ParameterValueExpression replacement = new ParameterValueExpression(parameterIndex, expr);
        if (!isTupleValue) {
            // Aggregates over parent columns are rejected, apart from
            // columns of this statement or its direct parent.
            List<TupleValueExpression> referencedColumns = ExpressionUtil.getTupleValueExpressions(expr);
            assert (m_parentStmt != null);
            for (TupleValueExpression referenced : referencedColumns) {
                final int originStmtId = referenced.getOrigStmtId();
                if (originStmtId == m_stmtId || originStmtId == m_parentStmt.m_stmtId) {
                    continue;
                }
                throw new PlanningErrorException("Subqueries do not support aggregation of parent statement columns");
            }
        }
        m_parameterTveMap.put(parameterIndex, expr);
        return replacement;
    }

    // Neither a TVE nor an aggregate: descend into the children.
    if (expr.getLeft() != null) {
        AbstractExpression rewrittenLeft = replaceExpressionsWithPve(expr.getLeft());
        expr.setLeft(rewrittenLeft);
    }
    if (expr.getRight() != null) {
        AbstractExpression rewrittenRight = replaceExpressionsWithPve(expr.getRight());
        expr.setRight(rewrittenRight);
    }
    if (expr.getArgs() != null) {
        List<AbstractExpression> rewrittenArgs = new ArrayList<>();
        for (AbstractExpression original : expr.getArgs()) {
            AbstractExpression rewritten = replaceExpressionsWithPve(original);
            rewrittenArgs.add(rewritten);
        }
        expr.setArgs(rewrittenArgs);
    }
    return expr;
}
use of org.voltdb.expressions.AggregateExpression in project voltdb by VoltDB.
the class PlanAssembler method handleAggregationOperators.
/**
 * Adds aggregation plan nodes for the parsed SELECT statement when it has
 * aggregates or a GROUP BY clause.
 *
 * Builds one aggregate node (aggNode) and, when possible, a second "top"
 * aggregate node (topAggNode). For every entry of m_aggResultColumns it
 * registers the aggregate (or pass-through column) on the nodes and on their
 * output schemas, then adds the group-by expressions and hands both nodes to
 * pushDownAggregate. topAggNode is set to null as soon as an aggregate is
 * found that cannot be split between the two nodes.
 *
 * @param root the current root of the plan
 * @return the result of handleDistinctWithGroupby applied to the (possibly
 *         replaced) root
 */
private AbstractPlanNode handleAggregationOperators(AbstractPlanNode root) {
/*
* "Select A from T group by A" is grouped but has no aggregate operator
* expressions. Catch that case by checking the grouped flag
*/
if (m_parsedSelect.hasAggregateOrGroupby()) {
AggregatePlanNode aggNode = null;
// i.e., on the coordinator
AggregatePlanNode topAggNode = null;
IndexGroupByInfo gbInfo = new IndexGroupByInfo();
if (root instanceof AbstractReceivePlanNode) {
// for distinct that does not group by partition column
if (!m_parsedSelect.hasAggregateDistinct() || m_parsedSelect.hasPartitionColumnInGroupby()) {
// The candidate is the grandchild of the receive node.
// NOTE(review): presumably the node below the matching send node -- confirm.
AbstractPlanNode candidate = root.getChild(0).getChild(0);
gbInfo.m_multiPartition = true;
// The boolean result is ignored here; root is left unchanged in this branch.
switchToIndexScanForGroupBy(candidate, gbInfo);
}
} else if (switchToIndexScanForGroupBy(root, gbInfo)) {
// The switch succeeded: continue with the index access node as the root.
root = gbInfo.m_indexAccess;
}
boolean needHashAgg = gbInfo.needHashAggregator(root, m_parsedSelect);
// Construct the aggregate nodes
if (needHashAgg) {
if (m_parsedSelect.m_mvFixInfo.needed()) {
// TODO: may optimize this edge case in future
// No topAggNode is created in the MV fix case.
aggNode = new HashAggregatePlanNode();
} else {
if (gbInfo.isChangedToSerialAggregate()) {
assert (root instanceof ReceivePlanNode);
aggNode = new AggregatePlanNode();
} else if (gbInfo.isChangedToPartialAggregate()) {
aggNode = new PartialAggregatePlanNode(gbInfo.m_coveredGroupByColumns);
} else {
aggNode = new HashAggregatePlanNode();
}
topAggNode = new HashAggregatePlanNode();
}
} else {
aggNode = new AggregatePlanNode();
if (!m_parsedSelect.m_mvFixInfo.needed()) {
topAggNode = new AggregatePlanNode();
}
}
// agg_schema becomes the output schema of aggNode. top_agg_schema is only
// used for topAggNode when the statement has a complex group by (see below).
NodeSchema agg_schema = new NodeSchema();
NodeSchema top_agg_schema = new NodeSchema();
for (int outputColumnIndex = 0; outputColumnIndex < m_parsedSelect.m_aggResultColumns.size(); outputColumnIndex += 1) {
ParsedColInfo col = m_parsedSelect.m_aggResultColumns.get(outputColumnIndex);
AbstractExpression rootExpr = col.expression;
AbstractExpression agg_input_expr = null;
SchemaColumn schema_col = null;
SchemaColumn top_schema_col = null;
if (rootExpr instanceof AggregateExpression) {
ExpressionType agg_expression_type = rootExpr.getExpressionType();
agg_input_expr = rootExpr.getLeft();
// A bit of a hack: ProjectionNodes after the
// aggregate node need the output columns here to
// contain TupleValueExpressions (effectively on a temp table).
// So we construct one based on the output of the
// aggregate expression, the column alias provided by HSQL,
// and the offset into the output table schema for the
// aggregate node that we're computing.
// Oh, oh, it's magic, you know..
TupleValueExpression tve = new TupleValueExpression(AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, rootExpr, outputColumnIndex);
tve.setDifferentiator(col.differentiator);
boolean is_distinct = ((AggregateExpression) rootExpr).isDistinct();
aggNode.addAggregate(agg_expression_type, is_distinct, outputColumnIndex, agg_input_expr);
// Both schemas receive an identically constructed column for an aggregate.
schema_col = new SchemaColumn(AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, tve, outputColumnIndex);
top_schema_col = new SchemaColumn(AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, tve, outputColumnIndex);
/*
* Special case count(*), count(), sum(), min() and max() to
* push them down to each partition. It will do the
* push-down if the select columns only contains the listed
* aggregate operators and other group-by columns. If the
* select columns includes any other aggregates, it will not
* do the push-down. - nshi
*/
if (topAggNode != null) {
ExpressionType top_expression_type = agg_expression_type;
/*
* For count(*), count() and sum(), the pushed-down
* aggregate node doesn't change. An extra sum()
* aggregate node is added to the coordinator to sum up
* the numbers from all the partitions. The input schema
* and the output schema of the sum() aggregate node is
* the same as the output schema of the push-down
* aggregate node.
*
* If DISTINCT is specified, don't do push-down for
* count() and sum() when not group by partition column.
* An exception is the aggregation arguments are the
* partition column (ENG-4980).
*/
if (agg_expression_type == ExpressionType.AGGREGATE_COUNT_STAR || agg_expression_type == ExpressionType.AGGREGATE_COUNT || agg_expression_type == ExpressionType.AGGREGATE_SUM) {
if (is_distinct && !(m_parsedSelect.hasPartitionColumnInGroupby() || canPushDownDistinctAggregation((AggregateExpression) rootExpr))) {
// Once nulled here, topAggNode stays null for all remaining columns.
topAggNode = null;
} else {
// for aggregate distinct when group by
// partition column, the top aggregate node
// will be dropped later, thus there is no
// effect to assign the top_expression_type.
top_expression_type = ExpressionType.AGGREGATE_SUM;
}
} else /*
* For min() and max(), the pushed-down aggregate node
* doesn't change. An extra aggregate node of the same
* type is added to the coordinator. The input schema
* and the output schema of the top aggregate node is
* the same as the output schema of the pushed-down
* aggregate node.
*
* APPROX_COUNT_DISTINCT can be similarly pushed down, but
* must be split into two different functions, which is
* done later, from pushDownAggregate().
*/
if (agg_expression_type != ExpressionType.AGGREGATE_MIN && agg_expression_type != ExpressionType.AGGREGATE_MAX && agg_expression_type != ExpressionType.AGGREGATE_APPROX_COUNT_DISTINCT) {
/*
* Unsupported aggregate for push-down (AVG for example).
*/
topAggNode = null;
}
// Re-check: topAggNode may have been set to null just above.
if (topAggNode != null) {
/*
* Input column of the top aggregate node is the
* output column of the push-down aggregate node
*/
boolean topDistinctFalse = false;
topAggNode.addAggregate(top_expression_type, topDistinctFalse, outputColumnIndex, tve);
}
}
// end if we have a top agg node
} else {
// has already been broken down.
assert (!rootExpr.hasAnySubexpressionOfClass(AggregateExpression.class));
/*
* These columns are the pass through columns that are not being
* aggregated on. These are the ones from the SELECT list. They
* MUST already exist in the child node's output. Find them and
* add them to the aggregate's output.
*/
schema_col = new SchemaColumn(col.tableName, col.tableAlias, col.columnName, col.alias, col.expression, outputColumnIndex);
// For a group-by column the top schema uses the expression stored in
// m_groupByExpressions under the column alias; otherwise the column's own expression.
AbstractExpression topExpr = null;
if (col.groupBy) {
topExpr = m_parsedSelect.m_groupByExpressions.get(col.alias);
} else {
topExpr = col.expression;
}
top_schema_col = new SchemaColumn(col.tableName, col.tableAlias, col.columnName, col.alias, topExpr, outputColumnIndex);
}
agg_schema.addColumn(schema_col);
top_agg_schema.addColumn(top_schema_col);
}
// Register the group-by expressions on both nodes; the top node uses the
// expressions looked up by alias in m_groupByExpressions.
for (ParsedColInfo col : m_parsedSelect.groupByColumns()) {
aggNode.addGroupByExpression(col.expression);
if (topAggNode != null) {
topAggNode.addGroupByExpression(m_parsedSelect.m_groupByExpressions.get(col.alias));
}
}
aggNode.setOutputSchema(agg_schema);
if (topAggNode != null) {
// Only a complex group by gives the top node its own schema; otherwise
// it shares agg_schema with aggNode.
if (m_parsedSelect.hasComplexGroupby()) {
topAggNode.setOutputSchema(top_agg_schema);
} else {
topAggNode.setOutputSchema(agg_schema);
}
}
// Never push down aggregation for MV fix case.
// (topAggNode may be null at this point; it is passed through as is.)
root = pushDownAggregate(root, aggNode, topAggNode, m_parsedSelect);
}
return handleDistinctWithGroupby(root);
}
use of org.voltdb.expressions.AggregateExpression in project voltdb by VoltDB.
the class AggregatePlanNode method updateAggregate.
/**
 * Replaces the aggregate type stored at {@code index} and updates the value
 * type and value size of the expression in the matching output schema column.
 *
 * @param index position in the aggregate lists of this node
 * @param aggType the new aggregate expression type
 */
public void updateAggregate(int index, ExpressionType aggType) {
    // A temporary aggregate expression of the new type supplies the value
    // type and size to copy onto the output schema expression.
    AggregateExpression typeSource = new AggregateExpression(aggType);
    typeSource.finalizeValueTypes();

    // Find the output schema column that belongs to this aggregate.
    int schemaPosition = m_aggregateOutputColumns.get(index);
    AbstractExpression columnExpr = m_outputSchema.getColumns().get(schemaPosition).getExpression();
    columnExpr.setValueType(typeSource.getValueType());
    columnExpr.setValueSize(typeSource.getValueSize());

    m_aggregateTypes.set(index, aggType);
}
use of org.voltdb.expressions.AggregateExpression in project voltdb by VoltDB.
the class MaterializedViewFixInfo method edgeCaseQueryNoFixNeeded.
/**
 * ENG-5386: decides whether the materialized view re-aggregation fix can be
 * skipped for a query, as a performance optimization.
 *
 * The edge case concerns queries on partitioned materialized views whose
 * GROUP BY columns do not contain the partition key. When the query itself
 * repeats the re-aggregation the fix would perform -- only MIN, MAX and/or
 * non-distinct SUM -- the extra cost of the fix can be avoided.
 *
 * @param mvDDLGroupbyColumnNames names of the group by columns in the view DDL
 * @param mvColumnAggType aggregate type of each view column, keyed by column name
 * @param displayColumns display columns of the query
 * @param groupByColumns group by columns of the query
 * @return true when the query gives the correct answer without the fix
 */
private boolean edgeCaseQueryNoFixNeeded(Set<String> mvDDLGroupbyColumnNames, Map<String, ExpressionType> mvColumnAggType, List<ParsedColInfo> displayColumns, List<ParsedColInfo> groupByColumns) {
    // Condition (1): every group by column taken from the view table must be
    // one of the view DDL's group by columns.
    for (ParsedColInfo groupCol : groupByColumns) {
        assert (groupCol.expression instanceof TupleValueExpression);
        TupleValueExpression groupTve = (TupleValueExpression) groupCol.expression;
        if (groupTve.getTableName().equals(getMVTableName())) {
            if (!mvDDLGroupbyColumnNames.contains(groupTve.getColumnName())) {
                return false;
            }
        }
    }

    // Condition (2): every display column that is not a group by
    // pass-through must be a qualifying aggregate.
    for (ParsedColInfo displayCol : displayColumns) {
        if (groupByColumns.contains(displayCol)) {
            continue;
        }
        if (!(displayCol.expression instanceof AggregateExpression)) {
            return false;
        }
        AggregateExpression aggregate = (AggregateExpression) displayCol.expression;
        if (!(aggregate.getLeft() instanceof TupleValueExpression)) {
            return false;
        }

        // Only non-distinct SUM, MIN and MAX tolerate a skipped re-aggregation.
        ExpressionType aggType = aggregate.getExpressionType();
        boolean tolerated = (aggType == ExpressionType.AGGREGATE_SUM && !aggregate.isDistinct())
                || aggType == ExpressionType.AGGREGATE_MIN
                || aggType == ExpressionType.AGGREGATE_MAX;
        if (!tolerated) {
            return false;
        }

        TupleValueExpression argument = (TupleValueExpression) aggregate.getLeft();
        if (argument.getTableName().equals(getMVTableName())) {
            // The query must SUM a SUM, MIN a MIN, or MAX a MAX.
            if (mvColumnAggType.get(argument.getColumnName()) != aggType) {
                return false;
            }
        } else if (aggType == ExpressionType.AGGREGATE_SUM) {
            // The duplication would corrupt a SUM.
            return false;
        }
    }

    // The edge case query gives the correct answer without the fix.
    return true;
}
use of org.voltdb.expressions.AggregateExpression in project voltdb by VoltDB.
the class ParsedSelectStmt method insertAggExpressionsToAggResultColumns.
/**
 * Adds the given aggregate expressions to m_aggResultColumns. Called while
 * parsing display columns and order by columns.
 *
 * When the parsed column is exactly one aggregate expression, the new result
 * column takes over the parsed column's alias and names. Otherwise the
 * statement is flagged as having complex aggregates and each aggregate is
 * registered under the temp table name.
 *
 * @param aggColumns aggregate expressions found in the parsed column
 * @param cookedCol the parsed column the aggregates were taken from
 * @throws PlanningErrorException if an aggregate has a subquery expression
 *         among its subexpressions
 */
private void insertAggExpressionsToAggResultColumns(List<AbstractExpression> aggColumns, ParsedColInfo cookedCol) {
    for (AbstractExpression aggregate : aggColumns) {
        assert (aggregate instanceof AggregateExpression);
        if (aggregate.hasSubquerySubexpression()) {
            throw new PlanningErrorException("SQL Aggregate function calls with subquery expression arguments are not allowed.");
        }

        ParsedColInfo resultCol = new ParsedColInfo();
        resultCol.expression = aggregate.clone();
        assert (resultCol.expression instanceof AggregateExpression);
        if (resultCol.expression.getExpressionType() == ExpressionType.AGGREGATE_AVG) {
            m_hasAverage = true;
        }

        // True when the parsed column consists of this single aggregate only.
        final boolean columnIsTheAggregate = aggColumns.size() == 1 && cookedCol.expression.equals(aggColumns.get(0));
        if (columnIsTheAggregate) {
            resultCol.alias = cookedCol.alias;
            resultCol.tableName = cookedCol.tableName;
            resultCol.tableAlias = cookedCol.tableAlias;
            resultCol.columnName = cookedCol.columnName;
        } else {
            // Flag complex aggregates early.
            m_hasComplexAgg = true;
            // The aggregate is registered under the temp table name.
            resultCol.tableName = TEMP_TABLE_NAME;
            resultCol.tableAlias = TEMP_TABLE_NAME;
            resultCol.columnName = "";
        }

        if (!m_aggResultColumns.contains(resultCol)) {
            m_aggResultColumns.add(resultCol);
        }
        if (columnIsTheAggregate) {
            return;
        }
        ExpressionUtil.finalizeValueTypes(resultCol.expression);
    }
}
Aggregations