Search in sources :

Example 1 with AggregateExpression

use of org.voltdb.expressions.AggregateExpression in project voltdb by VoltDB.

the class AbstractParsedStmt method replaceExpressionsWithPve.

/**
 * Recursively rewrites an expression tree, swapping every
 * TupleValueExpression and every AggregateExpression for a newly numbered
 * ParameterValueExpression.
 *
 * Each replaced expression is stored in m_parameterTveMap, keyed by the
 * parameter index assigned to its replacement, so that the originals can be
 * propagated to the EE. Expressions of any other kind are kept, but their
 * left child, right child and argument list are rewritten in place.
 *
 * @param expr expression to rewrite; must not be null
 * @return the replacement PVE when expr itself is a TVE or an aggregate,
 *         otherwise expr with its children rewritten
 * @throws PlanningErrorException if an aggregate references a column that
 *         belongs neither to this statement nor to its direct parent
 */
protected AbstractExpression replaceExpressionsWithPve(AbstractExpression expr) {
    assert (expr != null);
    final boolean isColumnRef = expr instanceof TupleValueExpression;
    if (isColumnRef || expr instanceof AggregateExpression) {
        final int parameterIndex = NEXT_PARAMETER_ID++;
        final ParameterValueExpression replacement = new ParameterValueExpression(parameterIndex, expr);
        if (!isColumnRef) {
            // Aggregating over parent columns inside a subquery is rejected,
            // with one exception: HAVING AGG(T1.C1) IN (SELECT T2.C2 ...)
            List<TupleValueExpression> referencedColumns = ExpressionUtil.getTupleValueExpressions(expr);
            assert (m_parentStmt != null);
            for (TupleValueExpression column : referencedColumns) {
                int owningStmtId = column.getOrigStmtId();
                // Acceptable owners are this statement or its direct parent.
                if (!(m_stmtId == owningStmtId || m_parentStmt.m_stmtId == owningStmtId)) {
                    throw new PlanningErrorException("Subqueries do not support aggregation of parent statement columns");
                }
            }
        }
        m_parameterTveMap.put(parameterIndex, expr);
        return replacement;
    }

    // Any other expression: keep the node and rewrite its children in place.
    AbstractExpression leftChild = expr.getLeft();
    if (leftChild != null) {
        expr.setLeft(replaceExpressionsWithPve(leftChild));
    }
    AbstractExpression rightChild = expr.getRight();
    if (rightChild != null) {
        expr.setRight(replaceExpressionsWithPve(rightChild));
    }
    List<AbstractExpression> currentArgs = expr.getArgs();
    if (currentArgs != null) {
        List<AbstractExpression> rewrittenArgs = new ArrayList<>();
        for (AbstractExpression arg : currentArgs) {
            rewrittenArgs.add(replaceExpressionsWithPve(arg));
        }
        expr.setArgs(rewrittenArgs);
    }
    return expr;
}
Also used : TupleValueExpression(org.voltdb.expressions.TupleValueExpression) AbstractExpression(org.voltdb.expressions.AbstractExpression) ArrayList(java.util.ArrayList) ParameterValueExpression(org.voltdb.expressions.ParameterValueExpression) AggregateExpression(org.voltdb.expressions.AggregateExpression) Constraint(org.voltdb.catalog.Constraint)

Example 2 with AggregateExpression

use of org.voltdb.expressions.AggregateExpression in project voltdb by VoltDB.

the class PlanAssembler method handleAggregationOperators.

/**
 * Adds aggregation plan nodes above {@code root} when the parsed SELECT has
 * aggregates or a GROUP BY, then passes the resulting root to
 * {@code handleDistinctWithGroupby}.
 *
 * Up to two aggregate nodes are built. {@code aggNode} is always created when
 * aggregation is needed. {@code topAggNode} is an optional second node (the
 * coordinator-side one, per the inline comments); it stays {@code null} when
 * the MV fix is needed, and is reset to {@code null} as soon as one aggregate
 * column is found that cannot be pushed down. Both are handed to
 * {@code pushDownAggregate}, whose return value replaces {@code root}.
 *
 * @param root current root of the plan under construction
 * @return whatever {@code handleDistinctWithGroupby} returns for the
 *         (possibly replaced) root
 */
private AbstractPlanNode handleAggregationOperators(AbstractPlanNode root) {
    /*
         * "Select A from T group by A" is grouped but has no aggregate operator
         * expressions. Catch that case by checking the grouped flag
         */
    if (m_parsedSelect.hasAggregateOrGroupby()) {
        AggregatePlanNode aggNode = null;
        // Optional second aggregate node, i.e., on the coordinator
        AggregatePlanNode topAggNode = null;
        IndexGroupByInfo gbInfo = new IndexGroupByInfo();
        if (root instanceof AbstractReceivePlanNode) {
            // Skip the index-scan probe for aggregate DISTINCT that does not
            // group by the partition column. Otherwise probe the node two levels
            // below the receive node; the boolean result is ignored here, so any
            // effect is presumably recorded in gbInfo -- TODO confirm.
            if (!m_parsedSelect.hasAggregateDistinct() || m_parsedSelect.hasPartitionColumnInGroupby()) {
                AbstractPlanNode candidate = root.getChild(0).getChild(0);
                gbInfo.m_multiPartition = true;
                switchToIndexScanForGroupBy(candidate, gbInfo);
            }
        } else if (switchToIndexScanForGroupBy(root, gbInfo)) {
            // Non-receive root: on success the index access node becomes the new root.
            root = gbInfo.m_indexAccess;
        }
        boolean needHashAgg = gbInfo.needHashAggregator(root, m_parsedSelect);
        // Construct the aggregate nodes
        if (needHashAgg) {
            if (m_parsedSelect.m_mvFixInfo.needed()) {
                // TODO: may optimize this edge case in future
                // Note that topAggNode is intentionally left null in this branch.
                aggNode = new HashAggregatePlanNode();
            } else {
                if (gbInfo.isChangedToSerialAggregate()) {
                    assert (root instanceof ReceivePlanNode);
                    aggNode = new AggregatePlanNode();
                } else if (gbInfo.isChangedToPartialAggregate()) {
                    aggNode = new PartialAggregatePlanNode(gbInfo.m_coveredGroupByColumns);
                } else {
                    aggNode = new HashAggregatePlanNode();
                }
                topAggNode = new HashAggregatePlanNode();
            }
        } else {
            aggNode = new AggregatePlanNode();
            // As above, no top node is created when the MV fix is needed.
            if (!m_parsedSelect.m_mvFixInfo.needed()) {
                topAggNode = new AggregatePlanNode();
            }
        }
        NodeSchema agg_schema = new NodeSchema();
        NodeSchema top_agg_schema = new NodeSchema();
        // One pass over the aggregate result columns fills both schemas in
        // lock step: each column contributes exactly one entry to each schema.
        for (int outputColumnIndex = 0; outputColumnIndex < m_parsedSelect.m_aggResultColumns.size(); outputColumnIndex += 1) {
            ParsedColInfo col = m_parsedSelect.m_aggResultColumns.get(outputColumnIndex);
            AbstractExpression rootExpr = col.expression;
            AbstractExpression agg_input_expr = null;
            SchemaColumn schema_col = null;
            SchemaColumn top_schema_col = null;
            if (rootExpr instanceof AggregateExpression) {
                ExpressionType agg_expression_type = rootExpr.getExpressionType();
                agg_input_expr = rootExpr.getLeft();
                // A bit of a hack: ProjectionNodes after the
                // aggregate node need the output columns here to
                // contain TupleValueExpressions (effectively on a temp table).
                // So we construct one based on the output of the
                // aggregate expression, the column alias provided by HSQL,
                // and the offset into the output table schema for the
                // aggregate node that we're computing.
                // Oh, oh, it's magic, you know..
                TupleValueExpression tve = new TupleValueExpression(AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, rootExpr, outputColumnIndex);
                tve.setDifferentiator(col.differentiator);
                boolean is_distinct = ((AggregateExpression) rootExpr).isDistinct();
                aggNode.addAggregate(agg_expression_type, is_distinct, outputColumnIndex, agg_input_expr);
                // Both schemas get a column built from the same arguments and the same tve.
                schema_col = new SchemaColumn(AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, tve, outputColumnIndex);
                top_schema_col = new SchemaColumn(AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, tve, outputColumnIndex);
                /*
                     * Special case count(*), count(), sum(), min() and max() to
                     * push them down to each partition. It will do the
                     * push-down if the select columns only contains the listed
                     * aggregate operators and other group-by columns. If the
                     * select columns includes any other aggregates, it will not
                     * do the push-down. - nshi
                     */
                if (topAggNode != null) {
                    ExpressionType top_expression_type = agg_expression_type;
                    /*
                         * For count(*), count() and sum(), the pushed-down
                         * aggregate node doesn't change. An extra sum()
                         * aggregate node is added to the coordinator to sum up
                         * the numbers from all the partitions. The input schema
                         * and the output schema of the sum() aggregate node is
                         * the same as the output schema of the push-down
                         * aggregate node.
                         *
                         * If DISTINCT is specified, don't do push-down for
                         * count() and sum() when not group by partition column.
                         * An exception is the aggregation arguments are the
                         * partition column (ENG-4980).
                         */
                    if (agg_expression_type == ExpressionType.AGGREGATE_COUNT_STAR || agg_expression_type == ExpressionType.AGGREGATE_COUNT || agg_expression_type == ExpressionType.AGGREGATE_SUM) {
                        if (is_distinct && !(m_parsedSelect.hasPartitionColumnInGroupby() || canPushDownDistinctAggregation((AggregateExpression) rootExpr))) {
                            topAggNode = null;
                        } else {
                            // for aggregate distinct when group by
                            // partition column, the top aggregate node
                            // will be dropped later, thus there is no
                            // effect to assign the top_expression_type.
                            top_expression_type = ExpressionType.AGGREGATE_SUM;
                        }
                    } else /*
                         * For min() and max(), the pushed-down aggregate node
                         * doesn't change. An extra aggregate node of the same
                         * type is added to the coordinator. The input schema
                         * and the output schema of the top aggregate node is
                         * the same as the output schema of the pushed-down
                         * aggregate node.
                         *
                         * APPROX_COUNT_DISTINCT can be similarly pushed down, but
                         * must be split into two different functions, which is
                         * done later, from pushDownAggregate().
                         */
                    if (agg_expression_type != ExpressionType.AGGREGATE_MIN && agg_expression_type != ExpressionType.AGGREGATE_MAX && agg_expression_type != ExpressionType.AGGREGATE_APPROX_COUNT_DISTINCT) {
                        /*
                             * Unsupported aggregate for push-down (AVG for example).
                             */
                        topAggNode = null;
                    }
                    // Re-check: topAggNode may have been nulled just above. Once it is
                    // null it stays null for all remaining columns of this loop.
                    if (topAggNode != null) {
                        /*
                             * Input column of the top aggregate node is the
                             * output column of the push-down aggregate node
                             */
                        boolean topDistinctFalse = false;
                        topAggNode.addAggregate(top_expression_type, topDistinctFalse, outputColumnIndex, tve);
                    }
                }
            // end if we have a top agg node
            } else {
                // Non-aggregate column: it must not contain a nested
                // AggregateExpression (presumably any complex aggregate
                // has already been broken down by the caller).
                assert (!rootExpr.hasAnySubexpressionOfClass(AggregateExpression.class));
                /*
                     * These columns are the pass through columns that are not being
                     * aggregated on. These are the ones from the SELECT list. They
                     * MUST already exist in the child node's output. Find them and
                     * add them to the aggregate's output.
                     */
                schema_col = new SchemaColumn(col.tableName, col.tableAlias, col.columnName, col.alias, col.expression, outputColumnIndex);
                // For a group-by column the top schema uses the expression registered
                // under the column's alias in m_groupByExpressions instead of col.expression.
                AbstractExpression topExpr = null;
                if (col.groupBy) {
                    topExpr = m_parsedSelect.m_groupByExpressions.get(col.alias);
                } else {
                    topExpr = col.expression;
                }
                top_schema_col = new SchemaColumn(col.tableName, col.tableAlias, col.columnName, col.alias, topExpr, outputColumnIndex);
            }
            agg_schema.addColumn(schema_col);
            top_agg_schema.addColumn(top_schema_col);
        }
        // Group-by expressions go on both nodes; the top node again uses the
        // expression looked up by alias in m_groupByExpressions.
        for (ParsedColInfo col : m_parsedSelect.groupByColumns()) {
            aggNode.addGroupByExpression(col.expression);
            if (topAggNode != null) {
                topAggNode.addGroupByExpression(m_parsedSelect.m_groupByExpressions.get(col.alias));
            }
        }
        aggNode.setOutputSchema(agg_schema);
        // The separately built top schema is only used for complex group-by;
        // otherwise the top node shares the same schema object as aggNode.
        if (topAggNode != null) {
            if (m_parsedSelect.hasComplexGroupby()) {
                topAggNode.setOutputSchema(top_agg_schema);
            } else {
                topAggNode.setOutputSchema(agg_schema);
            }
        }
        // Never push down aggregation for MV fix case.
        root = pushDownAggregate(root, aggNode, topAggNode, m_parsedSelect);
    }
    return handleDistinctWithGroupby(root);
}
Also used : AbstractPlanNode(org.voltdb.plannodes.AbstractPlanNode) TupleValueExpression(org.voltdb.expressions.TupleValueExpression) AbstractReceivePlanNode(org.voltdb.plannodes.AbstractReceivePlanNode) HashAggregatePlanNode(org.voltdb.plannodes.HashAggregatePlanNode) AggregatePlanNode(org.voltdb.plannodes.AggregatePlanNode) PartialAggregatePlanNode(org.voltdb.plannodes.PartialAggregatePlanNode) AbstractReceivePlanNode(org.voltdb.plannodes.AbstractReceivePlanNode) MergeReceivePlanNode(org.voltdb.plannodes.MergeReceivePlanNode) ReceivePlanNode(org.voltdb.plannodes.ReceivePlanNode) PartialAggregatePlanNode(org.voltdb.plannodes.PartialAggregatePlanNode) HashAggregatePlanNode(org.voltdb.plannodes.HashAggregatePlanNode) SchemaColumn(org.voltdb.plannodes.SchemaColumn) AggregateExpression(org.voltdb.expressions.AggregateExpression) Constraint(org.voltdb.catalog.Constraint) AbstractExpression(org.voltdb.expressions.AbstractExpression) ExpressionType(org.voltdb.types.ExpressionType) NodeSchema(org.voltdb.plannodes.NodeSchema)

Example 3 with AggregateExpression

use of org.voltdb.expressions.AggregateExpression in project voltdb by VoltDB.

the class AggregatePlanNode method updateAggregate.

/**
 * Changes the aggregate type recorded at the given aggregate position and
 * updates the value type and value size of the matching output schema
 * column's expression to agree with the new type.
 *
 * @param index position in this node's aggregate lists
 * @param aggType aggregate expression type to record at that position
 */
public void updateAggregate(int index, ExpressionType aggType) {
    // A throwaway aggregate expression of the new type supplies the value
    // type and size to copy onto the output schema expression (a TVE).
    AggregateExpression typeProbe = new AggregateExpression(aggType);
    typeProbe.finalizeValueTypes();

    int columnPosition = m_aggregateOutputColumns.get(index);
    AbstractExpression outputExpr = m_outputSchema.getColumns().get(columnPosition).getExpression();
    outputExpr.setValueType(typeProbe.getValueType());
    outputExpr.setValueSize(typeProbe.getValueSize());

    m_aggregateTypes.set(index, aggType);
}
Also used : AbstractExpression(org.voltdb.expressions.AbstractExpression) AggregateExpression(org.voltdb.expressions.AggregateExpression)

Example 4 with AggregateExpression

use of org.voltdb.expressions.AggregateExpression in project voltdb by VoltDB.

the class MaterializedViewFixInfo method edgeCaseQueryNoFixNeeded.

/**
 * ENG-5386: decides whether the materialized-view re-aggregation fix can be
 * skipped for better performance.
 *
 * This covers an edge case of queries against partitioned materialized views
 * whose GROUP BY columns do not include the partition key. When the query
 * itself repeats the re-aggregation the fix would perform -- which must
 * consist only of MIN, MAX and/or non-distinct SUM -- the extra cost of the
 * fix can be avoided.
 *
 * @param mvDDLGroupbyColumnNames names of the GROUP BY columns in the view's DDL
 * @param mvColumnAggType aggregate type of each view column, keyed by column name
 * @param displayColumns display columns of the query
 * @param groupByColumns GROUP BY columns of the query
 * @return true if the query gives the correct answer without the fix
 */
private boolean edgeCaseQueryNoFixNeeded(Set<String> mvDDLGroupbyColumnNames, Map<String, ExpressionType> mvColumnAggType, List<ParsedColInfo> displayColumns, List<ParsedColInfo> groupByColumns) {
    // Condition (1): every group-by column taken from the view must be one
    // of the view DDL's own group-by columns.
    for (ParsedColInfo groupCol : groupByColumns) {
        assert (groupCol.expression instanceof TupleValueExpression);
        TupleValueExpression groupTve = (TupleValueExpression) groupCol.expression;
        boolean fromView = groupTve.getTableName().equals(getMVTableName());
        if (fromView && !mvDDLGroupbyColumnNames.contains(groupTve.getColumnName())) {
            return false;
        }
    }

    // Condition (2): every display column that is not a group-by
    // pass-through must be a qualifying aggregate.
    for (ParsedColInfo displayCol : displayColumns) {
        if (groupByColumns.contains(displayCol)) {
            continue;
        }
        if (!(displayCol.expression instanceof AggregateExpression)) {
            return false;
        }
        AggregateExpression aggregate = (AggregateExpression) displayCol.expression;
        if (!(aggregate.getLeft() instanceof TupleValueExpression)) {
            return false;
        }
        ExpressionType aggType = aggregate.getExpressionType();
        // Only non-distinct SUM, MIN and MAX can tolerate a skipped reaggregation.
        boolean tolerant = (aggType == ExpressionType.AGGREGATE_SUM && !aggregate.isDistinct())
                || aggType == ExpressionType.AGGREGATE_MIN
                || aggType == ExpressionType.AGGREGATE_MAX;
        if (!tolerant) {
            return false;
        }
        TupleValueExpression argument = (TupleValueExpression) aggregate.getLeft();
        if (argument.getTableName().equals(getMVTableName())) {
            // On a view column, the query must repeat the view's own aggregate:
            // SUMming a SUM, MINning a MIN, or MAXxing a MAX.
            if (mvColumnAggType.get(argument.getColumnName()) != aggType) {
                return false;
            }
        } else if (aggType == ExpressionType.AGGREGATE_SUM) {
            // On a column of another table, the duplication would corrupt a SUM.
            return false;
        }
    }

    // Edge case query can be optimized with correct answer without MV reAggregation fix.
    return true;
}
Also used : TupleValueExpression(org.voltdb.expressions.TupleValueExpression) AggregateExpression(org.voltdb.expressions.AggregateExpression) ExpressionType(org.voltdb.types.ExpressionType)

Example 5 with AggregateExpression

use of org.voltdb.expressions.AggregateExpression in project voltdb by VoltDB.

the class ParsedSelectStmt method insertAggExpressionsToAggResultColumns.

/**
 * Registers the aggregate expressions found in one parsed column in
 * m_aggResultColumns. Called from the display-column and order-column
 * parsing code.
 *
 * If the column consists of exactly one aggregate that equals the column's
 * whole expression, the aggregate is registered under the column's own
 * names and the method returns. Otherwise every aggregate is registered
 * under the temp-table names and the statement is flagged as having a
 * complex aggregate.
 *
 * @param aggColumns aggregate expressions collected from cookedCol's expression
 * @param cookedCol the parsed column the aggregates were collected from
 * @throws PlanningErrorException if an aggregate has a subquery in its arguments
 */
private void insertAggExpressionsToAggResultColumns(List<AbstractExpression> aggColumns, ParsedColInfo cookedCol) {
    for (AbstractExpression aggExpr : aggColumns) {
        assert (aggExpr instanceof AggregateExpression);
        if (aggExpr.hasSubquerySubexpression()) {
            throw new PlanningErrorException("SQL Aggregate function calls with subquery expression arguments are not allowed.");
        }

        ParsedColInfo aggCol = new ParsedColInfo();
        aggCol.expression = aggExpr.clone();
        assert (aggCol.expression instanceof AggregateExpression);
        if (aggCol.expression.getExpressionType() == ExpressionType.AGGREGATE_AVG) {
            m_hasAverage = true;
        }

        // True when the whole column is nothing but this single aggregate.
        boolean columnIsTheAggregate = aggColumns.size() == 1 && cookedCol.expression.equals(aggColumns.get(0));
        if (columnIsTheAggregate) {
            aggCol.alias = cookedCol.alias;
            aggCol.tableName = cookedCol.tableName;
            aggCol.tableAlias = cookedCol.tableAlias;
            aggCol.columnName = cookedCol.columnName;
        } else {
            // Try to check complexAggs earlier
            m_hasComplexAgg = true;
            // The aggregate is only part of a larger expression, so give it
            // the temp-table names.
            aggCol.tableName = TEMP_TABLE_NAME;
            aggCol.tableAlias = TEMP_TABLE_NAME;
            aggCol.columnName = "";
        }

        if (!m_aggResultColumns.contains(aggCol)) {
            m_aggResultColumns.add(aggCol);
        }
        if (columnIsTheAggregate) {
            return;
        }
        // Value types are finalized only after the duplicate check and insertion.
        ExpressionUtil.finalizeValueTypes(aggCol.expression);
    }
}
Also used : AbstractExpression(org.voltdb.expressions.AbstractExpression) AggregateExpression(org.voltdb.expressions.AggregateExpression)

Aggregations

AggregateExpression (org.voltdb.expressions.AggregateExpression)9 AbstractExpression (org.voltdb.expressions.AbstractExpression)8 TupleValueExpression (org.voltdb.expressions.TupleValueExpression)3 ExpressionType (org.voltdb.types.ExpressionType)3 ArrayList (java.util.ArrayList)2 Constraint (org.voltdb.catalog.Constraint)2 VoltXMLElement (org.hsqldb_voltpatches.VoltXMLElement)1 VoltType (org.voltdb.VoltType)1 ConstantValueExpression (org.voltdb.expressions.ConstantValueExpression)1 OperatorExpression (org.voltdb.expressions.OperatorExpression)1 ParameterValueExpression (org.voltdb.expressions.ParameterValueExpression)1 AbstractPlanNode (org.voltdb.plannodes.AbstractPlanNode)1 AbstractReceivePlanNode (org.voltdb.plannodes.AbstractReceivePlanNode)1 AggregatePlanNode (org.voltdb.plannodes.AggregatePlanNode)1 HashAggregatePlanNode (org.voltdb.plannodes.HashAggregatePlanNode)1 MergeReceivePlanNode (org.voltdb.plannodes.MergeReceivePlanNode)1 NodeSchema (org.voltdb.plannodes.NodeSchema)1 PartialAggregatePlanNode (org.voltdb.plannodes.PartialAggregatePlanNode)1 ReceivePlanNode (org.voltdb.plannodes.ReceivePlanNode)1 SchemaColumn (org.voltdb.plannodes.SchemaColumn)1