Search in sources :

Example 1 with PartialAggregatePlanNode

use of org.voltdb.plannodes.PartialAggregatePlanNode in project voltdb by VoltDB.

the class PlanAssembler method handleAggregationOperators.

private AbstractPlanNode handleAggregationOperators(AbstractPlanNode root) {
    /*
         * "Select A from T group by A" is grouped but has no aggregate operator
         * expressions. Catch that case by checking the grouped flag
         */
    if (m_parsedSelect.hasAggregateOrGroupby()) {
        AggregatePlanNode aggNode = null;
        // i.e., on the coordinator
        AggregatePlanNode topAggNode = null;
        IndexGroupByInfo gbInfo = new IndexGroupByInfo();
        if (root instanceof AbstractReceivePlanNode) {
            // for distinct that does not group by partition column
            if (!m_parsedSelect.hasAggregateDistinct() || m_parsedSelect.hasPartitionColumnInGroupby()) {
                AbstractPlanNode candidate = root.getChild(0).getChild(0);
                gbInfo.m_multiPartition = true;
                switchToIndexScanForGroupBy(candidate, gbInfo);
            }
        } else if (switchToIndexScanForGroupBy(root, gbInfo)) {
            root = gbInfo.m_indexAccess;
        }
        boolean needHashAgg = gbInfo.needHashAggregator(root, m_parsedSelect);
        // Construct the aggregate nodes
        if (needHashAgg) {
            if (m_parsedSelect.m_mvFixInfo.needed()) {
                // TODO: may optimize this edge case in future
                aggNode = new HashAggregatePlanNode();
            } else {
                if (gbInfo.isChangedToSerialAggregate()) {
                    assert (root instanceof ReceivePlanNode);
                    aggNode = new AggregatePlanNode();
                } else if (gbInfo.isChangedToPartialAggregate()) {
                    aggNode = new PartialAggregatePlanNode(gbInfo.m_coveredGroupByColumns);
                } else {
                    aggNode = new HashAggregatePlanNode();
                }
                topAggNode = new HashAggregatePlanNode();
            }
        } else {
            aggNode = new AggregatePlanNode();
            if (!m_parsedSelect.m_mvFixInfo.needed()) {
                topAggNode = new AggregatePlanNode();
            }
        }
        NodeSchema agg_schema = new NodeSchema();
        NodeSchema top_agg_schema = new NodeSchema();
        for (int outputColumnIndex = 0; outputColumnIndex < m_parsedSelect.m_aggResultColumns.size(); outputColumnIndex += 1) {
            ParsedColInfo col = m_parsedSelect.m_aggResultColumns.get(outputColumnIndex);
            AbstractExpression rootExpr = col.expression;
            AbstractExpression agg_input_expr = null;
            SchemaColumn schema_col = null;
            SchemaColumn top_schema_col = null;
            if (rootExpr instanceof AggregateExpression) {
                ExpressionType agg_expression_type = rootExpr.getExpressionType();
                agg_input_expr = rootExpr.getLeft();
                // A bit of a hack: ProjectionNodes after the
                // aggregate node need the output columns here to
                // contain TupleValueExpressions (effectively on a temp table).
                // So we construct one based on the output of the
                // aggregate expression, the column alias provided by HSQL,
                // and the offset into the output table schema for the
                // aggregate node that we're computing.
                // Oh, oh, it's magic, you know..
                TupleValueExpression tve = new TupleValueExpression(AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, rootExpr, outputColumnIndex);
                tve.setDifferentiator(col.differentiator);
                boolean is_distinct = ((AggregateExpression) rootExpr).isDistinct();
                aggNode.addAggregate(agg_expression_type, is_distinct, outputColumnIndex, agg_input_expr);
                schema_col = new SchemaColumn(AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, tve, outputColumnIndex);
                top_schema_col = new SchemaColumn(AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, tve, outputColumnIndex);
                /*
                     * Special case count(*), count(), sum(), min() and max() to
                     * push them down to each partition. It will do the
                     * push-down if the select columns only contains the listed
                     * aggregate operators and other group-by columns. If the
                     * select columns includes any other aggregates, it will not
                     * do the push-down. - nshi
                     */
                if (topAggNode != null) {
                    ExpressionType top_expression_type = agg_expression_type;
                    /*
                         * For count(*), count() and sum(), the pushed-down
                         * aggregate node doesn't change. An extra sum()
                         * aggregate node is added to the coordinator to sum up
                         * the numbers from all the partitions. The input schema
                         * and the output schema of the sum() aggregate node is
                         * the same as the output schema of the push-down
                         * aggregate node.
                         *
                         * If DISTINCT is specified, don't do push-down for
                         * count() and sum() when not group by partition column.
                         * An exception is the aggregation arguments are the
                         * partition column (ENG-4980).
                         */
                    if (agg_expression_type == ExpressionType.AGGREGATE_COUNT_STAR || agg_expression_type == ExpressionType.AGGREGATE_COUNT || agg_expression_type == ExpressionType.AGGREGATE_SUM) {
                        if (is_distinct && !(m_parsedSelect.hasPartitionColumnInGroupby() || canPushDownDistinctAggregation((AggregateExpression) rootExpr))) {
                            topAggNode = null;
                        } else {
                            // for aggregate distinct when group by
                            // partition column, the top aggregate node
                            // will be dropped later, thus there is no
                            // effect to assign the top_expression_type.
                            top_expression_type = ExpressionType.AGGREGATE_SUM;
                        }
                    } else /*
                         * For min() and max(), the pushed-down aggregate node
                         * doesn't change. An extra aggregate node of the same
                         * type is added to the coordinator. The input schema
                         * and the output schema of the top aggregate node is
                         * the same as the output schema of the pushed-down
                         * aggregate node.
                         *
                         * APPROX_COUNT_DISTINCT can be similarly pushed down, but
                         * must be split into two different functions, which is
                         * done later, from pushDownAggregate().
                         */
                    if (agg_expression_type != ExpressionType.AGGREGATE_MIN && agg_expression_type != ExpressionType.AGGREGATE_MAX && agg_expression_type != ExpressionType.AGGREGATE_APPROX_COUNT_DISTINCT) {
                        /*
                             * Unsupported aggregate for push-down (AVG for example).
                             */
                        topAggNode = null;
                    }
                    if (topAggNode != null) {
                        /*
                             * Input column of the top aggregate node is the
                             * output column of the push-down aggregate node
                             */
                        boolean topDistinctFalse = false;
                        topAggNode.addAggregate(top_expression_type, topDistinctFalse, outputColumnIndex, tve);
                    }
                }
            // end if we have a top agg node
            } else {
                // has already been broken down.
                assert (!rootExpr.hasAnySubexpressionOfClass(AggregateExpression.class));
                /*
                     * These columns are the pass through columns that are not being
                     * aggregated on. These are the ones from the SELECT list. They
                     * MUST already exist in the child node's output. Find them and
                     * add them to the aggregate's output.
                     */
                schema_col = new SchemaColumn(col.tableName, col.tableAlias, col.columnName, col.alias, col.expression, outputColumnIndex);
                AbstractExpression topExpr = null;
                if (col.groupBy) {
                    topExpr = m_parsedSelect.m_groupByExpressions.get(col.alias);
                } else {
                    topExpr = col.expression;
                }
                top_schema_col = new SchemaColumn(col.tableName, col.tableAlias, col.columnName, col.alias, topExpr, outputColumnIndex);
            }
            agg_schema.addColumn(schema_col);
            top_agg_schema.addColumn(top_schema_col);
        }
        for (ParsedColInfo col : m_parsedSelect.groupByColumns()) {
            aggNode.addGroupByExpression(col.expression);
            if (topAggNode != null) {
                topAggNode.addGroupByExpression(m_parsedSelect.m_groupByExpressions.get(col.alias));
            }
        }
        aggNode.setOutputSchema(agg_schema);
        if (topAggNode != null) {
            if (m_parsedSelect.hasComplexGroupby()) {
                topAggNode.setOutputSchema(top_agg_schema);
            } else {
                topAggNode.setOutputSchema(agg_schema);
            }
        }
        // Never push down aggregation for MV fix case.
        root = pushDownAggregate(root, aggNode, topAggNode, m_parsedSelect);
    }
    return handleDistinctWithGroupby(root);
}
Also used : AbstractPlanNode(org.voltdb.plannodes.AbstractPlanNode) TupleValueExpression(org.voltdb.expressions.TupleValueExpression) AbstractReceivePlanNode(org.voltdb.plannodes.AbstractReceivePlanNode) HashAggregatePlanNode(org.voltdb.plannodes.HashAggregatePlanNode) AggregatePlanNode(org.voltdb.plannodes.AggregatePlanNode) PartialAggregatePlanNode(org.voltdb.plannodes.PartialAggregatePlanNode) AbstractReceivePlanNode(org.voltdb.plannodes.AbstractReceivePlanNode) MergeReceivePlanNode(org.voltdb.plannodes.MergeReceivePlanNode) ReceivePlanNode(org.voltdb.plannodes.ReceivePlanNode) PartialAggregatePlanNode(org.voltdb.plannodes.PartialAggregatePlanNode) HashAggregatePlanNode(org.voltdb.plannodes.HashAggregatePlanNode) SchemaColumn(org.voltdb.plannodes.SchemaColumn) AggregateExpression(org.voltdb.expressions.AggregateExpression) Constraint(org.voltdb.catalog.Constraint) AbstractExpression(org.voltdb.expressions.AbstractExpression) ExpressionType(org.voltdb.types.ExpressionType) NodeSchema(org.voltdb.plannodes.NodeSchema)

Aggregations

Constraint (org.voltdb.catalog.Constraint)1 AbstractExpression (org.voltdb.expressions.AbstractExpression)1 AggregateExpression (org.voltdb.expressions.AggregateExpression)1 TupleValueExpression (org.voltdb.expressions.TupleValueExpression)1 AbstractPlanNode (org.voltdb.plannodes.AbstractPlanNode)1 AbstractReceivePlanNode (org.voltdb.plannodes.AbstractReceivePlanNode)1 AggregatePlanNode (org.voltdb.plannodes.AggregatePlanNode)1 HashAggregatePlanNode (org.voltdb.plannodes.HashAggregatePlanNode)1 MergeReceivePlanNode (org.voltdb.plannodes.MergeReceivePlanNode)1 NodeSchema (org.voltdb.plannodes.NodeSchema)1 PartialAggregatePlanNode (org.voltdb.plannodes.PartialAggregatePlanNode)1 ReceivePlanNode (org.voltdb.plannodes.ReceivePlanNode)1 SchemaColumn (org.voltdb.plannodes.SchemaColumn)1 ExpressionType (org.voltdb.types.ExpressionType)1