Search in sources :

Example 1 with AbstractReceivePlanNode

use of org.voltdb.plannodes.AbstractReceivePlanNode in project voltdb by VoltDB.

the class PlanAssembler method handleAggregationOperators.

/**
 * Adds aggregation to the plan for the current SELECT statement.
 *
 * When the statement has an aggregate or a GROUP BY, this builds one
 * aggregate node ({@code aggNode}) and, where the aggregation can be
 * split, a second "top" aggregate node ({@code topAggNode}), fills in
 * their aggregate expressions, group-by expressions and output schemas,
 * and passes both to {@code pushDownAggregate}. When the statement has
 * neither, the aggregate step is skipped. In both cases the resulting
 * plan is finally passed through {@code handleDistinctWithGroupby}.
 *
 * @param root the root of the plan built so far
 * @return the value returned by {@code handleDistinctWithGroupby} for the
 *         (possibly replaced) root
 */
private AbstractPlanNode handleAggregationOperators(AbstractPlanNode root) {
    /*
     * "Select A from T group by A" is grouped but has no aggregate operator
     * expressions. Catch that case by checking the grouped flag
     */
    if (m_parsedSelect.hasAggregateOrGroupby()) {
        // The aggregate node that is always constructed below.
        AggregatePlanNode aggNode = null;
        // The optional "top" aggregate node, i.e., on the coordinator.
        // It is reset to null further down whenever a column's aggregate
        // cannot be split into a pushed-down part and a top part.
        AggregatePlanNode topAggNode = null;
        IndexGroupByInfo gbInfo = new IndexGroupByInfo();
        if (root instanceof AbstractReceivePlanNode) {
            // Multi-partition plan: attempt the index-scan switch on the
            // grandchild of the receive node, but not when the statement has
            // a DISTINCT aggregate that does not group by the partition column.
            // NOTE(review): getChild(0).getChild(0) presumably skips the
            // SendPlanNode under the receive node -- confirm.
            // The boolean result is ignored here and root is left unchanged.
            if (!m_parsedSelect.hasAggregateDistinct() || m_parsedSelect.hasPartitionColumnInGroupby()) {
                AbstractPlanNode candidate = root.getChild(0).getChild(0);
                gbInfo.m_multiPartition = true;
                switchToIndexScanForGroupBy(candidate, gbInfo);
            }
        } else if (switchToIndexScanForGroupBy(root, gbInfo)) {
            // No receive node at the root: when the switch succeeds, the
            // index access node recorded in gbInfo becomes the new root.
            root = gbInfo.m_indexAccess;
        }
        boolean needHashAgg = gbInfo.needHashAggregator(root, m_parsedSelect);
        // Construct the aggregate nodes
        if (needHashAgg) {
            if (m_parsedSelect.m_mvFixInfo.needed()) {
                // MV fix case: only a hash aggregate is built and
                // topAggNode stays null.
                // TODO: may optimize this edge case in future
                aggNode = new HashAggregatePlanNode();
            } else {
                if (gbInfo.isChangedToSerialAggregate()) {
                    // NOTE(review): the branch above tests
                    // AbstractReceivePlanNode, while this asserts the
                    // concrete ReceivePlanNode -- confirm that is intended.
                    assert (root instanceof ReceivePlanNode);
                    aggNode = new AggregatePlanNode();
                } else if (gbInfo.isChangedToPartialAggregate()) {
                    aggNode = new PartialAggregatePlanNode(gbInfo.m_coveredGroupByColumns);
                } else {
                    aggNode = new HashAggregatePlanNode();
                }
                // Whichever kind aggNode is, the top node is a hash aggregate.
                topAggNode = new HashAggregatePlanNode();
            }
        } else {
            // No hash aggregation needed: plain aggregate nodes, and again
            // no top node in the MV fix case.
            aggNode = new AggregatePlanNode();
            if (!m_parsedSelect.m_mvFixInfo.needed()) {
                topAggNode = new AggregatePlanNode();
            }
        }
        // Output schemas are built column by column in parallel:
        // agg_schema for aggNode, top_agg_schema for the top node when the
        // statement has a complex GROUP BY (see the end of this method).
        NodeSchema agg_schema = new NodeSchema();
        NodeSchema top_agg_schema = new NodeSchema();
        for (int outputColumnIndex = 0; outputColumnIndex < m_parsedSelect.m_aggResultColumns.size(); outputColumnIndex += 1) {
            ParsedColInfo col = m_parsedSelect.m_aggResultColumns.get(outputColumnIndex);
            AbstractExpression rootExpr = col.expression;
            AbstractExpression agg_input_expr = null;
            SchemaColumn schema_col = null;
            SchemaColumn top_schema_col = null;
            if (rootExpr instanceof AggregateExpression) {
                ExpressionType agg_expression_type = rootExpr.getExpressionType();
                // The aggregate's argument is its left child.
                agg_input_expr = rootExpr.getLeft();
                // A bit of a hack: ProjectionNodes after the
                // aggregate node need the output columns here to
                // contain TupleValueExpressions (effectively on a temp table).
                // So we construct one based on the output of the
                // aggregate expression, the column alias provided by HSQL,
                // and the offset into the output table schema for the
                // aggregate node that we're computing.
                // Oh, oh, it's magic, you know..
                TupleValueExpression tve = new TupleValueExpression(AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, rootExpr, outputColumnIndex);
                tve.setDifferentiator(col.differentiator);
                boolean is_distinct = ((AggregateExpression) rootExpr).isDistinct();
                aggNode.addAggregate(agg_expression_type, is_distinct, outputColumnIndex, agg_input_expr);
                // Both schemas expose the aggregate result through the same TVE.
                schema_col = new SchemaColumn(AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, tve, outputColumnIndex);
                top_schema_col = new SchemaColumn(AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, tve, outputColumnIndex);
                /*
                 * Special case count(*), count(), sum(), min() and max() to
                 * push them down to each partition. It will do the
                 * push-down if the select columns only contains the listed
                 * aggregate operators and other group-by columns. If the
                 * select columns includes any other aggregates, it will not
                 * do the push-down. - nshi
                 */
                if (topAggNode != null) {
                    ExpressionType top_expression_type = agg_expression_type;
                    /*
                     * For count(*), count() and sum(), the pushed-down
                     * aggregate node doesn't change. An extra sum()
                     * aggregate node is added to the coordinator to sum up
                     * the numbers from all the partitions. The input schema
                     * and the output schema of the sum() aggregate node is
                     * the same as the output schema of the push-down
                     * aggregate node.
                     *
                     * If DISTINCT is specified, don't do push-down for
                     * count() and sum() when not group by partition column.
                     * An exception is the aggregation arguments are the
                     * partition column (ENG-4980).
                     */
                    if (agg_expression_type == ExpressionType.AGGREGATE_COUNT_STAR || agg_expression_type == ExpressionType.AGGREGATE_COUNT || agg_expression_type == ExpressionType.AGGREGATE_SUM) {
                        if (is_distinct && !(m_parsedSelect.hasPartitionColumnInGroupby() || canPushDownDistinctAggregation((AggregateExpression) rootExpr))) {
                            // Dropping the top node here disables it for
                            // every remaining column as well.
                            topAggNode = null;
                        } else {
                            // for aggregate distinct when group by
                            // partition column, the top aggregate node
                            // will be dropped later, thus there is no
                            // effect to assign the top_expression_type.
                            top_expression_type = ExpressionType.AGGREGATE_SUM;
                        }
                    } else /*
                     * For min() and max(), the pushed-down aggregate node
                     * doesn't change. An extra aggregate node of the same
                     * type is added to the coordinator. The input schema
                     * and the output schema of the top aggregate node is
                     * the same as the output schema of the pushed-down
                     * aggregate node.
                     *
                     * APPROX_COUNT_DISTINCT can be similarly pushed down, but
                     * must be split into two different functions, which is
                     * done later, from pushDownAggregate().
                     */
                    if (agg_expression_type != ExpressionType.AGGREGATE_MIN && agg_expression_type != ExpressionType.AGGREGATE_MAX && agg_expression_type != ExpressionType.AGGREGATE_APPROX_COUNT_DISTINCT) {
                        /*
                         * Unsupported aggregate for push-down (AVG for example).
                         */
                        topAggNode = null;
                    }
                    // Re-check: either branch above may have just nulled it.
                    if (topAggNode != null) {
                        /*
                         * Input column of the top aggregate node is the
                         * output column of the push-down aggregate node
                         */
                        boolean topDistinctFalse = false;
                        topAggNode.addAggregate(top_expression_type, topDistinctFalse, outputColumnIndex, tve);
                    }
                }
            // end if we have a top agg node
            } else {
                // A non-aggregate column must not contain a nested aggregate;
                // any such expression has already been broken down.
                assert (!rootExpr.hasAnySubexpressionOfClass(AggregateExpression.class));
                /*
                 * These columns are the pass through columns that are not being
                 * aggregated on. These are the ones from the SELECT list. They
                 * MUST already exist in the child node's output. Find them and
                 * add them to the aggregate's output.
                 */
                schema_col = new SchemaColumn(col.tableName, col.tableAlias, col.columnName, col.alias, col.expression, outputColumnIndex);
                // For the top schema, a group-by column uses the expression
                // registered under its alias in m_groupByExpressions; any
                // other column uses its own expression.
                AbstractExpression topExpr = null;
                if (col.groupBy) {
                    topExpr = m_parsedSelect.m_groupByExpressions.get(col.alias);
                } else {
                    topExpr = col.expression;
                }
                top_schema_col = new SchemaColumn(col.tableName, col.tableAlias, col.columnName, col.alias, topExpr, outputColumnIndex);
            }
            agg_schema.addColumn(schema_col);
            top_agg_schema.addColumn(top_schema_col);
        }
        // Register the group-by expressions: the column's own expression on
        // aggNode, and the alias-keyed expression on the top node if present.
        for (ParsedColInfo col : m_parsedSelect.groupByColumns()) {
            aggNode.addGroupByExpression(col.expression);
            if (topAggNode != null) {
                topAggNode.addGroupByExpression(m_parsedSelect.m_groupByExpressions.get(col.alias));
            }
        }
        aggNode.setOutputSchema(agg_schema);
        if (topAggNode != null) {
            // Only a complex GROUP BY gets the separately built top schema;
            // otherwise the top node shares aggNode's schema.
            if (m_parsedSelect.hasComplexGroupby()) {
                topAggNode.setOutputSchema(top_agg_schema);
            } else {
                topAggNode.setOutputSchema(agg_schema);
            }
        }
        // Never push down aggregation for MV fix case: topAggNode was left
        // null above whenever m_mvFixInfo.needed() returned true.
        root = pushDownAggregate(root, aggNode, topAggNode, m_parsedSelect);
    }
    return handleDistinctWithGroupby(root);
}
Also used : AbstractPlanNode(org.voltdb.plannodes.AbstractPlanNode) TupleValueExpression(org.voltdb.expressions.TupleValueExpression) AbstractReceivePlanNode(org.voltdb.plannodes.AbstractReceivePlanNode) HashAggregatePlanNode(org.voltdb.plannodes.HashAggregatePlanNode) AggregatePlanNode(org.voltdb.plannodes.AggregatePlanNode) PartialAggregatePlanNode(org.voltdb.plannodes.PartialAggregatePlanNode) AbstractReceivePlanNode(org.voltdb.plannodes.AbstractReceivePlanNode) MergeReceivePlanNode(org.voltdb.plannodes.MergeReceivePlanNode) ReceivePlanNode(org.voltdb.plannodes.ReceivePlanNode) PartialAggregatePlanNode(org.voltdb.plannodes.PartialAggregatePlanNode) HashAggregatePlanNode(org.voltdb.plannodes.HashAggregatePlanNode) SchemaColumn(org.voltdb.plannodes.SchemaColumn) AggregateExpression(org.voltdb.expressions.AggregateExpression) Constraint(org.voltdb.catalog.Constraint) AbstractExpression(org.voltdb.expressions.AbstractExpression) ExpressionType(org.voltdb.types.ExpressionType) NodeSchema(org.voltdb.plannodes.NodeSchema)

Example 2 with AbstractReceivePlanNode

use of org.voltdb.plannodes.AbstractReceivePlanNode in project voltdb by VoltDB.

the class QueryPlanner method compileFromXML.

/**
 * Find the best plan for the given VoltXMLElement, where "best" means the
 * plan the PlanAssembler reports as its best-cost plan.  Output schemas are
 * generated only once, for that winning plan, after it has been chosen.
 *
 * The order here is:
 * <ol>
 * <li>
 *   Parse the VoltXMLElement into an AbstractParsedStmt.  For an UPSERT,
 *   additionally require exactly one table and a primary key on it.
 * </li>
 * <li>
 *   Create a PlanAssembler with its own clone of the plan selector, and ask
 *   it for the best cost plan.
 * </li>
 * <li>
 *   If the plan is read only, put a SendPlanNode on top of it.
 * </li>
 * <li>
 *   Generate the output schema and resolve the column indexes for the plan
 *   graph; for a SELECT, check the resulting columns against the statement.
 * </li>
 * <li>
 *   Finalize the plan, renumber its nodes starting at 1, and fragmentize it
 *   at its receive node if it has exactly one.  More than one receive node
 *   is rejected.
 * </li>
 * </ol>
 *
 * @param xmlSQL the statement in XML form, handed to the statement parser
 * @param paramValues parameter values, handed to the statement parser
 * @return the best plan, or null on failure, in which case
 *         {@code m_recentErrorMsg} holds the reason
 */
private CompiledPlan compileFromXML(VoltXMLElement xmlSQL, String[] paramValues) {
    // Exceptions thrown by the parser are left for the callers of
    // compilePlan, which are prepared to catch them.
    AbstractParsedStmt statement = AbstractParsedStmt.parse(m_sql, xmlSQL, paramValues, m_db, m_joinOrder);
    if (statement == null) {
        m_recentErrorMsg = "Failed to parse SQL statement: " + getOriginalSql();
        return null;
    }

    if (m_isUpsert) {
        // UPSERT targets exactly one table (no joins) ...
        if (statement.m_tableList.size() != 1) {
            m_recentErrorMsg = "UPSERT is supported only with one single table: " + getOriginalSql();
            return null;
        }
        // ... and that table must carry a primary key constraint.
        Table target = statement.m_tableList.get(0);
        boolean hasPrimaryKey = false;
        for (Constraint constraint : target.getConstraints()) {
            if (constraint.getType() == ConstraintType.PRIMARY_KEY.getValue()) {
                hasPrimaryKey = true;
                break;
            }
        }
        if (!hasPrimaryKey) {
            m_recentErrorMsg = "Unsupported UPSERT table without primary key: " + getOriginalSql();
            return null;
        }
    }

    m_planSelector.outputParsedStatement(statement);

    // Every assembler works with its own PlanSelector instance, which it
    // uses to keep track of the best plan.
    PlanAssembler planAssembler = new PlanAssembler(m_db, m_partitioning, (PlanSelector) m_planSelector.clone());
    CompiledPlan winner = planAssembler.getBestCostPlan(statement);
    if (winner == null) {
        // Debugging aid: optionally re-run the planning that just failed.
        if (m_debuggingStaticModeToRetryOnError) {
            planAssembler.getBestCostPlan(statement);
        }
        m_recentErrorMsg = planAssembler.getErrorMessage();
        if (m_recentErrorMsg == null) {
            m_recentErrorMsg = "Unable to plan for statement. Error unknown.";
        }
        return null;
    }

    if (winner.isReadOnly()) {
        // Put a send node on top of the read-only plan graph.
        SendPlanNode topSend = new SendPlanNode();
        topSend.addAndLinkChild(winner.rootPlanGraph);
        winner.rootPlanGraph = topSend;
    }

    // The plan is final: generate the schema and resolve all column index
    // references, once, for the winning plan only.
    winner.rootPlanGraph.generateOutputSchema(m_db);
    winner.rootPlanGraph.resolveColumnIndexes();
    if (statement instanceof ParsedSelectStmt) {
        ((ParsedSelectStmt) statement).checkPlanColumnMatch(winner.rootPlanGraph.getOutputSchema().getColumns());
    }

    // Emit the debug output for the best plan.
    planAssembler.finalizeBestCostPlan();
    // Renumber the plan nodes from 1 so the ids are deterministic.
    winner.resetPlanNodeIds(1);

    // The plan is split into fragments at its receive node; only a single
    // receive node (a two-fragment plan) is supported.
    List<AbstractPlanNode> receiveNodes = winner.rootPlanGraph.findAllNodesOfClass(AbstractReceivePlanNode.class);
    int receiveCount = receiveNodes.size();
    if (receiveCount > 1) {
        m_recentErrorMsg = "This join of multiple partitioned tables is too complex. " + "Consider simplifying its subqueries: " + getOriginalSql();
        return null;
    }
    if (receiveCount == 1) {
        fragmentize(winner, (AbstractReceivePlanNode) receiveNodes.get(0));
    }
    return winner;
}
Also used : AbstractPlanNode(org.voltdb.plannodes.AbstractPlanNode) AbstractReceivePlanNode(org.voltdb.plannodes.AbstractReceivePlanNode) Table(org.voltdb.catalog.Table) Constraint(org.voltdb.catalog.Constraint) SendPlanNode(org.voltdb.plannodes.SendPlanNode) SchemaColumn(org.voltdb.plannodes.SchemaColumn)

Example 3 with AbstractReceivePlanNode

use of org.voltdb.plannodes.AbstractReceivePlanNode in project voltdb by VoltDB.

the class PlanAssembler method handleMVBasedMultiPartQuery.

/**
 * Inserts the re-aggregation node directly above the plan's receive node
 * and lets the materialized view fix info process the scan below the send
 * node. For a query over more than one table that is not the outer-join
 * edge case, the join found under the send node is additionally moved
 * above the re-aggregation node, and the send node is left with only the
 * fix info's scan node as its child.
 *
 * @param reAggNode the re-aggregation node to splice into the plan
 * @param root the current root of the plan
 * @param edgeCaseOuterJoin when true, the join is never moved
 * @return the node the caller should treat as the plan root afterwards
 */
private AbstractPlanNode handleMVBasedMultiPartQuery(HashAggregatePlanNode reAggNode, AbstractPlanNode root, boolean edgeCaseOuterJoin) {
    MaterializedViewFixInfo fixInfo = m_parsedSelect.m_mvFixInfo;
    AbstractPlanNode recv = root;
    AbstractPlanNode recvParent = null;

    // Put the re-aggregation node where the receive node used to be.
    if (!(root instanceof AbstractReceivePlanNode)) {
        // The receive node sits somewhere below the root: locate the single
        // one and swap it for the re-aggregation node under its parent.
        List<AbstractPlanNode> receives = root.findAllNodesOfClass(AbstractReceivePlanNode.class);
        assert (receives.size() == 1);
        recv = receives.get(0);
        recvParent = recv.getParent(0);
        boolean swapped = recvParent.replaceChild(recv, reAggNode);
        assert (swapped);
    } else {
        // The receive node is the root, so the re-aggregation node becomes it.
        root = reAggNode;
    }
    reAggNode.addAndLinkChild(recv);
    reAggNode.m_isCoordinatingAggregator = true;

    assert (recv instanceof ReceivePlanNode);
    AbstractPlanNode send = recv.getChild(0);
    assert (send instanceof SendPlanNode);
    // Remember the send node's child before the fix info processes the subtree.
    AbstractPlanNode belowSend = send.getChild(0);

    // The re-aggregation node is handed to the fix info only for a
    // multi-table query outside the outer-join edge case; otherwise null.
    HashAggregatePlanNode replacement = (m_parsedSelect.m_tableList.size() > 1 && !edgeCaseOuterJoin) ? reAggNode : null;
    boolean found = fixInfo.processScanNodeWithReAggNode(send, replacement);
    assert (found);

    // Single-table queries and the outer-join edge case are done here.
    if (m_parsedSelect.m_tableList.size() <= 1 || edgeCaseOuterJoin) {
        return root;
    }

    // No agg, limit pushed down at this point, so the node under the send
    // node is expected to be the join.
    assert (belowSend instanceof AbstractJoinPlanNode);
    // Detach both the join and the materialized view scan from their parents.
    belowSend.clearParents();
    assert (fixInfo.m_scanNode != null);
    fixInfo.m_scanNode.clearParents();
    // The send node keeps only the materialized view scan as its child.
    send.clearChildren();
    send.addAndLinkChild(fixInfo.m_scanNode);

    if (recvParent == null) {
        // The re-aggregation node had no parent: the join is the new root.
        return belowSend;
    }
    // Otherwise the join takes the re-aggregation node's slot under that
    // parent, and the parent is what gets returned.
    // NOTE(review): this returns the receive node's former parent rather
    // than the incoming root -- confirm they are always the same node.
    recvParent.replaceChild(reAggNode, belowSend);
    return recvParent;
}
Also used : AbstractPlanNode(org.voltdb.plannodes.AbstractPlanNode) AbstractReceivePlanNode(org.voltdb.plannodes.AbstractReceivePlanNode) SendPlanNode(org.voltdb.plannodes.SendPlanNode) AbstractReceivePlanNode(org.voltdb.plannodes.AbstractReceivePlanNode) MergeReceivePlanNode(org.voltdb.plannodes.MergeReceivePlanNode) ReceivePlanNode(org.voltdb.plannodes.ReceivePlanNode) AbstractJoinPlanNode(org.voltdb.plannodes.AbstractJoinPlanNode) HashAggregatePlanNode(org.voltdb.plannodes.HashAggregatePlanNode)

Example 4 with AbstractReceivePlanNode

use of org.voltdb.plannodes.AbstractReceivePlanNode in project voltdb by VoltDB.

the class PlanAssembler method removeCoordinatorSendReceivePairRecursive.

/**
 * Walks down from {@code current} through nodes that have exactly one
 * child until a receive node is found, then removes that receive node and
 * the send node beneath it by attaching the send node's child in their
 * place. If a node with a child count other than one is reached first,
 * the plan is returned untouched.
 *
 * @param root the root of the plan being edited
 * @param current the node at which the search starts
 * @return the send node's child when the receive node was the root itself;
 *         otherwise {@code root}
 */
private static AbstractPlanNode removeCoordinatorSendReceivePairRecursive(AbstractPlanNode root, AbstractPlanNode current) {
    // Descend the single-child chain looking for the receive node.
    AbstractPlanNode node = current;
    while (!(node instanceof AbstractReceivePlanNode)) {
        if (node.getChildCount() != 1) {
            // Zero or several children (e.g. a union): the correct behavior
            // is not settled for this case, so leave the plan as it is.
            return root;
        }
        // Still a coordinator node; keep going down.
        node = node.getChild(0);
    }

    // node is the receive node; it is expected to sit on a send node that
    // itself has exactly one child.
    assert (node.getChildCount() == 1);
    AbstractPlanNode sendNode = node.getChild(0);
    assert (sendNode instanceof SendPlanNode);
    assert (sendNode.getChildCount() == 1);
    AbstractPlanNode survivor = sendNode.getChild(0);
    survivor.clearParents();

    if (node == root) {
        // Nothing above the receive node: the surviving child is the new root.
        return survivor;
    }

    // Re-attach the surviving child where the receive node used to hang.
    assert (node.getParentCount() == 1);
    AbstractPlanNode above = node.getParent(0);
    above.unlinkChild(node);
    above.addAndLinkChild(survivor);
    return root;
}
Also used : AbstractPlanNode(org.voltdb.plannodes.AbstractPlanNode) AbstractReceivePlanNode(org.voltdb.plannodes.AbstractReceivePlanNode) SendPlanNode(org.voltdb.plannodes.SendPlanNode)

Aggregations

AbstractPlanNode (org.voltdb.plannodes.AbstractPlanNode)4 AbstractReceivePlanNode (org.voltdb.plannodes.AbstractReceivePlanNode)4 SendPlanNode (org.voltdb.plannodes.SendPlanNode)3 Constraint (org.voltdb.catalog.Constraint)2 HashAggregatePlanNode (org.voltdb.plannodes.HashAggregatePlanNode)2 MergeReceivePlanNode (org.voltdb.plannodes.MergeReceivePlanNode)2 ReceivePlanNode (org.voltdb.plannodes.ReceivePlanNode)2 SchemaColumn (org.voltdb.plannodes.SchemaColumn)2 Table (org.voltdb.catalog.Table)1 AbstractExpression (org.voltdb.expressions.AbstractExpression)1 AggregateExpression (org.voltdb.expressions.AggregateExpression)1 TupleValueExpression (org.voltdb.expressions.TupleValueExpression)1 AbstractJoinPlanNode (org.voltdb.plannodes.AbstractJoinPlanNode)1 AggregatePlanNode (org.voltdb.plannodes.AggregatePlanNode)1 NodeSchema (org.voltdb.plannodes.NodeSchema)1 PartialAggregatePlanNode (org.voltdb.plannodes.PartialAggregatePlanNode)1 ExpressionType (org.voltdb.types.ExpressionType)1