use of org.voltdb.plannodes.NodeSchema in project voltdb by VoltDB.
the class PlanAssembler method addCoordinatorToDMLNode.
/**
 * Tops a DML plan with the nodes that produce its final "modified tuples" result.
 * <p>
 * The DML root is first passed through {@code SubPlanAssembler.addSendReceivePair}.
 * On top of the returned node this method stacks either a limit node (replicated
 * target) or a SUM aggregate node (non-replicated target), and finally a send node.
 *
 * @param dmlRoot the root of the DML plan to be wrapped
 * @param isReplicated whether the DML target table is replicated; selects the
 *        limit node when {@code true} and the sum node when {@code false}
 * @return the new send node that roots the assembled plan
 */
private static AbstractPlanNode addCoordinatorToDMLNode(AbstractPlanNode dmlRoot, boolean isReplicated) {
    // Name of the single column that carries the modified-tuple count.
    final String modifiedTuples = "modified_tuples";

    AbstractPlanNode receiveSide = SubPlanAssembler.addSendReceivePair(dmlRoot);

    AbstractPlanNode combiner;
    if (!isReplicated) {
        // Non-replicated target: add up the per-partition counts.
        AggregatePlanNode sumNode = new AggregatePlanNode();

        // Input of the SUM: a hard-wired TVE that is expected to match the
        // tuple-count column of the child's output.
        // TODO: make this less hard-wired.
        TupleValueExpression sumInput = new TupleValueExpression(
                AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME,
                modifiedTuples, modifiedTuples, 0);
        sumInput.setValueType(VoltType.BIGINT);
        sumInput.setValueSize(VoltType.BIGINT.getLengthInBytesForFixedTypes());
        sumNode.addAggregate(ExpressionType.AGGREGATE_SUM, false, 0, sumInput);

        // Output column of the SUM. It looks the same as the input TVE but stands
        // for the aggregate's result, so it gets its own instance.
        TupleValueExpression sumOutput = new TupleValueExpression(
                AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME,
                modifiedTuples, modifiedTuples, 0);
        sumOutput.setValueType(VoltType.BIGINT);
        sumOutput.setValueSize(VoltType.BIGINT.getLengthInBytesForFixedTypes());

        NodeSchema sumSchema = new NodeSchema();
        sumSchema.addColumn(AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME,
                modifiedTuples, modifiedTuples, sumOutput);
        sumNode.setOutputSchema(sumSchema);

        combiner = sumNode;
    } else {
        // Replicated target: every partition should report the same count,
        // so keep a single row instead of summing.
        LimitPlanNode firstRowOnly = new LimitPlanNode();
        firstRowOnly.setLimit(1);
        combiner = firstRowOnly;
    }

    // Stack the pieces: send -> (sum | limit) -> receive side of the DML plan.
    combiner.addAndLinkChild(receiveSide);
    SendPlanNode topSend = new SendPlanNode();
    topSend.addAndLinkChild(combiner);
    return topSend;
}
use of org.voltdb.plannodes.NodeSchema in project voltdb by VoltDB.
the class PlanAssembler method handleAggregationOperators.
/**
 * Adds aggregation node(s) to the plan when the parsed SELECT has aggregates
 * or a GROUP BY, then delegates to handleDistinctWithGroupby.
 * <p>
 * When aggregation is present this method:
 * <ol>
 * <li>tries to switch the scan to an index scan for the GROUP BY
 *     (switchToIndexScanForGroupBy),</li>
 * <li>chooses the concrete aggregate node classes for the lower node
 *     (aggNode) and, where allowed, a second "top" node (topAggNode),</li>
 * <li>walks m_aggResultColumns to register aggregates and build the output
 *     schemas of both nodes,</li>
 * <li>registers the GROUP BY expressions and hands both nodes to
 *     pushDownAggregate.</li>
 * </ol>
 * topAggNode may be reset to null while walking the columns; from that point
 * on no top aggregate is configured or passed along.
 *
 * @param root root of the plan built so far
 * @return the value returned by handleDistinctWithGroupby for the (possibly
 *         replaced) root
 */
private AbstractPlanNode handleAggregationOperators(AbstractPlanNode root) {
/*
* "Select A from T group by A" is grouped but has no aggregate operator
* expressions. Catch that case by checking the grouped flag
*/
if (m_parsedSelect.hasAggregateOrGroupby()) {
AggregatePlanNode aggNode = null;
// i.e., on the coordinator
AggregatePlanNode topAggNode = null;
IndexGroupByInfo gbInfo = new IndexGroupByInfo();
if (root instanceof AbstractReceivePlanNode) {
// for distinct that does not group by partition column
if (!m_parsedSelect.hasAggregateDistinct() || m_parsedSelect.hasPartitionColumnInGroupby()) {
// NOTE(review): two levels down from the receive node; presumably this
// skips the paired send node to reach the partition-side plan -- confirm.
AbstractPlanNode candidate = root.getChild(0).getChild(0);
gbInfo.m_multiPartition = true;
// The boolean result is deliberately unused here; root is not replaced
// in this branch.
switchToIndexScanForGroupBy(candidate, gbInfo);
}
} else if (switchToIndexScanForGroupBy(root, gbInfo)) {
// The switch succeeded: continue planning on top of the index access node.
root = gbInfo.m_indexAccess;
}
boolean needHashAgg = gbInfo.needHashAggregator(root, m_parsedSelect);
// Construct the aggregate nodes
if (needHashAgg) {
if (m_parsedSelect.m_mvFixInfo.needed()) {
// TODO: may optimize this edge case in future
// Only a single hash aggregate is created here; topAggNode stays null.
aggNode = new HashAggregatePlanNode();
} else {
if (gbInfo.isChangedToSerialAggregate()) {
assert (root instanceof ReceivePlanNode);
aggNode = new AggregatePlanNode();
} else if (gbInfo.isChangedToPartialAggregate()) {
aggNode = new PartialAggregatePlanNode(gbInfo.m_coveredGroupByColumns);
} else {
aggNode = new HashAggregatePlanNode();
}
// Whatever the lower node is, the top node is a hash aggregate in this branch.
topAggNode = new HashAggregatePlanNode();
}
} else {
aggNode = new AggregatePlanNode();
if (!m_parsedSelect.m_mvFixInfo.needed()) {
topAggNode = new AggregatePlanNode();
}
}
// agg_schema is the output schema of aggNode; top_agg_schema is an alternative
// schema used for topAggNode only when the GROUP BY is "complex" (see below).
NodeSchema agg_schema = new NodeSchema();
NodeSchema top_agg_schema = new NodeSchema();
for (int outputColumnIndex = 0; outputColumnIndex < m_parsedSelect.m_aggResultColumns.size(); outputColumnIndex += 1) {
ParsedColInfo col = m_parsedSelect.m_aggResultColumns.get(outputColumnIndex);
AbstractExpression rootExpr = col.expression;
AbstractExpression agg_input_expr = null;
SchemaColumn schema_col = null;
SchemaColumn top_schema_col = null;
if (rootExpr instanceof AggregateExpression) {
ExpressionType agg_expression_type = rootExpr.getExpressionType();
agg_input_expr = rootExpr.getLeft();
// A bit of a hack: ProjectionNodes after the
// aggregate node need the output columns here to
// contain TupleValueExpressions (effectively on a temp table).
// So we construct one based on the output of the
// aggregate expression, the column alias provided by HSQL,
// and the offset into the output table schema for the
// aggregate node that we're computing.
// Oh, oh, it's magic, you know..
TupleValueExpression tve = new TupleValueExpression(AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, rootExpr, outputColumnIndex);
tve.setDifferentiator(col.differentiator);
boolean is_distinct = ((AggregateExpression) rootExpr).isDistinct();
aggNode.addAggregate(agg_expression_type, is_distinct, outputColumnIndex, agg_input_expr);
// Both schema columns are built from the same arguments (and share the same tve).
schema_col = new SchemaColumn(AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, tve, outputColumnIndex);
top_schema_col = new SchemaColumn(AbstractParsedStmt.TEMP_TABLE_NAME, AbstractParsedStmt.TEMP_TABLE_NAME, "", col.alias, tve, outputColumnIndex);
/*
* Special case count(*), count(), sum(), min() and max() to
* push them down to each partition. It will do the
* push-down if the select columns only contains the listed
* aggregate operators and other group-by columns. If the
* select columns includes any other aggregates, it will not
* do the push-down. - nshi
*/
if (topAggNode != null) {
ExpressionType top_expression_type = agg_expression_type;
/*
* For count(*), count() and sum(), the pushed-down
* aggregate node doesn't change. An extra sum()
* aggregate node is added to the coordinator to sum up
* the numbers from all the partitions. The input schema
* and the output schema of the sum() aggregate node is
* the same as the output schema of the push-down
* aggregate node.
*
* If DISTINCT is specified, don't do push-down for
* count() and sum() when not group by partition column.
* An exception is the aggregation arguments are the
* partition column (ENG-4980).
*/
if (agg_expression_type == ExpressionType.AGGREGATE_COUNT_STAR || agg_expression_type == ExpressionType.AGGREGATE_COUNT || agg_expression_type == ExpressionType.AGGREGATE_SUM) {
if (is_distinct && !(m_parsedSelect.hasPartitionColumnInGroupby() || canPushDownDistinctAggregation((AggregateExpression) rootExpr))) {
// Dropping the top node is sticky: later iterations and the steps after
// the loop all see topAggNode == null, and aggregates already added to
// the discarded node are lost with it.
topAggNode = null;
} else {
// for aggregate distinct when group by
// partition column, the top aggregate node
// will be dropped later, thus there is no
// effect to assign the top_expression_type.
top_expression_type = ExpressionType.AGGREGATE_SUM;
}
} else /*
* For min() and max(), the pushed-down aggregate node
* doesn't change. An extra aggregate node of the same
* type is added to the coordinator. The input schema
* and the output schema of the top aggregate node is
* the same as the output schema of the pushed-down
* aggregate node.
*
* APPROX_COUNT_DISTINCT can be similarly pushed down, but
* must be split into two different functions, which is
* done later, from pushDownAggregate().
*/
if (agg_expression_type != ExpressionType.AGGREGATE_MIN && agg_expression_type != ExpressionType.AGGREGATE_MAX && agg_expression_type != ExpressionType.AGGREGATE_APPROX_COUNT_DISTINCT) {
/*
* Unsupported aggregate for push-down (AVG for example).
*/
topAggNode = null;
}
// Re-check: either branch above may have just cleared topAggNode.
if (topAggNode != null) {
/*
* Input column of the top aggregate node is the
* output column of the push-down aggregate node
*/
boolean topDistinctFalse = false;
topAggNode.addAggregate(top_expression_type, topDistinctFalse, outputColumnIndex, tve);
}
}
// end if we have a top agg node
} else {
// has already been broken down.
assert (!rootExpr.hasAnySubexpressionOfClass(AggregateExpression.class));
/*
* These columns are the pass through columns that are not being
* aggregated on. These are the ones from the SELECT list. They
* MUST already exist in the child node's output. Find them and
* add them to the aggregate's output.
*/
schema_col = new SchemaColumn(col.tableName, col.tableAlias, col.columnName, col.alias, col.expression, outputColumnIndex);
// For a GROUP BY column the top schema uses the expression registered in
// m_groupByExpressions under the column's alias instead of col.expression.
AbstractExpression topExpr = null;
if (col.groupBy) {
topExpr = m_parsedSelect.m_groupByExpressions.get(col.alias);
} else {
topExpr = col.expression;
}
top_schema_col = new SchemaColumn(col.tableName, col.tableAlias, col.columnName, col.alias, topExpr, outputColumnIndex);
}
agg_schema.addColumn(schema_col);
top_agg_schema.addColumn(top_schema_col);
}
// Register the GROUP BY expressions; the top node (if it survived the loop
// above) gets the alias-keyed expressions from m_groupByExpressions.
for (ParsedColInfo col : m_parsedSelect.groupByColumns()) {
aggNode.addGroupByExpression(col.expression);
if (topAggNode != null) {
topAggNode.addGroupByExpression(m_parsedSelect.m_groupByExpressions.get(col.alias));
}
}
aggNode.setOutputSchema(agg_schema);
if (topAggNode != null) {
// Only a complex GROUP BY gets the separately built top schema; otherwise
// the top node shares the very same NodeSchema instance as aggNode.
if (m_parsedSelect.hasComplexGroupby()) {
topAggNode.setOutputSchema(top_agg_schema);
} else {
topAggNode.setOutputSchema(agg_schema);
}
}
// Never push down aggregation for MV fix case.
root = pushDownAggregate(root, aggNode, topAggNode, m_parsedSelect);
}
return handleDistinctWithGroupby(root);
}
use of org.voltdb.plannodes.NodeSchema in project voltdb by VoltDB.
the class TestPlansGroupBy method testGroupByOnly.
/**
 * Compiles a series of GROUP BY queries and checks, via checkGroupByOnlyPlan,
 * which aggregate flavor (S_AGG, H_AGG or P_AGG) each plan is expected to use,
 * then runs the ENG-9990 output-schema regression check.
 */
public void testGroupByOnly() {
    List<AbstractPlanNode> pns;
    System.out.println("Starting testGroupByOnly");

    // ---------------- Serial Aggregate cases ----------------

    // Replicated table.
    String[] serialOnReplicated = {
        // only GROUP BY cols in SELECT clause
        "SELECT F_D1 FROM RF GROUP BY F_D1",
        // SELECT cols in GROUP BY and other aggregate cols
        "SELECT F_D1, COUNT(*) FROM RF GROUP BY F_D1",
        // aggregate cols are part of keys of used index
        "SELECT F_VAL1, SUM(F_VAL2) FROM RF GROUP BY F_VAL1",
        // expr index, full indexed case
        "SELECT F_D1 + F_D2, COUNT(*) FROM RF GROUP BY F_D1 + F_D2",
        // function index, prefix indexed case
        "SELECT ABS(F_D1), COUNT(*) FROM RF GROUP BY ABS(F_D1)",
        // order of GROUP BY cols differs from the index definition:
        // index on (ABS(F_D1), F_D2 - F_D3), GROUP BY on (F_D2 - F_D3, ABS(F_D1))
        "SELECT F_D2 - F_D3, ABS(F_D1), COUNT(*) FROM RF GROUP BY F_D2 - F_D3, ABS(F_D1)",
        "SELECT F_VAL1, F_VAL2, COUNT(*) FROM RF GROUP BY F_VAL2, F_VAL1",
    };
    for (String sql : serialOnReplicated) {
        pns = compileToFragments(sql);
        checkGroupByOnlyPlan(pns, false, S_AGG, true);
    }

    // Partitioned table: index scan for group by only, no need for a hash aggregate.
    String[] serialOnPartitioned = {
        "SELECT F_D1 FROM F GROUP BY F_D1",
        "SELECT F_D1, COUNT(*) FROM F GROUP BY F_D1",
        "SELECT F_VAL1, SUM(F_VAL2) FROM F GROUP BY F_VAL1",
        "SELECT F_D1 + F_D2, COUNT(*) FROM F GROUP BY F_D1 + F_D2",
        "SELECT ABS(F_D1), COUNT(*) FROM F GROUP BY ABS(F_D1)",
        "SELECT F_D2 - F_D3, ABS(F_D1), COUNT(*) FROM F GROUP BY F_D2 - F_D3, ABS(F_D1)",
    };
    for (String sql : serialOnPartitioned) {
        pns = compileToFragments(sql);
        checkGroupByOnlyPlan(pns, true, S_AGG, true);
    }

    // ---------------- Hash Aggregate cases ----------------

    String[] hashOnReplicated = {
        // unoptimized case (only use second col of the index), but will be replaced in
        // SeqScanToIndexScan optimization for deterministic reason
        // use EXPR_RF_TREE1 not EXPR_RF_TREE2
        "SELECT F_D2 - F_D3, COUNT(*) FROM RF GROUP BY F_D2 - F_D3",
        // unoptimized case: index is not scannable
        "SELECT F_VAL3, COUNT(*) FROM RF GROUP BY F_VAL3",
        // unoptimized case: F_D2 is not prefix indexable
        "SELECT F_D2, COUNT(*) FROM RF GROUP BY F_D2",
    };
    for (String sql : hashOnReplicated) {
        pns = compileToFragments(sql);
        checkGroupByOnlyPlan(pns, false, H_AGG, true);
    }

    // unoptimized case (only uses second col of the index), will not be replaced in
    // SeqScanToIndexScan for determinism because of non-deterministic receive.
    // Use primary key index
    pns = compileToFragments("SELECT F_D2 - F_D3, COUNT(*) FROM F GROUP BY F_D2 - F_D3");
    checkGroupByOnlyPlan(pns, true, H_AGG, true);

    // unoptimized case (only uses second col of the index), will be replaced in
    // SeqScanToIndexScan for determinism.
    // use EXPR_F_TREE1 not EXPR_F_TREE2
    pns = compileToFragments("SELECT F_D2 - F_D3, COUNT(*) FROM RF GROUP BY F_D2 - F_D3");
    checkGroupByOnlyPlan(pns, false, H_AGG, true);

    // ---------------- Partial Aggregate cases ----------------

    // unoptimized case: no prefix index found for (F_D1, F_D2)
    String[] partialOnReplicated = {
        "SELECT F_D1, F_D2, COUNT(*) FROM RF GROUP BY F_D1, F_D2",
        "SELECT ABS(F_D1), F_D3, COUNT(*) FROM RF GROUP BY ABS(F_D1), F_D3",
    };
    for (String sql : partialOnReplicated) {
        pns = compileToFragments(sql);
        checkGroupByOnlyPlan(pns, false, P_AGG, true);
    }

    // partition table
    String[] partialOnPartitioned = {
        "SELECT F_D1, F_D2, COUNT(*) FROM F GROUP BY F_D1, F_D2",
        "SELECT ABS(F_D1), F_D3, COUNT(*) FROM F GROUP BY ABS(F_D1), F_D3",
    };
    for (String sql : partialOnPartitioned) {
        pns = compileToFragments(sql);
        checkGroupByOnlyPlan(pns, true, P_AGG, true);
    }

    // ---------------- Regression case ----------------

    // ENG-9990 Repeating GROUP BY partition key in SELECT corrupts output schema.
    // The problem was a mismatch between the output schema
    // of the coordinator's send node and its feeding receive node
    // that had incorrectly rearranged its columns.
    pns = compileToFragments("SELECT G_PKEY, COUNT(*) C, G_PKEY FROM G GROUP BY G_PKEY");
    AbstractPlanNode belowTop = pns.get(0).getChild(0);
    NodeSchema outputSchema = belowTop.getOutputSchema();
    SchemaColumn middleCol = outputSchema.getColumns().get(1);
    System.out.println(middleCol.toString());
    assertTrue(middleCol.getColumnAlias().equals("C"));
}
use of org.voltdb.plannodes.NodeSchema in project voltdb by VoltDB.
the class TestPlansInExistsSubQueries method verifyOutputSchema.
/**
 * Asserts that the output schema of {@code pn} consists of exactly the expected
 * columns, in order, and that each column has size 4, type
 * {@code VoltType.INTEGER}, and a {@code TupleValueExpression} whose column
 * index is not -1.
 *
 * @param pn the plan node whose output schema is verified
 * @param columns the expected column names, in schema order
 */
private void verifyOutputSchema(AbstractPlanNode pn, String... columns) {
    NodeSchema ns = pn.getOutputSchema();
    List<SchemaColumn> scs = ns.getColumns();
    // The loop below is bounded by the schema size, so without this check extra
    // expected names would never be looked at, and too few expected names would
    // surface as an ArrayIndexOutOfBoundsException instead of a test failure.
    assertEquals("Unexpected number of output schema columns", columns.length, scs.size());
    for (int i = 0; i < scs.size(); ++i) {
        SchemaColumn col = scs.get(i);
        assertEquals(columns[i], col.getColumnName());
        assertEquals(4, col.getSize());
        assertEquals(VoltType.INTEGER, col.getType());
        assertTrue(col.getExpression() instanceof TupleValueExpression);
        // -1 is treated as "column index not set".
        assertTrue(((TupleValueExpression) col.getExpression()).getColumnIndex() != -1);
    }
}
use of org.voltdb.plannodes.NodeSchema in project voltdb by VoltDB.
the class TestPlansScalarSubQueries method testSelectCorrelatedScalar.
/**
 * Compiles a SELECT with a correlated scalar subquery in its select list and
 * checks the inline projection of the resulting scan node: it must have two
 * output columns, the second named "SCALAR", whose expression is a
 * VALUE_SCALAR wrapping a subquery expression with the single parameter index 0.
 */
public void testSelectCorrelatedScalar() {
    AbstractPlanNode pn = compile("select r2.c, (select d from r1 where r1.c = r2.c ) scalar from r2");
    pn = pn.getChild(0);
    assertTrue(pn instanceof AbstractScanPlanNode);
    AbstractPlanNode proj = pn.getInlinePlanNode(PlanNodeType.PROJECTION);
    NodeSchema schema = proj.getOutputSchema();
    assertEquals(2, schema.size());
    // The scalar subquery is the second select-list item.
    SchemaColumn col = schema.getColumns().get(1);
    assertTrue(col != null);
    assertEquals("SCALAR", col.getColumnName());
    AbstractExpression colExpr = col.getExpression();
    assertEquals(ExpressionType.VALUE_SCALAR, colExpr.getExpressionType());
    assertTrue(colExpr.getLeft() instanceof AbstractSubqueryExpression);
    AbstractSubqueryExpression subqueryExpr = (AbstractSubqueryExpression) colExpr.getLeft();
    List<Integer> params = subqueryExpr.getParameterIdxList();
    assertEquals(1, params.size());
    // Integer.valueOf replaces the deprecated Integer(int) constructor; the
    // comparison is still by value through assertEquals(Object, Object).
    assertEquals(Integer.valueOf(0), params.get(0));
}
Aggregations