Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate in project hive by apache.
The class ASTConverter, method convert.
private ASTNode convert() throws CalciteSemanticException {
  /*
   * 1. Walk RelNode Graph; note from, where, gBy.. nodes.
   */
  new QBVisitor().go(root);
  /*
   * 2. convert from node.
   */
  QueryBlockInfo qb = convertSource(from);
  schema = qb.schema;
  hiveAST.from = ASTBuilder.construct(HiveParser.TOK_FROM, "TOK_FROM").add(qb.ast).node();
  /*
   * 3. convert filterNode
   */
  if (where != null) {
    ASTNode cond = where.getCondition().accept(new RexVisitor(schema, false, root.getCluster().getRexBuilder()));
    hiveAST.where = ASTBuilder.where(cond);
    planMapper.link(cond, where);
    planMapper.link(cond, RelTreeSignature.of(where));
  }
  /*
   * 4. GBy
   */
  if (groupBy != null) {
    ASTBuilder b;
    boolean groupingSetsExpression = false;
    Group aggregateType = groupBy.getGroupType();
    switch (aggregateType) {
      case SIMPLE:
        b = ASTBuilder.construct(HiveParser.TOK_GROUPBY, "TOK_GROUPBY");
        break;
      case ROLLUP:
      case CUBE:
      case OTHER:
        b = ASTBuilder.construct(HiveParser.TOK_GROUPING_SETS, "TOK_GROUPING_SETS");
        groupingSetsExpression = true;
        break;
      default:
        throw new CalciteSemanticException("Group type not recognized");
    }
    HiveAggregate hiveAgg = (HiveAggregate) groupBy;
    if (hiveAgg.getAggregateColumnsOrder() != null) {
      // Aggregation columns may have been sorted in specific order
      for (int pos : hiveAgg.getAggregateColumnsOrder()) {
        addRefToBuilder(b, groupBy.getGroupSet().nth(pos));
      }
      for (int pos = 0; pos < groupBy.getGroupCount(); pos++) {
        if (!hiveAgg.getAggregateColumnsOrder().contains(pos)) {
          addRefToBuilder(b, groupBy.getGroupSet().nth(pos));
        }
      }
    } else {
      // Aggregation columns have not been reordered
      for (int i : groupBy.getGroupSet()) {
        addRefToBuilder(b, i);
      }
    }
    // Grouping sets expressions
    if (groupingSetsExpression) {
      for (ImmutableBitSet groupSet : groupBy.getGroupSets()) {
        ASTBuilder expression = ASTBuilder.construct(HiveParser.TOK_GROUPING_SETS_EXPRESSION, "TOK_GROUPING_SETS_EXPRESSION");
        for (int i : groupSet) {
          addRefToBuilder(expression, i);
        }
        b.add(expression);
      }
    }
    if (!groupBy.getGroupSet().isEmpty()) {
      hiveAST.groupBy = b.node();
    }
    schema = new Schema(schema, groupBy);
  }
  /*
   * 5. Having
   */
  if (having != null) {
    ASTNode cond = having.getCondition().accept(new RexVisitor(schema, false, root.getCluster().getRexBuilder()));
    hiveAST.having = ASTBuilder.having(cond);
  }
  /*
   * 6. Project
   */
  ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_SELECT, "TOK_SELECT");
  if (select instanceof Project) {
    List<RexNode> childExps = ((Project) select).getProjects();
    if (childExps.isEmpty()) {
      RexLiteral r = select.getCluster().getRexBuilder().makeExactLiteral(new BigDecimal(1));
      ASTNode selectExpr = ASTBuilder.selectExpr(ASTBuilder.literal(r), "1");
      b.add(selectExpr);
    } else {
      int i = 0;
      for (RexNode r : childExps) {
        ASTNode expr = r.accept(new RexVisitor(schema, r instanceof RexLiteral, select.getCluster().getRexBuilder()));
        String alias = select.getRowType().getFieldNames().get(i++);
        ASTNode selectExpr = ASTBuilder.selectExpr(expr, alias);
        b.add(selectExpr);
      }
    }
    hiveAST.select = b.node();
  } else {
    // select is UDTF
    HiveTableFunctionScan udtf = (HiveTableFunctionScan) select;
    List<ASTNode> children = new ArrayList<>();
    RexCall call = (RexCall) udtf.getCall();
    for (RexNode r : call.getOperands()) {
      ASTNode expr = r.accept(new RexVisitor(schema, r instanceof RexLiteral, select.getCluster().getRexBuilder()));
      children.add(expr);
    }
    ASTBuilder sel = ASTBuilder.construct(HiveParser.TOK_SELEXPR, "TOK_SELEXPR");
    ASTNode function = buildUDTFAST(call.getOperator().getName(), children);
    sel.add(function);
    for (String alias : udtf.getRowType().getFieldNames()) {
      sel.add(HiveParser.Identifier, alias);
    }
    b.add(sel);
    hiveAST.select = b.node();
  }
  /*
   * 7. Order: use the Sort from the block above. A RelNode has no pointer to
   * its parent, hence we need to go top down; but the Order By at each block
   * really belongs to its src/from. Hence the need to pass in the sort for
   * each block from its parent.
   * 8. Limit
   */
  convertOrderToASTNode(orderLimit);
  return hiveAST.getAST();
}
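For reference, the ASTBuilder idiom this method leans on can be seen in isolation. The fragment below is an illustration only, not Hive source, and assumes a RelOptCluster named cluster is in scope (that name is not part of the original); it composes the same TOK_SELECT shape that step 6 emits when the projection list is empty.

// Illustration: build TOK_SELECT with one constant select expression,
// mirroring the empty-projection branch of step 6 above.
ASTBuilder sel = ASTBuilder.construct(HiveParser.TOK_SELECT, "TOK_SELECT");
RexLiteral one = cluster.getRexBuilder().makeExactLiteral(new BigDecimal(1));
sel.add(ASTBuilder.selectExpr(ASTBuilder.literal(one), "1"));
ASTNode selectNode = sel.node();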
Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate in project hive by apache.
The class JDBCAggregationPushDownRule, method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
  LOG.debug("JDBCAggregationPushDownRule has been called");
  final HiveAggregate aggregate = call.rel(0);
  final HiveJdbcConverter converter = call.rel(1);
  JdbcAggregate jdbcAggregate;
  try {
    jdbcAggregate = new JdbcAggregate(aggregate.getCluster(), aggregate.getTraitSet().replace(converter.getJdbcConvention()), converter.getInput(), aggregate.indicator, aggregate.getGroupSet(), aggregate.getGroupSets(), aggregate.getAggCallList());
  } catch (InvalidRelException e) {
    LOG.warn(e.toString());
    return;
  }
  call.transformTo(converter.copy(converter.getTraitSet(), jdbcAggregate));
}
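The casts behind call.rel(0) and call.rel(1) above are dictated by the rule's operand tree. A plausible constructor, sketched under the assumption that the rule uses the standard Calcite operand DSL (the actual Hive constructor may differ), would be:

// Sketch: match a HiveAggregate whose input is a HiveJdbcConverter, making
// call.rel(0) the aggregate and call.rel(1) the converter in onMatch.
public JDBCAggregationPushDownRule() {
  super(operand(HiveAggregate.class, operand(HiveJdbcConverter.class, any())));
}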
Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate in project hive by apache.
The class HiveExpandDistinctAggregatesRule, method createCount.
/**
 * @param aggr the original aggregate
 * @param argList the original argList in the aggregate
 * @param cleanArgList the new argList without duplicates
 * @param map the mapping from the original argList to the new argList
 * @param originalGroupSet the original group set of the aggregate
 * @param newGroupSet the sorted positions of the group set
 * @return the new count aggregate, wrapped in a project that maps duplicate
 *         calls back to their deduplicated counterparts when needed
 * @throws CalciteSemanticException
 */
private RelNode createCount(Aggregate aggr, List<List<Integer>> argList, List<List<Integer>> cleanArgList, Map<Integer, Integer> map, ImmutableBitSet originalGroupSet, ImmutableBitSet newGroupSet) throws CalciteSemanticException {
  final List<RexNode> originalInputRefs = aggr.getRowType().getFieldList().stream().map(input -> new RexInputRef(input.getIndex(), input.getType())).collect(Collectors.toList());
  final List<RexNode> gbChildProjLst = Lists.newArrayList();
  // for non-singular args, count can include null, i.e. (,) is counted as 1
  for (List<Integer> list : cleanArgList) {
    RexNode condition = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, originalInputRefs.get(originalInputRefs.size() - 1), rexBuilder.makeExactLiteral(new BigDecimal(getGroupingIdValue(list, originalGroupSet, newGroupSet, aggr.getGroupCount()))));
    if (list.size() == 1) {
      int pos = list.get(0);
      RexNode notNull = rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, originalInputRefs.get(pos));
      condition = rexBuilder.makeCall(SqlStdOperatorTable.AND, condition, notNull);
    }
    RexNode caseExpr1 = rexBuilder.makeExactLiteral(BigDecimal.ONE);
    RexNode caseExpr2 = rexBuilder.makeNullLiteral(caseExpr1.getType());
    RexNode when = rexBuilder.makeCall(SqlStdOperatorTable.CASE, condition, caseExpr1, caseExpr2);
    gbChildProjLst.add(when);
  }
  for (int pos : originalGroupSet) {
    gbChildProjLst.add(originalInputRefs.get(newGroupSet.indexOf(pos)));
  }
  // create the project before GB
  RelNode gbInputRel = HiveProject.create(aggr, gbChildProjLst, null);
  // create the aggregate
  List<AggregateCall> aggregateCalls = Lists.newArrayList();
  RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory());
  for (int i = 0; i < cleanArgList.size(); i++) {
    AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("count", cluster, TypeInfoFactory.longTypeInfo, i, aggFnRetType);
    aggregateCalls.add(aggregateCall);
  }
  ImmutableBitSet groupSet = ImmutableBitSet.range(cleanArgList.size(), cleanArgList.size() + originalGroupSet.cardinality());
  Aggregate aggregate = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), gbInputRel, groupSet, null, aggregateCalls);
  // For duplicate aggregate calls, e.g. count(distinct x, y) and
  // count(distinct y, x), project the deduplicated result back into the
  // original call positions using the mapping.
  if (map.isEmpty()) {
    return aggregate;
  } else {
    final List<RexNode> originalAggrRefs = aggregate.getRowType().getFieldList().stream().map(input -> new RexInputRef(input.getIndex(), input.getType())).collect(Collectors.toList());
    final List<RexNode> projLst = Lists.newArrayList();
    int index = 0;
    for (int i = 0; i < groupSet.cardinality(); i++) {
      projLst.add(originalAggrRefs.get(index++));
    }
    for (int i = 0; i < argList.size(); i++) {
      if (map.containsKey(i)) {
        projLst.add(originalAggrRefs.get(map.get(i)));
      } else {
        projLst.add(originalAggrRefs.get(index++));
      }
    }
    return HiveProject.create(aggregate, projLst, null);
  }
}
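To make the argList/cleanArgList/map parameters concrete, here is a self-contained sketch. It is hypothetical: it treats argument lists as order-insensitive duplicates, as the count(distinct x, y) / count(distinct y, x) comment suggests, and uses cleanArgList indices as map values, whereas the real rule builds map with its own index convention.

import java.util.*;

// Hypothetical demo over columns x=0, y=1, z=2: count(distinct x, y),
// count(distinct y, x), count(distinct z) collapse to two clean calls.
public class DistinctArgDedupDemo {
  public static void main(String[] args) {
    List<List<Integer>> argList = List.of(List.of(0, 1), List.of(1, 0), List.of(2));
    List<List<Integer>> cleanArgList = new ArrayList<>();
    Map<Integer, Integer> map = new HashMap<>(); // duplicate call -> reused call
    List<Set<Integer>> seen = new ArrayList<>();
    for (int i = 0; i < argList.size(); i++) {
      Set<Integer> key = new HashSet<>(argList.get(i));
      int at = seen.indexOf(key);
      if (at >= 0) {
        map.put(i, at);
      } else {
        seen.add(key);
        cleanArgList.add(argList.get(i));
      }
    }
    // Prints clean=[[0, 1], [2]] map={1=0}: the second original call reuses
    // the count built for the first, which the final HiveProject restores.
    System.out.println("clean=" + cleanArgList + " map=" + map);
  }
}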
Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate in project hive by apache.
The class HiveExceptRewriteRule, method onMatch.
// ~ Methods ----------------------------------------------------------------
public void onMatch(RelOptRuleCall call) {
  final HiveExcept hiveExcept = call.rel(0);
  final RelOptCluster cluster = hiveExcept.getCluster();
  final RexBuilder rexBuilder = cluster.getRexBuilder();
  Builder<RelNode> bldr = new ImmutableList.Builder<RelNode>();
  // 1st level GB: create a GB (all keys + VCol + count as c) for each branch
  try {
    bldr.add(createFirstGB(hiveExcept.getInputs().get(0), true, cluster, rexBuilder));
    bldr.add(createFirstGB(hiveExcept.getInputs().get(1), false, cluster, rexBuilder));
  } catch (CalciteSemanticException e) {
    LOG.debug(e.toString());
    throw new RuntimeException(e);
  }
  // create a union above all the branches
  // the schema of union looks like this
  // all keys + VCol + c
  HiveRelNode union = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build());
  // 2nd level GB over the union: group by all keys and compute
  // sum(c) as a and sum(VCol*c) as b
  final List<RexNode> gbChildProjLst = Lists.newArrayList();
  final List<Integer> groupSetPositions = Lists.newArrayList();
  int unionColumnSize = union.getRowType().getFieldList().size();
  for (int cInd = 0; cInd < unionColumnSize; cInd++) {
    gbChildProjLst.add(rexBuilder.makeInputRef(union, cInd));
    // the last 2 columns are VCol and c
    if (cInd < unionColumnSize - 2) {
      groupSetPositions.add(cInd);
    }
  }
  try {
    gbChildProjLst.add(multiply(rexBuilder.makeInputRef(union, unionColumnSize - 2), rexBuilder.makeInputRef(union, unionColumnSize - 1), cluster, rexBuilder));
  } catch (CalciteSemanticException e) {
    LOG.debug(e.toString());
    throw new RuntimeException(e);
  }
  RelNode gbInputRel = null;
  try {
    // Here we create a project for the following reasons:
    // (1) GBy only accepts arg as a position of the input, however, we need to sum on VCol*c
    // (2) This can better reuse the function createSingleArgAggCall.
    gbInputRel = HiveProject.create(union, gbChildProjLst, null);
  } catch (CalciteSemanticException e) {
    LOG.debug(e.toString());
    throw new RuntimeException(e);
  }
  // gbInputRel's schema is like this
  // all keys + VCol + c + VCol*c
  List<AggregateCall> aggregateCalls = Lists.newArrayList();
  RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory());
  // sum(c)
  AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("sum", cluster, TypeInfoFactory.longTypeInfo, unionColumnSize - 1, aggFnRetType);
  aggregateCalls.add(aggregateCall);
  // sum(VCol*c)
  aggregateCall = HiveCalciteUtil.createSingleArgAggCall("sum", cluster, TypeInfoFactory.longTypeInfo, unionColumnSize, aggFnRetType);
  aggregateCalls.add(aggregateCall);
  final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
  HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), gbInputRel, groupSet, null, aggregateCalls);
  if (!hiveExcept.all) {
    RelNode filterRel = null;
    try {
      filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), aggregateRel, makeFilterExprForExceptDistinct(aggregateRel, unionColumnSize, cluster, rexBuilder));
    } catch (CalciteSemanticException e) {
      LOG.debug(e.toString());
      throw new RuntimeException(e);
    }
    // finally add a project to project out the last 2 columns
    Set<Integer> projectOutColumnPositions = new HashSet<>();
    projectOutColumnPositions.add(filterRel.getRowType().getFieldList().size() - 2);
    projectOutColumnPositions.add(filterRel.getRowType().getFieldList().size() - 1);
    try {
      call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(filterRel, projectOutColumnPositions));
    } catch (CalciteSemanticException e) {
      LOG.debug(e.toString());
      throw new RuntimeException(e);
    }
  } else {
    List<RexNode> originalInputRefs = Lists.transform(aggregateRel.getRowType().getFieldList(), new Function<RelDataTypeField, RexNode>() {

      @Override
      public RexNode apply(RelDataTypeField input) {
        return new RexInputRef(input.getIndex(), input.getType());
      }
    });
    List<RexNode> copyInputRefs = new ArrayList<>();
    try {
      copyInputRefs.add(makeExprForExceptAll(aggregateRel, unionColumnSize, cluster, rexBuilder));
    } catch (CalciteSemanticException e) {
      LOG.debug(e.toString());
      throw new RuntimeException(e);
    }
    for (int i = 0; i < originalInputRefs.size() - 2; i++) {
      copyInputRefs.add(originalInputRefs.get(i));
    }
    RelNode srcRel = null;
    try {
      srcRel = HiveProject.create(aggregateRel, copyInputRefs, null);
      HiveTableFunctionScan udtf = HiveCalciteUtil.createUDTFForSetOp(cluster, srcRel);
      // finally add a project to project out the first column
      Set<Integer> projectOutColumnPositions = new HashSet<>();
      projectOutColumnPositions.add(0);
      call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(udtf, projectOutColumnPositions));
    } catch (SemanticException e) {
      LOG.debug(e.toString());
      throw new RuntimeException(e);
    }
  }
}
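For intuition, a worked example of the VCol arithmetic (my reading of the code above; the actual predicates are built by makeFilterExprForExceptDistinct and makeExprForExceptAll, whose bodies are not shown here): the left branch is tagged VCol = 2 and the right VCol = 1, so after the 2nd level GB each key carries a = sum(c) and b = sum(VCol*c), and whether the key ever occurs on the right branch is recoverable from those two sums.

// Self-contained arithmetic check, not Hive code.
public class ExceptDistinctEncodingDemo {
  public static void main(String[] args) {
    long c1 = 3; // occurrences of a key on the left branch (VCol = 2)
    long c2 = 2; // occurrences of the same key on the right branch (VCol = 1)
    long a = c1 + c2;     // sum(c) after the 2nd level GB
    long b = 2 * c1 + c2; // sum(VCol * c) after the 2nd level GB
    // b == 2 * a holds exactly when c2 == 0, i.e. the key never appears on
    // the right, which is the condition EXCEPT DISTINCT must keep.
    System.out.println("keep for EXCEPT DISTINCT: " + (b == 2 * a));
  }
}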
Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate in project hive by apache.
The class HiveExceptRewriteRule, method createFirstGB.
private RelNode createFirstGB(RelNode input, boolean left, RelOptCluster cluster, RexBuilder rexBuilder) throws CalciteSemanticException {
  final List<RexNode> gbChildProjLst = Lists.newArrayList();
  final List<Integer> groupSetPositions = Lists.newArrayList();
  for (int cInd = 0; cInd < input.getRowType().getFieldList().size(); cInd++) {
    gbChildProjLst.add(rexBuilder.makeInputRef(input, cInd));
    groupSetPositions.add(cInd);
  }
  if (left) {
    gbChildProjLst.add(rexBuilder.makeBigintLiteral(new BigDecimal(2)));
  } else {
    gbChildProjLst.add(rexBuilder.makeBigintLiteral(new BigDecimal(1)));
  }
  // also add the last VCol
  groupSetPositions.add(input.getRowType().getFieldList().size());
  // create the project before GB
  RelNode gbInputRel = HiveProject.create(input, gbChildProjLst, null);
  // groupSetPosition includes all the positions
  final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
  List<AggregateCall> aggregateCalls = Lists.newArrayList();
  RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory());
  AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("count", cluster, TypeInfoFactory.longTypeInfo, input.getRowType().getFieldList().size(), aggFnRetType);
  aggregateCalls.add(aggregateCall);
  return new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), gbInputRel, groupSet, null, aggregateCalls);
}
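A brief note on the shape this produces, with hypothetical values sketched below (an illustration, not Hive output): because the appended VCol literal is both grouped on and counted, each branch emits one row per distinct key in the schema all keys + VCol + c, which is exactly what the union in onMatch above consumes.

import java.util.List;

// Hypothetical first-level GB output for a key "a" occurring three times on
// the left input and twice on the right input.
public class FirstGBOutputDemo {
  public static void main(String[] args) {
    List<Object> leftRow = List.of("a", 2L, 3L);  // key, VCol = 2, c = 3
    List<Object> rightRow = List.of("a", 1L, 2L); // key, VCol = 1, c = 2
    System.out.println(leftRow + " " + rightRow);
  }
}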