Use of org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.AggInfo in project flink by apache.
The class HiveParserCalcitePlanner, method genGBLogicalPlan:
// Generate GB plan.
private RelNode genGBLogicalPlan(HiveParserQB qb, RelNode srcRel) throws SemanticException {
    RelNode gbRel = null;
    HiveParserQBParseInfo qbp = qb.getParseInfo();

    // 1. Gather GB Expressions (AST) (GB + Aggregations)
    // NOTE: Multi Insert is not supported
    String detsClauseName = qbp.getClauseNames().iterator().next();
    HiveParserASTNode selExprList = qb.getParseInfo().getSelForClause(detsClauseName);
    HiveParserSubQueryUtils.checkForTopLevelSubqueries(selExprList);
    if (selExprList.getToken().getType() == HiveASTParser.TOK_SELECTDI
            && selExprList.getChildCount() == 1
            && selExprList.getChild(0).getChildCount() == 1) {
        HiveParserASTNode node = (HiveParserASTNode) selExprList.getChild(0).getChild(0);
        if (node.getToken().getType() == HiveASTParser.TOK_ALLCOLREF) {
            srcRel = genSelectLogicalPlan(qb, srcRel, srcRel, null, null);
            HiveParserRowResolver rr = relToRowResolver.get(srcRel);
            qbp.setSelExprForClause(detsClauseName, HiveParserUtils.genSelectDIAST(rr));
        }
    }
    // Select DISTINCT + windowing; GBy is handled by genSelectForWindowing
    if (selExprList.getToken().getType() == HiveASTParser.TOK_SELECTDI
            && !qb.getAllWindowingSpecs().isEmpty()) {
        return null;
    }
    List<HiveParserASTNode> gbAstExprs = getGroupByForClause(qbp, detsClauseName);
    HashMap<String, HiveParserASTNode> aggregationTrees =
            qbp.getAggregationExprsForClause(detsClauseName);
    boolean hasGrpByAstExprs = !gbAstExprs.isEmpty();
    boolean hasAggregationTrees = aggregationTrees != null && !aggregationTrees.isEmpty();
    final boolean cubeRollupGrpSetPresent =
            !qbp.getDestRollups().isEmpty()
                    || !qbp.getDestGroupingSets().isEmpty()
                    || !qbp.getDestCubes().isEmpty();

    // 2. Sanity check
    if (semanticAnalyzer.getConf().getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)
            && qbp.getDistinctFuncExprsForClause(detsClauseName).size() > 1) {
        throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.getMsg());
    }
    if (hasGrpByAstExprs || hasAggregationTrees) {
        ArrayList<ExprNodeDesc> gbExprNodeDescs = new ArrayList<>();
        ArrayList<String> outputColNames = new ArrayList<>();

        // 3. Input, Output Row Resolvers
        HiveParserRowResolver inputRR = relToRowResolver.get(srcRel);
        HiveParserRowResolver outputRR = new HiveParserRowResolver();
        outputRR.setIsExprResolver(true);

        if (hasGrpByAstExprs) {
            // 4. Construct GB Keys (ExprNode)
            for (HiveParserASTNode gbAstExpr : gbAstExprs) {
                Map<HiveParserASTNode, ExprNodeDesc> astToExprNodeDesc =
                        semanticAnalyzer.genAllExprNodeDesc(gbAstExpr, inputRR);
                ExprNodeDesc grpbyExprNDesc = astToExprNodeDesc.get(gbAstExpr);
                if (grpbyExprNDesc == null) {
                    throw new SemanticException(
                            "Invalid Column Reference: " + gbAstExpr.dump());
                }
                addToGBExpr(
                        outputRR,
                        inputRR,
                        gbAstExpr,
                        grpbyExprNDesc,
                        gbExprNodeDescs,
                        outputColNames);
            }
        }

        // 5. GroupingSets, Cube, Rollup
        int numGroupCols = gbExprNodeDescs.size();
        List<Integer> groupingSets = null;
        if (cubeRollupGrpSetPresent) {
            if (qbp.getDestRollups().contains(detsClauseName)) {
                groupingSets = getGroupingSetsForRollup(gbAstExprs.size());
            } else if (qbp.getDestCubes().contains(detsClauseName)) {
                groupingSets = getGroupingSetsForCube(gbAstExprs.size());
            } else if (qbp.getDestGroupingSets().contains(detsClauseName)) {
                groupingSets = getGroupingSets(gbAstExprs, qbp, detsClauseName);
            }
        }

        // 6. Construct aggregation function Info
        ArrayList<AggInfo> aggInfos = new ArrayList<>();
        if (hasAggregationTrees) {
            for (HiveParserASTNode value : aggregationTrees.values()) {
                // 6.1 Determine type of UDAF
                // This is the GenericUDAF name
                String aggName = unescapeIdentifier(value.getChild(0).getText());
                boolean isDistinct = value.getType() == HiveASTParser.TOK_FUNCTIONDI;
                boolean isAllColumns = value.getType() == HiveASTParser.TOK_FUNCTIONSTAR;

                // 6.2 Convert UDAF Params to ExprNodeDesc
                ArrayList<ExprNodeDesc> aggParameters = new ArrayList<>();
                for (int i = 1; i < value.getChildCount(); i++) {
                    HiveParserASTNode paraExpr = (HiveParserASTNode) value.getChild(i);
                    ExprNodeDesc paraExprNode =
                            semanticAnalyzer.genExprNodeDesc(paraExpr, inputRR);
                    aggParameters.add(paraExprNode);
                }

                GenericUDAFEvaluator.Mode aggMode =
                        HiveParserUtils.groupByDescModeToUDAFMode(
                                GroupByDesc.Mode.COMPLETE, isDistinct);
                GenericUDAFEvaluator genericUDAFEvaluator =
                        HiveParserUtils.getGenericUDAFEvaluator(
                                aggName,
                                aggParameters,
                                value,
                                isDistinct,
                                isAllColumns,
                                frameworkConfig.getOperatorTable());
                assert (genericUDAFEvaluator != null);
                HiveParserBaseSemanticAnalyzer.GenericUDAFInfo udaf =
                        HiveParserUtils.getGenericUDAFInfo(
                                genericUDAFEvaluator, aggMode, aggParameters);
                String aggAlias = null;
                if (value.getParent().getType() == HiveASTParser.TOK_SELEXPR
                        && value.getParent().getChildCount() == 2) {
                    aggAlias =
                            unescapeIdentifier(
                                    value.getParent().getChild(1).getText().toLowerCase());
                }
                AggInfo aggInfo =
                        new AggInfo(
                                aggParameters,
                                udaf.returnType,
                                aggName,
                                isDistinct,
                                isAllColumns,
                                aggAlias);
                aggInfos.add(aggInfo);
                String field =
                        aggAlias == null
                                ? getColumnInternalName(numGroupCols + aggInfos.size() - 1)
                                : aggAlias;
                outputColNames.add(field);
                outputRR.putExpression(
                        value, new ColumnInfo(field, aggInfo.getReturnType(), "", false));
            }
        }
        // 7. If grouping sets are present, add the virtual GROUPING__ID column
        if (groupingSets != null && !groupingSets.isEmpty()) {
            String field = getColumnInternalName(numGroupCols + aggInfos.size());
            outputColNames.add(field);
            outputRR.put(
                    null,
                    VirtualColumn.GROUPINGID.getName(),
                    new ColumnInfo(
                            field,
                            // Flink's grouping_id returns bigint
                            TypeInfoFactory.longTypeInfo,
                            null,
                            true));
        }
        // 8. We create the group_by operator
        gbRel = genGBRelNode(gbExprNodeDescs, aggInfos, groupingSets, srcRel);
        relToHiveColNameCalcitePosMap.put(gbRel, buildHiveToCalciteColumnMap(outputRR));
        relToRowResolver.put(gbRel, outputRR);
    }
    return gbRel;
}
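Step 5 above materializes ROLLUP/CUBE into plain integer bitmasks via helpers inherited from the Hive analyzer. As a minimal sketch of the assumed encoding (method names here are hypothetical, not the Flink API; the assumption is that bit i of each mask flags the i-th GROUP BY key as present in that grouping set):

import java.util.ArrayList;
import java.util.List;

class GroupingSetMasks {
    // ROLLUP(k0..k{n-1}) keeps the n + 1 key prefixes: (), (k0), (k0,k1), ...
    static List<Integer> rollupMasks(int numKeys) {
        List<Integer> masks = new ArrayList<>();
        for (int i = 0; i <= numKeys; i++) {
            masks.add((1 << i) - 1);
        }
        return masks;
    }

    // CUBE(k0..k{n-1}) keeps all 2^n key subsets.
    static List<Integer> cubeMasks(int numKeys) {
        List<Integer> masks = new ArrayList<>();
        for (int mask = 0; mask < (1 << numKeys); mask++) {
            masks.add(mask);
        }
        return masks;
    }
}
// rollupMasks(3) -> [0, 1, 3, 7]; cubeMasks(2) -> [0, 1, 2, 3]

genGBRelNode (below) later turns each mask into a Calcite ImmutableBitSet, so the exact bit order only needs to be consistent between the producer and that conversion.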
Use of org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.AggInfo in project flink by apache.
The class HiveParserCalcitePlanner, method genGBRelNode:
private RelNode genGBRelNode(
        List<ExprNodeDesc> gbExprs,
        List<AggInfo> aggInfos,
        List<Integer> groupSets,
        RelNode srcRel)
        throws SemanticException {
    Map<String, Integer> colNameToPos = relToHiveColNameCalcitePosMap.get(srcRel);
    HiveParserRexNodeConverter converter =
            new HiveParserRexNodeConverter(
                    cluster, srcRel.getRowType(), colNameToPos, 0, false, funcConverter);
    final boolean hasGroupSets = groupSets != null && !groupSets.isEmpty();
    final List<RexNode> gbInputRexNodes = new ArrayList<>();
    final HashMap<String, Integer> inputRexNodeToIndex = new HashMap<>();
    final List<Integer> gbKeyIndices = new ArrayList<>();
    int inputIndex = 0;
    for (ExprNodeDesc key : gbExprs) {
        // also convert null literal here to support grouping by NULLs
        RexNode keyRex = convertNullLiteral(converter.convert(key)).accept(funcConverter);
        gbInputRexNodes.add(keyRex);
        gbKeyIndices.add(inputIndex);
        inputRexNodeToIndex.put(keyRex.toString(), inputIndex);
        inputIndex++;
    }
    final ImmutableBitSet groupSet = ImmutableBitSet.of(gbKeyIndices);

    // Grouping sets: we need to transform them into ImmutableBitSet objects for Calcite
    List<ImmutableBitSet> transformedGroupSets = null;
    if (hasGroupSets) {
        Set<ImmutableBitSet> set = new HashSet<>(groupSets.size());
        for (int val : groupSets) {
            set.add(convert(val, groupSet.cardinality()));
        }
        // Calcite expects the grouping sets sorted and without duplicates
        transformedGroupSets = new ArrayList<>(set);
        transformedGroupSets.sort(ImmutableBitSet.COMPARATOR);
    }

    // add Agg parameters to inputs
    for (AggInfo aggInfo : aggInfos) {
        for (ExprNodeDesc expr : aggInfo.getAggParams()) {
            RexNode paramRex = converter.convert(expr).accept(funcConverter);
            Integer argIndex = inputRexNodeToIndex.get(paramRex.toString());
            if (argIndex == null) {
                argIndex = gbInputRexNodes.size();
                inputRexNodeToIndex.put(paramRex.toString(), argIndex);
                gbInputRexNodes.add(paramRex);
            }
        }
    }

    if (gbInputRexNodes.isEmpty()) {
        // This will happen for count(*); in such cases we arbitrarily pick the
        // first element from srcRel. This must happen before the project below
        // is created, since later additions to gbInputRexNodes would not be
        // reflected in it.
        gbInputRexNodes.add(cluster.getRexBuilder().makeInputRef(srcRel, 0));
    }

    // create the actual input before creating agg calls so that the calls can properly
    // infer return type
    RelNode gbInputRel =
            LogicalProject.create(
                    srcRel, Collections.emptyList(), gbInputRexNodes, (List<String>) null);

    List<AggregateCall> aggregateCalls = new ArrayList<>();
    for (AggInfo aggInfo : aggInfos) {
        aggregateCalls.add(
                HiveParserUtils.toAggCall(
                        aggInfo,
                        converter,
                        inputRexNodeToIndex,
                        groupSet.cardinality(),
                        gbInputRel,
                        cluster,
                        funcConverter));
    }

    // GROUPING__ID is a virtual col in Hive, so we use Flink's function
    if (hasGroupSets) {
        // Create GroupingID column
        AggregateCall aggCall =
                AggregateCall.create(
                        SqlStdOperatorTable.GROUPING_ID,
                        false,
                        false,
                        false,
                        gbKeyIndices,
                        -1,
                        RelCollations.EMPTY,
                        groupSet.cardinality(),
                        gbInputRel,
                        null,
                        null);
        aggregateCalls.add(aggCall);
    }

    return LogicalAggregate.create(gbInputRel, groupSet, transformedGroupSets, aggregateCalls);
}
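A detail worth noting in genGBRelNode is how aggregate arguments are deduplicated against the GROUP BY keys: RexNodes are keyed by their string digest in inputRexNodeToIndex, so an expression that serves as both a key and an aggregate argument occupies a single column of the LogicalProject built under the aggregate. A standalone sketch of that technique (hypothetical names, not Flink API):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

class AggInputDedup {
    private final Map<String, Integer> exprToIndex = new HashMap<>();
    private final List<String> projectedExprs = new ArrayList<>();

    // Return the input column for an expression digest, registering it on first use.
    int indexFor(String exprDigest) {
        Integer idx = exprToIndex.get(exprDigest);
        if (idx == null) {
            idx = projectedExprs.size();
            exprToIndex.put(exprDigest, idx);
            projectedExprs.add(exprDigest);
        }
        return idx;
    }
}
// e.g. for SELECT deptno, SUM(sal), AVG(sal) ... GROUP BY deptno:
// indexFor("deptno") -> 0, indexFor("sal") -> 1, indexFor("sal") -> 1 again,
// so the project emits [deptno, sal] and both agg calls read input column 1.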
Use of org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.AggInfo in project flink by apache.
The class HiveParserCalcitePlanner, method getWindowRexAndType:
private Pair<RexNode, TypeInfo> getWindowRexAndType(
        HiveParserWindowingSpec.WindowExpressionSpec winExprSpec, RelNode srcRel)
        throws SemanticException {
    RexNode window;
    if (winExprSpec instanceof HiveParserWindowingSpec.WindowFunctionSpec) {
        HiveParserWindowingSpec.WindowFunctionSpec wFnSpec =
                (HiveParserWindowingSpec.WindowFunctionSpec) winExprSpec;
        HiveParserASTNode windowProjAst = wFnSpec.getExpression();
        // 1. Locate the window spec within the function's AST
        // TODO: do we need to get to child?
        int wndSpecASTIndx = getWindowSpecIndx(windowProjAst);
        // 2. Get Hive Aggregate Info
        AggInfo hiveAggInfo =
                getHiveAggInfo(
                        windowProjAst,
                        wndSpecASTIndx - 1,
                        relToRowResolver.get(srcRel),
                        (HiveParserWindowingSpec.WindowFunctionSpec) winExprSpec,
                        semanticAnalyzer,
                        frameworkConfig,
                        cluster);
        // 3. Get Calcite Return type for Agg Fn
        RelDataType calciteAggFnRetType =
                HiveParserUtils.toRelDataType(
                        hiveAggInfo.getReturnType(), cluster.getTypeFactory());
        // 4. Convert Agg Fn args to Calcite
        Map<String, Integer> posMap = relToHiveColNameCalcitePosMap.get(srcRel);
        HiveParserRexNodeConverter converter =
                new HiveParserRexNodeConverter(
                        cluster, srcRel.getRowType(), posMap, 0, false, funcConverter);
        List<RexNode> calciteAggFnArgs = new ArrayList<>();
        List<RelDataType> calciteAggFnArgTypes = new ArrayList<>();
        for (int i = 0; i < hiveAggInfo.getAggParams().size(); i++) {
            calciteAggFnArgs.add(converter.convert(hiveAggInfo.getAggParams().get(i)));
            calciteAggFnArgTypes.add(
                    HiveParserUtils.toRelDataType(
                            hiveAggInfo.getAggParams().get(i).getTypeInfo(),
                            cluster.getTypeFactory()));
        }
        // 5. Get Calcite Agg Fn
        final SqlAggFunction calciteAggFn =
                HiveParserSqlFunctionConverter.getCalciteAggFn(
                        hiveAggInfo.getUdfName(),
                        hiveAggInfo.isDistinct(),
                        calciteAggFnArgTypes,
                        calciteAggFnRetType);
        // 6. Translate Window spec
        HiveParserRowResolver inputRR = relToRowResolver.get(srcRel);
        HiveParserWindowingSpec.WindowSpec wndSpec =
                ((HiveParserWindowingSpec.WindowFunctionSpec) winExprSpec).getWindowSpec();
        List<RexNode> partitionKeys =
                getPartitionKeys(
                        wndSpec.getPartition(),
                        converter,
                        inputRR,
                        new HiveParserTypeCheckCtx(inputRR, frameworkConfig, cluster),
                        semanticAnalyzer);
        List<RexFieldCollation> orderKeys =
                getOrderKeys(
                        wndSpec.getOrder(),
                        converter,
                        inputRR,
                        new HiveParserTypeCheckCtx(inputRR, frameworkConfig, cluster),
                        semanticAnalyzer);
        RexWindowBound lowerBound = getBound(wndSpec.getWindowFrame().getStart(), cluster);
        RexWindowBound upperBound = getBound(wndSpec.getWindowFrame().getEnd(), cluster);
        boolean isRows =
                wndSpec.getWindowFrame().getWindowType()
                        == HiveParserWindowingSpec.WindowType.ROWS;
        window =
                HiveParserUtils.makeOver(
                        cluster.getRexBuilder(),
                        calciteAggFnRetType,
                        calciteAggFn,
                        calciteAggFnArgs,
                        partitionKeys,
                        orderKeys,
                        lowerBound,
                        upperBound,
                        isRows,
                        true,
                        false,
                        false,
                        false);
        window = window.accept(funcConverter);
    } else {
        throw new SemanticException("Unsupported window Spec");
    }
    return new Pair<>(window, HiveParserTypeConverter.convert(window.getType()));
}
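To make the numbered steps concrete, here is a hedged walkthrough (in comment form) of roughly what they would produce for a simple windowed aggregate; the query and the exact types are illustrative assumptions, not output taken from Flink:

// Hypothetical query, assuming a table emp(name STRING, deptno INT, sal INT):
//   SELECT name,
//          SUM(sal) OVER (PARTITION BY deptno ORDER BY sal
//                         ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
//   FROM emp;
// Step 2  hiveAggInfo:          udfName = "sum", one parameter (sal), not distinct
// Step 3  calciteAggFnRetType:  BIGINT (Hive's sum over int widens to bigint)
// Step 4  calciteAggFnArgs:     [$sal], arg types [INTEGER]
// Step 6  partitionKeys:        [$deptno]; orderKeys: [$sal ASC]
//         lowerBound/upperBound: UNBOUNDED PRECEDING / CURRENT ROW; isRows = true
// makeOver then assembles these pieces into a single RexOver expression, and
// funcConverter rewrites any Hive functions inside it into Flink-resolvable ones.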