
Example 1 with AggInfo

Use of org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.AggInfo in project flink by apache.

From the class HiveParserCalcitePlanner, the method genGBLogicalPlan:

// Generate GB plan.
private RelNode genGBLogicalPlan(HiveParserQB qb, RelNode srcRel) throws SemanticException {
    RelNode gbRel = null;
    HiveParserQBParseInfo qbp = qb.getParseInfo();
    // 1. Gather GB Expressions (AST) (GB + Aggregations)
    // NOTE: Multi Insert is not supported
    String detsClauseName = qbp.getClauseNames().iterator().next();
    HiveParserASTNode selExprList = qb.getParseInfo().getSelForClause(detsClauseName);
    HiveParserSubQueryUtils.checkForTopLevelSubqueries(selExprList);
    if (selExprList.getToken().getType() == HiveASTParser.TOK_SELECTDI
            && selExprList.getChildCount() == 1
            && selExprList.getChild(0).getChildCount() == 1) {
        HiveParserASTNode node = (HiveParserASTNode) selExprList.getChild(0).getChild(0);
        if (node.getToken().getType() == HiveASTParser.TOK_ALLCOLREF) {
            srcRel = genSelectLogicalPlan(qb, srcRel, srcRel, null, null);
            HiveParserRowResolver rr = relToRowResolver.get(srcRel);
            qbp.setSelExprForClause(detsClauseName, HiveParserUtils.genSelectDIAST(rr));
        }
    }
    // Select DISTINCT + windowing; GBy handled by genSelectForWindowing
    if (selExprList.getToken().getType() == HiveASTParser.TOK_SELECTDI
            && !qb.getAllWindowingSpecs().isEmpty()) {
        return null;
    }
    List<HiveParserASTNode> gbAstExprs = getGroupByForClause(qbp, detsClauseName);
    HashMap<String, HiveParserASTNode> aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName);
    boolean hasGrpByAstExprs = !gbAstExprs.isEmpty();
    boolean hasAggregationTrees = aggregationTrees != null && !aggregationTrees.isEmpty();
    final boolean cubeRollupGrpSetPresent = !qbp.getDestRollups().isEmpty()
            || !qbp.getDestGroupingSets().isEmpty()
            || !qbp.getDestCubes().isEmpty();
    // 2. Sanity check
    if (semanticAnalyzer.getConf().getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)
            && qbp.getDistinctFuncExprsForClause(detsClauseName).size() > 1) {
        throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.getMsg());
    }
    if (hasGrpByAstExprs || hasAggregationTrees) {
        ArrayList<ExprNodeDesc> gbExprNodeDescs = new ArrayList<>();
        ArrayList<String> outputColNames = new ArrayList<>();
        // 3. Input, Output Row Resolvers
        HiveParserRowResolver inputRR = relToRowResolver.get(srcRel);
        HiveParserRowResolver outputRR = new HiveParserRowResolver();
        outputRR.setIsExprResolver(true);
        if (hasGrpByAstExprs) {
            // 4. Construct GB Keys (ExprNode)
            for (HiveParserASTNode gbAstExpr : gbAstExprs) {
                Map<HiveParserASTNode, ExprNodeDesc> astToExprNodeDesc = semanticAnalyzer.genAllExprNodeDesc(gbAstExpr, inputRR);
                ExprNodeDesc grpbyExprNDesc = astToExprNodeDesc.get(gbAstExpr);
                if (grpbyExprNDesc == null) {
                    throw new SemanticException("Invalid Column Reference: " + gbAstExpr.dump());
                }
                addToGBExpr(outputRR, inputRR, gbAstExpr, grpbyExprNDesc, gbExprNodeDescs, outputColNames);
            }
        }
        // 5. GroupingSets, Cube, Rollup
        int numGroupCols = gbExprNodeDescs.size();
        List<Integer> groupingSets = null;
        if (cubeRollupGrpSetPresent) {
            if (qbp.getDestRollups().contains(detsClauseName)) {
                groupingSets = getGroupingSetsForRollup(gbAstExprs.size());
            } else if (qbp.getDestCubes().contains(detsClauseName)) {
                groupingSets = getGroupingSetsForCube(gbAstExprs.size());
            } else if (qbp.getDestGroupingSets().contains(detsClauseName)) {
                groupingSets = getGroupingSets(gbAstExprs, qbp, detsClauseName);
            }
        }
        // 6. Construct aggregation function Info
        ArrayList<AggInfo> aggInfos = new ArrayList<>();
        if (hasAggregationTrees) {
            for (HiveParserASTNode value : aggregationTrees.values()) {
                // 6.1 Determine type of UDAF
                // This is the GenericUDAF name
                String aggName = unescapeIdentifier(value.getChild(0).getText());
                boolean isDistinct = value.getType() == HiveASTParser.TOK_FUNCTIONDI;
                boolean isAllColumns = value.getType() == HiveASTParser.TOK_FUNCTIONSTAR;
                // 6.2 Convert UDAF Params to ExprNodeDesc
                ArrayList<ExprNodeDesc> aggParameters = new ArrayList<>();
                for (int i = 1; i < value.getChildCount(); i++) {
                    HiveParserASTNode paraExpr = (HiveParserASTNode) value.getChild(i);
                    ExprNodeDesc paraExprNode = semanticAnalyzer.genExprNodeDesc(paraExpr, inputRR);
                    aggParameters.add(paraExprNode);
                }
                GenericUDAFEvaluator.Mode aggMode =
                        HiveParserUtils.groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, isDistinct);
                GenericUDAFEvaluator genericUDAFEvaluator =
                        HiveParserUtils.getGenericUDAFEvaluator(aggName, aggParameters, value,
                                isDistinct, isAllColumns, frameworkConfig.getOperatorTable());
                assert (genericUDAFEvaluator != null);
                HiveParserBaseSemanticAnalyzer.GenericUDAFInfo udaf =
                        HiveParserUtils.getGenericUDAFInfo(genericUDAFEvaluator, aggMode, aggParameters);
                String aggAlias = null;
                if (value.getParent().getType() == HiveASTParser.TOK_SELEXPR
                        && value.getParent().getChildCount() == 2) {
                    aggAlias = unescapeIdentifier(
                            value.getParent().getChild(1).getText().toLowerCase());
                }
                AggInfo aggInfo = new AggInfo(
                        aggParameters, udaf.returnType, aggName, isDistinct, isAllColumns, aggAlias);
                aggInfos.add(aggInfo);
                String field = aggAlias == null
                        ? getColumnInternalName(numGroupCols + aggInfos.size() - 1)
                        : aggAlias;
                outputColNames.add(field);
                outputRR.putExpression(value, new ColumnInfo(field, aggInfo.getReturnType(), "", false));
            }
        }
        // 7. Grouping sets: add the virtual grouping__id output column
        if (groupingSets != null && !groupingSets.isEmpty()) {
            String field = getColumnInternalName(numGroupCols + aggInfos.size());
            outputColNames.add(field);
            // flink grouping_id's return type is bigint
            outputRR.put(null, VirtualColumn.GROUPINGID.getName(),
                    new ColumnInfo(field, TypeInfoFactory.longTypeInfo, null, true));
        }
        // 8. We create the group_by operator
        gbRel = genGBRelNode(gbExprNodeDescs, aggInfos, groupingSets, srcRel);
        relToHiveColNameCalcitePosMap.put(gbRel, buildHiveToCalciteColumnMap(outputRR));
        relToRowResolver.put(gbRel, outputRR);
    }
    return gbRel;
}
Also used: HiveParserASTNode (org.apache.flink.table.planner.delegation.hive.copy.HiveParserASTNode), GenericUDAFEvaluator (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator), ArrayList (java.util.ArrayList), HiveParserBaseSemanticAnalyzer.getHiveAggInfo (org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.getHiveAggInfo), AggInfo (org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.AggInfo), ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo), HiveParserQBParseInfo (org.apache.flink.table.planner.delegation.hive.copy.HiveParserQBParseInfo), RelNode (org.apache.calcite.rel.RelNode), HiveParserRowResolver (org.apache.flink.table.planner.delegation.hive.copy.HiveParserRowResolver), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), HiveParserBaseSemanticAnalyzer (org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)
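
Step 5 of genGBLogicalPlan turns ROLLUP, CUBE, and GROUPING SETS into a List<Integer> of bitmasks via getGroupingSetsForRollup, getGroupingSetsForCube, and getGroupingSets. Below is a minimal, self-contained sketch of that enumeration, not the Flink/Hive helpers themselves, and assuming the common encoding where bit i marks the i-th GROUP BY key as present: ROLLUP over keys (a, b, c) yields the prefix sets {a,b,c}, {a,b}, {a}, {}, while CUBE yields all 2^n subsets.

import java.util.ArrayList;
import java.util.List;

public class GroupingSetsSketch {

    // ROLLUP over n keys: the n+1 "prefix" masks, e.g. n=3 -> [0b111, 0b011, 0b001, 0b000]
    static List<Integer> rollupGroupingSets(int numKeys) {
        List<Integer> sets = new ArrayList<>();
        for (int k = numKeys; k >= 0; k--) {
            sets.add((1 << k) - 1);
        }
        return sets;
    }

    // CUBE over n keys: all 2^n subset masks
    static List<Integer> cubeGroupingSets(int numKeys) {
        List<Integer> sets = new ArrayList<>();
        for (int mask = (1 << numKeys) - 1; mask >= 0; mask--) {
            sets.add(mask);
        }
        return sets;
    }

    public static void main(String[] args) {
        System.out.println(rollupGroupingSets(3)); // [7, 3, 1, 0]
        System.out.println(cubeGroupingSets(2)); // [3, 2, 1, 0]
    }
}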

Example 2 with AggInfo

Use of org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.AggInfo in project flink by apache.

From the class HiveParserCalcitePlanner, the method genGBRelNode:

private RelNode genGBRelNode(List<ExprNodeDesc> gbExprs, List<AggInfo> aggInfos,
        List<Integer> groupSets, RelNode srcRel) throws SemanticException {
    Map<String, Integer> colNameToPos = relToHiveColNameCalcitePosMap.get(srcRel);
    HiveParserRexNodeConverter converter = new HiveParserRexNodeConverter(
            cluster, srcRel.getRowType(), colNameToPos, 0, false, funcConverter);
    final boolean hasGroupSets = groupSets != null && !groupSets.isEmpty();
    final List<RexNode> gbInputRexNodes = new ArrayList<>();
    final HashMap<String, Integer> inputRexNodeToIndex = new HashMap<>();
    final List<Integer> gbKeyIndices = new ArrayList<>();
    int inputIndex = 0;
    for (ExprNodeDesc key : gbExprs) {
        // also convert null literal here to support grouping by NULLs
        RexNode keyRex = convertNullLiteral(converter.convert(key)).accept(funcConverter);
        gbInputRexNodes.add(keyRex);
        gbKeyIndices.add(inputIndex);
        inputRexNodeToIndex.put(keyRex.toString(), inputIndex);
        inputIndex++;
    }
    final ImmutableBitSet groupSet = ImmutableBitSet.of(gbKeyIndices);
    // Grouping sets: we need to transform them into ImmutableBitSet objects for Calcite
    List<ImmutableBitSet> transformedGroupSets = null;
    if (hasGroupSets) {
        Set<ImmutableBitSet> set = new HashSet<>(groupSets.size());
        for (int val : groupSets) {
            set.add(convert(val, groupSet.cardinality()));
        }
        // Calcite expects the grouping sets sorted and without duplicates
        transformedGroupSets = new ArrayList<>(set);
        transformedGroupSets.sort(ImmutableBitSet.COMPARATOR);
    }
    // add Agg parameters to inputs
    for (AggInfo aggInfo : aggInfos) {
        for (ExprNodeDesc expr : aggInfo.getAggParams()) {
            RexNode paramRex = converter.convert(expr).accept(funcConverter);
            Integer argIndex = inputRexNodeToIndex.get(paramRex.toString());
            if (argIndex == null) {
                argIndex = gbInputRexNodes.size();
                inputRexNodeToIndex.put(paramRex.toString(), argIndex);
                gbInputRexNodes.add(paramRex);
            }
        }
    }
    if (gbInputRexNodes.isEmpty()) {
        // this happens for count(*): there are no GB keys and no agg parameters,
        // so arbitrarily project the first column of srcRel to keep the input non-empty
        gbInputRexNodes.add(cluster.getRexBuilder().makeInputRef(srcRel, 0));
    }
    // create the actual input before creating agg calls so that the calls can properly infer
    // return type (LogicalProject.create copies the projection list, so the count(*)
    // fallback above must happen before this point)
    RelNode gbInputRel = LogicalProject.create(
            srcRel, Collections.emptyList(), gbInputRexNodes, (List<String>) null);
    List<AggregateCall> aggregateCalls = new ArrayList<>();
    for (AggInfo aggInfo : aggInfos) {
        aggregateCalls.add(HiveParserUtils.toAggCall(aggInfo, converter, inputRexNodeToIndex,
                groupSet.cardinality(), gbInputRel, cluster, funcConverter));
    }
    // GROUPING__ID is a virtual column in Hive, so we use Flink's GROUPING_ID function
    if (hasGroupSets) {
        // create the GroupingID column
        AggregateCall aggCall = AggregateCall.create(SqlStdOperatorTable.GROUPING_ID,
                false, false, false, gbKeyIndices, -1, RelCollations.EMPTY,
                groupSet.cardinality(), gbInputRel, null, null);
        aggregateCalls.add(aggCall);
    }
    return LogicalAggregate.create(gbInputRel, groupSet, transformedGroupSets, aggregateCalls);
}
Also used: ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet), LinkedHashMap (java.util.LinkedHashMap), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), HiveParserBaseSemanticAnalyzer.getHiveAggInfo (org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.getHiveAggInfo), AggInfo (org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.AggInfo), AggregateCall (org.apache.calcite.rel.core.AggregateCall), RelNode (org.apache.calcite.rel.RelNode), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), RexNode (org.apache.calcite.rex.RexNode), HashSet (java.util.HashSet)
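
The convert(val, groupSet.cardinality()) call above decodes one such integer mask into a Calcite ImmutableBitSet; the HashSet plus sort then gives Calcite the deduplicated, ordered grouping sets it expects. Here is a hedged sketch of what such a helper plausibly looks like; the name, signature, and bit-per-key encoding are assumptions consistent with the previous example, not the planner's actual implementation.

import org.apache.calcite.util.ImmutableBitSet;

public class BitSetConvertSketch {

    // Hypothetical stand-in for the convert(...) helper called above: decode an
    // integer grouping-set mask into an ImmutableBitSet over the GROUP BY keys,
    // assuming bit i marks the i-th key as present in the grouping set.
    static ImmutableBitSet convert(int value, int groupCount) {
        ImmutableBitSet.Builder bits = ImmutableBitSet.builder();
        for (int i = 0; i < groupCount; i++) {
            if ((value & (1 << i)) != 0) {
                bits.set(i);
            }
        }
        return bits.build();
    }
}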

Example 3 with AggInfo

Use of org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.AggInfo in project flink by apache.

From the class HiveParserCalcitePlanner, the method getWindowRexAndType:

private Pair<RexNode, TypeInfo> getWindowRexAndType(
        HiveParserWindowingSpec.WindowExpressionSpec winExprSpec, RelNode srcRel)
        throws SemanticException {
    RexNode window;
    if (winExprSpec instanceof HiveParserWindowingSpec.WindowFunctionSpec) {
        HiveParserWindowingSpec.WindowFunctionSpec wFnSpec =
                (HiveParserWindowingSpec.WindowFunctionSpec) winExprSpec;
        HiveParserASTNode windowProjAst = wFnSpec.getExpression();
        // TODO: do we need to get to child?
        int wndSpecASTIndx = getWindowSpecIndx(windowProjAst);
        // 2. Get Hive Aggregate Info
        AggInfo hiveAggInfo = getHiveAggInfo(windowProjAst, wndSpecASTIndx - 1,
                relToRowResolver.get(srcRel), wFnSpec, semanticAnalyzer, frameworkConfig, cluster);
        // 3. Get Calcite Return type for Agg Fn
        RelDataType calciteAggFnRetType = HiveParserUtils.toRelDataType(
                hiveAggInfo.getReturnType(), cluster.getTypeFactory());
        // 4. Convert Agg Fn args to Calcite
        Map<String, Integer> posMap = relToHiveColNameCalcitePosMap.get(srcRel);
        HiveParserRexNodeConverter converter = new HiveParserRexNodeConverter(
                cluster, srcRel.getRowType(), posMap, 0, false, funcConverter);
        List<RexNode> calciteAggFnArgs = new ArrayList<>();
        List<RelDataType> calciteAggFnArgTypes = new ArrayList<>();
        for (int i = 0; i < hiveAggInfo.getAggParams().size(); i++) {
            calciteAggFnArgs.add(converter.convert(hiveAggInfo.getAggParams().get(i)));
            calciteAggFnArgTypes.add(HiveParserUtils.toRelDataType(
                    hiveAggInfo.getAggParams().get(i).getTypeInfo(), cluster.getTypeFactory()));
        }
        // 5. Get Calcite Agg Fn
        final SqlAggFunction calciteAggFn = HiveParserSqlFunctionConverter.getCalciteAggFn(
                hiveAggInfo.getUdfName(), hiveAggInfo.isDistinct(),
                calciteAggFnArgTypes, calciteAggFnRetType);
        // 6. Translate Window spec
        HiveParserRowResolver inputRR = relToRowResolver.get(srcRel);
        HiveParserWindowingSpec.WindowSpec wndSpec = wFnSpec.getWindowSpec();
        List<RexNode> partitionKeys = getPartitionKeys(wndSpec.getPartition(), converter, inputRR,
                new HiveParserTypeCheckCtx(inputRR, frameworkConfig, cluster), semanticAnalyzer);
        List<RexFieldCollation> orderKeys = getOrderKeys(wndSpec.getOrder(), converter, inputRR,
                new HiveParserTypeCheckCtx(inputRR, frameworkConfig, cluster), semanticAnalyzer);
        RexWindowBound lowerBound = getBound(wndSpec.getWindowFrame().getStart(), cluster);
        RexWindowBound upperBound = getBound(wndSpec.getWindowFrame().getEnd(), cluster);
        boolean isRows = wndSpec.getWindowFrame().getWindowType() == HiveParserWindowingSpec.WindowType.ROWS;
        window = HiveParserUtils.makeOver(cluster.getRexBuilder(), calciteAggFnRetType, calciteAggFn,
                calciteAggFnArgs, partitionKeys, orderKeys, lowerBound, upperBound,
                isRows, true, false, false, false);
        window = window.accept(funcConverter);
    } else {
        throw new SemanticException("Unsupported window Spec");
    }
    return new Pair<>(window, HiveParserTypeConverter.convert(window.getType()));
}
Also used: ArrayList (java.util.ArrayList), RelDataType (org.apache.calcite.rel.type.RelDataType), HiveParserRowResolver (org.apache.flink.table.planner.delegation.hive.copy.HiveParserRowResolver), RexWindowBound (org.apache.calcite.rex.RexWindowBound), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), Pair (org.apache.calcite.util.Pair), ObjectPair (org.apache.hadoop.hive.common.ObjectPair), HiveParserASTNode (org.apache.flink.table.planner.delegation.hive.copy.HiveParserASTNode), HiveParserBaseSemanticAnalyzer.getHiveAggInfo (org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.getHiveAggInfo), AggInfo (org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.AggInfo), HiveParserWindowingSpec (org.apache.flink.table.planner.delegation.hive.copy.HiveParserWindowingSpec), SqlAggFunction (org.apache.calcite.sql.SqlAggFunction), RexFieldCollation (org.apache.calcite.rex.RexFieldCollation), HiveParserTypeCheckCtx (org.apache.flink.table.planner.delegation.hive.copy.HiveParserTypeCheckCtx), RexNode (org.apache.calcite.rex.RexNode)
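
To make the extraction steps concrete, here is how the pieces would line up for a typical windowed aggregate. The query and column names below are hypothetical, chosen only for illustration; they do not come from the Flink code base.

// Assumed example query (illustration only):
//
//   SELECT SUM(price) OVER (PARTITION BY dept ORDER BY ts
//                           ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)
//   FROM orders
//
// Roughly, getWindowRexAndType would extract:
//   calciteAggFn          -> SUM (resolved via getCalciteAggFn)
//   calciteAggFnArgs      -> [price] (converted by HiveParserRexNodeConverter)
//   partitionKeys         -> [dept]
//   orderKeys             -> [ts ASC]
//   lowerBound/upperBound -> 1 PRECEDING / CURRENT ROW
//   isRows                -> true (a ROWS frame rather than RANGE)
//
// HiveParserUtils.makeOver then assembles these into a single windowed-aggregate
// RexNode, which funcConverter post-processes before the Pair is returned.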

Aggregations

ArrayList (java.util.ArrayList): 3 uses
AggInfo (org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.AggInfo): 3 uses
HiveParserBaseSemanticAnalyzer.getHiveAggInfo (org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.getHiveAggInfo): 3 uses
RelNode (org.apache.calcite.rel.RelNode): 2 uses
RexNode (org.apache.calcite.rex.RexNode): 2 uses
HiveParserASTNode (org.apache.flink.table.planner.delegation.hive.copy.HiveParserASTNode): 2 uses
HiveParserRowResolver (org.apache.flink.table.planner.delegation.hive.copy.HiveParserRowResolver): 2 uses
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 2 uses
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 2 uses
HashMap (java.util.HashMap): 1 use
HashSet (java.util.HashSet): 1 use
LinkedHashMap (java.util.LinkedHashMap): 1 use
AggregateCall (org.apache.calcite.rel.core.AggregateCall): 1 use
RelDataType (org.apache.calcite.rel.type.RelDataType): 1 use
RexFieldCollation (org.apache.calcite.rex.RexFieldCollation): 1 use
RexWindowBound (org.apache.calcite.rex.RexWindowBound): 1 use
SqlAggFunction (org.apache.calcite.sql.SqlAggFunction): 1 use
ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet): 1 use
Pair (org.apache.calcite.util.Pair): 1 use
HiveParserBaseSemanticAnalyzer (org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer): 1 use