Example 76 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class HiveOpConverter method createColInfos.

private static Pair<ArrayList<ColumnInfo>, Set<Integer>> createColInfos(List<RexNode> calciteExprs, List<ExprNodeDesc> hiveExprs, List<String> projNames, OpAttr inpOpAf) {
    if (hiveExprs.size() != projNames.size()) {
        throw new RuntimeException("Column expressions list doesn't match Column Names list");
    }
    RexNode rexN;
    ExprNodeDesc pe;
    ArrayList<ColumnInfo> colInfos = new ArrayList<ColumnInfo>();
    boolean vc;
    Set<Integer> newVColSet = new HashSet<Integer>();
    for (int i = 0; i < hiveExprs.size(); i++) {
        pe = hiveExprs.get(i);
        rexN = calciteExprs.get(i);
        vc = false;
        if (rexN instanceof RexInputRef) {
            if (inpOpAf.vcolsInCalcite.contains(((RexInputRef) rexN).getIndex())) {
                newVColSet.add(i);
                vc = true;
            }
        }
        colInfos.add(new ColumnInfo(projNames.get(i), pe.getTypeInfo(), inpOpAf.tabAlias, vc));
    }
    return new Pair<ArrayList<ColumnInfo>, Set<Integer>>(colInfos, newVColSet);
}
Also used : ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) RexInputRef(org.apache.calcite.rex.RexInputRef) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) RexNode(org.apache.calcite.rex.RexNode) HashSet(java.util.HashSet) Pair(org.apache.calcite.util.Pair)
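
As a standalone illustration of the pattern above, here is a minimal, hypothetical sketch (not taken from the Hive source) of deriving a ColumnInfo from an ExprNodeDesc the way createColInfos does for each projected column. The column name "id", table alias "t", and internal name "_col0" are invented for the example.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ColInfoSketch {
    public static void main(String[] args) {
        // Hypothetical expression referencing column "id" of table alias "t";
        // the last constructor flag marks the column as non-virtual.
        ExprNodeDesc pe = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "id", "t", false);
        // As in createColInfos: the projected name, the expression's type info,
        // the input table alias, and the virtual-column flag.
        List<ColumnInfo> colInfos = new ArrayList<ColumnInfo>();
        colInfos.add(new ColumnInfo("_col0", pe.getTypeInfo(), "t", false));
        System.out.println(colInfos.get(0).getInternalName() + " : " + colInfos.get(0).getType());
    }
}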

Example 77 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class HiveOpConverter method visit.

OpAttr visit(HiveSortExchange exchangeRel) throws SemanticException {
    OpAttr inputOpAf = dispatch(exchangeRel.getInput());
    String tabAlias = inputOpAf.tabAlias;
    if (tabAlias == null || tabAlias.length() == 0) {
        tabAlias = getHiveDerivedTableAlias();
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + exchangeRel.getId() + ":" + exchangeRel.getRelTypeName() + " with row type: [" + exchangeRel.getRowType() + "]");
    }
    RelDistribution distribution = exchangeRel.getDistribution();
    if (distribution.getType() != Type.HASH_DISTRIBUTED) {
        throw new SemanticException("Only hash distribution supported for LogicalExchange");
    }
    ExprNodeDesc[] expressions = new ExprNodeDesc[exchangeRel.getJoinKeys().size()];
    for (int index = 0; index < exchangeRel.getJoinKeys().size(); index++) {
        expressions[index] = convertToExprNode(exchangeRel.getJoinKeys().get(index), exchangeRel.getInput(), inputOpAf.tabAlias, inputOpAf);
    }
    exchangeRel.setJoinExpressions(expressions);
    ReduceSinkOperator rsOp = genReduceSink(inputOpAf.inputs.get(0), tabAlias, expressions, -1, -1, Operation.NOT_ACID, hiveConf);
    return new OpAttr(tabAlias, inputOpAf.vcolsInCalcite, rsOp);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) RelDistribution(org.apache.calcite.rel.RelDistribution) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
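
A minimal, hypothetical sketch of the key-building loop above, with the Calcite conversion replaced by hand-built column references; the column names, type, and alias are invented, and the real code obtains each expression via convertToExprNode instead.

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ExchangeKeysSketch {
    public static void main(String[] args) {
        // Hypothetical hash-distribution (join) key columns.
        String[] joinKeyCols = { "dept_id", "year" };
        ExprNodeDesc[] expressions = new ExprNodeDesc[joinKeyCols.length];
        for (int index = 0; index < joinKeyCols.length; index++) {
            // Each distribution key becomes a column expression, mirroring the
            // ExprNodeDesc[] the exchange visitor hands to genReduceSink.
            expressions[index] = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, joinKeyCols[index], "t", false);
        }
        for (ExprNodeDesc e : expressions) {
            // Print each key's expression string.
            System.out.println(e.getExprString());
        }
    }
}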

Example 78 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class RewriteQueryUsingAggregateIndexCtx method replaceGroupByOperatorProcess.

/**
   * We need to replace the count(indexed_column_key) GenericUDAF aggregation
   * function in the group-by construct with the "sum" GenericUDAF. This method
   * generates an operator tree for a sample query that creates a
   * GroupByOperator with the sum aggregation function, then uses that
   * GroupByOperator's information to replace the aggregation information of
   * the original GroupByOperator: the AggregationDesc (aggregation descriptor)
   * of the old GroupByOperator is replaced with the AggregationDesc of the
   * new one.
   */
private void replaceGroupByOperatorProcess(GroupByOperator operator, int index) throws SemanticException {
    RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
    // We need to replace the GroupByOperator which is before RS
    if (index == 0) {
        // the query contains the sum aggregation GenericUDAF
        String selReplacementCommand = "select sum(`" + rewriteQueryCtx.getAggregateFunction() + "`)" + " from `" + rewriteQueryCtx.getIndexName() + "` group by " + rewriteQueryCtx.getIndexKey() + " ";
        // retrieve the operator tree for the query, and the required GroupByOperator from it
        Operator<?> newOperatorTree = RewriteParseContextGenerator.generateOperatorTree(rewriteQueryCtx.getParseContext().getQueryState(), selReplacementCommand);
        // we get our new GroupByOperator here
        GroupByOperator newGbyOperator = OperatorUtils.findLastOperatorUpstream(newOperatorTree, GroupByOperator.class);
        if (newGbyOperator == null) {
            throw new SemanticException("Error replacing GroupBy operator.");
        }
        // we need this information to set the correct colList, outputColumnNames
        // in SelectOperator
        ExprNodeColumnDesc aggrExprNode = null;
        // Construct the new AggregationDesc to get rid of the current
        // internal names and replace them with new internal names
        // as required by the operator tree
        GroupByDesc newConf = newGbyOperator.getConf();
        List<AggregationDesc> newAggrList = newConf.getAggregators();
        if (newAggrList != null && newAggrList.size() > 0) {
            for (AggregationDesc aggregationDesc : newAggrList) {
                rewriteQueryCtx.setEval(aggregationDesc.getGenericUDAFEvaluator());
                aggrExprNode = (ExprNodeColumnDesc) aggregationDesc.getParameters().get(0);
                rewriteQueryCtx.setAggrExprNode(aggrExprNode);
            }
        }
        // Now the GroupByOperator has the new AggregationList;
        // sum(`_count_of_indexed_key`)
        // instead of count(indexed_key)
        GroupByDesc oldConf = operator.getConf();
        oldConf.setAggregators((ArrayList<AggregationDesc>) newAggrList);
        operator.setConf(oldConf);
    } else {
        // we just need to reset the GenericUDAFEvaluator and its name for this
        // GroupByOperator whose parent is the ReduceSinkOperator
        GroupByDesc childConf = operator.getConf();
        List<AggregationDesc> childAggrList = childConf.getAggregators();
        if (childAggrList != null && childAggrList.size() > 0) {
            for (AggregationDesc aggregationDesc : childAggrList) {
                List<ExprNodeDesc> paraList = aggregationDesc.getParameters();
                List<ObjectInspector> parametersOIList = new ArrayList<ObjectInspector>();
                for (ExprNodeDesc expr : paraList) {
                    parametersOIList.add(expr.getWritableObjectInspector());
                }
                GenericUDAFEvaluator evaluator = FunctionRegistry.getGenericUDAFEvaluator("sum", parametersOIList, false, false);
                aggregationDesc.setGenericUDAFEvaluator(evaluator);
                aggregationDesc.setGenericUDAFName("sum");
            }
        }
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) ArrayList(java.util.ArrayList) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
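
The pivotal call in the else branch is FunctionRegistry.getGenericUDAFEvaluator, looking up the "sum" evaluator by name and parameter object inspectors. A minimal, hypothetical sketch of that lookup in isolation; the column name _count_of_indexed_key mirrors the comment above, while the alias and type are invented.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class SumEvaluatorSketch {
    public static void main(String[] args) throws SemanticException {
        // Hypothetical parameter: the pre-aggregated count column of an index table.
        ExprNodeDesc param = new ExprNodeColumnDesc(
            TypeInfoFactory.longTypeInfo, "_count_of_indexed_key", "idx", false);
        List<ObjectInspector> parameterOIs = new ArrayList<ObjectInspector>();
        parameterOIs.add(param.getWritableObjectInspector());
        // Same call as in the example above: name, parameter OIs,
        // isDistinct = false, isAllColumns = false.
        GenericUDAFEvaluator evaluator =
            FunctionRegistry.getGenericUDAFEvaluator("sum", parameterOIs, false, false);
        System.out.println(evaluator.getClass().getName());
    }
}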

Example 79 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class HiveGBOpConvUtil method genMapSideGB.

@SuppressWarnings("unchecked")
private static OpAttr genMapSideGB(OpAttr inputOpAf, GBInfo gbAttrs) throws SemanticException {
    ArrayList<String> outputColNames = new ArrayList<String>();
    ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    Set<String> gbKeyColsAsNamesFrmIn = new HashSet<String>();
    String colOutputName = null;
    // 1. Build GB Keys, grouping set starting position
    // 1.1 First Add original GB Keys
    ArrayList<ExprNodeDesc> gbKeys = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < gbAttrs.gbKeys.size(); i++) {
        gbKeys.add(gbAttrs.gbKeys.get(i));
        colOutputName = SemanticAnalyzer.getColumnInternalName(i);
        colInfoLst.add(new ColumnInfo(colOutputName, gbAttrs.gbKeyTypes.get(i), "", false));
        outputColNames.add(colOutputName);
        gbKeyColsAsNamesFrmIn.add(gbAttrs.gbKeyColNamesInInput.get(i));
        colExprMap.put(colOutputName, gbKeys.get(i));
    }
    // 1.2. Adjust the GroupingSet position and GB keys for the GroupingSet if
    // needed. NOTE: the GroupingID is added to the map-side GB only if the
    // grouping sets do not require additional MR jobs
    int groupingSetsPosition = -1;
    boolean inclGrpID = inclGrpSetInMapSide(gbAttrs);
    if (inclGrpID) {
        groupingSetsPosition = gbKeys.size();
        addGrpSetCol(true, null, false, gbKeys, outputColNames, colInfoLst, colExprMap);
    }
    // 1.3 Add distinct UDAF parameters to the GB keys if they are not already
    // present among the GB keys
    for (int i = 0; i < gbAttrs.distExprNodes.size(); i++) {
        if (!gbKeyColsAsNamesFrmIn.contains(gbAttrs.distExprNames.get(i))) {
            gbKeys.add(gbAttrs.distExprNodes.get(i));
            colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() - 1);
            colInfoLst.add(new ColumnInfo(colOutputName, gbAttrs.distExprTypes.get(i), "", false));
            outputColNames.add(colOutputName);
            gbKeyColsAsNamesFrmIn.add(gbAttrs.distExprNames.get(i));
            colExprMap.put(colOutputName, gbKeys.get(gbKeys.size() - 1));
        }
    }
    // 2. Build Aggregations
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    for (UDAFAttrs udafAttr : gbAttrs.udafAttrs) {
        Mode amode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.HASH, udafAttr.isDistinctUDAF);
        aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udafAttr.udafEvaluator, udafAttr.udafParams, udafAttr.isDistinctUDAF, amode));
        GenericUDAFInfo udafInfo;
        try {
            udafInfo = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, amode, udafAttr.udafParams);
        } catch (SemanticException e) {
            throw new RuntimeException(e);
        }
        colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() - 1);
        colInfoLst.add(new ColumnInfo(colOutputName, udafInfo.returnType, "", false));
        outputColNames.add(colOutputName);
    }
    // 3. Create GB
    @SuppressWarnings("rawtypes") Operator gbOp = OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.HASH, outputColNames, gbKeys, aggregations, false, gbAttrs.groupByMemoryUsage, gbAttrs.memoryThreshold, gbAttrs.grpSets, inclGrpID, groupingSetsPosition, gbAttrs.containsDistinctAggr), new RowSchema(colInfoLst), inputOpAf.inputs.get(0));
    // 4. Setup Expr Col Map
    // NOTE: UDAF is not included in ExprColMap
    gbOp.setColumnExprMap(colExprMap);
    return new OpAttr("", new HashSet<Integer>(), gbOp);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) HashMap(java.util.HashMap) GenericUDAFInfo(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) HashSet(java.util.HashSet) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc)
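
A minimal, hypothetical sketch of step 1.1 above in isolation: each GB key is registered under a generated internal name, and the name-to-expression mapping is recorded in colExprMap. The column "dept" and alias "t" are invented, and "_col" + i stands in for what SemanticAnalyzer.getColumnInternalName(i) produces.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class GbKeySketch {
    public static void main(String[] args) {
        List<ExprNodeDesc> gbKeys = new ArrayList<ExprNodeDesc>();
        List<String> outputColNames = new ArrayList<String>();
        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
        // Hypothetical grouping key column.
        gbKeys.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "dept", "t", false));
        for (int i = 0; i < gbKeys.size(); i++) {
            // Internal names follow the "_col<position>" convention.
            String colOutputName = "_col" + i;
            outputColNames.add(colOutputName);
            colExprMap.put(colOutputName, gbKeys.get(i));
        }
        System.out.println(colExprMap);
    }
}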

Example 80 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class HiveGBOpConvUtil method getReduceKeysForRS.

/**
   * Get the reduce keys for an RS following a map-side GB.
   *
   * @param inOp the operator below the RS; its output columns in positions
   *          [startPos, endPos] become the reduce keys (assumed to be a
   *          deduped list of exprs)
   * @param outputKeyColumnNames list the generated key column names are
   *          appended to
   * @param colExprMap map from internal column name to key expression,
   *          populated as a side effect
   * @return list of ExprNodeDesc reduce keys
   * @throws SemanticException
   */
private static ArrayList<ExprNodeDesc> getReduceKeysForRS(Operator inOp, int startPos, int endPos, List<String> outputKeyColumnNames, boolean addOnlyOneKeyColName, ArrayList<ColumnInfo> colInfoLst, Map<String, ExprNodeDesc> colExprMap, boolean addEmptyTabAlias, boolean setColToNonVirtual) throws SemanticException {
    ArrayList<ExprNodeDesc> reduceKeys = null;
    if (endPos < 0) {
        reduceKeys = new ArrayList<ExprNodeDesc>();
    } else {
        reduceKeys = ExprNodeDescUtils.genExprNodeDesc(inOp, startPos, endPos, addEmptyTabAlias, setColToNonVirtual);
        int outColNameIndx = startPos;
        for (int i = 0; i < reduceKeys.size(); ++i) {
            String outputColName = SemanticAnalyzer.getColumnInternalName(outColNameIndx);
            outColNameIndx++;
            if (!addOnlyOneKeyColName || i == 0) {
                outputKeyColumnNames.add(outputColName);
            }
            // TODO: Verify if this is needed (why can't it always be null/empty?)
            String tabAlias = addEmptyTabAlias ? "" : null;
            ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + outputColName, reduceKeys.get(i).getTypeInfo(), tabAlias, false);
            colInfoLst.add(colInfo);
            colExprMap.put(colInfo.getInternalName(), reduceKeys.get(i));
        }
    }
    return reduceKeys;
}
Also used : ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
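
A minimal, hypothetical sketch of the naming convention used above: a reduce key surfaces downstream of the RS under the "KEY." prefix taken from Utilities.ReduceField. The key position and type are invented.

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ReduceKeySketch {
    public static void main(String[] args) {
        // Hypothetical reduce key expression at position 0.
        ExprNodeDesc reduceKey = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "_col0", "", false);
        String outputColName = "_col0";
        // Downstream operators address the key as "KEY.<internal name>".
        ColumnInfo colInfo = new ColumnInfo(
            Utilities.ReduceField.KEY.toString() + "." + outputColName,
            reduceKey.getTypeInfo(), "", false);
        System.out.println(colInfo.getInternalName()); // KEY._col0
    }
}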

Aggregations

ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 321 usages
ArrayList (java.util.ArrayList): 179 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 146 usages
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 110 usages
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 101 usages
Test (org.junit.Test): 74 usages
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 69 usages
HashMap (java.util.HashMap): 67 usages
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 57 usages
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 47 usages
LinkedHashMap (java.util.LinkedHashMap): 43 usages
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 42 usages
List (java.util.List): 40 usages
Operator (org.apache.hadoop.hive.ql.exec.Operator): 39 usages
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 35 usages
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 34 usages
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 34 usages
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 34 usages
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 33 usages
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 32 usages