
Example 16 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class TypeCheckProcFactory method processGByExpr.

/**
   * Function to do group-by subexpression elimination. This is called by all
   * the processors initially. As an example, consider the query select a+b,
   * count(1) from T group by a+b. Then a+b is already precomputed in the group
   * by operator's key, so we substitute a+b in the select list with the
   * internal column name of the a+b expression that appears in the input row
   * resolver.
   *
   * @param nd
   *          The node that is being inspected.
   * @param procCtx
   *          The processor context.
   *
   * @return exprNodeColumnDesc.
   */
public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) throws SemanticException {
    // We recursively create the exprNodeDesc. Base cases: when we encounter
    // a column ref, we convert that into an exprNodeColumnDesc; when we
    // encounter a constant, we convert that into an exprNodeConstantDesc.
    // For others we just build the exprNodeFuncDesc with recursively built children.
    ASTNode expr = (ASTNode) nd;
    TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
    //                                  having key in (select .. where a = min(b.value)
    if (!ctx.isUseCaching() && ctx.getOuterRR() == null) {
        return null;
    }
    RowResolver input = ctx.getInputRR();
    ExprNodeDesc desc = null;
    if ((ctx == null) || (input == null) || (!ctx.getAllowGBExprElimination())) {
        return null;
    }
    // If the current subExpression is pre-calculated, as in Group-By etc.
    ColumnInfo colInfo = input.getExpression(expr);
    // try outer row resolver
    RowResolver outerRR = ctx.getOuterRR();
    if (colInfo == null && outerRR != null) {
        colInfo = outerRR.getExpression(expr);
    }
    if (colInfo != null) {
        desc = new ExprNodeColumnDesc(colInfo);
        ASTNode source = input.getExpressionSource(expr);
        if (source != null) {
            ctx.getUnparseTranslator().addCopyTranslation(expr, source);
        }
        return desc;
    }
    return desc;
}
Also used : ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
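As a rough usage sketch (not taken from the Hive sources), the substitution boils down to wrapping the precomputed key's ColumnInfo in an ExprNodeColumnDesc. The internal column name "_col0" and the int type below are illustrative, and the snippet assumes hive-exec is on the classpath.

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class GByExprEliminationSketch {
    public static void main(String[] args) {
        // Suppose the group-by operator already computed a+b and exposed it as
        // internal column "_col0" (the internal name here is illustrative).
        ColumnInfo precomputedKey =
            new ColumnInfo("_col0", TypeInfoFactory.intTypeInfo, "" /* no table alias */, false);

        // processGByExpr short-circuits type checking for the a+b node in the
        // SELECT list and returns a plain column reference to that key instead
        // of rebuilding the arithmetic expression.
        ExprNodeColumnDesc substituted = new ExprNodeColumnDesc(precomputedKey);
        System.out.println(substituted.getExprString());
    }
}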

Example 17 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method genGroupByPlanGroupByOperator2MR.

/**
   * Generate the second GroupByOperator for the Group By Plan
   * (parseInfo.getXXX(dest)). The new GroupByOperator will do the second
   * aggregation based on the partial aggregation results.
   *
   * @param mode
   *          the mode of aggregation (FINAL)
   * @param genericUDAFEvaluators
   *          The mapping from Aggregation StringTree to the
   *          genericUDAFEvaluator.
   * @return the new GroupByOperator
   * @throws SemanticException
   */
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo2, GroupByDesc.Mode mode, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, boolean groupingSetsPresent) throws SemanticException {
    RowResolver groupByInputRowResolver2 = opParseCtx.get(reduceSinkOperatorInfo2).getRowResolver();
    RowResolver groupByOutputRowResolver2 = new RowResolver();
    groupByOutputRowResolver2.setIsExprResolver(true);
    ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    for (int i = 0; i < grpByExprs.size(); ++i) {
        ASTNode grpbyExpr = grpByExprs.get(i);
        ColumnInfo exprInfo = groupByInputRowResolver2.getExpression(grpbyExpr);
        if (exprInfo == null) {
            throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
        }
        String expression = exprInfo.getInternalName();
        groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), expression, exprInfo.getTabAlias(), exprInfo.getIsVirtualCol()));
        String field = getColumnInternalName(i);
        outputColumnNames.add(field);
        ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
        groupByOutputRowResolver2.putExpression(grpbyExpr, oColInfo);
        addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo2, groupByOutputRowResolver2);
        colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
    }
    int groupingSetsPosition = -1;
    // For grouping sets, add a dummy grouping key
    if (groupingSetsPresent) {
        groupingSetsPosition = groupByKeys.size();
        addGroupingSetKey(groupByKeys, groupByInputRowResolver2, groupByOutputRowResolver2, outputColumnNames, colExprMap);
    }
    HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
    boolean containsDistinctAggr = false;
    for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
        ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
        ASTNode value = entry.getValue();
        ColumnInfo paraExprInfo = groupByInputRowResolver2.getExpression(value);
        if (paraExprInfo == null) {
            throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
        }
        String paraExpression = paraExprInfo.getInternalName();
        assert (paraExpression != null);
        aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol()));
        String aggName = unescapeIdentifier(value.getChild(0).getText());
        boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
        containsDistinctAggr = containsDistinctAggr || isDistinct;
        boolean isStar = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
        Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
        GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
        assert (genericUDAFEvaluator != null);
        GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
        aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, (mode != GroupByDesc.Mode.FINAL && value.getToken().getType() == HiveParser.TOK_FUNCTIONDI), amode));
        String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
        outputColumnNames.add(field);
        groupByOutputRowResolver2.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, null, false, groupingSetsPosition, containsDistinctAggr), new RowSchema(groupByOutputRowResolver2.getColumnInfos()), reduceSinkOperatorInfo2), groupByOutputRowResolver2);
    op.setColumnExprMap(colExprMap);
    return op;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap)
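For orientation, a minimal sketch (assumed names, not Hive source) of what the per-key loop above produces: each input ColumnInfo from the partial-aggregation row becomes a key ExprNodeColumnDesc plus an output column named the way getColumnInternalName(i) would name it ("_col" + i). The schema contents are invented for illustration.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class FinalGroupByKeySketch {
    public static void main(String[] args) {
        // Pretend these ColumnInfos describe the partial-aggregation output that
        // feeds the second GroupByOperator (names and types are illustrative).
        List<ColumnInfo> reduceSinkOutput = new ArrayList<>();
        reduceSinkOutput.add(new ColumnInfo("KEY._col0", TypeInfoFactory.stringTypeInfo, "", false));
        reduceSinkOutput.add(new ColumnInfo("KEY._col1", TypeInfoFactory.longTypeInfo, "", false));

        List<ExprNodeDesc> groupByKeys = new ArrayList<>();
        List<String> outputColumnNames = new ArrayList<>();
        for (int i = 0; i < reduceSinkOutput.size(); i++) {
            ColumnInfo in = reduceSinkOutput.get(i);
            // Key expression: a column reference into the partial-aggregation row.
            groupByKeys.add(new ExprNodeColumnDesc(in.getType(), in.getInternalName(),
                in.getTabAlias(), in.getIsVirtualCol()));
            // Output column named the same way getColumnInternalName(i) names it.
            outputColumnNames.add("_col" + i);
        }
        System.out.println(groupByKeys + " -> " + outputColumnNames);
    }
}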

Example 18 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class ExprNodeDescUtils method genExprNodeDesc.

/**
   * Build ExprNodeColumnDesc for the projections in the input operator from
   * startPos to endPos (both inclusive). The operator must have an associated
   * colExprMap.
   * 
   * @param inputOp
   *          Input Hive Operator
   * @param startPos
   *          starting position in the input operator schema; must be >= 0 and
   *          <= endPos
   * @param endPos
   *          end position in the input operator schema; must be >= 0 and less
   *          than the size of the schema
   * @return List of ExprNodeDesc
   */
public static ArrayList<ExprNodeDesc> genExprNodeDesc(Operator inputOp, int startPos, int endPos, boolean addEmptyTabAlias, boolean setColToNonVirtual) {
    ArrayList<ExprNodeDesc> exprColLst = new ArrayList<ExprNodeDesc>();
    List<ColumnInfo> colInfoLst = inputOp.getSchema().getSignature();
    String tabAlias;
    boolean vc;
    ColumnInfo ci;
    for (int i = startPos; i <= endPos; i++) {
        ci = colInfoLst.get(i);
        tabAlias = ci.getTabAlias();
        if (addEmptyTabAlias) {
            tabAlias = "";
        }
        vc = ci.getIsVirtualCol();
        if (setColToNonVirtual) {
            vc = false;
        }
        exprColLst.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(), tabAlias, vc));
    }
    return exprColLst;
}
Also used : ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo)
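A self-contained sketch of the same loop, taking the signature list directly instead of an Operator so it can run without building a plan; the column names and types below are made up for illustration.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class SchemaSliceToColumnDescs {
    // Mirrors the loop body of genExprNodeDesc, but works on a plain list of
    // ColumnInfo so the sketch stays self-contained.
    static List<ExprNodeDesc> toColumnDescs(List<ColumnInfo> signature, int startPos, int endPos,
                                            boolean addEmptyTabAlias, boolean setColToNonVirtual) {
        List<ExprNodeDesc> exprs = new ArrayList<>();
        for (int i = startPos; i <= endPos; i++) {
            ColumnInfo ci = signature.get(i);
            String tabAlias = addEmptyTabAlias ? "" : ci.getTabAlias();
            boolean vc = setColToNonVirtual ? false : ci.getIsVirtualCol();
            exprs.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(), tabAlias, vc));
        }
        return exprs;
    }

    public static void main(String[] args) {
        List<ColumnInfo> signature = Arrays.asList(
            new ColumnInfo("_col0", TypeInfoFactory.intTypeInfo, "t", false),
            new ColumnInfo("_col1", TypeInfoFactory.stringTypeInfo, "t", false));
        System.out.println(toColumnDescs(signature, 0, 1, true, true));
    }
}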

Example 19 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class ColumnPrunerProcCtx method getColsFromSelectExpr.

/**
   * Creates the list of internal column names (represented by field nodes)
   * from select expressions in a select operator. This function is used for the
   * select operator instead of the genColLists function (which is used by
   * the rest of the operators).
   *
   * @param op The select operator.
   * @return a list of field nodes representing the internal column names.
   */
public List<FieldNode> getColsFromSelectExpr(SelectOperator op) {
    List<FieldNode> cols = new ArrayList<>();
    SelectDesc conf = op.getConf();
    if (conf.isSelStarNoCompute()) {
        for (ColumnInfo colInfo : op.getSchema().getSignature()) {
            cols.add(new FieldNode(colInfo.getInternalName()));
        }
    } else {
        List<ExprNodeDesc> exprList = conf.getColList();
        for (ExprNodeDesc expr : exprList) {
            cols = mergeFieldNodesWithDesc(cols, expr);
        }
    }
    return cols;
}
Also used : ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
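A hedged sketch of the isSelStarNoCompute() branch: it simply collects every internal name from the operator's schema (the real method wraps each name in a FieldNode; plain strings are used here to keep the sketch to the classes shown in these examples). The schema below is invented.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class SelectStarColumnNamesSketch {
    // Collects the internal names of every column in a schema, which is what the
    // select-star branch does before wrapping each name in a FieldNode.
    static List<String> internalNames(List<ColumnInfo> signature) {
        List<String> names = new ArrayList<>();
        for (ColumnInfo ci : signature) {
            names.add(ci.getInternalName());
        }
        return names;
    }

    public static void main(String[] args) {
        List<ColumnInfo> signature = Arrays.asList(
            new ColumnInfo("_col0", TypeInfoFactory.intTypeInfo, "t", false),
            new ColumnInfo("_col1", TypeInfoFactory.stringTypeInfo, "t", false));
        System.out.println(internalNames(signature)); // [_col0, _col1]
    }
}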

Example 20 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class ColumnPrunerProcCtx method handleFilterUnionChildren.

/**
   * If the input filter operator has direct children that are union operators,
   * and the filter's columns are not the same as the union's, create a select
   * operator between them. The select operator has the same number of columns
   * as the pruned child operator.
   *
   * @param curOp
   *          The filter operator which need to handle children.
   * @throws SemanticException
   */
public void handleFilterUnionChildren(Operator<? extends OperatorDesc> curOp) throws SemanticException {
    if (curOp.getChildOperators() == null || !(curOp instanceof FilterOperator)) {
        return;
    }
    List<FieldNode> parentPrunList = prunedColLists.get(curOp);
    if (parentPrunList == null || parentPrunList.size() == 0) {
        return;
    }
    List<FieldNode> prunList = null;
    for (Operator<? extends OperatorDesc> child : curOp.getChildOperators()) {
        if (child instanceof UnionOperator) {
            prunList = genColLists(child);
            if (prunList == null || prunList.size() == 0 || parentPrunList.size() == prunList.size()) {
                continue;
            }
            ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
            ArrayList<String> outputColNames = new ArrayList<String>();
            Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
            ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
            for (ColumnInfo colInfo : child.getSchema().getSignature()) {
                if (lookupColumn(prunList, colInfo.getInternalName()) == null) {
                    continue;
                }
                ExprNodeDesc colDesc = new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
                exprs.add(colDesc);
                outputColNames.add(colInfo.getInternalName());
                ColumnInfo newCol = new ColumnInfo(colInfo.getInternalName(), colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
                newCol.setAlias(colInfo.getAlias());
                outputRS.add(newCol);
                colExprMap.put(colInfo.getInternalName(), colDesc);
            }
            SelectDesc select = new SelectDesc(exprs, outputColNames, false);
            curOp.removeChild(child);
            SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(outputRS), curOp);
            OperatorFactory.makeChild(sel, child);
            sel.setColumnExprMap(colExprMap);
        }
    }
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
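A minimal sketch of the pruning loop that feeds the new select operator: only the surviving columns get an expression, an output name, a cloned ColumnInfo, and a colExprMap entry. The child schema and pruned-column list below are invented, and the OperatorFactory wiring is left out.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PrunedSelectColumnsSketch {
    public static void main(String[] args) {
        // Child (union) schema: three columns, of which only _col0 and _col2 survive pruning.
        List<ColumnInfo> childSchema = Arrays.asList(
            new ColumnInfo("_col0", TypeInfoFactory.intTypeInfo, "u", false),
            new ColumnInfo("_col1", TypeInfoFactory.stringTypeInfo, "u", false),
            new ColumnInfo("_col2", TypeInfoFactory.doubleTypeInfo, "u", false));
        List<String> prunedNames = Arrays.asList("_col0", "_col2");

        List<ExprNodeDesc> exprs = new ArrayList<>();
        List<String> outputColNames = new ArrayList<>();
        List<ColumnInfo> outputRS = new ArrayList<>();
        Map<String, ExprNodeDesc> colExprMap = new HashMap<>();
        for (ColumnInfo colInfo : childSchema) {
            if (!prunedNames.contains(colInfo.getInternalName())) {
                continue; // column was pruned away; the new SELECT does not project it
            }
            ExprNodeDesc colDesc = new ExprNodeColumnDesc(colInfo.getType(),
                colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
            exprs.add(colDesc);
            outputColNames.add(colInfo.getInternalName());
            outputRS.add(new ColumnInfo(colInfo.getInternalName(), colInfo.getType(),
                colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()));
            colExprMap.put(colInfo.getInternalName(), colDesc);
        }
        // These lists would feed new SelectDesc(exprs, outputColNames, false) and a RowSchema,
        // as in handleFilterUnionChildren above.
        System.out.println(outputColNames); // [_col0, _col2]
    }
}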

Aggregations

ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo) 117
ArrayList (java.util.ArrayList) 75
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 69
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema) 56
HashMap (java.util.HashMap) 46
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) 43
LinkedHashMap (java.util.LinkedHashMap) 35
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) 34
Operator (org.apache.hadoop.hive.ql.exec.Operator) 28
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator) 27
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator) 27
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator) 26
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator) 25
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator) 24
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator) 21
Map (java.util.Map) 20
CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) 20
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) 19
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator) 19
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) 18