Example 11 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class HiveGBOpConvUtil method genMapSideGB.

@SuppressWarnings("unchecked")
private static OpAttr genMapSideGB(OpAttr inputOpAf, GBInfo gbAttrs) throws SemanticException {
    ArrayList<String> outputColNames = new ArrayList<String>();
    ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    Set<String> gbKeyColsAsNamesFrmIn = new HashSet<String>();
    String colOutputName = null;
    // 1. Build GB Keys, grouping set starting position
    // 1.1 First Add original GB Keys
    ArrayList<ExprNodeDesc> gbKeys = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < gbAttrs.gbKeys.size(); i++) {
        gbKeys.add(gbAttrs.gbKeys.get(i));
        colOutputName = SemanticAnalyzer.getColumnInternalName(i);
        colInfoLst.add(new ColumnInfo(colOutputName, gbAttrs.gbKeyTypes.get(i), "", false));
        outputColNames.add(colOutputName);
        gbKeyColsAsNamesFrmIn.add(gbAttrs.gbKeyColNamesInInput.get(i));
        colExprMap.put(colOutputName, gbKeys.get(i));
    }
    // 1.2. Adjust the GroupingSet position and GB keys for the GroupingSet
    // position if needed. NOTE: GroupingID is added to the map-side GB only
    // if the grouping sets don't require additional MR jobs
    int groupingSetsPosition = -1;
    boolean inclGrpID = inclGrpSetInMapSide(gbAttrs);
    if (inclGrpID) {
        groupingSetsPosition = gbKeys.size();
        addGrpSetCol(true, null, false, gbKeys, outputColNames, colInfoLst, colExprMap);
    }
    // 1.3 Add distinct expressions as GB keys if they are not already GB keys
    // (plan gen would have prevented it)
    for (int i = 0; i < gbAttrs.distExprNodes.size(); i++) {
        if (!gbKeyColsAsNamesFrmIn.contains(gbAttrs.distExprNames.get(i))) {
            gbKeys.add(gbAttrs.distExprNodes.get(i));
            colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() - 1);
            colInfoLst.add(new ColumnInfo(colOutputName, gbAttrs.distExprTypes.get(i), "", false));
            outputColNames.add(colOutputName);
            gbKeyColsAsNamesFrmIn.add(gbAttrs.distExprNames.get(i));
            colExprMap.put(colOutputName, gbKeys.get(gbKeys.size() - 1));
        }
    }
    // 2. Build Aggregations
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    for (UDAFAttrs udafAttr : gbAttrs.udafAttrs) {
        Mode amode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.HASH, udafAttr.isDistinctUDAF);
        aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udafAttr.udafEvaluator, udafAttr.udafParams, udafAttr.isDistinctUDAF, amode));
        GenericUDAFInfo udafInfo;
        try {
            udafInfo = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, amode, udafAttr.udafParams);
        } catch (SemanticException e) {
            throw new RuntimeException(e);
        }
        colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() - 1);
        colInfoLst.add(new ColumnInfo(colOutputName, udafInfo.returnType, "", false));
        outputColNames.add(colOutputName);
    }
    // 3. Create GB
    @SuppressWarnings("rawtypes") Operator gbOp = OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.HASH, outputColNames, gbKeys, aggregations, false, gbAttrs.groupByMemoryUsage, gbAttrs.memoryThreshold, gbAttrs.grpSets, inclGrpID, groupingSetsPosition, gbAttrs.containsDistinctAggr), new RowSchema(colInfoLst), inputOpAf.inputs.get(0));
    // 4. Setup the column-to-expression map
    // NOTE: UDAF outputs are not included in the ExprColMap
    gbOp.setColumnExprMap(colExprMap);
    return new OpAttr("", new HashSet<Integer>(), gbOp);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) HashMap(java.util.HashMap) GenericUDAFInfo(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) HashSet(java.util.HashSet) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc)
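
A note on the naming above: every output column that genMapSideGB registers comes from SemanticAnalyzer.getColumnInternalName(pos), which in Hive resolves to "_col" + pos. The standalone sketch below (the class and helper are illustrative stand-ins, not Hive code) shows the schema layout the method ends up with: GB keys first, then aggregation outputs, in one continuous _colN sequence.

import java.util.ArrayList;
import java.util.List;

// Standalone sketch of the schema layout genMapSideGB builds. The local
// helper mirrors SemanticAnalyzer.getColumnInternalName ("_col" + pos).
public class MapSideGbNamingSketch {
    static String columnInternalName(int pos) {
        return "_col" + pos;
    }

    public static void main(String[] args) {
        int numGbKeys = 2; // e.g. two GROUP BY keys
        int numAggs = 1;   // e.g. one aggregation output
        List<String> outputColNames = new ArrayList<>();
        for (int i = 0; i < numGbKeys + numAggs; i++) {
            outputColNames.add(columnInternalName(i));
        }
        System.out.println(outputColNames); // [_col0, _col1, _col2]
    }
}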

Example 12 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class HiveGBOpConvUtil method getReduceKeysForRS.

/**
   * Get reduce keys for the RS following the map-side GB.
   *
   * @param inOp
   *          the map-side GB operator feeding the RS
   * @param startPos
   *          first position in inOp's schema to emit as a reduce key
   * @param endPos
   *          last position (inclusive); a negative value yields no keys
   * @param outputKeyColumnNames
   *          output list of key column names, appended to in place
   * @param colExprMap
   *          output map from internal column name to its expression
   * @return deduped list of ExprNodeDesc reduce keys
   * @throws SemanticException
   */
private static ArrayList<ExprNodeDesc> getReduceKeysForRS(Operator inOp, int startPos, int endPos, List<String> outputKeyColumnNames, boolean addOnlyOneKeyColName, ArrayList<ColumnInfo> colInfoLst, Map<String, ExprNodeDesc> colExprMap, boolean addEmptyTabAlias, boolean setColToNonVirtual) throws SemanticException {
    ArrayList<ExprNodeDesc> reduceKeys = null;
    if (endPos < 0) {
        reduceKeys = new ArrayList<ExprNodeDesc>();
    } else {
        reduceKeys = ExprNodeDescUtils.genExprNodeDesc(inOp, startPos, endPos, addEmptyTabAlias, setColToNonVirtual);
        int outColNameIndx = startPos;
        for (int i = 0; i < reduceKeys.size(); ++i) {
            String outputColName = SemanticAnalyzer.getColumnInternalName(outColNameIndx);
            outColNameIndx++;
            if (!addOnlyOneKeyColName || i == 0) {
                outputKeyColumnNames.add(outputColName);
            }
            // TODO: Verify if this is needed (why can't it always be null/empty?)
            String tabAlias = addEmptyTabAlias ? "" : null;
            ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + outputColName, reduceKeys.get(i).getTypeInfo(), tabAlias, false);
            colInfoLst.add(colInfo);
            colExprMap.put(colInfo.getInternalName(), reduceKeys.get(i));
        }
    }
    return reduceKeys;
}
Also used : ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
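
The "KEY." prefix built above is how columns are addressed once they pass through a ReduceSink: Utilities.ReduceField is an enum whose members include KEY and VALUE, and its toString() is the member name. A small standalone sketch (the local enum is a stand-in for the Hive one, not the real class):

// Standalone sketch: internal names after a ReduceSink. getReduceKeysForRS
// prefixes key columns with "KEY." and getValueKeysForRS (next example)
// prefixes value columns with "VALUE.", matching Utilities.ReduceField.
public class ReduceFieldNamingSketch {
    // Local stand-in for org.apache.hadoop.hive.ql.exec.Utilities.ReduceField
    enum ReduceField { KEY, VALUE }

    public static void main(String[] args) {
        String outputColName = "_col0";
        System.out.println(ReduceField.KEY + "." + outputColName);   // KEY._col0
        System.out.println(ReduceField.VALUE + "." + outputColName); // VALUE._col0
    }
}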

Example 13 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class HiveGBOpConvUtil method getValueKeysForRS.

/**
   * Get value columns for the RS following the map-side GB.
   *
   * @param inOp
   *          the map-side GB operator
   * @param aggStartPos
   *          position in inOp's schema where the aggregation columns begin
   * @param outputKeyColumnNames
   *          output list of column names, appended to in place
   * @param colExprMap
   *          output map from internal column name to its expression
   * @return List of ExprNodeDesc of values
   * @throws SemanticException
   */
private static ArrayList<ExprNodeDesc> getValueKeysForRS(Operator inOp, int aggStartPos, List<String> outputKeyColumnNames, ArrayList<ColumnInfo> colInfoLst, Map<String, ExprNodeDesc> colExprMap, boolean addEmptyTabAlias, boolean setColToNonVirtual) throws SemanticException {
    List<ColumnInfo> mapGBColInfoLst = inOp.getSchema().getSignature();
    ArrayList<ExprNodeDesc> valueKeys = null;
    if (aggStartPos >= mapGBColInfoLst.size()) {
        valueKeys = new ArrayList<ExprNodeDesc>();
    } else {
        valueKeys = ExprNodeDescUtils.genExprNodeDesc(inOp, aggStartPos, mapGBColInfoLst.size() - 1, true, setColToNonVirtual);
        for (int i = 0; i < valueKeys.size(); ++i) {
            String outputColName = SemanticAnalyzer.getColumnInternalName(i);
            outputKeyColumnNames.add(outputColName);
            // TODO: Verify if this is needed (why can't it always be null/empty?)
            String tabAlias = addEmptyTabAlias ? "" : null;
            ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." + outputColName, valueKeys.get(i).getTypeInfo(), tabAlias, false);
            colInfoLst.add(colInfo);
            colExprMap.put(colInfo.getInternalName(), valueKeys.get(i));
        }
    }
    return valueKeys;
}
Also used : ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
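
For the value side, getValueKeysForRS takes everything in the map-side GB schema from aggStartPos onward (the aggregation buffers), leaving positions 0..aggStartPos-1 to the key path of the previous example; the sliced columns are then re-numbered from _col0 under the "VALUE." prefix. A hedged standalone illustration of that slice, with made-up positions:

import java.util.List;

// Standalone sketch of the slice getValueKeysForRS takes: schema positions
// below aggStartPos are reduce keys, everything from aggStartPos to the end
// becomes a ReduceSink value column (the positions here are made up).
public class ValueSliceSketch {
    public static void main(String[] args) {
        List<String> mapGbSchema = List.of("_col0", "_col1", "_col2", "_col3");
        int aggStartPos = 2; // first aggregation column; keys occupy 0..1
        List<String> valueCols = mapGbSchema.subList(aggStartPos, mapGbSchema.size());
        System.out.println(valueCols); // [_col2, _col3]
    }
}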

Example 14 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class HiveGBOpConvUtil method genReduceGBRS.

private static OpAttr genReduceGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
    // 1. The first reduce-side GB is the input; its schema drives the RS
    GroupByOperator reduceSideGB1 = (GroupByOperator) inputOpAf.inputs.get(0);
    List<ColumnInfo> gb1ColInfoLst = reduceSideGB1.getSchema().getSignature();
    // 2. GB keys become the RS reduce keys
    ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForRS(reduceSideGB1, 0,
        gbInfo.gbKeys.size() - 1, outputColumnNames, false, colInfoLst, colExprMap, true, true);
    // 3. Forward the grouping-set column if the reduce side needs it
    if (inclGrpSetInReduceSide(gbInfo)) {
        addGrpSetCol(false, gb1ColInfoLst.get(reduceKeys.size()).getInternalName(), true,
            reduceKeys, outputColumnNames, colInfoLst, colExprMap);
    }
    // 4. Aggregation buffers become the RS values
    ArrayList<ExprNodeDesc> reduceValues = getValueKeysForRS(reduceSideGB1,
        reduceSideGB1.getConf().getKeys().size(), outputColumnNames, colInfoLst, colExprMap,
        true, true);
    // 5. Create the RS that feeds the next reduce-side GB stage
    ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
        PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1,
            getNumPartFieldsForReduceSideRS(gbInfo), getParallelismForReduceSideRS(gbInfo),
            AcidUtils.Operation.NOT_ACID),
        new RowSchema(colInfoLst), reduceSideGB1);
    rsOp.setColumnExprMap(colExprMap);
    return new OpAttr("", new HashSet<Integer>(), rsOp);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 15 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SubQueryUtils method buildSQJoinExpr.

/*
   * construct the ASTNode for the SQ column that will join with the OuterQuery Expression.
   * So for 'select ... from R1 where A in (select B from R2...)'
   * this will build (. (TOK_TABLE_OR_COL Identifier[SQ_1]) Identifier[B])
   * where 'SQ_1' is the alias generated for the SubQuery.
   */
static ASTNode buildSQJoinExpr(String sqAlias, RowResolver sqRR) {
    List<ColumnInfo> signature = sqRR.getRowSchema().getSignature();
    ColumnInfo joinColumn = signature.get(0);
    String[] joinColName = sqRR.reverseLookup(joinColumn.getInternalName());
    return createColRefAST(sqAlias, joinColName[1]);
}
Also used : ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo)
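
Rendering the comment's example concretely: for select ... from R1 where A in (select B from R2 ...), the AST buildSQJoinExpr assembles prints as the s-expression below. This is a standalone sketch of the shape only (simplified to plain identifiers), not the real ASTNode API.

// Standalone sketch: the dot-expression AST built by buildSQJoinExpr, shown
// in the s-expression notation used in the comment above. "SQ_1" is the
// generated subquery alias and "B" the first column of the subquery schema.
public class SqJoinAstSketch {
    public static void main(String[] args) {
        String sqAlias = "SQ_1";
        String joinCol = "B";
        String ast = "(. (TOK_TABLE_OR_COL " + sqAlias + ") " + joinCol + ")";
        System.out.println(ast); // (. (TOK_TABLE_OR_COL SQ_1) B)
    }
}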

Aggregations

ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 117
ArrayList (java.util.ArrayList): 75
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 69
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 56
HashMap (java.util.HashMap): 46
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 43
LinkedHashMap (java.util.LinkedHashMap): 35
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 34
Operator (org.apache.hadoop.hive.ql.exec.Operator): 28
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 27
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 27
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 26
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 25
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 24
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 21
Map (java.util.Map): 20
CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException): 20
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 19
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 19
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator): 18