Search in sources :

Example 91 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method genMapGroupByForSemijoin.

private // the
Operator genMapGroupByForSemijoin(// the
QB qb, // the
ArrayList<ASTNode> fields, // "tab.col"
Operator inputOperatorInfo, GroupByDesc.Mode mode) throws SemanticException {
    RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo).getRowResolver();
    RowResolver groupByOutputRowResolver = new RowResolver();
    ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    qb.getParseInfo();
    // join keys should only
    groupByOutputRowResolver.setIsExprResolver(true);
    for (int i = 0; i < fields.size(); ++i) {
        // get the group by keys to ColumnInfo
        ASTNode colName = fields.get(i);
        ExprNodeDesc grpByExprNode = genExprNodeDesc(colName, groupByInputRowResolver);
        groupByKeys.add(grpByExprNode);
        // generate output column names
        String field = getColumnInternalName(i);
        outputColumnNames.add(field);
        ColumnInfo colInfo2 = new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false);
        groupByOutputRowResolver.putExpression(colName, colInfo2);
        // establish mapping from the output column to the input column
        colExprMap.put(field, grpByExprNode);
    }
    // Generate group-by operator
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, null, false, -1, false), new RowSchema(groupByOutputRowResolver.getColumnInfos()), inputOperatorInfo), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)

Example 92 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class SemanticAnalyzer method insertSelectForSemijoin.

/**
   * Construct a selection operator for semijoin that filter out all fields
   * other than the group by keys.
   *
   * @param fields
   *          list of fields need to be output
   * @param input
   *          input operator
   * @return the selection operator.
   * @throws SemanticException
   */
private Operator insertSelectForSemijoin(ArrayList<ASTNode> fields, Operator input) throws SemanticException {
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    RowResolver outputRR = new RowResolver();
    // construct the list of columns that need to be projected
    for (int i = 0; i < fields.size(); ++i) {
        ASTNode field = fields.get(i);
        ExprNodeDesc exprNode = genExprNodeDesc(field, inputRR);
        String colName = getColumnInternalName(i);
        outputColumnNames.add(colName);
        ColumnInfo colInfo = new ColumnInfo(colName, exprNode.getTypeInfo(), "", false);
        outputRR.putExpression(field, colInfo);
        colList.add(exprNode);
        colExprMap.put(colName, exprNode);
    }
    // create selection operator
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(colList, outputColumnNames, false), new RowSchema(outputRR.getColumnInfos()), input), outputRR);
    output.setColumnExprMap(colExprMap);
    return output;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)

Example 93 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class HiveGBOpConvUtil method genReduceSideGB2.

private static OpAttr genReduceSideGB2(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
    ArrayList<String> outputColNames = new ArrayList<String>();
    ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    String colOutputName = null;
    ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0);
    List<ColumnInfo> rsColInfoLst = rs.getSchema().getSignature();
    ColumnInfo ci;
    // 1. Build GB Keys, grouping set starting position
    // 1.1 First Add original GB Keys
    ArrayList<ExprNodeDesc> gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, gbInfo.gbKeys.size() - 1, false, false);
    for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
        ci = rsColInfoLst.get(i);
        colOutputName = gbInfo.outputColNames.get(i);
        outputColNames.add(colOutputName);
        colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), "", false));
        colExprMap.put(colOutputName, gbKeys.get(i));
    }
    // 1.2 Add GrpSet Col
    int groupingSetsPosition = -1;
    if (inclGrpSetInReduceSide(gbInfo) && gbInfo.grpIdFunctionNeeded) {
        groupingSetsPosition = gbKeys.size();
        ExprNodeDesc grpSetColExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, rsColInfoLst.get(groupingSetsPosition).getInternalName(), null, false);
        gbKeys.add(grpSetColExpr);
        colOutputName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1);
        ;
        outputColNames.add(colOutputName);
        colInfoLst.add(new ColumnInfo(colOutputName, TypeInfoFactory.stringTypeInfo, null, true));
        colExprMap.put(colOutputName, grpSetColExpr);
    }
    // 2. Add UDAF
    UDAFAttrs udafAttr;
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    int udafStartPosInGBInfOutputColNames = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() * 2;
    int udafStartPosInInputRS = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() + 1;
    for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
        udafAttr = gbInfo.udafAttrs.get(i);
        ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
        aggParameters.add(new ExprNodeColumnDesc(rsColInfoLst.get(udafStartPosInInputRS + i)));
        colOutputName = gbInfo.outputColNames.get(udafStartPosInGBInfOutputColNames + i);
        outputColNames.add(colOutputName);
        Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.FINAL, udafAttr.isDistinctUDAF);
        GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, aggParameters);
        aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, false, udafMode));
        colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false));
    }
    Operator rsGBOp2 = OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.FINAL, outputColNames, gbKeys, aggregations, false, gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, null, false, groupingSetsPosition, gbInfo.containsDistinctAggr), new RowSchema(colInfoLst), rs);
    rsGBOp2.setColumnExprMap(colExprMap);
    // TODO: Shouldn't we propgate vc? is it vc col from tab or all vc
    return new OpAttr("", new HashSet<Integer>(), rsGBOp2);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) GenericUDAFInfo(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)

Example 94 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class HiveOpConverter method genInputSelectForUnion.

private Operator<? extends OperatorDesc> genInputSelectForUnion(Operator<? extends OperatorDesc> origInputOp, ArrayList<ColumnInfo> uColumnInfo) throws SemanticException {
    Iterator<ColumnInfo> oIter = origInputOp.getSchema().getSignature().iterator();
    Iterator<ColumnInfo> uIter = uColumnInfo.iterator();
    List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
    List<String> colName = new ArrayList<String>();
    Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
    boolean needSelectOp = false;
    while (oIter.hasNext()) {
        ColumnInfo oInfo = oIter.next();
        ColumnInfo uInfo = uIter.next();
        if (!oInfo.isSameColumnForRR(uInfo)) {
            needSelectOp = true;
        }
        ExprNodeDesc column = new ExprNodeColumnDesc(oInfo.getType(), oInfo.getInternalName(), oInfo.getTabAlias(), oInfo.getIsVirtualCol(), oInfo.isSkewedCol());
        if (!oInfo.getType().equals(uInfo.getType())) {
            column = ParseUtils.createConversionCast(column, (PrimitiveTypeInfo) uInfo.getType());
        }
        columns.add(column);
        colName.add(uInfo.getInternalName());
        columnExprMap.put(uInfo.getInternalName(), column);
    }
    if (needSelectOp) {
        return OperatorFactory.getAndMakeChild(new SelectDesc(columns, colName), new RowSchema(uColumnInfo), columnExprMap, origInputOp);
    } else {
        return origInputOp;
    }
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)

Example 95 with ColumnInfo

use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.

the class HiveOpConverter method buildBacktrackFromReduceSinkForJoin.

private static Map<String, ExprNodeDesc> buildBacktrackFromReduceSinkForJoin(int initialPos, List<String> outputColumnNames, List<String> keyColNames, List<String> valueColNames, int[] index, Operator<?> inputOp, String tabAlias) {
    Map<String, ExprNodeDesc> columnDescriptors = new LinkedHashMap<String, ExprNodeDesc>();
    for (int i = 0; i < index.length; i++) {
        ColumnInfo info = new ColumnInfo(inputOp.getSchema().getSignature().get(i));
        String field;
        if (index[i] >= 0) {
            field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]);
        } else {
            field = Utilities.ReduceField.VALUE + "." + valueColNames.get(-index[i] - 1);
        }
        ExprNodeColumnDesc desc = new ExprNodeColumnDesc(info.getType(), field, tabAlias, info.getIsVirtualCol());
        columnDescriptors.put(outputColumnNames.get(initialPos + i), desc);
    }
    return columnDescriptors;
}
Also used : ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LinkedHashMap(java.util.LinkedHashMap)

Aggregations

ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)225 ArrayList (java.util.ArrayList)140 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)138 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)100 HashMap (java.util.HashMap)93 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)86 LinkedHashMap (java.util.LinkedHashMap)71 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)59 Operator (org.apache.hadoop.hive.ql.exec.Operator)48 SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint)47 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)47 SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint)45 SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint)45 SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint)45 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)45 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)45 DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint)45 Map (java.util.Map)41 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)39 CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)38