Search in sources :

Example 36 with GroupByDesc

use of org.apache.hadoop.hive.ql.plan.GroupByDesc in project hive by apache.

the class SemanticAnalyzer method genMapGroupByForSemijoin.

private Operator genMapGroupByForSemijoin(QB qb, ArrayList<ASTNode> fields, Operator<?> input, GroupByDesc.Mode mode) throws SemanticException {
    RowResolver groupByInputRowResolver = opParseCtx.get(input).getRowResolver();
    RowResolver groupByOutputRowResolver = new RowResolver();
    ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < fields.size(); ++i) {
        // get the group by keys to ColumnInfo
        ASTNode colName = fields.get(i);
        String[] nm;
        String[] nm2;
        ExprNodeDesc grpByExprNode = genExprNodeDesc(colName, groupByInputRowResolver);
        if (grpByExprNode instanceof ExprNodeColumnDesc) {
            // In most of the cases, this is a column reference
            ExprNodeColumnDesc columnExpr = (ExprNodeColumnDesc) grpByExprNode;
            nm = groupByInputRowResolver.reverseLookup(columnExpr.getColumn());
            nm2 = groupByInputRowResolver.getAlternateMappings(columnExpr.getColumn());
        } else if (grpByExprNode instanceof ExprNodeConstantDesc) {
            // However, it can be a constant too. In that case, we need to track
            // the column that it originated from in the input operator so we can
            // propagate the aliases.
            ExprNodeConstantDesc constantExpr = (ExprNodeConstantDesc) grpByExprNode;
            String inputCol = constantExpr.getFoldedFromCol();
            nm = groupByInputRowResolver.reverseLookup(inputCol);
            nm2 = groupByInputRowResolver.getAlternateMappings(inputCol);
        } else {
            // of the left semijoin
            return input;
        }
        groupByKeys.add(grpByExprNode);
        // generate output column names
        String field = getColumnInternalName(i);
        outputColumnNames.add(field);
        ColumnInfo colInfo2 = new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false);
        groupByOutputRowResolver.put(nm[0], nm[1], colInfo2);
        if (nm2 != null) {
            groupByOutputRowResolver.addMappingOnly(nm2[0], nm2[1], colInfo2);
        }
        groupByOutputRowResolver.putExpression(colName, colInfo2);
        // establish mapping from the output column to the input column
        colExprMap.put(field, grpByExprNode);
    }
    // Generate group-by operator
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, null, false, -1, false), new RowSchema(groupByOutputRowResolver.getColumnInfos()), input), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)

Aggregations

GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc)36 ArrayList (java.util.ArrayList)30 AggregationDesc (org.apache.hadoop.hive.ql.plan.AggregationDesc)22 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)20 VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc)20 HashMap (java.util.HashMap)18 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)16 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)14 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)13 Operator (org.apache.hadoop.hive.ql.exec.Operator)13 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)13 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)13 CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext)12 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)10 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)10 FakeCaptureVectorToRowOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator)10 Mode (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode)10 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)9 LinkedHashMap (java.util.LinkedHashMap)8 Map (java.util.Map)8