
Example 26 with GroupByDesc

use of org.apache.hadoop.hive.ql.plan.GroupByDesc in project hive by apache.

the class TestVectorGroupByOperator method buildGroupByDescType.

private static GroupByDesc buildGroupByDescType(VectorizationContext ctx, String aggregate, GenericUDAFEvaluator.Mode mode, String column, TypeInfo dataType) {
    // Build the single aggregation (e.g. "count", "sum") over the given column.
    AggregationDesc agg = buildAggregationDesc(ctx, aggregate, mode, column, dataType);
    ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
    aggs.add(agg);
    // One output column holding the aggregate result.
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    outputColumnNames.add("_col0");
    // Assemble the GroupByDesc and attach its vectorization descriptor.
    GroupByDesc desc = new GroupByDesc();
    desc.setVectorDesc(new VectorGroupByDesc());
    desc.setOutputColumnNames(outputColumnNames);
    desc.setAggregators(aggs);
    // No grouping keys: aggregate over all rows (GLOBAL processing mode).
    ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.GLOBAL);
    return desc;
}
Also used : ArrayList(java.util.ArrayList), VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc), AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc), GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)
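
The helper above only builds the descriptor; it does not execute anything. As a hedged usage sketch (the column name "value" and the choice of a partial COUNT over a long column are assumptions for illustration, not part of the test above; TypeInfoFactory is org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory), a test could call it like this:

// Minimal usage sketch: build a GLOBAL-mode GroupByDesc for a partial COUNT
// over an assumed long column named "value".
GroupByDesc countDesc = buildGroupByDescType(
    ctx,                                  // VectorizationContext prepared by the test
    "count",                              // aggregate function name
    GenericUDAFEvaluator.Mode.PARTIAL1,   // map-side partial aggregation
    "value",                              // assumed input column name
    TypeInfoFactory.longTypeInfo);        // type of the aggregated column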

Example 27 with GroupByDesc

use of org.apache.hadoop.hive.ql.plan.GroupByDesc in project hive by apache.

the class SemanticAnalyzer method genMapGroupByForSemijoin.

private Operator genMapGroupByForSemijoin(QB qb,
        ArrayList<ASTNode> fields, // the semijoin key columns, given as "tab.col" ASTNodes
        Operator inputOperatorInfo, GroupByDesc.Mode mode) throws SemanticException {
    RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo).getRowResolver();
    RowResolver groupByOutputRowResolver = new RowResolver();
    ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    qb.getParseInfo();
    // join keys should only be columns, not expressions
    groupByOutputRowResolver.setIsExprResolver(true);
    for (int i = 0; i < fields.size(); ++i) {
        // convert each group-by key to an ExprNodeDesc and record its ColumnInfo
        ASTNode colName = fields.get(i);
        ExprNodeDesc grpByExprNode = genExprNodeDesc(colName, groupByInputRowResolver);
        groupByKeys.add(grpByExprNode);
        // generate output column names
        String field = getColumnInternalName(i);
        outputColumnNames.add(field);
        ColumnInfo colInfo2 = new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false);
        groupByOutputRowResolver.putExpression(colName, colInfo2);
        // establish mapping from the output column to the input column
        colExprMap.put(field, grpByExprNode);
    }
    // Generate group-by operator
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, null, false, -1, false), new RowSchema(groupByOutputRowResolver.getColumnInfos()), inputOperatorInfo), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator), SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator), JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator), Operator(org.apache.hadoop.hive.ql.exec.Operator), GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator), FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator), FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator), ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator), UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator), SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator), RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema), LinkedHashMap(java.util.LinkedHashMap), HashMap(java.util.HashMap), ArrayList(java.util.ArrayList), ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo), AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)
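
The constructor call above wires the semijoin keys, the generated output column names, an empty aggregation list, and the map-side hash memory settings into a single GroupByDesc. As a hedged, standalone sketch (not taken from SemanticAnalyzer; the column name "key" and table alias "tab" are illustrative assumptions), a similar key-only descriptor can also be assembled with the setter style used in Example 26:

// Key-only GroupByDesc for a map-side HASH group-by over one assumed string column.
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
groupByKeys.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "key", "tab", false));
ArrayList<String> outputColumnNames = new ArrayList<String>();
outputColumnNames.add("_col0");
GroupByDesc desc = new GroupByDesc();
desc.setMode(GroupByDesc.Mode.HASH);                     // map-side hash aggregation
desc.setKeys(groupByKeys);
desc.setOutputColumnNames(outputColumnNames);
desc.setAggregators(new ArrayList<AggregationDesc>());   // no aggregates: deduplicate keys only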

Aggregations

GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc) 27
ArrayList (java.util.ArrayList) 24
VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) 16
AggregationDesc (org.apache.hadoop.hive.ql.plan.AggregationDesc) 15
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 14
HashMap (java.util.HashMap) 13
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext) 11
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator) 11
FakeCaptureOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureOutputOperator) 11
Operator (org.apache.hadoop.hive.ql.exec.Operator) 10
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) 10
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo) 9
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema) 9
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) 8
Mode (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) 8
Map (java.util.Map) 6
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator) 6
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator) 6
HashSet (java.util.HashSet) 5
LinkedHashMap (java.util.LinkedHashMap) 5