Search in sources :

Example 21 with GroupByDesc

use of org.apache.hadoop.hive.ql.plan.GroupByDesc in project hive by apache.

the class HiveGBOpConvUtil method genReduceSideGB2.

/**
 * Creates the {@code FINAL}-mode group-by operator that is attached as a child of the
 * reduce sink found at {@code inputOpAf.inputs.get(0)}.
 *
 * <p>The output columns are, in order: the group-by keys, an optional grouping-set id
 * column, and one column per UDAF. All output names are taken from
 * {@code gbInfo.outputColNames}.
 *
 * @param inputOpAf operator attributes whose first input is the feeding reduce sink
 * @param gbInfo group-by metadata (keys, grouping sets, UDAF attributes, output names)
 * @return an {@code OpAttr} with an empty table alias, an empty set and the new operator
 * @throws SemanticException propagated from the helpers this method calls
 */
private static OpAttr genReduceSideGB2(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
    ReduceSinkOperator reduceSink = (ReduceSinkOperator) inputOpAf.inputs.get(0);
    List<ColumnInfo> rsColumns = reduceSink.getSchema().getSignature();
    int keyCount = gbInfo.gbKeys.size();

    ArrayList<String> outNames = new ArrayList<>();
    ArrayList<ColumnInfo> outSchema = new ArrayList<>();
    Map<String, ExprNodeDesc> exprByOutName = new HashMap<>();

    // 1. Group-by keys and the position of the grouping-set column.
    // 1.1 The original group-by keys are the first keyCount columns of the reduce sink.
    ArrayList<ExprNodeDesc> keyExprs = ExprNodeDescUtils.genExprNodeDesc(reduceSink, 0, keyCount - 1, false, false);
    for (int k = 0; k < keyCount; k++) {
        ColumnInfo rsColumn = rsColumns.get(k);
        String keyOutName = gbInfo.outputColNames.get(k);
        outNames.add(keyOutName);
        outSchema.add(new ColumnInfo(keyOutName, rsColumn.getType(), "", false));
        exprByOutName.put(keyOutName, keyExprs.get(k));
    }

    // 1.2 Grouping-set id column; it stays at -1 when the column is not emitted.
    int grpSetPos = -1;
    if (inclGrpSetInReduceSide(gbInfo) && gbInfo.grpIdFunctionNeeded) {
        grpSetPos = keyExprs.size();
        String grpSetInternalName = rsColumns.get(grpSetPos).getInternalName();
        ExprNodeDesc grpSetExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, grpSetInternalName, null, false);
        keyExprs.add(grpSetExpr);
        // The grouping-set column uses the last of the configured output names.
        String grpSetOutName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1);
        outNames.add(grpSetOutName);
        outSchema.add(new ColumnInfo(grpSetOutName, TypeInfoFactory.stringTypeInfo, null, true));
        exprByOutName.put(grpSetOutName, grpSetExpr);
    }

    // 2. UDAFs. Their start offsets differ depending on whether grouping sets are present.
    int udafPosInOutputNames;
    int udafPosInRS;
    if (gbInfo.grpSets.isEmpty()) {
        udafPosInOutputNames = keyCount;
        udafPosInRS = keyCount;
    } else {
        udafPosInOutputNames = keyCount * 2;
        udafPosInRS = keyCount + 1;
    }

    ArrayList<AggregationDesc> aggDescs = new ArrayList<>();
    for (int u = 0; u < gbInfo.udafAttrs.size(); u++) {
        UDAFAttrs attr = gbInfo.udafAttrs.get(u);

        // Each UDAF takes exactly one reduce-sink column as its parameter here.
        ArrayList<ExprNodeDesc> params = new ArrayList<>();
        params.add(new ExprNodeColumnDesc(rsColumns.get(udafPosInRS + u)));

        String udafOutName = gbInfo.outputColNames.get(udafPosInOutputNames + u);
        outNames.add(udafOutName);

        Mode evalMode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.FINAL, attr.isDistinctUDAF);
        GenericUDAFInfo udafInfo = SemanticAnalyzer.getGenericUDAFInfo(attr.udafEvaluator, evalMode, params);
        aggDescs.add(new AggregationDesc(attr.udafName.toLowerCase(), udafInfo.genericUDAFEvaluator, udafInfo.convertedParameters, false, evalMode));
        outSchema.add(new ColumnInfo(udafOutName, udafInfo.returnType, "", false));
    }

    // 3. Assemble the descriptor and hang the operator below the reduce sink.
    GroupByDesc finalGbDesc = new GroupByDesc(GroupByDesc.Mode.FINAL, outNames, keyExprs, aggDescs, false, gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, null, false, grpSetPos, gbInfo.containsDistinctAggr);
    Operator finalGbOp = OperatorFactory.getAndMakeChild(finalGbDesc, new RowSchema(outSchema), reduceSink);
    finalGbOp.setColumnExprMap(exprByOutName);
    // TODO: Shouldn't we propagate vc? is it vc col from tab or all vc
    return new OpAttr("", new HashSet<Integer>(), finalGbOp);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) GenericUDAFInfo(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)

Example 22 with GroupByDesc

use of org.apache.hadoop.hive.ql.plan.GroupByDesc in project hive by apache.

the class HiveGBOpConvUtil method genReduceSideGB1.

/**
 * Builds a reduce-side group-by operator as a child of the reduce sink found at
 * {@code inputOpAf.inputs.get(0)}.
 *
 * <p>The output columns are, in order: the group-by keys, an optional grouping-set id
 * column, and one column per UDAF. When the physical pipeline mode is
 * {@code MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB} this operator is treated as the final group-by
 * and output columns are named from {@code gbInfo.outputColNames}; otherwise positional
 * internal column names are generated.
 *
 * @param inputOpAf operator attributes whose first input is the feeding reduce sink
 * @param gbInfo group-by metadata (keys, grouping sets, UDAF attributes, output names)
 * @param computeGrpSet when {@code true} the grouping-set key is the constant {@code "0"};
 *        when {@code false} it is read from the reduce sink at the grouping-set position
 * @param propagateConstInDistinctUDAF when {@code true}, a distinct UDAF parameter is
 *        replaced by the expression returned from
 *        {@code SemanticAnalyzer.isConstantParameterInAggregationParameters} if it is non-null
 * @param gbMode mode given to the {@code GroupByDesc} and used to derive each UDAF's mode
 * @return an {@code OpAttr} with an empty table alias, an empty set and the new operator
 * @throws SemanticException propagated from the helpers this method calls
 */
private static OpAttr genReduceSideGB1(OpAttr inputOpAf, GBInfo gbInfo, boolean computeGrpSet, boolean propagateConstInDistinctUDAF, GroupByDesc.Mode gbMode) throws SemanticException {
    ArrayList<String> outputColNames = new ArrayList<String>();
    ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    String colOutputName = null;
    ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0);
    List<ColumnInfo> rsColInfoLst = rs.getSchema().getSignature();
    ColumnInfo ci;
    // finalGB selects the naming scheme below: original output names vs. internal names.
    boolean finalGB = (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB);
    // 1. Build GB Keys, grouping set starting position
    // 1.1 First Add original GB Keys
    ArrayList<ExprNodeDesc> gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, gbInfo.gbKeys.size() - 1, false, false);
    for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
        ci = rsColInfoLst.get(i);
        if (finalGB) {
            colOutputName = gbInfo.outputColNames.get(i);
        } else {
            colOutputName = SemanticAnalyzer.getColumnInternalName(i);
        }
        outputColNames.add(colOutputName);
        colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), "", false));
        colExprMap.put(colOutputName, gbKeys.get(i));
    }
    // 1.2 Add GrpSet Col
    // Stays at -1 when no grouping-set column is emitted by this operator.
    int groupingSetsColPosition = -1;
    if ((!finalGB && gbInfo.grpSets.size() > 0) || (finalGB && gbInfo.grpIdFunctionNeeded)) {
        groupingSetsColPosition = gbInfo.gbKeys.size();
        if (computeGrpSet) {
            // GrpSet Col needs to be constructed
            gbKeys.add(new ExprNodeConstantDesc("0"));
        } else {
            // GrpSet Col already part of input RS
            // TODO: Can't we just copy the ExprNodeDesc from input (Do we need to
            // explicitly set table alias to null & VC to false
            gbKeys.addAll(ExprNodeDescUtils.genExprNodeDesc(rs, groupingSetsColPosition, groupingSetsColPosition, false, true));
        }
        // Default to the positional internal name; the final GB overrides it with the
        // last configured output name.
        colOutputName = SemanticAnalyzer.getColumnInternalName(groupingSetsColPosition);
        if (finalGB) {
            colOutputName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1);
        }
        outputColNames.add(colOutputName);
        colInfoLst.add(new ColumnInfo(colOutputName, TypeInfoFactory.stringTypeInfo, null, true));
        colExprMap.put(colOutputName, gbKeys.get(groupingSetsColPosition));
    }
    // 2. Walk through UDAF and add them to GB
    // Name of the last reduce-sink output key column; used to build the names of
    // distinct UDAF parameters. Stays null when the reduce sink has no output key columns.
    String lastReduceKeyColName = null;
    if (!rs.getConf().getOutputKeyColumnNames().isEmpty()) {
        lastReduceKeyColName = rs.getConf().getOutputKeyColumnNames().get(rs.getConf().getOutputKeyColumnNames().size() - 1);
    }
    int numDistinctUDFs = 0;
    // Captured after the optional grouping-set key was appended to gbKeys.
    int distinctStartPosInReduceKeys = gbKeys.size();
    List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols();
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    // Offset of the first UDAF name inside gbInfo.outputColNames.
    int udafColStartPosInOriginalGB = (gbInfo.grpSets.size() > 0) ? gbInfo.gbKeys.size() * 2 : gbInfo.gbKeys.size();
    // Non-distinct UDAF parameters are read from the RS schema starting after its key columns.
    int udafColStartPosInRS = rs.getConf().getKeyCols().size();
    for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
        UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i);
        ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
        if (udafAttr.isDistinctUDAF) {
            ColumnInfo rsDistUDAFParamColInfo;
            ExprNodeDesc distinctUDAFParam;
            ExprNodeDesc constantPropDistinctUDAFParam;
            for (int j = 0; j < udafAttr.udafParamsIndxInGBInfoDistExprs.size(); j++) {
                // j restarts at 0 for every distinct UDAF, so each one reads column info
                // from the same starting position in the RS schema.
                rsDistUDAFParamColInfo = rsColInfoLst.get(distinctStartPosInReduceKeys + j);
                String rsDistUDAFParamName = rsDistUDAFParamColInfo.getInternalName();
                // TODO: verify if this is needed
                if (lastReduceKeyColName != null) {
                    // Name has the form KEY.<lastKeyCol>:<distinctUdafOrdinal>.<internalName(j)>
                    rsDistUDAFParamName = Utilities.ReduceField.KEY.name() + "." + lastReduceKeyColName + ":" + numDistinctUDFs + "." + SemanticAnalyzer.getColumnInternalName(j);
                }
                distinctUDAFParam = new ExprNodeColumnDesc(rsDistUDAFParamColInfo.getType(), rsDistUDAFParamName, rsDistUDAFParamColInfo.getTabAlias(), rsDistUDAFParamColInfo.getIsVirtualCol());
                if (propagateConstInDistinctUDAF) {
                    // TODO: Implement propConstDistUDAFParams
                    constantPropDistinctUDAFParam = SemanticAnalyzer.isConstantParameterInAggregationParameters(rsDistUDAFParamColInfo.getInternalName(), reduceValues);
                    if (constantPropDistinctUDAFParam != null) {
                        distinctUDAFParam = constantPropDistinctUDAFParam;
                    }
                }
                aggParameters.add(distinctUDAFParam);
            }
            numDistinctUDFs++;
        } else {
            // NOTE(review): i counts every UDAF, distinct ones included; presumably the RS
            // schema is laid out so that this offset still lands on the right column - confirm.
            aggParameters.add(new ExprNodeColumnDesc(rsColInfoLst.get(udafColStartPosInRS + i)));
        }
        Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(gbMode, udafAttr.isDistinctUDAF);
        GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, aggParameters);
        // The distinct flag on the AggregationDesc is only set when gbMode is not FINAL.
        aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, (gbMode != GroupByDesc.Mode.FINAL && udafAttr.isDistinctUDAF), udafMode));
        if (finalGB) {
            colOutputName = gbInfo.outputColNames.get(udafColStartPosInOriginalGB + i);
        } else {
            // Positional internal name: keys first, then aggregations in insertion order.
            colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() - 1);
        }
        colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false));
        outputColNames.add(colOutputName);
    }
    // Nothing special needs to be done for grouping sets if this is the final
    // group by operator, and multiple rows corresponding to the grouping sets
    // have been generated upstream.
    // However, if an additional MR job has been created to handle grouping sets,
    // additional rows corresponding to grouping sets need to be created here.
    //TODO: Clean up/refactor assumptions
    boolean includeGrpSetInGBDesc = (gbInfo.grpSets.size() > 0) && !finalGB && !(gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT);
    Operator rsGBOp = OperatorFactory.getAndMakeChild(new GroupByDesc(gbMode, outputColNames, gbKeys, aggregations, gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, gbInfo.grpSets, includeGrpSetInGBDesc, groupingSetsColPosition, gbInfo.containsDistinctAggr), new RowSchema(colInfoLst), rs);
    rsGBOp.setColumnExprMap(colExprMap);
    return new OpAttr("", new HashSet<Integer>(), rsGBOp);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) HashMap(java.util.HashMap) GenericUDAFInfo(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc)

Example 23 with GroupByDesc

use of org.apache.hadoop.hive.ql.plan.GroupByDesc in project hive by apache.

the class HiveGBOpConvUtil method genReduceSideGB1NoMapGB.

/**
 * RS-GB0: builds the reduce-side group-by operator as a child of the reduce sink found at
 * {@code inputOpAf.inputs.get(0)}.
 *
 * <p>The output columns are the group-by keys followed by one column per UDAF. When the
 * physical pipeline mode is {@code NO_MAP_SIDE_GB_NO_SKEW} the columns are named from
 * {@code gbInfo.outputColNames}; otherwise positional internal column names are generated.
 *
 * @param inputOpAf operator attributes whose first input is the feeding reduce sink
 * @param gbInfo group-by metadata (keys, UDAF attributes, output names, memory settings)
 * @param gbMode mode given to the {@code GroupByDesc} and used to derive each UDAF's mode
 * @return an {@code OpAttr} with an empty table alias, an empty set and the new operator
 * @throws SemanticException propagated from the helpers this method calls
 */
private static OpAttr genReduceSideGB1NoMapGB(OpAttr inputOpAf, GBInfo gbInfo, GroupByDesc.Mode gbMode) throws SemanticException {
    ArrayList<String> outputColNames = new ArrayList<String>();
    ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0);
    List<ColumnInfo> rsColInfoLst = rs.getSchema().getSignature();
    boolean useOriginalGBNames = (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.NO_MAP_SIDE_GB_NO_SKEW);

    // 1. Build GB Keys
    // 1.1 First add the original GB keys (the first columns of the reduce sink)
    ArrayList<ExprNodeDesc> gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, gbInfo.gbKeys.size() - 1, true, false);
    for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
        ColumnInfo ci = rsColInfoLst.get(i);
        String colOutputName;
        if (useOriginalGBNames) {
            colOutputName = gbInfo.outputColNames.get(i);
        } else {
            colOutputName = SemanticAnalyzer.getColumnInternalName(i);
        }
        outputColNames.add(colOutputName);
        colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), null, false));
        colExprMap.put(colOutputName, gbKeys.get(i));
    }

    // 2. Walk through UDAFs and add them to the GB
    // Name of the last reduce-sink output key column; used to build the names of
    // distinct UDAF parameters. Stays null when the reduce sink has no output key columns.
    String lastReduceKeyColName = null;
    List<String> rsOutputKeyColNames = rs.getConf().getOutputKeyColumnNames();
    if (!rsOutputKeyColNames.isEmpty()) {
        lastReduceKeyColName = rsOutputKeyColNames.get(rsOutputKeyColNames.size() - 1);
    }
    int numDistinctUDFs = 0;
    List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols();
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    int udafColStartPosInOriginalGB = gbInfo.gbKeys.size();

    // The positions in rsColInfoLst are as follows:
    //   --grpkey--,--distkey--,--values--
    // but a distinct UDAF may come before/after some non-distinct UDAF, i.e. their
    // positions can be mixed. So every UDAF parameter is located through getColInfoPos
    // rather than by a running offset.
    //
    // 2.1 Resolve the parameters of every UDAF first, keyed by UDAF index.
    Map<Integer, ArrayList<ExprNodeDesc>> indexToParameter = new TreeMap<>();
    for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
        UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i);
        ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
        for (int j = 0; j < udafAttr.udafParams.size(); j++) {
            int argPos = getColInfoPos(udafAttr.udafParams.get(j), gbInfo);
            ColumnInfo rsUDAFParamColInfo = rsColInfoLst.get(argPos);
            String rsUDAFParamName = rsUDAFParamColInfo.getInternalName();
            if (udafAttr.isDistinctUDAF && lastReduceKeyColName != null) {
                // Name has the form KEY.<lastKeyCol>:<distinctUdafOrdinal>.<internalName(j)>
                rsUDAFParamName = Utilities.ReduceField.KEY.name() + "." + lastReduceKeyColName + ":" + numDistinctUDFs + "." + SemanticAnalyzer.getColumnInternalName(j);
            }
            ExprNodeDesc udafParam = new ExprNodeColumnDesc(rsUDAFParamColInfo.getType(), rsUDAFParamName, rsUDAFParamColInfo.getTabAlias(), rsUDAFParamColInfo.getIsVirtualCol());
            // Prefer the expression returned by the constant check, when there is one.
            ExprNodeDesc constantPropDistinctUDAFParam = SemanticAnalyzer.isConstantParameterInAggregationParameters(rsUDAFParamColInfo.getInternalName(), reduceValues);
            if (constantPropDistinctUDAFParam != null) {
                udafParam = constantPropDistinctUDAFParam;
            }
            aggParameters.add(udafParam);
        }
        indexToParameter.put(i, aggParameters);
        if (udafAttr.isDistinctUDAF) {
            numDistinctUDFs++;
        }
    }

    // 2.2 Build the aggregation descriptors in ascending UDAF index order (TreeMap order).
    for (Map.Entry<Integer, ArrayList<ExprNodeDesc>> entry : indexToParameter.entrySet()) {
        int index = entry.getKey();
        UDAFAttrs udafAttr = gbInfo.udafAttrs.get(index);
        Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(gbMode, udafAttr.isDistinctUDAF);
        GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, entry.getValue());
        aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, udafAttr.isDistinctUDAF, udafMode));
        String colOutputName;
        if (useOriginalGBNames) {
            colOutputName = gbInfo.outputColNames.get(udafColStartPosInOriginalGB + index);
        } else {
            // Positional internal name: keys first, then aggregations in insertion order.
            colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() - 1);
        }
        colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false));
        outputColNames.add(colOutputName);
    }

    // 3. Create the operator below the reduce sink; no grouping sets are passed here.
    Operator rsGB1 = OperatorFactory.getAndMakeChild(new GroupByDesc(gbMode, outputColNames, gbKeys, aggregations, false, gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, null, false, -1, numDistinctUDFs > 0), new RowSchema(colInfoLst), rs);
    rsGB1.setColumnExprMap(colExprMap);
    return new OpAttr("", new HashSet<Integer>(), rsGB1);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) HashMap(java.util.HashMap) GenericUDAFInfo(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) TreeMap(java.util.TreeMap) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc)

Example 24 with GroupByDesc

use of org.apache.hadoop.hive.ql.plan.GroupByDesc in project hive by apache.

the class TestVectorGroupByOperator method buildKeyGroupByDesc.

/**
 * Builds a {@code PARTIAL1} group-by descriptor via {@code buildGroupByDescType}, switches
 * its vector descriptor to {@code HASH} processing mode and gives it a single key column.
 *
 * @param ctx vectorization context passed to the descriptor builders
 * @param aggregate aggregate name passed to {@code buildGroupByDescType}
 * @param column name of the aggregated column
 * @param dataTypeInfo type of the aggregated column
 * @param key name of the key column
 * @param keyTypeInfo type of the key column
 * @return the descriptor, with {@code "_col1"} appended to its output column names
 */
private static GroupByDesc buildKeyGroupByDesc(VectorizationContext ctx, String aggregate, String column, TypeInfo dataTypeInfo, String key, TypeInfo keyTypeInfo) {
    GroupByDesc groupByDesc = buildGroupByDescType(ctx, aggregate, GenericUDAFEvaluator.Mode.PARTIAL1, column, dataTypeInfo);

    VectorGroupByDesc vectorDesc = (VectorGroupByDesc) groupByDesc.getVectorDesc();
    vectorDesc.setProcessingMode(ProcessingMode.HASH);

    // Exactly one group-by key: the requested key column.
    ExprNodeDesc keyColumn = buildColumnDesc(ctx, key, keyTypeInfo);
    ArrayList<ExprNodeDesc> keyExprs = new ArrayList<ExprNodeDesc>();
    keyExprs.add(keyColumn);
    groupByDesc.setKeys(keyExprs);

    groupByDesc.getOutputColumnNames().add("_col1");
    return groupByDesc;
}
Also used : VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)

Example 25 with GroupByDesc

use of org.apache.hadoop.hive.ql.plan.GroupByDesc in project hive by apache.

the class TestVectorGroupByOperator method testAggregateStringIterable.

/**
 * Runs a {@code PARTIAL1} aggregate with no keys over the string column {@code "A"} and
 * validates the single captured output row against {@code expected}.
 *
 * @param aggregateName aggregate name passed to {@code buildGroupByDescType} and used to
 *        pick the validator
 * @param data batches fed to the vectorized group-by operator, in iteration order
 * @param expected value handed to the validator together with the captured row
 * @throws HiveException declared for the operator calls made here
 */
public void testAggregateStringIterable(String aggregateName, Iterable<VectorizedRowBatch> data, Object expected) throws HiveException {
    // The vectorization context exposes exactly one column, "A".
    List<String> columnNames = new ArrayList<String>();
    columnNames.add("A");
    VectorizationContext vectorCtx = new VectorizationContext("name", columnNames);

    GroupByDesc groupByDesc = buildGroupByDescType(vectorCtx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.stringTypeInfo);

    // Build the row-mode operator, vectorize it and attach a capturing child.
    CompilationOpContext opContext = new CompilationOpContext();
    Operator<? extends OperatorDesc> rowModeGroupBy = OperatorFactory.get(opContext, groupByDesc);
    VectorGroupByOperator vectorGroupBy = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(rowModeGroupBy, vectorCtx);
    FakeCaptureOutputOperator capture = FakeCaptureOutputOperator.addCaptureOutputChild(opContext, vectorGroupBy);
    vectorGroupBy.initialize(hconf, null);

    for (VectorizedRowBatch batch : data) {
        vectorGroupBy.process(batch, 0);
    }
    vectorGroupBy.close(false);

    // With no keys, exactly one output row is expected.
    List<Object> capturedRows = capture.getCapturedRows();
    assertNotNull(capturedRows);
    assertEquals(1, capturedRows.size());

    Object onlyRow = capturedRows.get(0);
    getValidator(aggregateName).validate("_total", expected, onlyRow);
}
Also used : ArrayList(java.util.ArrayList) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) FakeCaptureOutputOperator(org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureOutputOperator)

Aggregations

GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc)27 ArrayList (java.util.ArrayList)24 VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc)16 AggregationDesc (org.apache.hadoop.hive.ql.plan.AggregationDesc)15 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)14 HashMap (java.util.HashMap)13 CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext)11 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)11 FakeCaptureOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureOutputOperator)11 Operator (org.apache.hadoop.hive.ql.exec.Operator)10 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)10 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)9 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)9 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)8 Mode (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode)8 Map (java.util.Map)6 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)6 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)6 HashSet (java.util.HashSet)5 LinkedHashMap (java.util.LinkedHashMap)5