Example 6 with OpAttr

Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr in project hive by apache.

The class HiveGBOpConvUtil, method translateGB.

static OpAttr translateGB(OpAttr inputOpAf, HiveAggregate aggRel, HiveConf hc) throws SemanticException {
    OpAttr translatedGBOpAttr = null;
    GBInfo gbInfo = getGBInfo(aggRel, inputOpAf, hc);
    switch(gbInfo.gbPhysicalPipelineMode) {
        case MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB:
            translatedGBOpAttr = genMapSideGBNoSkewNoAddMRJob(inputOpAf, aggRel, gbInfo);
            break;
        case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB:
            translatedGBOpAttr = genMapSideGBNoSkewAddMRJob(inputOpAf, aggRel, gbInfo);
            break;
        case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT:
            translatedGBOpAttr = genMapSideGBSkewGBKeysOrDistUDAFPresent(inputOpAf, aggRel, gbInfo);
            break;
        case MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT:
            translatedGBOpAttr = genMapSideGBSkewGBKeysAndDistUDAFNotPresent(inputOpAf, aggRel, gbInfo);
            break;
        case NO_MAP_SIDE_GB_NO_SKEW:
            translatedGBOpAttr = genNoMapSideGBNoSkew(inputOpAf, aggRel, gbInfo);
            break;
        case NO_MAP_SIDE_GB_SKEW:
            translatedGBOpAttr = genNoMapSideGBSkew(inputOpAf, aggRel, gbInfo);
            break;
    }
    return translatedGBOpAttr;
}
Also used : OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr)
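
The switch above dispatches on gbInfo.gbPhysicalPipelineMode, which getGBInfo derives from the session settings and the shape of the aggregate; that derivation is not shown on this page. The sketch below is only a hypothetical illustration of how the six enum values (whose names are taken from the switch) could map onto map-side aggregation (hive.map.aggr), skew handling (hive.groupby.skewindata), the presence of GB keys or distinct UDAFs, and whether grouping sets force an additional MR job. pickPipelineMode and its parameters are invented for this sketch.

// Hypothetical sketch, not the actual getGBInfo() logic: one plausible way the six
// pipeline modes used in the switch above could be chosen. Enum constant names are
// taken from the switch; the decision criteria below are assumptions.
enum HIVEGBPHYSICALMODE {
    MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB,
    MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB,
    MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT,
    MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT,
    NO_MAP_SIDE_GB_NO_SKEW,
    NO_MAP_SIDE_GB_SKEW
}

static HIVEGBPHYSICALMODE pickPipelineMode(boolean mapSideAggr,             // hive.map.aggr
                                           boolean skewHandling,            // hive.groupby.skewindata
                                           boolean gbKeysOrDistUdafPresent, // assumed criterion
                                           boolean grpSetsNeedExtraMRJob) {
    if (!mapSideAggr) {
        // No map-side GB: the only question is whether skew handling is on.
        return skewHandling ? HIVEGBPHYSICALMODE.NO_MAP_SIDE_GB_SKEW
                            : HIVEGBPHYSICALMODE.NO_MAP_SIDE_GB_NO_SKEW;
    }
    if (!skewHandling) {
        // Map-side GB, no skew: grouping sets may still force an additional MR job.
        return grpSetsNeedExtraMRJob ? HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB
                                     : HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB;
    }
    // Map-side GB with skew handling: split on whether GB keys or distinct UDAFs are present.
    return gbKeysOrDistUdafPresent ? HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT
                                   : HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT;
}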

Example 7 with OpAttr

Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr in project hive by apache.

The class HiveGBOpConvUtil, method genGBRSGBRSGBOpPipeLine.

/**
   * GB-RS-GB1-RS-GB2
   */
private static OpAttr genGBRSGBRSGBOpPipeLine(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException {
    OpAttr mapSideGB = null;
    OpAttr mapSideRS = null;
    OpAttr reduceSideGB1 = null;
    OpAttr reduceSideRS = null;
    OpAttr reduceSideGB2 = null;
    // 1. Insert MapSide GB
    mapSideGB = genMapSideGB(inputOpAf, gbInfo);
    // 2. Insert MapSide RS
    mapSideRS = genMapSideGBRS(mapSideGB, gbInfo);
    // 3. Insert ReduceSide GB1
    boolean computeGrpSet = gbInfo.gbPhysicalPipelineMode != HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT;
    reduceSideGB1 = genReduceSideGB1(mapSideRS, gbInfo, computeGrpSet, false, GroupByDesc.Mode.PARTIALS);
    // 4. Insert RS on reduce side with Reduce side GB as input
    reduceSideRS = genReduceGBRS(reduceSideGB1, gbInfo);
    // 5. Insert ReduceSide GB2
    reduceSideGB2 = genReduceSideGB2(reduceSideRS, gbInfo);
    return reduceSideGB2;
}
Also used : OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr)
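
For orientation, the three GB stages in this pipeline run in different GroupByDesc.Mode values. Only GB1's mode (PARTIALS) is visible in the code above; the modes used by genMapSideGB and genReduceSideGB2 are not shown on this page, so the values given for those two stages below are assumptions, not facts from the snippet.

import org.apache.hadoop.hive.ql.plan.GroupByDesc;

// Sketch of the aggregation-mode progression along GB-RS-GB1-RS-GB2.
class GbModeProgression {
    // Assumed: the map-side GB does hash-based partial aggregation.
    static final GroupByDesc.Mode MAP_SIDE_GB = GroupByDesc.Mode.HASH;
    // From the snippet above: genReduceSideGB1(..., GroupByDesc.Mode.PARTIALS).
    static final GroupByDesc.Mode REDUCE_SIDE_GB1 = GroupByDesc.Mode.PARTIALS;
    // Assumed: the last GB merges the partials into final aggregates.
    static final GroupByDesc.Mode REDUCE_SIDE_GB2 = GroupByDesc.Mode.FINAL;
}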

Example 8 with OpAttr

Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr in project hive by apache.

The class HiveGBOpConvUtil, method genMapSideGBSkewGBKeysAndDistUDAFNotPresent.

/**
   * GB-RS-GB2
   *
   * @param inputOpAf OpAttr wrapping the input operator
   * @param aggRel the Calcite aggregate being translated
   * @param gbInfo precomputed group-by pipeline info
   * @return OpAttr wrapping the final reduce-side GroupByOperator
   * @throws SemanticException
   */
private static OpAttr genMapSideGBSkewGBKeysAndDistUDAFNotPresent(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException {
    OpAttr mapSideGB = null;
    OpAttr mapSideRS = null;
    OpAttr reduceSideGB2 = null;
    // 1. Sanity check
    if (gbInfo.grpSetRqrAdditionalMRJob) {
        String errorMsg = "The number of rows per input row due to grouping sets is " + gbInfo.grpSets.size();
        throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_SKEW.getMsg(errorMsg));
    }
    // 2. Insert MapSide GB
    mapSideGB = genMapSideGB(inputOpAf, gbInfo);
    // 3. Insert MapSide RS
    mapSideRS = genMapSideGBRS(mapSideGB, gbInfo);
    // 4. Insert ReduceSide GB2
    reduceSideGB2 = genReduceSideGB2(mapSideRS, gbInfo);
    return reduceSideGB2;
}
Also used : OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
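
The sanity check exists because grouping sets can multiply each input row, and once that blow-up exceeds a threshold Hive normally evaluates the grouping sets in an additional MR job, which this skew pipeline cannot accommodate. The actual flag is computed in getGBInfo, which is not shown here; the fragment below is only a hedged guess at the kind of check involved, using the hive.new.job.grouping.set.cardinality setting.

import java.util.List;
import org.apache.hadoop.hive.conf.HiveConf;

// Hypothetical sketch of how gbInfo.grpSetRqrAdditionalMRJob might be derived; the
// real computation lives in getGBInfo and may differ.
static boolean groupingSetsNeedExtraMRJob(List<?> grpSets, HiveConf hc) {
    // Threshold above which Hive prefers to evaluate grouping sets in a separate MR job.
    int threshold = hc.getInt("hive.new.job.grouping.set.cardinality", 30);
    return !grpSets.isEmpty() && grpSets.size() > threshold;
}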

Example 9 with OpAttr

Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr in project hive by apache.

The class HiveGBOpConvUtil, method genMapSideGBRS.

private static OpAttr genMapSideGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    List<String> outputKeyColumnNames = new ArrayList<String>();
    List<String> outputValueColumnNames = new ArrayList<String>();
    ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
    GroupByOperator mapGB = (GroupByOperator) inputOpAf.inputs.get(0);
    ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForRS(mapGB, 0, gbInfo.gbKeys.size() - 1, outputKeyColumnNames, false, colInfoLst, colExprMap, false, false);
    int keyLength = reduceKeys.size();
    if (inclGrpSetInMapSide(gbInfo)) {
        addGrpSetCol(false, SemanticAnalyzer.getColumnInternalName(reduceKeys.size()), true, reduceKeys, outputKeyColumnNames, colInfoLst, colExprMap);
        keyLength++;
    }
    if (mapGB.getConf().getKeys().size() > reduceKeys.size()) {
        // NOTE: All dist cols share a single output col name.
        reduceKeys.addAll(getReduceKeysForRS(mapGB, reduceKeys.size(), mapGB.getConf().getKeys().size() - 1, outputKeyColumnNames, true, colInfoLst, colExprMap, false, false));
    } else if (!gbInfo.distColIndices.isEmpty()) {
        // This is the case where the distinct cols are part of the GB keys, in which case
        // we still need to add them to the output col names.
        outputKeyColumnNames.add(SemanticAnalyzer.getColumnInternalName(reduceKeys.size()));
    }
    ArrayList<ExprNodeDesc> reduceValues = getValueKeysForRS(mapGB, mapGB.getConf().getKeys().size(), outputValueColumnNames, colInfoLst, colExprMap, false, false);
    ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, keyLength, reduceValues, gbInfo.distColIndices, outputKeyColumnNames, outputValueColumnNames, true, -1, getNumPartFieldsForMapSideRS(gbInfo), getParallelismForMapSideRS(gbInfo), AcidUtils.Operation.NOT_ACID), new RowSchema(colInfoLst), mapGB);
    rsOp.setColumnExprMap(colExprMap);
    return new OpAttr("", new HashSet<Integer>(), rsOp);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
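
All of these helpers thread an OpAttr through the pipeline: the example above reads the upstream operator with inputOpAf.inputs.get(0) and returns a new OpAttr wrapping the ReduceSinkOperator it created. A minimal sketch of what the holder appears to carry, inferred only from those call sites (the field name inputs is real; the other names are assumptions about the real nested HiveOpConverter.OpAttr):

import java.util.Arrays;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

// Sketch only; not the actual HiveOpConverter.OpAttr source.
class OpAttrSketch {
    final String tabAlias;                                // "" in the call above
    final Set<Integer> virtualCols;                       // assumed meaning of the Set<Integer> argument
    final List<Operator<? extends OperatorDesc>> inputs;  // operator(s) produced by this stage

    OpAttrSketch(String tabAlias, Set<Integer> virtualCols, Operator<? extends OperatorDesc>... inputs) {
        this.tabAlias = tabAlias;
        this.virtualCols = virtualCols;
        this.inputs = Arrays.asList(inputs);
    }
}

Downstream helpers then pick up inputs.get(0) as the parent of the next operator they create, which is why each gen* method can be chained into the pipelines shown in the other examples.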

Example 10 with OpAttr

Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr in project hive by apache.

The class HiveGBOpConvUtil, method genNoMapSideGBSkew.

/**
   * RS-GB1-RS-GB2
   *
   * @param inputOpAf OpAttr wrapping the input operator
   * @param aggRel the Calcite aggregate being translated
   * @param gbInfo precomputed group-by pipeline info
   * @return OpAttr wrapping the final reduce-side GroupByOperator
   * @throws SemanticException
   */
private static OpAttr genNoMapSideGBSkew(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException {
    OpAttr mapSideRS = null;
    OpAttr reduceSideGB1NoMapGB = null;
    OpAttr reduceSideRS = null;
    OpAttr reduceSideGB2 = null;
    // 1. Insert MapSide RS
    mapSideRS = genMapSideRS(inputOpAf, gbInfo);
    // 2. Insert ReduceSide GB
    reduceSideGB1NoMapGB = genReduceSideGB1NoMapGB(mapSideRS, gbInfo, GroupByDesc.Mode.PARTIAL1);
    // 3. Insert RS on reduce side with Reduce side GB as input
    reduceSideRS = genReduceGBRS(reduceSideGB1NoMapGB, gbInfo);
    // 4. Insert ReduceSide GB2
    reduceSideGB2 = genReduceSideGB2(reduceSideRS, gbInfo);
    return reduceSideGB2;
}
Also used : OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr)

Aggregations

OpAttr (org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr) 13 uses
ArrayList (java.util.ArrayList) 7 uses
HashMap (java.util.HashMap) 7 uses
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo) 7 uses
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) 7 uses
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema) 7 uses
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 7 uses
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator) 6 uses
Operator (org.apache.hadoop.hive.ql.exec.Operator) 4 uses
GenericUDAFInfo (org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo) 4 uses
AggregationDesc (org.apache.hadoop.hive.ql.plan.AggregationDesc) 4 uses
GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc) 4 uses
Mode (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) 4 uses
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) 3 uses
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException) 2 uses
HashSet (java.util.HashSet) 1 use
TreeMap (java.util.TreeMap) 1 use
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) 1 use