Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr in the Apache Hive project: class HiveGBOpConvUtil, method translateGB.
/**
 * Translates a Calcite {@link HiveAggregate} into the Hive GroupBy operator
 * pipeline that matches the physical execution mode chosen by
 * {@code getGBInfo} (map-side vs. reduce-side GB, skew handling, extra MR job).
 *
 * @param inputOpAf operator attributes of the aggregate's input
 * @param aggRel    the Calcite aggregate node to translate
 * @param hc        Hive configuration used when deciding the physical GB mode
 * @return operator attributes of the root of the translated GB pipeline
 * @throws SemanticException if translation fails, or if the computed physical
 *                           pipeline mode is not one of the handled cases
 */
static OpAttr translateGB(OpAttr inputOpAf, HiveAggregate aggRel, HiveConf hc) throws SemanticException {
  GBInfo gbInfo = getGBInfo(aggRel, inputOpAf, hc);
  switch (gbInfo.gbPhysicalPipelineMode) {
    case MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB:
      return genMapSideGBNoSkewNoAddMRJob(inputOpAf, aggRel, gbInfo);
    case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB:
      return genMapSideGBNoSkewAddMRJob(inputOpAf, aggRel, gbInfo);
    case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT:
      return genMapSideGBSkewGBKeysOrDistUDAFPresent(inputOpAf, aggRel, gbInfo);
    case MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT:
      return genMapSideGBSkewGBKeysAndDistUDAFNotPresent(inputOpAf, aggRel, gbInfo);
    case NO_MAP_SIDE_GB_NO_SKEW:
      return genNoMapSideGBNoSkew(inputOpAf, aggRel, gbInfo);
    case NO_MAP_SIDE_GB_SKEW:
      return genNoMapSideGBSkew(inputOpAf, aggRel, gbInfo);
    default:
      // Previously an unhandled mode fell through and returned null, which
      // would surface later as an NPE far from the cause. Fail fast instead.
      throw new SemanticException(
          "Unexpected GB physical pipeline mode: " + gbInfo.gbPhysicalPipelineMode);
  }
}
Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr in the Apache Hive project: class HiveGBOpConvUtil, method genGBRSGBRSGBOpPipeLine.
/**
 * Builds the GB-RS-GB1-RS-GB2 pipeline: map-side GroupBy, map-side
 * ReduceSink, first reduce-side GroupBy (PARTIALS), a second ReduceSink,
 * and the final reduce-side GroupBy.
 *
 * @param inputOpAf operator attributes of the aggregate's input
 * @param aggRel    the Calcite aggregate node being translated
 * @param gbInfo    group-by translation state
 * @return operator attributes of the final reduce-side GroupBy
 * @throws SemanticException on translation failure
 */
private static OpAttr genGBRSGBRSGBOpPipeLine(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException {
  // 1. Insert MapSide GB
  OpAttr mapSideGB = genMapSideGB(inputOpAf, gbInfo);
  // 2. Insert MapSide RS
  OpAttr mapSideRS = genMapSideGBRS(mapSideGB, gbInfo);
  // 3. Insert ReduceSide GB1. GB1 computes the grouping set unless the mode is
  //    MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT — presumably because that
  //    mode handles the grouping-set column on the map side (TODO confirm).
  //    (Was written as the `(cond) ? false : true` anti-idiom.)
  boolean computeGrpSet =
      gbInfo.gbPhysicalPipelineMode != HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT;
  OpAttr reduceSideGB1 = genReduceSideGB1(mapSideRS, gbInfo, computeGrpSet, false, GroupByDesc.Mode.PARTIALS);
  // 4. Insert RS on reduce side with Reduce side GB as input
  OpAttr reduceSideRS = genReduceGBRS(reduceSideGB1, gbInfo);
  // 5. Insert ReduceSide GB2
  return genReduceSideGB2(reduceSideRS, gbInfo);
}
Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr in the Apache Hive project: class HiveGBOpConvUtil, method genMapSideGBSkewGBKeysAndDistUDAFNotPresent.
/**
 * Builds the GB-RS-GB2 pipeline for the skew case where neither grouping-set
 * keys nor distinct UDAFs are present: map-side GroupBy, map-side ReduceSink,
 * then the final reduce-side GroupBy.
 *
 * @param inputOpAf operator attributes of the aggregate's input
 * @param aggRel    the Calcite aggregate node being translated
 * @param gbInfo    group-by translation state
 * @return operator attributes of the final reduce-side GroupBy
 * @throws SemanticException if grouping sets would require an additional MR
 *                           job (not allowed together with skew handling), or
 *                           on any other translation failure
 */
private static OpAttr genMapSideGBSkewGBKeysAndDistUDAFNotPresent(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException {
  // Sanity check: grouping sets needing an extra MR job are incompatible
  // with this skew pipeline.
  if (gbInfo.grpSetRqrAdditionalMRJob) {
    String errorMsg = "The number of rows per input row due to grouping sets is " + gbInfo.grpSets.size();
    throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_SKEW.getMsg(errorMsg));
  }
  // 1. Map-side GroupBy.
  final OpAttr mapGB = genMapSideGB(inputOpAf, gbInfo);
  // 2. Map-side ReduceSink.
  final OpAttr mapRS = genMapSideGBRS(mapGB, gbInfo);
  // 3. Final reduce-side GroupBy.
  return genReduceSideGB2(mapRS, gbInfo);
}
Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr in the Apache Hive project: class HiveGBOpConvUtil, method genMapSideGBRS.
/**
 * Builds the map-side ReduceSink (RS) operator on top of an existing
 * map-side GroupBy: GB keys (plus an optional grouping-set column and any
 * distinct-expression columns) become reduce keys; the remaining GB outputs
 * become reduce values.
 *
 * @param inputOpAf attributes whose first input is the map-side GroupByOperator
 * @param gbInfo    group-by translation state (GB keys, distinct column
 *                  indices, grouping-set info)
 * @return a fresh OpAttr (empty alias, empty vcol set) wrapping the new
 *         ReduceSinkOperator
 * @throws SemanticException on translation failure
 */
private static OpAttr genMapSideGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  List<String> outputKeyColumnNames = new ArrayList<String>();
  List<String> outputValueColumnNames = new ArrayList<String>();
  ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
  GroupByOperator mapGB = (GroupByOperator) inputOpAf.inputs.get(0);
  // Reduce keys start with the plain GB keys, i.e. map-GB key positions
  // [0 .. gbKeys.size()-1].
  ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForRS(mapGB, 0, gbInfo.gbKeys.size() - 1, outputKeyColumnNames, false, colInfoLst, colExprMap, false, false);
  // keyLength counts key columns excluding the distinct-expression columns
  // appended below; it is passed separately to getReduceSinkDesc.
  int keyLength = reduceKeys.size();
  // If the grouping-set column was produced on the map side, it is part of
  // the key as well.
  if (inclGrpSetInMapSide(gbInfo)) {
    addGrpSetCol(false, SemanticAnalyzer.getColumnInternalName(reduceKeys.size()), true, reduceKeys, outputKeyColumnNames, colInfoLst, colExprMap);
    keyLength++;
  }
  // Any map-GB keys beyond those gathered so far are distinct-expression
  // columns; append them to the reduce keys.
  if (mapGB.getConf().getKeys().size() > reduceKeys.size()) {
    // NOTE: All dist cols have single output col name;
    reduceKeys.addAll(getReduceKeysForRS(mapGB, reduceKeys.size(), mapGB.getConf().getKeys().size() - 1, outputKeyColumnNames, true, colInfoLst, colExprMap, false, false));
  } else if (!gbInfo.distColIndices.isEmpty()) {
    // This is the case where distinct cols are part of GB Keys in which case
    // we still need to add it to out put col names
    outputKeyColumnNames.add(SemanticAnalyzer.getColumnInternalName(reduceKeys.size()));
  }
  // Everything after the map-GB keys becomes a reduce value.
  ArrayList<ExprNodeDesc> reduceValues = getValueKeysForRS(mapGB, mapGB.getConf().getKeys().size(), outputValueColumnNames, colInfoLst, colExprMap, false, false);
  // Assemble the RS descriptor and attach the operator as a child of the
  // map-side GB. The -1 argument is presumably the default tag — confirm
  // against PlanUtils.getReduceSinkDesc.
  ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, keyLength, reduceValues, gbInfo.distColIndices, outputKeyColumnNames, outputValueColumnNames, true, -1, getNumPartFieldsForMapSideRS(gbInfo), getParallelismForMapSideRS(gbInfo), AcidUtils.Operation.NOT_ACID), new RowSchema(colInfoLst), mapGB);
  rsOp.setColumnExprMap(colExprMap);
  return new OpAttr("", new HashSet<Integer>(), rsOp);
}
Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr in the Apache Hive project: class HiveGBOpConvUtil, method genNoMapSideGBSkew.
/**
 * Builds the RS-GB1-RS-GB2 pipeline (no map-side GroupBy): shuffle the raw
 * input, run a first reduce-side GroupBy in PARTIAL1 mode, re-shuffle the
 * partial results, then run the final reduce-side GroupBy.
 *
 * @param inputOpAf operator attributes of the aggregate's input
 * @param aggRel    the Calcite aggregate node being translated
 * @param gbInfo    group-by translation state
 * @return operator attributes of the final reduce-side GroupBy
 * @throws SemanticException on translation failure
 */
private static OpAttr genNoMapSideGBSkew(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException {
  // Stage 1: map-side ReduceSink over the raw input rows.
  final OpAttr shuffled = genMapSideRS(inputOpAf, gbInfo);
  // Stage 2: first reduce-side GroupBy (PARTIAL1, since no map-side GB ran).
  final OpAttr firstGB = genReduceSideGB1NoMapGB(shuffled, gbInfo, GroupByDesc.Mode.PARTIAL1);
  // Stage 3: ReduceSink over the partial aggregates.
  final OpAttr reshuffled = genReduceGBRS(firstGB, gbInfo);
  // Stage 4: final reduce-side GroupBy.
  return genReduceSideGB2(reshuffled, gbInfo);
}
Aggregations