use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.
the class HiveGBOpConvUtil method genMapSideGBNoSkewNoAddMRJob.
/**
 * Assembles the GB-RS-GB1 operator chain.
 *
 * <p>Selected when the user has enabled map-side group-by, specified no skew,
 * and the grouping-set count is below the threshold.
 *
 * @param inputOpAf attributes of the operator feeding the map-side group-by
 * @param aggRel the aggregate being translated (not referenced by this method)
 * @param gbInfo group-by description handed to every stage of the chain
 * @return attributes of the reduce-side group-by that ends the chain
 * @throws SemanticException if one of the stages cannot be generated
 */
private static OpAttr genMapSideGBNoSkewNoAddMRJob(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException {
  // Each stage consumes the previous one: map-side GB -> map-side RS -> reduce-side GB.
  final OpAttr hashGroupBy = genMapSideGB(inputOpAf, gbInfo);
  final OpAttr shuffle = genMapSideGBRS(hashGroupBy, gbInfo);
  // The reduce side merges the partial results; no grouping-set computation and
  // no constant propagation into distinct UDAF parameters is requested.
  return genReduceSideGB1(shuffle, gbInfo, false, false, GroupByDesc.Mode.MERGEPARTIAL);
}
use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.
the class HiveGBOpConvUtil method genMapSideGB.
/**
 * Creates the map-side, HASH-mode GroupBy operator as a child of the first
 * operator in {@code inputOpAf.inputs}.
 *
 * <p>Output columns are laid out in this order, each named with a generated
 * internal column name:
 * <ol>
 *   <li>the original group-by keys,</li>
 *   <li>whatever {@code addGrpSetCol} contributes, when
 *       {@code inclGrpSetInMapSide(gbAttrs)} is true,</li>
 *   <li>every distinct expression whose name is not already among the keys,</li>
 *   <li>one column per UDAF.</li>
 * </ol>
 * Only key columns are registered in the operator's column-expression map;
 * UDAF outputs are not.
 *
 * @param inputOpAf attributes whose first input becomes the parent of the new operator
 * @param gbAttrs group-by description (keys, distinct expressions, UDAFs, memory settings)
 * @return attributes wrapping the new operator, with an empty string and an empty integer set
 * @throws SemanticException declared by this method
 * @throws RuntimeException wrapping any {@link SemanticException} raised while
 *         resolving a UDAF through {@code SemanticAnalyzer.getGenericUDAFInfo}
 */
@SuppressWarnings("unchecked")
private static OpAttr genMapSideGB(OpAttr inputOpAf, GBInfo gbAttrs) throws SemanticException {
  final ArrayList<ExprNodeDesc> keyExprs = new ArrayList<ExprNodeDesc>();
  final ArrayList<String> outColNames = new ArrayList<String>();
  final ArrayList<ColumnInfo> outColInfos = new ArrayList<ColumnInfo>();
  final Map<String, ExprNodeDesc> exprByOutCol = new HashMap<String, ExprNodeDesc>();
  // Input column names that already act as keys; used to skip duplicate distinct expressions.
  final Set<String> inputColsAlreadyKeyed = new HashSet<String>();

  // 1.1 Original group-by keys; the output column index equals the key index.
  for (int pos = 0; pos < gbAttrs.gbKeys.size(); pos++) {
    final ExprNodeDesc keyExpr = gbAttrs.gbKeys.get(pos);
    keyExprs.add(keyExpr);
    final String outCol = SemanticAnalyzer.getColumnInternalName(pos);
    outColInfos.add(new ColumnInfo(outCol, gbAttrs.gbKeyTypes.get(pos), "", false));
    outColNames.add(outCol);
    inputColsAlreadyKeyed.add(gbAttrs.gbKeyColNamesInInput.get(pos));
    exprByOutCol.put(outCol, keyExpr);
  }

  // 1.2 Grouping-set column: only added on the map side when the helper says so.
  // Its position is the key count *before* addGrpSetCol extends the lists.
  final boolean inclGrpID = inclGrpSetInMapSide(gbAttrs);
  int groupingSetsPosition = -1;
  if (inclGrpID) {
    groupingSetsPosition = keyExprs.size();
    addGrpSetCol(true, null, false, keyExprs, outColNames, outColInfos, exprByOutCol);
  }

  // 1.3 Distinct expressions become additional keys unless an expression with
  // the same name is already a key.
  for (int pos = 0; pos < gbAttrs.distExprNodes.size(); pos++) {
    final String distName = gbAttrs.distExprNames.get(pos);
    if (inputColsAlreadyKeyed.contains(distName)) {
      continue;
    }
    final ExprNodeDesc distExpr = gbAttrs.distExprNodes.get(pos);
    keyExprs.add(distExpr);
    // Named after its position in the (now longer) key list.
    final String outCol = SemanticAnalyzer.getColumnInternalName(keyExprs.size() - 1);
    outColInfos.add(new ColumnInfo(outCol, gbAttrs.distExprTypes.get(pos), "", false));
    outColNames.add(outCol);
    inputColsAlreadyKeyed.add(distName);
    exprByOutCol.put(outCol, distExpr);
  }

  // 2. Aggregations: one descriptor and one output column per UDAF, numbered
  // after all key columns.
  final ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
  for (UDAFAttrs udafAttr : gbAttrs.udafAttrs) {
    final Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.HASH, udafAttr.isDistinctUDAF);
    aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udafAttr.udafEvaluator, udafAttr.udafParams, udafAttr.isDistinctUDAF, udafMode));

    GenericUDAFInfo udafInfo;
    try {
      udafInfo = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, udafAttr.udafParams);
    } catch (SemanticException e) {
      // Deliberately surfaced as unchecked, with the cause preserved.
      throw new RuntimeException(e);
    }

    final String outCol = SemanticAnalyzer.getColumnInternalName(keyExprs.size() + aggregations.size() - 1);
    outColInfos.add(new ColumnInfo(outCol, udafInfo.returnType, "", false));
    outColNames.add(outCol);
  }

  // 3. Create the GroupBy operator under the first input operator.
  final GroupByDesc hashGbDesc = new GroupByDesc(GroupByDesc.Mode.HASH, outColNames, keyExprs, aggregations, false, gbAttrs.groupByMemoryUsage, gbAttrs.memoryThreshold, gbAttrs.minReductionHashAggr, gbAttrs.minReductionHashAggrLowerBound, gbAttrs.grpSets, inclGrpID, groupingSetsPosition, gbAttrs.containsDistinctAggr);
  @SuppressWarnings("rawtypes") Operator gbOp = OperatorFactory.getAndMakeChild(hashGbDesc, new RowSchema(outColInfos), inputOpAf.inputs.get(0));

  // 4. Attach the column-expression map (keys only; UDAF columns are not included).
  gbOp.setColumnExprMap(exprByOutCol);
  return new OpAttr("", new HashSet<Integer>(), gbOp);
}
use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.
the class HiveGBOpConvUtil method genGBRSGBRSGBOpPipeLine.
/**
 * Assembles the five-stage GB-RS-GB1-RS-GB2 operator chain.
 *
 * @param inputOpAf attributes of the operator feeding the map-side group-by
 * @param aggRel the aggregate being translated (not referenced by this method)
 * @param gbInfo group-by description handed to every stage of the chain
 * @return attributes of the second reduce-side group-by, which ends the chain
 * @throws SemanticException if one of the stages cannot be generated
 */
private static OpAttr genGBRSGBRSGBOpPipeLine(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException {
  // Stages 1-2: map-side GB followed by the map-side RS.
  final OpAttr firstShuffle = genMapSideGBRS(genMapSideGB(inputOpAf, gbInfo), gbInfo);

  // Stage 3: first reduce-side GB in PARTIALS mode. It computes the grouping
  // set itself in every physical mode except
  // MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT.
  final boolean computeGrpSet = gbInfo.gbPhysicalPipelineMode != HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT;
  final OpAttr partialGroupBy = genReduceSideGB1(firstShuffle, gbInfo, computeGrpSet, false, GroupByDesc.Mode.PARTIALS);

  // Stages 4-5: a second RS fed by that GB, then the final reduce-side GB.
  final OpAttr secondShuffle = genReduceGBRS(partialGroupBy, gbInfo);
  return genReduceSideGB2(secondShuffle, gbInfo);
}
use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.
the class HiveGBOpConvUtil method genReduceSideGB1.
/**
 * Builds the reduce-side GroupBy operator that sits directly on top of the
 * ReduceSink found at {@code inputOpAf.inputs.get(0)}.
 *
 * The output schema is assembled in this order: the group-by keys, an optional
 * grouping-set column, then one column per UDAF. When the physical mode is
 * MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB ("final" group-by) the output columns take
 * their names from {@code gbInfo.outputColNames}; otherwise generated internal
 * column names are used. Only key columns (including the grouping-set column)
 * are put into the column-expression map.
 *
 * @param inputOpAf attributes whose first input is cast to ReduceSinkOperator
 * @param gbInfo group-by description (keys, grouping sets, UDAFs, memory settings)
 * @param computeGrpSet when a grouping-set column is emitted: true adds a new
 *        constant key expression, false references the column already present
 *        in the ReduceSink at the grouping-set position
 * @param propagateConstInDistinctUDAF when true, a distinct-UDAF parameter is
 *        replaced by the non-null result of
 *        SemanticAnalyzer.isConstantParameterInAggregationParameters
 * @param gbMode mode given to the GroupByDesc and used to derive each UDAF's mode
 * @return attributes wrapping the new operator, with an empty string and an empty integer set
 * @throws SemanticException declared by this method; presumably raised by the
 *         SemanticAnalyzer / ExprNodeDescUtils helpers it calls - verify against them
 */
private static OpAttr genReduceSideGB1(OpAttr inputOpAf, GBInfo gbInfo, boolean computeGrpSet, boolean propagateConstInDistinctUDAF, GroupByDesc.Mode gbMode) throws SemanticException {
ArrayList<String> outputColNames = new ArrayList<String>();
ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
String colOutputName = null;
// The input is cast unconditionally; a non-ReduceSink input fails with ClassCastException.
ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0);
List<ColumnInfo> rsColInfoLst = rs.getSchema().getSignature();
ColumnInfo ci;
// "Final" here means this GB is the last one of the GB-RS-GB pipeline, so it
// must expose the caller-visible output column names.
boolean finalGB = (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB);
// 1. Build GB Keys, grouping set starting position
// 1.1 First add the original GB keys, taken from ReduceSink columns
// 0 .. gbInfo.gbKeys.size() - 1
ArrayList<ExprNodeDesc> gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, gbInfo.gbKeys.size() - 1, false, false);
for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
ci = rsColInfoLst.get(i);
if (finalGB) {
colOutputName = gbInfo.outputColNames.get(i);
} else {
colOutputName = SemanticAnalyzer.getColumnInternalName(i);
}
outputColNames.add(colOutputName);
colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), "", false));
colExprMap.put(colOutputName, gbKeys.get(i));
}
// 1.2 Add the grouping-set column. It is emitted for a non-final GB whenever
// grouping sets exist, and for the final GB only when gbInfo.grpIdFunctionNeeded
// is set.
int groupingSetsColPosition = -1;
if ((!finalGB && gbInfo.grpSets.size() > 0) || (finalGB && gbInfo.grpIdFunctionNeeded)) {
// The grouping-set column sits immediately after the original keys.
groupingSetsColPosition = gbInfo.gbKeys.size();
if (computeGrpSet) {
// GrpSet Col needs to be constructed
// NOTE(review): the constant is the string "0L" and the column below is
// declared with stringTypeInfo - confirm this representation is intended.
gbKeys.add(new ExprNodeConstantDesc("0L"));
} else {
// GrpSet Col already part of input RS
// TODO: Can't we just copy the ExprNodeDesc from the input? (Do we need to
// explicitly set table alias to null & VC to false?)
gbKeys.addAll(ExprNodeDescUtils.genExprNodeDesc(rs, groupingSetsColPosition, groupingSetsColPosition, false, true));
}
colOutputName = SemanticAnalyzer.getColumnInternalName(groupingSetsColPosition);
if (finalGB) {
// For the final GB the grouping-set column takes the last caller-visible output name.
colOutputName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1);
}
outputColNames.add(colOutputName);
colInfoLst.add(new ColumnInfo(colOutputName, TypeInfoFactory.stringTypeInfo, null, true));
colExprMap.put(colOutputName, gbKeys.get(groupingSetsColPosition));
}
// 2. Walk through UDAF and add them to GB
// Name of the last ReduceSink output key column, or null when there are no
// key columns; used below to build the names of distinct-UDAF parameters.
String lastReduceKeyColName = null;
if (!rs.getConf().getOutputKeyColumnNames().isEmpty()) {
lastReduceKeyColName = rs.getConf().getOutputKeyColumnNames().get(rs.getConf().getOutputKeyColumnNames().size() - 1);
}
int numDistinctUDFs = 0;
// Distinct parameters are looked up in the ReduceSink schema right after the
// keys collected so far (original keys plus the optional grouping-set column).
int distinctStartPosInReduceKeys = gbKeys.size();
List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
// Index of the first UDAF name inside gbInfo.outputColNames: twice the key
// count when grouping sets exist, otherwise the key count.
// NOTE(review): the reason for the doubling is not visible here - confirm
// against how gbInfo.outputColNames is populated.
int udafColStartPosInOriginalGB = (gbInfo.grpSets.size() > 0) ? gbInfo.gbKeys.size() * 2 : gbInfo.gbKeys.size();
// Non-distinct UDAF inputs are read from the ReduceSink schema starting at the
// number of ReduceSink key columns.
int udafColStartPosInRS = rs.getConf().getKeyCols().size();
for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i);
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
if (udafAttr.isDistinctUDAF) {
ColumnInfo rsDistUDAFParamColInfo;
ExprNodeDesc distinctUDAFParam;
ExprNodeDesc constantPropDistinctUDAFParam;
for (int j = 0; j < udafAttr.udafParamsIndxInGBInfoDistExprs.size(); j++) {
// NOTE(review): j restarts at 0 for every distinct UDAF, so each one reads
// its column info from the same starting offset - confirm this is intended
// when several distinct UDAFs are present.
rsDistUDAFParamColInfo = rsColInfoLst.get(distinctStartPosInReduceKeys + j);
String rsDistUDAFParamName = rsDistUDAFParamColInfo.getInternalName();
// TODO: verify if this is needed
if (lastReduceKeyColName != null) {
// Replace the name with KEY.<lastKeyCol>:<distinctIndex>.<internal name of j>;
// type, table alias and virtual-column flag still come from the column info above.
rsDistUDAFParamName = Utilities.ReduceField.KEY.name() + "." + lastReduceKeyColName + ":" + numDistinctUDFs + "." + SemanticAnalyzer.getColumnInternalName(j);
}
distinctUDAFParam = new ExprNodeColumnDesc(rsDistUDAFParamColInfo.getType(), rsDistUDAFParamName, rsDistUDAFParamColInfo.getTabAlias(), rsDistUDAFParamColInfo.getIsVirtualCol());
if (propagateConstInDistinctUDAF) {
// TODO: Implement propConstDistUDAFParams
// The lookup uses the column's original internal name, not the rewritten one.
constantPropDistinctUDAFParam = SemanticAnalyzer.isConstantParameterInAggregationParameters(rsDistUDAFParamColInfo.getInternalName(), reduceValues);
if (constantPropDistinctUDAFParam != null) {
distinctUDAFParam = constantPropDistinctUDAFParam;
}
}
aggParameters.add(distinctUDAFParam);
}
numDistinctUDFs++;
} else {
// A non-distinct UDAF takes a single parameter: the ReduceSink column at
// udafColStartPosInRS + i, where i counts every UDAF, distinct ones included.
aggParameters.add(new ExprNodeColumnDesc(rsColInfoLst.get(udafColStartPosInRS + i)));
}
Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(gbMode, udafAttr.isDistinctUDAF);
GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, aggParameters);
// The distinct flag is passed on only when this GB does not run in FINAL mode.
aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, (gbMode != GroupByDesc.Mode.FINAL && udafAttr.isDistinctUDAF), udafMode));
if (finalGB) {
colOutputName = gbInfo.outputColNames.get(udafColStartPosInOriginalGB + i);
} else {
// Internal name numbered after all keys and the aggregations added so far.
colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() - 1);
}
colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false));
outputColNames.add(colOutputName);
}
// Nothing special needs to be done for grouping sets if this is the final
// group by operator, and multiple rows corresponding to the grouping sets
// have been generated upstream.
// However, if an additional MR job has been created to handle grouping sets,
// additional rows corresponding to grouping sets need to be created here.
// TODO: Clean up/refactor assumptions
boolean includeGrpSetInGBDesc = (gbInfo.grpSets.size() > 0) && !finalGB && !(gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT);
// 3. Create the GroupBy operator as a child of the ReduceSink.
Operator rsGBOp = OperatorFactory.getAndMakeChild(new GroupByDesc(gbMode, outputColNames, gbKeys, aggregations, gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, gbInfo.minReductionHashAggr, gbInfo.minReductionHashAggrLowerBound, gbInfo.grpSets, includeGrpSetInGBDesc, groupingSetsColPosition, gbInfo.containsDistinctAggr), new RowSchema(colInfoLst), rs);
// 4. Attach the column-expression map (key columns only; UDAF outputs are not in it).
rsGBOp.setColumnExprMap(colExprMap);
return new OpAttr("", new HashSet<Integer>(), rsGBOp);
}
use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.
the class HiveGBOpConvUtil method genNoMapSideGBNoSkew.
/**
 * Assembles the two-stage RS-GB1 operator chain: a map-side ReduceSink
 * followed by a reduce-side GroupBy running in COMPLETE mode.
 *
 * @param inputOpAf attributes of the operator feeding the map-side ReduceSink
 * @param aggRel the aggregate being translated (not referenced by this method)
 * @param gbInfo group-by description handed to both stages
 * @return attributes of the reduce-side group-by that ends the chain
 * @throws SemanticException if one of the stages cannot be generated
 */
private static OpAttr genNoMapSideGBNoSkew(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException {
  // No map-side GB in this chain: the ReduceSink is placed directly on the input.
  final OpAttr shuffle = genMapSideRS(inputOpAf, gbInfo);
  return genReduceSideGB1NoMapGB(shuffle, gbInfo, GroupByDesc.Mode.COMPLETE);
}
Aggregations