use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
the class HiveGBOpConvUtil method genReduceSideGB1NoMapGB.
/**
 * RS-GB0
 *
 * @param inputOpAf operator attributes whose first input is the ReduceSinkOperator feeding this group-by
 * @param gbInfo    group-by metadata (keys, UDAFs, output column names, physical pipeline mode)
 * @param gbMode    mode of the GroupByDesc to generate
 * @return operator attributes wrapping the new reduce-side GroupByOperator
 * @throws SemanticException
 */
private static OpAttr genReduceSideGB1NoMapGB(OpAttr inputOpAf, GBInfo gbInfo,
    GroupByDesc.Mode gbMode) throws SemanticException {
  ArrayList<String> outputColNames = new ArrayList<String>();
  ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  String colOutputName = null;
  ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0);
  List<ColumnInfo> rsColInfoLst = rs.getSchema().getSignature();
  ColumnInfo ci;
  boolean useOriginalGBNames = (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.NO_MAP_SIDE_GB_NO_SKEW);
  // 1. Build GB Keys, grouping set starting position
  // 1.1 First add original GB Keys
  ArrayList<ExprNodeDesc> gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0,
      gbInfo.gbKeys.size() - 1, true, false);
  for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
    ci = rsColInfoLst.get(i);
    if (useOriginalGBNames) {
      colOutputName = gbInfo.outputColNames.get(i);
    } else {
      colOutputName = SemanticAnalyzer.getColumnInternalName(i);
    }
    outputColNames.add(colOutputName);
    colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), null, false));
    colExprMap.put(colOutputName, gbKeys.get(i));
  }
  // 2. Walk through UDAFs and add them to the GB
  String lastReduceKeyColName = null;
  if (!rs.getConf().getOutputKeyColumnNames().isEmpty()) {
    lastReduceKeyColName = rs.getConf().getOutputKeyColumnNames().get(
        rs.getConf().getOutputKeyColumnNames().size() - 1);
  }
  int numDistinctUDFs = 0;
  List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols();
  ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
  int udafColStartPosInOriginalGB = gbInfo.gbKeys.size();
  // The positions in rsColInfoLst are laid out as
  //   --grpkey--, --distkey--, --values--
  // but a distinct UDAF may come before or after non-distinct UDAFs,
  // i.e., their positions can be mixed.
  // So for every UDAF argument we first check whether it is a group-by key;
  // if not, whether it is a distinct key; otherwise it must be a value.
  List<Integer> distinctPositions = new ArrayList<>();
  Map<Integer, ArrayList<ExprNodeDesc>> indexToParameter = new TreeMap<>();
  for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
    UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i);
    ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
    ColumnInfo rsUDAFParamColInfo;
    ExprNodeDesc udafParam;
    ExprNodeDesc constantPropDistinctUDAFParam;
    for (int j = 0; j < udafAttr.udafParams.size(); j++) {
      int argPos = getColInfoPos(udafAttr.udafParams.get(j), gbInfo);
      rsUDAFParamColInfo = rsColInfoLst.get(argPos);
      String rsUDAFParamName = rsUDAFParamColInfo.getInternalName();
      if (udafAttr.isDistinctUDAF && lastReduceKeyColName != null) {
        rsUDAFParamName = Utilities.ReduceField.KEY.name() + "." + lastReduceKeyColName + ":"
            + numDistinctUDFs + "." + SemanticAnalyzer.getColumnInternalName(j);
      }
      udafParam = new ExprNodeColumnDesc(rsUDAFParamColInfo.getType(), rsUDAFParamName,
          rsUDAFParamColInfo.getTabAlias(), rsUDAFParamColInfo.getIsVirtualCol());
      constantPropDistinctUDAFParam = SemanticAnalyzer.isConstantParameterInAggregationParameters(
          rsUDAFParamColInfo.getInternalName(), reduceValues);
      if (constantPropDistinctUDAFParam != null) {
        udafParam = constantPropDistinctUDAFParam;
      }
      aggParameters.add(udafParam);
    }
    indexToParameter.put(i, aggParameters);
    if (udafAttr.isDistinctUDAF) {
      numDistinctUDFs++;
    }
  }
  for (int index : indexToParameter.keySet()) {
    UDAFAttrs udafAttr = gbInfo.udafAttrs.get(index);
    Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(gbMode, udafAttr.isDistinctUDAF);
    GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode,
        indexToParameter.get(index));
    aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(),
        udaf.genericUDAFEvaluator, udaf.convertedParameters, udafAttr.isDistinctUDAF, udafMode));
    if (useOriginalGBNames) {
      colOutputName = gbInfo.outputColNames.get(udafColStartPosInOriginalGB + index);
    } else {
      colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() - 1);
    }
    colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false));
    outputColNames.add(colOutputName);
  }
  Operator rsGB1 = OperatorFactory.getAndMakeChild(
      new GroupByDesc(gbMode, outputColNames, gbKeys, aggregations, false,
          gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, null, false, -1,
          numDistinctUDFs > 0),
      new RowSchema(colInfoLst), rs);
  rsGB1.setColumnExprMap(colExprMap);
  return new OpAttr("", new HashSet<Integer>(), rsGB1);
}
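The same skeleton recurs in each of these snippets: collect one ColumnInfo per output column, wrap the list in a RowSchema, pass the operator descriptor plus that schema to OperatorFactory.getAndMakeChild, and attach the name-to-expression map. A minimal sketch of just that skeleton, where keyExprs, someDesc, and parentOp are hypothetical placeholders standing in for the real group-by wiring:

  // Minimal sketch (hypothetical names): the RowSchema / colExprMap skeleton used above.
  ArrayList<ColumnInfo> colInfos = new ArrayList<ColumnInfo>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  for (int i = 0; i < keyExprs.size(); i++) {
    String name = SemanticAnalyzer.getColumnInternalName(i); // "_col0", "_col1", ...
    colInfos.add(new ColumnInfo(name, keyExprs.get(i).getTypeInfo(), null, false));
    colExprMap.put(name, keyExprs.get(i));
  }
  Operator op = OperatorFactory.getAndMakeChild(someDesc, new RowSchema(colInfos), parentOp);
  op.setColumnExprMap(colExprMap);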
use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
the class HiveGBOpConvUtil method genMapSideRS.
private static OpAttr genMapSideRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  List<String> outputKeyColumnNames = new ArrayList<String>();
  List<String> outputValueColumnNames = new ArrayList<String>();
  ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
  String outputColName;
  // 1. Add GB Keys to reduce keys
  ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
  for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
    // gbInfo already has ExprNodes for the gb keys
    reduceKeys.add(gbInfo.gbKeys.get(i));
    String colOutputName = SemanticAnalyzer.getColumnInternalName(i);
    outputKeyColumnNames.add(colOutputName);
    colInfoLst.add(new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + colOutputName,
        gbInfo.gbKeyTypes.get(i), "", false));
    colExprMap.put(colOutputName, gbInfo.gbKeys.get(i));
  }
  // Note: GROUPING SETS are not allowed when map-side aggregation is set to false,
  // so we don't have to worry about them here.
  int keyLength = reduceKeys.size();
  // 2. Add Dist UDAF args to reduce keys
  if (gbInfo.containsDistinctAggr) {
    // TODO: Why is this needed (doesn't represent any cols)
    String udafName = SemanticAnalyzer.getColumnInternalName(reduceKeys.size());
    outputKeyColumnNames.add(udafName);
    for (int i = 0; i < gbInfo.distExprNodes.size(); i++) {
      reduceKeys.add(gbInfo.distExprNodes.get(i));
      // This part of reduceKeys is later used to create column names strictly for
      // non-distinct aggregates whose parameters are the same as the distinct keys,
      // which expects _col0 at the end. So we always append _col0 instead of _col<i>.
      outputColName = SemanticAnalyzer.getColumnInternalName(0);
      String field = Utilities.ReduceField.KEY.toString() + "." + udafName + ":" + i + "."
          + outputColName;
      ColumnInfo colInfo = new ColumnInfo(field, gbInfo.distExprNodes.get(i).getTypeInfo(), null, false);
      colInfoLst.add(colInfo);
      colExprMap.put(field, gbInfo.distExprNodes.get(i));
    }
  }
  // 3. Add deduped UDAF args to reduce values
  ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
  for (int i = 0; i < gbInfo.deDupedNonDistIrefs.size(); i++) {
    reduceValues.add(gbInfo.deDupedNonDistIrefs.get(i));
    outputColName = SemanticAnalyzer.getColumnInternalName(reduceValues.size() - 1);
    outputValueColumnNames.add(outputColName);
    String field = Utilities.ReduceField.VALUE.toString() + "." + outputColName;
    colInfoLst.add(new ColumnInfo(field, reduceValues.get(reduceValues.size() - 1).getTypeInfo(),
        null, false));
    colExprMap.put(field, reduceValues.get(reduceValues.size() - 1));
  }
  // 4. Gen RS
  ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
      PlanUtils.getReduceSinkDesc(reduceKeys, keyLength, reduceValues, gbInfo.distColIndices,
          outputKeyColumnNames, outputValueColumnNames, true, -1,
          getNumPartFieldsForMapSideRS(gbInfo), getParallelismForMapSideRS(gbInfo),
          AcidUtils.Operation.NOT_ACID),
      new RowSchema(colInfoLst), inputOpAf.inputs.get(0));
  rsOp.setColumnExprMap(colExprMap);
  return new OpAttr("", new HashSet<Integer>(), rsOp);
}
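The internal names placed into the ColumnInfo objects above encode where a column lives in the shuffled row: reduce keys are prefixed with "KEY." and reduce values with "VALUE.", and each distinct-aggregate argument is nested under the synthetic last key column as "KEY.<keyCol>:<i>._col0". A small illustrative sketch of that naming; the concrete indices are only examples:

  // Illustrative only: how the reduce-sink internal names above are composed.
  String keyCol = SemanticAnalyzer.getColumnInternalName(0);                        // "_col0"
  String keyField = Utilities.ReduceField.KEY.toString() + "." + keyCol;            // "KEY._col0"
  String valField = Utilities.ReduceField.VALUE.toString() + "." + keyCol;          // "VALUE._col0"
  String distField = Utilities.ReduceField.KEY.toString() + "._col2:" + 0 + "."
      + SemanticAnalyzer.getColumnInternalName(0);                                  // "KEY._col2:0._col0"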
use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
the class HiveGBOpConvUtil method genReduceSideGB1.
private static OpAttr genReduceSideGB1(OpAttr inputOpAf, GBInfo gbInfo, boolean computeGrpSet,
    boolean propagateConstInDistinctUDAF, GroupByDesc.Mode gbMode) throws SemanticException {
  ArrayList<String> outputColNames = new ArrayList<String>();
  ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  String colOutputName = null;
  ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0);
  List<ColumnInfo> rsColInfoLst = rs.getSchema().getSignature();
  ColumnInfo ci;
  boolean finalGB = (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB);
  // 1. Build GB Keys, grouping set starting position
  // 1.1 First add original GB Keys
  ArrayList<ExprNodeDesc> gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0,
      gbInfo.gbKeys.size() - 1, false, false);
  for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
    ci = rsColInfoLst.get(i);
    if (finalGB) {
      colOutputName = gbInfo.outputColNames.get(i);
    } else {
      colOutputName = SemanticAnalyzer.getColumnInternalName(i);
    }
    outputColNames.add(colOutputName);
    colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), "", false));
    colExprMap.put(colOutputName, gbKeys.get(i));
  }
  // 1.2 Add GrpSet Col
  int groupingSetsColPosition = -1;
  if ((!finalGB && gbInfo.grpSets.size() > 0) || (finalGB && gbInfo.grpIdFunctionNeeded)) {
    groupingSetsColPosition = gbInfo.gbKeys.size();
    if (computeGrpSet) {
      // GrpSet Col needs to be constructed
      gbKeys.add(new ExprNodeConstantDesc("0L"));
    } else {
      // GrpSet Col is already part of the input RS
      // TODO: Can't we just copy the ExprNodeDesc from the input? (Do we need to
      // explicitly set the table alias to null & VC to false?)
      gbKeys.addAll(ExprNodeDescUtils.genExprNodeDesc(rs, groupingSetsColPosition,
          groupingSetsColPosition, false, true));
    }
    colOutputName = SemanticAnalyzer.getColumnInternalName(groupingSetsColPosition);
    if (finalGB) {
      colOutputName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1);
    }
    outputColNames.add(colOutputName);
    colInfoLst.add(new ColumnInfo(colOutputName, TypeInfoFactory.stringTypeInfo, null, true));
    colExprMap.put(colOutputName, gbKeys.get(groupingSetsColPosition));
  }
  // 2. Walk through UDAFs and add them to the GB
  String lastReduceKeyColName = null;
  if (!rs.getConf().getOutputKeyColumnNames().isEmpty()) {
    lastReduceKeyColName = rs.getConf().getOutputKeyColumnNames().get(
        rs.getConf().getOutputKeyColumnNames().size() - 1);
  }
  int numDistinctUDFs = 0;
  int distinctStartPosInReduceKeys = gbKeys.size();
  List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols();
  ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
  int udafColStartPosInOriginalGB = (gbInfo.grpSets.size() > 0) ? gbInfo.gbKeys.size() * 2
      : gbInfo.gbKeys.size();
  int udafColStartPosInRS = rs.getConf().getKeyCols().size();
  for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
    UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i);
    ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
    if (udafAttr.isDistinctUDAF) {
      ColumnInfo rsDistUDAFParamColInfo;
      ExprNodeDesc distinctUDAFParam;
      ExprNodeDesc constantPropDistinctUDAFParam;
      for (int j = 0; j < udafAttr.udafParamsIndxInGBInfoDistExprs.size(); j++) {
        rsDistUDAFParamColInfo = rsColInfoLst.get(distinctStartPosInReduceKeys + j);
        String rsDistUDAFParamName = rsDistUDAFParamColInfo.getInternalName();
        // TODO: verify if this is needed
        if (lastReduceKeyColName != null) {
          rsDistUDAFParamName = Utilities.ReduceField.KEY.name() + "." + lastReduceKeyColName + ":"
              + numDistinctUDFs + "." + SemanticAnalyzer.getColumnInternalName(j);
        }
        distinctUDAFParam = new ExprNodeColumnDesc(rsDistUDAFParamColInfo.getType(),
            rsDistUDAFParamName, rsDistUDAFParamColInfo.getTabAlias(),
            rsDistUDAFParamColInfo.getIsVirtualCol());
        if (propagateConstInDistinctUDAF) {
          // TODO: Implement propConstDistUDAFParams
          constantPropDistinctUDAFParam = SemanticAnalyzer.isConstantParameterInAggregationParameters(
              rsDistUDAFParamColInfo.getInternalName(), reduceValues);
          if (constantPropDistinctUDAFParam != null) {
            distinctUDAFParam = constantPropDistinctUDAFParam;
          }
        }
        aggParameters.add(distinctUDAFParam);
      }
      numDistinctUDFs++;
    } else {
      aggParameters.add(new ExprNodeColumnDesc(rsColInfoLst.get(udafColStartPosInRS + i)));
    }
    Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(gbMode, udafAttr.isDistinctUDAF);
    GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode,
        aggParameters);
    aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(),
        udaf.genericUDAFEvaluator, udaf.convertedParameters,
        (gbMode != GroupByDesc.Mode.FINAL && udafAttr.isDistinctUDAF), udafMode));
    if (finalGB) {
      colOutputName = gbInfo.outputColNames.get(udafColStartPosInOriginalGB + i);
    } else {
      colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() - 1);
    }
    colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false));
    outputColNames.add(colOutputName);
  }
  // Nothing special needs to be done for grouping sets if this is the final
  // group-by operator and multiple rows corresponding to the grouping sets
  // have been generated upstream.
  // However, if an additional MR job has been created to handle grouping sets,
  // additional rows corresponding to grouping sets need to be created here.
  // TODO: Clean up/refactor assumptions
  boolean includeGrpSetInGBDesc = (gbInfo.grpSets.size() > 0) && !finalGB
      && !(gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT);
  Operator rsGBOp = OperatorFactory.getAndMakeChild(
      new GroupByDesc(gbMode, outputColNames, gbKeys, aggregations, gbInfo.groupByMemoryUsage,
          gbInfo.memoryThreshold, gbInfo.grpSets, includeGrpSetInGBDesc, groupingSetsColPosition,
          gbInfo.containsDistinctAggr),
      new RowSchema(colInfoLst), rs);
  rsGBOp.setColumnExprMap(colExprMap);
  return new OpAttr("", new HashSet<Integer>(), rsGBOp);
}
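Note how both reduce-side variants pick their output column names: the final group-by reuses the user-visible names recorded in gbInfo.outputColNames, while an intermediate group-by generates internal _col<n> names. A small hypothetical helper (not part of HiveGBOpConvUtil) capturing that rule:

  // Hypothetical helper: the output-column naming rule used by the methods above.
  private static String gbOutputName(GBInfo gbInfo, boolean finalGB,
      int posInOriginalGB, int posInNewSchema) {
    return finalGB
        ? gbInfo.outputColNames.get(posInOriginalGB)               // user-visible name
        : SemanticAnalyzer.getColumnInternalName(posInNewSchema);  // "_col<n>"
  }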
use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
the class HiveOpConverter method visit.
OpAttr visit(HiveProject projectRel) throws SemanticException {
  OpAttr inputOpAf = dispatch(projectRel.getInput());
  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + projectRel.getId() + ":" + projectRel.getRelTypeName()
        + " with row type: [" + projectRel.getRowType() + "]");
  }
  WindowingSpec windowingSpec = new WindowingSpec();
  List<String> exprNames = new ArrayList<String>(projectRel.getRowType().getFieldNames());
  List<ExprNodeDesc> exprCols = new ArrayList<ExprNodeDesc>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  for (int pos = 0; pos < projectRel.getChildExps().size(); pos++) {
    ExprNodeConverter converter = new ExprNodeConverter(inputOpAf.tabAlias,
        projectRel.getRowType().getFieldNames().get(pos), projectRel.getInput().getRowType(),
        projectRel.getRowType(), inputOpAf.vcolsInCalcite,
        projectRel.getCluster().getTypeFactory(), true);
    ExprNodeDesc exprCol = projectRel.getChildExps().get(pos).accept(converter);
    colExprMap.put(exprNames.get(pos), exprCol);
    exprCols.add(exprCol);
    // TODO: Should cols that come through a PTF retain their virtual-column status?
    if (converter.getWindowFunctionSpec() != null) {
      for (WindowFunctionSpec wfs : converter.getWindowFunctionSpec()) {
        windowingSpec.addWindowFunction(wfs);
      }
    }
  }
  if (windowingSpec.getWindowExpressions() != null
      && !windowingSpec.getWindowExpressions().isEmpty()) {
    inputOpAf = genPTF(inputOpAf, windowingSpec);
  }
  // TODO: is this a safe assumption (name collision, external names...)?
  SelectDesc sd = new SelectDesc(exprCols, exprNames);
  Pair<ArrayList<ColumnInfo>, Set<Integer>> colInfoVColPair = createColInfos(
      projectRel.getChildExps(), exprCols, exprNames, inputOpAf);
  SelectOperator selOp = (SelectOperator) OperatorFactory.getAndMakeChild(sd,
      new RowSchema(colInfoVColPair.getKey()), inputOpAf.inputs.get(0));
  selOp.setColumnExprMap(colExprMap);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Generated " + selOp + " with row schema: [" + selOp.getSchema() + "]");
  }
  return new OpAttr(inputOpAf.tabAlias, colInfoVColPair.getValue(), selOp);
}
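createColInfos itself is not part of this excerpt; a plausible, simplified sketch of the part that derives the SelectOperator's ColumnInfo list from the converted expressions (assumed behavior, ignoring the virtual-column set the real helper also returns):

  // Assumed sketch: deriving ColumnInfo entries for the project's RowSchema.
  ArrayList<ColumnInfo> colInfos = new ArrayList<ColumnInfo>();
  for (int i = 0; i < exprCols.size(); i++) {
    colInfos.add(new ColumnInfo(exprNames.get(i), exprCols.get(i).getTypeInfo(), null, false));
  }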
use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.
the class HiveOpConverter method visit.
/**
* TODO: 1) isSamplingPred 2) sampleDesc 3) isSortedFilter
*/
OpAttr visit(HiveFilter filterRel) throws SemanticException {
  OpAttr inputOpAf = dispatch(filterRel.getInput());
  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + filterRel.getId() + ":" + filterRel.getRelTypeName()
        + " with row type: [" + filterRel.getRowType() + "]");
  }
  ExprNodeDesc filCondExpr = filterRel.getCondition().accept(
      new ExprNodeConverter(inputOpAf.tabAlias, filterRel.getInput().getRowType(),
          inputOpAf.vcolsInCalcite, filterRel.getCluster().getTypeFactory(), true));
  FilterDesc filDesc = new FilterDesc(filCondExpr, false);
  ArrayList<ColumnInfo> cinfoLst = createColInfos(inputOpAf.inputs.get(0));
  FilterOperator filOp = (FilterOperator) OperatorFactory.getAndMakeChild(filDesc,
      new RowSchema(cinfoLst), inputOpAf.inputs.get(0));
  if (LOG.isDebugEnabled()) {
    LOG.debug("Generated " + filOp + " with row schema: [" + filOp.getSchema() + "]");
  }
  return inputOpAf.clone(filOp);
}
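Unlike the project case, a filter does not change the shape of its input, so the createColInfos overload used here only needs to reproduce the parent operator's column infos for the new RowSchema. A hedged sketch of that copy (assumed behavior; the real helper is not shown in this excerpt):

  // Assumed sketch: a filter's RowSchema mirrors its parent's signature.
  ArrayList<ColumnInfo> cinfoLst = new ArrayList<ColumnInfo>(
      inputOpAf.inputs.get(0).getSchema().getSignature());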