use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class HiveGBOpConvUtil method genMapSideGB.
@SuppressWarnings("unchecked")
private static OpAttr genMapSideGB(OpAttr inputOpAf, GBInfo gbAttrs) throws SemanticException {
  ArrayList<String> outputColNames = new ArrayList<String>();
  ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  Set<String> gbKeyColsAsNamesFrmIn = new HashSet<String>();
  String colOutputName = null;

  // 1. Build GB keys and the grouping-set starting position
  // 1.1. First add the original GB keys
  ArrayList<ExprNodeDesc> gbKeys = new ArrayList<ExprNodeDesc>();
  for (int i = 0; i < gbAttrs.gbKeys.size(); i++) {
    gbKeys.add(gbAttrs.gbKeys.get(i));
    colOutputName = SemanticAnalyzer.getColumnInternalName(i);
    colInfoLst.add(new ColumnInfo(colOutputName, gbAttrs.gbKeyTypes.get(i), "", false));
    outputColNames.add(colOutputName);
    gbKeyColsAsNamesFrmIn.add(gbAttrs.gbKeyColNamesInInput.get(i));
    colExprMap.put(colOutputName, gbKeys.get(i));
  }

  // 1.2. Adjust the grouping-set position and GB keys if needed.
  // NOTE: the grouping ID is added to the map-side GB only if the grouping
  // sets do not require additional MR jobs.
  int groupingSetsPosition = -1;
  boolean inclGrpID = inclGrpSetInMapSide(gbAttrs);
  if (inclGrpID) {
    groupingSetsPosition = gbKeys.size();
    addGrpSetCol(true, null, false, gbKeys, outputColNames, colInfoLst, colExprMap);
  }

  // 1.3. Add distinct-UDAF expressions as GB keys if they are not GB keys already
  for (int i = 0; i < gbAttrs.distExprNodes.size(); i++) {
    if (!gbKeyColsAsNamesFrmIn.contains(gbAttrs.distExprNames.get(i))) {
      gbKeys.add(gbAttrs.distExprNodes.get(i));
      colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() - 1);
      colInfoLst.add(new ColumnInfo(colOutputName, gbAttrs.distExprTypes.get(i), "", false));
      outputColNames.add(colOutputName);
      gbKeyColsAsNamesFrmIn.add(gbAttrs.distExprNames.get(i));
      colExprMap.put(colOutputName, gbKeys.get(gbKeys.size() - 1));
    }
  }

  // 2. Build aggregations
  ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
  for (UDAFAttrs udafAttr : gbAttrs.udafAttrs) {
    Mode amode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.HASH,
        udafAttr.isDistinctUDAF);
    aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(),
        udafAttr.udafEvaluator, udafAttr.udafParams, udafAttr.isDistinctUDAF, amode));
    GenericUDAFInfo udafInfo;
    try {
      udafInfo = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, amode,
          udafAttr.udafParams);
    } catch (SemanticException e) {
      throw new RuntimeException(e);
    }
    colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size()
        + aggregations.size() - 1);
    colInfoLst.add(new ColumnInfo(colOutputName, udafInfo.returnType, "", false));
    outputColNames.add(colOutputName);
  }

  // 3. Create the map-side GB operator
  @SuppressWarnings("rawtypes")
  Operator gbOp = OperatorFactory.getAndMakeChild(
      new GroupByDesc(GroupByDesc.Mode.HASH, outputColNames, gbKeys, aggregations,
          false, gbAttrs.groupByMemoryUsage, gbAttrs.memoryThreshold, gbAttrs.grpSets,
          inclGrpID, groupingSetsPosition, gbAttrs.containsDistinctAggr),
      new RowSchema(colInfoLst), inputOpAf.inputs.get(0));

  // 4. Set up the expression-to-column map.
  // NOTE: UDAFs are not included in the map.
  gbOp.setColumnExprMap(colExprMap);
  return new OpAttr("", new HashSet<Integer>(), gbOp);
}
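The schema assembled above is purely positional: SemanticAnalyzer.getColumnInternalName(pos) produces names of the form _col<pos>, so the GB keys, the optional grouping-set ID, and the aggregation outputs occupy consecutive slots. Below is a minimal standalone sketch of that layout; the class and helper names are hypothetical stand-ins, not Hive code.

import java.util.ArrayList;
import java.util.List;

public class MapSideGBSchemaSketch {

  // Mirrors the "_col<pos>" convention of SemanticAnalyzer.getColumnInternalName.
  static String columnInternalName(int pos) {
    return "_col" + pos;
  }

  // Lays out the map-side GB output schema: GB keys first, then the optional
  // grouping-set ID column, then one column per aggregation.
  static List<String> mapSideGBOutputNames(int numGBKeys, boolean inclGrpID, int numAggs) {
    List<String> names = new ArrayList<>();
    int pos = 0;
    for (int i = 0; i < numGBKeys; i++) {
      names.add(columnInternalName(pos++)); // "_col0", "_col1", ...
    }
    if (inclGrpID) {
      names.add(columnInternalName(pos++)); // grouping-set ID slot
    }
    for (int i = 0; i < numAggs; i++) {
      names.add(columnInternalName(pos++)); // one output column per UDAF
    }
    return names;
  }

  public static void main(String[] args) {
    // Two GB keys, grouping ID included, two aggregations:
    // [_col0, _col1, _col2, _col3, _col4]
    System.out.println(mapSideGBOutputNames(2, true, 2));
  }
}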
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class HiveGBOpConvUtil method getReduceKeysForRS.
/**
 * Get reduce keys for the RS following the map-side GB.
 *
 * @param inOp the map-side GB operator
 * @param outputKeyColumnNames
 * @param colExprMap
 * @return list of ExprNodeDesc for the reduce keys; assumed to be a deduped list of exprs
 * @throws SemanticException
 */
private static ArrayList<ExprNodeDesc> getReduceKeysForRS(Operator inOp, int startPos,
    int endPos, List<String> outputKeyColumnNames, boolean addOnlyOneKeyColName,
    ArrayList<ColumnInfo> colInfoLst, Map<String, ExprNodeDesc> colExprMap,
    boolean addEmptyTabAlias, boolean setColToNonVirtual) throws SemanticException {
  ArrayList<ExprNodeDesc> reduceKeys = null;
  if (endPos < 0) {
    reduceKeys = new ArrayList<ExprNodeDesc>();
  } else {
    reduceKeys = ExprNodeDescUtils.genExprNodeDesc(inOp, startPos, endPos,
        addEmptyTabAlias, setColToNonVirtual);
    int outColNameIndx = startPos;
    for (int i = 0; i < reduceKeys.size(); ++i) {
      String outputColName = SemanticAnalyzer.getColumnInternalName(outColNameIndx);
      outColNameIndx++;
      if (!addOnlyOneKeyColName || i == 0) {
        outputKeyColumnNames.add(outputColName);
      }
      // TODO: Verify if this is needed (why can't it always be null/empty?)
      String tabAlias = addEmptyTabAlias ? "" : null;
      ColumnInfo colInfo = new ColumnInfo(
          Utilities.ReduceField.KEY.toString() + "." + outputColName,
          reduceKeys.get(i).getTypeInfo(), tabAlias, false);
      colInfoLst.add(colInfo);
      colExprMap.put(colInfo.getInternalName(), reduceKeys.get(i));
    }
  }
  return reduceKeys;
}
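For the reduce keys, the internal name is qualified with the KEY reduce field, so a map-side GB column _colN surfaces as KEY._colN in the ReduceSink's row schema and column-expression map. A small sketch of that naming follows; the enum is a stand-in for Utilities.ReduceField and the class name is hypothetical.

import java.util.LinkedHashMap;
import java.util.Map;

public class ReduceKeyNamingSketch {

  // Stand-in for Utilities.ReduceField: shuffled rows are split into
  // KEY and VALUE sub-structs, and column names are qualified accordingly.
  enum ReduceField { KEY, VALUE }

  public static void main(String[] args) {
    Map<String, String> colExprMap = new LinkedHashMap<>();
    int startPos = 0;
    int numKeys = 2;
    for (int i = 0; i < numKeys; i++) {
      // outColNameIndx keeps the position from the map-side GB schema...
      String outputColName = "_col" + (startPos + i);
      // ...and the RS-side internal name gets the "KEY." prefix.
      colExprMap.put(ReduceField.KEY + "." + outputColName, "<expr " + i + ">");
    }
    System.out.println(colExprMap); // {KEY._col0=<expr 0>, KEY._col1=<expr 1>}
  }
}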
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class HiveGBOpConvUtil method getValueKeysForRS.
/**
* Get Value Keys for RS following MapSide GB
*
* @param GroupByOperator
* MapSide GB
* @param outputKeyColumnNames
* @param colExprMap
* @return List of ExprNodeDesc of Values
* @throws SemanticException
*/
private static ArrayList<ExprNodeDesc> getValueKeysForRS(Operator inOp, int aggStartPos,
    List<String> outputKeyColumnNames, ArrayList<ColumnInfo> colInfoLst,
    Map<String, ExprNodeDesc> colExprMap, boolean addEmptyTabAlias,
    boolean setColToNonVirtual) throws SemanticException {
  List<ColumnInfo> mapGBColInfoLst = inOp.getSchema().getSignature();
  ArrayList<ExprNodeDesc> valueKeys = null;
  if (aggStartPos >= mapGBColInfoLst.size()) {
    valueKeys = new ArrayList<ExprNodeDesc>();
  } else {
    valueKeys = ExprNodeDescUtils.genExprNodeDesc(inOp, aggStartPos,
        mapGBColInfoLst.size() - 1, true, setColToNonVirtual);
    for (int i = 0; i < valueKeys.size(); ++i) {
      String outputColName = SemanticAnalyzer.getColumnInternalName(i);
      outputKeyColumnNames.add(outputColName);
      // TODO: Verify if this is needed (why can't it always be null/empty?)
      String tabAlias = addEmptyTabAlias ? "" : null;
      ColumnInfo colInfo = new ColumnInfo(
          Utilities.ReduceField.VALUE.toString() + "." + outputColName,
          valueKeys.get(i).getTypeInfo(), tabAlias, false);
      colInfoLst.add(colInfo);
      colExprMap.put(colInfo.getInternalName(), valueKeys.get(i));
    }
  }
  return valueKeys;
}
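Note the asymmetry with getReduceKeysForRS: value columns are renumbered from zero (getColumnInternalName(i) rather than a running source-position index) and carry the VALUE prefix. A sketch under those assumptions; the class name and the example positions are made up for illustration.

public class ReduceValueNamingSketch {
  public static void main(String[] args) {
    // Map-side GB schema: [_col0 _col1 | _col2 _col3]  (keys | aggregation buffers)
    int aggStartPos = 2; // first aggregation column in the map-side GB schema
    int schemaSize = 4;  // inOp.getSchema().getSignature().size()
    // Values are sliced from aggStartPos to the end of the schema and, unlike
    // the reduce keys, renumbered from zero under the VALUE prefix.
    for (int src = aggStartPos, i = 0; src < schemaSize; src++, i++) {
      System.out.println("VALUE._col" + i + "  <-  _col" + src);
    }
    // Prints: VALUE._col0 <- _col2, then VALUE._col1 <- _col3
  }
}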
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class HiveGBOpConvUtil method genReduceGBRS.
private static OpAttr genReduceGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  ArrayList<String> outputColumnNames = new ArrayList<String>();
  ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
  GroupByOperator reduceSideGB1 = (GroupByOperator) inputOpAf.inputs.get(0);
  List<ColumnInfo> gb1ColInfoLst = reduceSideGB1.getSchema().getSignature();
  ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForRS(reduceSideGB1, 0,
      gbInfo.gbKeys.size() - 1, outputColumnNames, false, colInfoLst, colExprMap,
      true, true);
  if (inclGrpSetInReduceSide(gbInfo)) {
    addGrpSetCol(false, gb1ColInfoLst.get(reduceKeys.size()).getInternalName(), true,
        reduceKeys, outputColumnNames, colInfoLst, colExprMap);
  }
  ArrayList<ExprNodeDesc> reduceValues = getValueKeysForRS(reduceSideGB1,
      reduceSideGB1.getConf().getKeys().size(), outputColumnNames, colInfoLst,
      colExprMap, true, true);
  ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
      PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true,
          -1, getNumPartFieldsForReduceSideRS(gbInfo),
          getParallelismForReduceSideRS(gbInfo), AcidUtils.Operation.NOT_ACID),
      new RowSchema(colInfoLst), reduceSideGB1);
  rsOp.setColumnExprMap(colExprMap);
  return new OpAttr("", new HashSet<Integer>(), rsOp);
}
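Putting the two helpers together, genReduceGBRS shuffles the first reduce-side GB's output to a second one: the plain GB keys come first, then the grouping-set column if the plan keeps it on the reduce side, then everything from the first aggregation buffer onward as values. A positional sketch follows; the counts and the class name are hypothetical, chosen only to illustrate the layout.

public class ReduceGBRSLayoutSketch {
  public static void main(String[] args) {
    // Hypothetical counts, mirroring the calls in genReduceGBRS:
    int numGBKeys = 2;              // gbInfo.gbKeys.size()
    boolean grpSetInReduce = true;  // inclGrpSetInReduceSide(gbInfo)
    int numGB1Keys = numGBKeys + (grpSetInReduce ? 1 : 0); // getConf().getKeys().size()

    // getReduceKeysForRS copies GB1 columns [0, numGBKeys - 1] as reduce keys.
    System.out.println("reduce keys: _col0 .. _col" + (numGBKeys - 1));
    // addGrpSetCol appends the grouping-set column right after them.
    if (grpSetInReduce) {
      System.out.println("grouping-set key: _col" + numGBKeys);
    }
    // getValueKeysForRS ships everything from the first aggregation buffer on.
    System.out.println("values start at: _col" + numGB1Keys);
  }
}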
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class SubQueryUtils method buildSQJoinExpr.
/*
 * construct the ASTNode for the SQ column that will join with the OuterQuery Expression.
 * So for 'select ... from R1 where A in (select B from R2 ...)'
 * this will build (. (TOK_TABLE_OR_COL Identifier[SQ_1]) Identifier[B])
 * where 'SQ_1' is the alias generated for the SubQuery.
 */
static ASTNode buildSQJoinExpr(String sqAlias, RowResolver sqRR) {
  List<ColumnInfo> signature = sqRR.getRowSchema().getSignature();
  ColumnInfo joinColumn = signature.get(0);
  String[] joinColName = sqRR.reverseLookup(joinColumn.getInternalName());
  return createColRefAST(sqAlias, joinColName[1]);
}
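The returned node is the DOT tree sketched in the comment above: a column reference qualified by the generated subquery alias. A toy rendering of that shape; colRef and the class are illustrative stand-ins, not Hive's createColRefAST.

public class SQJoinExprSketch {

  // Toy stand-in for createColRefAST: a DOT node over
  // (TOK_TABLE_OR_COL <alias>) and the column identifier.
  static String colRef(String tableAlias, String colName) {
    return "(. (TOK_TABLE_OR_COL " + tableAlias + ") " + colName + ")";
  }

  public static void main(String[] args) {
    // For: select ... from R1 where A in (select B from R2 ...)
    // the join expression references the first column of the subquery schema:
    System.out.println(colRef("SQ_1", "B")); // (. (TOK_TABLE_OR_COL SQ_1) B)
  }
}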