use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class SemanticAnalyzer method genMapGroupByForSemijoin.
private Operator genMapGroupByForSemijoin(QB qb,
    ArrayList<ASTNode> fields, // the ASTNode of the join key "tab.col"
    Operator inputOperatorInfo, GroupByDesc.Mode mode) throws SemanticException {
RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo).getRowResolver();
RowResolver groupByOutputRowResolver = new RowResolver();
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<String> outputColumnNames = new ArrayList<String>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
qb.getParseInfo();
// join keys should only be columns, not expressions
groupByOutputRowResolver.setIsExprResolver(true);
for (int i = 0; i < fields.size(); ++i) {
// convert each group-by key AST into an ExprNodeDesc and a ColumnInfo
ASTNode colName = fields.get(i);
ExprNodeDesc grpByExprNode = genExprNodeDesc(colName, groupByInputRowResolver);
groupByKeys.add(grpByExprNode);
// generate output column names
String field = getColumnInternalName(i);
outputColumnNames.add(field);
ColumnInfo colInfo2 = new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false);
groupByOutputRowResolver.putExpression(colName, colInfo2);
// establish mapping from the output column to the input column
colExprMap.put(field, grpByExprNode);
}
// Generate group-by operator
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
    new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false,
        groupByMemoryUsage, memoryThreshold, null, false, -1, false),
    new RowSchema(groupByOutputRowResolver.getColumnInfos()), inputOperatorInfo),
    groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
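All of these call sites rely on the same four-argument ColumnInfo constructor: an internal output column name, the expression's TypeInfo, a table alias (empty here), and the isVirtualCol flag. A minimal, self-contained sketch of just that constructor follows; the class name and the "_col0" literal are illustrative, not taken from SemanticAnalyzer.

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ColumnInfoSketch {
  public static void main(String[] args) {
    // (internalName, type, tabAlias, isVirtualCol) -- the same constructor used
    // for the group-by and select output columns in the snippet above
    ColumnInfo ci = new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "", false);
    System.out.println(ci.getInternalName() + " : " + ci.getType());
  }
}

In the snippets on this page the table alias is left empty and isVirtualCol is false, presumably because these columns are synthetic operator outputs rather than columns of a base table.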
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class SemanticAnalyzer method insertSelectForSemijoin.
/**
* Construct a selection operator for a semijoin that filters out all fields
* other than the group-by keys.
*
* @param fields
* list of fields that need to be output
* @param input
* input operator
* @return the selection operator.
* @throws SemanticException
*/
private Operator insertSelectForSemijoin(ArrayList<ASTNode> fields, Operator input) throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
ArrayList<String> outputColumnNames = new ArrayList<String>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
RowResolver outputRR = new RowResolver();
// construct the list of columns that need to be projected
for (int i = 0; i < fields.size(); ++i) {
ASTNode field = fields.get(i);
ExprNodeDesc exprNode = genExprNodeDesc(field, inputRR);
String colName = getColumnInternalName(i);
outputColumnNames.add(colName);
ColumnInfo colInfo = new ColumnInfo(colName, exprNode.getTypeInfo(), "", false);
outputRR.putExpression(field, colInfo);
colList.add(exprNode);
colExprMap.put(colName, exprNode);
}
// create selection operator
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
    new SelectDesc(colList, outputColumnNames, false),
    new RowSchema(outputRR.getColumnInfos()), input), outputRR);
output.setColumnExprMap(colExprMap);
return output;
}
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class HiveGBOpConvUtil method genReduceSideGB2.
private static OpAttr genReduceSideGB2(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
ArrayList<String> outputColNames = new ArrayList<String>();
ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
String colOutputName = null;
ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0);
List<ColumnInfo> rsColInfoLst = rs.getSchema().getSignature();
ColumnInfo ci;
// 1. Build GB Keys, grouping set starting position
// 1.1 First Add original GB Keys
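// (columns 0 .. gbKeys.size()-1 of the ReduceSink output are the original GB keys)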
ArrayList<ExprNodeDesc> gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, gbInfo.gbKeys.size() - 1, false, false);
for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
ci = rsColInfoLst.get(i);
colOutputName = gbInfo.outputColNames.get(i);
outputColNames.add(colOutputName);
colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), "", false));
colExprMap.put(colOutputName, gbKeys.get(i));
}
// 1.2 Add GrpSet Col
int groupingSetsPosition = -1;
if (inclGrpSetInReduceSide(gbInfo) && gbInfo.grpIdFunctionNeeded) {
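// the grouping-set id travels as an extra string-typed key column placed right
// after the original GB keys in the ReduceSink output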
groupingSetsPosition = gbKeys.size();
ExprNodeDesc grpSetColExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, rsColInfoLst.get(groupingSetsPosition).getInternalName(), null, false);
gbKeys.add(grpSetColExpr);
colOutputName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1);
outputColNames.add(colOutputName);
colInfoLst.add(new ColumnInfo(colOutputName, TypeInfoFactory.stringTypeInfo, null, true));
colExprMap.put(colOutputName, grpSetColExpr);
}
// 2. Add UDAF
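// each UDAF reads exactly one column of the ReduceSink output (its partial
// aggregation) and is evaluated in FINAL mode to produce the merged result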
UDAFAttrs udafAttr;
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
int udafStartPosInGBInfOutputColNames = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() * 2;
int udafStartPosInInputRS = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() + 1;
for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
udafAttr = gbInfo.udafAttrs.get(i);
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
aggParameters.add(new ExprNodeColumnDesc(rsColInfoLst.get(udafStartPosInInputRS + i)));
colOutputName = gbInfo.outputColNames.get(udafStartPosInGBInfOutputColNames + i);
outputColNames.add(colOutputName);
Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.FINAL, udafAttr.isDistinctUDAF);
GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, aggParameters);
aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, false, udafMode));
colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false));
}
Operator rsGBOp2 = OperatorFactory.getAndMakeChild(
    new GroupByDesc(GroupByDesc.Mode.FINAL, outputColNames, gbKeys, aggregations, false,
        gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, null, false,
        groupingSetsPosition, gbInfo.containsDistinctAggr),
    new RowSchema(colInfoLst), rs);
rsGBOp2.setColumnExprMap(colExprMap);
// TODO: Shouldn't we propagate vc? is it vc col from tab or all vc
return new OpAttr("", new HashSet<Integer>(), rsGBOp2);
}
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class HiveOpConverter method genInputSelectForUnion.
private Operator<? extends OperatorDesc> genInputSelectForUnion(
    Operator<? extends OperatorDesc> origInputOp, ArrayList<ColumnInfo> uColumnInfo)
    throws SemanticException {
Iterator<ColumnInfo> oIter = origInputOp.getSchema().getSignature().iterator();
Iterator<ColumnInfo> uIter = uColumnInfo.iterator();
List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
List<String> colName = new ArrayList<String>();
Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
boolean needSelectOp = false;
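// walk the input schema and the union schema in lock-step; a Select operator is
// only inserted if some column differs from its union counterpart (different
// column identity, or a type that requires a conversion cast)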
while (oIter.hasNext()) {
ColumnInfo oInfo = oIter.next();
ColumnInfo uInfo = uIter.next();
if (!oInfo.isSameColumnForRR(uInfo)) {
needSelectOp = true;
}
ExprNodeDesc column = new ExprNodeColumnDesc(oInfo.getType(), oInfo.getInternalName(), oInfo.getTabAlias(), oInfo.getIsVirtualCol(), oInfo.isSkewedCol());
if (!oInfo.getType().equals(uInfo.getType())) {
column = ParseUtils.createConversionCast(column, (PrimitiveTypeInfo) uInfo.getType());
}
columns.add(column);
colName.add(uInfo.getInternalName());
columnExprMap.put(uInfo.getInternalName(), column);
}
if (needSelectOp) {
return OperatorFactory.getAndMakeChild(new SelectDesc(columns, colName), new RowSchema(uColumnInfo), columnExprMap, origInputOp);
} else {
return origInputOp;
}
}
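The type-alignment step above combines two calls that also appear verbatim in the snippet: ExprNodeColumnDesc to reference the branch's column, and ParseUtils.createConversionCast to wrap it in a cast to the union's type. A hedged, standalone sketch follows; the class name, the "_col0" column, and the int-to-bigint types are illustrative, and running it outside a Hive session assumes the built-in function registry is available statically.

import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class UnionCastSketch {
  public static void main(String[] args) throws SemanticException {
    // an int column coming from one branch of the union ...
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "_col0", null, false);
    // ... wrapped in a cast to the bigint type agreed on for the union schema
    column = ParseUtils.createConversionCast(column, TypeInfoFactory.longTypeInfo);
    System.out.println(column.getExprString());
  }
}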
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class HiveOpConverter method buildBacktrackFromReduceSinkForJoin.
private static Map<String, ExprNodeDesc> buildBacktrackFromReduceSinkForJoin(int initialPos,
    List<String> outputColumnNames, List<String> keyColNames, List<String> valueColNames,
    int[] index, Operator<?> inputOp, String tabAlias) {
Map<String, ExprNodeDesc> columnDescriptors = new LinkedHashMap<String, ExprNodeDesc>();
for (int i = 0; i < index.length; i++) {
ColumnInfo info = new ColumnInfo(inputOp.getSchema().getSignature().get(i));
String field;
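// index[i] >= 0 means the i-th column backtracks to a ReduceSink key column;
// a negative value encodes a position in the value columns as (-index[i] - 1)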
if (index[i] >= 0) {
field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]);
} else {
field = Utilities.ReduceField.VALUE + "." + valueColNames.get(-index[i] - 1);
}
ExprNodeColumnDesc desc = new ExprNodeColumnDesc(info.getType(), field, tabAlias, info.getIsVirtualCol());
columnDescriptors.put(outputColumnNames.get(initialPos + i), desc);
}
return columnDescriptors;
}