use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
the class HiveOpConverter method createColInfos.
private static Pair<ArrayList<ColumnInfo>, Set<Integer>> createColInfos(List<RexNode> calciteExprs,
    List<ExprNodeDesc> hiveExprs, List<String> projNames, OpAttr inpOpAf) {
  if (hiveExprs.size() != projNames.size()) {
    throw new RuntimeException("Column expressions list doesn't match Column Names list");
  }
  RexNode rexN;
  ExprNodeDesc pe;
  ArrayList<ColumnInfo> colInfos = new ArrayList<ColumnInfo>();
  boolean vc;
  Set<Integer> newVColSet = new HashSet<Integer>();
  for (int i = 0; i < hiveExprs.size(); i++) {
    pe = hiveExprs.get(i);
    rexN = calciteExprs.get(i);
    vc = false;
    if (rexN instanceof RexInputRef) {
      if (inpOpAf.vcolsInCalcite.contains(((RexInputRef) rexN).getIndex())) {
        newVColSet.add(i);
        vc = true;
      }
    }
    colInfos.add(new ColumnInfo(projNames.get(i), pe.getTypeInfo(), inpOpAf.tabAlias, vc));
  }
  return new Pair<ArrayList<ColumnInfo>, Set<Integer>>(colInfos, newVColSet);
}
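For orientation, here is a minimal, self-contained sketch (not part of HiveOpConverter) of how a ColumnInfo list plus a virtual-column index set like the pair built above is typically consumed, for example to back a new operator's RowSchema; the column names and types below are hypothetical.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ColInfoPairSketch {
  public static void main(String[] args) {
    // hypothetical projection: one regular column and one virtual column
    ArrayList<ColumnInfo> colInfos = new ArrayList<ColumnInfo>();
    colInfos.add(new ColumnInfo("_col0", TypeInfoFactory.intTypeInfo, "t", false));
    colInfos.add(new ColumnInfo("_col1", TypeInfoFactory.longTypeInfo, "t", true));

    // positions of virtual columns, mirroring newVColSet above
    Set<Integer> vcols = new HashSet<Integer>();
    vcols.add(1);

    // the RowSchema that an operator projecting these columns would carry
    RowSchema schema = new RowSchema(colInfos);
    System.out.println(schema.getSignature().size() + " columns, virtual positions: " + vcols);
  }
}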
use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
the class HiveOpConverter method visit.
OpAttr visit(HiveSortExchange exchangeRel) throws SemanticException {
  OpAttr inputOpAf = dispatch(exchangeRel.getInput());
  String tabAlias = inputOpAf.tabAlias;
  if (tabAlias == null || tabAlias.length() == 0) {
    tabAlias = getHiveDerivedTableAlias();
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + exchangeRel.getId() + ":" + exchangeRel.getRelTypeName()
        + " with row type: [" + exchangeRel.getRowType() + "]");
  }
  RelDistribution distribution = exchangeRel.getDistribution();
  if (distribution.getType() != Type.HASH_DISTRIBUTED) {
    throw new SemanticException("Only hash distribution supported for LogicalExchange");
  }
  ExprNodeDesc[] expressions = new ExprNodeDesc[exchangeRel.getJoinKeys().size()];
  for (int index = 0; index < exchangeRel.getJoinKeys().size(); index++) {
    expressions[index] = convertToExprNode(exchangeRel.getJoinKeys().get(index),
        exchangeRel.getInput(), inputOpAf.tabAlias, inputOpAf);
  }
  exchangeRel.setJoinExpressions(expressions);
  ReduceSinkOperator rsOp = genReduceSink(inputOpAf.inputs.get(0), tabAlias, expressions, -1, -1,
      Operation.NOT_ACID, hiveConf);
  return new OpAttr(tabAlias, inputOpAf.vcolsInCalcite, rsOp);
}
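As a rough illustration only, the join-key array set on the exchange ends up holding one ExprNodeDesc per key column. The sketch below builds two such descriptors directly as ExprNodeColumnDesc (the real code derives them from Calcite RexNodes via convertToExprNode); the column names, types, and table alias are hypothetical.

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class JoinKeySketch {
  public static void main(String[] args) {
    // hypothetical key columns "_col0" (int) and "_col3" (string) from table alias "t1"
    ExprNodeDesc[] expressions = new ExprNodeDesc[2];
    expressions[0] = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "_col0", "t1", false);
    expressions[1] = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "_col3", "t1", false);

    for (ExprNodeDesc e : expressions) {
      // prints a string form of each key column expression
      System.out.println(e.getExprString());
    }
  }
}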
use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
the class RewriteQueryUsingAggregateIndexCtx method replaceGroupByOperatorProcess.
/**
 * We need to replace the count(indexed_column_key) GenericUDAF aggregation
 * function in the group-by construct with the "sum" GenericUDAF. This method
 * creates a new operator tree for a sample query whose GroupByOperator uses
 * the sum aggregation function, and then uses that GroupByOperator's
 * information to replace the aggregation information of the original
 * GroupByOperator: the AggregationDesc (aggregation descriptor) of the old
 * GroupByOperator is replaced with the new AggregationDesc of the new
 * GroupByOperator.
 */
private void replaceGroupByOperatorProcess(GroupByOperator operator, int index) throws SemanticException {
  RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
  // We need to replace the GroupByOperator which is before RS
  if (index == 0) {
    // the query contains the sum aggregation GenericUDAF
    String selReplacementCommand = "select sum(`" + rewriteQueryCtx.getAggregateFunction() + "`)"
        + " from `" + rewriteQueryCtx.getIndexName() + "` group by " + rewriteQueryCtx.getIndexKey() + " ";
    // retrieve the operator tree for the query, and the required GroupByOperator from it
    Operator<?> newOperatorTree = RewriteParseContextGenerator.generateOperatorTree(
        rewriteQueryCtx.getParseContext().getQueryState(), selReplacementCommand);
    // we get our new GroupByOperator here
    GroupByOperator newGbyOperator =
        OperatorUtils.findLastOperatorUpstream(newOperatorTree, GroupByOperator.class);
    if (newGbyOperator == null) {
      throw new SemanticException("Error replacing GroupBy operator.");
    }
    // we need this information to set the correct colList, outputColumnNames
    // in SelectOperator
    ExprNodeColumnDesc aggrExprNode = null;
    // Construct the new AggregationDesc to get rid of the current
    // internal names and replace them with new internal names
    // as required by the operator tree
    GroupByDesc newConf = newGbyOperator.getConf();
    List<AggregationDesc> newAggrList = newConf.getAggregators();
    if (newAggrList != null && newAggrList.size() > 0) {
      for (AggregationDesc aggregationDesc : newAggrList) {
        rewriteQueryCtx.setEval(aggregationDesc.getGenericUDAFEvaluator());
        aggrExprNode = (ExprNodeColumnDesc) aggregationDesc.getParameters().get(0);
        rewriteQueryCtx.setAggrExprNode(aggrExprNode);
      }
    }
    // Now the GroupByOperator has the new AggregationList;
    // sum(`_count_of_indexed_key`)
    // instead of count(indexed_key)
    GroupByDesc oldConf = operator.getConf();
    oldConf.setAggregators((ArrayList<AggregationDesc>) newAggrList);
    operator.setConf(oldConf);
  } else {
    // we just need to reset the GenericUDAFEvaluator and its name for this
    // GroupByOperator whose parent is the ReduceSinkOperator
    GroupByDesc childConf = operator.getConf();
    List<AggregationDesc> childAggrList = childConf.getAggregators();
    if (childAggrList != null && childAggrList.size() > 0) {
      for (AggregationDesc aggregationDesc : childAggrList) {
        List<ExprNodeDesc> paraList = aggregationDesc.getParameters();
        List<ObjectInspector> parametersOIList = new ArrayList<ObjectInspector>();
        for (ExprNodeDesc expr : paraList) {
          parametersOIList.add(expr.getWritableObjectInspector());
        }
        GenericUDAFEvaluator evaluator =
            FunctionRegistry.getGenericUDAFEvaluator("sum", parametersOIList, false, false);
        aggregationDesc.setGenericUDAFEvaluator(evaluator);
        aggregationDesc.setGenericUDAFName("sum");
      }
    }
  }
}
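A minimal sketch of the evaluator swap done in the second branch above: resolve a "sum" GenericUDAFEvaluator from the parameters' ObjectInspectors via FunctionRegistry. The single bigint parameter here is a hypothetical stand-in for the index's precomputed count column.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class SumEvaluatorSketch {
  public static void main(String[] args) throws SemanticException {
    // hypothetical single bigint parameter (e.g. a precomputed count column)
    List<ObjectInspector> parameterOIs = new ArrayList<ObjectInspector>();
    parameterOIs.add(PrimitiveObjectInspectorFactory.javaLongObjectInspector);

    // same call as above: function name "sum", not distinct, not "*"
    GenericUDAFEvaluator evaluator =
        FunctionRegistry.getGenericUDAFEvaluator("sum", parameterOIs, false, false);
    System.out.println("Resolved evaluator: " + evaluator.getClass().getName());
  }
}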
use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
the class HiveGBOpConvUtil method genMapSideGB.
@SuppressWarnings("unchecked")
private static OpAttr genMapSideGB(OpAttr inputOpAf, GBInfo gbAttrs) throws SemanticException {
ArrayList<String> outputColNames = new ArrayList<String>();
ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
Set<String> gbKeyColsAsNamesFrmIn = new HashSet<String>();
String colOutputName = null;
// 1. Build GB Keys, grouping set starting position
// 1.1 First Add original GB Keys
ArrayList<ExprNodeDesc> gbKeys = new ArrayList<ExprNodeDesc>();
for (int i = 0; i < gbAttrs.gbKeys.size(); i++) {
gbKeys.add(gbAttrs.gbKeys.get(i));
colOutputName = SemanticAnalyzer.getColumnInternalName(i);
colInfoLst.add(new ColumnInfo(colOutputName, gbAttrs.gbKeyTypes.get(i), "", false));
outputColNames.add(colOutputName);
gbKeyColsAsNamesFrmIn.add(gbAttrs.gbKeyColNamesInInput.get(i));
colExprMap.put(colOutputName, gbKeys.get(i));
}
  // 1.2. Adjust GroupingSet Position, GBKeys for GroupingSet Position if needed.
  // NOTE: GroupingID is added to the map-side GB only if the grouping sets
  // don't require additional MR jobs
  int groupingSetsPosition = -1;
  boolean inclGrpID = inclGrpSetInMapSide(gbAttrs);
  if (inclGrpID) {
    groupingSetsPosition = gbKeys.size();
    addGrpSetCol(true, null, false, gbKeys, outputColNames, colInfoLst, colExprMap);
  }
  // 1.3 Add distinct UDAF arguments to the GB keys if they are not already GB keys
  for (int i = 0; i < gbAttrs.distExprNodes.size(); i++) {
    if (!gbKeyColsAsNamesFrmIn.contains(gbAttrs.distExprNames.get(i))) {
      gbKeys.add(gbAttrs.distExprNodes.get(i));
      colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() - 1);
      colInfoLst.add(new ColumnInfo(colOutputName, gbAttrs.distExprTypes.get(i), "", false));
      outputColNames.add(colOutputName);
      gbKeyColsAsNamesFrmIn.add(gbAttrs.distExprNames.get(i));
      colExprMap.put(colOutputName, gbKeys.get(gbKeys.size() - 1));
    }
  }
  // 2. Build Aggregations
  ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
  for (UDAFAttrs udafAttr : gbAttrs.udafAttrs) {
    Mode amode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.HASH, udafAttr.isDistinctUDAF);
    aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udafAttr.udafEvaluator,
        udafAttr.udafParams, udafAttr.isDistinctUDAF, amode));
    GenericUDAFInfo udafInfo;
    try {
      udafInfo = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, amode, udafAttr.udafParams);
    } catch (SemanticException e) {
      throw new RuntimeException(e);
    }
    colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() - 1);
    colInfoLst.add(new ColumnInfo(colOutputName, udafInfo.returnType, "", false));
    outputColNames.add(colOutputName);
  }
  // 3. Create GB
  @SuppressWarnings("rawtypes")
  Operator gbOp = OperatorFactory.getAndMakeChild(
      new GroupByDesc(GroupByDesc.Mode.HASH, outputColNames, gbKeys, aggregations, false,
          gbAttrs.groupByMemoryUsage, gbAttrs.memoryThreshold, gbAttrs.grpSets, inclGrpID,
          groupingSetsPosition, gbAttrs.containsDistinctAggr),
      new RowSchema(colInfoLst), inputOpAf.inputs.get(0));
  // 5. Setup Expr Col Map
  // NOTE: UDAF is not included in ExprColMap
  gbOp.setColumnExprMap(colExprMap);
  return new OpAttr("", new HashSet<Integer>(), gbOp);
}
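To make step 2 concrete in isolation, here is a small sketch that builds one hash-mode (map-side) AggregationDesc for a count over a single column; the column name, its type, and the way the evaluator is resolved are simplified, hypothetical stand-ins for what GBInfo and semantic analysis provide in the real flow.

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class MapSideAggSketch {
  public static void main(String[] args) throws SemanticException {
    // hypothetical UDAF parameter: count over column "_col0"
    ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
    params.add(new ExprNodeColumnDesc(TypeInfoFactory.longTypeInfo, "_col0", "", false));

    // derive ObjectInspectors from the parameter expressions, then resolve the evaluator
    ArrayList<ObjectInspector> paramOIs = new ArrayList<ObjectInspector>();
    for (ExprNodeDesc e : params) {
      paramOIs.add(e.getWritableObjectInspector());
    }
    GenericUDAFEvaluator eval =
        FunctionRegistry.getGenericUDAFEvaluator("count", paramOIs, false, false);

    // PARTIAL1 is the UDAF mode corresponding to a hash-mode map-side GroupBy
    AggregationDesc agg =
        new AggregationDesc("count", eval, params, false, GenericUDAFEvaluator.Mode.PARTIAL1);
    System.out.println(agg.getGenericUDAFName() + " over " + agg.getParameters().size() + " parameter(s)");
  }
}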
use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
the class HiveGBOpConvUtil method getReduceKeysForRS.
/**
 * Get reduce keys for the RS following the map-side GB.
 *
 * @param outputKeyColumnNames
 * @param colExprMap
 * @return list of ExprNodeDesc for the reduce keys (assumed to be a deduped list of exprs)
 * @throws SemanticException
 */
private static ArrayList<ExprNodeDesc> getReduceKeysForRS(Operator inOp, int startPos, int endPos,
    List<String> outputKeyColumnNames, boolean addOnlyOneKeyColName, ArrayList<ColumnInfo> colInfoLst,
    Map<String, ExprNodeDesc> colExprMap, boolean addEmptyTabAlias, boolean setColToNonVirtual)
    throws SemanticException {
  ArrayList<ExprNodeDesc> reduceKeys = null;
  if (endPos < 0) {
    reduceKeys = new ArrayList<ExprNodeDesc>();
  } else {
    reduceKeys = ExprNodeDescUtils.genExprNodeDesc(inOp, startPos, endPos, addEmptyTabAlias, setColToNonVirtual);
    int outColNameIndx = startPos;
    for (int i = 0; i < reduceKeys.size(); ++i) {
      String outputColName = SemanticAnalyzer.getColumnInternalName(outColNameIndx);
      outColNameIndx++;
      if (!addOnlyOneKeyColName || i == 0) {
        outputKeyColumnNames.add(outputColName);
      }
      // TODO: Verify if this is needed (why can't it always be null/empty?)
      String tabAlias = addEmptyTabAlias ? "" : null;
      ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + outputColName,
          reduceKeys.get(i).getTypeInfo(), tabAlias, false);
      colInfoLst.add(colInfo);
      colExprMap.put(colInfo.getInternalName(), reduceKeys.get(i));
    }
  }
  return reduceKeys;
}
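Finally, a brief sketch of the naming convention used above: each reduce key gets an internal name of the form "_col<i>" and is registered in the ReduceSink row schema and column-expression map under the "KEY." prefix. The key expression and its type below are hypothetical.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ReduceKeyNamingSketch {
  public static void main(String[] args) {
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();

    // hypothetical single reduce key expression
    ExprNodeDesc key = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "_col0", "", false);

    String outputColName = SemanticAnalyzer.getColumnInternalName(0); // internal name of the form "_col0"
    ColumnInfo colInfo = new ColumnInfo(
        Utilities.ReduceField.KEY.toString() + "." + outputColName,
        key.getTypeInfo(), "", false);

    colExprMap.put(colInfo.getInternalName(), key);
    System.out.println(colInfo.getInternalName() + " -> " + key.getExprString());
  }
}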