Use of org.apache.hadoop.hive.ql.plan.AggregationDesc in project hive by apache:
the class SemanticAnalyzer, method genGroupByPlanMapGroupByOperator.
/**
 * Generate the map-side GroupByOperator for the Query Block
 * (qb.getParseInfo().getXXX(dest)). The new GroupByOperator will be a child
 * of the inputOperatorInfo.
 *
 * @param mode
 *          The mode of the aggregation (HASH)
 * @param genericUDAFEvaluators
 *          If not null, this function will store the mapping from Aggregation
 *          StringTree to the GenericUDAFEvaluator in this parameter, so it
 *          can be used in the next-stage GroupBy aggregations.
 * @return the new GroupByOperator
 */
@SuppressWarnings("nls")
private Operator genGroupByPlanMapGroupByOperator(QB qb, String dest, List<ASTNode> grpByExprs,
    Operator inputOperatorInfo, GroupByDesc.Mode mode,
    Map<String, GenericUDAFEvaluator> genericUDAFEvaluators,
    List<Long> groupingSetKeys, boolean groupingSetsPresent) throws SemanticException {
  RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo).getRowResolver();
  QBParseInfo parseInfo = qb.getParseInfo();
  RowResolver groupByOutputRowResolver = new RowResolver();
  groupByOutputRowResolver.setIsExprResolver(true);
  ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
  ArrayList<String> outputColumnNames = new ArrayList<String>();
  ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  for (int i = 0; i < grpByExprs.size(); ++i) {
    ASTNode grpbyExpr = grpByExprs.get(i);
    ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, groupByInputRowResolver);
    if ((grpByExprNode instanceof ExprNodeColumnDesc)
        && ExprNodeDescUtils.indexOf(grpByExprNode, groupByKeys) >= 0) {
      // Skip duplicated grouping keys; this happens when a column alias is
      // defined on a grouping key.
      grpByExprs.remove(i--);
      continue;
    }
    groupByKeys.add(grpByExprNode);
    String field = getColumnInternalName(i);
    outputColumnNames.add(field);
    groupByOutputRowResolver.putExpression(grpbyExpr,
        new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false));
    colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
  }
  // The grouping set key is placed after the grouping keys and before the
  // distinct keys.
  int groupingSetsPosition = -1;
  // For grouping sets (e.g. a rollup), add an extra grouping key.
  if (groupingSetsPresent) {
    groupingSetsPosition = groupByKeys.size();
    createNewGroupingKey(groupByKeys, outputColumnNames, groupByOutputRowResolver, colExprMap);
  }
  // If there is a distinctFuncExp, add all its parameters to the reduce keys.
  if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) {
    List<ASTNode> list = parseInfo.getDistinctFuncExprsForClause(dest);
    for (ASTNode value : list) {
      // child 0 is the function name
      for (int i = 1; i < value.getChildCount(); i++) {
        ASTNode parameter = (ASTNode) value.getChild(i);
        if (groupByOutputRowResolver.getExpression(parameter) == null) {
          ExprNodeDesc distExprNode = genExprNodeDesc(parameter, groupByInputRowResolver);
          groupByKeys.add(distExprNode);
          String field = getColumnInternalName(groupByKeys.size() - 1);
          outputColumnNames.add(field);
          groupByOutputRowResolver.putExpression(parameter,
              new ColumnInfo(field, distExprNode.getTypeInfo(), "", false));
          colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
        }
      }
    }
  }
  // For each aggregation
  HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
  assert (aggregationTrees != null);
  boolean containsDistinctAggr = false;
  for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
    ASTNode value = entry.getValue();
    String aggName = unescapeIdentifier(value.getChild(0).getText());
    ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
    // child 0 is the function name
    for (int i = 1; i < value.getChildCount(); i++) {
      ASTNode paraExpr = (ASTNode) value.getChild(i);
      ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, groupByInputRowResolver);
      aggParameters.add(paraExprNode);
    }
    boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
    containsDistinctAggr = containsDistinctAggr || isDistinct;
    boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
    Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
    GenericUDAFEvaluator genericUDAFEvaluator =
        getGenericUDAFEvaluator(aggName, aggParameters, value, isDistinct, isAllColumns);
    assert (genericUDAFEvaluator != null);
    GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
    aggregations.add(new AggregationDesc(aggName.toLowerCase(),
        udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct, amode));
    String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
    outputColumnNames.add(field);
    if (groupByOutputRowResolver.getExpression(value) == null) {
      groupByOutputRowResolver.putExpression(value,
          new ColumnInfo(field, udaf.returnType, "", false));
    }
    // Save the evaluator so that it can be used by the next-stage
    // GroupByOperators.
    if (genericUDAFEvaluators != null) {
      genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
    }
  }
  float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
  float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
  Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false,
          groupByMemoryUsage, memoryThreshold, groupingSetKeys, groupingSetsPresent,
          groupingSetsPosition, containsDistinctAggr),
      new RowSchema(groupByOutputRowResolver.getColumnInfos()), inputOperatorInfo),
      groupByOutputRowResolver);
  op.setColumnExprMap(colExprMap);
  return op;
}
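Note the positional naming scheme above: grouping keys first, then the optional grouping-set key, then the distinct parameters, and finally one output column per aggregation. A minimal sketch of that layout, assuming the common "_col" + position convention behind getColumnInternalName (an assumption for illustration, not quoted from Hive):

// Stand-in for getColumnInternalName, assuming the "_col" + position
// convention (an assumption for illustration).
static String getColumnInternalName(int pos) {
  return "_col" + pos;
}

// For SELECT key, count(DISTINCT v) FROM t GROUP BY key in map-side HASH
// mode: the distinct parameter v joins the group-by keys, and the
// aggregation column comes after all the keys.
public static void main(String[] args) {
  int numGroupByKeys = 2;  // key, plus the distinct parameter v
  int numAggregations = 1; // count(DISTINCT v)
  System.out.println(getColumnInternalName(0)); // _col0 -> key
  System.out.println(getColumnInternalName(1)); // _col1 -> v
  System.out.println(getColumnInternalName(numGroupByKeys + numAggregations - 1)); // _col2 -> count
}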
Use of org.apache.hadoop.hive.ql.plan.AggregationDesc in project hive by apache:
the class FunctionUtils, method extractEvaluators.
/**
 * Extracts the UDAF evaluators of the specified class from the provided aggregations.
 */
public static <T extends GenericUDAFEvaluator> List<T> extractEvaluators(
    Collection<? extends AggregationDesc> aggregations, Class<T> clazz) {
  List<T> result = new ArrayList<>();
  for (AggregationDesc d : aggregations) {
    if (clazz.isInstance(d.getGenericUDAFEvaluator())) {
      @SuppressWarnings("unchecked")
      T t = (T) d.getGenericUDAFEvaluator();
      result.add(t);
    }
  }
  return result;
}
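A minimal usage sketch: picking out just the bloom-filter evaluators from a GroupByDesc's aggregators. Here groupByDesc is an assumed, already-built descriptor and the Hive imports are elided:

// groupByDesc is an assumed GroupByDesc, e.g. built as in the
// SemanticAnalyzer example above.
List<GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator> bloomEvals =
    FunctionUtils.extractEvaluators(groupByDesc.getAggregators(),
        GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator.class);
for (GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator eval : bloomEvals) {
  // e.g. tune the expected entry count, as the benchmark example below does
  eval.setHintEntries(10000);
}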
Use of org.apache.hadoop.hive.ql.plan.AggregationDesc in project hive by apache:
the class GroupByOperator, method genColLists.
// Group by contains the columns needed - no need to aggregate from children
public List<String> genColLists(
    HashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx) {
  List<String> colLists = new ArrayList<String>();
  List<ExprNodeDesc> keys = conf.getKeys();
  for (ExprNodeDesc key : keys) {
    colLists = Utilities.mergeUniqElems(colLists, key.getCols());
  }
  List<AggregationDesc> aggrs = conf.getAggregators();
  for (AggregationDesc aggr : aggrs) {
    List<ExprNodeDesc> params = aggr.getParameters();
    for (ExprNodeDesc param : params) {
      colLists = Utilities.mergeUniqElems(colLists, param.getCols());
    }
  }
  return colLists;
}
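Utilities.mergeUniqElems does the de-duplication here. A minimal stand-in with the assumed semantics (merge the second list into the first, keeping each column name at most once; this is an illustration, not Hive's actual source):

// Illustrative stand-in for Utilities.mergeUniqElems (assumed semantics):
// append the elements of src that are not already in dest.
static List<String> mergeUniqElems(List<String> dest, List<String> src) {
  if (src == null) {
    return dest;
  }
  if (dest == null) {
    dest = new ArrayList<String>();
  }
  for (String s : src) {
    if (!dest.contains(s)) {
      dest.add(s);
    }
  }
  return dest;
}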
Use of org.apache.hadoop.hive.ql.plan.AggregationDesc in project hive by apache:
the class GroupByOperator, method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  numRowsInput = 0;
  numRowsHashTbl = 0;
  heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT);
  countAfterReport = 0;
  ObjectInspector rowInspector = inputObjInspectors[0];
  // init keyFields
  int numKeys = conf.getKeys().size();
  keyFields = new ExprNodeEvaluator[numKeys];
  keyObjectInspectors = new ObjectInspector[numKeys];
  currentKeyObjectInspectors = new ObjectInspector[numKeys];
  for (int i = 0; i < numKeys; i++) {
    keyFields[i] = ExprNodeEvaluatorFactory.get(conf.getKeys().get(i), hconf);
    keyObjectInspectors[i] = keyFields[i].initialize(rowInspector);
    currentKeyObjectInspectors[i] = ObjectInspectorUtils.getStandardObjectInspector(
        keyObjectInspectors[i], ObjectInspectorCopyOption.WRITABLE);
  }
  // Initialize the constants for the grouping sets, so that they can be
  // re-used for each row.
  groupingSetsPresent = conf.isGroupingSetsPresent();
  if (groupingSetsPresent) {
    groupingSets = conf.getListGroupingSets();
    groupingSetsPosition = conf.getGroupingSetPosition();
    newKeysGroupingSets = new LongWritable[groupingSets.size()];
    groupingSetsBitSet = new FastBitSet[groupingSets.size()];
    int pos = 0;
    for (Long groupingSet : groupingSets) {
      // Create the mapping corresponding to the grouping set
      newKeysGroupingSets[pos] = new LongWritable(groupingSet);
      groupingSetsBitSet[pos] = groupingSet2BitSet(groupingSet, groupingSetsPosition);
      pos++;
    }
  }
  // Initialize unionExprEval for the reduce side: the reduce KEY has a union
  // field as its last field if there are distinct aggregates in the group-by.
  List<? extends StructField> sfs = ((StructObjectInspector) rowInspector).getAllStructFieldRefs();
  if (sfs.size() > 0) {
    StructField keyField = sfs.get(0);
    if (keyField.getFieldName().toUpperCase().equals(Utilities.ReduceField.KEY.name())) {
      ObjectInspector keyObjInspector = keyField.getFieldObjectInspector();
      if (keyObjInspector instanceof StructObjectInspector) {
        List<? extends StructField> keysfs =
            ((StructObjectInspector) keyObjInspector).getAllStructFieldRefs();
        if (keysfs.size() > 0) {
          // the last field is the union field, if any
          StructField sf = keysfs.get(keysfs.size() - 1);
          if (sf.getFieldObjectInspector().getCategory().equals(ObjectInspector.Category.UNION)) {
            unionExprEval = ExprNodeEvaluatorFactory.get(new ExprNodeColumnDesc(
                TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()),
                keyField.getFieldName() + "." + sf.getFieldName(), null, false), hconf);
            unionExprEval.initialize(rowInspector);
          }
        }
      }
    }
  }
  // init aggregationParameterFields
  List<AggregationDesc> aggrs = conf.getAggregators();
  aggregationParameterFields = new ExprNodeEvaluator[aggrs.size()][];
  aggregationParameterObjectInspectors = new ObjectInspector[aggrs.size()][];
  aggregationParameterStandardObjectInspectors = new ObjectInspector[aggrs.size()][];
  aggregationParameterObjects = new Object[aggrs.size()][];
  aggregationIsDistinct = new boolean[aggrs.size()];
  for (int i = 0; i < aggrs.size(); i++) {
    AggregationDesc aggr = aggrs.get(i);
    List<ExprNodeDesc> parameters = aggr.getParameters();
    aggregationParameterFields[i] = new ExprNodeEvaluator[parameters.size()];
    aggregationParameterObjectInspectors[i] = new ObjectInspector[parameters.size()];
    aggregationParameterStandardObjectInspectors[i] = new ObjectInspector[parameters.size()];
    aggregationParameterObjects[i] = new Object[parameters.size()];
    for (int j = 0; j < parameters.size(); j++) {
      aggregationParameterFields[i][j] = ExprNodeEvaluatorFactory.get(parameters.get(j), hconf);
      aggregationParameterObjectInspectors[i][j] =
          aggregationParameterFields[i][j].initialize(rowInspector);
      if (unionExprEval != null) {
        String[] names = parameters.get(j).getExprString().split("\\.");
        // parameters of the form: KEY.colx:t.coly
        if (Utilities.ReduceField.KEY.name().equals(names[0]) && names.length > 2) {
          String name = names[names.length - 2];
          int tag = Integer.parseInt(name.split("\\:")[1]);
          if (aggr.getDistinct()) {
            // is distinct
            distinctKeyAggrs.computeIfAbsent(tag, t -> new HashSet<>()).add(i);
          } else {
            nonDistinctKeyAggrs.computeIfAbsent(tag, t -> new HashSet<>()).add(i);
          }
        } else {
          // will be KEY._COLx or VALUE._COLx
          nonDistinctAggrs.add(i);
        }
      } else {
        if (aggr.getDistinct()) {
          aggregationIsDistinct[i] = true;
        }
      }
      aggregationParameterStandardObjectInspectors[i][j] =
          ObjectInspectorUtils.getStandardObjectInspector(
              aggregationParameterObjectInspectors[i][j], ObjectInspectorCopyOption.WRITABLE);
      aggregationParameterObjects[i][j] = null;
    }
    if (parameters.size() == 0) {
      // for example: count(*)
      nonDistinctAggrs.add(i);
    }
  }
  // init aggregationClasses
  aggregationEvaluators = new GenericUDAFEvaluator[conf.getAggregators().size()];
  for (int i = 0; i < aggregationEvaluators.length; i++) {
    AggregationDesc agg = conf.getAggregators().get(i);
    aggregationEvaluators[i] = agg.getGenericUDAFEvaluator();
  }
  MapredContext context = MapredContext.get();
  if (context != null) {
    for (GenericUDAFEvaluator genericUDAFEvaluator : aggregationEvaluators) {
      context.setup(genericUDAFEvaluator);
    }
  }
  // The grouping id, which is the last of the key columns, should be pruned;
  // see ColumnPrunerGroupByProc.
  outputKeyLength = conf.pruneGroupingSetId() ? keyFields.length - 1 : keyFields.length;
  // init objectInspectors
  ObjectInspector[] objectInspectors =
      new ObjectInspector[outputKeyLength + aggregationEvaluators.length];
  for (int i = 0; i < outputKeyLength; i++) {
    objectInspectors[i] = currentKeyObjectInspectors[i];
  }
  for (int i = 0; i < aggregationEvaluators.length; i++) {
    objectInspectors[outputKeyLength + i] = aggregationEvaluators[i].init(
        conf.getAggregators().get(i).getMode(), aggregationParameterObjectInspectors[i]);
  }
  aggregationsParametersLastInvoke = new Object[conf.getAggregators().size()][];
  if ((conf.getMode() != GroupByDesc.Mode.HASH || conf.getBucketGroup()) && (!groupingSetsPresent)) {
    aggregations = newAggregations();
    hashAggr = false;
  } else {
    hashAggregations = new HashMap<KeyWrapper, AggregationBuffer[]>(256);
    aggregations = newAggregations();
    hashAggr = true;
    keyPositionsSize = new ArrayList<Integer>();
    aggrPositions = new List[aggregations.length];
    groupbyMapAggrInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL);
    // compare every groupbyMapAggrInterval rows
    numRowsCompareHashAggr = groupbyMapAggrInterval;
    minReductionHashAggr = conf.getMinReductionHashAggr();
  }
  List<String> fieldNames = new ArrayList<String>(conf.getOutputColumnNames());
  outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
      fieldNames, Arrays.asList(objectInspectors));
  KeyWrapperFactory keyWrapperFactory =
      new KeyWrapperFactory(keyFields, keyObjectInspectors, currentKeyObjectInspectors);
  newKeys = keyWrapperFactory.getKeyWrapper();
  isTez = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez");
  isLlap = LlapDaemonInfo.INSTANCE.isLlap();
  numExecutors = isLlap ? LlapDaemonInfo.INSTANCE.getNumExecutors() : 1;
  firstRow = true;
  // The size of a hash table entry is not known up front, so estimate the
  // maximum number of entries instead.
  if (hashAggr) {
    computeMaxEntriesHashAggr();
  }
  memoryMXBean = ManagementFactory.getMemoryMXBean();
  maxMemory = isTez ? getConf().getMaxMemoryAvailable() : memoryMXBean.getHeapMemoryUsage().getMax();
  memoryThreshold = this.getConf().getMemoryThreshold();
  LOG.info("isTez: {} isLlap: {} numExecutors: {} maxMemory: {}",
      isTez, isLlap, numExecutors, maxMemory);
}
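The distinct-aggregate bookkeeping above leans on the "KEY.colx:t.coly" naming convention noted in the inline comment: the union tag t sits in the second-to-last component of the expression string. A standalone sketch of that parsing step, using a hypothetical column name:

// Hypothetical reduce-side column name of the form KEY.colx:t.coly.
String exprString = "KEY._col0:1._col2";
String[] names = exprString.split("\\.");
if ("KEY".equals(names[0]) && names.length > 2) {
  String name = names[names.length - 2]; // "_col0:1"
  int tag = Integer.parseInt(name.split("\\:")[1]); // union tag 1
  System.out.println("distinct union tag = " + tag);
}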
Use of org.apache.hadoop.hive.ql.plan.AggregationDesc in project hive by apache:
the class VectorGroupByOperatorBench, method buildAggregationDesc.
private AggregationDesc buildAggregationDesc(String aggregate, GenericUDAFEvaluator.Mode mode,
    String column, TypeInfo typeInfo) throws SemanticException {
  ExprNodeDesc inputColumn = new ExprNodeColumnDesc(typeInfo, column, "table", false);
  ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
  params.add(inputColumn);
  AggregationDesc agg = new AggregationDesc();
  ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
  GenericUDAFEvaluator genericUDAFEvaluator =
      FunctionRegistry.getGenericUDAFEvaluator(aggregate, ImmutableList.of(oi).asList(), false, false);
  agg.setGenericUDAFEvaluator(genericUDAFEvaluator);
  if (aggregate.equals("bloom_filter")) {
    GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator udafBloomFilterEvaluator =
        (GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator) agg.getGenericUDAFEvaluator();
    udafBloomFilterEvaluator.setHintEntries(10000);
  }
  agg.setGenericUDAFName(aggregate);
  agg.setMode(mode);
  agg.setParameters(params);
  return agg;
}