Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator in project hive by apache.
The class WindowingTableFunction, method processRow.
/*
 * (non-Javadoc)
 *
 * @see org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator#processRow(java.lang.Object)
 *
 * - hand the row to each Function, provided there are enough rows for the Function's window
 * - call getNextObject on each Function
 * - output as many rows as possible, based on the minimum size of the output lists
 */
@Override
public List<Object> processRow(Object row) throws HiveException {
  /*
   * Once enough rows have been output, there is no need to process input rows.
   */
  if (streamingState.rankLimitReached()) {
    return null;
  }
  streamingState.rollingPart.append(row);
  WindowTableFunctionDef tabDef = (WindowTableFunctionDef) tableDef;
  for (int i = 0; i < tabDef.getWindowFunctions().size(); i++) {
    WindowFunctionDef wFn = tabDef.getWindowFunctions().get(i);
    GenericUDAFEvaluator fnEval = wFn.getWFnEval();
    int a = 0;
    if (wFn.getArgs() != null) {
      for (PTFExpressionDef arg : wFn.getArgs()) {
        streamingState.funcArgs[i][a++] = arg.getExprEvaluator().evaluate(row);
      }
    }
    if (fnEval != null && fnEval instanceof ISupportStreamingModeForWindowing) {
      fnEval.aggregate(streamingState.aggBuffers[i], streamingState.funcArgs[i]);
      Object out = ((ISupportStreamingModeForWindowing) fnEval)
          .getNextResult(streamingState.aggBuffers[i]);
      if (out != null) {
        streamingState.fnOutputs[i].add(
            out == ISupportStreamingModeForWindowing.NULL_RESULT ? null : out);
      }
    } else {
      int rowToProcess = streamingState.rollingPart.rowToProcess(wFn.getWindowFrame());
      if (rowToProcess >= 0) {
        Object out = evaluateWindowFunction(wFn, rowToProcess, streamingState.rollingPart);
        streamingState.fnOutputs[i].add(out);
      }
    }
  }
  List<Object> oRows = new ArrayList<Object>();
  while (true) {
    boolean hasRow = streamingState.hasOutputRow();
    if (!hasRow) {
      break;
    }
    oRows.add(streamingState.nextOutputRow());
  }
  return oRows.size() == 0 ? null : oRows;
}
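The streaming branch above boils down to a small per-row contract. The following is a minimal sketch of that contract, assuming the evaluator implements ISupportStreamingModeForWindowing and that the aggregation buffer and argument array are managed by the caller (the helper class and method names are hypothetical, not part of Hive):

import java.util.List;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing;

final class StreamingWindowFnSketch {
  // Feed one row's evaluated arguments to the evaluator, then collect any result
  // that has become final. NULL_RESULT marks "a result is ready and it is null".
  static void feedRow(GenericUDAFEvaluator fnEval, AggregationBuffer buffer,
      Object[] rowArgs, List<Object> outputs) throws HiveException {
    // aggregate() dispatches to iterate() or merge() depending on the evaluator's mode
    fnEval.aggregate(buffer, rowArgs);
    Object out = ((ISupportStreamingModeForWindowing) fnEval).getNextResult(buffer);
    if (out != null) {
      outputs.add(out == ISupportStreamingModeForWindowing.NULL_RESULT ? null : out);
    }
  }
}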
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator in project hive by apache.
The class WindowingTableFunction, method iterator.
@SuppressWarnings("rawtypes")
@Override
public Iterator<Object> iterator(PTFPartitionIterator<Object> pItr) throws HiveException {
  WindowTableFunctionDef wTFnDef = (WindowTableFunctionDef) getTableDef();
  ArrayList<Object> output = new ArrayList<Object>();
  List<?>[] outputFromPivotFunctions = new List<?>[wTFnDef.getWindowFunctions().size()];
  ArrayList<Integer> wFnsWithWindows = new ArrayList<Integer>();
  PTFPartition iPart = pItr.getPartition();
  int i = 0;
  for (WindowFunctionDef wFn : wTFnDef.getWindowFunctions()) {
    boolean processWindow = processWindow(wFn.getWindowFrame());
    pItr.reset();
    if (!processWindow && !wFn.isPivotResult()) {
      Object out = evaluateFunctionOnPartition(wFn, iPart);
      output.add(out);
    } else if (wFn.isPivotResult()) {
      GenericUDAFEvaluator streamingEval =
          wFn.getWFnEval().getWindowingEvaluator(wFn.getWindowFrame());
      if (streamingEval != null && streamingEval instanceof ISupportStreamingModeForWindowing) {
        ISupportStreamingModeForWindowing strEval = (ISupportStreamingModeForWindowing) streamingEval;
        if (strEval.getRowsRemainingAfterTerminate() == 0) {
          wFn.setWFnEval(streamingEval);
          if (wFn.getOI() instanceof ListObjectInspector) {
            ListObjectInspector listOI = (ListObjectInspector) wFn.getOI();
            wFn.setOI(listOI.getListElementObjectInspector());
          }
          output.add(null);
          wFnsWithWindows.add(i);
        } else {
          outputFromPivotFunctions[i] = (List) evaluateFunctionOnPartition(wFn, iPart);
          output.add(null);
        }
      } else {
        outputFromPivotFunctions[i] = (List) evaluateFunctionOnPartition(wFn, iPart);
        output.add(null);
      }
    } else {
      output.add(null);
      wFnsWithWindows.add(i);
    }
    i++;
  }
  for (i = 0; i < iPart.getOutputOI().getAllStructFieldRefs().size(); i++) {
    output.add(null);
  }
  if (wTFnDef.getRankLimit() != -1) {
    rnkLimitDef = new RankLimit(wTFnDef.getRankLimit(), wTFnDef.getRankLimitFunction(),
        wTFnDef.getWindowFunctions());
  }
  return new WindowingIterator(iPart, output, outputFromPivotFunctions,
      ArrayUtils.toPrimitive(wFnsWithWindows.toArray(new Integer[wFnsWithWindows.size()])));
}
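As a rough summary of the decision taken in the loop above, a pivot-result window function is switched to streaming evaluation only when its windowing evaluator both supports streaming and emits each result without lagging behind the input. A hedged, standalone sketch of that check (the helper class and method are hypothetical, not part of Hive):

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing;

final class StreamingDecisionSketch {
  // true if the function's windowing evaluator can produce one result per input row
  static boolean canStreamOverPartition(WindowFunctionDef wFn) throws HiveException {
    GenericUDAFEvaluator winEval =
        wFn.getWFnEval().getWindowingEvaluator(wFn.getWindowFrame());
    return winEval instanceof ISupportStreamingModeForWindowing
        && ((ISupportStreamingModeForWindowing) winEval).getRowsRemainingAfterTerminate() == 0;
  }
}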
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator in project hive by apache.
The class GroupByOperator, method initializeOp.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  numRowsInput = 0;
  numRowsHashTbl = 0;
  heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT);
  countAfterReport = 0;
  groupingSetsPresent = conf.isGroupingSetsPresent();
  ObjectInspector rowInspector = inputObjInspectors[0];
  // init keyFields
  int numKeys = conf.getKeys().size();
  keyFields = new ExprNodeEvaluator[numKeys];
  keyObjectInspectors = new ObjectInspector[numKeys];
  currentKeyObjectInspectors = new ObjectInspector[numKeys];
  for (int i = 0; i < numKeys; i++) {
    keyFields[i] = ExprNodeEvaluatorFactory.get(conf.getKeys().get(i), hconf);
    keyObjectInspectors[i] = keyFields[i].initialize(rowInspector);
    currentKeyObjectInspectors[i] = ObjectInspectorUtils.getStandardObjectInspector(
        keyObjectInspectors[i], ObjectInspectorCopyOption.WRITABLE);
  }
  // initialize the constants for the grouping sets, so that they can be re-used for each row
  if (groupingSetsPresent) {
    groupingSets = conf.getListGroupingSets();
    groupingSetsPosition = conf.getGroupingSetPosition();
    newKeysGroupingSets = new IntWritable[groupingSets.size()];
    groupingSetsBitSet = new FastBitSet[groupingSets.size()];
    int pos = 0;
    for (Integer groupingSet : groupingSets) {
      // Create the mapping corresponding to the grouping set
      newKeysGroupingSets[pos] = new IntWritable(groupingSet);
      groupingSetsBitSet[pos] = groupingSet2BitSet(groupingSet, groupingSetsPosition);
      pos++;
    }
  }
  // initialize unionExpr for reduce-side
  // reduce KEY has union field as the last field if there are distinct
  // aggregates in group-by.
  List<? extends StructField> sfs = ((StructObjectInspector) rowInspector).getAllStructFieldRefs();
  if (sfs.size() > 0) {
    StructField keyField = sfs.get(0);
    if (keyField.getFieldName().toUpperCase().equals(Utilities.ReduceField.KEY.name())) {
      ObjectInspector keyObjInspector = keyField.getFieldObjectInspector();
      if (keyObjInspector instanceof StructObjectInspector) {
        List<? extends StructField> keysfs =
            ((StructObjectInspector) keyObjInspector).getAllStructFieldRefs();
        if (keysfs.size() > 0) {
          // the last field is the union field, if any
          StructField sf = keysfs.get(keysfs.size() - 1);
          if (sf.getFieldObjectInspector().getCategory().equals(ObjectInspector.Category.UNION)) {
            unionExprEval = ExprNodeEvaluatorFactory.get(new ExprNodeColumnDesc(
                TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()),
                keyField.getFieldName() + "." + sf.getFieldName(), null, false), hconf);
            unionExprEval.initialize(rowInspector);
          }
        }
      }
    }
  }
  // init aggregationParameterFields
  ArrayList<AggregationDesc> aggrs = conf.getAggregators();
  aggregationParameterFields = new ExprNodeEvaluator[aggrs.size()][];
  aggregationParameterObjectInspectors = new ObjectInspector[aggrs.size()][];
  aggregationParameterStandardObjectInspectors = new ObjectInspector[aggrs.size()][];
  aggregationParameterObjects = new Object[aggrs.size()][];
  aggregationIsDistinct = new boolean[aggrs.size()];
  for (int i = 0; i < aggrs.size(); i++) {
    AggregationDesc aggr = aggrs.get(i);
    ArrayList<ExprNodeDesc> parameters = aggr.getParameters();
    aggregationParameterFields[i] = new ExprNodeEvaluator[parameters.size()];
    aggregationParameterObjectInspectors[i] = new ObjectInspector[parameters.size()];
    aggregationParameterStandardObjectInspectors[i] = new ObjectInspector[parameters.size()];
    aggregationParameterObjects[i] = new Object[parameters.size()];
    for (int j = 0; j < parameters.size(); j++) {
      aggregationParameterFields[i][j] = ExprNodeEvaluatorFactory.get(parameters.get(j), hconf);
      aggregationParameterObjectInspectors[i][j] =
          aggregationParameterFields[i][j].initialize(rowInspector);
      if (unionExprEval != null) {
        String[] names = parameters.get(j).getExprString().split("\\.");
        // parameters of the form : KEY.colx:t.coly
        if (Utilities.ReduceField.KEY.name().equals(names[0]) && names.length > 2) {
          String name = names[names.length - 2];
          int tag = Integer.parseInt(name.split("\\:")[1]);
          if (aggr.getDistinct()) {
            // is distinct
            Set<Integer> set = distinctKeyAggrs.get(tag);
            if (null == set) {
              set = new HashSet<Integer>();
              distinctKeyAggrs.put(tag, set);
            }
            if (!set.contains(i)) {
              set.add(i);
            }
          } else {
            Set<Integer> set = nonDistinctKeyAggrs.get(tag);
            if (null == set) {
              set = new HashSet<Integer>();
              nonDistinctKeyAggrs.put(tag, set);
            }
            if (!set.contains(i)) {
              set.add(i);
            }
          }
        } else {
          // will be KEY._COLx or VALUE._COLx
          if (!nonDistinctAggrs.contains(i)) {
            nonDistinctAggrs.add(i);
          }
        }
      } else {
        if (aggr.getDistinct()) {
          aggregationIsDistinct[i] = true;
        }
      }
      aggregationParameterStandardObjectInspectors[i][j] =
          ObjectInspectorUtils.getStandardObjectInspector(
              aggregationParameterObjectInspectors[i][j], ObjectInspectorCopyOption.WRITABLE);
      aggregationParameterObjects[i][j] = null;
    }
    if (parameters.size() == 0) {
      // for ex: count(*)
      if (!nonDistinctAggrs.contains(i)) {
        nonDistinctAggrs.add(i);
      }
    }
  }
  // init aggregationClasses
  aggregationEvaluators = new GenericUDAFEvaluator[conf.getAggregators().size()];
  for (int i = 0; i < aggregationEvaluators.length; i++) {
    AggregationDesc agg = conf.getAggregators().get(i);
    aggregationEvaluators[i] = agg.getGenericUDAFEvaluator();
  }
  MapredContext context = MapredContext.get();
  if (context != null) {
    for (GenericUDAFEvaluator genericUDAFEvaluator : aggregationEvaluators) {
      context.setup(genericUDAFEvaluator);
    }
  }
  // grouping id should be pruned, which is the last of key columns
  // see ColumnPrunerGroupByProc
  outputKeyLength = conf.pruneGroupingSetId() ? keyFields.length - 1 : keyFields.length;
  // init objectInspectors
  ObjectInspector[] objectInspectors =
      new ObjectInspector[outputKeyLength + aggregationEvaluators.length];
  for (int i = 0; i < outputKeyLength; i++) {
    objectInspectors[i] = currentKeyObjectInspectors[i];
  }
  for (int i = 0; i < aggregationEvaluators.length; i++) {
    objectInspectors[outputKeyLength + i] = aggregationEvaluators[i].init(
        conf.getAggregators().get(i).getMode(), aggregationParameterObjectInspectors[i]);
  }
  aggregationsParametersLastInvoke = new Object[conf.getAggregators().size()][];
  if ((conf.getMode() != GroupByDesc.Mode.HASH || conf.getBucketGroup()) && (!groupingSetsPresent)) {
    aggregations = newAggregations();
    hashAggr = false;
  } else {
    hashAggregations = new HashMap<KeyWrapper, AggregationBuffer[]>(256);
    aggregations = newAggregations();
    hashAggr = true;
    keyPositionsSize = new ArrayList<Integer>();
    aggrPositions = new List[aggregations.length];
    groupbyMapAggrInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL);
    // compare every groupbyMapAggrInterval rows
    numRowsCompareHashAggr = groupbyMapAggrInterval;
    minReductionHashAggr = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
  }
  List<String> fieldNames = new ArrayList<String>(conf.getOutputColumnNames());
  outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
      fieldNames, Arrays.asList(objectInspectors));
  KeyWrapperFactory keyWrapperFactory =
      new KeyWrapperFactory(keyFields, keyObjectInspectors, currentKeyObjectInspectors);
  newKeys = keyWrapperFactory.getKeyWrapper();
  isTez = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez");
  isLlap = isTez && HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_MODE).equals("llap");
  numExecutors = isLlap ? HiveConf.getIntVar(hconf, HiveConf.ConfVars.LLAP_DAEMON_NUM_EXECUTORS) : 1;
  firstRow = true;
  // the size of a hash table entry is not known up front, so estimate the max
  // number of entries based on the number of entries and the available memory
  if (hashAggr) {
    computeMaxEntriesHashAggr();
  }
  memoryMXBean = ManagementFactory.getMemoryMXBean();
  maxMemory = isTez ? getConf().getMaxMemoryAvailable() : memoryMXBean.getHeapMemoryUsage().getMax();
  memoryThreshold = this.getConf().getMemoryThreshold();
  LOG.info("isTez: {} isLlap: {} numExecutors: {} maxMemory: {}",
      isTez, isLlap, numExecutors, maxMemory);
}
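A point worth calling out in the init loop near the end: the mode passed to GenericUDAFEvaluator.init() determines which callbacks the operator will drive later. As a rough reminder (my summary, not code from Hive), PARTIAL1 and COMPLETE consume original rows via iterate(), while PARTIAL2 and FINAL consume partial aggregations via merge(); PARTIAL1 and PARTIAL2 end with terminatePartial(), COMPLETE and FINAL end with terminate(). A tiny illustrative helper:

import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;

final class UdafModeSketch {
  // true if the mode reads original rows (iterate), false if it merges partial results
  static boolean consumesOriginalRows(GenericUDAFEvaluator.Mode mode) {
    return mode == GenericUDAFEvaluator.Mode.PARTIAL1
        || mode == GenericUDAFEvaluator.Mode.COMPLETE;
  }
}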
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator in project hive by apache.
The class RewriteQueryUsingAggregateIndexCtx, method replaceGroupByOperatorProcess.
/**
 * We need to replace the count(indexed_column_key) GenericUDAF aggregation
 * function of the group-by construct with the "sum" GenericUDAF. This method creates a
 * new operator tree for a sample query that builds a GroupByOperator with the
 * sum aggregation function, and uses that GroupByOperator's information to
 * replace the original GroupByOperator's aggregation information. It replaces
 * the AggregationDesc (aggregation descriptor) of the old GroupByOperator
 * with the new AggregationDesc of the new GroupByOperator.
 */
private void replaceGroupByOperatorProcess(GroupByOperator operator, int index) throws SemanticException {
  RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
  // We need to replace the GroupByOperator which is before RS
  if (index == 0) {
    // the query contains the sum aggregation GenericUDAF
    String selReplacementCommand = "select sum(`" + rewriteQueryCtx.getAggregateFunction() + "`)"
        + " from `" + rewriteQueryCtx.getIndexName() + "` group by "
        + rewriteQueryCtx.getIndexKey() + " ";
    // retrieve the operator tree for the query, and the required GroupByOperator from it
    Operator<?> newOperatorTree = RewriteParseContextGenerator.generateOperatorTree(
        rewriteQueryCtx.getParseContext().getQueryState(), selReplacementCommand);
    // we get our new GroupByOperator here
    GroupByOperator newGbyOperator =
        OperatorUtils.findLastOperatorUpstream(newOperatorTree, GroupByOperator.class);
    if (newGbyOperator == null) {
      throw new SemanticException("Error replacing GroupBy operator.");
    }
    // we need this information to set the correct colList, outputColumnNames
    // in SelectOperator
    ExprNodeColumnDesc aggrExprNode = null;
    // Construct the new AggregationDesc to get rid of the current
    // internal names and replace them with new internal names
    // as required by the operator tree
    GroupByDesc newConf = newGbyOperator.getConf();
    List<AggregationDesc> newAggrList = newConf.getAggregators();
    if (newAggrList != null && newAggrList.size() > 0) {
      for (AggregationDesc aggregationDesc : newAggrList) {
        rewriteQueryCtx.setEval(aggregationDesc.getGenericUDAFEvaluator());
        aggrExprNode = (ExprNodeColumnDesc) aggregationDesc.getParameters().get(0);
        rewriteQueryCtx.setAggrExprNode(aggrExprNode);
      }
    }
    // Now the GroupByOperator has the new AggregationList;
    // sum(`_count_of_indexed_key`)
    // instead of count(indexed_key)
    GroupByDesc oldConf = operator.getConf();
    oldConf.setAggregators((ArrayList<AggregationDesc>) newAggrList);
    operator.setConf(oldConf);
  } else {
    // we just need to reset the GenericUDAFEvaluator and its name for this
    // GroupByOperator whose parent is the ReduceSinkOperator
    GroupByDesc childConf = operator.getConf();
    List<AggregationDesc> childAggrList = childConf.getAggregators();
    if (childAggrList != null && childAggrList.size() > 0) {
      for (AggregationDesc aggregationDesc : childAggrList) {
        List<ExprNodeDesc> paraList = aggregationDesc.getParameters();
        List<ObjectInspector> parametersOIList = new ArrayList<ObjectInspector>();
        for (ExprNodeDesc expr : paraList) {
          parametersOIList.add(expr.getWritableObjectInspector());
        }
        GenericUDAFEvaluator evaluator =
            FunctionRegistry.getGenericUDAFEvaluator("sum", parametersOIList, false, false);
        aggregationDesc.setGenericUDAFEvaluator(evaluator);
        aggregationDesc.setGenericUDAFName("sum");
      }
    }
  }
}
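The else-branch relies on FunctionRegistry.getGenericUDAFEvaluator to resolve the "sum" evaluator from the parameter object inspectors. Below is a self-contained sketch of that lookup for a single bigint argument; the object-inspector choice is an assumption for illustration only, since the rewrite code derives the OIs from the aggregation's actual parameters, and the helper class is hypothetical:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

final class SumEvaluatorLookupSketch {
  // Resolve the "sum" GenericUDAFEvaluator for one bigint (long writable) argument
  static GenericUDAFEvaluator sumOfBigint() throws SemanticException {
    List<ObjectInspector> argOIs = new ArrayList<ObjectInspector>();
    argOIs.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
    // signature: (function name, argument OIs, isDistinct, isAllColumns)
    return FunctionRegistry.getGenericUDAFEvaluator("sum", argOIs, false, false);
  }
}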
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator in project hive by apache.
The class PTFDeserializer, method setupWdwFnEvaluator.
static void setupWdwFnEvaluator(WindowFunctionDef def) throws HiveException {
  List<PTFExpressionDef> args = def.getArgs();
  List<ObjectInspector> argOIs = new ArrayList<ObjectInspector>();
  ObjectInspector[] funcArgOIs = null;
  if (args != null) {
    for (PTFExpressionDef arg : args) {
      argOIs.add(arg.getOI());
    }
    funcArgOIs = new ObjectInspector[args.size()];
    funcArgOIs = argOIs.toArray(funcArgOIs);
  }
  GenericUDAFEvaluator wFnEval = def.getWFnEval();
  ObjectInspector OI = wFnEval.init(GenericUDAFEvaluator.Mode.COMPLETE, funcArgOIs);
  def.setWFnEval(wFnEval);
  def.setOI(OI);
}
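setupWdwFnEvaluator initializes the window function's evaluator in COMPLETE mode, meaning the evaluator will see original rows and must produce the full result itself. The following is a minimal sketch of that lifecycle over a partition, assuming the per-row argument values have already been evaluated into arrays (the helper class and its inputs are hypothetical):

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

final class CompleteModeLifecycleSketch {
  static Object evaluateOverRows(GenericUDAFEvaluator eval, ObjectInspector[] argOIs,
      Object[][] rowsOfArgs) throws HiveException {
    eval.init(GenericUDAFEvaluator.Mode.COMPLETE, argOIs);  // returns the result's ObjectInspector
    AggregationBuffer buf = eval.getNewAggregationBuffer();
    for (Object[] args : rowsOfArgs) {
      eval.iterate(buf, args);   // COMPLETE mode consumes original rows
    }
    return eval.terminate(buf);  // full aggregation result
  }
}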