Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer in project hive by apache.
The class BasePartitionEvaluator, method calcFunctionValue.
/**
 * Given a partition iterator, calculate the function value.
 * @param pItr the partition iterator
 * @param leadLagInfo the LEAD/LAG expressions referenced by the function, if any
 * @return the function value
 * @throws HiveException
 */
protected Object calcFunctionValue(PTFPartitionIterator<Object> pItr, LeadLagInfo leadLagInfo)
    throws HiveException {
  // To handle cases like SUM(LAG(f)) OVER (), where the aggregation function
  // includes a LAG/LEAD call
  PTFOperator.connectLeadLagFunctionsToPartition(leadLagInfo, pItr);

  AggregationBuffer aggBuffer = wrappedEvaluator.getNewAggregationBuffer();
  Object[] argValues = new Object[parameters == null ? 0 : parameters.size()];
  while (pItr.hasNext()) {
    Object row = pItr.next();
    int i = 0;
    if (parameters != null) {
      for (PTFExpressionDef param : parameters) {
        argValues[i++] = param.getExprEvaluator().evaluate(row);
      }
    }
    wrappedEvaluator.aggregate(aggBuffer, argValues);
  }

  // The result object is reused across evaluate() calls, so make a copy here
  return ObjectInspectorUtils.copyToStandardObject(wrappedEvaluator.evaluate(aggBuffer), outputOI);
}
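The method above follows the standard AggregationBuffer lifecycle: obtain a fresh buffer, fold every row's argument values into it, then evaluate and copy the result. A minimal sketch of that lifecycle in isolation, assuming the caller supplies an already-initialized evaluator, the rows as pre-evaluated argument arrays, and the output ObjectInspector (this is not Hive code, just the same steps extracted):

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;

public final class AggregateOnce {
  // One pass over the rows: new buffer, aggregate each row, evaluate, copy.
  static Object aggregateAll(GenericUDAFEvaluator evaluator, Iterable<Object[]> rows,
      ObjectInspector outputOI) throws HiveException {
    AggregationBuffer buf = evaluator.getNewAggregationBuffer();
    for (Object[] argValues : rows) {
      evaluator.aggregate(buf, argValues);
    }
    // evaluate() may return an internally reused object, hence the copy
    return ObjectInspectorUtils.copyToStandardObject(evaluator.evaluate(buf), outputOI);
  }
}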
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer in project hive by apache.
The class GroupByOperator, method shouldBeFlushed.
/**
 * Based on user parameters, decide whether the hash table should be flushed.
 *
 * @param newKeys
 *          keys for the row under consideration
 **/
private boolean shouldBeFlushed(KeyWrapper newKeys) {
  int numEntries = hashAggregations.size();
  long usedMemory;
  float rate;

  // Re-estimate the variable portion of the row size every NUMROWSESTIMATESIZE rows.
  if ((numEntriesHashTable == 0) || ((numEntries % NUMROWSESTIMATESIZE) == 0)) {
    // Check how much memory is currently used
    usedMemory = memoryMXBean.getHeapMemoryUsage().getUsed();
    // TODO: there is no easy and reliable way to compute the memory used by
    // the executor threads and the on-heap cache. Assume the used memory is
    // equally divided among all executors.
    usedMemory = isLlap ? usedMemory / numExecutors : usedMemory;
    rate = (float) usedMemory / (float) maxMemory;
    if (rate > memoryThreshold) {
      if (isTez && numEntriesHashTable == 0) {
        return false;
      } else {
        return true;
      }
    }
    for (Integer pos : keyPositionsSize) {
      Object key = newKeys.getKeyArray()[pos.intValue()];
      // Ignore nulls
      if (key != null) {
        if (key instanceof LazyString) {
          totalVariableSize +=
              ((LazyPrimitive<LazyStringObjectInspector, Text>) key).getWritableObject().getLength();
        } else if (key instanceof String) {
          totalVariableSize += ((String) key).length();
        } else if (key instanceof Text) {
          totalVariableSize += ((Text) key).getLength();
        } else if (key instanceof LazyBinary) {
          totalVariableSize +=
              ((LazyPrimitive<LazyBinaryObjectInspector, BytesWritable>) key).getWritableObject().getLength();
        } else if (key instanceof BytesWritable) {
          totalVariableSize += ((BytesWritable) key).getLength();
        } else if (key instanceof ByteArrayRef) {
          totalVariableSize += ((ByteArrayRef) key).getData().length;
        }
      }
    }
    AggregationBuffer[] aggs = hashAggregations.get(newKeys);
    for (int i = 0; i < aggs.length; i++) {
      AggregationBuffer agg = aggs[i];
      if (estimableAggregationEvaluators[i]) {
        totalVariableSize += ((GenericUDAFEvaluator.AbstractAggregationBuffer) agg).estimate();
        continue;
      }
      if (aggrPositions[i] != null) {
        totalVariableSize += estimateSize(agg, aggrPositions[i]);
      }
    }
    numEntriesVarSize++;
    // Update the number of entries that can fit in the hash table
    numEntriesHashTable =
        (int) (maxHashTblMemory / (fixedRowSize + (totalVariableSize / numEntriesVarSize)));
    if (isLogTraceEnabled) {
      LOG.trace("Hash Aggr: #hash table = " + numEntries + " #max in hash table = " + numEntriesHashTable);
    }
  }

  // Flush if necessary
  if (numEntries >= numEntriesHashTable) {
    return true;
  }
  return false;
}
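For intuition, the capacity estimate at the end is just the memory budget divided by the average entry size: the fixed part plus the observed average of the variable part. A toy computation with invented numbers (not Hive defaults):

public final class CapacityEstimate {
  public static void main(String[] args) {
    long maxHashTblMemory = 100L * 1024 * 1024; // 100 MB hash table budget (invented)
    int fixedRowSize = 64;                      // fixed bytes per entry (invented)
    long totalVariableSize = 32_000;            // variable bytes seen so far (invented)
    int numEntriesVarSize = 1_000;              // entries sampled so far (invented)
    // average entry = 64 + 32 = 96 bytes, so roughly 1.09M entries fit
    int numEntriesHashTable =
        (int) (maxHashTblMemory / (fixedRowSize + (totalVariableSize / numEntriesVarSize)));
    System.out.println(numEntriesHashTable);    // 1092266: flush beyond this count
  }
}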
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer in project SQLWindowing by hbutani.
The class WindowingTableFunction, method executeFnwithWindow.
static ArrayList<Object> executeFnwithWindow(QueryDef qDef, WindowFunctionDef wFnDef, Partition iPart)
    throws HiveException, WindowingException {
  ArrayList<Object> vals = new ArrayList<Object>();
  GenericUDAFEvaluator fEval = wFnDef.getEvaluator();
  Object[] args = new Object[wFnDef.getArgs().size()];
  for (int i = 0; i < iPart.size(); i++) {
    // Each output row aggregates over its own window range, so it gets a fresh buffer
    AggregationBuffer aggBuffer = fEval.getNewAggregationBuffer();
    Range rng = getRange(wFnDef, i, iPart);
    PartitionIterator<Object> rItr = rng.iterator();
    RuntimeUtils.connectLeadLagFunctionsToPartition(qDef, rItr);
    while (rItr.hasNext()) {
      Object row = rItr.next();
      int j = 0;
      for (ArgDef arg : wFnDef.getArgs()) {
        args[j++] = arg.getExprEvaluator().evaluate(row);
      }
      fEval.aggregate(aggBuffer, args);
    }
    Object out = fEval.evaluate(aggBuffer);
    out = ObjectInspectorUtils.copyToStandardObject(out, wFnDef.getOI(), ObjectInspectorCopyOption.WRITABLE);
    vals.add(out);
  }
  return vals;
}
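Note the cost profile: because every output row re-aggregates its full window range from scratch with a fresh buffer, the work is O(rows x frame size). A plain-Java sketch of that shape, with a simple long sum standing in for the generic evaluator:

public final class FrameSum {
  // For each row i, re-aggregate from scratch over [i - preceding, i + following].
  static long[] sumOverFrames(long[] values, int preceding, int following) {
    long[] out = new long[values.length];
    for (int i = 0; i < values.length; i++) {
      long acc = 0; // fresh "aggregation buffer" per row
      int lo = Math.max(0, i - preceding);
      int hi = Math.min(values.length - 1, i + following);
      for (int j = lo; j <= hi; j++) {
        acc += values[j];
      }
      out[i] = acc;
    }
    return out;
  }
}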
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer in project hive by apache.
The class TestStreamingSum, method _agg.
public static <T, TW> void _agg(GenericUDAFResolver fnR, TypeInfo[] inputTypes, Iterator<T> inVals,
    TypeHandler<T, TW> typeHandler, TW[] in, ObjectInspector[] inputOIs, int inSz,
    int numPreceding, int numFollowing, Iterator<T> outVals) throws HiveException {
  GenericUDAFEvaluator fn = fnR.getEvaluator(inputTypes);
  fn.init(Mode.COMPLETE, inputOIs);
  fn = fn.getWindowingEvaluator(wdwFrame(numPreceding, numFollowing));
  AggregationBuffer agg = fn.getNewAggregationBuffer();
  ISupportStreamingModeForWindowing oS = (ISupportStreamingModeForWindowing) fn;
  int outSz = 0;
  while (inVals.hasNext()) {
    typeHandler.set(inVals.next(), in[0]);
    fn.aggregate(agg, in);
    // A result becomes available once the frame of some earlier row is complete
    Object out = oS.getNextResult(agg);
    if (out != null) {
      if (out == ISupportStreamingModeForWindowing.NULL_RESULT) {
        out = null;
      } else {
        try {
          out = typeHandler.get((TW) out);
        } catch (ClassCastException ce) {
          // out is already in its final (non-writable) form; leave it as-is
        }
      }
      Assert.assertEquals(out, outVals.next());
      outSz++;
    }
  }
  fn.terminate(agg);
  // Drain the remaining rows, whose frames end at the partition boundary
  while (outSz < inSz) {
    Object out = oS.getNextResult(agg);
    if (out == ISupportStreamingModeForWindowing.NULL_RESULT) {
      out = null;
    } else {
      try {
        out = typeHandler.get((TW) out);
      } catch (ClassCastException ce) {
        // out is already in its final (non-writable) form; leave it as-is
      }
    }
    Assert.assertEquals(out, outVals.next());
    outSz++;
  }
}
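The test drives the two phases of a streaming windowing evaluator: while input arrives, a row's result is emitted as soon as its frame is complete (getNextResult returns null until then, and NULL_RESULT for a genuine SQL NULL); after terminate(), the rows whose frames are cut off by the partition boundary are drained. A plain-Java sketch of the same emit-then-drain shape, with a long sum in place of the Hive evaluator:

import java.util.ArrayList;
import java.util.List;

public final class StreamingFrameSum {
  static List<Long> run(long[] in, int preceding, int following) {
    List<Long> out = new ArrayList<>();
    // Phase 1: once in[i] arrives, row i - following has a complete frame.
    for (int i = 0; i < in.length; i++) {
      int r = i - following;
      if (r >= 0) {
        out.add(frameSum(in, r, preceding, following));
      }
    }
    // Phase 2 (the "terminate" drain): rows whose frames end at the partition boundary.
    for (int r = Math.max(0, in.length - following); r < in.length; r++) {
      out.add(frameSum(in, r, preceding, following));
    }
    return out;
  }

  private static long frameSum(long[] in, int r, int preceding, int following) {
    long sum = 0;
    for (int j = Math.max(0, r - preceding); j <= Math.min(in.length - 1, r + following); j++) {
      sum += in[j];
    }
    return sum;
  }
}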
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer in project hive by apache.
The class GroupByOperator, method estimateRowSize.
/**
 * Estimate the fixed size of each hash table row.
 **/
private void estimateRowSize() throws HiveException {
  // Estimate the size of each entry: a datatype of unknown size (String,
  // Struct, etc.) is assumed to be 256 bytes for now; 64 bytes is the
  // overhead for a reference.
  fixedRowSize = javaHashEntryOverHead;
  ArrayList<ExprNodeDesc> keys = conf.getKeys();

  // Go over all the keys and get the size of their fixed-length fields;
  // keep track of the variable-length keys.
  for (int pos = 0; pos < keys.size(); pos++) {
    fixedRowSize += getSize(pos, keys.get(pos).getTypeInfo());
  }

  // Go over all the aggregation classes and get the size of their
  // fixed-length fields; keep track of the variable-length fields.
  estimableAggregationEvaluators = new boolean[aggregationEvaluators.length];
  for (int i = 0; i < aggregationEvaluators.length; i++) {
    fixedRowSize += javaObjectOverHead;
    AggregationBuffer agg = aggregationEvaluators[i].getNewAggregationBuffer();
    if (GenericUDAFEvaluator.isEstimable(agg)) {
      estimableAggregationEvaluators[i] = true;
      continue;
    }
    Field[] fArr = ObjectInspectorUtils.getDeclaredNonStaticFields(agg.getClass());
    for (Field f : fArr) {
      fixedRowSize += getSize(i, f.getType(), f);
    }
  }
}
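The reflective field walk is the fallback; a buffer opts into the cheap path seen in shouldBeFlushed by extending AbstractAggregationBuffer, carrying the estimable AggregationType annotation (which is what GenericUDAFEvaluator.isEstimable checks), and implementing estimate(). A sketch in the style of Hive's built-in sum buffers; the field layout here is illustrative:

import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AbstractAggregationBuffer;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationType;
import org.apache.hadoop.hive.ql.util.JavaDataModel;

// isEstimable() returns true for this buffer, so GroupByOperator calls
// estimate() instead of reflecting over the declared fields.
@AggregationType(estimable = true)
class SumLongBuffer extends AbstractAggregationBuffer {
  boolean empty = true;
  long sum = 0;

  @Override
  public int estimate() {
    // one boolean + one long, in JavaDataModel's size accounting
    return JavaDataModel.PRIMITIVES1 + JavaDataModel.PRIMITIVES2;
  }
}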