Search in sources :

Example 1 with AggregationBuffer

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer in project hive by apache.

the class BasePartitionEvaluator method calcFunctionValue.

/**
 * Aggregates every row produced by a partition iterator and returns the
 * resulting function value.
 *
 * @param pItr         iterator over the partition rows to aggregate
 * @param leadLagInfo  handed, together with {@code pItr}, to
 *                     {@code PTFOperator.connectLeadLagFunctionsToPartition}
 *                     before any row is read
 * @return a standard-object copy of the value produced by the wrapped evaluator
 * @throws HiveException declared by this method; raised by the evaluator calls it makes
 */
protected Object calcFunctionValue(PTFPartitionIterator<Object> pItr, LeadLagInfo leadLagInfo) throws HiveException {
    // The aggregation may itself contain LAG/LEAD calls (e.g. SUM(LAG(f)) over()),
    // so those functions are connected to this partition first.
    PTFOperator.connectLeadLagFunctionsToPartition(leadLagInfo, pItr);

    AggregationBuffer buffer = wrappedEvaluator.getNewAggregationBuffer();
    int argCount = (parameters != null) ? parameters.size() : 0;
    // One argument array is allocated up front and refilled for every row.
    Object[] argValues = new Object[argCount];

    while (pItr.hasNext()) {
        Object currentRow = pItr.next();
        if (parameters != null) {
            int slot = 0;
            for (PTFExpressionDef param : parameters) {
                argValues[slot] = param.getExprEvaluator().evaluate(currentRow);
                slot++;
            }
        }
        wrappedEvaluator.aggregate(buffer, argValues);
    }

    // The evaluator reuses its result object while evaluating, so hand back a copy.
    Object result = wrappedEvaluator.evaluate(buffer);
    return ObjectInspectorUtils.copyToStandardObject(result, outputOI);
}
Also used : PTFExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef) AggregationBuffer(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer) AbstractAggregationBuffer(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AbstractAggregationBuffer)

Example 2 with AggregationBuffer

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer in project hive by apache.

the class GroupByOperator method shouldBeFlushed.

/**
 * Decides, from the user-configured limits, whether the hash table should be flushed.
 *
 * <p>When no capacity estimate exists yet ({@code numEntriesHashTable == 0}) or the
 * current entry count is a multiple of {@code NUMROWSESTIMATESIZE}, the method first
 * checks heap usage against {@code memoryThreshold} and then refreshes the estimate of
 * how many entries fit in the hash table, using the variable-length sizes of
 * {@code newKeys} and of its aggregation buffers.
 *
 * @param newKeys
 *          keys for the row under consideration
 * @return {@code true} if the hash table should be flushed, {@code false} otherwise
 **/
private boolean shouldBeFlushed(KeyWrapper newKeys) {
    int numEntries = hashAggregations.size();

    boolean refreshEstimate = (numEntriesHashTable == 0) || ((numEntries % NUMROWSESTIMATESIZE) == 0);
    if (refreshEstimate) {
        // Check how much heap memory is currently in use.
        long usedMemory = memoryMXBean.getHeapMemoryUsage().getUsed();
        // TODO: there is no easy and reliable way to compute the memory used by the executor threads and on-heap cache.
        // Assuming the used memory is equally divided among all executors.
        if (isLlap) {
            usedMemory = usedMemory / numExecutors;
        }
        float rate = (float) usedMemory / (float) maxMemory;
        if (rate > memoryThreshold) {
            // Over the threshold: flush, except on Tez while no capacity estimate exists yet.
            return !(isTez && numEntriesHashTable == 0);
        }

        // Add the size of every non-null variable-length key of this row.
        for (Integer position : keyPositionsSize) {
            Object key = newKeys.getKeyArray()[position.intValue()];
            if (key == null) {
                continue;
            }
            if (key instanceof LazyString) {
                totalVariableSize += ((LazyPrimitive<LazyStringObjectInspector, Text>) key).getWritableObject().getLength();
            } else if (key instanceof String) {
                totalVariableSize += ((String) key).length();
            } else if (key instanceof Text) {
                totalVariableSize += ((Text) key).getLength();
            } else if (key instanceof LazyBinary) {
                totalVariableSize += ((LazyPrimitive<LazyBinaryObjectInspector, BytesWritable>) key).getWritableObject().getLength();
            } else if (key instanceof BytesWritable) {
                totalVariableSize += ((BytesWritable) key).getLength();
            } else if (key instanceof ByteArrayRef) {
                totalVariableSize += ((ByteArrayRef) key).getData().length;
            }
        }

        // Add the variable-length portion of each aggregation buffer stored for these keys.
        AggregationBuffer[] buffers = hashAggregations.get(newKeys);
        for (int idx = 0; idx < buffers.length; idx++) {
            AggregationBuffer buffer = buffers[idx];
            if (estimableAggregationEvaluators[idx]) {
                totalVariableSize += ((GenericUDAFEvaluator.AbstractAggregationBuffer) buffer).estimate();
            } else if (aggrPositions[idx] != null) {
                totalVariableSize += estimateSize(buffer, aggrPositions[idx]);
            }
        }

        numEntriesVarSize++;

        // Update the number of entries that can fit in the hash table
        numEntriesHashTable = (int) (maxHashTblMemory / (fixedRowSize + (totalVariableSize / numEntriesVarSize)));
        if (isLogTraceEnabled) {
            LOG.trace("Hash Aggr: #hash table = " + numEntries + " #max in hash table = " + numEntriesHashTable);
        }
    }

    // Flush once the table holds at least as many entries as the estimate allows.
    return numEntries >= numEntriesHashTable;
}
Also used : GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) LazyBinaryObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) LazyPrimitive(org.apache.hadoop.hive.serde2.lazy.LazyPrimitive) LazyBinary(org.apache.hadoop.hive.serde2.lazy.LazyBinary) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) UnionObject(org.apache.hadoop.hive.serde2.objectinspector.UnionObject) AggregationBuffer(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer)

Example 3 with AggregationBuffer

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer in project SQLWindowing by hbutani.

the class WindowingTableFunction method executeFnwithWindow.

/**
 * Evaluates a window function once for every row position of a partition.
 *
 * <p>For each position a new aggregation buffer is created, the rows of the range
 * returned by {@code getRange} for that position are aggregated into it, and the
 * evaluated result is copied to a standard writable object.
 *
 * @param qDef   query definition, used to connect lead/lag functions to each range iterator
 * @param wFnDef window function definition supplying the evaluator, arguments and output inspector
 * @param iPart  partition whose rows are processed
 * @return one result per row position of {@code iPart}, in position order
 * @throws HiveException      declared by this method
 * @throws WindowingException declared by this method
 */
static ArrayList<Object> executeFnwithWindow(QueryDef qDef, WindowFunctionDef wFnDef, Partition iPart) throws HiveException, WindowingException {
    ArrayList<Object> results = new ArrayList<Object>();
    GenericUDAFEvaluator evaluator = wFnDef.getEvaluator();
    // A single argument array is refilled for every row that is aggregated.
    Object[] argValues = new Object[wFnDef.getArgs().size()];

    for (int position = 0; position < iPart.size(); position++) {
        AggregationBuffer buffer = evaluator.getNewAggregationBuffer();
        PartitionIterator<Object> windowRows = getRange(wFnDef, position, iPart).iterator();
        RuntimeUtils.connectLeadLagFunctionsToPartition(qDef, windowRows);

        while (windowRows.hasNext()) {
            Object currentRow = windowRows.next();
            int slot = 0;
            for (ArgDef arg : wFnDef.getArgs()) {
                argValues[slot] = arg.getExprEvaluator().evaluate(currentRow);
                slot++;
            }
            evaluator.aggregate(buffer, argValues);
        }

        Object evaluated = evaluator.evaluate(buffer);
        results.add(ObjectInspectorUtils.copyToStandardObject(evaluated, wFnDef.getOI(), ObjectInspectorCopyOption.WRITABLE));
    }
    return results;
}
Also used : GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) ArrayList(java.util.ArrayList) AggregationBuffer(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer) ArgDef(com.sap.hadoop.windowing.query2.definition.ArgDef)

Example 4 with AggregationBuffer

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer in project hive by apache.

the class TestStreamingSum method _agg.

/**
 * Feeds {@code inVals} one value at a time into the windowing (streaming) form of an
 * aggregation function and asserts that each emitted result matches the next value
 * of {@code outVals}.
 *
 * <p>Results are read after every input row; a {@code null} from
 * {@code getNextResult} at that stage is skipped. After {@code terminate} is called,
 * results are read until {@code inSz} results have been checked in total.
 *
 * @param fnR          resolver used to obtain the evaluator for {@code inputTypes}
 * @param inputTypes   argument types passed to the resolver
 * @param inVals       input values
 * @param typeHandler  converts between the plain type {@code T} and the wrapper type {@code TW}
 * @param in           argument array passed to {@code aggregate}; element 0 is overwritten per row
 * @param inputOIs     object inspectors used to initialise the evaluator
 * @param inSz         total number of results expected
 * @param numPreceding passed to {@code wdwFrame} as the preceding amount
 * @param numFollowing passed to {@code wdwFrame} as the following amount
 * @param outVals      expected results, in emission order
 * @throws HiveException declared by this method
 */
public static <T, TW> void _agg(GenericUDAFResolver fnR, TypeInfo[] inputTypes, Iterator<T> inVals, TypeHandler<T, TW> typeHandler, TW[] in, ObjectInspector[] inputOIs, int inSz, int numPreceding, int numFollowing, Iterator<T> outVals) throws HiveException {
    GenericUDAFEvaluator fn = fnR.getEvaluator(inputTypes);
    fn.init(Mode.COMPLETE, inputOIs);
    fn = fn.getWindowingEvaluator(wdwFrame(numPreceding, numFollowing));
    AggregationBuffer agg = fn.getNewAggregationBuffer();
    ISupportStreamingModeForWindowing oS = (ISupportStreamingModeForWindowing) fn;
    int outSz = 0;

    while (inVals.hasNext()) {
        typeHandler.set(inVals.next(), in[0]);
        fn.aggregate(agg, in);
        Object out = oS.getNextResult(agg);
        // null here means no result was returned for this row; nothing to compare.
        if (out != null) {
            Object actual = unwrapStreamingResult(out, typeHandler);
            // JUnit's contract is (expected, actual); keeping this order makes
            // failure messages read correctly.
            Assert.assertEquals(outVals.next(), actual);
            outSz++;
        }
    }

    fn.terminate(agg);

    // Read the remaining results after terminate until inSz results were checked.
    while (outSz < inSz) {
        Object actual = unwrapStreamingResult(oS.getNextResult(agg), typeHandler);
        Assert.assertEquals(outVals.next(), actual);
        outSz++;
    }
}

/**
 * Converts a raw streaming result into the form compared against the expected values.
 *
 * @param out         value returned by {@code getNextResult}
 * @param typeHandler handler whose {@code get} converts a {@code TW} into a {@code T}
 * @return {@code null} for the {@code NULL_RESULT} marker; otherwise the converted
 *         value, or {@code out} itself when the conversion raises a
 *         {@link ClassCastException}
 * @throws HiveException declared defensively so the call compiles regardless of
 *         what {@code TypeHandler.get} declares
 */
@SuppressWarnings("unchecked")
private static <T, TW> Object unwrapStreamingResult(Object out, TypeHandler<T, TW> typeHandler) throws HiveException {
    if (out == ISupportStreamingModeForWindowing.NULL_RESULT) {
        return null;
    }
    try {
        return typeHandler.get((TW) out);
    } catch (ClassCastException ignored) {
        // Deliberate best effort kept from the original test: a result that is not a
        // TW is compared as-is instead of failing on the conversion.
        return out;
    }
}
Also used : ISupportStreamingModeForWindowing(org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) AggregationBuffer(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer)

Example 5 with AggregationBuffer

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer in project hive by apache.

the class GroupByOperator method estimateRowSize.

/**
 * Estimates the fixed-length size of a hash table entry and stores it in
 * {@code fixedRowSize}. Also records, per aggregation evaluator, whether its
 * buffer is estimable in {@code estimableAggregationEvaluators}.
 *
 * @throws HiveException declared by this method
 **/
private void estimateRowSize() throws HiveException {
    // Notes carried over from the original implementation: a datatype with unknown
    // size (String/Struct etc.) is assumed to be 256 bytes for now, and 64 bytes is
    // the overhead for a reference.
    fixedRowSize = javaHashEntryOverHead;

    // Add the size of every grouping key; getSize receives the key position as well
    // as its type.
    ArrayList<ExprNodeDesc> keys = conf.getKeys();
    for (int pos = 0; pos < keys.size(); pos++) {
        ExprNodeDesc keyExpr = keys.get(pos);
        fixedRowSize += getSize(pos, keyExpr.getTypeInfo());
    }

    // Go over all the aggregation classes and add the size of their fixed-length
    // fields; getSize receives the evaluator index and the field itself.
    estimableAggregationEvaluators = new boolean[aggregationEvaluators.length];
    for (int evalIdx = 0; evalIdx < aggregationEvaluators.length; evalIdx++) {
        fixedRowSize += javaObjectOverHead;
        AggregationBuffer buffer = aggregationEvaluators[evalIdx].getNewAggregationBuffer();
        if (GenericUDAFEvaluator.isEstimable(buffer)) {
            // Estimable buffers are only flagged here; their fields are not inspected.
            estimableAggregationEvaluators[evalIdx] = true;
        } else {
            Field[] bufferFields = ObjectInspectorUtils.getDeclaredNonStaticFields(buffer.getClass());
            for (Field field : bufferFields) {
                fixedRowSize += getSize(evalIdx, field.getType(), field);
            }
        }
    }
}
Also used : Field(java.lang.reflect.Field) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) AggregationBuffer(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer)

Aggregations

AggregationBuffer (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer)6 GenericUDAFEvaluator (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator)4 ArgDef (com.sap.hadoop.windowing.query2.definition.ArgDef)2 ArrayList (java.util.ArrayList)2 StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)2 SameList (com.sap.hadoop.ds.SameList)1 WindowingException (com.sap.hadoop.windowing.WindowingException)1 WindowFunctionInfo (com.sap.hadoop.windowing.functions2.FunctionRegistry.WindowFunctionInfo)1 WindowFunctionDef (com.sap.hadoop.windowing.query2.definition.WindowFunctionDef)1 Partition (com.sap.hadoop.windowing.runtime2.Partition)1 Field (java.lang.reflect.Field)1 List (java.util.List)1 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)1 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)1 PTFExpressionDef (org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef)1 AbstractAggregationBuffer (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AbstractAggregationBuffer)1 ISupportStreamingModeForWindowing (org.apache.hadoop.hive.ql.udf.generic.ISupportStreamingModeForWindowing)1 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)1 ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)1 LazyBinary (org.apache.hadoop.hive.serde2.lazy.LazyBinary)1