Search in sources :

Example 1 with LazyBinaryObjectInspector

use of org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector in project hive by apache.

The example is the method shouldBeFlushed of the class GroupByOperator.

/**
 * Decides whether the aggregation hash table should be flushed.
 *
 * <p>When {@code numEntriesHashTable} is still 0, or the current entry count is a
 * multiple of {@code NUMROWSESTIMATESIZE}, heap usage is sampled and the number of
 * entries the hash table may hold is re-estimated from the variable-size portion of
 * the given keys and their aggregation buffers. Otherwise only the previously
 * computed limit is consulted.
 *
 * @param newKeys
 *          keys for the row under consideration
 * @return {@code true} if the hash table should be flushed
 */
private boolean shouldBeFlushed(KeyWrapper newKeys) {
    final int currentEntries = hashAggregations.size();
    // Re-estimate only on the very first call (no limit computed yet) or every
    // NUMROWSESTIMATESIZE entries; in all other cases reuse the cached limit.
    final boolean reestimate = (numEntriesHashTable == 0) || ((currentEntries % NUMROWSESTIMATESIZE) == 0);
    if (!reestimate) {
        return currentEntries >= numEntriesHashTable;
    }

    // Sample how much heap is currently in use.
    long heapUsed = memoryMXBean.getHeapMemoryUsage().getUsed();
    // TODO: there is no easy and reliable way to compute the memory used by the executor threads and on-heap cache.
    // Assuming the used memory is equally divided among all executors.
    if (isLlap) {
        heapUsed = heapUsed / numExecutors;
    }
    final float heapFraction = (float) heapUsed / (float) maxMemory;
    if (heapFraction > memoryThreshold) {
        // Over the memory threshold: flush, unless running on Tez while
        // numEntriesHashTable is still 0.
        return !isTez || numEntriesHashTable != 0;
    }

    // Accumulate the variable-length contribution of the tracked key positions.
    for (Integer position : keyPositionsSize) {
        Object keyPart = newKeys.getKeyArray()[position.intValue()];
        if (keyPart == null) {
            // Null keys contribute nothing.
            continue;
        }
        if (keyPart instanceof LazyString) {
            totalVariableSize += ((LazyPrimitive<LazyStringObjectInspector, Text>) keyPart).getWritableObject().getLength();
        } else if (keyPart instanceof String) {
            totalVariableSize += ((String) keyPart).length();
        } else if (keyPart instanceof Text) {
            totalVariableSize += ((Text) keyPart).getLength();
        } else if (keyPart instanceof LazyBinary) {
            totalVariableSize += ((LazyPrimitive<LazyBinaryObjectInspector, BytesWritable>) keyPart).getWritableObject().getLength();
        } else if (keyPart instanceof BytesWritable) {
            totalVariableSize += ((BytesWritable) keyPart).getLength();
        } else if (keyPart instanceof ByteArrayRef) {
            totalVariableSize += ((ByteArrayRef) keyPart).getData().length;
        }
    }

    // Accumulate the variable-length contribution of the aggregation buffers
    // stored for these keys.
    AggregationBuffer[] buffers = hashAggregations.get(newKeys);
    for (int idx = 0; idx < buffers.length; idx++) {
        AggregationBuffer buffer = buffers[idx];
        if (estimableAggregationEvaluators[idx]) {
            // The buffer can report its own size estimate.
            totalVariableSize += ((GenericUDAFEvaluator.AbstractAggregationBuffer) buffer).estimate();
        } else if (aggrPositions[idx] != null) {
            totalVariableSize += estimateSize(buffer, aggrPositions[idx]);
        }
    }

    numEntriesVarSize++;
    // Recompute how many entries fit: memory budget divided by the fixed row size
    // plus the average variable size observed over all sampled entries.
    numEntriesHashTable = (int) (maxHashTblMemory / (fixedRowSize + (totalVariableSize / numEntriesVarSize)));
    LOG.trace("Hash Aggr: #hash table = {} #max in hash table = {}", currentEntries, numEntriesHashTable);

    // Flush once the table has reached the freshly estimated capacity.
    return currentEntries >= numEntriesHashTable;
}
Also used : GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) LazyBinaryObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) LazyPrimitive(org.apache.hadoop.hive.serde2.lazy.LazyPrimitive) LazyBinary(org.apache.hadoop.hive.serde2.lazy.LazyBinary) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) UnionObject(org.apache.hadoop.hive.serde2.objectinspector.UnionObject) AggregationBuffer(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer)

Example 2 with LazyBinaryObjectInspector

use of org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector in project hive by apache.

The example is the method testLazyBinaryObjectInspector of the class TestLazyBinarySerDe.

/**
 * Verifies that the lazy inspector obtained for the binary type returns a
 * byte[] whose contents equal the input bytes a LazyBinary was initialized with.
 *
 * @throws Throwable if any step of the test fails unexpectedly
 */
@Test
public void testLazyBinaryObjectInspector() throws Throwable {
    // Obtain the lazy object inspector for the binary type.
    AbstractPrimitiveLazyObjectInspector<?> inspector = LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.binaryTypeInfo, false, (byte) 0);

    // Wrap the input bytes in a ByteArrayRef.
    ByteArrayRef sourceRef = new ByteArrayRef();
    sourceRef.setData(inpBArray);

    // Create a LazyBinary and initialize it over the whole input array.
    LazyBinary lazyValue = (LazyBinary) LazyFactory.createLazyObject(inspector);
    lazyValue.init(sourceRef, 0, inpBArray.length);

    // Ask the inspector for the Java byte[] view and compare it with the input.
    byte[] actualBytes = (byte[]) inspector.getPrimitiveJavaObject(lazyValue);
    boolean sameContents = Arrays.equals(inpBArray, actualBytes);
    assertTrue("compare input and output BAs", sameContents);
}
Also used : ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) LazyBinary(org.apache.hadoop.hive.serde2.lazy.LazyBinary) Test(org.junit.Test)

Aggregations

ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)2 LazyBinary (org.apache.hadoop.hive.serde2.lazy.LazyBinary)2 GenericUDAFEvaluator (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator)1 AggregationBuffer (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer)1 LazyPrimitive (org.apache.hadoop.hive.serde2.lazy.LazyPrimitive)1 LazyString (org.apache.hadoop.hive.serde2.lazy.LazyString)1 LazyBinaryObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector)1 UnionObject (org.apache.hadoop.hive.serde2.objectinspector.UnionObject)1 BytesWritable (org.apache.hadoop.io.BytesWritable)1 Text (org.apache.hadoop.io.Text)1 Test (org.junit.Test)1