Example use of org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector in the Apache Hive project.
Class GroupByOperator, method shouldBeFlushed.
/**
 * Based on user-parameters, decides whether the hash table should be flushed.
 * <p>
 * When no capacity estimate exists yet (numEntriesHashTable == 0), or whenever the
 * current number of hash table entries is a multiple of NUMROWSESTIMATESIZE, the
 * method first compares heap usage against memoryThreshold and then refreshes the
 * estimate of how many entries fit in maxHashTblMemory. Otherwise the previously
 * computed estimate is reused.
 *
 * @param newKeys
 *          keys for the row under consideration
 * @return true if the hash table should be flushed, false otherwise
 */
private boolean shouldBeFlushed(KeyWrapper newKeys) {
  int numEntries = hashAggregations.size();
  // Re-estimate the variable portion of the size every NUMROWSESTIMATESIZE entries,
  // or when there is no estimate yet.
  if ((numEntriesHashTable == 0) || ((numEntries % NUMROWSESTIMATESIZE) == 0)) {
    // check how much heap memory is currently in use
    long usedMemory = memoryMXBean.getHeapMemoryUsage().getUsed();
    // TODO: there is no easy and reliable way to compute the memory used by the executor threads and on-heap cache.
    // Assuming the used memory is equally divided among all executors.
    usedMemory = isLlap ? usedMemory / numExecutors : usedMemory;
    float rate = (float) usedMemory / (float) maxMemory;
    if (rate > memoryThreshold) {
      // Over the memory threshold: flush, unless isTez is set and no capacity
      // estimate has been computed yet.
      return (!isTez || numEntriesHashTable != 0);
    }
    // Accumulate the size of the variable-length key columns of this row.
    for (Integer pos : keyPositionsSize) {
      Object key = newKeys.getKeyArray()[pos];
      // Ignore nulls
      if (key != null) {
        if (key instanceof LazyString) {
          totalVariableSize += ((LazyPrimitive<LazyStringObjectInspector, Text>) key).getWritableObject().getLength();
        } else if (key instanceof String) {
          totalVariableSize += ((String) key).length();
        } else if (key instanceof Text) {
          totalVariableSize += ((Text) key).getLength();
        } else if (key instanceof LazyBinary) {
          totalVariableSize += ((LazyPrimitive<LazyBinaryObjectInspector, BytesWritable>) key).getWritableObject().getLength();
        } else if (key instanceof BytesWritable) {
          totalVariableSize += ((BytesWritable) key).getLength();
        } else if (key instanceof ByteArrayRef) {
          totalVariableSize += ((ByteArrayRef) key).getData().length;
        }
      }
    }
    // NOTE(review): assumes newKeys is already present in hashAggregations; a null
    // result here would throw a NullPointerException below - confirm with the caller.
    AggregationBuffer[] aggs = hashAggregations.get(newKeys);
    for (int i = 0; i < aggs.length; i++) {
      AggregationBuffer agg = aggs[i];
      if (estimableAggregationEvaluators[i]) {
        // The buffer can report its own size estimate.
        totalVariableSize += ((GenericUDAFEvaluator.AbstractAggregationBuffer) agg).estimate();
        continue;
      }
      if (aggrPositions[i] != null) {
        totalVariableSize += estimateSize(agg, aggrPositions[i]);
      }
    }
    numEntriesVarSize++;
    // Update the number of entries that can fit in the hash table.
    // Clamp before narrowing: a plain (int) cast of a quotient larger than
    // Integer.MAX_VALUE would wrap around, possibly to a negative value that
    // forces a flush on every call.
    numEntriesHashTable = (int) Math.min(Integer.MAX_VALUE,
        maxHashTblMemory / (fixedRowSize + (totalVariableSize / numEntriesVarSize)));
    LOG.trace("Hash Aggr: #hash table = {} #max in hash table = {}", numEntries, numEntriesHashTable);
  }
  // flush if necessary
  return (numEntries >= numEntriesHashTable);
}
Example use of org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector in the Apache Hive project.
Class TestLazyBinarySerDe, method testLazyBinaryObjectInspector.
/**
 * Verifies that the lazy binary object inspector hands back a byte[] whose
 * contents equal the bytes a LazyBinary was initialized with (via a ByteArrayRef).
 * @throws Throwable
 */
@Test
public void testLazyBinaryObjectInspector() throws Throwable {
  // obtain the lazy object inspector for the binary type
  AbstractPrimitiveLazyObjectInspector<?> inspector = LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.binaryTypeInfo, false, (byte) 0);

  // wrap the input bytes in a ByteArrayRef
  ByteArrayRef sourceRef = new ByteArrayRef();
  sourceRef.setData(inpBArray);

  // build a LazyBinary that spans the whole input array
  LazyBinary lazyBinary = (LazyBinary) LazyFactory.createLazyObject(inspector);
  lazyBinary.init(sourceRef, 0, inpBArray.length);

  // read the bytes back through the inspector and compare them with the input
  byte[] actual = (byte[]) inspector.getPrimitiveJavaObject(lazyBinary);
  boolean sameContents = Arrays.equals(inpBArray, actual);
  assertTrue("compare input and output BAs", sameContents);
}
Aggregations