Use of org.apache.tez.runtime.library.api.KeyValueReader in the Apache Hive project.
Class ReduceRecordSource, method init.
/**
 * Prepares this record source for reading reduce-side input.
 * <p>
 * From the visible lines: stores the reducer, the vectorization flag, the key/value table
 * descriptors, the group-key flag and the tag on this instance; wraps {@code reader} in an
 * adapter; instantiates and initializes the key and value deserializers from their table
 * descriptors; and builds {@code rowObjectInspector} either for vectorized execution or as a
 * standard struct inspector.
 * <p>
 * NOTE(review): this listing looks like it lost lines in extraction. No closing braces are
 * present and at least one statement is cut off mid-expression, so the nesting of the branches
 * below cannot be established from this text alone. Treat the inline notes as items to confirm
 * against the original source.
 *
 * @param jconf job configuration; not referenced in the visible lines
 * @param reducer operator stored in {@code this.reducer}
 * @param vectorized selects the vectorized setup path when true
 * @param keyTableDesc descriptor supplying the key deserializer class and properties
 * @param valueTableDesc descriptor supplying the value deserializer class and properties
 * @param reader input reader; cast to KeyValueReader or, otherwise, to KeyValuesReader
 * @param handleGroupKey stored in {@code this.handleGroupKey}
 * @param tag stored in {@code this.tag}
 * @param batchContext used to create the vectorized row batch
 * @param vectorizedVertexNum stored in {@code this.vectorizedVertexNum}
 * @throws Exception declared by the signature; the visible catch block rethrows
 *         OutOfMemoryError unchanged and wraps any other Throwable in a RuntimeException
 */
void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyTableDesc, TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag, VectorizedRowBatchCtx batchContext, long vectorizedVertexNum) throws Exception {
this.vectorizedVertexNum = vectorizedVertexNum;
ObjectInspector keyObjectInspector;
this.reducer = reducer;
this.vectorized = vectorized;
this.keyTableDesc = keyTableDesc;
// Either reader flavor is wrapped in an adapter before being assigned to this.reader.
if (reader instanceof KeyValueReader) {
this.reader = new KeyValuesFromKeyValue((KeyValueReader) reader);
} else {
this.reader = new KeyValuesFromKeyValues((KeyValuesReader) reader);
this.handleGroupKey = handleGroupKey;
this.tag = tag;
try {
// Key side: instantiate the deserializer class named by the key table descriptor and
// initialize it with that descriptor's properties.
inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
keyObjectInspector = inputKeyDeserializer.getObjectInspector();
if (vectorized) {
// The key inspector is cast to a struct inspector; its field count becomes the index of
// the first value column.
keyStructInspector = (StructObjectInspector) keyObjectInspector;
firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
// We should initialize the SerDe with the TypeInfo when available.
this.valueTableDesc = valueTableDesc;
// Value side: same instantiate-then-initialize sequence, using the value table descriptor.
inputValueDeserializer = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
SerDeUtils.initializeSerDe(inputValueDeserializer, null, valueTableDesc.getProperties(), null);
valueObjectInspector = inputValueDeserializer.getObjectInspector();
// NOTE(review): no visible line adds anything to `ois` before it is handed to
// getStandardStructObjectInspector further down — presumably the key and value inspectors
// were added in lines missing from this listing; confirm.
ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
if (vectorized) {
/* vectorization only works with struct object inspectors */
valueStructInspectors = (StructObjectInspector) valueObjectInspector;
final int totalColumns = firstValueColumnOffset + valueStructInspectors.getAllStructFieldRefs().size();
// NOTE(review): the list is created with capacity for key + value columns, but no visible
// line populates it — confirm whether writer registration was dropped from this listing.
valueStringWriters = new ArrayList<VectorExpressionWriter>(totalColumns);
rowObjectInspector = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors);
batch = batchContext.createVectorizedRowBatch();
// Setup vectorized deserialization for the key and value.
BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer;
keyBinarySortableDeserializeToRow = new VectorDeserializeRow<BinarySortableDeserializeRead>(new BinarySortableDeserializeRead(VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector), /* useExternalBuffer */
true, binarySortableSerDe.getSortOrders()));
final int valuesSize = valueStructInspectors.getAllStructFieldRefs().size();
// A value-side row deserializer is only built when the value struct has at least one field.
if (valuesSize > 0) {
valueLazyBinaryDeserializeToRow = new VectorDeserializeRow<LazyBinaryDeserializeRead>(new LazyBinaryDeserializeRead(VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspectors), /* useExternalBuffer */
// NOTE(review): the constructor call above stops after the useExternalBuffer marker; its
// argument and closing parentheses are not present in this listing.
// Create data buffers for value bytes column vectors.
for (int i = firstValueColumnOffset; i < batch.numCols; i++) {
ColumnVector colVector = batch.cols[i];
if (colVector instanceof BytesColumnVector) {
// NOTE(review): the cast result is not used in the visible lines; given the comment above
// the loop, a buffer-setup call on it is presumably missing — confirm.
BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
} else {
// Non-vectorized path (presumably the else of the `if (vectorized)` above — confirm nesting):
// build a standard struct inspector from the reduce field names and `ois`.
rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
} catch (Throwable e) {
// Any failure marks this source as aborted before the error is rethrown.
abort = true;
if (e instanceof OutOfMemoryError) {
// Don't create a new object if we are already out of memory
throw (OutOfMemoryError) e;
} else {
throw new RuntimeException("Reduce operator initialization failed", e);
// Closes the TEZ_INIT_OPERATORS perf-log section; the matching begin call is not in this method.
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
Use of org.apache.tez.runtime.library.api.KeyValueReader in the Apache Hive project.
Class DynamicValueRegistryTez, method init.
public void init(RegistryConf conf) throws Exception {
RegistryConfTez rct = (RegistryConfTez) conf;
for (String inputSourceName : rct.baseWork.getInputSourceToRuntimeValuesInfo().keySet()) {"Runtime value source: " + inputSourceName);
LogicalInput runtimeValueInput = rct.inputs.get(inputSourceName);
RuntimeValuesInfo runtimeValuesInfo = rct.baseWork.getInputSourceToRuntimeValuesInfo().get(inputSourceName);
// Setup deserializer/obj inspectors for the incoming data source
Deserializer deserializer = ReflectionUtils.newInstance(runtimeValuesInfo.getTableDesc().getDeserializerClass(), null);
deserializer.initialize(rct.conf, runtimeValuesInfo.getTableDesc().getProperties());
ObjectInspector inspector = deserializer.getObjectInspector();
// Set up col expressions for the dynamic values using this input
List<ExprNodeEvaluator> colExprEvaluators = new ArrayList<ExprNodeEvaluator>();
for (ExprNodeDesc expr : runtimeValuesInfo.getColExprs()) {
ExprNodeEvaluator exprEval = ExprNodeEvaluatorFactory.get(expr, null);
List<Input> inputList = new ArrayList<Input>();
KeyValueReader kvReader = (KeyValueReader) runtimeValueInput.getReader();
long rowCount = 0;
while ( {
Object row = deserializer.deserialize((Writable) kvReader.getCurrentValue());
for (int colIdx = 0; colIdx < colExprEvaluators.size(); ++colIdx) {
// Read each expression and save it to the value registry
ExprNodeEvaluator eval = colExprEvaluators.get(colIdx);
Object val = eval.evaluate(row);
setValue(runtimeValuesInfo.getDynamicValueIDs().get(colIdx), val);
// For now, expecting a single row (min/max, aggregated bloom filter), or no rows
if (rowCount == 0) {
LOG.debug("No input rows from " + inputSourceName + ", filling dynamic values with nulls");
for (int colIdx = 0; colIdx < colExprEvaluators.size(); ++colIdx) {
ExprNodeEvaluator eval = colExprEvaluators.get(colIdx);
setValue(runtimeValuesInfo.getDynamicValueIDs().get(colIdx), null);
} else if (rowCount > 1) {
throw new IllegalStateException("Expected 0 or 1 rows from " + inputSourceName + ", got " + rowCount);
Use of org.apache.tez.runtime.library.api.KeyValueReader in the Apache Hive project.
Class HashTableLoader, method load.
/**
 * Loads map-join hash tables from Tez inputs into {@code mapJoinTables}.
 * <p>
 * From the visible lines, the method (1) picks a memory budget starting from
 * {@code desc.getMemoryNeeded()}, (2) for a hybrid grace hash join over more than two tables,
 * sizes per-table memory and computes a partition count from the largest small table,
 * (3) decides whether memory monitoring applies, and (4) for each table position reads
 * key/value pairs from the corresponding Tez input into a table container and stores that
 * container at {@code mapJoinTables[pos]}.
 * <p>
 * {@code desc}, {@code hconf}, {@code tezContext}, {@code cacheKey} and {@code LOG} are
 * referenced but not declared in this method.
 * <p>
 * NOTE(review): this listing looks like it lost lines in extraction. No closing braces are
 * present, several string expressions have no call target, and some bodies and conditions
 * are empty, so the nesting below cannot be established from this text alone. Treat the
 * inline notes as items to confirm against the original source.
 *
 * @param mapJoinTables array that receives one loaded container per processed position
 * @param mapJoinTableSerdes per-position serde holders supplying key and value contexts
 * @throws HiveException wrapping failures caught in the visible catch blocks, and thrown
 *         directly for an unsupported key on a non-first table
 */
public void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
Map<Integer, String> parentToInput = desc.getParentToInput();
Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();
// A join whose first key-expression list is empty is flagged as a cross product.
boolean isCrossProduct = false;
List<ExprNodeDesc> joinExprs = desc.getKeys().values().iterator().next();
if (joinExprs.size() == 0) {
isCrossProduct = true;
boolean useOptimizedTables = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
boolean useHybridGraceHashJoin = desc.isHybridHashJoin();
boolean isFirstKey = true;
// Get the total available memory from memory manager
// NOTE(review): the string expression fused onto the next line has no call target;
// presumably a logging call prefix was stripped — confirm.
long totalMapJoinMemory = desc.getMemoryNeeded();"Memory manager allocates " + totalMapJoinMemory + " bytes for the loading hashtable.");
// A non-positive budget falls back to the configured no-conditional-task threshold.
if (totalMapJoinMemory <= 0) {
totalMapJoinMemory = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
long processMaxMemory = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
// A budget above the JVM's max heap is replaced by a configured fraction of that max heap.
if (totalMapJoinMemory > processMaxMemory) {
float hashtableMemoryUsage = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
LOG.warn("totalMapJoinMemory value of " + totalMapJoinMemory + " is greater than the max memory size of " + processMaxMemory);
// Don't want to attempt to grab more memory than we have available .. percentage is a bit arbitrary
totalMapJoinMemory = (long) (processMaxMemory * hashtableMemoryUsage);
// Only applicable to n-way Hybrid Grace Hash Join
HybridHashTableConf nwayConf = null;
long totalSize = 0;
// position of the biggest small table
int biggest = 0;
Map<Integer, Long> tableMemorySizes = null;
if (useHybridGraceHashJoin && mapJoinTables.length > 2) {
// Create a Conf for n-way HybridHashTableContainers
// NOTE(review): the trailing string expression on the next line has no call target;
// presumably a logging call prefix was stripped — confirm.
nwayConf = new HybridHashTableConf();"N-way join: " + (mapJoinTables.length - 1) + " small tables.");
// Find the biggest small table; also calculate total data size of all small tables
// the size of the biggest small table
long maxSize = Long.MIN_VALUE;
for (int pos = 0; pos < mapJoinTables.length; pos++) {
// NOTE(review): the body of this check is not visible; presumably the big-table position is
// skipped so that only small tables are measured — confirm.
if (pos == desc.getPosBigTable()) {
long smallTableSize = desc.getParentDataSizes().get(pos);
totalSize += smallTableSize;
if (maxSize < smallTableSize) {
maxSize = smallTableSize;
biggest = pos;
tableMemorySizes = divideHybridHashTableMemory(mapJoinTables, desc, totalSize, totalMapJoinMemory);
// Using biggest small table, calculate number of partitions to create for each small table
long memory = tableMemorySizes.get(biggest);
int numPartitions = 0;
try {
numPartitions = HybridHashTableContainer.calcNumPartitions(memory, maxSize, HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS), HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE));
} catch (IOException e) {
throw new HiveException(e);
// NOTE(review): numPartitions is computed above but not read in any visible line — confirm
// whether a statement consuming it (e.g. on nwayConf) is missing from this listing.
MemoryMonitorInfo memoryMonitorInfo = desc.getMemoryMonitorInfo();
boolean doMemCheck = false;
long effectiveThreshold = 0;
if (memoryMonitorInfo != null) {
effectiveThreshold = memoryMonitorInfo.getEffectiveThreshold(desc.getMaxMemoryAvailable());
// Flip the flag at runtime in case if we are running outside of LLAP
// NOTE(review): no statement is visible inside the non-LLAP check below; the flag flip the
// comment describes appears to be missing — confirm.
if (!LlapDaemonInfo.INSTANCE.isLlap()) {
if (memoryMonitorInfo.doMemoryMonitoring()) {
doMemCheck = true;
// NOTE(review): here and in the other isInfoEnabled() guards below, the message expression
// has no call target; presumably a logging call prefix was stripped — confirm.
if (LOG.isInfoEnabled()) {"Memory monitoring for hash table loader enabled. {}", memoryMonitorInfo);
if (!doMemCheck) {
if (LOG.isInfoEnabled()) {"Not doing hash table memory monitoring. {}", memoryMonitorInfo);
// Main loading loop over table positions.
for (int pos = 0; pos < mapJoinTables.length; pos++) {
// NOTE(review): as in the sizing loop above, the body of this check is not visible;
// presumably the big-table position is skipped — confirm.
if (pos == desc.getPosBigTable()) {
long numEntries = 0;
String inputName = parentToInput.get(pos);
LogicalInput input = tezContext.getInput(inputName);
// NOTE(review): the try block below has no visible statements; whatever prepared the input
// before reading is missing from this listing — confirm.
try {
} catch (Exception e) {
throw new HiveException(e);
try {
KeyValueReader kvReader = (KeyValueReader) input.getReader();
MapJoinObjectSerDeContext keyCtx = mapJoinTableSerdes[pos].getKeyContext(), valCtx = mapJoinTableSerdes[pos].getValueContext();
// An unsupported key type on the first table switches optimized tables off for the whole
// load; on any later table it is an error.
if (useOptimizedTables) {
ObjectInspector keyOi = keyCtx.getSerDe().getObjectInspector();
if (!MapJoinBytesTableContainer.isSupportedKey(keyOi)) {
if (isFirstKey) {
// NOTE(review): the expression after the assignment on the next line has no call target and
// one more closing parenthesis than opening ones; presumably a log call wrapping
// describeOi(...) was stripped, mirroring the throw below — confirm.
useOptimizedTables = false;"Not using optimized hash table. " + "Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
} else {
throw new HiveException(describeOi("Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
isFirstKey = false;
// A missing key count is represented as -1.
Long keyCountObj = parentKeyCounts.get(pos);
long keyCount = (keyCountObj == null) ? -1 : keyCountObj.longValue();
// Memory for this table: its share from tableMemorySizes for an n-way hybrid join, or the
// whole budget for a binary hybrid join; stays 0 when hybrid grace hash join is off.
long memory = 0;
if (useHybridGraceHashJoin) {
if (mapJoinTables.length > 2) {
memory = tableMemorySizes.get(pos);
} else {
// binary join
memory = totalMapJoinMemory;
// Container choice: bytes container for optimized tables without hybrid join or for a cross
// product, hybrid container for optimized tables otherwise, HashMapWrapper when optimized
// tables are off.
MapJoinTableContainer tableContainer;
if (useOptimizedTables) {
if (!useHybridGraceHashJoin || isCrossProduct) {
tableContainer = new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0);
} else {
tableContainer = new HybridHashTableContainer(hconf, keyCount, memory, desc.getParentDataSizes().get(pos), nwayConf);
} else {
tableContainer = new HashMapWrapper(hconf, keyCount);
}"Loading hash table for input: {} cacheKey: {} tableContainer: {} smallTablePos: {}", inputName, cacheKey, tableContainer.getClass().getSimpleName(), pos);
tableContainer.setSerde(keyCtx, valCtx);
// NOTE(review): the loop condition below is missing; presumably it advanced kvReader —
// confirm. No visible line increments numEntries either, although the checks and messages
// below read it.
while ( {
tableContainer.putRow((Writable) kvReader.getCurrentKey(), (Writable) kvReader.getCurrentValue());
// When monitoring is on, the estimated container size is compared with the threshold every
// getMemoryCheckInterval() entries; exceeding it raises MapJoinMemoryExhaustionError.
if (doMemCheck && (numEntries % memoryMonitorInfo.getMemoryCheckInterval() == 0)) {
final long estMemUsage = tableContainer.getEstimatedMemorySize();
if (estMemUsage > effectiveThreshold) {
String msg = "Hash table loading exceeded memory limits for input: " + inputName + " numEntries: " + numEntries + " estimatedMemoryUsage: " + estMemUsage + " effectiveThreshold: " + effectiveThreshold + " memoryMonitorInfo: " + memoryMonitorInfo;
throw new MapJoinMemoryExhaustionError(msg);
} else {
if (LOG.isInfoEnabled()) {"Checking hash table loader memory usage for input: {} numEntries: {} " + "estimatedMemoryUsage: {} effectiveThreshold: {}", inputName, numEntries, estMemUsage, effectiveThreshold);
// Publish the loaded container for this position.
mapJoinTables[pos] = tableContainer;
if (doMemCheck) {"Finished loading hash table for input: {} cacheKey: {} numEntries: {} estimatedMemoryUsage: {}", inputName, cacheKey, numEntries, tableContainer.getEstimatedMemorySize());
} else {"Finished loading hash table for input: {} cacheKey: {} numEntries: {}", inputName, cacheKey, numEntries);
} catch (Exception e) {
throw new HiveException(e);