Use of org.apache.tez.runtime.api.LogicalInput in project hive by apache.
The class VectorMapJoinFastHashTableLoader, method load.
@Override
public void load(MapJoinTableContainer[] mapJoinTables,
    MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
  Map<Integer, String> parentToInput = desc.getParentToInput();
  Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();
  for (int pos = 0; pos < mapJoinTables.length; pos++) {
    if (pos == desc.getPosBigTable()) {
      continue;
    }
    String inputName = parentToInput.get(pos);
    LogicalInput input = tezContext.getInput(inputName);
    try {
      input.start();
      tezContext.getTezProcessorContext().waitForAnyInputReady(
          Collections.<Input>singletonList(input));
    } catch (Exception e) {
      throw new HiveException(e);
    }
    try {
      KeyValueReader kvReader = (KeyValueReader) input.getReader();
      Long keyCountObj = parentKeyCounts.get(pos);
      long keyCount = (keyCountObj == null) ? -1 : keyCountObj.longValue();
      VectorMapJoinFastTableContainer vectorMapJoinFastTableContainer =
          new VectorMapJoinFastTableContainer(desc, hconf, keyCount);
      // No SerDes here.
      vectorMapJoinFastTableContainer.setSerde(null, null);
      while (kvReader.next()) {
        vectorMapJoinFastTableContainer.putRow(
            (BytesWritable) kvReader.getCurrentKey(),
            (BytesWritable) kvReader.getCurrentValue());
      }
      vectorMapJoinFastTableContainer.seal();
      mapJoinTables[pos] = (MapJoinTableContainer) vectorMapJoinFastTableContainer;
    } catch (Exception e) {
      // IOException, SerDeException, and anything else all wrap the same way.
      throw new HiveException(e);
    }
  }
}
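Every snippet on this page shares the same start-then-wait idiom: call LogicalInput.start(), block on the ProcessorContext until Tez reports the input ready, then pull a reader. Below is a minimal, self-contained sketch of that idiom for a single input; the helper class and method names are hypothetical, not part of Hive or Tez.

// A minimal sketch of the start-then-wait idiom, assuming a Tez
// ProcessorContext supplied by the caller. Class and method names
// here are hypothetical.
import java.util.Collections;

import org.apache.tez.runtime.api.Input;
import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.api.ProcessorContext;
import org.apache.tez.runtime.library.api.KeyValueReader;

public final class LogicalInputUtil {

  private LogicalInputUtil() {
  }

  /** Starts one LogicalInput, blocks until Tez reports it ready, and returns its reader. */
  public static KeyValueReader startAndAwait(ProcessorContext context, LogicalInput input)
      throws Exception {
    input.start();
    // waitForAnyInputReady unblocks once at least one of the given inputs is
    // ready; with a singleton list that means "wait for this input".
    context.waitForAnyInputReady(Collections.<Input>singletonList(input));
    // The cast assumes the edge delivers key/value pairs, as in the loader above.
    return (KeyValueReader) input.getReader();
  }
}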
Use of org.apache.tez.runtime.api.LogicalInput in project hive by apache.
The class ReduceRecordProcessor, method init.
@Override
void init(MRTaskReporter mrReporter, Map<String, LogicalInput> inputs,
    Map<String, LogicalOutput> outputs) throws Exception {
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
  super.init(mrReporter, inputs, outputs);
  MapredContext.init(false, new JobConf(jconf));
  List<LogicalInput> shuffleInputs = getShuffleInputs(inputs);
  // TODO HIVE-14042. Move to using a loop and a timed wait once TEZ-3302 is fixed.
  checkAbortCondition();
  if (shuffleInputs != null) {
    l4j.info("Waiting for ShuffleInputs to become ready");
    processorContext.waitForAllInputsReady(new ArrayList<Input>(shuffleInputs));
  }
  connectOps.clear();
  ReduceWork redWork = reduceWork;
  l4j.info("Main work is " + reduceWork.getName());
  List<HashTableDummyOperator> workOps = reduceWork.getDummyOps();
  HashSet<HashTableDummyOperator> dummyOps = workOps == null ? null : new HashSet<>(workOps);
  tagToReducerMap.put(redWork.getTag(), redWork);
  if (mergeWorkList != null) {
    for (BaseWork mergeWork : mergeWorkList) {
      if (l4j.isDebugEnabled()) {
        l4j.debug("Additional work " + mergeWork.getName());
      }
      workOps = mergeWork.getDummyOps();
      if (workOps != null) {
        if (dummyOps == null) {
          dummyOps = new HashSet<>(workOps);
        } else {
          dummyOps.addAll(workOps);
        }
      }
      ReduceWork mergeReduceWork = (ReduceWork) mergeWork;
      reducer = mergeReduceWork.getReducer();
      // Check immediately after the reducer is assigned, in case the abort came in during the assignment.
      checkAbortCondition();
      DummyStoreOperator dummyStoreOp = getJoinParentOp(reducer);
      connectOps.put(mergeReduceWork.getTag(), dummyStoreOp);
      tagToReducerMap.put(mergeReduceWork.getTag(), mergeReduceWork);
    }
    ((TezContext) MapredContext.get()).setDummyOpsMap(connectOps);
  }
  checkAbortCondition();
  bigTablePosition = (byte) reduceWork.getTag();
  ObjectInspector[] mainWorkOIs = null;
  ((TezContext) MapredContext.get()).setInputs(inputs);
  ((TezContext) MapredContext.get()).setTezProcessorContext(processorContext);
  int numTags = reduceWork.getTagToValueDesc().size();
  reducer = reduceWork.getReducer();
  // Check immediately after the reducer is assigned, in case the abort came in during the assignment.
  checkAbortCondition();
  // Set the memory available for operators.
  long memoryAvailableToTask = processorContext.getTotalMemoryAvailableToTask();
  if (reducer.getConf() != null) {
    reducer.getConf().setMaxMemoryAvailable(memoryAvailableToTask);
    l4j.info("Memory available for operators set to {}",
        LlapUtil.humanReadableByteCount(memoryAvailableToTask));
  }
  OperatorUtils.setMemoryAvailable(reducer.getChildOperators(), memoryAvailableToTask);
  // Set up the dynamic values registry.
  String valueRegistryKey = DynamicValue.DYNAMIC_VALUE_REGISTRY_CACHE_KEY;
  DynamicValueRegistryTez registryTez = dynamicValueCache.retrieve(valueRegistryKey,
      new Callable<DynamicValueRegistryTez>() {
        @Override
        public DynamicValueRegistryTez call() {
          return new DynamicValueRegistryTez();
        }
      });
  dynamicValueCacheKeys.add(valueRegistryKey);
  RegistryConfTez registryConf = new RegistryConfTez(jconf, reduceWork, processorContext, inputs);
  registryTez.init(registryConf);
  checkAbortCondition();
  if (numTags > 1) {
    sources = new ReduceRecordSource[numTags];
    mainWorkOIs = new ObjectInspector[numTags];
    initializeMultipleSources(reduceWork, numTags, mainWorkOIs, sources);
    ((TezContext) MapredContext.get()).setRecordSources(sources);
    reducer.initialize(jconf, mainWorkOIs);
  } else {
    numTags = tagToReducerMap.keySet().size();
    sources = new ReduceRecordSource[numTags];
    mainWorkOIs = new ObjectInspector[numTags];
    for (int i : tagToReducerMap.keySet()) {
      redWork = tagToReducerMap.get(i);
      reducer = redWork.getReducer();
      // Check immediately after the reducer is assigned, in case the abort came in during the assignment.
      checkAbortCondition();
      initializeSourceForTag(redWork, i, mainWorkOIs, sources,
          redWork.getTagToValueDesc().get(0), redWork.getTagToInput().get(0));
      reducer.initializeLocalWork(jconf);
    }
    reducer = reduceWork.getReducer();
    // Check immediately after the reducer is assigned, in case the abort came in during the assignment.
    checkAbortCondition();
    ((TezContext) MapredContext.get()).setRecordSources(sources);
    reducer.initialize(jconf, new ObjectInspector[] { mainWorkOIs[bigTablePosition] });
    for (int i : tagToReducerMap.keySet()) {
      if (i == bigTablePosition) {
        continue;
      }
      redWork = tagToReducerMap.get(i);
      reducer = redWork.getReducer();
      // Check immediately after the reducer is assigned, in case the abort came in during the assignment.
      checkAbortCondition();
      reducer.initialize(jconf, new ObjectInspector[] { mainWorkOIs[i] });
    }
  }
  checkAbortCondition();
  reducer = reduceWork.getReducer();
  // Initialize the reduce operator tree.
  try {
    l4j.info(reducer.dump(0));
    // Initialize the dummy parent operators as well.
    if (dummyOps != null) {
      for (HashTableDummyOperator dummyOp : dummyOps) {
        // TODO HIVE-14042. Propagating abort to dummyOps.
        dummyOp.initialize(jconf, null);
        checkAbortCondition();
      }
    }
    // Set the output collector for any reduce sink operators in the pipeline.
    List<Operator<?>> children = new LinkedList<Operator<?>>();
    children.add(reducer);
    if (dummyOps != null) {
      children.addAll(dummyOps);
    }
    createOutputMap();
    OperatorUtils.setChildrenCollector(children, outMap);
    checkAbortCondition();
    reducer.setReporter(reporter);
    MapredContext.get().setReporter(reporter);
  } catch (Throwable e) {
    super.setAborted(true);
    if (e instanceof OutOfMemoryError) {
      // Don't create a new object if we are already out of memory.
      throw (OutOfMemoryError) e;
    } else if (e instanceof InterruptedException) {
      l4j.info("Hit an interrupt while initializing ReduceRecordProcessor. Message={}",
          e.getMessage());
      throw (InterruptedException) e;
    } else {
      throw new RuntimeException("Reduce operator initialization failed", e);
    }
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
}
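The getShuffleInputs(inputs) call near the top of init is not shown on this page. A hedged sketch of what such a filter could look like, assuming the reducer identifies its shuffled edges by input name through a tag-to-input-name map (the class and method names below are illustrative, not Hive's):

// A sketch of a getShuffleInputs-style helper: collect the LogicalInputs
// named in tagToInput so they can all be awaited at once with
// waitForAllInputsReady. The tagToInput parameter is an assumption here.
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.tez.runtime.api.LogicalInput;

final class ShuffleInputCollector {

  private ShuffleInputCollector() {
  }

  /** Returns the inputs named by tagToInput, i.e. the shuffled edges of this reducer. */
  static List<LogicalInput> collectShuffleInputs(Map<Integer, String> tagToInput,
      Map<String, LogicalInput> inputs) {
    List<LogicalInput> shuffleInputs = new ArrayList<>();
    for (String inputName : tagToInput.values()) {
      LogicalInput input = inputs.get(inputName);
      if (input != null) {
        shuffleInputs.add(input);
      }
    }
    return shuffleInputs;
  }
}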
Use of org.apache.tez.runtime.api.LogicalInput in project hive by apache.
The class MapRecordProcessor, method getMRInput.
private MRInputLegacy getMRInput(Map<String, LogicalInput> inputs) throws Exception {
  // There should be only one MRInput.
  MRInputLegacy theMRInput = null;
  // Start all MR / multi-MR inputs.
  Set<Input> li = new HashSet<Input>();
  for (LogicalInput inp : inputs.values()) {
    if (inp instanceof MRInputLegacy || inp instanceof MultiMRInput) {
      inp.start();
      li.add(inp);
    }
  }
  // TODO: HIVE-14042. Potential blocking call. MRInput handles this correctly even if
  // an interrupt is swallowed. MultiMRInput may not. Fix once TEZ-3302 is resolved.
  processorContext.waitForAllInputsReady(li);
  l4j.info("The input names are: " + Arrays.toString(inputs.keySet().toArray()));
  for (Entry<String, LogicalInput> inp : inputs.entrySet()) {
    if (inp.getValue() instanceof MRInputLegacy) {
      if (theMRInput != null) {
        throw new IllegalArgumentException("Only one MRInput is expected");
      }
      // A better logic would be to find the alias.
      theMRInput = (MRInputLegacy) inp.getValue();
    } else if (inp.getValue() instanceof MultiMRInput) {
      multiMRInputMap.put(inp.getKey(), (MultiMRInput) inp.getValue());
    }
  }
  if (theMRInput != null) {
    theMRInput.init();
  } else {
    String alias = mapWork.getAliasToWork().keySet().iterator().next();
    if (inputs.get(alias) instanceof MultiMRInput) {
      mainWorkMultiMRInput = (MultiMRInput) inputs.get(alias);
    } else {
      throw new IOException("Unexpected input type found: "
          + inputs.get(alias).getClass().getCanonicalName());
    }
  }
  return theMRInput;
}
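Once getMRInput returns, the MRInputLegacy has already been started, awaited, and initialized. A minimal sketch of consuming it through its legacy mapred RecordReader; in Hive the reader is wired into the map operator pipeline instead, so the counting helper below is purely illustrative, and the unchecked cast assumes Writable key/value types.

// Illustrative consumer for an initialized MRInputLegacy. The class and
// method names are hypothetical; only the Tez/Hadoop calls are real.
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.tez.mapreduce.input.MRInputLegacy;

final class MRInputConsumer {

  private MRInputConsumer() {
  }

  @SuppressWarnings("unchecked")
  static long countRows(MRInputLegacy mrInput) throws Exception {
    // getMRInput above has already called init(), which attaches the MR split,
    // so the legacy reader can be pulled directly.
    RecordReader<WritableComparable<?>, Writable> reader =
        (RecordReader<WritableComparable<?>, Writable>) mrInput.getOldRecordReader();
    WritableComparable<?> key = reader.createKey();
    Writable value = reader.createValue();
    long rows = 0;
    while (reader.next(key, value)) {
      rows++;
    }
    return rows;
  }
}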
Use of org.apache.tez.runtime.api.LogicalInput in project hive by apache.
The class MergeFileRecordProcessor, method getMRInput.
private MRInputLegacy getMRInput(Map<String, LogicalInput> inputs) throws Exception {
  LOG.info("The inputs are: " + inputs);
  // Start the MR input and wait for the ready event; the number of MRInputs is expected to be 1.
  List<Input> li = Lists.newArrayList();
  int numMRInputs = 0;
  for (LogicalInput inp : inputs.values()) {
    if (inp instanceof MRInputLegacy) {
      numMRInputs++;
      if (numMRInputs > 1) {
        throw new IllegalArgumentException("Only one MRInput is expected");
      }
      inp.start();
      li.add(inp);
    } else {
      throw new IllegalArgumentException("Expecting only one input of type MRInputLegacy."
          + " Found type: " + inp.getClass().getCanonicalName());
    }
  }
  // Typically ALTER TABLE ... CONCATENATE runs on only one partition or one table,
  // so it doesn't matter whether we wait for all inputs or for any input to be ready.
  processorContext.waitForAnyInputReady(li);
  final MRInputLegacy theMRInput;
  if (li.size() == 1) {
    theMRInput = (MRInputLegacy) li.get(0);
    theMRInput.init();
  } else {
    throw new IllegalArgumentException("MRInputs count is expected to be 1");
  }
  return theMRInput;
}
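Both getMRInput variants above enforce a single-MRInput contract, each inline. A hedged sketch of that contract factored into a standalone guard, throwing on zero or multiple matches; the helper name is hypothetical.

// Extracts exactly one MRInputLegacy from a LogicalInput map, mirroring
// the "Only one MRInput is expected" checks above. Illustrative only.
import java.util.Map;

import org.apache.tez.mapreduce.input.MRInputLegacy;
import org.apache.tez.runtime.api.LogicalInput;

final class SingleMRInputGuard {

  private SingleMRInputGuard() {
  }

  static MRInputLegacy requireSingleMRInput(Map<String, LogicalInput> inputs) {
    MRInputLegacy found = null;
    for (LogicalInput input : inputs.values()) {
      if (input instanceof MRInputLegacy) {
        if (found != null) {
          throw new IllegalArgumentException("Only one MRInput is expected");
        }
        found = (MRInputLegacy) input;
      }
    }
    if (found == null) {
      throw new IllegalArgumentException("No MRInputLegacy found among inputs");
    }
    return found;
  }
}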
Use of org.apache.tez.runtime.api.LogicalInput in project hive by apache.
The class DynamicValueRegistryTez, method init.
@Override
public void init(RegistryConf conf) throws Exception {
  RegistryConfTez rct = (RegistryConfTez) conf;
  for (String inputSourceName : rct.baseWork.getInputSourceToRuntimeValuesInfo().keySet()) {
    LOG.info("Runtime value source: " + inputSourceName);
    LogicalInput runtimeValueInput = rct.inputs.get(inputSourceName);
    RuntimeValuesInfo runtimeValuesInfo =
        rct.baseWork.getInputSourceToRuntimeValuesInfo().get(inputSourceName);
    // Set up the deserializer/object inspectors for the incoming data source.
    Deserializer deserializer = ReflectionUtils.newInstance(
        runtimeValuesInfo.getTableDesc().getDeserializerClass(), null);
    deserializer.initialize(rct.conf, runtimeValuesInfo.getTableDesc().getProperties());
    ObjectInspector inspector = deserializer.getObjectInspector();
    // Set up column expressions for the dynamic values using this input.
    List<ExprNodeEvaluator> colExprEvaluators = new ArrayList<ExprNodeEvaluator>();
    for (ExprNodeDesc expr : runtimeValuesInfo.getColExprs()) {
      ExprNodeEvaluator exprEval = ExprNodeEvaluatorFactory.get(expr, null);
      exprEval.initialize(inspector);
      colExprEvaluators.add(exprEval);
    }
    runtimeValueInput.start();
    List<Input> inputList = new ArrayList<Input>();
    inputList.add(runtimeValueInput);
    rct.processorContext.waitForAllInputsReady(inputList);
    KeyValueReader kvReader = (KeyValueReader) runtimeValueInput.getReader();
    long rowCount = 0;
    while (kvReader.next()) {
      Object row = deserializer.deserialize((Writable) kvReader.getCurrentValue());
      rowCount++;
      for (int colIdx = 0; colIdx < colExprEvaluators.size(); ++colIdx) {
        // Evaluate each expression and save the result to the value registry.
        ExprNodeEvaluator eval = colExprEvaluators.get(colIdx);
        Object val = eval.evaluate(row);
        setValue(runtimeValuesInfo.getDynamicValueIDs().get(colIdx), val);
      }
    }
    // For now, expect a single row (min/max, aggregated bloom filter) or no rows.
    if (rowCount == 0) {
      LOG.debug("No input rows from " + inputSourceName + ", filling dynamic values with nulls");
      for (int colIdx = 0; colIdx < colExprEvaluators.size(); ++colIdx) {
        setValue(runtimeValuesInfo.getDynamicValueIDs().get(colIdx), null);
      }
    } else if (rowCount > 1) {
      throw new IllegalStateException("Expected 0 or 1 rows from " + inputSourceName
          + ", got " + rowCount);
    }
  }
}
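The core of init above is a "zero or one row" contract: drain the KeyValueReader, deserialize each value, and fail if more than one row arrives. A hedged sketch of that contract in isolation; the helper class, method, and Consumer-based callback are illustrative, not Hive's API.

// Drains a KeyValueReader under the "at most one row" expectation and hands
// the single deserialized row to a caller-supplied consumer.
import java.util.function.Consumer;

import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.io.Writable;
import org.apache.tez.runtime.library.api.KeyValueReader;

final class SingleRowDrain {

  private SingleRowDrain() {
  }

  /** Returns true if exactly one row was seen, false for an empty input. */
  static boolean drainAtMostOneRow(KeyValueReader kvReader, Deserializer deserializer,
      Consumer<Object> rowConsumer) throws Exception {
    long rowCount = 0;
    while (kvReader.next()) {
      Object row = deserializer.deserialize((Writable) kvReader.getCurrentValue());
      rowCount++;
      if (rowCount > 1) {
        throw new IllegalStateException("Expected 0 or 1 rows, got more");
      }
      rowConsumer.accept(row);
    }
    return rowCount == 1;
  }
}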