Use of org.apache.tez.runtime.api.Input in project hive by apache — example from class MergeFileRecordProcessor, method getMRInput.
/**
 * Locates, starts, and initializes the single MRInputLegacy among this task's
 * logical inputs.
 *
 * @param inputs all logical inputs handed to the processor; every entry must be
 *               an MRInputLegacy, and exactly one is expected
 * @return the started and initialized MRInputLegacy
 * @throws IllegalArgumentException if any input has another type, or if the
 *                                  number of MRInputs is not exactly one
 */
private MRInputLegacy getMRInput(Map<String, LogicalInput> inputs) throws Exception {
  LOG.info("The inputs are: " + inputs);
  // start the mr input and wait for ready event. number of MRInput is expected to be 1
  List<Input> started = Lists.newArrayList();
  for (LogicalInput candidate : inputs.values()) {
    if (!(candidate instanceof MRInputLegacy)) {
      throw new IllegalArgumentException("Expecting only one input of type MRInputLegacy." + " Found type: " + candidate.getClass().getCanonicalName());
    }
    // Reject a second MRInputLegacy before starting it.
    if (!started.isEmpty()) {
      throw new IllegalArgumentException("Only one MRInput is expected");
    }
    candidate.start();
    started.add(candidate);
  }
  // typically alter table .. concatenate is run on only one partition/one table,
  // so it doesn't matter if we wait for all inputs or any input to be ready.
  processorContext.waitForAnyInputReady(started);
  if (started.size() != 1) {
    throw new IllegalArgumentException("MRInputs count is expected to be 1");
  }
  MRInputLegacy mrInput = (MRInputLegacy) started.get(0);
  mrInput.init();
  return mrInput;
}
Use of org.apache.tez.runtime.api.Input in project hive by apache — example from class ReduceRecordProcessor, method init.
// Initializes the reduce-side processing pipeline for this Tez task: waits for
// shuffle inputs to become ready, registers merge-work (multi-input join)
// reducers, distributes available task memory to operators, builds the record
// sources, and initializes the reducer operator tree plus any dummy
// (hash-table source) operators.
// NOTE(review): mutates several processor fields (reducer, sources, connectOps,
// tagToReducerMap) in a specific order; presumably not safe to call twice — confirm.
@Override
void init(MRTaskReporter mrReporter, Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs) throws Exception {
perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
super.init(mrReporter, inputs, outputs);
MapredContext.init(false, new JobConf(jconf));
List<LogicalInput> shuffleInputs = getShuffleInputs(inputs);
// TODO HIVE-14042. Move to using a loop and a timed wait once TEZ-3302 is fixed.
checkAbortCondition();
if (shuffleInputs != null) {
LOG.info("Waiting for ShuffleInputs to become ready");
// Blocking wait: every shuffle input must be ready before records are read.
processorContext.waitForAllInputsReady(new ArrayList<Input>(shuffleInputs));
}
connectOps.clear();
ReduceWork redWork = reduceWork;
LOG.info("Main work is " + reduceWork.getName());
List<HashTableDummyOperator> workOps = reduceWork.getDummyOps();
// Collect dummy operators from the main work; merge works below add theirs too.
Set<HashTableDummyOperator> dummyOps = workOps == null ? new HashSet<>() : new HashSet<>(workOps);
tagToReducerMap.put(redWork.getTag(), redWork);
if (mergeWorkList != null) {
// Each merge work contributes a reducer and a DummyStoreOperator connecting
// it (by tag) to the main work's join.
for (BaseWork mergeWork : mergeWorkList) {
LOG.debug("Additional work {}", mergeWork.getName());
workOps = mergeWork.getDummyOps();
if (workOps != null) {
dummyOps.addAll(workOps);
}
ReduceWork mergeReduceWork = (ReduceWork) mergeWork;
reducer = mergeReduceWork.getReducer();
// Check immediately after reducer is assigned, in case the abort came in during the assignment.
checkAbortCondition();
DummyStoreOperator dummyStoreOp = getJoinParentOp(reducer);
connectOps.put(mergeReduceWork.getTag(), dummyStoreOp);
tagToReducerMap.put(mergeReduceWork.getTag(), mergeReduceWork);
}
((TezContext) MapredContext.get()).setDummyOpsMap(connectOps);
}
checkAbortCondition();
bigTablePosition = (byte) reduceWork.getTag();
ObjectInspector[] mainWorkOIs = null;
((TezContext) MapredContext.get()).setInputs(inputs);
((TezContext) MapredContext.get()).setTezProcessorContext(processorContext);
int numTags = reduceWork.getTagToValueDesc().size();
reducer = reduceWork.getReducer();
// Check immediately after reducer is assigned, in case the abort came in during the assignment.
checkAbortCondition();
// set memory available for operators
long memoryAvailableToTask = processorContext.getTotalMemoryAvailableToTask();
if (reducer.getConf() != null) {
reducer.getConf().setMaxMemoryAvailable(memoryAvailableToTask);
LOG.info("Memory available for operators set to {}", LlapUtil.humanReadableByteCount(memoryAvailableToTask));
}
OperatorUtils.setMemoryAvailable(reducer.getChildOperators(), memoryAvailableToTask);
// Setup values registry
String valueRegistryKey = DynamicValue.DYNAMIC_VALUE_REGISTRY_CACHE_KEY;
DynamicValueRegistryTez registryTez = dynamicValueCache.retrieve(valueRegistryKey, () -> new DynamicValueRegistryTez());
dynamicValueCacheKeys.add(valueRegistryKey);
RegistryConfTez registryConf = new RegistryConfTez(jconf, reduceWork, processorContext, inputs);
registryTez.init(registryConf);
checkAbortCondition();
if (numTags > 1) {
// Multi-tag main work: one record source per tag, initialized in one pass.
sources = new ReduceRecordSource[numTags];
mainWorkOIs = new ObjectInspector[numTags];
initializeMultipleSources(reduceWork, numTags, mainWorkOIs, sources);
((TezContext) MapredContext.get()).setRecordSources(sources);
reducer.initialize(jconf, mainWorkOIs);
} else {
// Single-tag main work, possibly with merge works: one source per reducer in
// tagToReducerMap; the main reducer is initialized with only the big table's
// object inspector, the merge reducers with their own.
numTags = tagToReducerMap.keySet().size();
sources = new ReduceRecordSource[numTags];
mainWorkOIs = new ObjectInspector[numTags];
for (int i : tagToReducerMap.keySet()) {
redWork = tagToReducerMap.get(i);
reducer = redWork.getReducer();
// Check immediately after reducer is assigned, in case the abort came in during the assignment.
checkAbortCondition();
initializeSourceForTag(redWork, i, mainWorkOIs, sources, redWork.getTagToValueDesc().get(0), redWork.getTagToInput().get(0));
reducer.initializeLocalWork(jconf);
}
reducer = reduceWork.getReducer();
// Check immediately after reducer is assigned, in case the abort came in during the assignment.
checkAbortCondition();
((TezContext) MapredContext.get()).setRecordSources(sources);
reducer.initialize(jconf, new ObjectInspector[] { mainWorkOIs[bigTablePosition] });
// Initialize the merge-work reducers (every tag except the big table's).
for (int i : tagToReducerMap.keySet()) {
if (i == bigTablePosition) {
continue;
}
redWork = tagToReducerMap.get(i);
reducer = redWork.getReducer();
// Check immediately after reducer is assigned, in case the abort came in during the assignment.
checkAbortCondition();
reducer.initialize(jconf, new ObjectInspector[] { mainWorkOIs[i] });
}
}
checkAbortCondition();
reducer = reduceWork.getReducer();
// initialize reduce operator tree
try {
LOG.info(reducer.dump(0));
// Initialize the dummy parent operators as well.
for (HashTableDummyOperator dummyOp : dummyOps) {
// TODO HIVE-14042. Propagating abort to dummyOps.
dummyOp.initialize(jconf, null);
checkAbortCondition();
}
// set output collector for any reduce sink operators in the pipeline.
List<Operator<?>> children = new ArrayList<>();
children.add(reducer);
children.addAll(dummyOps);
createOutputMap();
OperatorUtils.setChildrenCollector(children, outMap);
checkAbortCondition();
reducer.setReporter(reporter);
MapredContext.get().setReporter(reporter);
} catch (Throwable e) {
// Mark the processor as aborted before rethrowing so callers can clean up.
super.setAborted(true);
if (e instanceof OutOfMemoryError) {
// Don't create a new object if we are already out of memory
throw (OutOfMemoryError) e;
} else if (e instanceof InterruptedException) {
LOG.info("Hit an interrupt while initializing ReduceRecordProcessor. Message={}", e.getMessage());
throw (InterruptedException) e;
} else {
throw new RuntimeException(redWork.getName() + " operator initialization failed", e);
}
}
perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
}
Use of org.apache.tez.runtime.api.Input in project hive by apache — example from class MapRecordProcessor, method getMRInput.
/**
 * Starts all MR / multi-MR inputs, waits for them to become ready, and resolves
 * the single MRInputLegacy (if present).
 *
 * Side effects: fills {@code multiMRInputMap} with every MultiMRInput found;
 * when there is no MRInputLegacy, sets {@code mainWorkMultiMRInput} to the
 * MultiMRInput keyed by the main work's alias.
 *
 * @param inputs all logical inputs handed to the processor
 * @return the initialized MRInputLegacy, or null if the main work is driven by
 *         a MultiMRInput instead
 * @throws IllegalArgumentException if more than one MRInputLegacy exists
 * @throws IOException if the main work's alias maps to an unexpected input type
 */
private MRInputLegacy getMRInput(Map<String, LogicalInput> inputs) throws Exception {
  // there should be only one MRInput
  MRInputLegacy legacyInput = null;
  // start all mr/multi-mr inputs
  Set<Input> startedInputs = new HashSet<>();
  for (LogicalInput candidate : inputs.values()) {
    if (candidate instanceof MRInputLegacy || candidate instanceof MultiMRInput) {
      candidate.start();
      startedInputs.add(candidate);
    }
  }
  // TODO: HIVE-14042. Potential blocking call. MRInput handles this correctly even if an interrupt is swallowed.
  // MultiMRInput may not. Fix once TEZ-3302 is resolved.
  processorContext.waitForAllInputsReady(startedInputs);
  LOG.info("The input names are: {}", String.join(",", inputs.keySet()));
  for (Entry<String, LogicalInput> entry : inputs.entrySet()) {
    LogicalInput value = entry.getValue();
    if (value instanceof MRInputLegacy) {
      if (legacyInput != null) {
        throw new IllegalArgumentException("Only one MRInput is expected");
      }
      // a better logic would be to find the alias
      legacyInput = (MRInputLegacy) value;
    } else if (value instanceof MultiMRInput) {
      multiMRInputMap.put(entry.getKey(), (MultiMRInput) value);
    }
  }
  if (legacyInput == null) {
    // No legacy input: the main work must be fed by a MultiMRInput keyed by its alias.
    String alias = mapWork.getAliasToWork().keySet().iterator().next();
    if (inputs.get(alias) instanceof MultiMRInput) {
      mainWorkMultiMRInput = (MultiMRInput) inputs.get(alias);
    } else {
      throw new IOException("Unexpected input type found: " + inputs.get(alias).getClass().getCanonicalName());
    }
  } else {
    legacyInput.init();
  }
  return legacyInput;
}
Use of org.apache.tez.runtime.api.Input in project tez by apache — example from class LogicalIOProcessorRuntimeTask, method createInput.
/**
 * Reflectively instantiates the Input declared by the given spec, using the
 * (InputContext, int physicalEdgeCount) constructor convention.
 *
 * @param inputSpec    carries the input descriptor (class name) and edge count
 * @param inputContext context passed to the input's constructor
 * @return the created input, guaranteed to be a LogicalInput
 * @throws TezUncheckedException if the created class is not a LogicalInput
 */
private LogicalInput createInput(InputSpec inputSpec, InputContext inputContext) throws TezException {
  InputDescriptor inputDesc = inputSpec.getInputDescriptor();
  Input input = ReflectionUtils.createClazzInstance(inputDesc.getClassName(), new Class[] { InputContext.class, Integer.TYPE }, new Object[] { inputContext, inputSpec.getPhysicalEdgeCount() });
  if (!(input instanceof LogicalInput)) {
    // BUG FIX: report the class that was actually instantiated. The original used
    // inputDesc.getClass().getName(), which always prints the descriptor's own
    // class (e.g. InputDescriptor), never the offending input class.
    throw new TezUncheckedException(input.getClass().getName() + " is not a sub-type of LogicalInput." + " Only LogicalInput sub-types supported by LogicalIOProcessor.");
  }
  return (LogicalInput) input;
}
Use of org.apache.tez.runtime.api.Input in project tez by apache — example from class TestInputReadyTracker, method testGrouped.
// Verifies grouped-input readiness semantics of InputReadyTracker:
// an "any one" group becomes ready as soon as one member is ready, while an
// "all" group blocks until every member has been signalled.
// Fixes vs. original: lowercase 'l' long-literal suffixes (0l) replaced, the
// dead-store initializers on startTime/readyTime removed (both are assigned
// before first use), and raw pre-diamond generics replaced with <> to match
// the rest of the file.
@Test(timeout = 20000)
public void testGrouped() throws InterruptedException {
  InputReadyTracker inputReadyTracker = new InputReadyTracker();
  // input1/input3 report ready immediately; input2/input4 only when signalled.
  ImmediatelyReadyInputForTest input1 = new ImmediatelyReadyInputForTest(inputReadyTracker);
  ControlledReadyInputForTest input2 = new ControlledReadyInputForTest(inputReadyTracker);
  ImmediatelyReadyInputForTest input3 = new ImmediatelyReadyInputForTest(inputReadyTracker);
  ControlledReadyInputForTest input4 = new ControlledReadyInputForTest(inputReadyTracker);
  List<Input> group1Inputs = new ArrayList<>();
  group1Inputs.add(input1);
  group1Inputs.add(input2);
  List<Input> group2Inputs = new ArrayList<>();
  group2Inputs.add(input3);
  group2Inputs.add(input4);
  Map<String, MergedLogicalInput> mergedInputMap = new HashMap<>();
  MergedInputContext mergedInputContext1 = new TezMergedInputContextImpl(null, "group1", mergedInputMap, inputReadyTracker, null, null);
  MergedInputContext mergedInputContext2 = new TezMergedInputContextImpl(null, "group2", mergedInputMap, inputReadyTracker, null, null);
  AnyOneMergedInputForTest group1 = new AnyOneMergedInputForTest(mergedInputContext1, group1Inputs);
  AllMergedInputForTest group2 = new AllMergedInputForTest(mergedInputContext2, group2Inputs);
  mergedInputMap.put("group1", group1);
  mergedInputMap.put("group2", group2);
  // Register groups with tracker
  List<MergedLogicalInput> groups = Lists.newArrayList(group1, group2);
  inputReadyTracker.setGroupedInputs(groups);
  // Test for simple inputs
  // waitForAnyInputReady(group1) should return at once: input1 is immediately
  // ready and group1 is an "any one" group; input2 was never signalled.
  List<Input> requestList = new ArrayList<>();
  requestList.add(group1);
  Input readyInput = inputReadyTracker.waitForAnyInputReady(requestList);
  assertTrue(group1.isReady);
  assertTrue(input1.isReady);
  assertFalse(input2.isReady);
  assertEquals(group1, readyInput);
  // waitForAllInputsReady(group2) must block until input4 is signalled after
  // SLEEP_TIME, since an "all" group is ready only when every member is.
  requestList = new ArrayList<>();
  requestList.add(group2);
  long startTime = System.nanoTime();
  setDelayedInputReady(input4);
  inputReadyTracker.waitForAllInputsReady(requestList);
  long readyTime = System.nanoTime();
  // Should have moved into ready state - only happens when the setReady function is invoked.
  // Ensure the method returned only after the specific Input was told it is ready
  assertTrue(group2.isReady);
  assertTrue(input3.isReady);
  assertTrue(input4.isReady);
  assertTrue(readyTime >= startTime + SLEEP_TIME);
}
Aggregations