Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class GenTezWork, method process:
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext, Object... nodeOutputs) throws SemanticException {
GenTezProcContext context = (GenTezProcContext) procContext;
assert context != null && context.currentTask != null && context.currentRootOperator != null;
// Operator is a file sink or reduce sink. Something that forces
// a new vertex.
Operator<?> operator = (Operator<?>) nd;
// root is the start of the operator pipeline we're currently
// packing into a vertex, typically a table scan, union or join
Operator<?> root = context.currentRootOperator;
LOG.debug("Root operator: " + root);
LOG.debug("Leaf operator: " + operator);
if (context.clonedReduceSinks.contains(operator)) {
// just skip and keep going
return null;
}
TezWork tezWork = context.currentTask.getWork();
// Right now the work graph is pretty simple. If there is no
// preceding work we have a root and will generate a map
// vertex. If there is a preceding work we will generate
// a reduce vertex
BaseWork work;
if (context.rootToWorkMap.containsKey(root)) {
// seeing this root again means the operator tree branches here (e.g. a multi insert), which will result in a vertex with multiple FS or RS operators.
if (context.childToWorkMap.containsKey(operator)) {
// if we've seen both root and child, we can bail.
// clear out the mapjoin set. we don't need it anymore.
context.currentMapJoinOperators.clear();
// clear out the union set. we don't need it anymore.
context.currentUnionOperators.clear();
return null;
} else {
// At this point we don't have to do anything special. Just
// run through the regular paces w/o creating a new task.
work = context.rootToWorkMap.get(root);
}
} else {
// create a new vertex
if (context.preceedingWork == null) {
work = utils.createMapWork(context, root, tezWork, null);
} else {
work = GenTezUtils.createReduceWork(context, root, tezWork);
}
context.rootToWorkMap.put(root, work);
}
// this is where we set the sort columns that we will be using for KeyValueInputMerge
if (operator instanceof DummyStoreOperator) {
work.addSortCols(root.getOpTraits().getSortCols().get(0));
}
if (!context.childToWorkMap.containsKey(operator)) {
List<BaseWork> workItems = new LinkedList<BaseWork>();
workItems.add(work);
context.childToWorkMap.put(operator, workItems);
} else {
context.childToWorkMap.get(operator).add(work);
}
// the merge join rewiring below needs to happen first because it changes the work item itself, which can affect the working of all downstream transformations.
if (context.currentMergeJoinOperator != null) {
// we are currently walking the big table side of the merge join. we need to create or hook up
// merge join work.
MergeJoinWork mergeJoinWork = null;
if (context.opMergeJoinWorkMap.containsKey(context.currentMergeJoinOperator)) {
// we have found a merge work corresponding to this closing operator. Hook up this work.
mergeJoinWork = context.opMergeJoinWorkMap.get(context.currentMergeJoinOperator);
} else {
// we need to create the merge join work
mergeJoinWork = new MergeJoinWork();
mergeJoinWork.setMergeJoinOperator(context.currentMergeJoinOperator);
tezWork.add(mergeJoinWork);
context.opMergeJoinWorkMap.put(context.currentMergeJoinOperator, mergeJoinWork);
}
// connect the work correctly.
work.addSortCols(root.getOpTraits().getSortCols().get(0));
mergeJoinWork.addMergedWork(work, null, context.leafOperatorToFollowingWork);
Operator<? extends OperatorDesc> parentOp = getParentFromStack(context.currentMergeJoinOperator, stack);
// Set the big table position. Both the reduce work and merge join operator
// should be set with the same value.
// int pos = context.currentMergeJoinOperator.getTagForOperator(parentOp);
int pos = context.currentMergeJoinOperator.getConf().getBigTablePosition();
work.setTag(pos);
context.currentMergeJoinOperator.getConf().setBigTablePosition(pos);
tezWork.setVertexType(work, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
for (BaseWork parentWork : tezWork.getParents(work)) {
TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work);
tezWork.disconnect(parentWork, work);
tezWork.connect(parentWork, mergeJoinWork, edgeProp);
}
for (BaseWork childWork : tezWork.getChildren(work)) {
TezEdgeProperty edgeProp = tezWork.getEdgeProperty(work, childWork);
tezWork.disconnect(work, childWork);
tezWork.connect(mergeJoinWork, childWork, edgeProp);
}
tezWork.remove(work);
context.rootToWorkMap.put(root, mergeJoinWork);
context.childToWorkMap.get(operator).remove(work);
context.childToWorkMap.get(operator).add(mergeJoinWork);
work = mergeJoinWork;
context.currentMergeJoinOperator = null;
}
// remember which mapjoin operator links with which work
if (!context.currentMapJoinOperators.isEmpty()) {
for (MapJoinOperator mj : context.currentMapJoinOperators) {
// for dynamic partitioned hash joins, record enough information here so we can later run the same logic that is run in ReduceSinkMapJoinProc.
if (mj.getConf().isDynamicPartitionHashJoin()) {
// Since this is a dynamic partitioned hash join, the work for this join should be a ReduceWork
ReduceWork reduceWork = (ReduceWork) work;
int bigTablePosition = mj.getConf().getPosBigTable();
reduceWork.setTag(bigTablePosition);
// Use context.mapJoinParentMap to get the original RS parents, because
// the MapJoin's parents may have been replaced by dummy operators.
List<Operator<?>> mapJoinOriginalParents = context.mapJoinParentMap.get(mj);
if (mapJoinOriginalParents == null) {
throw new SemanticException("Unexpected error - context.mapJoinParentMap did not have an entry for " + mj);
}
for (int pos = 0; pos < mapJoinOriginalParents.size(); ++pos) {
// This processing only needs to happen for the small tables
if (pos == bigTablePosition) {
continue;
}
Operator<?> parentOp = mapJoinOriginalParents.get(pos);
context.smallTableParentToMapJoinMap.put(parentOp, mj);
ReduceSinkOperator parentRS = (ReduceSinkOperator) parentOp;
// TableDesc needed for dynamic partitioned hash join
GenMapRedUtils.setKeyAndValueDesc(reduceWork, parentRS);
// once ReduceSinkMapJoinProc has processed a small table RS, the MapJoin has its ReduceSink parent removed.
if (!context.mapJoinToUnprocessedSmallTableReduceSinks.get(mj).contains(parentRS)) {
// This reduce sink has been processed already, so the work for the parentRS exists
BaseWork parentWork = ReduceSinkMapJoinProc.getMapJoinParentWork(context, parentRS);
int tag = parentRS.getConf().getTag();
tag = (tag == -1 ? 0 : tag);
reduceWork.getTagToInput().put(tag, parentWork.getName());
}
}
}
LOG.debug("Processing map join: " + mj);
// remember this work item so we can connect it to the mapjoin later
if (!context.mapJoinWorkMap.containsKey(mj)) {
List<BaseWork> workItems = new LinkedList<BaseWork>();
workItems.add(work);
context.mapJoinWorkMap.put(mj, workItems);
} else {
context.mapJoinWorkMap.get(mj).add(work);
}
/*
 * this happens in case of map join operations.
 * The tree looks like this:
 *
 *       RS <--- we are here perhaps
 *       |
 *    MapJoin
 *    /     \
 *  RS       TS
 *  /
 * TS
 *
 * If we are at the RS pointed to above, we may have already visited the
 * RS following the TS, in which case work for the TS-RS branch has already
 * been generated. We need to hook the current work to this generated work.
 */
if (context.linkOpWithWorkMap.containsKey(mj)) {
Map<BaseWork, TezEdgeProperty> linkWorkMap = context.linkOpWithWorkMap.get(mj);
if (linkWorkMap != null) {
// Note: it's not quite clear why this is done inside this if. Seems like it should be on the top level.
if (context.linkChildOpWithDummyOp.containsKey(mj)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Adding dummy ops to work: " + work.getName() + ": " + context.linkChildOpWithDummyOp.get(mj));
}
for (Operator<?> dummy : context.linkChildOpWithDummyOp.get(mj)) {
work.addDummyOp((HashTableDummyOperator) dummy);
}
}
for (Entry<BaseWork, TezEdgeProperty> parentWorkMap : linkWorkMap.entrySet()) {
BaseWork parentWork = parentWorkMap.getKey();
LOG.debug("connecting " + parentWork.getName() + " with " + work.getName());
TezEdgeProperty edgeProp = parentWorkMap.getValue();
tezWork.connect(parentWork, work, edgeProp);
if (edgeProp.getEdgeType() == EdgeType.CUSTOM_EDGE) {
tezWork.setVertexType(work, VertexType.INITIALIZED_EDGES);
}
// set up the reduce sinks' output names now that we know the name of the downstream work
for (ReduceSinkOperator r : context.linkWorkWithReduceSinkMap.get(parentWork)) {
if (r.getConf().getOutputName() != null) {
LOG.debug("Cloning reduce sink for multi-child broadcast edge");
// we've already set this one up. Need to clone for the next work.
r = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(r.getCompilationOpContext(), (ReduceSinkDesc) r.getConf().clone(), new RowSchema(r.getSchema()), r.getParentOperators());
context.clonedReduceSinks.add(r);
}
r.getConf().setOutputName(work.getName());
context.connectedReduceSinks.add(r);
}
}
}
}
}
// clear out the set. we don't need it anymore.
context.currentMapJoinOperators.clear();
}
// we might have to connect parent work with this work later.
for (Operator<?> parent : new ArrayList<Operator<?>>(root.getParentOperators())) {
if (LOG.isDebugEnabled()) {
LOG.debug("Removing " + parent + " as parent from " + root);
}
context.leafOperatorToFollowingWork.remove(parent);
context.leafOperatorToFollowingWork.put(parent, work);
root.removeParent(parent);
}
if (!context.currentUnionOperators.isEmpty()) {
// a non-empty set means the pipeline we just walked contains union all operators.
// see context.currentUnionOperators in GenTezWorkWalker for more details
UnionWork unionWork;
if (context.unionWorkMap.containsKey(operator)) {
// we've seen this terminal before and already created a union work for it.
assert operator.getChildOperators().isEmpty();
unionWork = (UnionWork) context.unionWorkMap.get(operator);
// finally connect the union work with work
connectUnionWorkWithWork(unionWork, work, tezWork, context);
} else {
// we've not seen this terminal before. check rootUnionWorkMap,
// which maps the root operator of a union work to that union work
unionWork = context.rootUnionWorkMap.get(root);
if (unionWork == null) {
// if unionWork is null, it means it is the first time. we need to
// create a union work object and add this work to it. Subsequent
// work should reference the union and not the actual work.
unionWork = GenTezUtils.createUnionWork(context, root, operator, tezWork);
// finally connect the union work with work
connectUnionWorkWithWork(unionWork, work, tezWork, context);
}
}
context.currentUnionOperators.clear();
work = unionWork;
}
// Note: the concept of leaf and root is reversed in hive for historical reasons. Roots are data sources, leaves are data sinks. I know.
if (context.leafOperatorToFollowingWork.containsKey(operator)) {
BaseWork followingWork = context.leafOperatorToFollowingWork.get(operator);
long bytesPerReducer = context.conf.getLongVar(HiveConf.ConfVars.BYTESPERREDUCER);
LOG.debug("Second pass. Leaf operator: " + operator + " has common downstream work:" + followingWork);
if (operator instanceof DummyStoreOperator) {
// this is the small table side.
assert (followingWork instanceof MergeJoinWork);
MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork;
CommonMergeJoinOperator mergeJoinOp = mergeJoinWork.getMergeJoinOperator();
work.setTag(mergeJoinOp.getTagForOperator(operator));
mergeJoinWork.addMergedWork(null, work, context.leafOperatorToFollowingWork);
tezWork.setVertexType(mergeJoinWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
for (BaseWork parentWork : tezWork.getParents(work)) {
TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work);
tezWork.disconnect(parentWork, work);
tezWork.connect(parentWork, mergeJoinWork, edgeProp);
}
work = mergeJoinWork;
} else {
// need to add this branch to the key + value info
assert operator instanceof ReduceSinkOperator && ((followingWork instanceof ReduceWork) || (followingWork instanceof MergeJoinWork) || followingWork instanceof UnionWork);
ReduceSinkOperator rs = (ReduceSinkOperator) operator;
ReduceWork rWork = null;
if (followingWork instanceof MergeJoinWork) {
MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork;
rWork = (ReduceWork) mergeJoinWork.getMainWork();
} else if (followingWork instanceof UnionWork) {
// this is only possible if there is a merge work followed by the union
UnionWork unionWork = (UnionWork) followingWork;
int index = getFollowingWorkIndex(tezWork, unionWork, rs);
BaseWork baseWork = tezWork.getChildren(unionWork).get(index);
if (baseWork instanceof MergeJoinWork) {
MergeJoinWork mergeJoinWork = (MergeJoinWork) baseWork;
// disconnect the connection to union work and connect to merge work
followingWork = mergeJoinWork;
rWork = (ReduceWork) mergeJoinWork.getMainWork();
} else {
rWork = (ReduceWork) baseWork;
}
} else {
rWork = (ReduceWork) followingWork;
}
GenMapRedUtils.setKeyAndValueDesc(rWork, rs);
// remember which parent belongs to which tag
int tag = rs.getConf().getTag();
rWork.getTagToInput().put(tag == -1 ? 0 : tag, work.getName());
// remember the output name of the reduce sink
rs.getConf().setOutputName(rWork.getName());
// For dynamic partitioned hash join, run the ReduceSinkMapJoinProc logic for any
// ReduceSink parents that we missed.
MapJoinOperator mj = context.smallTableParentToMapJoinMap.get(rs);
if (mj != null) {
// Only need to run the logic for tables we missed
if (context.mapJoinToUnprocessedSmallTableReduceSinks.get(mj).contains(rs)) {
// ReduceSinkMapJoinProc logic does not work unless the ReduceSink is connected as
// a parent of the MapJoin, but at this point we have already removed all of the
// parents from the MapJoin.
// Try temporarily adding the RS as a parent
ArrayList<Operator<?>> tempMJParents = new ArrayList<Operator<?>>();
tempMJParents.add(rs);
mj.setParentOperators(tempMJParents);
// ReduceSink also needs MapJoin as child
List<Operator<?>> rsChildren = rs.getChildOperators();
rsChildren.add(mj);
// Since the MapJoin has had all of its other parents removed at this point,
// it would be bad here if processReduceSinkToHashJoin() tries to do anything
// with the RS parent based on its position in the list of parents.
ReduceSinkMapJoinProc.processReduceSinkToHashJoin(rs, mj, context);
// Remove any parents from MapJoin again
mj.removeParents();
// TODO: do we also need to remove the MapJoin from the list of RS's children?
}
}
if (!context.connectedReduceSinks.contains(rs)) {
// add dependency between the two work items
TezEdgeProperty edgeProp;
EdgeType edgeType = GenTezUtils.determineEdgeType(work, followingWork, rs);
if (rWork.isAutoReduceParallelism()) {
edgeProp = new TezEdgeProperty(context.conf, edgeType, true, rWork.getMinReduceTasks(), rWork.getMaxReduceTasks(), bytesPerReducer);
} else {
edgeProp = new TezEdgeProperty(edgeType);
}
tezWork.connect(work, followingWork, edgeProp);
context.connectedReduceSinks.add(rs);
}
}
} else {
LOG.debug("First pass. Leaf operator: " + operator);
}
// No children means we're at the bottom. If there are more operators to scan, the next item will be a new root.
if (!operator.getChildOperators().isEmpty()) {
assert operator.getChildOperators().size() == 1;
context.parentOfRoot = operator;
context.currentRootOperator = operator.getChildOperators().get(0);
context.preceedingWork = work;
}
return null;
}
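The disconnect/reconnect loops in the merge join branch above are a general vertex-substitution step: every edge touching the old work item is re-pointed at its replacement before the old vertex is dropped. A minimal sketch of that pattern, using only the TezWork calls that appear in the snippet (the helper name is illustrative, not part of Hive):
private static void replaceVertex(TezWork tezWork, BaseWork oldWork, BaseWork newWork) {
  // re-point incoming edges, preserving their edge properties
  for (BaseWork parent : tezWork.getParents(oldWork)) {
    TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parent, oldWork);
    tezWork.disconnect(parent, oldWork);
    tezWork.connect(parent, newWork, edgeProp);
  }
  // re-point outgoing edges the same way
  for (BaseWork child : tezWork.getChildren(oldWork)) {
    TezEdgeProperty edgeProp = tezWork.getEdgeProperty(oldWork, child);
    tezWork.disconnect(oldWork, child);
    tezWork.connect(newWork, child, edgeProp);
  }
  // finally drop the old vertex from the work graph
  tezWork.remove(oldWork);
}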
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class GenSparkUtils, method createReduceWork:
public ReduceWork createReduceWork(GenSparkProcContext context, Operator<?> root, SparkWork sparkWork) throws SemanticException {
Preconditions.checkArgument(!root.getParentOperators().isEmpty(), "AssertionError: expected root.getParentOperators() to be non-empty");
ReduceWork reduceWork = new ReduceWork("Reducer " + (++sequenceNumber));
LOG.debug("Adding reduce work (" + reduceWork.getName() + ") for " + root);
reduceWork.setReducer(root);
reduceWork.setNeedsTagging(GenMapRedUtils.needsTagging(reduceWork));
// Pick the maximum # reducers across all parents as the # of reduce tasks.
int maxExecutors = -1;
for (Operator<? extends OperatorDesc> parentOfRoot : root.getParentOperators()) {
Preconditions.checkArgument(parentOfRoot instanceof ReduceSinkOperator, "AssertionError: expected parentOfRoot to be an " + "instance of ReduceSinkOperator, but was " + parentOfRoot.getClass().getName());
ReduceSinkOperator reduceSink = (ReduceSinkOperator) parentOfRoot;
maxExecutors = Math.max(maxExecutors, reduceSink.getConf().getNumReducers());
}
reduceWork.setNumReduceTasks(maxExecutors);
ReduceSinkOperator reduceSink = (ReduceSinkOperator) context.parentOfRoot;
setupReduceSink(context, reduceWork, reduceSink);
sparkWork.add(reduceWork);
SparkEdgeProperty edgeProp = getEdgeProperty(reduceSink, reduceWork);
sparkWork.connect(context.preceedingWork, reduceWork, edgeProp);
return reduceWork;
}
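Taking the maximum reducer count across all parent ReduceSinks errs on the side of more parallelism when branches disagree. A hypothetical call site, assuming the context is wired as in the snippet:
// Illustrative only: create the reduce vertex for 'root', which follows a
// ReduceSinkOperator; createReduceWork also connects it to context.preceedingWork.
ReduceWork rWork = createReduceWork(context, root, sparkWork);
LOG.debug("Reduce vertex " + rWork.getName() + " will run " + rWork.getNumReduceTasks() + " tasks");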
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class DriverContext, method finished:
public void finished(TaskRunner runner) {
if (statsTasks.isEmpty() || !(runner.getTask() instanceof MapRedTask)) {
return;
}
MapRedTask mapredTask = (MapRedTask) runner.getTask();
MapWork mapWork = mapredTask.getWork().getMapWork();
ReduceWork reduceWork = mapredTask.getWork().getReduceWork();
List<Operator> operators = new ArrayList<Operator>(mapWork.getAliasToWork().values());
if (reduceWork != null) {
operators.add(reduceWork.getReducer());
}
final List<String> statKeys = new ArrayList<String>(1);
NodeUtils.iterate(operators, FileSinkOperator.class, new Function<FileSinkOperator>() {
@Override
public void apply(FileSinkOperator fsOp) {
if (fsOp.getConf().isGatherStats()) {
statKeys.add(fsOp.getConf().getStatsAggPrefix());
}
}
});
for (String statKey : statKeys) {
statsTasks.get(statKey).getWork().setSourceTask(mapredTask);
}
}
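NodeUtils.iterate is the generic visitor used here: it walks the given operator trees and applies the callback to every node of the requested class. A sketch of the same pattern for a different operator type (ReduceSinkOperator is just an example; the counter is local to the sketch):
// Count the ReduceSinkOperators reachable from 'operators', mirroring the
// FileSinkOperator traversal above. AtomicInteger allows mutation from the
// anonymous callback (java.util.concurrent.atomic.AtomicInteger).
final AtomicInteger rsCount = new AtomicInteger();
NodeUtils.iterate(operators, ReduceSinkOperator.class, new Function<ReduceSinkOperator>() {
  @Override
  public void apply(ReduceSinkOperator rs) {
    rsCount.incrementAndGet();
  }
});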
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class ReduceRecordProcessor, method init:
@Override
void init(MRTaskReporter mrReporter, Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs) throws Exception {
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
super.init(mrReporter, inputs, outputs);
MapredContext.init(false, new JobConf(jconf));
List<LogicalInput> shuffleInputs = getShuffleInputs(inputs);
// TODO HIVE-14042. Move to using a loop and a timed wait once TEZ-3302 is fixed.
checkAbortCondition();
if (shuffleInputs != null) {
l4j.info("Waiting for ShuffleInputs to become ready");
processorContext.waitForAllInputsReady(new ArrayList<Input>(shuffleInputs));
}
connectOps.clear();
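// note: redWork is repointed at each merge work in the loop below, while reduceWork keeps referring to the main work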
ReduceWork redWork = reduceWork;
l4j.info("Main work is " + reduceWork.getName());
List<HashTableDummyOperator> workOps = reduceWork.getDummyOps();
HashSet<HashTableDummyOperator> dummyOps = workOps == null ? null : new HashSet<>(workOps);
tagToReducerMap.put(redWork.getTag(), redWork);
if (mergeWorkList != null) {
for (BaseWork mergeWork : mergeWorkList) {
if (l4j.isDebugEnabled()) {
l4j.debug("Additional work " + mergeWork.getName());
}
workOps = mergeWork.getDummyOps();
if (workOps != null) {
if (dummyOps == null) {
dummyOps = new HashSet<>(workOps);
} else {
dummyOps.addAll(workOps);
}
}
ReduceWork mergeReduceWork = (ReduceWork) mergeWork;
reducer = mergeReduceWork.getReducer();
// Check immediately after reducer is assigned, in case the abort came in during the assignment
checkAbortCondition();
DummyStoreOperator dummyStoreOp = getJoinParentOp(reducer);
connectOps.put(mergeReduceWork.getTag(), dummyStoreOp);
tagToReducerMap.put(mergeReduceWork.getTag(), mergeReduceWork);
}
((TezContext) MapredContext.get()).setDummyOpsMap(connectOps);
}
checkAbortCondition();
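// for merge joins the main work's tag doubles as the big table position (GenTezWork sets work.setTag() to the big table position, see above)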
bigTablePosition = (byte) reduceWork.getTag();
ObjectInspector[] mainWorkOIs = null;
((TezContext) MapredContext.get()).setInputs(inputs);
((TezContext) MapredContext.get()).setTezProcessorContext(processorContext);
int numTags = reduceWork.getTagToValueDesc().size();
reducer = reduceWork.getReducer();
// Check immediately after reducer is assigned, in case the abort came in during the assignment
checkAbortCondition();
// set memory available for operators
long memoryAvailableToTask = processorContext.getTotalMemoryAvailableToTask();
if (reducer.getConf() != null) {
reducer.getConf().setMaxMemoryAvailable(memoryAvailableToTask);
l4j.info("Memory available for operators set to {}", LlapUtil.humanReadableByteCount(memoryAvailableToTask));
}
OperatorUtils.setMemoryAvailable(reducer.getChildOperators(), memoryAvailableToTask);
// Setup values registry
String valueRegistryKey = DynamicValue.DYNAMIC_VALUE_REGISTRY_CACHE_KEY;
DynamicValueRegistryTez registryTez = dynamicValueCache.retrieve(valueRegistryKey, new Callable<DynamicValueRegistryTez>() {
@Override
public DynamicValueRegistryTez call() {
return new DynamicValueRegistryTez();
}
});
dynamicValueCacheKeys.add(valueRegistryKey);
RegistryConfTez registryConf = new RegistryConfTez(jconf, reduceWork, processorContext, inputs);
registryTez.init(registryConf);
checkAbortCondition();
if (numTags > 1) {
sources = new ReduceRecordSource[numTags];
mainWorkOIs = new ObjectInspector[numTags];
initializeMultipleSources(reduceWork, numTags, mainWorkOIs, sources);
((TezContext) MapredContext.get()).setRecordSources(sources);
reducer.initialize(jconf, mainWorkOIs);
} else {
numTags = tagToReducerMap.keySet().size();
sources = new ReduceRecordSource[numTags];
mainWorkOIs = new ObjectInspector[numTags];
for (int i : tagToReducerMap.keySet()) {
redWork = tagToReducerMap.get(i);
reducer = redWork.getReducer();
// Check immediately after reducer is assigned, in case the abort came in during the assignment
checkAbortCondition();
initializeSourceForTag(redWork, i, mainWorkOIs, sources, redWork.getTagToValueDesc().get(0), redWork.getTagToInput().get(0));
reducer.initializeLocalWork(jconf);
}
reducer = reduceWork.getReducer();
// Check immediately after reducer is assigned, in case the abort came in during the assignment
checkAbortCondition();
((TezContext) MapredContext.get()).setRecordSources(sources);
reducer.initialize(jconf, new ObjectInspector[] { mainWorkOIs[bigTablePosition] });
for (int i : tagToReducerMap.keySet()) {
if (i == bigTablePosition) {
continue;
}
redWork = tagToReducerMap.get(i);
reducer = redWork.getReducer();
// Check immediately after reducer is assigned, in case the abort came in during the assignment
checkAbortCondition();
reducer.initialize(jconf, new ObjectInspector[] { mainWorkOIs[i] });
}
}
checkAbortCondition();
reducer = reduceWork.getReducer();
// initialize reduce operator tree
try {
l4j.info(reducer.dump(0));
// initialization isn't finished until all parents are initialized; for broadcast joins that means initializing the dummy parent operators as well.
if (dummyOps != null) {
for (HashTableDummyOperator dummyOp : dummyOps) {
// TODO HIVE-14042. Propagating abort to dummyOps.
dummyOp.initialize(jconf, null);
checkAbortCondition();
}
}
// set output collector for any reduce sink operators in the pipeline.
List<Operator<?>> children = new LinkedList<Operator<?>>();
children.add(reducer);
if (dummyOps != null) {
children.addAll(dummyOps);
}
createOutputMap();
OperatorUtils.setChildrenCollector(children, outMap);
checkAbortCondition();
reducer.setReporter(reporter);
MapredContext.get().setReporter(reporter);
} catch (Throwable e) {
super.setAborted(true);
if (e instanceof OutOfMemoryError) {
// Don't create a new object if we are already out of memory
throw (OutOfMemoryError) e;
} else if (e instanceof InterruptedException) {
l4j.info("Hit an interrupt while initializing ReduceRecordProcessor. Message={}", e.getMessage());
throw (InterruptedException) e;
} else {
throw new RuntimeException("Reduce operator initialization failed", e);
}
}
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
}
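The repeated checkAbortCondition() calls after every reducer assignment guard this long, multi-step initialization against a concurrent abort. A standalone sketch of the guard pattern (names and exception choice are hypothetical; the real flag and check live in the processor superclass):
// Hypothetical sketch of the abort-guard pattern used throughout init().
private volatile boolean aborted = false;

void checkAbortCondition() throws InterruptedException {
  // re-checked between expensive initialization steps so an abort
  // requested by another thread is noticed promptly
  if (aborted) {
    throw new InterruptedException("aborted during operator initialization");
  }
}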
Use of org.apache.hadoop.hive.ql.plan.ReduceWork in project hive by apache.
The class GenMapRedUtils, method splitPlan:
/**
* Met a cRS in the pOP (parent task with RS) - cRS - cOP (no task yet) case.
* Creates a new child task for cRS-cOP and links the two tasks by a temporary file: pOP-FS / TS-cRS-cOP
*
* @param cRS
* the reduce sink operator encountered
* @param opProcCtx
* processing context
*/
static void splitPlan(ReduceSinkOperator cRS, GenMRProcContext opProcCtx) throws SemanticException {
// Generate a new task
ParseContext parseCtx = opProcCtx.getParseCtx();
Task<? extends Serializable> parentTask = opProcCtx.getCurrTask();
MapredWork childPlan = getMapRedWork(parseCtx);
Task<? extends Serializable> childTask = TaskFactory.get(childPlan, parseCtx.getConf());
Operator<? extends OperatorDesc> reducer = cRS.getChildOperators().get(0);
// Add the reducer
ReduceWork rWork = new ReduceWork();
childPlan.setReduceWork(rWork);
rWork.setReducer(reducer);
ReduceSinkDesc desc = cRS.getConf();
childPlan.getReduceWork().setNumReduceTasks(Integer.valueOf(desc.getNumReducers()));
opProcCtx.getOpTaskMap().put(reducer, childTask);
splitTasks(cRS, parentTask, childTask, opProcCtx);
}
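For contrast with the Tez and Spark paths above, the MapReduce split reduces to a few explicit wiring steps. A condensed sketch restricted to calls that appear in the snippet:
// Condensed from splitPlan above: hook a reducer into a fresh child plan.
MapredWork childPlan = getMapRedWork(parseCtx);
ReduceWork rWork = new ReduceWork();
rWork.setReducer(reducer); // the operator directly below the reduce sink
rWork.setNumReduceTasks(Integer.valueOf(cRS.getConf().getNumReducers()));
childPlan.setReduceWork(rWork);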