Use of org.apache.hadoop.hive.ql.parse.GenTezProcContext in project hive by apache.
The class ReduceSinkMapJoinProc, method process.
/* (non-Javadoc)
 * This processor addresses the RS-MJ case that occurs in Tez on the small/hash
 * table side of things. The work that the RS will be a part of must be connected
 * to the MJ work via a broadcast edge.
 * We should not walk down the tree when we encounter this pattern, because
 * the type of work (map work or reduce work) needs to be determined
 * on the basis of the big table side: it may be map work (no need for a shuffle)
 * or reduce work.
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext,
    Object... nodeOutputs) throws SemanticException {
  GenTezProcContext context = (GenTezProcContext) procContext;
  MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
  // remember the original parent list before we start modifying it.
  if (!context.mapJoinParentMap.containsKey(mapJoinOp)) {
    List<Operator<?>> parents = new ArrayList<Operator<?>>(mapJoinOp.getParentOperators());
    context.mapJoinParentMap.put(mapJoinOp, parents);
  }
  boolean isBigTable = stack.size() < 2
      || !(stack.get(stack.size() - 2) instanceof ReduceSinkOperator);
  ReduceSinkOperator parentRS = null;
  if (!isBigTable) {
    parentRS = (ReduceSinkOperator) stack.get(stack.size() - 2);
    // For dynamic partitioned hash join, the big table will also be coming from a
    // ReduceSinkOperator. Check for this condition.
    // TODO: use indexOf(), or parentRS.getTag()?
    isBigTable =
        (mapJoinOp.getParentOperators().indexOf(parentRS) == mapJoinOp.getConf().getPosBigTable());
  }
  if (mapJoinOp.getConf().isDynamicPartitionHashJoin()
      && !context.mapJoinToUnprocessedSmallTableReduceSinks.containsKey(mapJoinOp)) {
    // Initialize the set of unprocessed small tables
    Set<ReduceSinkOperator> rsSet = Sets.newIdentityHashSet();
    for (int pos = 0; pos < mapJoinOp.getParentOperators().size(); ++pos) {
      if (pos == mapJoinOp.getConf().getPosBigTable()) {
        continue;
      }
      rsSet.add((ReduceSinkOperator) mapJoinOp.getParentOperators().get(pos));
    }
    context.mapJoinToUnprocessedSmallTableReduceSinks.put(mapJoinOp, rsSet);
  }
  if (isBigTable) {
    context.currentMapJoinOperators.add(mapJoinOp);
    return null;
  }
  context.preceedingWork = null;
  context.currentRootOperator = null;
  return processReduceSinkToHashJoin(parentRS, mapJoinOp, context);
}
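A NodeProcessor such as ReduceSinkMapJoinProc does not run on its own: it is invoked by Hive's rule-based graph walker when the walk reaches a matching operator. The following minimal sketch shows how such a processor can be registered with a DefaultRuleDispatcher and DefaultGraphWalker (both from org.apache.hadoop.hive.ql.lib). The rule name is arbitrary, and topNodes and procCtx are assumed to be in scope; the actual wiring in Hive's Tez compiler is more involved than this.

// A minimal sketch, not the exact registration used by Hive's Tez compiler.
// Assumes topNodes (the root operators as a List<Node>) and procCtx (a
// GenTezProcContext) already exist in scope.
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
// Fire the processor whenever the walk reaches a MapJoinOperator.
opRules.put(new RuleRegExp("Handle RS-MJ small table side",
    MapJoinOperator.getOperatorName() + "%"), new ReduceSinkMapJoinProc());
Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
GraphWalker walker = new DefaultGraphWalker(disp);
walker.startWalking(topNodes, null);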
Use of org.apache.hadoop.hive.ql.parse.GenTezProcContext in project hive by apache.
The class MergeJoinProc, method process.
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  GenTezProcContext context = (GenTezProcContext) procCtx;
  CommonMergeJoinOperator mergeJoinOp = (CommonMergeJoinOperator) nd;
  if (stack.size() < 2) {
    // Safety check for the stack.get(stack.size() - 2) call below that retrieves
    // parentOp; it is very unlikely that the stack size is less than 2, i.e., that
    // the MergeJoinOperator is the only operator on the stack.
    context.currentMergeJoinOperator = mergeJoinOp;
    return null;
  }
  TezWork tezWork = context.currentTask.getWork();
  @SuppressWarnings("unchecked")
  Operator<? extends OperatorDesc> parentOp =
      (Operator<? extends OperatorDesc>) stack.get(stack.size() - 2);
  // We need the merge work that has been created as part of the dummy store walk.
  // If a merge work already exists for this merge join operator, the dummy store
  // work is added to it; otherwise create a merge work and register it.
  MergeJoinWork mergeWork = null;
  if (context.opMergeJoinWorkMap.containsKey(mergeJoinOp)) {
    // we already have the merge work corresponding to this merge join operator
    mergeWork = context.opMergeJoinWorkMap.get(mergeJoinOp);
  } else {
    mergeWork = new MergeJoinWork();
    tezWork.add(mergeWork);
    context.opMergeJoinWorkMap.put(mergeJoinOp, mergeWork);
  }
  if (!(stack.get(stack.size() - 2) instanceof DummyStoreOperator)) {
    /* This may happen in a case like the following, where the walk reaches the
     * merge join from the branch that has no dummy store:
     *
     * TS[0], FIL[26], SEL[2], DUMMY_STORE[30], MERGEJOIN[29]
     *                                         /
     * TS[3], FIL[27], SEL[5] ----------------
     */
    context.currentMergeJoinOperator = mergeJoinOp;
    mergeWork.setTag(mergeJoinOp.getTagForOperator(parentOp));
    return null;
  }
  // Guaranteed to be just 1 because each DummyStoreOperator can be part of only one work.
  BaseWork parentWork = context.childToWorkMap.get(parentOp).get(0);
  mergeWork.addMergedWork(null, parentWork, context.leafOperatorToFollowingWork);
  mergeWork.setMergeJoinOperator(mergeJoinOp);
  tezWork.setVertexType(mergeWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
  for (BaseWork grandParentWork : tezWork.getParents(parentWork)) {
    TezEdgeProperty edgeProp = tezWork.getEdgeProperty(grandParentWork, parentWork);
    tezWork.disconnect(grandParentWork, parentWork);
    tezWork.connect(grandParentWork, mergeWork, edgeProp);
  }
  for (BaseWork childWork : tezWork.getChildren(parentWork)) {
    TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, childWork);
    tezWork.disconnect(parentWork, childWork);
    tezWork.connect(mergeWork, childWork, edgeProp);
  }
  tezWork.remove(parentWork);
  DummyStoreOperator dummyOp = (DummyStoreOperator) stack.get(stack.size() - 2);
  parentWork.setTag(mergeJoinOp.getTagForOperator(dummyOp));
  mergeJoinOp.getParentOperators().remove(dummyOp);
  dummyOp.getChildOperators().clear();
  return true;
}
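The two loops above splice mergeWork into the Tez DAG in place of parentWork while preserving each edge's TezEdgeProperty. The same rewiring pattern, extracted into a hypothetical helper (replaceVertex is not part of the TezWork API; it merely restates the loops above), looks like this:

// Hypothetical helper restating the rewiring above: move every edge that touched
// oldWork onto newWork, then drop oldWork from the DAG. Disconnecting while
// iterating is safe here for the same reason it is in the original loops.
private static void replaceVertex(TezWork tezWork, BaseWork oldWork, BaseWork newWork) {
  for (BaseWork parent : tezWork.getParents(oldWork)) {
    TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parent, oldWork);
    tezWork.disconnect(parent, oldWork);
    tezWork.connect(parent, newWork, edgeProp);
  }
  for (BaseWork child : tezWork.getChildren(oldWork)) {
    TezEdgeProperty edgeProp = tezWork.getEdgeProperty(oldWork, child);
    tezWork.disconnect(oldWork, child);
    tezWork.connect(newWork, child, edgeProp);
  }
  tezWork.remove(oldWork);
}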