Use of org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator in project hive by apache.
The class MergeJoinProc, method process.
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  GenTezProcContext context = (GenTezProcContext) procCtx;
  CommonMergeJoinOperator mergeJoinOp = (CommonMergeJoinOperator) nd;
  if (stack.size() < 2) {
    // Safety check for the stack.get(stack.size() - 2) calls below that fetch
    // parentOp, although it is very unlikely that the stack size is less than 2,
    // i.e., that the MergeJoinOperator is the only node on the stack.
    context.currentMergeJoinOperator = mergeJoinOp;
    return null;
  }
  TezWork tezWork = context.currentTask.getWork();
  @SuppressWarnings("unchecked")
  Operator<? extends OperatorDesc> parentOp =
      (Operator<? extends OperatorDesc>) stack.get(stack.size() - 2);
  // We need to set the merge work that has been created as part of the dummy
  // store walk. If a merge work already exists for this merge join operator,
  // add the dummy store work to it; otherwise, create a merge work and add
  // the above work to it.
  MergeJoinWork mergeWork = null;
  if (context.opMergeJoinWorkMap.containsKey(mergeJoinOp)) {
    // we already have the merge work corresponding to this merge join operator
    mergeWork = context.opMergeJoinWorkMap.get(mergeJoinOp);
  } else {
    mergeWork = new MergeJoinWork();
    tezWork.add(mergeWork);
    context.opMergeJoinWorkMap.put(mergeJoinOp, mergeWork);
  }
  if (!(stack.get(stack.size() - 2) instanceof DummyStoreOperator)) {
    /* This may happen in a case like the following, where only one of the
       branches feeding the merge join goes through a dummy store operator:
       TS[0], FIL[26], SEL[2], DUMMY_STORE[30], MERGEJOIN[29]
                                               /
       TS[3], FIL[27], SEL[5], ---------------
     */
    context.currentMergeJoinOperator = mergeJoinOp;
    mergeWork.setTag(mergeJoinOp.getTagForOperator(parentOp));
    return null;
  }
  // Guaranteed to be just 1 because each DummyStoreOperator can be part of
  // only one work.
  BaseWork parentWork = context.childToWorkMap.get(parentOp).get(0);
  mergeWork.addMergedWork(null, parentWork, context.leafOperatorToFollowingWork);
  mergeWork.setMergeJoinOperator(mergeJoinOp);
  tezWork.setVertexType(mergeWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
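  // Splice mergeWork into parentWork's place in the DAG: re-point every
  // incoming edge from parentWork's parents to mergeWork, preserving the
  // original edge properties.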
  for (BaseWork grandParentWork : tezWork.getParents(parentWork)) {
    TezEdgeProperty edgeProp = tezWork.getEdgeProperty(grandParentWork, parentWork);
    tezWork.disconnect(grandParentWork, parentWork);
    tezWork.connect(grandParentWork, mergeWork, edgeProp);
  }
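  // ...and likewise re-point every outgoing edge from parentWork to its
  // children so that it originates from mergeWork instead.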
  for (BaseWork childWork : tezWork.getChildren(parentWork)) {
    TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, childWork);
    tezWork.disconnect(parentWork, childWork);
    tezWork.connect(mergeWork, childWork, edgeProp);
  }
  tezWork.remove(parentWork);
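  // Tag parentWork with the join-input tag of the dummy store branch, then
  // detach the dummy operator from the merge join's operator tree.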
  DummyStoreOperator dummyOp = (DummyStoreOperator) stack.get(stack.size() - 2);
  parentWork.setTag(mergeJoinOp.getTagForOperator(dummyOp));
  mergeJoinOp.getParentOperators().remove(dummyOp);
  dummyOp.getChildOperators().clear();
  return true;
}
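For context on how the method above gets invoked: MergeJoinProc is a NodeProcessor, so it only fires when a graph walker dispatches it against a MERGEJOIN node in the operator tree. The following is a minimal wiring sketch assuming Hive's standard rule-dispatch classes from org.apache.hadoop.hive.ql.lib; the rule name, wrapper class, and walker choice are illustrative (the actual Tez compiler uses its own walker), not verbatim Hive code.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.GenTezProcContext;
import org.apache.hadoop.hive.ql.parse.MergeJoinProc;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Hypothetical wrapper class for illustration.
public class MergeJoinProcWiring {
  public static void walkOperatorTree(GenTezProcContext context,
      ArrayList<Node> topNodes) throws SemanticException {
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    // Fire MergeJoinProc whenever the walk reaches a MERGEJOIN operator.
    opRules.put(new RuleRegExp("Merge join",
        CommonMergeJoinOperator.getOperatorName() + "%"), new MergeJoinProc());
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, context);
    GraphWalker walker = new DefaultGraphWalker(disp); // illustrative walker
    walker.startWalking(topNodes, new HashMap<Node, Object>());
  }
}

With this wiring, process receives the MERGEJOIN node as nd and the path of operators leading to it as stack, which is why the method inspects stack.get(stack.size() - 2) to see whether the walk arrived via a DummyStoreOperator.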
Use of org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator in project hive by apache.
The class SparkCrossProductCheck, method checkShuffleJoin.
private void checkShuffleJoin(SparkWork sparkWork) throws SemanticException {
  for (ReduceWork reduceWork : sparkWork.getAllReduceWork()) {
    Operator<? extends OperatorDesc> reducer = reduceWork.getReducer();
    if (reducer instanceof JoinOperator || reducer instanceof CommonMergeJoinOperator) {
      Map<Integer, CrossProductHandler.ExtractReduceSinkInfo.Info> rsInfo =
          new TreeMap<Integer, CrossProductHandler.ExtractReduceSinkInfo.Info>();
      // Collect the reduce sink (shuffle) key information from every parent
      // work feeding this join reducer.
      for (BaseWork parent : sparkWork.getParents(reduceWork)) {
        rsInfo.putAll(new CrossProductHandler.ExtractReduceSinkInfo(null).analyze(parent));
      }
      checkForCrossProduct(reduceWork.getName(), reducer, rsInfo);
    }
  }
}
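For context, SparkCrossProductCheck implements Hive's PhysicalPlanResolver interface, so checkShuffleJoin runs while the Spark compiler resolves the physical plan. A minimal sketch of that entry point follows, assuming the standard resolver contract; the wrapper class and helper method here are hypothetical, and the package names are assumptions based on where Hive keeps its physical optimizer classes.

import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
import org.apache.hadoop.hive.ql.optimizer.physical.SparkCrossProductCheck;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Hypothetical wrapper class for illustration.
public class CrossProductCheckExample {
  public static PhysicalContext runCheck(PhysicalContext pctx) throws SemanticException {
    // resolve() walks the plan's Spark tasks; for each ReduceWork whose reducer
    // is a JoinOperator or CommonMergeJoinOperator, checkShuffleJoin gathers
    // the parents' reduce sink info and flags shuffle joins whose join keys
    // are empty as cross products.
    return new SparkCrossProductCheck().resolve(pctx);
  }
}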