use of org.apache.hadoop.hive.ql.exec.DummyStoreOperator in project hive by apache.
the class ConvertJoinMapJoin method convertJoinSMBJoin.
// Replaces the join operator with a new CommonMergeJoinOperator and removes the
// parent ReduceSinks.
private void convertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcContext context,
    int mapJoinConversionPos, int numBuckets, boolean adjustParentsChildren)
    throws SemanticException {
  MapJoinDesc mapJoinDesc = null;
  if (adjustParentsChildren) {
    mapJoinDesc = MapJoinProcessor.getMapJoinDesc(context.conf, joinOp,
        joinOp.getConf().isLeftInputJoin(), joinOp.getConf().getBaseSrc(),
        joinOp.getConf().getMapAliases(), mapJoinConversionPos, true);
  } else {
    JoinDesc joinDesc = joinOp.getConf();
    // retain the original join desc in the map join.
    mapJoinDesc = new MapJoinDesc(
        MapJoinProcessor.getKeys(joinOp.getConf().isLeftInputJoin(),
            joinOp.getConf().getBaseSrc(), joinOp).getSecond(),
        null, joinDesc.getExprs(), null, null, joinDesc.getOutputColumnNames(),
        mapJoinConversionPos, joinDesc.getConds(), joinDesc.getFilters(),
        joinDesc.getNoOuterJoin(), null, joinDesc.getMemoryMonitorInfo(),
        joinDesc.getInMemoryDataSize());
    mapJoinDesc.setNullSafes(joinDesc.getNullSafes());
    mapJoinDesc.setFilterMap(joinDesc.getFilterMap());
    mapJoinDesc.setResidualFilterExprs(joinDesc.getResidualFilterExprs());
    // keep the column expression map; the explain plan uses it for display
    mapJoinDesc.setColumnExprMap(joinDesc.getColumnExprMap());
    mapJoinDesc.setReversedExprs(joinDesc.getReversedExprs());
    mapJoinDesc.resetOrder();
  }
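  // Build the CommonMergeJoinOperator that replaces the original join, carrying
  // the bucket count and the position of the chosen big-table input.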
  CommonMergeJoinOperator mergeJoinOp =
      (CommonMergeJoinOperator) OperatorFactory.get(joinOp.getCompilationOpContext(),
          new CommonMergeJoinDesc(numBuckets, mapJoinConversionPos, mapJoinDesc),
          joinOp.getSchema());
  int numReduceSinks = joinOp.getOpTraits().getNumReduceSinks();
  OpTraits opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), numBuckets,
      joinOp.getOpTraits().getSortCols(), numReduceSinks);
  mergeJoinOp.setOpTraits(opTraits);
  mergeJoinOp.setStatistics(joinOp.getStatistics());
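  // Splice the merge join into the operator tree: every former parent and child
  // of the original join now points at mergeJoinOp, at the same list position.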
  for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
    int pos = parentOp.getChildOperators().indexOf(joinOp);
    parentOp.getChildOperators().remove(pos);
    parentOp.getChildOperators().add(pos, mergeJoinOp);
  }
  for (Operator<? extends OperatorDesc> childOp : joinOp.getChildOperators()) {
    int pos = childOp.getParentOperators().indexOf(joinOp);
    childOp.getParentOperators().remove(pos);
    childOp.getParentOperators().add(pos, mergeJoinOp);
  }
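  // Point the merge join itself at the original join's children and parents.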
  List<Operator<? extends OperatorDesc>> childOperators = mergeJoinOp.getChildOperators();
  List<Operator<? extends OperatorDesc>> parentOperators = mergeJoinOp.getParentOperators();
  childOperators.clear();
  parentOperators.clear();
  childOperators.addAll(joinOp.getChildOperators());
  parentOperators.addAll(joinOp.getParentOperators());
  mergeJoinOp.getConf().setGenJoinKeys(false);
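  // If the parent ReduceSinks are being removed, the merge join must generate its
  // own join keys: connect the grandparents directly, and guard every small-table
  // input with a dummy store operator.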
  if (adjustParentsChildren) {
    mergeJoinOp.getConf().setGenJoinKeys(true);
    List<Operator<? extends OperatorDesc>> newParentOpList =
        new ArrayList<Operator<? extends OperatorDesc>>();
    for (Operator<? extends OperatorDesc> parentOp : mergeJoinOp.getParentOperators()) {
      for (Operator<? extends OperatorDesc> grandParentOp : parentOp.getParentOperators()) {
        grandParentOp.getChildOperators().remove(parentOp);
        grandParentOp.getChildOperators().add(mergeJoinOp);
        newParentOpList.add(grandParentOp);
      }
    }
    mergeJoinOp.getParentOperators().clear();
    mergeJoinOp.getParentOperators().addAll(newParentOpList);
    List<Operator<? extends OperatorDesc>> parentOps =
        new ArrayList<Operator<? extends OperatorDesc>>(mergeJoinOp.getParentOperators());
    for (Operator<? extends OperatorDesc> parentOp : parentOps) {
      int parentIndex = mergeJoinOp.getParentOperators().indexOf(parentOp);
      if (parentIndex == mapJoinConversionPos) {
        continue;
      }
      // insert the dummy store operator here
      DummyStoreOperator dummyStoreOp =
          new TezDummyStoreOperator(mergeJoinOp.getCompilationOpContext());
      dummyStoreOp.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>());
      dummyStoreOp.setChildOperators(new ArrayList<Operator<? extends OperatorDesc>>());
      dummyStoreOp.getChildOperators().add(mergeJoinOp);
      int index = parentOp.getChildOperators().indexOf(mergeJoinOp);
      parentOp.getChildOperators().remove(index);
      parentOp.getChildOperators().add(index, dummyStoreOp);
      dummyStoreOp.getParentOperators().add(parentOp);
      mergeJoinOp.getParentOperators().remove(parentIndex);
      mergeJoinOp.getParentOperators().add(parentIndex, dummyStoreOp);
    }
  }
  mergeJoinOp.cloneOriginalParentsList(mergeJoinOp.getParentOperators());
}
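The rewiring above repeats one small idiom: swap an operator into another operator's child or parent list at the exact index of the operator it replaces, because join inputs are position-sensitive. A minimal, self-contained sketch of that idiom (Trees and replaceInPlace are illustrative names, not part of Hive's API):

import java.util.ArrayList;
import java.util.List;

// Hypothetical helper, not Hive code: puts newElem in oldElem's slot so that
// sibling order (and therefore join-input position) is preserved.
final class Trees {
  static <T> void replaceInPlace(List<T> list, T oldElem, T newElem) {
    int pos = list.indexOf(oldElem); // index of the element being replaced
    list.remove(pos);                // drop the old element...
    list.add(pos, newElem);          // ...and insert the new one at the same index
  }

  public static void main(String[] args) {
    List<String> children = new ArrayList<>(List.of("TS", "JOIN", "FS"));
    replaceInPlace(children, "JOIN", "MERGEJOIN");
    System.out.println(children); // prints [TS, MERGEJOIN, FS]
  }
}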
use of org.apache.hadoop.hive.ql.exec.DummyStoreOperator in project hive by apache.
the class MergeJoinProc method process.
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  GenTezProcContext context = (GenTezProcContext) procCtx;
  CommonMergeJoinOperator mergeJoinOp = (CommonMergeJoinOperator) nd;
  if (stack.size() < 2) {
    // Safety check before reading stack.get(stack.size() - 2) below; it is very
    // unlikely that the stack holds fewer than two nodes, i.e., that the merge
    // join is the only operator on it.
    context.currentMergeJoinOperator = mergeJoinOp;
    return null;
  }
  TezWork tezWork = context.currentTask.getWork();
  @SuppressWarnings("unchecked")
  Operator<? extends OperatorDesc> parentOp =
      (Operator<? extends OperatorDesc>) (stack.get(stack.size() - 2));
  // Attach this merge join to the MergeJoinWork created during the dummy-store
  // walk: if a merge work already exists for this merge join operator, reuse it
  // and add the dummy store's work to it; otherwise create one and register it.
  MergeJoinWork mergeWork = null;
  if (context.opMergeJoinWorkMap.containsKey(mergeJoinOp)) {
    // we already have the merge work corresponding to this merge join operator
    mergeWork = context.opMergeJoinWorkMap.get(mergeJoinOp);
  } else {
    mergeWork = new MergeJoinWork();
    tezWork.add(mergeWork);
    context.opMergeJoinWorkMap.put(mergeJoinOp, mergeWork);
  }
  if (!(stack.get(stack.size() - 2) instanceof DummyStoreOperator)) {
    /* This may happen in a case like the following, when we arrive at the merge
       join from the branch that carries no dummy store:
       TS[0], FIL[26], SEL[2], DUMMY_STORE[30], MERGEJOIN[29]
                                               /
       TS[3], FIL[27], SEL[5], ---------------
     */
    context.currentMergeJoinOperator = mergeJoinOp;
    mergeWork.setTag(mergeJoinOp.getTagForOperator(parentOp));
    return null;
  }
  // Guaranteed to be just 1 because each DummyStoreOperator can be part of only one work.
  BaseWork parentWork = context.childToWorkMap.get(parentOp).get(0);
  mergeWork.addMergedWork(null, parentWork, context.leafOperatorToFollowingWork);
  mergeWork.setMergeJoinOperator(mergeJoinOp);
  tezWork.setVertexType(mergeWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
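  // Re-route every edge that touched parentWork over to mergeWork, then drop
  // parentWork from the DAG: the merged work absorbs all of its connections.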
  for (BaseWork grandParentWork : tezWork.getParents(parentWork)) {
    TezEdgeProperty edgeProp = tezWork.getEdgeProperty(grandParentWork, parentWork);
    tezWork.disconnect(grandParentWork, parentWork);
    tezWork.connect(grandParentWork, mergeWork, edgeProp);
  }
  for (BaseWork childWork : tezWork.getChildren(parentWork)) {
    TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, childWork);
    tezWork.disconnect(parentWork, childWork);
    tezWork.connect(mergeWork, childWork, edgeProp);
  }
  tezWork.remove(parentWork);
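  // Tag the merged (small-table) work with the join input it feeds, and detach
  // the dummy store from the runtime operator graph.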
  DummyStoreOperator dummyOp = (DummyStoreOperator) (stack.get(stack.size() - 2));
  parentWork.setTag(mergeJoinOp.getTagForOperator(dummyOp));
  mergeJoinOp.getParentOperators().remove(dummyOp);
  dummyOp.getChildOperators().clear();
  return true;
}
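For context, this processor only fires when the graph walker dispatches on a CommonMergeJoinOperator node. A minimal sketch of how such a processor could be wired into a rule map, mirroring the rule-registration pattern shown in the SparkCompiler excerpt below (the surrounding map and dispatcher setup here is illustrative, not a verbatim Hive excerpt):

// Illustrative registration of MergeJoinProc against the merge join operator type.
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(new TypeRule(CommonMergeJoinOperator.class), new MergeJoinProc());
Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);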
use of org.apache.hadoop.hive.ql.exec.DummyStoreOperator in project hive by apache.
the class SparkCompiler method generateTaskTreeHelper.
private void generateTaskTreeHelper(GenSparkProcContext procCtx, List<Node> topNodes)
    throws SemanticException {
  // create a walker which walks the tree in a DFS manner while maintaining
  // the operator stack. The dispatcher generates the plan from the operator tree
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  GenSparkWork genSparkWork = new GenSparkWork(GenSparkUtils.getUtils());
  opRules.put(new RuleRegExp("Split Work - ReduceSink",
      ReduceSinkOperator.getOperatorName() + "%"), genSparkWork);
  opRules.put(new RuleRegExp("Split Work - SparkPartitionPruningSink",
      SparkPartitionPruningSinkOperator.getOperatorName() + "%"), genSparkWork);
  opRules.put(new TypeRule(MapJoinOperator.class), new SparkReduceSinkMapJoinProc());
  opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink",
      FileSinkOperator.getOperatorName() + "%"),
      new CompositeProcessor(new SparkFileSinkProcessor(), genSparkWork));
  opRules.put(new RuleRegExp("Handle Analyze Command",
      TableScanOperator.getOperatorName() + "%"),
      new SparkProcessAnalyzeTable(GenSparkUtils.getUtils()));
  opRules.put(new RuleRegExp("Remember union",
      UnionOperator.getOperatorName() + "%"), new NodeProcessor() {
    @Override
    public Object process(Node n, Stack<Node> s, NodeProcessorCtx procCtx, Object... os)
        throws SemanticException {
      GenSparkProcContext context = (GenSparkProcContext) procCtx;
      UnionOperator union = (UnionOperator) n;
      // simply need to remember that we've seen a union.
      context.currentUnionOperators.add(union);
      return null;
    }
  });
  /**
   * SMB join case:   (Big)   (Small)  (Small)
   *                    TS       TS       TS
   *                     \       |       /
   *                      \      DS     DS
   *                       \     |     /
   *                        SMBJoinOp
   *
   * Some of the other processors are expecting only one traversal beyond SMBJoinOp.
   * We need to traverse from the big-table path only, and stop traversing on the
   * small-table path once we reach SMBJoinOp.
   * Also add some SMB join information to the context, so we can properly annotate
   * the MapWork later on.
   */
  opRules.put(new TypeRule(SMBMapJoinOperator.class), new NodeProcessor() {
    @Override
    public Object process(Node currNode, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... os) throws SemanticException {
      GenSparkProcContext context = (GenSparkProcContext) procCtx;
      SMBMapJoinOperator currSmbNode = (SMBMapJoinOperator) currNode;
      SparkSMBMapJoinInfo smbMapJoinCtx = context.smbMapJoinCtxMap.get(currSmbNode);
      if (smbMapJoinCtx == null) {
        smbMapJoinCtx = new SparkSMBMapJoinInfo();
        context.smbMapJoinCtxMap.put(currSmbNode, smbMapJoinCtx);
      }
      for (Node stackNode : stack) {
        if (stackNode instanceof DummyStoreOperator) {
          // If coming from the small-table side, do some book-keeping, and skip traversal.
          smbMapJoinCtx.smallTableRootOps.add(context.currentRootOperator);
          return true;
        }
      }
      // If coming from the big-table side, do some book-keeping, and continue traversal.
      smbMapJoinCtx.bigTableRootOp = context.currentRootOperator;
      return false;
    }
  });
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
  GraphWalker ogw = new GenSparkWorkWalker(disp, procCtx);
  ogw.startWalking(topNodes, null);
}
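As a usage note, this helper is driven from the root operators of the plan. A minimal sketch of that call pattern, assuming a parse context pCtx whose getTopOps() yields the root TableScan operators (the variable names here are illustrative):

// Illustrative call pattern: seed the walker with the plan's root operators.
List<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(pCtx.getTopOps().values());
generateTaskTreeHelper(procCtx, topNodes);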