Use of org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher in project hive by apache.
Class MapJoinProcessor, method transform.
/**
 * Transform the query tree. For each join, check if it is a map-side join (user specified).
 * If yes, convert it to a map-side join.
 *
 * @param pactx
 *          current parse context
 */
@Override
public ParseContext transform(ParseContext pactx) throws SemanticException {
  List<MapJoinOperator> listMapJoinOps = new ArrayList<MapJoinOperator>();
  // traverse all the joins and convert them if necessary
  if (pactx.getJoinOps() != null) {
    Set<JoinOperator> joinMap = new HashSet<JoinOperator>();
    Set<MapJoinOperator> mapJoinMap = pactx.getMapJoinOps();
    if (mapJoinMap == null) {
      mapJoinMap = new HashSet<MapJoinOperator>();
      pactx.setMapJoinOps(mapJoinMap);
    }
    Iterator<JoinOperator> joinCtxIter = pactx.getJoinOps().iterator();
    while (joinCtxIter.hasNext()) {
      JoinOperator joinOp = joinCtxIter.next();
      int mapJoinPos = mapSideJoin(joinOp);
      if (mapJoinPos >= 0) {
        MapJoinOperator mapJoinOp = generateMapJoinOperator(pactx, joinOp, mapJoinPos);
        listMapJoinOps.add(mapJoinOp);
        mapJoinOp.getConf().setQBJoinTreeProps(joinOp.getConf());
        mapJoinMap.add(mapJoinOp);
      } else {
        joinOp.getConf().setQBJoinTreeProps(joinOp.getConf());
        joinMap.add(joinOp);
      }
    }
    // store the new joinContext
    pactx.setJoinOps(joinMap);
  }
  // Go over the list and find if a reducer is not needed
  List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoRed =
      new ArrayList<AbstractMapJoinOperator<? extends MapJoinDesc>>();
  // create a walker which walks the tree in a DFS manner while maintaining
  // the operator stack. The dispatcher generates the plan from the operator tree
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R0", MapJoinOperator.getOperatorName() + "%"),
      getCurrentMapJoin());
  opRules.put(new RuleRegExp("R1", MapJoinOperator.getOperatorName() + "%.*"
      + FileSinkOperator.getOperatorName() + "%"), getMapJoinFS());
  opRules.put(new RuleRegExp("R2", MapJoinOperator.getOperatorName() + "%.*"
      + ReduceSinkOperator.getOperatorName() + "%"), getMapJoinDefault());
  opRules.put(new RuleRegExp("R4", MapJoinOperator.getOperatorName() + "%.*"
      + UnionOperator.getOperatorName() + "%"), getMapJoinDefault());
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  Dispatcher disp = new DefaultRuleDispatcher(getDefault(), opRules,
      new MapJoinWalkerCtx(listMapJoinOpsNoRed, pactx));
  GraphWalker ogw = new GenMapRedWalker(disp);
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(listMapJoinOps);
  ogw.startWalking(topNodes, null);
  pactx.setListMapJoinOpsNoReducer(listMapJoinOpsNoRed);
  return pactx;
}
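Every snippet on this page follows the same wiring: build an ordered rule table, hand it to a DefaultRuleDispatcher together with a default processor and a shared context, then drive the dispatcher with a graph walker over the root nodes. Below is a minimal, self-contained sketch of that wiring using the org.apache.hadoop.hive.ql.lib types as they appear in the snippets above; DemoNode and both processors are hypothetical stand-ins for real Hive operators, and the sketch assumes the dispatcher falls back to the default processor whenever no rule matches the current walk stack.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class DispatcherDemo {

  // Hypothetical node: a name plus children is all the walker needs.
  static class DemoNode implements Node {
    private final String name;
    final List<DemoNode> children = new ArrayList<DemoNode>();
    DemoNode(String name) { this.name = name; }
    @Override public List<? extends Node> getChildren() { return children; }
    @Override public String getName() { return name; }
  }

  public static void main(String[] args) throws SemanticException {
    // A two-node chain, TS -> FIL, mimicking a table scan feeding a filter.
    DemoNode ts = new DemoNode("TS");
    DemoNode fil = new DemoNode("FIL");
    ts.children.add(fil);

    // One rule fires on FIL nodes; everything else falls to the default processor.
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
    rules.put(new RuleRegExp("OnFilter", "FIL%"), new NodeProcessor() {
      @Override
      public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
          Object... nodeOutputs) throws SemanticException {
        System.out.println("rule fired on " + nd.getName()
            + " at stack depth " + stack.size());
        return null;
      }
    });
    NodeProcessor fallback = new NodeProcessor() {
      @Override
      public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
          Object... nodeOutputs) throws SemanticException {
        System.out.println("default processor on " + nd.getName());
        return null;
      }
    };

    Dispatcher disp = new DefaultRuleDispatcher(fallback, rules, null);
    GraphWalker walker = new DefaultGraphWalker(disp);
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.add(ts);
    walker.startWalking(topNodes, null); // null: per-node outputs are not collected
  }
}

A RuleRegExp pattern is matched against the %-joined names of the nodes on the walk stack, ending at the current node; that is why R2 above (MAPJOIN%.*RS%) fires on a reduce sink that has a map join somewhere on the path above it.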
Use of org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher in project hive by apache.
Class PartitionColumnsSeparator, method transform.
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  // 1. Trigger transformation
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"),
      new StructInTransformer());
  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
  GraphWalker ogw = new ForwardWalker(disp);
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pctx;
}
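Here both the default processor and the walker context are null, which works because StructInTransformer carries no cross-node state. When state must accumulate across rule firings, the transform passes a NodeProcessorCtx implementation instead, as MapJoinProcessor does with MapJoinWalkerCtx above. A sketch of that variant follows, with imports as in the previous sketch; FilterCountCtx and the counting processor are hypothetical, and it assumes NodeProcessorCtx is a plain marker interface.

// Hypothetical context object: accumulates state across rule firings.
static class FilterCountCtx implements NodeProcessorCtx {
  int filterCount = 0;
}

// The dispatcher hands the same context instance to every fired processor.
NodeProcessor countFilters = new NodeProcessor() {
  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... nodeOutputs) throws SemanticException {
    ((FilterCountCtx) procCtx).filterCount++;
    return null;
  }
};

FilterCountCtx ctx = new FilterCountCtx();
Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
rules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"), countFilters);
Dispatcher disp = new DefaultRuleDispatcher(null, rules, ctx);
GraphWalker ogw = new DefaultGraphWalker(disp);
ogw.startWalking(new ArrayList<Node>(pctx.getTopOps().values()), null);
// ctx.filterCount now holds the number of FilterOperators visited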
Use of org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher in project hive by apache.
Class PrunerUtils, method walkOperatorTree.
/**
 * Walk the operator tree for pruner generation.
 *
 * @param pctx
 *          current parse context
 * @param opWalkerCtx
 *          context passed to every fired processor
 * @param filterProc
 *          processor fired by the table-scan/filter rules below
 * @param defaultProc
 *          processor fired when no rule matches
 * @throws SemanticException
 */
public static void walkOperatorTree(ParseContext pctx, NodeProcessorCtx opWalkerCtx,
    NodeProcessor filterProc, NodeProcessor defaultProc) throws SemanticException {
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  // Build exact-match rules for the operator sequences
  // "(TS%FIL%)|(TS%FIL%FIL%)"
  String tsOprName = TableScanOperator.getOperatorName();
  String filtOprName = FilterOperator.getOperatorName();
  opRules.put(new RuleExactMatch("R1",
      new String[] { tsOprName, filtOprName, filtOprName }), filterProc);
  opRules.put(new RuleExactMatch("R2",
      new String[] { tsOprName, filtOprName }), filterProc);
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  Dispatcher disp = new DefaultRuleDispatcher(defaultProc, opRules, opWalkerCtx);
  GraphWalker ogw = new DefaultGraphWalker(disp);
  // Create a list of top operator nodes
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
}
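Unlike RuleRegExp, RuleExactMatch fires only when the walked path is exactly the named operator sequence, which is what lets this pruner handle TS-FIL and TS-FIL-FIL as two distinct cases. A sketch of the contrast, under the same assumptions as the sketches above:

// Fires only when the path from the root is exactly TS -> FIL.
Rule exact = new RuleExactMatch("Exact", new String[] {
    TableScanOperator.getOperatorName(), FilterOperator.getOperatorName() });

// Fires on any FIL that has a TS somewhere above it, no matter what
// sits between them or how deep the filter is.
Rule loose = new RuleRegExp("Loose", TableScanOperator.getOperatorName() + "%.*"
    + FilterOperator.getOperatorName() + "%");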
Use of org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher in project hive by apache.
Class ExprWalkerProcFactory, method extractPushdownPreds.
/**
 * Extracts pushdown predicates from the given list of predicate expressions.
 *
 * @param opContext
 *          operator context used for resolving column references
 * @param op
 *          operator of the predicates being processed
 * @param preds
 *          candidate predicate expressions
 * @return The expression walker information
 * @throws SemanticException
 */
public static ExprWalkerInfo extractPushdownPreds(OpWalkerInfo opContext,
    Operator<? extends OperatorDesc> op, List<ExprNodeDesc> preds) throws SemanticException {
  // Create the walker, the rules dispatcher and the context.
  ExprWalkerInfo exprContext = new ExprWalkerInfo(op);
  // create a walker which walks the expression tree in a DFS manner while
  // maintaining the node stack; rules are dispatched by expression node type
  Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
  exprRules.put(new TypeRule(ExprNodeColumnDesc.class), getColumnProcessor());
  exprRules.put(new TypeRule(ExprNodeFieldDesc.class), getFieldProcessor());
  exprRules.put(new TypeRule(ExprNodeGenericFuncDesc.class), getGenericFuncProcessor());
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(), exprRules, exprContext);
  GraphWalker egw = new DefaultGraphWalker(disp);
  List<Node> startNodes = new ArrayList<Node>();
  List<ExprNodeDesc> clonedPreds = new ArrayList<ExprNodeDesc>();
  for (ExprNodeDesc node : preds) {
    ExprNodeDesc clone = node.clone();
    clonedPreds.add(clone);
    exprContext.getNewToOldExprMap().put(clone, node);
  }
  startNodes.addAll(clonedPreds);
  egw.startWalking(startNodes, null);
  HiveConf conf = opContext.getParseContext().getConf();
  // check the root expressions for final candidates
  for (ExprNodeDesc pred : clonedPreds) {
    extractFinalCandidates(pred, exprContext, conf);
  }
  return exprContext;
}
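TypeRule dispatches on the node's Java class rather than on a name pattern, which suits expression trees where every node is an ExprNodeDesc subclass. Note also that the predicates are cloned before the walk, so the processors can annotate the clones freely while getNewToOldExprMap() keeps a link back to the originals. The sketch below reuses this wiring to collect the columns referenced in one predicate; imports are as in the first sketch, and somePredicate is a hypothetical ExprNodeDesc root.

// Collect every column referenced anywhere under a predicate expression.
final List<String> columns = new ArrayList<String>();
Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
exprRules.put(new TypeRule(ExprNodeColumnDesc.class), new NodeProcessor() {
  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... nodeOutputs) throws SemanticException {
    columns.add(((ExprNodeColumnDesc) nd).getColumn());
    return null;
  }
});
// Null default processor: nodes of other types are walked but trigger nothing.
Dispatcher disp = new DefaultRuleDispatcher(null, exprRules, null);
GraphWalker egw = new DefaultGraphWalker(disp);
List<Node> start = new ArrayList<Node>();
start.add(somePredicate); // hypothetical ExprNodeDesc, e.g. a cloned pred from above
egw.startWalking(start, null);
// columns now lists the referenced column names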
Use of org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher in project hive by apache.
Class SparkCompiler, method generateTaskTreeHelper.
private void generateTaskTreeHelper(GenSparkProcContext procCtx, List<Node> topNodes)
    throws SemanticException {
  // create a walker which walks the tree in a DFS manner while maintaining
  // the operator stack. The dispatcher generates the plan from the operator tree
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  GenSparkWork genSparkWork = new GenSparkWork(GenSparkUtils.getUtils());
  opRules.put(new RuleRegExp("Split Work - ReduceSink",
      ReduceSinkOperator.getOperatorName() + "%"), genSparkWork);
  opRules.put(new RuleRegExp("Split Work - SparkPartitionPruningSink",
      SparkPartitionPruningSinkOperator.getOperatorName() + "%"), genSparkWork);
  opRules.put(new TypeRule(MapJoinOperator.class), new SparkReduceSinkMapJoinProc());
  opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink",
      FileSinkOperator.getOperatorName() + "%"),
      new CompositeProcessor(new SparkFileSinkProcessor(), genSparkWork));
  opRules.put(new RuleRegExp("Handle Analyze Command",
      TableScanOperator.getOperatorName() + "%"),
      new SparkProcessAnalyzeTable(GenSparkUtils.getUtils()));
  opRules.put(new RuleRegExp("Remember union",
      UnionOperator.getOperatorName() + "%"), new NodeProcessor() {
    @Override
    public Object process(Node n, Stack<Node> s, NodeProcessorCtx procCtx,
        Object... os) throws SemanticException {
      GenSparkProcContext context = (GenSparkProcContext) procCtx;
      UnionOperator union = (UnionOperator) n;
      // simply need to remember that we've seen a union.
      context.currentUnionOperators.add(union);
      return null;
    }
  });
  /**
   * SMB join case:   (Big)   (Small)  (Small)
   *                    TS       TS       TS
   *                     \       |       /
   *                      \      DS     DS
   *                       \     |     /
   *                        SMBJoinOP
   *
   * Some of the other processors are expecting only one traversal beyond SMBJoinOp.
   * We need to traverse from the big-table path only, and stop traversing on the
   * small-table path once we reach SMBJoinOp.
   * Also add some SMB join information to the context, so we can properly annotate
   * the MapWork later on.
   */
  opRules.put(new TypeRule(SMBMapJoinOperator.class), new NodeProcessor() {
    @Override
    public Object process(Node currNode, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... os) throws SemanticException {
      GenSparkProcContext context = (GenSparkProcContext) procCtx;
      SMBMapJoinOperator currSmbNode = (SMBMapJoinOperator) currNode;
      SparkSMBMapJoinInfo smbMapJoinCtx = context.smbMapJoinCtxMap.get(currSmbNode);
      if (smbMapJoinCtx == null) {
        smbMapJoinCtx = new SparkSMBMapJoinInfo();
        context.smbMapJoinCtxMap.put(currSmbNode, smbMapJoinCtx);
      }
      for (Node stackNode : stack) {
        if (stackNode instanceof DummyStoreOperator) {
          // If coming from small-table side, do some book-keeping, and skip traversal.
          smbMapJoinCtx.smallTableRootOps.add(context.currentRootOperator);
          return true;
        }
      }
      // If coming from big-table side, do some book-keeping, and continue traversal.
      smbMapJoinCtx.bigTableRootOp = context.currentRootOperator;
      return false;
    }
  });
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
  GraphWalker ogw = new GenSparkWorkWalker(disp, procCtx);
  ogw.startWalking(topNodes, null);
}
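The SMB processor above returns a Boolean rather than null, and the walker acts on it: GenSparkWorkWalker stops descending into a node's children when the dispatched processor returns true, which is how traversal is cut off on the small-table paths. A sketch of the same traversal-control idiom follows; the processor is hypothetical, and it assumes the walker in use interprets a true return as skip-children, as this one does.

// Hypothetical traversal-control processor: prune the walk under any node
// reached through a ReduceSinkOperator, mirroring how the SMB processor
// above returns true on small-table paths.
NodeProcessor pruneBelowReduceSink = new NodeProcessor() {
  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... os) throws SemanticException {
    for (Node onPath : stack) {
      if (onPath instanceof ReduceSinkOperator) {
        return true;   // walker skips nd's children
      }
    }
    return false;      // walker continues the depth-first traversal below nd
  }
};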