use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.
the class PcrExprProcFactory method walkExprTree.
/**
* Remove partition conditions when necessary from the the expression tree.
*
* @param tabAlias
* the table alias
* @param parts
* the list of all pruned partitions for the table
* @param vcs
* virtual columns referenced
* @param pred
* expression tree of the target filter operator
* @return the node information of the root expression
* @throws SemanticException
*/
public static NodeInfoWrapper walkExprTree(String tabAlias, ArrayList<Partition> parts, List<VirtualColumn> vcs, ExprNodeDesc pred) throws SemanticException {
// Create the walker, the rules dispatcher and the context.
PcrExprProcCtx pprCtx = new PcrExprProcCtx(tabAlias, parts, vcs);
Map<SemanticRule, SemanticNodeProcessor> exprRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
exprRules.put(new RuleRegExp("R1", ExprNodeColumnDesc.class.getName() + "%"), getColumnProcessor());
exprRules.put(new RuleRegExp("R2", ExprNodeFieldDesc.class.getName() + "%"), getFieldProcessor());
exprRules.put(new RuleRegExp("R5", ExprNodeGenericFuncDesc.class.getName() + "%"), getGenericFuncProcessor());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
SemanticDispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(), exprRules, pprCtx);
SemanticGraphWalker egw = new ExpressionWalker(disp);
List<Node> startNodes = new ArrayList<Node>();
startNodes.add(pred);
HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
egw.startWalking(startNodes, outputMap);
// Return the wrapper of the root node
return (NodeInfoWrapper) outputMap.get(pred);
}
use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.
the class ReduceSinkJoinDeDuplication method transform.
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
pGraphContext = pctx;
ReduceSinkJoinDeDuplicateProcCtx cppCtx = new ReduceSinkJoinDeDuplicateProcCtx(pGraphContext);
Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
opRules.put(new RuleRegExp("R1", ReduceSinkOperator.getOperatorName() + "%"), ReduceSinkJoinDeDuplicateProcFactory.getReducerMapJoinProc());
SemanticDispatcher disp = new DefaultRuleDispatcher(ReduceSinkJoinDeDuplicateProcFactory.getDefaultProc(), opRules, cppCtx);
SemanticGraphWalker ogw = new ForwardWalker(disp);
// Create a list of topop nodes
ArrayList<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(pGraphContext.getTopOps().values());
ogw.startWalking(topNodes, null);
return pGraphContext;
}
use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.
the class Generator method transform.
/* (non-Javadoc)
* @see org.apache.hadoop.hive.ql.optimizer.Transform#transform(org.apache.hadoop.hive.ql.parse.ParseContext)
*/
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
if (hooks != null && hooks.contains(ATLAS_HOOK_CLASSNAME)) {
// Atlas would be interested in lineage information for insert,load,create etc.
if (!pctx.getQueryProperties().isCTAS() && !pctx.getQueryProperties().isMaterializedView() && pctx.getQueryProperties().isQuery() && pctx.getCreateTable() == null && pctx.getCreateViewDesc() == null && (pctx.getLoadTableWork() == null || pctx.getLoadTableWork().isEmpty())) {
LOG.debug("Not evaluating lineage");
return pctx;
}
}
Index index = pctx.getQueryState().getLineageState().getIndex();
if (index == null) {
index = new Index();
}
long sTime = System.currentTimeMillis();
// Create the lineage context
LineageCtx lCtx = new LineageCtx(pctx, index);
Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"), OpProcFactory.getTSProc());
opRules.put(new RuleRegExp("R2", ScriptOperator.getOperatorName() + "%"), OpProcFactory.getTransformProc());
opRules.put(new RuleRegExp("R3", UDTFOperator.getOperatorName() + "%"), OpProcFactory.getTransformProc());
opRules.put(new RuleRegExp("R4", SelectOperator.getOperatorName() + "%"), OpProcFactory.getSelProc());
opRules.put(new RuleRegExp("R5", GroupByOperator.getOperatorName() + "%"), OpProcFactory.getGroupByProc());
opRules.put(new RuleRegExp("R6", UnionOperator.getOperatorName() + "%"), OpProcFactory.getUnionProc());
opRules.put(new RuleRegExp("R7", CommonJoinOperator.getOperatorName() + "%|" + MapJoinOperator.getOperatorName() + "%"), OpProcFactory.getJoinProc());
opRules.put(new RuleRegExp("R8", ReduceSinkOperator.getOperatorName() + "%"), OpProcFactory.getReduceSinkProc());
opRules.put(new RuleRegExp("R9", LateralViewJoinOperator.getOperatorName() + "%"), OpProcFactory.getLateralViewJoinProc());
opRules.put(new RuleRegExp("R10", PTFOperator.getOperatorName() + "%"), OpProcFactory.getTransformProc());
opRules.put(new RuleRegExp("R11", FilterOperator.getOperatorName() + "%"), OpProcFactory.getFilterProc());
// The dispatcher fires the processor corresponding to the closest matching rule and passes the context along
SemanticDispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), opRules, lCtx);
SemanticGraphWalker ogw = new LevelOrderWalker(disp, 2);
// Create a list of topop nodes
ArrayList<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(pctx.getTopOps().values());
ogw.startWalking(topNodes, null);
LOG.debug("Time taken for lineage transform={}", (System.currentTimeMillis() - sTime));
return pctx;
}
use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.
the class SparkCompiler method generateTaskTreeHelper.
private void generateTaskTreeHelper(GenSparkProcContext procCtx, List<Node> topNodes) throws SemanticException {
// create a walker which walks the tree in a DFS manner while maintaining
// the operator stack. The dispatcher generates the plan from the operator tree
Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
GenSparkWork genSparkWork = new GenSparkWork(GenSparkUtils.getUtils());
opRules.put(new RuleRegExp("Split Work - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), genSparkWork);
opRules.put(new RuleRegExp("Split Work - SparkPartitionPruningSink", SparkPartitionPruningSinkOperator.getOperatorName() + "%"), genSparkWork);
opRules.put(new TypeRule(MapJoinOperator.class), new SparkReduceSinkMapJoinProc());
opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink", FileSinkOperator.getOperatorName() + "%"), new CompositeProcessor(new SparkFileSinkProcessor(), genSparkWork));
opRules.put(new RuleRegExp("Handle Analyze Command", TableScanOperator.getOperatorName() + "%"), new SparkProcessAnalyzeTable(GenSparkUtils.getUtils()));
opRules.put(new RuleRegExp("Remember union", UnionOperator.getOperatorName() + "%"), new SemanticNodeProcessor() {
@Override
public Object process(Node n, Stack<Node> s, NodeProcessorCtx procCtx, Object... os) throws SemanticException {
GenSparkProcContext context = (GenSparkProcContext) procCtx;
UnionOperator union = (UnionOperator) n;
// simply need to remember that we've seen a union.
context.currentUnionOperators.add(union);
return null;
}
});
/**
* SMB join case: (Big) (Small) (Small)
* TS TS TS
* \ | /
* \ DS DS
* \ | /
* SMBJoinOP
*
* Some of the other processors are expecting only one traversal beyond SMBJoinOp.
* We need to traverse from the big-table path only, and stop traversing on the
* small-table path once we reach SMBJoinOp.
* Also add some SMB join information to the context, so we can properly annotate
* the MapWork later on.
*/
opRules.put(new TypeRule(SMBMapJoinOperator.class), new SemanticNodeProcessor() {
@Override
public Object process(Node currNode, Stack<Node> stack, NodeProcessorCtx procCtx, Object... os) throws SemanticException {
GenSparkProcContext context = (GenSparkProcContext) procCtx;
SMBMapJoinOperator currSmbNode = (SMBMapJoinOperator) currNode;
SparkSMBMapJoinInfo smbMapJoinCtx = context.smbMapJoinCtxMap.get(currSmbNode);
if (smbMapJoinCtx == null) {
smbMapJoinCtx = new SparkSMBMapJoinInfo();
context.smbMapJoinCtxMap.put(currSmbNode, smbMapJoinCtx);
}
for (Node stackNode : stack) {
if (stackNode instanceof DummyStoreOperator) {
// If coming from small-table side, do some book-keeping, and skip traversal.
smbMapJoinCtx.smallTableRootOps.add(context.currentRootOperator);
return true;
}
}
// If coming from big-table side, do some book-keeping, and continue traversal
smbMapJoinCtx.bigTableRootOp = context.currentRootOperator;
return false;
}
});
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
SemanticGraphWalker ogw = new GenSparkWorkWalker(disp, procCtx);
ogw.startWalking(topNodes, null);
}
use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.
the class OperatorHealthCheckerHook method walkTree.
private void walkTree(Collection<Node> rootOps) throws SemanticException {
if (rootOps.isEmpty()) {
return;
}
SemanticDispatcher disp = new DefaultRuleDispatcher(new OperatorHealthCheckerProcessor(), new HashMap<>(), null);
SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
ogw.startWalking(rootOps, nodeOutput);
}
Aggregations