Use of org.apache.hadoop.hive.ql.lib.NodeProcessorCtx in project hive by apache.
From the class StatsOptimizer, method transform.
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  if (pctx.getFetchTask() != null || !pctx.getQueryProperties().isQuery()
      || pctx.getQueryProperties().isAnalyzeRewrite() || pctx.getQueryProperties().isCTAS()
      || pctx.getLoadFileWork().size() > 1 || !pctx.getLoadTableWork().isEmpty()
      // the table is being sampled and we cannot optimize.
      || !pctx.getNameToSplitSample().isEmpty()) {
    return pctx;
  }
  String TS = TableScanOperator.getOperatorName() + "%";
  String GBY = GroupByOperator.getOperatorName() + "%";
  String RS = ReduceSinkOperator.getOperatorName() + "%";
  String SEL = SelectOperator.getOperatorName() + "%";
  String FS = FileSinkOperator.getOperatorName() + "%";
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  opRules.put(new RuleRegExp("R1", TS + SEL + GBY + RS + GBY + SEL + FS), new MetaDataProcessor(pctx));
  opRules.put(new RuleRegExp("R2", TS + SEL + GBY + RS + GBY + FS), new MetaDataProcessor(pctx));
  NodeProcessorCtx soProcCtx = new StatsOptimizerProcContext();
  SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, soProcCtx);
  SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pctx;
}
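In this pattern, NodeProcessorCtx is the walk-scoped context the dispatcher hands to every processor it fires; the interface itself exposes nothing, so all shared state lives in the concrete class (here StatsOptimizerProcContext). Below is a minimal sketch of wiring a custom context into the same rule/dispatcher/walker machinery, reusing only the calls shown above; the class, field, and rule names (TableScanCounterCtx, tableScanCount, "count TS") are illustrative, not from the Hive codebase, and the snippet assumes the same imports as the method above.

// Hypothetical context: NodeProcessorCtx carries no methods of its own, so
// all state shared across processor invocations lives in the implementation.
class TableScanCounterCtx implements NodeProcessorCtx {
  int tableScanCount = 0;
}

private void countTableScans(ParseContext pctx) throws SemanticException {
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  // Fire the processor on every TableScanOperator node.
  opRules.put(new RuleRegExp("count TS", TableScanOperator.getOperatorName() + "%"),
      new SemanticNodeProcessor() {
        @Override
        public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
            Object... nodeOutputs) throws SemanticException {
          ((TableScanCounterCtx) procCtx).tableScanCount++;
          return null;
        }
      });
  TableScanCounterCtx ctx = new TableScanCounterCtx();
  SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
  SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
  ArrayList<Node> topNodes = new ArrayList<Node>(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  // ctx.tableScanCount now holds the number of table scans visited.
}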
Use of org.apache.hadoop.hive.ql.lib.NodeProcessorCtx in project phoenix by apache.
From the class IndexPredicateAnalyzer, method analyzePredicate.
/**
 * Analyzes a predicate.
 *
 * @param predicate predicate to be analyzed
 * @param searchConditions receives conditions produced by analysis
 * @return residual predicate which could not be translated to
 *         searchConditions
 */
public ExprNodeDesc analyzePredicate(ExprNodeDesc predicate, final List<IndexSearchCondition> searchConditions) {
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  NodeProcessor nodeProcessor = new NodeProcessor() {
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
      // We can only push down predicates that appear as part of a pure conjunction: reject OR, CASE, etc.
      for (Node ancestor : stack) {
        if (nd == ancestor) {
          break;
        }
        if (!FunctionRegistry.isOpAnd((ExprNodeDesc) ancestor)) {
          return nd;
        }
      }
      return analyzeExpr((ExprNodeGenericFuncDesc) nd, searchConditions, nodeOutputs);
    }
  };
  Dispatcher disp = new DefaultRuleDispatcher(nodeProcessor, opRules, null);
  GraphWalker ogw = new DefaultGraphWalker(disp);
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.add(predicate);
  HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
  try {
    ogw.startWalking(topNodes, nodeOutput);
  } catch (SemanticException ex) {
    throw new RuntimeException(ex);
  }
  ExprNodeDesc residualPredicate = (ExprNodeDesc) nodeOutput.get(predicate);
  return residualPredicate;
}
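A hedged usage sketch of how a caller (for example a storage handler's predicate decomposition) might consume this method; the variable names analyzer and predicate are illustrative, and the analyzer is assumed to have already been configured with the columns and comparison operators it should accept.

List<IndexSearchCondition> searchConditions = new ArrayList<IndexSearchCondition>();
// Conjuncts the index can serve are appended to searchConditions; anything
// that could not be translated comes back as the residual predicate.
ExprNodeDesc residual = analyzer.analyzePredicate(predicate, searchConditions);
if (residual != null) {
  // The residual still has to be evaluated by Hive after the index lookup.
}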
Use of org.apache.hadoop.hive.ql.lib.NodeProcessorCtx in project hive by apache.
From the class LBExprProcFactory, method genPruner.
/**
 * Generates the list bucketing pruner for the expression tree.
 *
 * @param tabAlias
 *          The table alias of the partition table that is being considered
 *          for pruning
 * @param pred
 *          The predicate from which the list bucketing pruner needs to be
 *          generated
 * @param part
 *          The partition this walker is walking
 * @return the generated list bucketing pruner expression
 * @throws SemanticException
 */
public static ExprNodeDesc genPruner(String tabAlias, ExprNodeDesc pred, Partition part) throws SemanticException {
  // Create the walker, the rules dispatcher and the context.
  NodeProcessorCtx lbprCtx = new LBExprProcCtx(tabAlias, part);
  Map<Node, Object> outputMap = PrunerUtils.walkExprTree(pred, lbprCtx, getColumnProcessor(), getFieldProcessor(), getGenericFuncProcessor(), getDefaultExprProcessor());
  // Get the exprNodeDesc corresponding to the first start node.
  return (ExprNodeDesc) outputMap.get(pred);
}
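The factory method is self-contained: it builds the LBExprProcCtx from the table alias and partition, delegates the expression walk to PrunerUtils.walkExprTree, and reads the result for the root predicate out of the returned map. A minimal, hedged call-site sketch follows; the variable names filterPred and part are illustrative.

// Derive the list bucketing pruner expression for one partition from the
// filter predicate that applies to it.
ExprNodeDesc lbPruner = LBExprProcFactory.genPruner(tabAlias, filterPred, part);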
Use of org.apache.hadoop.hive.ql.lib.NodeProcessorCtx in project hive by apache.
From the class ListBucketingPruner, method transform.
/*
 * (non-Javadoc)
 *
 * @see org.apache.hadoop.hive.ql.optimizer.Transform#transform(org.apache.hadoop.hive.ql.parse.
 * ParseContext)
 */
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  // Create the context for walking operators.
  NodeProcessorCtx opPartWalkerCtx = new LBOpPartitionWalkerCtx(pctx);
  // Retrieve all partitions generated from the partition pruner and the partition column pruner.
  PrunerUtils.walkOperatorTree(pctx, opPartWalkerCtx, LBPartitionProcFactory.getFilterProc(), LBPartitionProcFactory.getDefaultProc());
  PrunedPartitionList partsList = ((LBOpPartitionWalkerCtx) opPartWalkerCtx).getPartitions();
  if (partsList != null) {
    Set<Partition> parts = partsList.getPartitions();
    if ((parts != null) && (parts.size() > 0)) {
      for (Partition part : parts) {
        // Only process partitions that are skewed and list bucketed.
        if (ListBucketingPrunerUtils.isListBucketingPart(part)) {
          // Create the context for walking operators.
          NodeProcessorCtx opWalkerCtx = new LBOpWalkerCtx(pctx.getOpToPartToSkewedPruner(), part);
          // Walk the operator tree to create the expression tree for list bucketing.
          PrunerUtils.walkOperatorTree(pctx, opWalkerCtx, LBProcFactory.getFilterProc(), LBProcFactory.getDefaultProc());
        }
      }
    }
  }
  return pctx;
}
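This transform runs two walks: one over the operator tree to collect the pruned partitions, then one more per skewed, list-bucketed partition to build its pruning expression tree. Because NodeProcessorCtx declares no accessors of its own, each phase recovers its results by downcasting the context to the concrete type, as the cast to LBOpPartitionWalkerCtx above shows. Compressed to its shape (purely illustrative, reusing the calls from the method):

NodeProcessorCtx ctx = new LBOpPartitionWalkerCtx(pctx);
PrunerUtils.walkOperatorTree(pctx, ctx, LBPartitionProcFactory.getFilterProc(), LBPartitionProcFactory.getDefaultProc());
// The walk populates the concrete context; read the result back with a downcast.
PrunedPartitionList partsList = ((LBOpPartitionWalkerCtx) ctx).getPartitions();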
Use of org.apache.hadoop.hive.ql.lib.NodeProcessorCtx in project hive by apache.
From the class SparkCompiler, method generateTaskTreeHelper.
private void generateTaskTreeHelper(GenSparkProcContext procCtx, List<Node> topNodes) throws SemanticException {
  // Create a walker which walks the tree in a DFS manner while maintaining
  // the operator stack. The dispatcher generates the plan from the operator tree.
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  GenSparkWork genSparkWork = new GenSparkWork(GenSparkUtils.getUtils());
  opRules.put(new RuleRegExp("Split Work - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), genSparkWork);
  opRules.put(new RuleRegExp("Split Work - SparkPartitionPruningSink", SparkPartitionPruningSinkOperator.getOperatorName() + "%"), genSparkWork);
  opRules.put(new TypeRule(MapJoinOperator.class), new SparkReduceSinkMapJoinProc());
  opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink", FileSinkOperator.getOperatorName() + "%"), new CompositeProcessor(new SparkFileSinkProcessor(), genSparkWork));
  opRules.put(new RuleRegExp("Handle Analyze Command", TableScanOperator.getOperatorName() + "%"), new SparkProcessAnalyzeTable(GenSparkUtils.getUtils()));
  opRules.put(new RuleRegExp("Remember union", UnionOperator.getOperatorName() + "%"), new SemanticNodeProcessor() {
    @Override
    public Object process(Node n, Stack<Node> s, NodeProcessorCtx procCtx, Object... os) throws SemanticException {
      GenSparkProcContext context = (GenSparkProcContext) procCtx;
      UnionOperator union = (UnionOperator) n;
      // Simply need to remember that we've seen a union.
      context.currentUnionOperators.add(union);
      return null;
    }
  });
  /**
   * SMB join case:   (Big)   (Small)  (Small)
   *                    TS       TS       TS
   *                     \       |       /
   *                      \      DS     DS
   *                        \    |    /
   *                         SMBJoinOP
   *
   * Some of the other processors expect only one traversal beyond SMBJoinOp.
   * We need to traverse from the big-table path only, and stop traversing on the
   * small-table path once we reach SMBJoinOp.
   * Also add some SMB join information to the context, so we can properly annotate
   * the MapWork later on.
   */
  opRules.put(new TypeRule(SMBMapJoinOperator.class), new SemanticNodeProcessor() {
    @Override
    public Object process(Node currNode, Stack<Node> stack, NodeProcessorCtx procCtx, Object... os) throws SemanticException {
      GenSparkProcContext context = (GenSparkProcContext) procCtx;
      SMBMapJoinOperator currSmbNode = (SMBMapJoinOperator) currNode;
      SparkSMBMapJoinInfo smbMapJoinCtx = context.smbMapJoinCtxMap.get(currSmbNode);
      if (smbMapJoinCtx == null) {
        smbMapJoinCtx = new SparkSMBMapJoinInfo();
        context.smbMapJoinCtxMap.put(currSmbNode, smbMapJoinCtx);
      }
      for (Node stackNode : stack) {
        if (stackNode instanceof DummyStoreOperator) {
          // If coming from the small-table side, do some bookkeeping, and skip traversal.
          smbMapJoinCtx.smallTableRootOps.add(context.currentRootOperator);
          return true;
        }
      }
      // If coming from the big-table side, do some bookkeeping, and continue traversal.
      smbMapJoinCtx.bigTableRootOp = context.currentRootOperator;
      return false;
    }
  });
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along.
  SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
  SemanticGraphWalker ogw = new GenSparkWorkWalker(disp, procCtx);
  ogw.startWalking(topNodes, null);
}
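Two details are worth calling out: TypeRule matches nodes by operator class rather than by a name regular expression, and, per the comments in the SMB processor above, the value a processor returns appears to tell the GenSparkWorkWalker whether to keep descending along the current path (true stops, false or null continues). Below is a minimal hedged sketch of a class-matched processor following that convention; the choice of LimitOperator is illustrative only and not part of SparkCompiler.

opRules.put(new TypeRule(LimitOperator.class), new SemanticNodeProcessor() {
  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... os) throws SemanticException {
    // Class-based match: this fires for every LimitOperator in the tree.
    // Returning Boolean.TRUE would, as in the SMB processor above, stop the
    // walker from descending this path; null or FALSE lets it continue.
    return null;
  }
});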