use of org.apache.hadoop.hive.ql.lib.GraphWalker in project hive by apache.
the class TypeCheckProcFactory method genExprNode.
protected static Map<ASTNode, ExprNodeDesc> genExprNode(ASTNode expr, TypeCheckCtx tcCtx, TypeCheckProcFactory tf) throws SemanticException {
// Create the walker, the rules dispatcher and the context.
// create a walker which walks the tree in a DFS manner while maintaining
// the operator stack. The dispatcher
// generates the plan from the operator tree
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(new RuleRegExp("R1", HiveParser.TOK_NULL + "%"), tf.getNullExprProcessor());
opRules.put(new RuleRegExp("R2", HiveParser.Number + "%|" + HiveParser.IntegralLiteral + "%|" + HiveParser.NumberLiteral + "%"), tf.getNumExprProcessor());
opRules.put(new RuleRegExp("R3", HiveParser.Identifier + "%|" + HiveParser.StringLiteral + "%|" + HiveParser.TOK_CHARSETLITERAL + "%|" + HiveParser.TOK_STRINGLITERALSEQUENCE + "%|" + "%|" + HiveParser.KW_IF + "%|" + HiveParser.KW_CASE + "%|" + HiveParser.KW_WHEN + "%|" + HiveParser.KW_IN + "%|" + HiveParser.KW_ARRAY + "%|" + HiveParser.KW_MAP + "%|" + HiveParser.KW_STRUCT + "%|" + HiveParser.KW_EXISTS + "%|" + HiveParser.TOK_SUBQUERY_OP_NOTIN + "%"), tf.getStrExprProcessor());
opRules.put(new RuleRegExp("R4", HiveParser.KW_TRUE + "%|" + HiveParser.KW_FALSE + "%"), tf.getBoolExprProcessor());
opRules.put(new RuleRegExp("R5", HiveParser.TOK_DATELITERAL + "%|" + HiveParser.TOK_TIMESTAMPLITERAL + "%"), tf.getDateTimeExprProcessor());
opRules.put(new RuleRegExp("R6", HiveParser.TOK_INTERVAL_YEAR_MONTH_LITERAL + "%|" + HiveParser.TOK_INTERVAL_DAY_TIME_LITERAL + "%|" + HiveParser.TOK_INTERVAL_YEAR_LITERAL + "%|" + HiveParser.TOK_INTERVAL_MONTH_LITERAL + "%|" + HiveParser.TOK_INTERVAL_DAY_LITERAL + "%|" + HiveParser.TOK_INTERVAL_HOUR_LITERAL + "%|" + HiveParser.TOK_INTERVAL_MINUTE_LITERAL + "%|" + HiveParser.TOK_INTERVAL_SECOND_LITERAL + "%"), tf.getIntervalExprProcessor());
opRules.put(new RuleRegExp("R7", HiveParser.TOK_TABLE_OR_COL + "%"), tf.getColumnExprProcessor());
opRules.put(new RuleRegExp("R8", HiveParser.TOK_SUBQUERY_EXPR + "%"), tf.getSubQueryExprProcessor());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
Dispatcher disp = new DefaultRuleDispatcher(tf.getDefaultExprProcessor(), opRules, tcCtx);
GraphWalker ogw = new ExpressionWalker(disp);
// Create a list of top nodes
ArrayList<Node> topNodes = Lists.<Node>newArrayList(expr);
HashMap<Node, Object> nodeOutputs = new LinkedHashMap<Node, Object>();
ogw.startWalking(topNodes, nodeOutputs);
return convert(nodeOutputs);
}
use of org.apache.hadoop.hive.ql.lib.GraphWalker in project hive by apache.
the class SparkCompiler method generateTaskTree.
/**
* TODO: need to turn on rules that's commented out and add more if necessary.
*/
@Override
protected void generateTaskTree(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx, List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
PERF_LOGGER.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_GENERATE_TASK_TREE);
GenSparkUtils utils = GenSparkUtils.getUtils();
utils.resetSequenceNumber();
ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
GenSparkProcContext procCtx = new GenSparkProcContext(conf, tempParseContext, mvTask, rootTasks, inputs, outputs, pCtx.getTopOps());
// -------------------------------- First Pass ---------------------------------- //
// Identify SparkPartitionPruningSinkOperators, and break OP tree if necessary
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(new RuleRegExp("Clone OP tree for PartitionPruningSink", SparkPartitionPruningSinkOperator.getOperatorName() + "%"), new SplitOpTreeForDPP());
Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
GraphWalker ogw = new GenSparkWorkWalker(disp, procCtx);
List<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(pCtx.getTopOps().values());
ogw.startWalking(topNodes, null);
// -------------------------------- Second Pass ---------------------------------- //
// Process operator tree in two steps: first we process the extra op trees generated
// in the first pass. Then we process the main op tree, and the result task will depend
// on the task generated in the first pass.
topNodes.clear();
topNodes.addAll(procCtx.topOps.values());
generateTaskTreeHelper(procCtx, topNodes);
// the partitions used.
if (!procCtx.clonedPruningTableScanSet.isEmpty()) {
SparkTask pruningTask = SparkUtilities.createSparkTask(conf);
SparkTask mainTask = procCtx.currentTask;
pruningTask.addDependentTask(procCtx.currentTask);
procCtx.rootTasks.remove(procCtx.currentTask);
procCtx.rootTasks.add(pruningTask);
procCtx.currentTask = pruningTask;
topNodes.clear();
topNodes.addAll(procCtx.clonedPruningTableScanSet);
generateTaskTreeHelper(procCtx, topNodes);
procCtx.currentTask = mainTask;
}
// we need to clone some operator plans and remove union operators still
for (BaseWork w : procCtx.workWithUnionOperators) {
GenSparkUtils.getUtils().removeUnionOperators(procCtx, w);
}
// we need to fill MapWork with 'local' work and bucket information for SMB Join.
GenSparkUtils.getUtils().annotateMapWork(procCtx);
// finally make sure the file sink operators are set up right
for (FileSinkOperator fileSink : procCtx.fileSinkSet) {
GenSparkUtils.getUtils().processFileSink(procCtx, fileSink);
}
// Process partition pruning sinks
for (Operator<?> prunerSink : procCtx.pruningSinkSet) {
utils.processPartitionPruningSink(procCtx, (SparkPartitionPruningSinkOperator) prunerSink);
}
PERF_LOGGER.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_GENERATE_TASK_TREE);
}
use of org.apache.hadoop.hive.ql.lib.GraphWalker in project hive by apache.
the class SparkCompiler method runJoinOptimizations.
private void runJoinOptimizations(OptimizeSparkProcContext procCtx) throws SemanticException {
ParseContext pCtx = procCtx.getParseContext();
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(new TypeRule(JoinOperator.class), new SparkJoinOptimizer(pCtx));
opRules.put(new TypeRule(MapJoinOperator.class), new SparkJoinHintOptimizer(pCtx));
opRules.put(new RuleRegExp("Disabling Dynamic Partition Pruning By Size", SparkPartitionPruningSinkOperator.getOperatorName() + "%"), new SparkRemoveDynamicPruningBySize());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
GraphWalker ogw = new DefaultGraphWalker(disp);
// Create a list of topop nodes
ArrayList<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(pCtx.getTopOps().values());
ogw.startWalking(topNodes, null);
}
use of org.apache.hadoop.hive.ql.lib.GraphWalker in project hive by apache.
the class AnnotateReduceSinkOutputOperator method transform.
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
// 1. We apply the transformation
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(new RuleRegExp("R1", "(" + ReduceSinkOperator.getOperatorName() + "%)"), new ReduceSinkOutputOperatorAnnotator());
GraphWalker ogw = new DefaultGraphWalker(new DefaultRuleDispatcher(null, opRules, null));
ArrayList<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(pctx.getTopOps().values());
ogw.startWalking(topNodes, null);
return pctx;
}
use of org.apache.hadoop.hive.ql.lib.GraphWalker in project hive by apache.
the class BucketMapJoinOptimizer method transform.
public ParseContext transform(ParseContext pctx) throws SemanticException {
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
BucketJoinProcCtx bucketMapJoinOptimizeCtx = new BucketJoinProcCtx(pctx.getConf());
// process map joins with no reducers pattern
opRules.put(new RuleRegExp("R1", MapJoinOperator.getOperatorName() + "%"), getBucketMapjoinProc(pctx));
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, bucketMapJoinOptimizeCtx);
GraphWalker ogw = new DefaultGraphWalker(disp);
// Create a list of topop nodes
List<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(pctx.getTopOps().values());
ogw.startWalking(topNodes, null);
return pctx;
}
Aggregations