Use of org.apache.hadoop.hive.ql.lib.RuleRegExp in project hive by apache.
The class TezCompiler, method removeSemiJoinCyclesDueToMapsideJoins:
private static void removeSemiJoinCyclesDueToMapsideJoins(OptimizeTezProcContext procCtx)
    throws SemanticException {
  if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)
      || procCtx.parseContext.getRsToSemiJoinBranchInfo().size() == 0) {
    return;
  }

  // Match every pair of adjacent join operators (map join or merge join).
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R1",
      MapJoinOperator.getOperatorName() + "%" + MapJoinOperator.getOperatorName() + "%"),
      new SemiJoinCycleRemovalDueToMapsideJoins());
  opRules.put(new RuleRegExp("R2",
      MapJoinOperator.getOperatorName() + "%" + CommonMergeJoinOperator.getOperatorName() + "%"),
      new SemiJoinCycleRemovalDueToMapsideJoins());
  opRules.put(new RuleRegExp("R3",
      CommonMergeJoinOperator.getOperatorName() + "%" + MapJoinOperator.getOperatorName() + "%"),
      new SemiJoinCycleRemovalDueToMapsideJoins());
  opRules.put(new RuleRegExp("R4",
      CommonMergeJoinOperator.getOperatorName() + "%" + CommonMergeJoinOperator.getOperatorName() + "%"),
      new SemiJoinCycleRemovalDueToMapsideJoins());

  SemiJoinCycleRemovalDueTOMapsideJoinContext ctx =
      new SemiJoinCycleRemovalDueTOMapsideJoinContext();
  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(procCtx.parseContext.getTopOps().values());
  GraphWalker ogw = new PreOrderOnceWalker(disp);
  ogw.startWalking(topNodes, null);

  // Process the collected (parent join, child join) pairs.
  ParseContext pCtx = procCtx.parseContext;
  for (Operator<?> parentJoin : ctx.childParentMap.keySet()) {
    Operator<?> childJoin = ctx.childParentMap.get(parentJoin);
    if (parentJoin.getChildOperators().size() == 1) {
      continue;
    }
    for (Operator<?> child : parentJoin.getChildOperators()) {
      if (!(child instanceof SelectOperator)) {
        continue;
      }
      // Walk down the semijoin branch to the ReduceSink at its end.
      while (child.getChildOperators().size() > 0) {
        child = child.getChildOperators().get(0);
      }
      if (!(child instanceof ReduceSinkOperator)) {
        continue;
      }
      ReduceSinkOperator rs = (ReduceSinkOperator) child;
      SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs);
      if (sjInfo == null) {
        continue;
      }
      TableScanOperator ts = sjInfo.getTsOp();

      // Check whether this semijoin branch closes a cycle with childJoin.
      for (Operator<?> parent : childJoin.getParentOperators()) {
        if (parent == parentJoin) {
          continue;
        }
        assert parent instanceof ReduceSinkOperator;
        // Walk up to the TableScan feeding this side of the join.
        while (parent.getParentOperators().size() > 0) {
          parent = parent.getParentOperators().get(0);
        }
        if (parent == ts) {
          // We have a cycle!
          if (sjInfo.getIsHint()) {
            throw new SemanticException(
                "Removing hinted semijoin as it is creating cycles with mapside joins "
                    + rs + " : " + ts);
          }
          if (LOG.isDebugEnabled()) {
            LOG.debug("Semijoin cycle due to mapjoin. Removing semijoin "
                + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
          }
          GenTezUtils.removeBranch(rs);
          GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts);
        }
      }
    }
  }
}
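The SemiJoinCycleRemovalDueToMapsideJoins processor and its context class are defined elsewhere in TezCompiler and are not shown on this page. A minimal sketch of what such a processor could look like, inferred from the usage above (the childParentMap field name is taken from the loop above; everything else is illustrative):

// Illustrative sketch, not the verbatim Hive source. Rules R1-R4 above fire
// when the walker's stack ends with two adjacent join operators, so the node
// being processed is the child join and the operator one position below the
// top of the stack is its parent join.
private static class SemiJoinCycleRemovalDueToMapsideJoins implements NodeProcessor {
  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... nodeOutputs) throws SemanticException {
    SemiJoinCycleRemovalDueTOMapsideJoinContext ctx =
        (SemiJoinCycleRemovalDueTOMapsideJoinContext) procCtx;
    // Record parent join -> child join for the cycle check after the walk.
    ctx.childParentMap.put((Operator<?>) stack.get(stack.size() - 2), (Operator<?>) nd);
    return null;
  }
}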
Use of org.apache.hadoop.hive.ql.lib.RuleRegExp in project hive by apache.
The class TezCompiler, method runStatsDependentOptimizations:
private void runStatsDependentOptimizations(OptimizeTezProcContext procCtx,
    Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
  // Sequence of TableScan operators to be walked
  Deque<Operator<?>> deque = new LinkedList<Operator<?>>();
  deque.addAll(procCtx.parseContext.getTopOps().values());

  // Create a walker which walks the tree in a DFS manner while maintaining
  // the operator stack.
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("Set parallelism - ReduceSink",
      ReduceSinkOperator.getOperatorName() + "%"), new SetReducerParallelism());
  opRules.put(new RuleRegExp("Convert Join to Map-join",
      JoinOperator.getOperatorName() + "%"), new ConvertJoinMapJoin());

  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along.
  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(procCtx.parseContext.getTopOps().values());
  GraphWalker ogw = new ForwardWalker(disp);
  ogw.startWalking(topNodes, null);
}
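Note the null first argument to DefaultRuleDispatcher in both Hive methods above: that argument is the default processor, applied to any node whose stack matches none of the RuleRegExp patterns, and passing null means unmatched operators are simply walked past (the Flink example below passes a real default processor instead). A minimal sketch of the same wiring with an explicit default, where LogUnmatched is a hypothetical NodeProcessor of your own:

// Sketch only: LogUnmatched is hypothetical; the rest follows the
// rule/dispatcher/walker pattern used in the two methods above.
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(new RuleRegExp("Set parallelism - ReduceSink",
    ReduceSinkOperator.getOperatorName() + "%"), new SetReducerParallelism());
// Nodes matching no rule fall through to the default processor.
Dispatcher disp = new DefaultRuleDispatcher(new LogUnmatched(), opRules, procCtx);
List<Node> topNodes = new ArrayList<Node>(procCtx.parseContext.getTopOps().values());
new ForwardWalker(disp).startWalking(topNodes, null);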
Use of org.apache.hadoop.hive.ql.lib.RuleRegExp in project flink by apache.
The class HiveParserTypeCheckProcFactory, method genExprNode:
public static Map<HiveParserASTNode, ExprNodeDesc> genExprNode(HiveParserASTNode expr,
    HiveParserTypeCheckCtx tcCtx, HiveParserTypeCheckProcFactory tf) throws SemanticException {
  // Create the walker, the rules dispatcher and the context. The walker
  // walks the tree in a DFS manner while maintaining the operator stack;
  // the dispatcher generates the plan from the operator tree.
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<>();
  opRules.put(new RuleRegExp("R1", HiveASTParser.TOK_NULL + "%"), tf.getNullExprProcessor());
  opRules.put(new RuleRegExp("R2", HiveASTParser.Number + "%|"
      + HiveASTParser.IntegralLiteral + "%|" + HiveASTParser.NumberLiteral + "%"),
      tf.getNumExprProcessor());
  opRules.put(new RuleRegExp("R3", HiveASTParser.Identifier + "%|"
      + HiveASTParser.StringLiteral + "%|" + HiveASTParser.TOK_CHARSETLITERAL + "%|"
      + HiveASTParser.TOK_STRINGLITERALSEQUENCE + "%|" + HiveASTParser.KW_IF + "%|"
      + HiveASTParser.KW_CASE + "%|" + HiveASTParser.KW_WHEN + "%|"
      + HiveASTParser.KW_IN + "%|" + HiveASTParser.KW_ARRAY + "%|"
      + HiveASTParser.KW_MAP + "%|" + HiveASTParser.KW_STRUCT + "%|"
      + HiveASTParser.KW_EXISTS + "%|" + HiveASTParser.TOK_SUBQUERY_OP_NOTIN + "%"),
      tf.getStrExprProcessor());
  opRules.put(new RuleRegExp("R4", HiveASTParser.KW_TRUE + "%|"
      + HiveASTParser.KW_FALSE + "%"), tf.getBoolExprProcessor());
  opRules.put(new RuleRegExp("R5", HiveASTParser.TOK_DATELITERAL + "%|"
      + HiveASTParser.TOK_TIMESTAMPLITERAL + "%"), tf.getDateTimeExprProcessor());
  opRules.put(new RuleRegExp("R6", HiveASTParser.TOK_INTERVAL_YEAR_MONTH_LITERAL + "%|"
      + HiveASTParser.TOK_INTERVAL_DAY_TIME_LITERAL + "%|"
      + HiveASTParser.TOK_INTERVAL_YEAR_LITERAL + "%|"
      + HiveASTParser.TOK_INTERVAL_MONTH_LITERAL + "%|"
      + HiveASTParser.TOK_INTERVAL_DAY_LITERAL + "%|"
      + HiveASTParser.TOK_INTERVAL_HOUR_LITERAL + "%|"
      + HiveASTParser.TOK_INTERVAL_MINUTE_LITERAL + "%|"
      + HiveASTParser.TOK_INTERVAL_SECOND_LITERAL + "%"), tf.getIntervalExprProcessor());
  opRules.put(new RuleRegExp("R7", HiveASTParser.TOK_TABLE_OR_COL + "%"),
      tf.getColumnExprProcessor());
  opRules.put(new RuleRegExp("R8", HiveASTParser.TOK_SUBQUERY_EXPR + "%"),
      tf.getSubQueryExprProcessor());

  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along.
  Dispatcher disp = new DefaultRuleDispatcher(tf.getDefaultExprProcessor(), opRules, tcCtx);
  GraphWalker ogw = new HiveParserExpressionWalker(disp);

  // Create a list of top nodes and start walking from the expression root.
  ArrayList<Node> topNodes = new ArrayList<>(Collections.singleton(expr));
  HashMap<Node, Object> nodeOutputs = new LinkedHashMap<>();
  ogw.startWalking(topNodes, nodeOutputs);
  return convert(nodeOutputs);
}
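A hedged usage sketch: callers typically hand genExprNode the AST of a single expression (for example a WHERE predicate) together with a type-check context, then look up the translated root node in the returned map. The rowResolver, factory, and whereAst names below are assumed to come from the surrounding planner code, not from this page:

// Hypothetical call site; the HiveParserTypeCheckCtx constructor argument
// is an assumption for illustration.
HiveParserTypeCheckCtx tcCtx = new HiveParserTypeCheckCtx(rowResolver);
Map<HiveParserASTNode, ExprNodeDesc> translated =
    HiveParserTypeCheckProcFactory.genExprNode(whereAst, tcCtx, factory);
ExprNodeDesc predicate = translated.get(whereAst); // typed expression for the root AST node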
Use of org.apache.hadoop.hive.ql.lib.RuleRegExp in project flink by splunk.
The class HiveParserTypeCheckProcFactory, method genExprNode: identical to the flink-by-apache version above.