Example 91 with Dispatcher

Use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.

The class TezCompiler, method removeSemiJoinIfNoStats.

private void removeSemiJoinIfNoStats(OptimizeTezProcContext procCtx) throws SemanticException {
    if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) {
        // Not needed without semi-join reduction
        return;
    }
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%" + GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%"), new SemiJoinRemovalIfNoStatsProc());
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    GraphWalker ogw = new PreOrderOnceWalker(disp);
    ogw.startWalking(topNodes, null);
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) PreOrderOnceWalker(org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker) Rule(org.apache.hadoop.hive.ql.lib.Rule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)
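
The SemiJoinRemovalIfNoStatsProc processor registered above is defined elsewhere in TezCompiler and is not shown here. As a rough, hedged sketch of the contract any processor plugged into this rule/dispatcher/walker pattern has to satisfy, the fragment below wires the same boilerplate to a hypothetical inline NodeProcessor; the shortened two-operator pattern and the empty processor body are placeholders, not the real removal logic.

// Minimal sketch, assuming the same OptimizeTezProcContext (procCtx) as in the method above.
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(new RuleRegExp("R1",
    GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%"),
    new NodeProcessor() {

        @Override
        public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException {
            // nd is the node being dispatched; the operators walked to reach it are on the stack.
            return null;
        }
    });
Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
GraphWalker ogw = new PreOrderOnceWalker(disp);
ogw.startWalking(new ArrayList<Node>(procCtx.parseContext.getTopOps().values()), null);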

Example 92 with Dispatcher

Use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.

The class TezCompiler, method runStatsDependentOptimizations.

private void runStatsDependentOptimizations(OptimizeTezProcContext procCtx, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
    // Sequence of TableScan operators to be walked
    Deque<Operator<?>> deque = new LinkedList<Operator<?>>();
    deque.addAll(procCtx.parseContext.getTopOps().values());
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("Set parallelism - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), new SetReducerParallelism());
    opRules.put(new RuleRegExp("Convert Join to Map-join", JoinOperator.getOperatorName() + "%"), new ConvertJoinMapJoin());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    GraphWalker ogw = new ForwardWalker(disp);
    ogw.startWalking(topNodes, null);
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) ForwardWalker(org.apache.hadoop.hive.ql.lib.ForwardWalker) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SetReducerParallelism(org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedList(java.util.LinkedList) LinkedHashMap(java.util.LinkedHashMap) ConvertJoinMapJoin(org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin) Rule(org.apache.hadoop.hive.ql.lib.Rule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)
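
Both rules above dispatch on a single operator type through a one-name RuleRegExp. As a hedged aside, a TypeRule keyed on the operator class is a roughly equivalent way to express the same dispatch, and Example 94 below uses that idiom for MapJoinOperator and SMBMapJoinOperator. A minimal sketch against the same context:

// Minimal sketch, assuming the same OptimizeTezProcContext (procCtx) as in the method above.
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(new TypeRule(ReduceSinkOperator.class), new SetReducerParallelism());
opRules.put(new TypeRule(JoinOperator.class), new ConvertJoinMapJoin());
Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
GraphWalker ogw = new ForwardWalker(disp);
ogw.startWalking(new ArrayList<Node>(procCtx.parseContext.getTopOps().values()), null);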

Example 93 with Dispatcher

Use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.

The class MacroSemanticAnalyzer, method analyzeCreateMacro.

@SuppressWarnings("unchecked")
private void analyzeCreateMacro(ASTNode ast) throws SemanticException {
    String functionName = ast.getChild(0).getText();
    // Temp macros are not allowed to have qualified names.
    if (FunctionUtils.isQualifiedFunctionName(functionName)) {
        throw new SemanticException("Temporary macro cannot be created with a qualified name.");
    }
    List<FieldSchema> arguments = BaseSemanticAnalyzer.getColumns((ASTNode) ast.getChild(1), true);
    boolean isNoArgumentMacro = arguments.size() == 0;
    RowResolver rowResolver = new RowResolver();
    ArrayList<String> macroColNames = new ArrayList<String>(arguments.size());
    ArrayList<TypeInfo> macroColTypes = new ArrayList<TypeInfo>(arguments.size());
    final Set<String> actualColumnNames = new HashSet<String>();
    if (!isNoArgumentMacro) {
        /*
       * Walk down expression to see which arguments are actually used.
       */
        Node expression = (Node) ast.getChild(2);
        PreOrderWalker walker = new PreOrderWalker(new Dispatcher() {

            @Override
            public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
                if (nd instanceof ASTNode) {
                    ASTNode node = (ASTNode) nd;
                    if (node.getType() == HiveParser.TOK_TABLE_OR_COL) {
                        actualColumnNames.add(node.getChild(0).getText());
                    }
                }
                return null;
            }
        });
        walker.startWalking(Collections.singletonList(expression), null);
    }
    for (FieldSchema argument : arguments) {
        TypeInfo colType = TypeInfoUtils.getTypeInfoFromTypeString(argument.getType());
        rowResolver.put("", argument.getName(), new ColumnInfo(argument.getName(), colType, "", false));
        macroColNames.add(argument.getName());
        macroColTypes.add(colType);
    }
    Set<String> expectedColumnNames = new LinkedHashSet<String>(macroColNames);
    if (!expectedColumnNames.equals(actualColumnNames)) {
        throw new SemanticException("Expected columns " + expectedColumnNames + " but found " + actualColumnNames);
    }
    if (expectedColumnNames.size() != macroColNames.size()) {
        throw new SemanticException("At least one parameter name was used more than once " + macroColNames);
    }
    SemanticAnalyzer sa = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED) ?
        new CalcitePlanner(queryState) : new SemanticAnalyzer(queryState);
    ExprNodeDesc body;
    if (isNoArgumentMacro) {
        body = sa.genExprNodeDesc((ASTNode) ast.getChild(1), rowResolver);
    } else {
        body = sa.genExprNodeDesc((ASTNode) ast.getChild(2), rowResolver);
    }
    CreateMacroDesc desc = new CreateMacroDesc(functionName, macroColNames, macroColTypes, body);
    rootTasks.add(TaskFactory.get(new FunctionWork(desc)));
    addEntities();
}
Also used : LinkedHashSet(java.util.LinkedHashSet) CreateMacroDesc(org.apache.hadoop.hive.ql.plan.CreateMacroDesc) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) FunctionWork(org.apache.hadoop.hive.ql.plan.FunctionWork) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PreOrderWalker(org.apache.hadoop.hive.ql.lib.PreOrderWalker)
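
Here the Dispatcher interface is implemented directly instead of going through DefaultRuleDispatcher: with no rules to match, the anonymous dispatcher simply sees every node the PreOrderWalker visits. A hedged sketch of the same idea, with a hypothetical dispatcher that only counts the nodes under the expression:

// Minimal sketch, assuming the expression Node from the method above. The counting logic
// is hypothetical and only illustrates that a bare Dispatcher receives every visited node.
final int[] nodeCount = { 0 };
Dispatcher countingDispatcher = new Dispatcher() {

    @Override
    public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
        nodeCount[0]++;
        return null;
    }
};
PreOrderWalker walker = new PreOrderWalker(countingDispatcher);
walker.startWalking(Collections.singletonList(expression), null);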

Example 94 with Dispatcher

Use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.

The class SparkCompiler, method generateTaskTreeHelper.

private void generateTaskTreeHelper(GenSparkProcContext procCtx, List<Node> topNodes) throws SemanticException {
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack. The dispatcher generates the plan from the operator tree
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    GenSparkWork genSparkWork = new GenSparkWork(GenSparkUtils.getUtils());
    opRules.put(new RuleRegExp("Split Work - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), genSparkWork);
    opRules.put(new RuleRegExp("Split Work - SparkPartitionPruningSink", SparkPartitionPruningSinkOperator.getOperatorName() + "%"), genSparkWork);
    opRules.put(new TypeRule(MapJoinOperator.class), new SparkReduceSinkMapJoinProc());
    opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink", FileSinkOperator.getOperatorName() + "%"), new CompositeProcessor(new SparkFileSinkProcessor(), genSparkWork));
    opRules.put(new RuleRegExp("Handle Analyze Command", TableScanOperator.getOperatorName() + "%"), new SparkProcessAnalyzeTable(GenSparkUtils.getUtils()));
    opRules.put(new RuleRegExp("Remember union", UnionOperator.getOperatorName() + "%"), new NodeProcessor() {

        @Override
        public Object process(Node n, Stack<Node> s, NodeProcessorCtx procCtx, Object... os) throws SemanticException {
            GenSparkProcContext context = (GenSparkProcContext) procCtx;
            UnionOperator union = (UnionOperator) n;
            // simply need to remember that we've seen a union.
            context.currentUnionOperators.add(union);
            return null;
        }
    });
    /**
     *  SMB join case:   (Big)   (Small)  (Small)
     *                     TS       TS       TS
     *                      \       |       /
     *                       \      DS     DS
     *                         \   |    /
     *                         SMBJoinOP
     *
     * Some of the other processors are expecting only one traversal beyond SMBJoinOp.
     * We need to traverse from the big-table path only, and stop traversing on the
     * small-table path once we reach SMBJoinOp.
     * Also add some SMB join information to the context, so we can properly annotate
     * the MapWork later on.
     */
    opRules.put(new TypeRule(SMBMapJoinOperator.class), new NodeProcessor() {

        @Override
        public Object process(Node currNode, Stack<Node> stack, NodeProcessorCtx procCtx, Object... os) throws SemanticException {
            GenSparkProcContext context = (GenSparkProcContext) procCtx;
            SMBMapJoinOperator currSmbNode = (SMBMapJoinOperator) currNode;
            SparkSMBMapJoinInfo smbMapJoinCtx = context.smbMapJoinCtxMap.get(currSmbNode);
            if (smbMapJoinCtx == null) {
                smbMapJoinCtx = new SparkSMBMapJoinInfo();
                context.smbMapJoinCtxMap.put(currSmbNode, smbMapJoinCtx);
            }
            for (Node stackNode : stack) {
                if (stackNode instanceof DummyStoreOperator) {
                    // If coming from small-table side, do some book-keeping, and skip traversal.
                    smbMapJoinCtx.smallTableRootOps.add(context.currentRootOperator);
                    return true;
                }
            }
            // If coming from big-table side, do some book-keeping, and continue traversal
            smbMapJoinCtx.bigTableRootOp = context.currentRootOperator;
            return false;
        }
    });
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    GraphWalker ogw = new GenSparkWorkWalker(disp, procCtx);
    ogw.startWalking(topNodes, null);
}
Also used : Node(org.apache.hadoop.hive.ql.lib.Node) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) NodeProcessorCtx(org.apache.hadoop.hive.ql.lib.NodeProcessorCtx) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) CompositeProcessor(org.apache.hadoop.hive.ql.lib.CompositeProcessor) SparkReduceSinkMapJoinProc(org.apache.hadoop.hive.ql.optimizer.spark.SparkReduceSinkMapJoinProc) Rule(org.apache.hadoop.hive.ql.lib.Rule) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule)
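
The helper above expects its start nodes from the caller. A hedged sketch of a plausible call site follows; the procCtx.parseContext field access is an assumption carried over from the Tez examples above, not a quotation of SparkCompiler itself.

// Hypothetical call site, assuming a GenSparkProcContext named procCtx that exposes the
// parse context's top TableScanOperators the same way the Tez examples do.
List<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(procCtx.parseContext.getTopOps().values());
generateTaskTreeHelper(procCtx, topNodes);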

Example 95 with Dispatcher

Use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.

The class ExprWalkerProcFactory, method extractPushdownPreds.

/**
 * Extracts pushdown predicates from the given list of predicate expressions.
 *
 * @param opContext
 *          operator context used for resolving column references
 * @param op
 *          operator of the predicates being processed
 * @param preds
 *          the candidate predicate expressions to examine
 * @return The expression walker information
 * @throws SemanticException
 */
public static ExprWalkerInfo extractPushdownPreds(OpWalkerInfo opContext, Operator<? extends OperatorDesc> op, List<ExprNodeDesc> preds) throws SemanticException {
    // Create the walker, the rules dispatcher and the context.
    ExprWalkerInfo exprContext = new ExprWalkerInfo(op);
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack. The dispatcher
    // generates the plan from the operator tree
    Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
    exprRules.put(new TypeRule(ExprNodeColumnDesc.class), getColumnProcessor());
    exprRules.put(new TypeRule(ExprNodeFieldDesc.class), getFieldProcessor());
    exprRules.put(new TypeRule(ExprNodeGenericFuncDesc.class), getGenericFuncProcessor());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(), exprRules, exprContext);
    GraphWalker egw = new DefaultGraphWalker(disp);
    List<Node> startNodes = new ArrayList<Node>();
    List<ExprNodeDesc> clonedPreds = new ArrayList<ExprNodeDesc>();
    for (ExprNodeDesc node : preds) {
        ExprNodeDesc clone = node.clone();
        clonedPreds.add(clone);
        exprContext.getNewToOldExprMap().put(clone, node);
    }
    startNodes.addAll(clonedPreds);
    egw.startWalking(startNodes, null);
    HiveConf conf = opContext.getParseContext().getConf();
    // check the root expression for final candidates
    for (ExprNodeDesc pred : clonedPreds) {
        extractFinalCandidates(pred, exprContext, conf);
    }
    return exprContext;
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Rule(org.apache.hadoop.hive.ql.lib.Rule) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule)
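
Unlike the earlier dispatchers in this section, this one passes a non-null default processor (getDefaultExprProcessor()) as the first constructor argument, so expression nodes that match none of the three TypeRules are still handled rather than silently skipped. A hedged sketch of that fallback in isolation, using a hypothetical do-nothing default processor and a single rule:

// Minimal sketch, assuming the exprContext from the method above and that this code runs
// inside ExprWalkerProcFactory so getColumnProcessor() is in scope. The inline default
// processor is hypothetical; it fires for any node that matches no registered rule.
NodeProcessor defaultProc = new NodeProcessor() {

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException {
        // Reached only when no rule in exprRules matched the current node.
        return null;
    }
};
Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
exprRules.put(new TypeRule(ExprNodeColumnDesc.class), getColumnProcessor());
Dispatcher disp = new DefaultRuleDispatcher(defaultProc, exprRules, exprContext);
GraphWalker egw = new DefaultGraphWalker(disp);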

Aggregations

Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher) 97
Node (org.apache.hadoop.hive.ql.lib.Node) 97
ArrayList (java.util.ArrayList) 92
GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker) 87
DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) 86
NodeProcessor (org.apache.hadoop.hive.ql.lib.NodeProcessor) 68
Rule (org.apache.hadoop.hive.ql.lib.Rule) 68
LinkedHashMap (java.util.LinkedHashMap) 67
DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) 62
RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp) 59
HashMap (java.util.HashMap) 21
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException) 17
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 16
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) 15
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) 14
TypeRule (org.apache.hadoop.hive.ql.lib.TypeRule) 11
List (java.util.List) 10
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) 10
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) 10
Test (org.junit.Test) 10