Search in sources:

Example 1 with PreOrderWalker

Use of org.apache.hadoop.hive.ql.lib.PreOrderWalker in project hive by apache.

From the class SparkCompiler, method runSetReducerParallelism:

private void runSetReducerParallelism(OptimizeSparkProcContext procCtx) throws SemanticException {
    ParseContext pCtx = procCtx.getParseContext();
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("Set parallelism - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), new SetSparkReducerParallelism(pCtx.getConf()));
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    GraphWalker ogw = new PreOrderWalker(disp);
    // Create a list of top operator nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pCtx.getTopOps().values());
    ogw.startWalking(topNodes, null);
}
Also used: NodeProcessor (org.apache.hadoop.hive.ql.lib.NodeProcessor), DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher), Node (org.apache.hadoop.hive.ql.lib.Node), RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp), ArrayList (java.util.ArrayList), Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher), SetSparkReducerParallelism (org.apache.hadoop.hive.ql.optimizer.spark.SetSparkReducerParallelism), LinkedHashMap (java.util.LinkedHashMap), ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext), Rule (org.apache.hadoop.hive.ql.lib.Rule), TypeRule (org.apache.hadoop.hive.ql.lib.TypeRule), PreOrderWalker (org.apache.hadoop.hive.ql.lib.PreOrderWalker), GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker), DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)
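
All of the examples on this page share one pattern: build a Dispatcher (either a DefaultRuleDispatcher over a rule map, or a hand-written one), wrap it in a PreOrderWalker, and call startWalking on the root nodes; the walker then hands every node to the dispatcher, parent before children. Below is a minimal sketch of just that contract, with no rules at all. The class and method names are invented for illustration; the Hive types are the ones already imported above.

import java.util.ArrayList;
import java.util.Stack;

import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.PreOrderWalker;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class PreOrderDumpSketch {

    // Prints every node reachable from the given start nodes, parent before children.
    // The stack handed to dispatch holds the path from the start node, so its size
    // doubles as the current depth.
    public static void dump(ArrayList<Node> topNodes) throws SemanticException {
        Dispatcher disp = new Dispatcher() {

            @Override
            public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
                System.out.println(stack.size() + ": " + nd.getName());
                return null;
            }
        };
        GraphWalker ogw = new PreOrderWalker(disp);
        ogw.startWalking(topNodes, null);
    }
}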

Example 2 with PreOrderWalker

Use of org.apache.hadoop.hive.ql.lib.PreOrderWalker in project hive by apache.

From the class SparkSkewJoinResolver, method resolve:

@Override
public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
    SparkSkewJoinProcFactory.getVisitedJoinOp().clear();
    Dispatcher disp = new SparkSkewJoinTaskDispatcher(pctx);
    // since we may split current task, use a pre-order walker
    GraphWalker ogw = new PreOrderWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getRootTasks());
    ogw.startWalking(topNodes, null);
    return pctx;
}
Also used: Node (org.apache.hadoop.hive.ql.lib.Node), ArrayList (java.util.ArrayList), Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher), DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher), PreOrderWalker (org.apache.hadoop.hive.ql.lib.PreOrderWalker), DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker), GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker)

Example 3 with PreOrderWalker

Use of org.apache.hadoop.hive.ql.lib.PreOrderWalker in project hive by apache.

From the class MacroSemanticAnalyzer, method analyzeCreateMacro:

@SuppressWarnings("unchecked")
private void analyzeCreateMacro(ASTNode ast) throws SemanticException {
    String functionName = ast.getChild(0).getText();
    // Temp macros are not allowed to have qualified names.
    if (FunctionUtils.isQualifiedFunctionName(functionName)) {
        throw new SemanticException("Temporary macro cannot be created with a qualified name.");
    }
    List<FieldSchema> arguments = BaseSemanticAnalyzer.getColumns((ASTNode) ast.getChild(1), true);
    boolean isNoArgumentMacro = arguments.size() == 0;
    RowResolver rowResolver = new RowResolver();
    ArrayList<String> macroColNames = new ArrayList<String>(arguments.size());
    ArrayList<TypeInfo> macroColTypes = new ArrayList<TypeInfo>(arguments.size());
    final Set<String> actualColumnNames = new HashSet<String>();
    if (!isNoArgumentMacro) {
        /*
       * Walk down expression to see which arguments are actually used.
       */
        Node expression = (Node) ast.getChild(2);
        PreOrderWalker walker = new PreOrderWalker(new Dispatcher() {

            @Override
            public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
                if (nd instanceof ASTNode) {
                    ASTNode node = (ASTNode) nd;
                    if (node.getType() == HiveParser.TOK_TABLE_OR_COL) {
                        actualColumnNames.add(node.getChild(0).getText());
                    }
                }
                return null;
            }
        });
        walker.startWalking(Collections.singletonList(expression), null);
    }
    for (FieldSchema argument : arguments) {
        TypeInfo colType = TypeInfoUtils.getTypeInfoFromTypeString(argument.getType());
        rowResolver.put("", argument.getName(), new ColumnInfo(argument.getName(), colType, "", false));
        macroColNames.add(argument.getName());
        macroColTypes.add(colType);
    }
    Set<String> expectedColumnNames = new LinkedHashSet<String>(macroColNames);
    if (!expectedColumnNames.equals(actualColumnNames)) {
        throw new SemanticException("Expected columns " + expectedColumnNames + " but found " + actualColumnNames);
    }
    if (expectedColumnNames.size() != macroColNames.size()) {
        throw new SemanticException("At least one parameter name was used more than once " + macroColNames);
    }
    SemanticAnalyzer sa = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED) ? new CalcitePlanner(queryState) : new SemanticAnalyzer(queryState);
    ExprNodeDesc body;
    if (isNoArgumentMacro) {
        body = sa.genExprNodeDesc((ASTNode) ast.getChild(1), rowResolver);
    } else {
        body = sa.genExprNodeDesc((ASTNode) ast.getChild(2), rowResolver);
    }
    CreateMacroDesc desc = new CreateMacroDesc(functionName, macroColNames, macroColTypes, body);
    rootTasks.add(TaskFactory.get(new FunctionWork(desc), conf));
    addEntities();
}
Also used: LinkedHashSet (java.util.LinkedHashSet), CreateMacroDesc (org.apache.hadoop.hive.ql.plan.CreateMacroDesc), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), Node (org.apache.hadoop.hive.ql.lib.Node), ArrayList (java.util.ArrayList), ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo), Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), HashSet (java.util.HashSet), FunctionWork (org.apache.hadoop.hive.ql.plan.FunctionWork), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), PreOrderWalker (org.apache.hadoop.hive.ql.lib.PreOrderWalker)
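
Unlike the first two examples, this one passes an anonymous Dispatcher straight to PreOrderWalker with no rules at all, using the walk only to collect the column names referenced in the macro body (for a macro such as sigmoid(x DOUBLE) defined as 1.0 / (1.0 + EXP(-x)), the walk would collect just x). The same idea can be factored into a standalone helper. This is a sketch: collectColumnReferences is an invented name, and it assumes only the Hive classes already used in the example above.

import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.Stack;

import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.PreOrderWalker;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class ColumnReferenceCollector {

    // Returns the column names referenced anywhere inside the given expression AST,
    // in the order they are first encountered during the pre-order walk.
    public static Set<String> collectColumnReferences(Node expression) throws SemanticException {
        final Set<String> columnNames = new LinkedHashSet<String>();
        PreOrderWalker walker = new PreOrderWalker(new Dispatcher() {

            @Override
            public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
                if (nd instanceof ASTNode) {
                    ASTNode node = (ASTNode) nd;
                    if (node.getType() == HiveParser.TOK_TABLE_OR_COL) {
                        columnNames.add(node.getChild(0).getText());
                    }
                }
                return null;
            }
        });
        walker.startWalking(Collections.singletonList(expression), null);
        return columnNames;
    }
}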

Example 4 with PreOrderWalker

Use of org.apache.hadoop.hive.ql.lib.PreOrderWalker in project hive by apache.

From the class BucketingSortingInferenceOptimizer, method inferBucketingSorting:

/**
   * For each map reduce task, if it has a reducer, infer whether or not the final output of the
   * reducer is bucketed and/or sorted
   *
   * @param mapRedTasks
   * @throws SemanticException
   */
private void inferBucketingSorting(List<ExecDriver> mapRedTasks) throws SemanticException {
    for (ExecDriver mapRedTask : mapRedTasks) {
        // Only final map reduce jobs are considered; the outputs of intermediate
        // map reduce jobs are skipped.
        if (!mapRedTask.getWork().isFinalMapRed()) {
            continue;
        }
        if (mapRedTask.getWork().getReduceWork() == null) {
            continue;
        }
        Operator<? extends OperatorDesc> reducer = mapRedTask.getWork().getReduceWork().getReducer();
        // If the map work uses sampling, the output is not bucketed, so bucketing
        // inference is disabled.
        boolean disableBucketing = mapRedTask.getWork().getMapWork().getSamplingType() > 0;
        BucketingSortingCtx bCtx = new BucketingSortingCtx(disableBucketing);
        // RuleRegExp rules are used to match operators anywhere in the tree
        // RuleExactMatch rules are used to specify exactly what the tree should look like
        // In particular, this guarantees that the first operator is the reducer
        // (and its parent(s) are ReduceSinkOperators) since it begins walking the tree from
        // the reducer.
        Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
        opRules.put(new RuleRegExp("R1", SelectOperator.getOperatorName() + "%"), BucketingSortingOpProcFactory.getSelProc());
        // Matches only GroupByOperators which are reducers, rather than map group by operators,
        // or multi group by optimization specific operators
        opRules.put(new RuleExactMatch("R2", new String[] { GroupByOperator.getOperatorName() }), BucketingSortingOpProcFactory.getGroupByProc());
        // Matches only JoinOperators which are reducers, rather than map joins, SMB map joins, etc.
        opRules.put(new RuleExactMatch("R3", new String[] { JoinOperator.getOperatorName() }), BucketingSortingOpProcFactory.getJoinProc());
        opRules.put(new RuleRegExp("R5", FileSinkOperator.getOperatorName() + "%"), BucketingSortingOpProcFactory.getFileSinkProc());
        opRules.put(new RuleRegExp("R7", FilterOperator.getOperatorName() + "%"), BucketingSortingOpProcFactory.getFilterProc());
        opRules.put(new RuleRegExp("R8", LimitOperator.getOperatorName() + "%"), BucketingSortingOpProcFactory.getLimitProc());
        opRules.put(new RuleRegExp("R9", LateralViewForwardOperator.getOperatorName() + "%"), BucketingSortingOpProcFactory.getLateralViewForwardProc());
        opRules.put(new RuleRegExp("R10", LateralViewJoinOperator.getOperatorName() + "%"), BucketingSortingOpProcFactory.getLateralViewJoinProc());
        // Matches only ForwardOperators which are preceded by some other operator in the tree,
        // in particular it can't be a reducer (and hence cannot be one of the ForwardOperators
        // added by the multi group by optimization)
        opRules.put(new RuleRegExp("R11", ".+" + ForwardOperator.getOperatorName() + "%"), BucketingSortingOpProcFactory.getForwardProc());
        // Matches only ForwardOperators which are reducers and are followed by GroupByOperators
        // (specific to the multi group by optimization)
        opRules.put(new RuleExactMatch("R12", new String[] { ForwardOperator.getOperatorName(), GroupByOperator.getOperatorName() }), BucketingSortingOpProcFactory.getMultiGroupByProc());
        // The dispatcher fires the processor corresponding to the closest matching rule and passes
        // the context along
        Dispatcher disp = new DefaultRuleDispatcher(BucketingSortingOpProcFactory.getDefaultProc(), opRules, bCtx);
        GraphWalker ogw = new PreOrderWalker(disp);
        // Create a list of top operator nodes
        ArrayList<Node> topNodes = new ArrayList<Node>();
        topNodes.add(reducer);
        ogw.startWalking(topNodes, null);
        mapRedTask.getWork().getMapWork().getBucketedColsByDirectory().putAll(bCtx.getBucketedColsByDirectory());
        mapRedTask.getWork().getMapWork().getSortedColsByDirectory().putAll(bCtx.getSortedColsByDirectory());
    }
}
Also used: NodeProcessor (org.apache.hadoop.hive.ql.lib.NodeProcessor), DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher), Node (org.apache.hadoop.hive.ql.lib.Node), RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp), ArrayList (java.util.ArrayList), Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher), LinkedHashMap (java.util.LinkedHashMap), RuleExactMatch (org.apache.hadoop.hive.ql.lib.RuleExactMatch), ExecDriver (org.apache.hadoop.hive.ql.exec.mr.ExecDriver), Rule (org.apache.hadoop.hive.ql.lib.Rule), PreOrderWalker (org.apache.hadoop.hive.ql.lib.PreOrderWalker), GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker)
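
The comments in this example summarize the two rule flavors: RuleRegExp keys match operator names anywhere along the walked path, RuleExactMatch requires the path from the start node to match the given sequence exactly, and DefaultRuleDispatcher falls back to a default processor when no rule matches. Below is a stripped-down sketch of that setup; the class and method names are invented for illustration, and the processors only report which rule fired.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Stack;

import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.PreOrderWalker;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleExactMatch;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class RuleMatchingSketch {

    // Processor that only reports which rule fired and on which node.
    private static NodeProcessor report(final String label) {
        return new NodeProcessor() {

            @Override
            public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
                System.out.println(label + " fired on " + nd.getName());
                return null;
            }
        };
    }

    public static void walk(ArrayList<Node> topNodes, NodeProcessorCtx ctx) throws SemanticException {
        Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
        // Regex rule: fires for a FileSinkOperator wherever it appears on the walked path.
        opRules.put(new RuleRegExp("AnyFS", FileSinkOperator.getOperatorName() + "%"), report("AnyFS"));
        // Exact-match rule: fires only when the walked path is exactly one GroupByOperator,
        // i.e. the group by is the start node itself.
        opRules.put(new RuleExactMatch("RootGBY", new String[] { GroupByOperator.getOperatorName() }), report("RootGBY"));
        // Nodes matched by no rule fall through to the default processor.
        Dispatcher disp = new DefaultRuleDispatcher(report("default"), opRules, ctx);
        GraphWalker ogw = new PreOrderWalker(disp);
        ogw.startWalking(topNodes, null);
    }
}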

Aggregations

ArrayList (java.util.ArrayList): 4 examples
Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher): 4 examples
Node (org.apache.hadoop.hive.ql.lib.Node): 4 examples
PreOrderWalker (org.apache.hadoop.hive.ql.lib.PreOrderWalker): 4 examples
DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher): 3 examples
GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker): 3 examples
LinkedHashMap (java.util.LinkedHashMap): 2 examples
DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker): 2 examples
NodeProcessor (org.apache.hadoop.hive.ql.lib.NodeProcessor): 2 examples
Rule (org.apache.hadoop.hive.ql.lib.Rule): 2 examples
RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp): 2 examples
HashSet (java.util.HashSet): 1 example
LinkedHashSet (java.util.LinkedHashSet): 1 example
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 1 example
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 1 example
ExecDriver (org.apache.hadoop.hive.ql.exec.mr.ExecDriver): 1 example
RuleExactMatch (org.apache.hadoop.hive.ql.lib.RuleExactMatch): 1 example
TypeRule (org.apache.hadoop.hive.ql.lib.TypeRule): 1 example
SetSparkReducerParallelism (org.apache.hadoop.hive.ql.optimizer.spark.SetSparkReducerParallelism): 1 example
ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext): 1 example