Search in sources :

Example 41 with SemanticNodeProcessor

use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.

In class Generator, method transform.

/**
 * Walks the top operators of the parse context with a rule dispatcher whose processors
 * come from {@code OpProcFactory}, sharing a single {@code LineageCtx} across the walk.
 *
 * @param pctx parse context whose operator tree is walked
 * @return the same {@code pctx} instance that was passed in
 * @throws SemanticException if the graph walk fails
 * @see org.apache.hadoop.hive.ql.optimizer.Transform#transform(org.apache.hadoop.hive.ql.parse.ParseContext)
 */
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    // When the Atlas hook is configured, lineage is only of interest for insert, load,
    // create etc.; a plain query is handed back untouched.
    if (hooks != null
            && hooks.contains(ATLAS_HOOK_CLASSNAME)
            && !pctx.getQueryProperties().isCTAS()
            && !pctx.getQueryProperties().isMaterializedView()
            && pctx.getQueryProperties().isQuery()
            && pctx.getCreateTable() == null
            && pctx.getCreateViewDesc() == null
            && (pctx.getLoadTableWork() == null || pctx.getLoadTableWork().isEmpty())) {
        LOG.debug("Not evaluating lineage");
        return pctx;
    }

    // Reuse the index stored in the lineage state when there is one, otherwise start fresh.
    Index storedIndex = pctx.getQueryState().getLineageState().getIndex();
    Index lineageIndex = (storedIndex != null) ? storedIndex : new Index();

    long startMillis = System.currentTimeMillis();
    LineageCtx lineageCtx = new LineageCtx(pctx, lineageIndex);

    // Insertion order is kept by the LinkedHashMap, so rules stay in R1..R11 order.
    Map<SemanticRule, SemanticNodeProcessor> rules = new LinkedHashMap<>();
    rules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"), OpProcFactory.getTSProc());
    rules.put(new RuleRegExp("R2", ScriptOperator.getOperatorName() + "%"), OpProcFactory.getTransformProc());
    rules.put(new RuleRegExp("R3", UDTFOperator.getOperatorName() + "%"), OpProcFactory.getTransformProc());
    rules.put(new RuleRegExp("R4", SelectOperator.getOperatorName() + "%"), OpProcFactory.getSelProc());
    rules.put(new RuleRegExp("R5", GroupByOperator.getOperatorName() + "%"), OpProcFactory.getGroupByProc());
    rules.put(new RuleRegExp("R6", UnionOperator.getOperatorName() + "%"), OpProcFactory.getUnionProc());
    rules.put(new RuleRegExp("R7", CommonJoinOperator.getOperatorName() + "%|" + MapJoinOperator.getOperatorName() + "%"), OpProcFactory.getJoinProc());
    rules.put(new RuleRegExp("R8", ReduceSinkOperator.getOperatorName() + "%"), OpProcFactory.getReduceSinkProc());
    rules.put(new RuleRegExp("R9", LateralViewJoinOperator.getOperatorName() + "%"), OpProcFactory.getLateralViewJoinProc());
    rules.put(new RuleRegExp("R10", PTFOperator.getOperatorName() + "%"), OpProcFactory.getTransformProc());
    rules.put(new RuleRegExp("R11", FilterOperator.getOperatorName() + "%"), OpProcFactory.getFilterProc());

    // The dispatcher fires the processor of the closest matching rule and hands it the context.
    SemanticDispatcher dispatcher = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), rules, lineageCtx);
    SemanticGraphWalker walker = new LevelOrderWalker(dispatcher, 2);

    // The walk starts from every top operator of the plan.
    ArrayList<Node> roots = new ArrayList<Node>(pctx.getTopOps().values());
    walker.startWalking(roots, null);

    LOG.debug("Time taken for lineage transform={}", (System.currentTimeMillis() - startMillis));
    return pctx;
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) Index(org.apache.hadoop.hive.ql.optimizer.lineage.LineageCtx.Index) LinkedHashMap(java.util.LinkedHashMap) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) LevelOrderWalker(org.apache.hadoop.hive.ql.lib.LevelOrderWalker)

Example 42 with SemanticNodeProcessor

use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.

In class SparkCompiler, method generateTaskTreeHelper.

/**
 * Registers the Spark plan-generation rules and walks the operator tree from the given
 * start nodes with a {@code GenSparkWorkWalker}.
 *
 * @param procCtx context handed to the dispatcher and the walker
 * @param topNodes nodes the walk starts from
 * @throws SemanticException if a processor or the walk fails
 */
private void generateTaskTreeHelper(GenSparkProcContext procCtx, List<Node> topNodes) throws SemanticException {
    // The walker traverses the tree in a DFS manner while maintaining the operator
    // stack; the dispatcher generates the plan from the operator tree.
    Map<SemanticRule, SemanticNodeProcessor> rules = new LinkedHashMap<>();
    GenSparkWork sparkWorkGen = new GenSparkWork(GenSparkUtils.getUtils());

    rules.put(new RuleRegExp("Split Work - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), sparkWorkGen);
    rules.put(new RuleRegExp("Split Work - SparkPartitionPruningSink", SparkPartitionPruningSinkOperator.getOperatorName() + "%"), sparkWorkGen);
    rules.put(new TypeRule(MapJoinOperator.class), new SparkReduceSinkMapJoinProc());
    rules.put(new RuleRegExp("Split Work + Move/Merge - FileSink", FileSinkOperator.getOperatorName() + "%"), new CompositeProcessor(new SparkFileSinkProcessor(), sparkWorkGen));
    rules.put(new RuleRegExp("Handle Analyze Command", TableScanOperator.getOperatorName() + "%"), new SparkProcessAnalyzeTable(GenSparkUtils.getUtils()));
    rules.put(new RuleRegExp("Remember union", UnionOperator.getOperatorName() + "%"), new SemanticNodeProcessor() {

        @Override
        public Object process(Node node, Stack<Node> stack, NodeProcessorCtx ctx, Object... outputs) throws SemanticException {
            // Nothing to compute here: only record that a union was encountered.
            ((GenSparkProcContext) ctx).currentUnionOperators.add((UnionOperator) node);
            return null;
        }
    });

    /*
     *  SMB join case:   (Big)   (Small)  (Small)
     *                     TS       TS       TS
     *                      \       |       /
     *                       \      DS     DS
     *                         \   |    /
     *                         SMBJoinOP
     *
     * Some of the other processors expect only one traversal beyond SMBJoinOp.
     * Traversal therefore continues from the big-table path only and stops on the
     * small-table path once SMBJoinOp is reached. SMB join information is also
     * recorded in the context so the MapWork can be annotated later on.
     */
    rules.put(new TypeRule(SMBMapJoinOperator.class), new SemanticNodeProcessor() {

        @Override
        public Object process(Node node, Stack<Node> stack, NodeProcessorCtx ctx, Object... outputs) throws SemanticException {
            GenSparkProcContext sparkCtx = (GenSparkProcContext) ctx;
            SMBMapJoinOperator smbJoin = (SMBMapJoinOperator) node;

            // Fetch the join's book-keeping entry, creating it on first visit.
            SparkSMBMapJoinInfo joinInfo = sparkCtx.smbMapJoinCtxMap.get(smbJoin);
            if (joinInfo == null) {
                joinInfo = new SparkSMBMapJoinInfo();
                sparkCtx.smbMapJoinCtxMap.put(smbJoin, joinInfo);
            }

            // A DummyStoreOperator on the stack means we arrived via a small-table path.
            boolean viaSmallTable = false;
            for (Node visited : stack) {
                if (visited instanceof DummyStoreOperator) {
                    viaSmallTable = true;
                    break;
                }
            }

            if (viaSmallTable) {
                // Small-table side: do the book-keeping and skip traversal.
                joinInfo.smallTableRootOps.add(sparkCtx.currentRootOperator);
                return true;
            }

            // Big-table side: do the book-keeping and continue traversal.
            joinInfo.bigTableRootOp = sparkCtx.currentRootOperator;
            return false;
        }
    });

    // The dispatcher fires the processor of the closest matching rule and passes
    // the context along.
    SemanticDispatcher dispatcher = new DefaultRuleDispatcher(null, rules, procCtx);
    SemanticGraphWalker walker = new GenSparkWorkWalker(dispatcher, procCtx);
    walker.startWalking(topNodes, null);
}
Also used : Node(org.apache.hadoop.hive.ql.lib.Node) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) LinkedHashMap(java.util.LinkedHashMap) NodeProcessorCtx(org.apache.hadoop.hive.ql.lib.NodeProcessorCtx) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) CompositeProcessor(org.apache.hadoop.hive.ql.lib.CompositeProcessor) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) SparkReduceSinkMapJoinProc(org.apache.hadoop.hive.ql.optimizer.spark.SparkReduceSinkMapJoinProc) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule)

Example 43 with SemanticNodeProcessor

use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.

In class IndexPredicateAnalyzer, method analyzePredicate.

/**
 * Analyzes a predicate by walking its expression tree.
 *
 * @param predicate predicate to be analyzed
 *
 * @param searchConditions receives conditions produced by analysis
 *
 * @return residual predicate which could not be translated to
 * searchConditions
 */
public ExprNodeDesc analyzePredicate(ExprNodeDesc predicate, final List<IndexSearchCondition> searchConditions) {
    // The rule map stays empty; the processor below is supplied as the dispatcher's first
    // argument (presumably its default processor, so it would see every node).
    Map<SemanticRule, SemanticNodeProcessor> noRules = new LinkedHashMap<>();
    SemanticNodeProcessor conjunctionProcessor = new SemanticNodeProcessor() {

        @Override
        public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
            // Only expressions that sit inside a pure conjunction are analyzed: as soon as
            // an ancestor is something other than AND (OR, CASE, etc.) the node is returned as is.
            for (Node ancestor : stack) {
                if (ancestor == nd) {
                    break;
                }
                boolean ancestorIsAnd = FunctionRegistry.isOpAnd((ExprNodeDesc) ancestor);
                if (!ancestorIsAnd) {
                    return nd;
                }
            }
            if (!(nd instanceof ExprNodeGenericFuncDesc)) {
                return nd;
            }
            return analyzeExpr((ExprNodeGenericFuncDesc) nd, searchConditions, nodeOutputs);
        }
    };

    SemanticDispatcher dispatcher = new DefaultRuleDispatcher(conjunctionProcessor, noRules, null);
    SemanticGraphWalker walker = new DefaultGraphWalker(dispatcher);

    List<Node> roots = new ArrayList<>();
    roots.add(predicate);
    HashMap<Node, Object> outputs = new HashMap<>();
    try {
        walker.startWalking(roots, outputs);
    } catch (SemanticException ex) {
        // This method declares no checked exception, so the failure is rethrown unchecked.
        throw new RuntimeException(ex);
    }

    // The residual predicate is read from the output map under the root predicate's key.
    return (ExprNodeDesc) outputs.get(predicate);
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Node(org.apache.hadoop.hive.ql.lib.Node) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) Stack(java.util.Stack) NodeProcessorCtx(org.apache.hadoop.hive.ql.lib.NodeProcessorCtx) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 44 with SemanticNodeProcessor

use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.

In class ExprWalkerProcFactory, method extractPushdownPreds.

/**
 * Extracts pushdown predicates from the given list of predicate expressions.
 *
 * @param opContext
 *          operator context; its parse context supplies the configuration
 * @param op
 *          operator of the predicates being processed
 * @param preds
 *          predicate expressions to walk; each one is cloned before the walk
 * @return The expression walker information
 * @throws SemanticException
 *           if the expression walk or the candidate extraction fails
 */
public static ExprWalkerInfo extractPushdownPreds(OpWalkerInfo opContext, Operator<? extends OperatorDesc> op, List<ExprNodeDesc> preds) throws SemanticException {
    // Set up the walker context, the type-based rules, the dispatcher and the walker.
    ExprWalkerInfo walkerInfo = new ExprWalkerInfo(op);

    Map<SemanticRule, SemanticNodeProcessor> rules = new LinkedHashMap<>();
    rules.put(new TypeRule(ExprNodeColumnDesc.class), getColumnProcessor());
    rules.put(new TypeRule(ExprNodeFieldDesc.class), getFieldProcessor());
    rules.put(new TypeRule(ExprNodeGenericFuncDesc.class), getGenericFuncProcessor());

    // The dispatcher fires the processor of the closest matching rule and passes
    // the context along.
    SemanticDispatcher dispatcher = new DefaultRuleDispatcher(getDefaultExprProcessor(), rules, walkerInfo);
    SemanticGraphWalker walker = new ExpressionWalker(dispatcher);

    // Walk clones rather than the caller's expressions, and remember which original
    // each clone was made from.
    List<ExprNodeDesc> clones = new ArrayList<>();
    for (ExprNodeDesc original : preds) {
        ExprNodeDesc copy = original.clone();
        clones.add(copy);
        walkerInfo.getNewToOldExprMap().put(copy, original);
    }
    List<Node> roots = new ArrayList<Node>(clones);
    walker.startWalking(roots, null);

    // Check each root expression for final candidates.
    HiveConf conf = opContext.getParseContext().getConf();
    for (ExprNodeDesc root : clones) {
        extractFinalCandidates(root, walkerInfo, conf);
    }
    return walkerInfo;
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) ExpressionWalker(org.apache.hadoop.hive.ql.lib.ExpressionWalker) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) HiveConf(org.apache.hadoop.hive.conf.HiveConf) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule)

Example 45 with SemanticNodeProcessor

use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.

In class BucketVersionPopulator, method findOpGroups.

/**
 * Walks the plan from its top operators with {@code IdentifyBucketGroups} and returns
 * the groups held by the processor context afterwards.
 *
 * @param pctx parse context providing the top operators
 * @return the {@code groups} field of the context used for the walk
 * @throws SemanticException if the graph walk fails
 */
private Set<OpGroup> findOpGroups(ParseContext pctx) throws SemanticException {
    BucketVersionProcessorCtx groupCtx = new BucketVersionProcessorCtx();

    // The rule map is left empty; IdentifyBucketGroups is supplied as the dispatcher's
    // first argument.
    SemanticDispatcher dispatcher = new DefaultRuleDispatcher(
            new IdentifyBucketGroups(),
            new LinkedHashMap<SemanticRule, SemanticNodeProcessor>(),
            groupCtx);
    SemanticGraphWalker walker = new DefaultGraphWalker(dispatcher);

    ArrayList<Node> roots = new ArrayList<Node>(pctx.getTopOps().values());
    walker.startWalking(roots, null);
    return groupCtx.groups;
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)

Aggregations

SemanticNodeProcessor (org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) 68, LinkedHashMap (java.util.LinkedHashMap) 66, Node (org.apache.hadoop.hive.ql.lib.Node) 66, SemanticGraphWalker (org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) 66, SemanticRule (org.apache.hadoop.hive.ql.lib.SemanticRule) 66, SemanticDispatcher (org.apache.hadoop.hive.ql.lib.SemanticDispatcher) 65, DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) 63, ArrayList (java.util.ArrayList) 62, RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp) 57, DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) 36, MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator) 11, ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) 11, JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator) 9, Operator (org.apache.hadoop.hive.ql.exec.Operator) 9, FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator) 8, ForwardWalker (org.apache.hadoop.hive.ql.lib.ForwardWalker) 8, TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator) 7, UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator) 7, AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) 6, DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator) 6