
Example 1 with Index

Use of org.apache.hadoop.hive.ql.optimizer.lineage.LineageCtx.Index in project hive by apache.

The class LineageLogger, method run.
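This method only fires when the class is registered as a post-execution hook. As a minimal sketch (assuming the class is org.apache.hadoop.hive.ql.hooks.LineageLogger, where this method lives), it can be enabled for a session with:

SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.LineageLogger;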

@Override
public void run(HookContext hookContext) {
    assert (hookContext.getHookType() == HookType.POST_EXEC_HOOK);
    QueryPlan plan = hookContext.getQueryPlan();
    Index index = hookContext.getIndex();
    SessionState ss = SessionState.get();
    if (ss != null && index != null && OPERATION_NAMES.contains(plan.getOperationName()) && !plan.isExplain()) {
        try {
            StringBuilderWriter out = new StringBuilderWriter(1024);
            JsonWriter writer = new JsonWriter(out);
            String queryStr = plan.getQueryStr().trim();
            writer.beginObject();
            writer.name("version").value(FORMAT_VERSION);
            HiveConf conf = ss.getConf();
            boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST);
            if (!testMode) {
                // Don't emit user/timestamp info in test mode,
                // so that the test golden output file is fixed.
                long queryTime = plan.getQueryStartTime().longValue();
                if (queryTime == 0)
                    queryTime = System.currentTimeMillis();
                long duration = System.currentTimeMillis() - queryTime;
                writer.name("user").value(hookContext.getUgi().getUserName());
                writer.name("timestamp").value(queryTime / 1000);
                writer.name("duration").value(duration);
                writer.name("jobIds");
                writer.beginArray();
                List<TaskRunner> tasks = hookContext.getCompleteTaskList();
                if (tasks != null && !tasks.isEmpty()) {
                    for (TaskRunner task : tasks) {
                        String jobId = task.getTask().getJobID();
                        if (jobId != null) {
                            writer.value(jobId);
                        }
                    }
                }
                writer.endArray();
            }
            writer.name("engine").value(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE));
            writer.name("database").value(ss.getCurrentDatabase());
            writer.name("hash").value(getQueryHash(queryStr));
            writer.name("queryText").value(queryStr);
            List<Edge> edges = getEdges(plan, index);
            Set<Vertex> vertices = getVertices(edges);
            writeEdges(writer, edges, hookContext.getConf());
            writeVertices(writer, vertices);
            writer.endObject();
            writer.close();
            // Log the lineage info
            String lineage = out.toString();
            if (testMode) {
                // Log to console
                log(lineage);
            } else {
                // In non-test mode, emit to a log file,
                // which can be different from the normal hive.log.
                // For example, using NoDeleteRollingFileAppender to
                // log to some file with different rolling policy.
                LOG.info(lineage);
            }
        } catch (Throwable t) {
            // Don't fail the query just because of any lineage issue.
            log("Failed to log lineage graph, query is not affected\n" + org.apache.hadoop.util.StringUtils.stringifyException(t));
        }
    }
}
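
For reference, a hedged sketch of the JSON document this method emits. The field names come straight from the writer calls above; every value is an illustrative placeholder, FORMAT_VERSION is assumed here to be "1.0", the user/timestamp/duration/jobIds block is skipped when HIVE_IN_TEST is set, and the edges/vertices payloads (produced by writeEdges and writeVertices) are elided:

{
  "version": "1.0",
  "user": "hive",
  "timestamp": 1700000000,
  "duration": 1234,
  "jobIds": ["job_..."],
  "engine": "tez",
  "database": "default",
  "hash": "...",
  "queryText": "INSERT INTO target SELECT * FROM source",
  "edges": [ ... ],
  "vertices": [ ... ]
}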
Also used:
- org.apache.hadoop.hive.ql.session.SessionState
- org.apache.commons.io.output.StringBuilderWriter
- org.apache.hadoop.hive.ql.optimizer.lineage.LineageCtx.Index
- org.apache.hadoop.hive.ql.QueryPlan
- com.google.gson.stream.JsonWriter
- org.apache.hadoop.hive.ql.exec.TaskRunner
- org.apache.hadoop.hive.conf.HiveConf

Example 2 with Index

Use of org.apache.hadoop.hive.ql.optimizer.lineage.LineageCtx.Index in project hive by apache.

The class Generator, method transform.

/* (non-Javadoc)
   * @see org.apache.hadoop.hive.ql.optimizer.Transform#transform(org.apache.hadoop.hive.ql.parse.ParseContext)
   */
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    if (hooks != null && hooks.contains(ATLAS_HOOK_CLASSNAME)) {
        // Atlas is interested in lineage information for insert, load, create, etc.,
        // so skip the transform for a plain query.
        if (!pctx.getQueryProperties().isCTAS()
                && !pctx.getQueryProperties().isMaterializedView()
                && pctx.getQueryProperties().isQuery()
                && pctx.getCreateTable() == null
                && pctx.getCreateViewDesc() == null
                && (pctx.getLoadTableWork() == null || pctx.getLoadTableWork().isEmpty())) {
            LOG.debug("Not evaluating lineage");
            return pctx;
        }
    }
    Index index = pctx.getQueryState().getLineageState().getIndex();
    if (index == null) {
        index = new Index();
    }
    long sTime = System.currentTimeMillis();
    // Create the lineage context
    LineageCtx lCtx = new LineageCtx(pctx, index);
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"), OpProcFactory.getTSProc());
    opRules.put(new RuleRegExp("R2", ScriptOperator.getOperatorName() + "%"), OpProcFactory.getTransformProc());
    opRules.put(new RuleRegExp("R3", UDTFOperator.getOperatorName() + "%"), OpProcFactory.getTransformProc());
    opRules.put(new RuleRegExp("R4", SelectOperator.getOperatorName() + "%"), OpProcFactory.getSelProc());
    opRules.put(new RuleRegExp("R5", GroupByOperator.getOperatorName() + "%"), OpProcFactory.getGroupByProc());
    opRules.put(new RuleRegExp("R6", UnionOperator.getOperatorName() + "%"), OpProcFactory.getUnionProc());
    opRules.put(new RuleRegExp("R7", CommonJoinOperator.getOperatorName() + "%|" + MapJoinOperator.getOperatorName() + "%"), OpProcFactory.getJoinProc());
    opRules.put(new RuleRegExp("R8", ReduceSinkOperator.getOperatorName() + "%"), OpProcFactory.getReduceSinkProc());
    opRules.put(new RuleRegExp("R9", LateralViewJoinOperator.getOperatorName() + "%"), OpProcFactory.getLateralViewJoinProc());
    opRules.put(new RuleRegExp("R10", PTFOperator.getOperatorName() + "%"), OpProcFactory.getTransformProc());
    opRules.put(new RuleRegExp("R11", FilterOperator.getOperatorName() + "%"), OpProcFactory.getFilterProc());
    // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), opRules, lCtx);
    SemanticGraphWalker ogw = new LevelOrderWalker(disp, 2);
    // Create a list of top operator ("topop") nodes to start the walk from
    ArrayList<Node> topNodes = new ArrayList<Node>(pctx.getTopOps().values());
    ogw.startWalking(topNodes, null);
    LOG.debug("Time taken for lineage transform={}", (System.currentTimeMillis() - sTime));
    return pctx;
}
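
The wiring above is Hive's standard rule-dispatch pattern: operator-name regexes map to node processors, the dispatcher fires the processor for the matching rule (Hive's RuleRegExp actually scores candidate rules against the walked operator stack and picks the closest match; the sketch below simplifies this to first-match), and a level-order walker drives the traversal from the root operators. A minimal, self-contained Java analogue of that pattern, using hypothetical stand-in types rather than Hive's API:

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Pattern;

// Toy analogue of DefaultRuleDispatcher + RuleRegExp (not Hive's API):
// rules map operator-name patterns to processors; the first matching
// rule wins, with a default processor as the fallback.
public class RuleDispatchSketch {

    interface Processor {
        void process(String opName);
    }

    public static void main(String[] args) {
        Map<Pattern, Processor> opRules = new LinkedHashMap<>();
        opRules.put(Pattern.compile("TS_.*"), op -> System.out.println("table scan proc: " + op));
        opRules.put(Pattern.compile("SEL_.*"), op -> System.out.println("select proc: " + op));
        opRules.put(Pattern.compile("(JOIN|MAPJOIN)_.*"), op -> System.out.println("join proc: " + op));
        Processor defaultProc = op -> System.out.println("default proc: " + op);

        // Stand-in for the graph walker visiting each operator in the plan.
        for (String op : new String[] { "TS_0", "SEL_1", "GBY_2", "JOIN_3" }) {
            opRules.entrySet().stream()
                    .filter(e -> e.getKey().matcher(op).matches())
                    .findFirst()
                    .map(Map.Entry::getValue)
                    .orElse(defaultProc)
                    .process(op);
        }
    }
}

Running the sketch prints one line per operator, with GBY_2 falling through to the default processor, which mirrors how OpProcFactory.getDefaultProc() handles operators that no rule matches.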
Also used:
- org.apache.hadoop.hive.ql.lib.SemanticRule
- org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher
- org.apache.hadoop.hive.ql.lib.Node
- org.apache.hadoop.hive.ql.lib.RuleRegExp
- java.util.ArrayList
- org.apache.hadoop.hive.ql.lib.SemanticGraphWalker
- org.apache.hadoop.hive.ql.optimizer.lineage.LineageCtx.Index
- java.util.LinkedHashMap
- org.apache.hadoop.hive.ql.lib.SemanticDispatcher
- org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor
- org.apache.hadoop.hive.ql.lib.LevelOrderWalker

Aggregations

- Index (org.apache.hadoop.hive.ql.optimizer.lineage.LineageCtx.Index): 2
- JsonWriter (com.google.gson.stream.JsonWriter): 1
- ArrayList (java.util.ArrayList): 1
- LinkedHashMap (java.util.LinkedHashMap): 1
- StringBuilderWriter (org.apache.commons.io.output.StringBuilderWriter): 1
- HiveConf (org.apache.hadoop.hive.conf.HiveConf): 1
- QueryPlan (org.apache.hadoop.hive.ql.QueryPlan): 1
- TaskRunner (org.apache.hadoop.hive.ql.exec.TaskRunner): 1
- DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher): 1
- LevelOrderWalker (org.apache.hadoop.hive.ql.lib.LevelOrderWalker): 1
- Node (org.apache.hadoop.hive.ql.lib.Node): 1
- RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp): 1
- SemanticDispatcher (org.apache.hadoop.hive.ql.lib.SemanticDispatcher): 1
- SemanticGraphWalker (org.apache.hadoop.hive.ql.lib.SemanticGraphWalker): 1
- SemanticNodeProcessor (org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor): 1
- SemanticRule (org.apache.hadoop.hive.ql.lib.SemanticRule): 1
- SessionState (org.apache.hadoop.hive.ql.session.SessionState): 1