use of org.apache.hadoop.hive.ql.optimizer.lineage.LineageCtx.Index in project hive by apache.
the class LineageLogger method run.
@Override
public void run(HookContext hookContext) {
assert (hookContext.getHookType() == HookType.POST_EXEC_HOOK);
QueryPlan plan = hookContext.getQueryPlan();
Index index = hookContext.getIndex();
SessionState ss = SessionState.get();
if (ss != null && index != null && OPERATION_NAMES.contains(plan.getOperationName()) && !plan.isExplain()) {
try {
StringBuilderWriter out = new StringBuilderWriter(1024);
JsonWriter writer = new JsonWriter(out);
String queryStr = plan.getQueryStr().trim();
writer.beginObject();
writer.name("version").value(FORMAT_VERSION);
HiveConf conf = ss.getConf();
boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST);
if (!testMode) {
// Don't emit user/timestamp info in test mode,
// so that the test golden output file is fixed.
long queryTime = plan.getQueryStartTime().longValue();
if (queryTime == 0)
queryTime = System.currentTimeMillis();
long duration = System.currentTimeMillis() - queryTime;
writer.name("user").value(hookContext.getUgi().getUserName());
writer.name("timestamp").value(queryTime / 1000);
writer.name("duration").value(duration);
writer.name("jobIds");
writer.beginArray();
List<TaskRunner> tasks = hookContext.getCompleteTaskList();
if (tasks != null && !tasks.isEmpty()) {
for (TaskRunner task : tasks) {
String jobId = task.getTask().getJobID();
if (jobId != null) {
writer.value(jobId);
}
}
}
writer.endArray();
}
writer.name("engine").value(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE));
writer.name("database").value(ss.getCurrentDatabase());
writer.name("hash").value(getQueryHash(queryStr));
writer.name("queryText").value(queryStr);
List<Edge> edges = getEdges(plan, index);
Set<Vertex> vertices = getVertices(edges);
writeEdges(writer, edges, hookContext.getConf());
writeVertices(writer, vertices);
writer.endObject();
writer.close();
// Logger the lineage info
String lineage = out.toString();
if (testMode) {
// Logger to console
log(lineage);
} else {
// In non-test mode, emit to a log file,
// which can be different from the normal hive.log.
// For example, using NoDeleteRollingFileAppender to
// log to some file with different rolling policy.
LOG.info(lineage);
}
} catch (Throwable t) {
// Don't fail the query just because of any lineage issue.
log("Failed to log lineage graph, query is not affected\n" + org.apache.hadoop.util.StringUtils.stringifyException(t));
}
}
}
use of org.apache.hadoop.hive.ql.optimizer.lineage.LineageCtx.Index in project hive by apache.
the class Generator method transform.
/* (non-Javadoc)
* @see org.apache.hadoop.hive.ql.optimizer.Transform#transform(org.apache.hadoop.hive.ql.parse.ParseContext)
*/
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
if (hooks != null && hooks.contains(ATLAS_HOOK_CLASSNAME)) {
// Atlas would be interested in lineage information for insert,load,create etc.
if (!pctx.getQueryProperties().isCTAS() && !pctx.getQueryProperties().isMaterializedView() && pctx.getQueryProperties().isQuery() && pctx.getCreateTable() == null && pctx.getCreateViewDesc() == null && (pctx.getLoadTableWork() == null || pctx.getLoadTableWork().isEmpty())) {
LOG.debug("Not evaluating lineage");
return pctx;
}
}
Index index = pctx.getQueryState().getLineageState().getIndex();
if (index == null) {
index = new Index();
}
long sTime = System.currentTimeMillis();
// Create the lineage context
LineageCtx lCtx = new LineageCtx(pctx, index);
Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"), OpProcFactory.getTSProc());
opRules.put(new RuleRegExp("R2", ScriptOperator.getOperatorName() + "%"), OpProcFactory.getTransformProc());
opRules.put(new RuleRegExp("R3", UDTFOperator.getOperatorName() + "%"), OpProcFactory.getTransformProc());
opRules.put(new RuleRegExp("R4", SelectOperator.getOperatorName() + "%"), OpProcFactory.getSelProc());
opRules.put(new RuleRegExp("R5", GroupByOperator.getOperatorName() + "%"), OpProcFactory.getGroupByProc());
opRules.put(new RuleRegExp("R6", UnionOperator.getOperatorName() + "%"), OpProcFactory.getUnionProc());
opRules.put(new RuleRegExp("R7", CommonJoinOperator.getOperatorName() + "%|" + MapJoinOperator.getOperatorName() + "%"), OpProcFactory.getJoinProc());
opRules.put(new RuleRegExp("R8", ReduceSinkOperator.getOperatorName() + "%"), OpProcFactory.getReduceSinkProc());
opRules.put(new RuleRegExp("R9", LateralViewJoinOperator.getOperatorName() + "%"), OpProcFactory.getLateralViewJoinProc());
opRules.put(new RuleRegExp("R10", PTFOperator.getOperatorName() + "%"), OpProcFactory.getTransformProc());
opRules.put(new RuleRegExp("R11", FilterOperator.getOperatorName() + "%"), OpProcFactory.getFilterProc());
// The dispatcher fires the processor corresponding to the closest matching rule and passes the context along
SemanticDispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), opRules, lCtx);
SemanticGraphWalker ogw = new LevelOrderWalker(disp, 2);
// Create a list of topop nodes
ArrayList<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(pctx.getTopOps().values());
ogw.startWalking(topNodes, null);
LOG.debug("Time taken for lineage transform={}", (System.currentTimeMillis() - sTime));
return pctx;
}
Aggregations