Use of org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo in the Apache Atlas project.
Class CreateHiveProcess, method processColumnLineage.
/**
 * Adds one column-lineage process entity to {@code entities} for every output column in the
 * Hive lineage info that has at least one resolvable input column.
 *
 * <p>Output and input columns are resolved through the context by their qualified names.
 * A lineage entry is skipped when its output column is unknown or when none of its base
 * columns can be resolved. A missing dependency, or a dependency without a base-column
 * collection, is treated as "no input columns" instead of failing the whole method.
 *
 * @param hiveProcess process entity; its name and qualified name prefix the corresponding
 *                    attributes of each column-lineage entity, and its object id is stored
 *                    as the query attribute
 * @param entities    receives the created column-lineage entities
 */
private void processColumnLineage(AtlasEntity hiveProcess, AtlasEntitiesWithExtInfo entities) {
    LineageInfo lineageInfo = getHiveContext().getLinfo();

    if (lineageInfo == null || CollectionUtils.isEmpty(lineageInfo.entrySet())) {
        return;
    }

    for (Map.Entry<DependencyKey, Dependency> entry : lineageInfo.entrySet()) {
        String outputColName = getQualifiedName(entry.getKey());
        AtlasEntity outputColumn = context.getEntity(outputColName);

        if (outputColumn == null) {
            LOG.warn("column-lineage: non-existing output-column {}", outputColName);
            continue;
        }

        Dependency dependency = entry.getValue();
        List<AtlasEntity> inputColumns = new ArrayList<>();

        // A lineage entry may carry no dependency or no base columns; iterating a null
        // collection would throw and abort lineage for every remaining column.
        if (dependency != null && dependency.getBaseCols() != null) {
            for (BaseColumnInfo baseColumn : dependency.getBaseCols()) {
                String inputColName = getQualifiedName(baseColumn);
                AtlasEntity inputColumn = context.getEntity(inputColName);

                if (inputColumn == null) {
                    LOG.warn("column-lineage: non-existing input-column {} for output-column={}", inputColName, outputColName);
                    continue;
                }

                inputColumns.add(inputColumn);
            }
        }

        // Nothing to link: either no base columns were reported or none could be resolved.
        if (inputColumns.isEmpty()) {
            continue;
        }

        // dependency is non-null here, because inputColumns can only be filled from it.
        AtlasEntity columnLineageProcess = new AtlasEntity(HIVE_TYPE_COLUMN_LINEAGE);

        columnLineageProcess.setAttribute(ATTRIBUTE_NAME, hiveProcess.getAttribute(ATTRIBUTE_NAME) + ":" + outputColumn.getAttribute(ATTRIBUTE_NAME));
        columnLineageProcess.setAttribute(ATTRIBUTE_QUALIFIED_NAME, hiveProcess.getAttribute(ATTRIBUTE_QUALIFIED_NAME) + ":" + outputColumn.getAttribute(ATTRIBUTE_NAME));
        columnLineageProcess.setAttribute(ATTRIBUTE_INPUTS, getObjectIds(inputColumns));
        columnLineageProcess.setAttribute(ATTRIBUTE_OUTPUTS, Collections.singletonList(getObjectId(outputColumn)));
        columnLineageProcess.setAttribute(ATTRIBUTE_QUERY, getObjectId(hiveProcess));
        columnLineageProcess.setAttribute(ATTRIBUTE_DEPENDENCY_TYPE, dependency.getType());
        columnLineageProcess.setAttribute(ATTRIBUTE_EXPRESSION, dependency.getExpr());

        entities.addEntity(columnLineageProcess);
    }
}
Use of org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo in the Apache Hive project.
Class PostExecutePrinter, method run.
/**
 * Prints the post-execution report to the session console: the query string and command
 * type (when a query state is given), the input and output entities, and one
 * "POSTHOOK: Lineage:" line per lineage entry that has a dependency.
 *
 * <p>Does nothing when the session has no console. The {@code ugi} argument is not used
 * by this method.
 *
 * @param queryState state of the executed query; may be null, in which case the query and
 *                   type lines are omitted
 * @param inputs     entities read by the query
 * @param outputs    entities written by the query
 * @param linfo      lineage information; may be null, in which case no lineage is printed
 * @param ugi        user information (unused here)
 */
public void run(QueryState queryState, Set<ReadEntity> inputs, Set<WriteEntity> outputs, LineageInfo linfo, UserGroupInformation ugi) throws Exception {
    LogHelper console = SessionState.getConsole();
    if (console == null) {
        return;
    }

    if (queryState != null) {
        console.printInfo("POSTHOOK: query: " + queryState.getQueryString().trim(), false);
        console.printInfo("POSTHOOK: type: " + queryState.getCommandType(), false);
    }

    PreExecutePrinter.printEntities(console, inputs, "POSTHOOK: Input: ");
    PreExecutePrinter.printEntities(console, outputs, "POSTHOOK: Output: ");

    // The generic lineage information is optional.
    if (linfo == null) {
        return;
    }

    // Sort a copy of the entries with DependencyKeyComp before printing.
    LinkedList<Map.Entry<DependencyKey, Dependency>> sortedEntries =
        new LinkedList<Map.Entry<DependencyKey, Dependency>>(linfo.entrySet());
    Collections.sort(sortedEntries, new DependencyKeyComp());

    for (Map.Entry<DependencyKey, Dependency> lineageEntry : sortedEntries) {
        Dependency dependency = lineageEntry.getValue();
        DependencyKey key = lineageEntry.getKey();
        if (dependency == null) {
            continue;
        }

        StringBuilder line = new StringBuilder();
        line.append("POSTHOOK: Lineage: ");

        if (!key.getDataContainer().isPartition()) {
            line.append(key.getDataContainer().getTable().getTableName());
        } else {
            Partition part = key.getDataContainer().getPartition();
            line.append(part.getTableName());
            line.append(" PARTITION(");
            int keyIndex = 0;
            for (FieldSchema partitionKey : key.getDataContainer().getTable().getPartitionKeys()) {
                if (keyIndex != 0) {
                    line.append(",");
                }
                // Partition values are positional: the i-th value belongs to the i-th partition key.
                line.append(partitionKey.getName()).append("=").append(part.getValues().get(keyIndex));
                keyIndex++;
            }
            line.append(")");
        }

        line.append(".").append(key.getFieldSchema().getName())
            .append(" ").append(dependency.getType()).append(" ");

        line.append("[");
        for (BaseColumnInfo baseCol : dependency.getBaseCols()) {
            line.append("(").append(baseCol.getTabAlias().getTable().getTableName()).append(")")
                .append(baseCol.getTabAlias().getAlias())
                .append(".").append(baseCol.getColumn())
                .append(", ");
        }
        line.append("]");

        console.printInfo(line.toString(), false);
    }
}
Use of org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo in the Apache Hive project.
Class LineageLogger, method createSourceVertices.
/**
 * Turns a collection of base columns into an insertion-ordered set of vertices,
 * obtaining each vertex through {@code getOrCreateVertex} with the given cache.
 *
 * <p>Base columns that belong to temporary tables are skipped. A base column without
 * a field schema yields a TABLE vertex labelled with the qualified table name;
 * otherwise it yields a COLUMN vertex labelled {@code <qualified table>.<column>}.
 *
 * @param vertexCache cache passed to {@code getOrCreateVertex}
 * @param baseCols    base columns to convert; may be null or empty
 * @return the resulting vertices, empty when there is nothing to convert
 */
private static Set<Vertex> createSourceVertices(Map<String, Vertex> vertexCache, Collection<BaseColumnInfo> baseCols) {
    Set<Vertex> sources = new LinkedHashSet<Vertex>();
    if (baseCols == null || baseCols.isEmpty()) {
        return sources;
    }

    for (BaseColumnInfo baseCol : baseCols) {
        Table sourceTable = baseCol.getTabAlias().getTable();
        // Ignore temporary tables
        if (sourceTable.isTemporary()) {
            continue;
        }

        String qualifiedTable = Warehouse.getQualifiedName(sourceTable);
        FieldSchema column = baseCol.getColumn();

        Vertex.Type vertexType;
        String vertexLabel;
        if (column == null) {
            vertexType = Vertex.Type.TABLE;
            vertexLabel = qualifiedTable;
        } else {
            vertexType = Vertex.Type.COLUMN;
            vertexLabel = qualifiedTable + "." + column.getName();
        }

        sources.add(getOrCreateVertex(vertexCache, vertexLabel, vertexType));
    }
    return sources;
}
Use of org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo in the Apache Hive project.
Class ExprProcFactory, method getExprString.
/**
 * Get the expression string of an expression node.
 *
 * <p>For a column reference the result is {@code tabAlias.alias} when a table alias is
 * known that is non-empty and does not start with {@code _} or {@code $}. Otherwise it is
 * the dependency's expression if one is recorded, and failing that the column alias.
 * For a generic function the display string of the UDF is built from the recursively
 * rendered children. Any other node falls back to {@code expr.getExprString()}.
 *
 * <p>When {@code cond} is non-null, base columns relevant to the rendered column are
 * added to it as a side effect. A dependency without base columns, or a base column
 * without a field schema, is tolerated and simply contributes nothing.
 *
 * @param rs    row schema used to look up the column's alias and table alias
 * @param expr  expression node to render
 * @param lctx  lineage context providing the dependency index
 * @param inpOp operator whose dependencies (and schema) are consulted
 * @param cond  predicate that collects base columns; may be null
 * @return the rendered expression string
 */
public static String getExprString(RowSchema rs, ExprNodeDesc expr, LineageCtx lctx, Operator<? extends OperatorDesc> inpOp, Predicate cond) {
    if (expr instanceof ExprNodeColumnDesc) {
        ExprNodeColumnDesc col = (ExprNodeColumnDesc) expr;
        String internalName = col.getColumn();
        String alias = internalName;
        String tabAlias = col.getTabAlias();
        ColumnInfo ci = rs.getColumnInfo(internalName);
        if (ci != null) {
            if (ci.getAlias() != null) {
                alias = ci.getAlias();
            }
            if (ci.getTabAlias() != null) {
                tabAlias = ci.getTabAlias();
            }
        }
        Dependency dep = lctx.getIndex().getDependency(inpOp, internalName);
        // No usable table alias: for a SIMPLE dependency, borrow names from its first base column.
        if ((tabAlias == null || tabAlias.startsWith("_") || tabAlias.startsWith("$")) && (dep != null && dep.getType() == DependencyType.SIMPLE)) {
            Set<BaseColumnInfo> baseCols = dep.getBaseCols();
            if (baseCols != null && !baseCols.isEmpty()) {
                BaseColumnInfo baseCol = baseCols.iterator().next();
                // A base column may carry no field schema; in that case keep the names
                // resolved so far instead of dereferencing null.
                if (baseCol.getColumn() != null) {
                    tabAlias = baseCol.getTabAlias().getAlias();
                    alias = baseCol.getColumn().getName();
                }
            }
        }
        if (tabAlias != null && tabAlias.length() > 0 && !tabAlias.startsWith("_") && !tabAlias.startsWith("$")) {
            // findSourceColumn adds the matching source column to cond itself; only when it
            // finds none are the dependency's base columns added (addAll rejects null).
            if (cond != null && !findSourceColumn(lctx, cond, tabAlias, alias) && dep != null && dep.getBaseCols() != null) {
                cond.getBaseCols().addAll(dep.getBaseCols());
            }
            return tabAlias + "." + alias;
        }
        if (dep != null) {
            // Same null guard as above: addAll throws on a null argument.
            if (cond != null && dep.getBaseCols() != null) {
                cond.getBaseCols().addAll(dep.getBaseCols());
            }
            if (dep.getExpr() != null) {
                return dep.getExpr();
            }
        }
        // For an alias starting with "_", prefer the alias from the input operator's schema.
        if (alias.startsWith("_")) {
            ci = inpOp.getSchema().getColumnInfo(internalName);
            if (ci != null && ci.getAlias() != null) {
                alias = ci.getAlias();
            }
        }
        return alias;
    } else if (expr instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) expr;
        List<ExprNodeDesc> children = func.getChildren();
        String[] childrenExprStrings = new String[children.size()];
        for (int i = 0; i < childrenExprStrings.length; i++) {
            childrenExprStrings[i] = getExprString(rs, children.get(i), lctx, inpOp, cond);
        }
        return func.getGenericUDF().getDisplayString(childrenExprStrings);
    }
    return expr.getExprString();
}
Use of org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo in the Apache Hive project.
Class ExprProcFactory, method findSourceColumn.
/**
 * Searches the top-level table scans of the parse context for a table whose name or scan
 * alias equals {@code tabAlias} and that has a column named {@code alias}.
 *
 * <p>On the first match, a new base-column entry (column plus table/alias info) is added
 * to {@code cond}'s base columns and the search stops.
 *
 * @param lctx     lineage context giving access to the parse context's top operators
 * @param cond     predicate whose base columns receive the matching source column
 * @param tabAlias table name or alias to match; must not be null
 * @param alias    column name to match
 * @return {@code true} if a matching source column was found and added, {@code false} otherwise
 */
private static boolean findSourceColumn(LineageCtx lctx, Predicate cond, String tabAlias, String alias) {
    for (TableScanOperator scanOp : lctx.getParseCtx().getTopOps().values()) {
        Table scannedTable = scanOp.getConf().getTableMetadata();

        boolean aliasMatches = scannedTable.getTableName().equals(tabAlias)
            || tabAlias.equals(scanOp.getConf().getAlias());
        if (!aliasMatches) {
            continue;
        }

        for (FieldSchema field : scannedTable.getCols()) {
            if (!field.getName().equals(alias)) {
                continue;
            }

            TableAliasInfo aliasInfo = new TableAliasInfo();
            aliasInfo.setTable(scannedTable.getTTable());
            aliasInfo.setAlias(tabAlias);

            BaseColumnInfo sourceColumn = new BaseColumnInfo();
            sourceColumn.setColumn(field);
            sourceColumn.setTabAlias(aliasInfo);

            cond.getBaseCols().add(sourceColumn);
            return true;
        }
    }
    return false;
}
Aggregations