Use of org.apache.hadoop.hive.ql.hooks.LineageInfo in project atlas by apache.
The class CreateHiveProcess, method processColumnLineage.
/**
 * Adds one column-lineage process entity to {@code entities} for each entry of
 * the Hive {@link LineageInfo} obtained from the hive context.
 *
 * <p>An entry is skipped when its output column cannot be resolved through
 * {@code context.getEntity(...)}, or when none of its base (input) columns can
 * be resolved. Unresolved columns are logged at WARN level. The method returns
 * without doing anything when there is no lineage info or it has no entries.
 *
 * @param hiveProcess the process entity whose name and qualified name prefix
 *                    the generated column-lineage entities, and which is
 *                    referenced from each of them as the query
 * @param entities    the collection that receives the generated entities
 */
private void processColumnLineage(AtlasEntity hiveProcess, AtlasEntitiesWithExtInfo entities) {
    LineageInfo lineageInfo = getHiveContext().getLinfo();

    if (lineageInfo == null) {
        return;
    }

    if (CollectionUtils.isEmpty(lineageInfo.entrySet())) {
        return;
    }

    for (Map.Entry<DependencyKey, Dependency> lineageEntry : lineageInfo.entrySet()) {
        String      targetColName = getQualifiedName(lineageEntry.getKey());
        AtlasEntity targetColumn  = context.getEntity(targetColName);

        if (targetColumn == null) {
            LOG.warn("column-lineage: non-existing output-column {}", targetColName);
            continue;
        }

        Dependency        dependency    = lineageEntry.getValue();
        List<AtlasEntity> sourceColumns = new ArrayList<>();

        // Keep only the base columns that resolve to a known entity.
        for (BaseColumnInfo sourceInfo : dependency.getBaseCols()) {
            String      sourceColName = getQualifiedName(sourceInfo);
            AtlasEntity sourceColumn  = context.getEntity(sourceColName);

            if (sourceColumn != null) {
                sourceColumns.add(sourceColumn);
            } else {
                LOG.warn("column-lineage: non-existing input-column {} for output-column={}", sourceColName, targetColName);
            }
        }

        // A lineage entity is only emitted when at least one input column was resolved.
        if (!sourceColumns.isEmpty()) {
            AtlasEntity lineage = new AtlasEntity(HIVE_TYPE_COLUMN_LINEAGE);

            // Both the name and the qualified name are suffixed with the output column's name attribute.
            String lineageName          = hiveProcess.getAttribute(ATTRIBUTE_NAME) + ":" + targetColumn.getAttribute(ATTRIBUTE_NAME);
            String lineageQualifiedName = hiveProcess.getAttribute(ATTRIBUTE_QUALIFIED_NAME) + ":" + targetColumn.getAttribute(ATTRIBUTE_NAME);

            lineage.setAttribute(ATTRIBUTE_NAME, lineageName);
            lineage.setAttribute(ATTRIBUTE_QUALIFIED_NAME, lineageQualifiedName);
            lineage.setAttribute(ATTRIBUTE_INPUTS, getObjectIds(sourceColumns));
            lineage.setAttribute(ATTRIBUTE_OUTPUTS, Collections.singletonList(getObjectId(targetColumn)));
            lineage.setAttribute(ATTRIBUTE_QUERY, getObjectId(hiveProcess));
            lineage.setAttribute(ATTRIBUTE_DEPENDENCY_TYPE, dependency.getType());
            lineage.setAttribute(ATTRIBUTE_EXPRESSION, dependency.getExpr());

            entities.addEntity(lineage);
        }
    }
}
Aggregations