Search in sources :

Example 1 with LineageInfo

Use of org.apache.hadoop.hive.ql.hooks.LineageInfo in the Atlas project by Apache.

In the class CreateHiveProcess, the method processColumnLineage.

/**
 * Builds one column-lineage entity per lineage entry reported by the Hive context and
 * adds it to {@code entities}.
 *
 * <p>An entry is skipped (with a warning) when its output column cannot be resolved through
 * {@code context.getEntity}; individual input columns that cannot be resolved are likewise
 * logged and dropped. An entry whose input columns are all unresolved produces no entity.
 *
 * @param hiveProcess the process entity whose name and qualified name prefix the
 *                    column-lineage entity's name and qualified name, and which is
 *                    referenced from the column-lineage entity's query attribute
 * @param entities    the collection that receives every column-lineage entity created here
 */
private void processColumnLineage(AtlasEntity hiveProcess, AtlasEntitiesWithExtInfo entities) {
    LineageInfo lineage = getHiveContext().getLinfo();

    boolean hasLineage = lineage != null && !CollectionUtils.isEmpty(lineage.entrySet());
    if (!hasLineage) {
        return;
    }

    for (Map.Entry<DependencyKey, Dependency> lineageEntry : lineage.entrySet()) {
        String targetQualifiedName = getQualifiedName(lineageEntry.getKey());
        AtlasEntity targetColumn = context.getEntity(targetQualifiedName);

        if (targetColumn != null) {
            Dependency dependency = lineageEntry.getValue();

            // Collect only those source columns that resolve to a known entity.
            List<AtlasEntity> sourceColumns = new ArrayList<>();
            for (BaseColumnInfo baseColumnInfo : dependency.getBaseCols()) {
                String sourceQualifiedName = getQualifiedName(baseColumnInfo);
                AtlasEntity sourceColumn = context.getEntity(sourceQualifiedName);

                if (sourceColumn != null) {
                    sourceColumns.add(sourceColumn);
                } else {
                    LOG.warn("column-lineage: non-existing input-column {} for output-column={}", sourceQualifiedName, targetQualifiedName);
                }
            }

            // Without at least one resolved input there is nothing to link.
            if (!sourceColumns.isEmpty()) {
                // Both the name and the qualified name are suffixed with the output column's name attribute.
                Object targetColumnName = targetColumn.getAttribute(ATTRIBUTE_NAME);
                String lineageName = hiveProcess.getAttribute(ATTRIBUTE_NAME) + ":" + targetColumnName;
                String lineageQualifiedName = hiveProcess.getAttribute(ATTRIBUTE_QUALIFIED_NAME) + ":" + targetColumnName;

                AtlasEntity lineageEntity = new AtlasEntity(HIVE_TYPE_COLUMN_LINEAGE);
                lineageEntity.setAttribute(ATTRIBUTE_NAME, lineageName);
                lineageEntity.setAttribute(ATTRIBUTE_QUALIFIED_NAME, lineageQualifiedName);
                lineageEntity.setAttribute(ATTRIBUTE_INPUTS, getObjectIds(sourceColumns));
                lineageEntity.setAttribute(ATTRIBUTE_OUTPUTS, Collections.singletonList(getObjectId(targetColumn)));
                lineageEntity.setAttribute(ATTRIBUTE_QUERY, getObjectId(hiveProcess));
                lineageEntity.setAttribute(ATTRIBUTE_DEPENDENCY_TYPE, dependency.getType());
                lineageEntity.setAttribute(ATTRIBUTE_EXPRESSION, dependency.getExpr());

                entities.addEntity(lineageEntity);
            }
        } else {
            LOG.warn("column-lineage: non-existing output-column {}", targetQualifiedName);
        }
    }
}
Also used : AtlasEntity(org.apache.atlas.model.instance.AtlasEntity) ArrayList(java.util.ArrayList) DependencyKey(org.apache.hadoop.hive.ql.hooks.LineageInfo.DependencyKey) Dependency(org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency) BaseColumnInfo(org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo) LineageInfo(org.apache.hadoop.hive.ql.hooks.LineageInfo) Map(java.util.Map)

Aggregations

ArrayList (java.util.ArrayList)1 Map (java.util.Map)1 AtlasEntity (org.apache.atlas.model.instance.AtlasEntity)1 LineageInfo (org.apache.hadoop.hive.ql.hooks.LineageInfo)1 BaseColumnInfo (org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo)1 Dependency (org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency)1 DependencyKey (org.apache.hadoop.hive.ql.hooks.LineageInfo.DependencyKey)1