Search in sources :

Example 21 with AtlasLineageInfo

use of org.apache.atlas.model.lineage.AtlasLineageInfo in project atlas by apache.

the class HiveHookIT method testColumnLevelLineage.

/*
 * Column-level lineage check for a CREATE TABLE AS SELECT statement.
 *
 * Disabled by default: Atlas uses Hive 1.2.x, and the Hive patch HIVE-13112 that enables column-level
 * lineage is not committed in Hive 1.2.x. The test fails when Hive does not supply lineage information.
 * It can be enabled once HIVE-13112 is committed to Hive branch 1.2.x; track HIVE-14706 for the status
 * of column lineage availability in later Hive versions, i.e. 2.1.x.
 */
@Test(enabled = false)
public void testColumnLevelLineage() throws Exception {
    // Source table with two int columns, both of which must be registered.
    String srcTableName = "table" + random();
    runCommand("create table " + srcTableName + "(a int, b int)");
    String srcTableGuid = assertTableIsRegistered(DEFAULT_DB, srcTableName);
    String srcColAGuid = assertColumnIsRegistered(
            HiveMetaStoreBridge.getColumnQualifiedName(
                    HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, srcTableName), "a"));
    String srcColBGuid = assertColumnIsRegistered(
            HiveMetaStoreBridge.getColumnQualifiedName(
                    HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, srcTableName), "b"));

    // CTAS target whose columns are derived from the source table.
    String targetTableName = "table" + random();
    String ctasStatement = "create table " + targetTableName + " as " + "select sum(a+b) as a, count(*) as b from " + srcTableName;
    runCommand(ctasStatement);
    String targetColAGuid = assertColumnIsRegistered(
            HiveMetaStoreBridge.getColumnQualifiedName(
                    HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, targetTableName), "a"));
    String targetColBGuid = assertColumnIsRegistered(
            HiveMetaStoreBridge.getColumnQualifiedName(
                    HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, targetTableName), "b"));

    // The table-level process for the CTAS must be registered.
    Set<ReadEntity> readEntities = getInputs(srcTableName, Entity.Type.TABLE);
    Set<WriteEntity> writeEntities = getOutputs(targetTableName, Entity.Type.TABLE);
    HiveEventContext ctasEvent = constructEvent(ctasStatement, HiveOperation.CREATETABLE_AS_SELECT, readEntities, writeEntities);
    assertProcessIsRegistered(ctasEvent);
    assertTableIsRegistered(DEFAULT_DB, targetTableName);
    String processQualifiedName = sortEventsAndGetProcessQualifiedName(ctasEvent);

    // Target column "a": its column-lineage process must list both source columns as inputs.
    String lineageProcessForA = processQualifiedName + ":" + "a";
    LOG.debug("Searching for column lineage process {} ", lineageProcessForA);
    String lineageGuidForA = assertEntityIsRegistered(HiveDataTypes.HIVE_COLUMN_LINEAGE.getName(), ATTRIBUTE_QUALIFIED_NAME, lineageProcessForA, null);
    AtlasEntity lineageEntityForA = atlasClientV2.getEntityByGuid(lineageGuidForA).getEntity();
    List<AtlasObjectId> inputIdsForA = toAtlasObjectIdList(lineageEntityForA.getAttribute("inputs"));
    List<String> actualInputsForA = new ArrayList<>();
    for (AtlasObjectId inputId : inputIdsForA) {
        actualInputsForA.add(inputId.getGuid());
    }
    List<String> expectedInputsForA = Arrays.asList(srcColAGuid, srcColBGuid);
    // Both sides are sorted so the comparison does not depend on ordering.
    Collections.sort(actualInputsForA);
    Collections.sort(expectedInputsForA);
    Assert.assertEquals(actualInputsForA, expectedInputsForA);

    // Target column "b": its column-lineage process must list the source table as its only input.
    String lineageProcessForB = processQualifiedName + ":" + "b";
    LOG.debug("Searching for column lineage process {} ", lineageProcessForB);
    String lineageGuidForB = assertEntityIsRegistered(HiveDataTypes.HIVE_COLUMN_LINEAGE.getName(), ATTRIBUTE_QUALIFIED_NAME, lineageProcessForB, null);
    AtlasEntity lineageEntityForB = atlasClientV2.getEntityByGuid(lineageGuidForB).getEntity();
    List<AtlasObjectId> inputIdsForB = toAtlasObjectIdList(lineageEntityForB.getAttribute("inputs"));
    List<String> actualInputsForB = new ArrayList<>();
    for (AtlasObjectId inputId : inputIdsForB) {
        actualInputsForB.add(inputId.getGuid());
    }
    List<String> expectedInputsForB = Arrays.asList(srcTableGuid);
    Collections.sort(actualInputsForB);
    Collections.sort(expectedInputsForB);
    Assert.assertEquals(actualInputsForB, expectedInputsForB);

    // Test lineage API response
    // The V2 lineage result and its entity map are fetched here but not asserted on.
    AtlasLineageInfo inputLineageForA = atlasClientV2.getLineageInfo(targetColAGuid, AtlasLineageInfo.LineageDirection.INPUT, 0);
    Map<String, AtlasEntityHeader> headersByGuid = inputLineageForA.getGuidEntityMap();

    // Input graph of target column "a" must contain the column itself and both source columns.
    ObjectNode inputGraphForA = atlasClient.getInputGraphForEntity(targetColAGuid);
    JsonNode verticesForA = inputGraphForA.get("values").get("vertices");
    Assert.assertNotNull(verticesForA.get(targetColAGuid));
    Assert.assertNotNull(verticesForA.get(srcColAGuid));
    Assert.assertNotNull(verticesForA.get(srcColBGuid));

    // Input graph of target column "b" must contain the column itself and the source table.
    ObjectNode inputGraphForB = atlasClient.getInputGraphForEntity(targetColBGuid);
    JsonNode verticesForB = inputGraphForB.get("values").get("vertices");
    Assert.assertNotNull(verticesForB.get(targetColBGuid));
    Assert.assertNotNull(verticesForB.get(srcTableGuid));
}
Also used : AtlasLineageInfo(org.apache.atlas.model.lineage.AtlasLineageInfo) ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) AtlasObjectId(org.apache.atlas.model.instance.AtlasObjectId) JsonNode(com.fasterxml.jackson.databind.JsonNode) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) AtlasEntity(org.apache.atlas.model.instance.AtlasEntity) AtlasEntityHeader(org.apache.atlas.model.instance.AtlasEntityHeader) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) Test(org.testng.annotations.Test)

Example 22 with AtlasLineageInfo

use of org.apache.atlas.model.lineage.AtlasLineageInfo in project atlas by apache.

the class QuickStartV2 method lineage.

/**
 * Prints a sample of lineage information to standard output.
 *
 * Fetches lineage in both directions for the table referenced by {@code SALES_FACT_DAILY_MV_TABLE} and
 * prints one line per relation in the form {@code from(type) -> to(type)}.
 *
 * @throws AtlasServiceException declared for the lineage lookup performed through the Atlas client
 */
private void lineage() throws AtlasServiceException {
    System.out.println("\nSample Lineage Info: ");

    AtlasLineageInfo tableLineage = atlasClientV2.getLineageInfo(getTableId(SALES_FACT_DAILY_MV_TABLE), LineageDirection.BOTH, 0);
    Set<LineageRelation> edges = tableLineage.getRelations();
    Map<String, AtlasEntityHeader> headersByGuid = tableLineage.getGuidEntityMap();

    for (LineageRelation edge : edges) {
        // Relations carry only entity ids; the headers are resolved through the guid map.
        AtlasEntityHeader source = headersByGuid.get(edge.getFromEntityId());
        AtlasEntityHeader target = headersByGuid.get(edge.getToEntityId());

        StringBuilder line = new StringBuilder();
        line.append(source.getDisplayText()).append("(").append(source.getTypeName()).append(") -> ");
        line.append(target.getDisplayText()).append("(").append(target.getTypeName()).append(")");
        System.out.println(line.toString());
    }
}
Also used : AtlasLineageInfo(org.apache.atlas.model.lineage.AtlasLineageInfo) LineageRelation(org.apache.atlas.model.lineage.AtlasLineageInfo.LineageRelation) AtlasEntityHeader(org.apache.atlas.model.instance.AtlasEntityHeader)

Example 23 with AtlasLineageInfo

use of org.apache.atlas.model.lineage.AtlasLineageInfo in project atlas by apache.

the class DataSetLineageResource method outputsGraph.

/**
 * Returns the outputs graph for a given entity.
 *
 * Resolves the table name to a guid, fetches OUTPUT-direction lineage for it and wraps the converted
 * result, together with the table name and request id, in the response.
 *
 * @param request   the servlet request; not read by this method
 * @param tableName table name
 * @return response carrying the table name, the request id and the output lineage results
 * @throws WebApplicationException with {@code BAD_REQUEST} when an {@link IllegalArgumentException} is
 *         raised, unchanged when one is already in flight, and with {@code INTERNAL_SERVER_ERROR} for
 *         any other failure
 */
@GET
@Path("table/{tableName}/outputs/graph")
@Consumes(Servlets.JSON_MEDIA_TYPE)
@Produces(Servlets.JSON_MEDIA_TYPE)
public DataSetLineageResponse outputsGraph(@Context HttpServletRequest request, @PathParam("tableName") String tableName) {
    if (LOG.isDebugEnabled()) {
        LOG.debug("==> DataSetLineageResource.outputsGraph({})", tableName);
    }
    DataSetLineageResponse ret = new DataSetLineageResponse();
    AtlasPerfTracer perf = null;
    try {
        // The tracer is only created when perf tracing is enabled; otherwise it stays null.
        if (AtlasPerfTracer.isPerfTraceEnabled(PERF_LOG)) {
            perf = AtlasPerfTracer.getPerfTracer(PERF_LOG, "DataSetLineageResource.outputsGraph(tableName=" + tableName + ")");
        }
        String guid = getGuid(tableName);
        AtlasLineageInfo lineageInfo = atlasLineageService.getAtlasLineageInfo(guid, LineageDirection.OUTPUT, -1);
        ret.setTableName(tableName);
        ret.setRequestId(Servlets.getRequestId());
        ret.setResults(LineageUtils.toLineageStruct(lineageInfo, typeRegistry));
        return ret;
    } catch (IllegalArgumentException e) {
        LOG.error("Unable to get lineage outputs graph for table {}", tableName, e);
        throw new WebApplicationException(Servlets.getErrorResponse(e, Response.Status.BAD_REQUEST));
    } catch (WebApplicationException e) {
        // Already an HTTP-level failure: log it and pass it through untouched.
        LOG.error("Unable to get lineage outputs graph for table {}", tableName, e);
        throw e;
    } catch (Throwable e) {
        LOG.error("Unable to get lineage outputs graph for table {}", tableName, e);
        throw new WebApplicationException(Servlets.getErrorResponse(e, Response.Status.INTERNAL_SERVER_ERROR));
    } finally {
        AtlasPerfTracer.log(perf);
        // Exit trace pairing with the "==>" entry trace; emitted on both success and failure.
        if (LOG.isDebugEnabled()) {
            LOG.debug("<== DataSetLineageResource.outputsGraph({})", tableName);
        }
    }
}
Also used : AtlasLineageInfo(org.apache.atlas.model.lineage.AtlasLineageInfo) WebApplicationException(javax.ws.rs.WebApplicationException) AtlasPerfTracer(org.apache.atlas.utils.AtlasPerfTracer) DataSetLineageResponse(org.apache.atlas.v1.model.lineage.DataSetLineageResponse) Path(javax.ws.rs.Path) Consumes(javax.ws.rs.Consumes) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET)

Example 24 with AtlasLineageInfo

use of org.apache.atlas.model.lineage.AtlasLineageInfo in project atlas by apache.

the class LineageResource method outputsGraph.

/**
 * Serves the output-side lineage graph of the entity identified by {@code guid}.
 *
 * @param guid dataset entity id
 * @return lineage response holding the request id and the converted lineage results
 */
@GET
@Path("{guid}/outputs/graph")
@Consumes(Servlets.JSON_MEDIA_TYPE)
@Produces(Servlets.JSON_MEDIA_TYPE)
public LineageResponse outputsGraph(@PathParam("guid") String guid) {
    if (LOG.isDebugEnabled()) {
        LOG.debug("==> LineageResource.outputsGraph({})", guid);
    }

    final LineageResponse response = new LineageResponse();
    AtlasPerfTracer tracer = null;

    try {
        // A tracer is created only while perf tracing is switched on; null is handed to log() otherwise.
        if (AtlasPerfTracer.isPerfTraceEnabled(PERF_LOG)) {
            tracer = AtlasPerfTracer.getPerfTracer(PERF_LOG, "LineageResource.outputsGraph(" + guid + ")");
        }

        final AtlasLineageInfo outputLineage = atlasLineageService.getAtlasLineageInfo(guid, LineageDirection.OUTPUT, -1);

        response.setRequestId(Servlets.getRequestId());
        response.setResults(LineageUtils.toLineageStruct(outputLineage, typeRegistry));

        return response;
    } catch (WebApplicationException webFailure) {
        // Already an HTTP-level failure: record it and rethrow as is.
        LOG.error("Unable to get lineage outputs graph for entity guid={}", guid, webFailure);
        throw webFailure;
    } catch (AtlasBaseException atlasFailure) {
        // Atlas failures are translated into an HTTP error response.
        LOG.error("Unable to get lineage outputs graph for entity guid={}", guid, atlasFailure);
        throw new WebApplicationException(Servlets.getErrorResponse(atlasFailure));
    } finally {
        AtlasPerfTracer.log(tracer);

        if (LOG.isDebugEnabled()) {
            LOG.debug("<== LineageResource.outputsGraph({})", guid);
        }
    }
}
Also used : AtlasLineageInfo(org.apache.atlas.model.lineage.AtlasLineageInfo) AtlasBaseException(org.apache.atlas.exception.AtlasBaseException) WebApplicationException(javax.ws.rs.WebApplicationException) AtlasPerfTracer(org.apache.atlas.utils.AtlasPerfTracer) LineageResponse(org.apache.atlas.v1.model.lineage.LineageResponse) Path(javax.ws.rs.Path) Consumes(javax.ws.rs.Consumes) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET)

Example 25 with AtlasLineageInfo

use of org.apache.atlas.model.lineage.AtlasLineageInfo in project atlas by apache.

the class EntityLineageService method getLineageInfo.

/**
 * Builds the lineage information for an entity by running the lineage Gremlin query and collecting
 * the edges it returns.
 *
 * Each element of the query result that is a {@link Map} is scanned: every map value is either a
 * single {@code AtlasEdge} or a {@link List} of them, and each edge is handed to {@code processEdge},
 * which receives the entity map and the relation set. Values of any other type are logged at WARN
 * level and ignored; result elements that are not maps are skipped without logging.
 *
 * @param guid      guid of the entity the lineage is computed for
 * @param direction lineage direction passed to the query builder and to the result
 * @param depth     depth passed to the query builder and to the result
 * @return lineage info holding the collected entities and relations
 * @throws AtlasBaseException declared by this method; NOTE(review): presumably raised by the graph
 *         query or by {@code processEdge} — confirm against their declarations
 */
private AtlasLineageInfo getLineageInfo(String guid, LineageDirection direction, int depth) throws AtlasBaseException {
    Map<String, AtlasEntityHeader> entities = new HashMap<>();
    Set<LineageRelation> relations = new HashSet<>();
    String lineageQuery = getLineageQuery(guid, direction, depth);
    // Wildcard types instead of raw types: the element types of the script result are not known statically.
    List<?> edgeMapList = (List<?>) graph.executeGremlinScript(lineageQuery, false);
    if (CollectionUtils.isNotEmpty(edgeMapList)) {
        for (Object edgeMap : edgeMapList) {
            if (!(edgeMap instanceof Map)) {
                continue;
            }
            for (Map.Entry<?, ?> entry : ((Map<?, ?>) edgeMap).entrySet()) {
                Object value = entry.getValue();
                if (value instanceof List) {
                    for (Object elem : (List<?>) value) {
                        addEdgeOrWarn(elem, entities, relations);
                    }
                } else {
                    addEdgeOrWarn(value, entities, relations);
                }
            }
        }
    }
    return new AtlasLineageInfo(guid, entities, relations, direction, depth);
}

/** Passes {@code candidate} to {@code processEdge} when it is an {@code AtlasEdge}; otherwise logs a warning and ignores it. */
private void addEdgeOrWarn(Object candidate, Map<String, AtlasEntityHeader> entities, Set<LineageRelation> relations) throws AtlasBaseException {
    if (candidate instanceof AtlasEdge) {
        processEdge((AtlasEdge) candidate, entities, relations);
    } else {
        LOG.warn("Invalid value of type {} found, ignoring", (candidate != null ? candidate.getClass().getSimpleName() : "null"));
    }
}
Also used : AtlasLineageInfo(org.apache.atlas.model.lineage.AtlasLineageInfo) HashMap(java.util.HashMap) LineageRelation(org.apache.atlas.model.lineage.AtlasLineageInfo.LineageRelation) AtlasEdge(org.apache.atlas.repository.graphdb.AtlasEdge) AtlasEntityHeader(org.apache.atlas.model.instance.AtlasEntityHeader) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) HashSet(java.util.HashSet)

Aggregations

AtlasLineageInfo (org.apache.atlas.model.lineage.AtlasLineageInfo)32 AtlasEntityHeader (org.apache.atlas.model.instance.AtlasEntityHeader)24 Test (org.testng.annotations.Test)17 LineageRelation (org.apache.atlas.model.lineage.AtlasLineageInfo.LineageRelation)11 MultivaluedMapImpl (com.sun.jersey.core.util.MultivaluedMapImpl)6 Consumes (javax.ws.rs.Consumes)6 GET (javax.ws.rs.GET)6 Path (javax.ws.rs.Path)6 Produces (javax.ws.rs.Produces)6 WebApplicationException (javax.ws.rs.WebApplicationException)6 AtlasPerfTracer (org.apache.atlas.utils.AtlasPerfTracer)6 ArrayList (java.util.ArrayList)5 BaseRepositoryTest (org.apache.atlas.BaseRepositoryTest)5 AtlasBaseException (org.apache.atlas.exception.AtlasBaseException)5 JSONObject (org.codehaus.jettison.json.JSONObject)5 ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode)4 HashMap (java.util.HashMap)4 List (java.util.List)4 HashSet (java.util.HashSet)2 DataSetLineageResponse (org.apache.atlas.v1.model.lineage.DataSetLineageResponse)2