Search in sources :

Example 1 with Lineage

use of io.cdap.cdap.data2.metadata.lineage.Lineage in project cdap by caskdata.

the class LineageAdminTest method testSimpleLoopLineage.

@Test
public void testSimpleLoopLineage() throws Exception {
    // Lineage for D1 -> P1 -> D2 -> P2 -> D3 -> P3 -> D4
    // |                 |
    // |                 V
    // |<-----------------
    // 
    LineageStore lineageStore = new LineageStore(getTxExecFactory(), getDatasetFramework(), NamespaceId.DEFAULT.dataset("testSimpleLoopLineage"));
    Store store = getInjector().getInstance(Store.class);
    MetadataStore metadataStore = getInjector().getInstance(MetadataStore.class);
    LineageAdmin lineageAdmin = new LineageAdmin(lineageStore, store, metadataStore, new NoOpEntityExistenceVerifier());
    // Add access
    addRuns(store, run1, run2, run3, run4, run5);
    // It is okay to use current time here since access time is ignore during assertions
    lineageStore.addAccess(run1, dataset1, AccessType.READ, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run1, dataset2, AccessType.WRITE, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run2, dataset2, AccessType.READ, System.currentTimeMillis(), flowlet2);
    lineageStore.addAccess(run2, dataset1, AccessType.WRITE, System.currentTimeMillis(), flowlet2);
    lineageStore.addAccess(run2, dataset3, AccessType.WRITE, System.currentTimeMillis(), flowlet2);
    lineageStore.addAccess(run3, dataset3, AccessType.READ, System.currentTimeMillis());
    lineageStore.addAccess(run3, dataset4, AccessType.WRITE, System.currentTimeMillis());
    Lineage expectedLineage = new Lineage(ImmutableSet.of(new Relation(dataset2, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset2, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2)), new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset4, program3, AccessType.WRITE, twillRunId(run3), emptySet()), new Relation(dataset3, program3, AccessType.READ, twillRunId(run3), emptySet())));
    // Lineage for D1
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset1, 500, 20000, 100));
    // Lineage for D2
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset2, 500, 20000, 100));
    // Lineage for D1 for one level D1 -> P1 -> D2 -> P2 -> D3
    // |                 |
    // |                 V
    // |<-----------------
    // 
    Lineage oneLevelLineage = lineageAdmin.computeLineage(dataset1, 500, 20000, 1);
    Assert.assertEquals(ImmutableSet.of(new Relation(dataset2, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset2, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2)), new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2))), oneLevelLineage.getRelations());
}
Also used : MetadataStore(co.cask.cdap.data2.metadata.store.MetadataStore) Relation(co.cask.cdap.data2.metadata.lineage.Relation) LineageStore(co.cask.cdap.data2.metadata.lineage.LineageStore) Lineage(co.cask.cdap.data2.metadata.lineage.Lineage) Store(co.cask.cdap.app.store.Store) LineageStore(co.cask.cdap.data2.metadata.lineage.LineageStore) MetadataStore(co.cask.cdap.data2.metadata.store.MetadataStore) Test(org.junit.Test)

Example 2 with Lineage

use of io.cdap.cdap.data2.metadata.lineage.Lineage in project cdap by caskdata.

the class LineageAdminTest method testDirectCycleTwoRuns.

@Test
public void testDirectCycleTwoRuns() throws Exception {
    // Lineage for:
    // 
    // D1 -> P1 (run1)
    // 
    // D1 <- P1 (run2)
    // 
    LineageStore lineageStore = new LineageStore(getTxExecFactory(), getDatasetFramework(), NamespaceId.DEFAULT.dataset("testDirectCycleTwoRuns"));
    Store store = getInjector().getInstance(Store.class);
    MetadataStore metadataStore = getInjector().getInstance(MetadataStore.class);
    LineageAdmin lineageAdmin = new LineageAdmin(lineageStore, store, metadataStore, new NoOpEntityExistenceVerifier());
    // Add accesses
    addRuns(store, run1, run2, run3, run4, run5);
    // It is okay to use current time here since access time is ignore during assertions
    lineageStore.addAccess(run1, dataset1, AccessType.READ, System.currentTimeMillis(), flowlet1);
    // Write is in a different run
    lineageStore.addAccess(new ProgramRunId(run1.getNamespace(), run1.getApplication(), run1.getParent().getType(), run1.getProgram(), run2.getEntityName()), dataset1, AccessType.WRITE, System.currentTimeMillis(), flowlet1);
    Lineage expectedLineage = new Lineage(ImmutableSet.of(new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program1, AccessType.WRITE, twillRunId(run2), toSet(flowlet1))));
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset1, 500, 20000, 100));
}
Also used : MetadataStore(co.cask.cdap.data2.metadata.store.MetadataStore) Relation(co.cask.cdap.data2.metadata.lineage.Relation) LineageStore(co.cask.cdap.data2.metadata.lineage.LineageStore) Lineage(co.cask.cdap.data2.metadata.lineage.Lineage) Store(co.cask.cdap.app.store.Store) LineageStore(co.cask.cdap.data2.metadata.lineage.LineageStore) MetadataStore(co.cask.cdap.data2.metadata.store.MetadataStore) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) Test(org.junit.Test)

Example 3 with Lineage

use of io.cdap.cdap.data2.metadata.lineage.Lineage in project cdap by caskdata.

the class LineageAdminTest method testBranchLoopLineage.

@Test
public void testBranchLoopLineage() throws Exception {
    // Lineage for:
    // 
    // |-------------------------------------|
    // |                                     |
    // |                                     |
    // |    -> D4       -> D5 -> P3 -> D6 -> P5
    // |    |           |                    ^
    // V    |           |                    |
    // D1 -> P1 -> D2 -> P2 -> D3 ----------->|
    // |     |           |
    // |     |           |
    // S1 -->|     ---------------> P4 -> D7
    LineageStore lineageStore = new LineageStore(getTxExecFactory(), getDatasetFramework(), NamespaceId.DEFAULT.dataset("testBranchLoopLineage"));
    Store store = getInjector().getInstance(Store.class);
    MetadataStore metadataStore = getInjector().getInstance(MetadataStore.class);
    LineageAdmin lineageAdmin = new LineageAdmin(lineageStore, store, metadataStore, new NoOpEntityExistenceVerifier());
    // Add accesses
    addRuns(store, run1, run2, run3, run4, run5);
    // It is okay to use current time here since access time is ignore during assertions
    lineageStore.addAccess(run1, stream1, AccessType.READ, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run1, dataset1, AccessType.READ, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run1, dataset2, AccessType.WRITE, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run1, dataset4, AccessType.WRITE, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run2, dataset2, AccessType.READ, System.currentTimeMillis(), flowlet2);
    lineageStore.addAccess(run2, dataset3, AccessType.WRITE, System.currentTimeMillis(), flowlet2);
    lineageStore.addAccess(run2, dataset5, AccessType.WRITE, System.currentTimeMillis(), flowlet2);
    lineageStore.addAccess(run3, dataset5, AccessType.READ, System.currentTimeMillis());
    lineageStore.addAccess(run3, dataset6, AccessType.WRITE, System.currentTimeMillis());
    lineageStore.addAccess(run4, dataset2, AccessType.READ, System.currentTimeMillis());
    lineageStore.addAccess(run4, dataset3, AccessType.READ, System.currentTimeMillis());
    lineageStore.addAccess(run4, dataset7, AccessType.WRITE, System.currentTimeMillis());
    lineageStore.addAccess(run5, dataset3, AccessType.READ, System.currentTimeMillis());
    lineageStore.addAccess(run5, dataset6, AccessType.READ, System.currentTimeMillis());
    lineageStore.addAccess(run5, dataset1, AccessType.WRITE, System.currentTimeMillis());
    Lineage expectedLineage = new Lineage(ImmutableSet.of(new Relation(stream1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset4, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2)), new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset5, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset5, program3, AccessType.READ, twillRunId(run3), emptySet()), new Relation(dataset6, program3, AccessType.WRITE, twillRunId(run3), emptySet()), new Relation(dataset2, program4, AccessType.READ, twillRunId(run4), emptySet()), new Relation(dataset3, program4, AccessType.READ, twillRunId(run4), emptySet()), new Relation(dataset7, program4, AccessType.WRITE, twillRunId(run4), emptySet()), new Relation(dataset3, program5, AccessType.READ, twillRunId(run5), emptySet()), new Relation(dataset6, program5, AccessType.READ, twillRunId(run5), emptySet()), new Relation(dataset1, program5, AccessType.WRITE, twillRunId(run5), emptySet())));
    // Lineage for D1
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset1, 500, 20000, 100));
    // Lineage for D5
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset5, 500, 20000, 100));
    // Lineage for D7
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset7, 500, 20000, 100));
    // Lineage for S1
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(stream1, 500, 20000, 100));
    // Lineage for D5 for one level
    // -> D5 -> P3 -> D6
    // |
    // |
    // D2 -> P2 -> D3
    Lineage oneLevelLineage = lineageAdmin.computeLineage(dataset5, 500, 20000, 1);
    Assert.assertEquals(ImmutableSet.of(new Relation(dataset2, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2)), new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset5, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset5, program3, AccessType.READ, twillRunId(run3), emptySet()), new Relation(dataset6, program3, AccessType.WRITE, twillRunId(run3), emptySet())), oneLevelLineage.getRelations());
    // Lineage for S1 for one level
    // 
    // -> D4
    // |
    // |
    // D1 -> P1 -> D2
    // |
    // |
    // S1 -->|
    oneLevelLineage = lineageAdmin.computeLineage(stream1, 500, 20000, 1);
    Assert.assertEquals(ImmutableSet.of(new Relation(stream1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset4, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1))), oneLevelLineage.getRelations());
}
Also used : MetadataStore(co.cask.cdap.data2.metadata.store.MetadataStore) Relation(co.cask.cdap.data2.metadata.lineage.Relation) LineageStore(co.cask.cdap.data2.metadata.lineage.LineageStore) Lineage(co.cask.cdap.data2.metadata.lineage.Lineage) Store(co.cask.cdap.app.store.Store) LineageStore(co.cask.cdap.data2.metadata.lineage.LineageStore) MetadataStore(co.cask.cdap.data2.metadata.store.MetadataStore) Test(org.junit.Test)

Example 4 with Lineage

use of io.cdap.cdap.data2.metadata.lineage.Lineage in project cdap by caskdata.

the class LineageHandler method streamLineage.

@GET
@Path("/namespaces/{namespace-id}/streams/{stream-id}/lineage")
public void streamLineage(HttpRequest request, HttpResponder responder, @PathParam("namespace-id") String namespaceId, @PathParam("stream-id") String stream, @QueryParam("start") String startStr, @QueryParam("end") String endStr, @QueryParam("levels") @DefaultValue("10") int levels, @QueryParam("collapse") List<String> collapse, @QueryParam("rollup") String rollup) throws Exception {
    checkLevels(levels);
    TimeRange range = parseRange(startStr, endStr);
    StreamId streamId = new StreamId(namespaceId, stream);
    Lineage lineage = lineageAdmin.computeLineage(streamId, range.getStart(), range.getEnd(), levels, rollup);
    responder.sendJson(HttpResponseStatus.OK, GSON.toJson(LineageSerializer.toLineageRecord(TimeUnit.MILLISECONDS.toSeconds(range.getStart()), TimeUnit.MILLISECONDS.toSeconds(range.getEnd()), lineage, getCollapseTypes(collapse)), LineageRecord.class));
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) LineageRecord(co.cask.cdap.proto.metadata.lineage.LineageRecord) Lineage(co.cask.cdap.data2.metadata.lineage.Lineage) Path(javax.ws.rs.Path) GET(javax.ws.rs.GET)

Example 5 with Lineage

use of io.cdap.cdap.data2.metadata.lineage.Lineage in project cdap by caskdata.

the class LineageHTTPHandler method datasetFieldLineageDetails.

/**
 * Get the operation details about the specified field in one dataset.
 *
 * @param field the field name to compute field operation details
 * @param directionStr the direction to compute the field level lineage, can be INCOMING, OUTGOING or BOTH
 * @param startStr the start time string, it can be a specific timestamp in milliseconds or a relative time,
 *                 using now and times added to it.
 * @param endStr the end time string, it can be a specific timestamp in milliseconds or a relative time,
 *               using now and times added to it.
 */
@GET
@Path("/namespaces/{namespace-id}/datasets/{dataset-id}/lineage/fields/{field-name}/operations")
public void datasetFieldLineageDetails(HttpRequest request, HttpResponder responder, @PathParam("namespace-id") String namespaceId, @PathParam("dataset-id") String datasetId, @PathParam("field-name") String field, @QueryParam("direction") @DefaultValue("both") String directionStr, @QueryParam("start") String startStr, @QueryParam("end") String endStr) throws Exception {
    accessEnforcer.enforce(new DatasetId(namespaceId, datasetId), authenticationContext.getPrincipal(), StandardPermission.GET);
    TimeRange range = parseRange(startStr, endStr);
    Constants.FieldLineage.Direction direction = parseDirection(directionStr);
    EndPointField endPointField = new EndPointField(EndPoint.of(namespaceId, datasetId), field);
    FieldLineageDetails details = fieldLineageAdmin.getOperationDetails(direction, endPointField, range.getStart(), range.getEnd());
    responder.sendJson(HttpResponseStatus.OK, GSON.toJson(details));
}
Also used : EndPointField(io.cdap.cdap.data2.metadata.lineage.field.EndPointField) FieldLineageDetails(io.cdap.cdap.proto.metadata.lineage.FieldLineageDetails) DatasetId(io.cdap.cdap.proto.id.DatasetId) Path(javax.ws.rs.Path) GET(javax.ws.rs.GET)

Aggregations

Test (org.junit.Test)22 Lineage (io.cdap.cdap.data2.metadata.lineage.Lineage)13 Lineage (co.cask.cdap.data2.metadata.lineage.Lineage)12 Relation (io.cdap.cdap.data2.metadata.lineage.Relation)12 DatasetId (io.cdap.cdap.proto.id.DatasetId)12 LineageStoreReader (io.cdap.cdap.data2.metadata.lineage.LineageStoreReader)11 LineageWriter (io.cdap.cdap.data2.metadata.writer.LineageWriter)11 Relation (co.cask.cdap.data2.metadata.lineage.Relation)10 Store (io.cdap.cdap.app.store.Store)9 DefaultLineageStoreReader (io.cdap.cdap.data2.metadata.lineage.DefaultLineageStoreReader)9 TransactionRunner (io.cdap.cdap.spi.data.transaction.TransactionRunner)9 EndPointField (io.cdap.cdap.data2.metadata.lineage.field.EndPointField)8 BasicLineageWriter (io.cdap.cdap.data2.metadata.writer.BasicLineageWriter)8 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)8 Store (co.cask.cdap.app.store.Store)7 LineageStore (co.cask.cdap.data2.metadata.lineage.LineageStore)7 MetadataStore (co.cask.cdap.data2.metadata.store.MetadataStore)7 ProgramId (io.cdap.cdap.proto.id.ProgramId)7 HashSet (java.util.HashSet)7 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)6