use of co.cask.cdap.data2.metadata.lineage.Lineage in project cdap by caskdata.
the class LineageAdminTest method testSimpleLoopLineage.
/**
 * Verifies lineage computation when two datasets feed each other through two programs.
 *
 * <p>Accesses recorded by this test:
 * <pre>
 *   run1 (program1): reads D1, writes D2
 *   run2 (program2): reads D2, writes D1 and D3
 *   run3 (program3): reads D3, writes D4
 *
 *   D1 -> P1 -> D2 -> P2 -> D3 -> P3 -> D4
 *   ^                 |
 *   |-----------------|
 * </pre>
 */
@Test
public void testSimpleLoopLineage() throws Exception {
  LineageStore lineages = new LineageStore(getTxExecFactory(), getDatasetFramework(),
                                           NamespaceId.DEFAULT.dataset("testSimpleLoopLineage"));
  Store programStore = getInjector().getInstance(Store.class);
  MetadataStore metadata = getInjector().getInstance(MetadataStore.class);
  LineageAdmin admin = new LineageAdmin(lineages, programStore, metadata, new NoOpEntityExistenceVerifier());

  // Register the runs, then record every dataset access.
  addRuns(programStore, run1, run2, run3, run4, run5);
  // Wall-clock time is fine here since access time is ignored during assertions.
  lineages.addAccess(run1, dataset1, AccessType.READ, System.currentTimeMillis(), flowlet1);
  lineages.addAccess(run1, dataset2, AccessType.WRITE, System.currentTimeMillis(), flowlet1);
  lineages.addAccess(run2, dataset2, AccessType.READ, System.currentTimeMillis(), flowlet2);
  lineages.addAccess(run2, dataset1, AccessType.WRITE, System.currentTimeMillis(), flowlet2);
  lineages.addAccess(run2, dataset3, AccessType.WRITE, System.currentTimeMillis(), flowlet2);
  lineages.addAccess(run3, dataset3, AccessType.READ, System.currentTimeMillis());
  lineages.addAccess(run3, dataset4, AccessType.WRITE, System.currentTimeMillis());

  // One expected relation per recorded access.
  Relation p1WritesD2 = new Relation(dataset2, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1));
  Relation p1ReadsD1 = new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1));
  Relation p2WritesD1 = new Relation(dataset1, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2));
  Relation p2ReadsD2 = new Relation(dataset2, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2));
  Relation p2WritesD3 = new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2));
  Relation p3WritesD4 = new Relation(dataset4, program3, AccessType.WRITE, twillRunId(run3), emptySet());
  Relation p3ReadsD3 = new Relation(dataset3, program3, AccessType.READ, twillRunId(run3), emptySet());

  Lineage fullLineage = new Lineage(ImmutableSet.of(p1WritesD2, p1ReadsD1, p2WritesD1, p2ReadsD2, p2WritesD3,
                                                    p3WritesD4, p3ReadsD3));

  // With a generous level limit, D1 and D2 both resolve to the complete graph.
  Assert.assertEquals(fullLineage, admin.computeLineage(dataset1, 500, 20000, 100));
  Assert.assertEquals(fullLineage, admin.computeLineage(dataset2, 500, 20000, 100));

  // Limiting D1 to one level drops the P3 relations:
  //   D1 -> P1 -> D2 -> P2 -> D3
  //   ^                 |
  //   |-----------------|
  Lineage oneLevel = admin.computeLineage(dataset1, 500, 20000, 1);
  Assert.assertEquals(ImmutableSet.of(p1WritesD2, p1ReadsD1, p2WritesD1, p2ReadsD2, p2WritesD3),
                      oneLevel.getRelations());
}
use of co.cask.cdap.data2.metadata.lineage.Lineage in project cdap by caskdata.
the class LineageAdminTest method testDirectCycleTwoRuns.
/**
 * Verifies lineage when a single program reads a dataset in one run and writes the same dataset
 * in a different run.
 *
 * <pre>
 *   D1 -> P1 (run1)
 *   D1 <- P1 (run2)
 * </pre>
 */
@Test
public void testDirectCycleTwoRuns() throws Exception {
  LineageStore lineages = new LineageStore(getTxExecFactory(), getDatasetFramework(),
                                           NamespaceId.DEFAULT.dataset("testDirectCycleTwoRuns"));
  Store programStore = getInjector().getInstance(Store.class);
  MetadataStore metadata = getInjector().getInstance(MetadataStore.class);
  LineageAdmin admin = new LineageAdmin(lineages, programStore, metadata, new NoOpEntityExistenceVerifier());

  // Register the runs, then record the accesses.
  addRuns(programStore, run1, run2, run3, run4, run5);
  // Wall-clock time is fine here since access time is ignored during assertions.
  lineages.addAccess(run1, dataset1, AccessType.READ, System.currentTimeMillis(), flowlet1);

  // The write belongs to run1's program, but is recorded under run2's run id.
  ProgramRunId writeRun = new ProgramRunId(run1.getNamespace(), run1.getApplication(),
                                           run1.getParent().getType(), run1.getProgram(),
                                           run2.getEntityName());
  lineages.addAccess(writeRun, dataset1, AccessType.WRITE, System.currentTimeMillis(), flowlet1);

  Relation readInRun1 = new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1));
  Relation writeInRun2 = new Relation(dataset1, program1, AccessType.WRITE, twillRunId(run2), toSet(flowlet1));
  Lineage expected = new Lineage(ImmutableSet.of(readInRun1, writeInRun2));

  Assert.assertEquals(expected, admin.computeLineage(dataset1, 500, 20000, 100));
}
use of co.cask.cdap.data2.metadata.lineage.Lineage in project cdap by caskdata.
the class LineageAdminTest method testBranchLoopLineage.
/**
 * Verifies lineage computation on a graph that both branches and loops back to its origin.
 *
 * <p>Accesses recorded by this test:
 * <pre>
 *   run1 (program1): reads S1 and D1, writes D2 and D4
 *   run2 (program2): reads D2,        writes D3 and D5
 *   run3 (program3): reads D5,        writes D6
 *   run4 (program4): reads D2 and D3, writes D7
 *   run5 (program5): reads D3 and D6, writes D1   (closes the loop back to D1)
 * </pre>
 */
@Test
public void testBranchLoopLineage() throws Exception {
  LineageStore lineages = new LineageStore(getTxExecFactory(), getDatasetFramework(),
                                           NamespaceId.DEFAULT.dataset("testBranchLoopLineage"));
  Store programStore = getInjector().getInstance(Store.class);
  MetadataStore metadata = getInjector().getInstance(MetadataStore.class);
  LineageAdmin admin = new LineageAdmin(lineages, programStore, metadata, new NoOpEntityExistenceVerifier());

  // Register the runs, then record every access.
  addRuns(programStore, run1, run2, run3, run4, run5);
  // Wall-clock time is fine here since access time is ignored during assertions.
  lineages.addAccess(run1, stream1, AccessType.READ, System.currentTimeMillis(), flowlet1);
  lineages.addAccess(run1, dataset1, AccessType.READ, System.currentTimeMillis(), flowlet1);
  lineages.addAccess(run1, dataset2, AccessType.WRITE, System.currentTimeMillis(), flowlet1);
  lineages.addAccess(run1, dataset4, AccessType.WRITE, System.currentTimeMillis(), flowlet1);
  lineages.addAccess(run2, dataset2, AccessType.READ, System.currentTimeMillis(), flowlet2);
  lineages.addAccess(run2, dataset3, AccessType.WRITE, System.currentTimeMillis(), flowlet2);
  lineages.addAccess(run2, dataset5, AccessType.WRITE, System.currentTimeMillis(), flowlet2);
  lineages.addAccess(run3, dataset5, AccessType.READ, System.currentTimeMillis());
  lineages.addAccess(run3, dataset6, AccessType.WRITE, System.currentTimeMillis());
  lineages.addAccess(run4, dataset2, AccessType.READ, System.currentTimeMillis());
  lineages.addAccess(run4, dataset3, AccessType.READ, System.currentTimeMillis());
  lineages.addAccess(run4, dataset7, AccessType.WRITE, System.currentTimeMillis());
  lineages.addAccess(run5, dataset3, AccessType.READ, System.currentTimeMillis());
  lineages.addAccess(run5, dataset6, AccessType.READ, System.currentTimeMillis());
  lineages.addAccess(run5, dataset1, AccessType.WRITE, System.currentTimeMillis());

  // One expected relation per recorded access, grouped by program.
  Relation p1ReadsS1 = new Relation(stream1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1));
  Relation p1ReadsD1 = new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1));
  Relation p1WritesD2 = new Relation(dataset2, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1));
  Relation p1WritesD4 = new Relation(dataset4, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1));

  Relation p2ReadsD2 = new Relation(dataset2, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2));
  Relation p2WritesD3 = new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2));
  Relation p2WritesD5 = new Relation(dataset5, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2));

  Relation p3ReadsD5 = new Relation(dataset5, program3, AccessType.READ, twillRunId(run3), emptySet());
  Relation p3WritesD6 = new Relation(dataset6, program3, AccessType.WRITE, twillRunId(run3), emptySet());

  Relation p4ReadsD2 = new Relation(dataset2, program4, AccessType.READ, twillRunId(run4), emptySet());
  Relation p4ReadsD3 = new Relation(dataset3, program4, AccessType.READ, twillRunId(run4), emptySet());
  Relation p4WritesD7 = new Relation(dataset7, program4, AccessType.WRITE, twillRunId(run4), emptySet());

  Relation p5ReadsD3 = new Relation(dataset3, program5, AccessType.READ, twillRunId(run5), emptySet());
  Relation p5ReadsD6 = new Relation(dataset6, program5, AccessType.READ, twillRunId(run5), emptySet());
  Relation p5WritesD1 = new Relation(dataset1, program5, AccessType.WRITE, twillRunId(run5), emptySet());

  Lineage fullLineage = new Lineage(ImmutableSet.of(p1ReadsS1, p1ReadsD1, p1WritesD2, p1WritesD4,
                                                    p2ReadsD2, p2WritesD3, p2WritesD5,
                                                    p3ReadsD5, p3WritesD6,
                                                    p4ReadsD2, p4ReadsD3, p4WritesD7,
                                                    p5ReadsD3, p5ReadsD6, p5WritesD1));

  // With a generous level limit, every entry point resolves to the complete graph.
  // Lineage for D1
  Assert.assertEquals(fullLineage, admin.computeLineage(dataset1, 500, 20000, 100));
  // Lineage for D5
  Assert.assertEquals(fullLineage, admin.computeLineage(dataset5, 500, 20000, 100));
  // Lineage for D7
  Assert.assertEquals(fullLineage, admin.computeLineage(dataset7, 500, 20000, 100));
  // Lineage for S1
  Assert.assertEquals(fullLineage, admin.computeLineage(stream1, 500, 20000, 100));

  // One level around D5: only the program that writes it (P2) and the program that reads it (P3).
  //   D2 -> P2 -> D3
  //         P2 -> D5 -> P3 -> D6
  Lineage oneLevel = admin.computeLineage(dataset5, 500, 20000, 1);
  Assert.assertEquals(ImmutableSet.of(p2ReadsD2, p2WritesD3, p2WritesD5, p3ReadsD5, p3WritesD6),
                      oneLevel.getRelations());

  // One level around S1: only P1 and the entities it touches.
  //   S1 -> P1, D1 -> P1, P1 -> D2, P1 -> D4
  oneLevel = admin.computeLineage(stream1, 500, 20000, 1);
  Assert.assertEquals(ImmutableSet.of(p1ReadsS1, p1ReadsD1, p1WritesD2, p1WritesD4),
                      oneLevel.getRelations());
}
use of co.cask.cdap.data2.metadata.lineage.Lineage in project cdap by caskdata.
the class LineageHandler method streamLineage.
/**
 * HTTP GET endpoint that computes the lineage of a stream and sends it back as a JSON
 * {@link LineageRecord}.
 *
 * <p>The parsed range is passed to the lineage computation as-is and converted from milliseconds
 * to seconds for the serialized record.
 *
 * @param request the incoming HTTP request (not read by this method)
 * @param responder used to send the JSON response
 * @param namespaceId namespace of the stream
 * @param stream name of the stream
 * @param startStr start of the time range, parsed by {@code parseRange}
 * @param endStr end of the time range, parsed by {@code parseRange}
 * @param levels number of lineage levels to compute; checked by {@code checkLevels}
 * @param collapse collapse types requested by the caller
 * @param rollup rollup option forwarded to the lineage computation
 */
@GET
@Path("/namespaces/{namespace-id}/streams/{stream-id}/lineage")
public void streamLineage(HttpRequest request, HttpResponder responder,
                          @PathParam("namespace-id") String namespaceId,
                          @PathParam("stream-id") String stream,
                          @QueryParam("start") String startStr,
                          @QueryParam("end") String endStr,
                          @QueryParam("levels") @DefaultValue("10") int levels,
                          @QueryParam("collapse") List<String> collapse,
                          @QueryParam("rollup") String rollup) throws Exception {
  checkLevels(levels);
  TimeRange range = parseRange(startStr, endStr);

  Lineage lineage = lineageAdmin.computeLineage(new StreamId(namespaceId, stream),
                                                range.getStart(), range.getEnd(), levels, rollup);

  // The record carries the range in seconds, while the range itself is in milliseconds.
  LineageRecord record = LineageSerializer.toLineageRecord(TimeUnit.MILLISECONDS.toSeconds(range.getStart()),
                                                           TimeUnit.MILLISECONDS.toSeconds(range.getEnd()),
                                                           lineage, getCollapseTypes(collapse));
  responder.sendJson(HttpResponseStatus.OK, GSON.toJson(record, LineageRecord.class));
}
use of co.cask.cdap.data2.metadata.lineage.Lineage in project cdap by caskdata.
the class LineageTestRun method testFlowLineage.
/**
 * End-to-end check of lineage and run metadata for a flow: deploys {@link AllProgramsApp} into a
 * fresh namespace, tags the app, flow, dataset and stream with user metadata, runs the flow once,
 * and then asserts on the lineage fetched for the dataset and the stream over several time ranges.
 * The namespace is deleted at the end regardless of the outcome.
 */
@Test
public void testFlowLineage() throws Exception {
  NamespaceId ns = new NamespaceId("testFlowLineage");
  ApplicationId appId = ns.app(AllProgramsApp.NAME);
  ProgramId flowId = appId.flow(AllProgramsApp.NoOpFlow.NAME);
  DatasetId datasetId = ns.dataset(AllProgramsApp.DATASET_NAME);
  StreamId streamId = ns.stream(AllProgramsApp.STREAM_NAME);

  namespaceClient.create(new NamespaceMeta.Builder().setName(ns).build());
  try {
    appClient.deploy(ns, createAppJarFile(AllProgramsApp.class));

    // Add metadata to the application
    ImmutableMap<String, String> appProps = ImmutableMap.of("app-key1", "app-value1");
    addProperties(appId, appProps);
    Assert.assertEquals(appProps, getProperties(appId, MetadataScope.USER));
    ImmutableSet<String> appTagSet = ImmutableSet.of("app-tag1");
    addTags(appId, appTagSet);
    Assert.assertEquals(appTagSet, getTags(appId, MetadataScope.USER));

    // Add metadata to the flow
    ImmutableMap<String, String> flowProps = ImmutableMap.of("flow-key1", "flow-value1");
    addProperties(flowId, flowProps);
    Assert.assertEquals(flowProps, getProperties(flowId, MetadataScope.USER));
    ImmutableSet<String> flowTagSet = ImmutableSet.of("flow-tag1", "flow-tag2");
    addTags(flowId, flowTagSet);
    Assert.assertEquals(flowTagSet, getTags(flowId, MetadataScope.USER));

    // Add metadata to the dataset
    ImmutableMap<String, String> dataProps = ImmutableMap.of("data-key1", "data-value1");
    addProperties(datasetId, dataProps);
    Assert.assertEquals(dataProps, getProperties(datasetId, MetadataScope.USER));
    ImmutableSet<String> dataTagSet = ImmutableSet.of("data-tag1", "data-tag2");
    addTags(datasetId, dataTagSet);
    Assert.assertEquals(dataTagSet, getTags(datasetId, MetadataScope.USER));

    // Add metadata to the stream
    ImmutableMap<String, String> streamProps = ImmutableMap.of("stream-key1", "stream-value1");
    addProperties(streamId, streamProps);
    Assert.assertEquals(streamProps, getProperties(streamId, MetadataScope.USER));
    ImmutableSet<String> streamTagSet = ImmutableSet.of("stream-tag1", "stream-tag2");
    addTags(streamId, streamTagSet);
    Assert.assertEquals(streamTagSet, getTags(streamId, MetadataScope.USER));

    // Run the flow once, bracketing the run with start/stop timestamps in seconds.
    long startTime = TimeMathParser.nowInSeconds();
    RunId flowRunId = runAndWait(flowId);
    // Wait a few seconds so that the stop time in seconds is greater than the start time in seconds.
    TimeUnit.SECONDS.sleep(2);
    waitForStop(flowId, true);
    long stopTime = TimeMathParser.nowInSeconds();

    // Fetch dataset lineage
    LineageRecord actual = fetchLineage(datasetId, startTime, stopTime, 10);
    Relation datasetAccess = new Relation(datasetId, flowId, AccessType.UNKNOWN, flowRunId,
                                          ImmutableSet.of(flowId.flowlet(AllProgramsApp.A.NAME)));
    Relation streamAccess = new Relation(streamId, flowId, AccessType.READ, flowRunId,
                                         ImmutableSet.of(flowId.flowlet(AllProgramsApp.A.NAME)));
    LineageRecord expected = LineageSerializer.toLineageRecord(
      startTime, stopTime,
      new Lineage(ImmutableSet.of(datasetAccess, streamAccess)),
      Collections.<CollapseType>emptySet());
    Assert.assertEquals(expected, actual);

    // Fetch dataset lineage with time strings; only the relations are compared
    actual = fetchLineage(datasetId, "now-1h", "now+1h", 10);
    Assert.assertEquals(expected.getRelations(), actual.getRelations());

    // Fetch stream lineage; it is the same as the dataset's lineage
    actual = fetchLineage(streamId, startTime, stopTime, 10);
    Assert.assertEquals(expected, actual);

    // Fetch stream lineage with time strings; again the same relations as the dataset's lineage
    actual = fetchLineage(streamId, "now-1h", "now+1h", 10);
    Assert.assertEquals(expected.getRelations(), actual.getRelations());

    // Assert metadata
    // Id.Flow needs conversion to Id.Program JIRA - CDAP-3658
    MetadataRecord appRecord = new MetadataRecord(appId, MetadataScope.USER, appProps, appTagSet);
    MetadataRecord flowRecord = new MetadataRecord(flowId, MetadataScope.USER, flowProps, flowTagSet);
    MetadataRecord dataRecord = new MetadataRecord(datasetId, MetadataScope.USER, dataProps, dataTagSet);
    MetadataRecord streamRecord = new MetadataRecord(streamId, MetadataScope.USER, streamProps, streamTagSet);
    Assert.assertEquals(toSet(appRecord, flowRecord, dataRecord, streamRecord),
                        fetchRunMetadata(flowId.run(flowRunId.getId())));

    // A time range entirely after the flow run should return no results
    long laterStartTime = stopTime + 1000;
    long laterEndTime = stopTime + 5000;
    actual = fetchLineage(streamId, laterStartTime, laterEndTime, 10);
    LineageRecord emptyLater = LineageSerializer.toLineageRecord(
      laterStartTime, laterEndTime,
      new Lineage(ImmutableSet.<Relation>of()),
      Collections.<CollapseType>emptySet());
    Assert.assertEquals(emptyLater, actual);

    // A time range entirely before the flow run should return no results
    long earlierStartTime = startTime - 5000;
    long earlierEndTime = startTime - 1000;
    actual = fetchLineage(streamId, earlierStartTime, earlierEndTime, 10);
    LineageRecord emptyEarlier = LineageSerializer.toLineageRecord(
      earlierStartTime, earlierEndTime,
      new Lineage(ImmutableSet.<Relation>of()),
      Collections.<CollapseType>emptySet());
    Assert.assertEquals(emptyEarlier, actual);

    // Test bad time ranges
    fetchLineage(datasetId, "sometime", "sometime", 10, BadRequestException.class);
    fetchLineage(datasetId, "now+1h", "now-1h", 10, BadRequestException.class);

    // Test non-existent run
    assertRunMetadataNotFound(flowId.run(RunIds.generate(1000).getId()));
  } finally {
    namespaceClient.delete(ns);
  }
}
Aggregations