use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by cdapio.
the class LineageCollapserTest method testCollapseComponent.
/**
 * Collapses three relations on the same dataset/program/run using
 * {@code CollapseType.COMPONENT} and expects READ and WRITE access to remain
 * separate collapsed relations.
 */
@Test
public void testCollapseComponent() {
  // The first and third relations are constructed from identical arguments.
  Relation firstRead = new Relation(data1, service1, AccessType.READ, runId1);
  Relation write = new Relation(data1, service1, AccessType.WRITE, runId1);
  Relation secondRead = new Relation(data1, service1, AccessType.READ, runId1);
  Set<Relation> relations = ImmutableSet.of(firstRead, write, secondRead);

  // Expected result: one collapsed relation per access type, each with an empty component set.
  CollapsedRelation expectedRead = new CollapsedRelation(
      data1, service1, toSet(AccessType.READ), toSet(runId1), Collections.emptySet());
  CollapsedRelation expectedWrite = new CollapsedRelation(
      data1, service1, toSet(AccessType.WRITE), toSet(runId1), Collections.emptySet());

  Assert.assertEquals(
      toSet(expectedRead, expectedWrite),
      LineageCollapser.collapseRelations(relations, ImmutableSet.of(CollapseType.COMPONENT)));
}
use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by cdapio.
the class LineageCollapserTest method testCollapseAccess.
/**
 * Collapses three relations on the same dataset/program/run using
 * {@code CollapseType.ACCESS} and expects a single collapsed relation that
 * carries both READ and WRITE access.
 */
@Test
public void testCollapseAccess() {
  // The first and third relations are constructed from identical arguments.
  Relation firstRead = new Relation(data1, service1, AccessType.READ, runId1);
  Relation write = new Relation(data1, service1, AccessType.WRITE, runId1);
  Relation secondRead = new Relation(data1, service1, AccessType.READ, runId1);
  Set<Relation> relations = ImmutableSet.of(firstRead, write, secondRead);

  // Expected result: both access types merged into one relation with an empty component set.
  CollapsedRelation expectedMerged = new CollapsedRelation(
      data1,
      service1,
      toSet(AccessType.READ, AccessType.WRITE),
      toSet(runId1),
      Collections.emptySet());

  Assert.assertEquals(
      toSet(expectedMerged),
      LineageCollapser.collapseRelations(relations, ImmutableSet.of(CollapseType.ACCESS)));
}
use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by cdapio.
the class LineageCollapserTest method testCollapseMulti.
/**
 * Collapses relations spanning several dataset/program pairs using
 * {@code CollapseType.ACCESS} and expects one collapsed relation per pair.
 */
@Test
public void testCollapseMulti() {
  // data1 <-> service1: READ, WRITE, and a second READ built from identical arguments.
  Relation d1s1Read = new Relation(data1, service1, AccessType.READ, runId1);
  Relation d1s1Write = new Relation(data1, service1, AccessType.WRITE, runId1);
  Relation d1s1ReadAgain = new Relation(data1, service1, AccessType.READ, runId1);
  // data1 <-> service2: two READ relations built from identical arguments.
  Relation d1s2Read = new Relation(data1, service2, AccessType.READ, runId1);
  Relation d1s2ReadAgain = new Relation(data1, service2, AccessType.READ, runId1);
  // data2 <-> service1: two READ relations built from identical arguments.
  Relation d2s1Read = new Relation(data2, service1, AccessType.READ, runId1);
  Relation d2s1ReadAgain = new Relation(data2, service1, AccessType.READ, runId1);

  Set<Relation> relations = ImmutableSet.of(
      d1s1Read, d1s1Write, d1s1ReadAgain,
      d1s2Read, d1s2ReadAgain,
      d2s1Read, d2s1ReadAgain);

  // Expected result: one collapsed relation per dataset/program pair.
  CollapsedRelation expectedD1S1 = new CollapsedRelation(
      data1,
      service1,
      toSet(AccessType.READ, AccessType.WRITE),
      toSet(runId1),
      Collections.emptySet());
  CollapsedRelation expectedD1S2 = new CollapsedRelation(
      data1, service2, toSet(AccessType.READ), toSet(runId1), Collections.emptySet());
  CollapsedRelation expectedD2S1 = new CollapsedRelation(
      data2, service1, toSet(AccessType.READ), toSet(runId1), Collections.emptySet());

  Assert.assertEquals(
      toSet(expectedD1S1, expectedD1S2, expectedD2S1),
      LineageCollapser.collapseRelations(relations, ImmutableSet.of(CollapseType.ACCESS)));
}
use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by cdapio.
the class LineageCollapserTest method testCollapseRun.
/**
 * Collapses relations using {@code CollapseType.RUN} and expects the two READ
 * relations from different runs to merge, while the WRITE relation stays separate.
 */
@Test
public void testCollapseRun() {
  Relation readInRun1 = new Relation(data1, service1, AccessType.READ, runId1);
  Relation writeInRun1 = new Relation(data1, service1, AccessType.WRITE, runId1);
  Relation readInRun2 = new Relation(data1, service1, AccessType.READ, runId2);
  Set<Relation> relations = ImmutableSet.of(readInRun1, writeInRun1, readInRun2);

  // Expected result: READ carries both run ids, WRITE carries only the first.
  CollapsedRelation expectedRead = new CollapsedRelation(
      data1,
      service1,
      toSet(AccessType.READ),
      toSet(runId1, runId2),
      Collections.emptySet());
  CollapsedRelation expectedWrite = new CollapsedRelation(
      data1, service1, toSet(AccessType.WRITE), toSet(runId1), Collections.emptySet());

  Assert.assertEquals(
      toSet(expectedRead, expectedWrite),
      LineageCollapser.collapseRelations(relations, ImmutableSet.of(CollapseType.RUN)));
}
use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by cdapio.
the class LineageHttpHandlerTestRun method testAllProgramsLineage.
/**
 * Deploys {@code AllProgramsApp} into a dedicated namespace, attaches some user metadata,
 * runs every program of the app, and then checks that the lineage fetched for the first
 * dataset (collapsed on access) equals the lineage built from the expected relations.
 * The namespace is deleted in a {@code finally} block whether or not the assertions pass.
 *
 * @throws Exception if any client call or helper used by the test fails
 */
@Test
public void testAllProgramsLineage() throws Exception {
  NamespaceId ns = new NamespaceId("testAllProgramsLineage");
  ApplicationId application = ns.app(AllProgramsApp.NAME);

  // Program ids of the application under test.
  ProgramId mrProgram = application.mr(AllProgramsApp.NoOpMR.NAME);
  ProgramId mrProgram2 = application.mr(AllProgramsApp.NoOpMR2.NAME);
  ProgramId sparkProgram = application.spark(AllProgramsApp.NoOpSpark.NAME);
  ProgramId serviceProgram = application.service(AllProgramsApp.NoOpService.NAME);
  ProgramId workerProgram = application.worker(AllProgramsApp.NoOpWorker.NAME);
  ProgramId workflowProgram = application.workflow(AllProgramsApp.NoOpWorkflow.NAME);

  // Dataset ids referenced by the expected lineage.
  DatasetId ds1 = ns.dataset(AllProgramsApp.DATASET_NAME);
  DatasetId ds2 = ns.dataset(AllProgramsApp.DATASET_NAME2);
  DatasetId ds3 = ns.dataset(AllProgramsApp.DATASET_NAME3);

  NamespaceMeta nsMeta = new NamespaceMeta.Builder().setName(ns.getNamespace()).build();
  namespaceClient.create(nsMeta);
  try {
    appClient.deploy(ns, createAppJarFile(AllProgramsApp.class));

    // Attach user metadata and read it back to confirm it was stored.
    ImmutableSet<String> tagsForSpark = ImmutableSet.of("spark-tag1", "spark-tag2");
    addTags(sparkProgram, tagsForSpark);
    Assert.assertEquals(tagsForSpark, getTags(sparkProgram, MetadataScope.USER));

    ImmutableSet<String> tagsForWorker = ImmutableSet.of("worker-tag1");
    addTags(workerProgram, tagsForWorker);
    Assert.assertEquals(tagsForWorker, getTags(workerProgram, MetadataScope.USER));

    ImmutableMap<String, String> propsForDataset = ImmutableMap.of("data-key1", "data-value1");
    addProperties(ds1, propsForDataset);
    Assert.assertEquals(propsForDataset, getProperties(ds1, MetadataScope.USER));

    // Launch every program, keeping the run ids needed for the expected relations.
    RunId mrRun = runAndWait(mrProgram);
    RunId mrRun2 = runAndWait(mrProgram2);
    RunId sparkRun = runAndWait(sparkProgram);
    runAndWait(workflowProgram);
    // Run id of the first MapReduce program looked up after the workflow run,
    // passing the standalone run id to the helper.
    RunId mrRunFromWorkflow = getRunId(mrProgram, mrRun);
    RunId serviceRun = runAndWait(serviceProgram);
    // The worker calls the service so that the service accesses datasets; the service
    // therefore has to be started before the worker and stopped after it.
    RunId workerRun = runAndWait(workerProgram);

    // Block until all programs have finished; the service is handled last.
    waitForStop(mrProgram, false);
    waitForStop(mrProgram2, false);
    waitForStop(sparkProgram, false);
    waitForStop(workflowProgram, false);
    waitForStop(workerProgram, false);
    waitForStop(serviceProgram, true);

    // Query window: one hour on either side of the current time, in seconds.
    long now = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
    long oneHour = TimeUnit.HOURS.toSeconds(1);
    long windowStart = now - oneHour;
    long windowEnd = now + oneHour;

    // Fetch the lineage of the first dataset, collapsed on access.
    LineageRecord actualLineage =
        fetchLineage(ds1, windowStart, windowEnd, toSet(CollapseType.ACCESS), 10);

    // Relations expected in the lineage, grouped by the run that produced them.
    ImmutableSet<Relation> expectedRelations = ImmutableSet.of(
        // Standalone run of the first MapReduce program
        new Relation(ds1, mrProgram, AccessType.WRITE, mrRun),
        new Relation(ds3, mrProgram, AccessType.READ, mrRun),
        // Second MapReduce program
        new Relation(ds1, mrProgram2, AccessType.WRITE, mrRun2),
        new Relation(ds2, mrProgram2, AccessType.READ, mrRun2),
        // Spark program
        new Relation(ds1, sparkProgram, AccessType.READ, sparkRun),
        new Relation(ds2, sparkProgram, AccessType.WRITE, sparkRun),
        new Relation(ds3, sparkProgram, AccessType.READ, sparkRun),
        new Relation(ds3, sparkProgram, AccessType.WRITE, sparkRun),
        // First MapReduce program under the run id obtained after the workflow
        new Relation(ds1, mrProgram, AccessType.WRITE, mrRunFromWorkflow),
        new Relation(ds3, mrProgram, AccessType.READ, mrRunFromWorkflow),
        // Service and worker
        new Relation(ds1, serviceProgram, AccessType.WRITE, serviceRun),
        new Relation(ds1, workerProgram, AccessType.WRITE, workerRun));

    LineageRecord expectedLineage = LineageSerializer.toLineageRecord(
        windowStart, windowEnd, new Lineage(expectedRelations), toSet(CollapseType.ACCESS));

    Assert.assertEquals(expectedLineage, actualLineage);
  } finally {
    namespaceClient.delete(ns);
  }
}
Aggregations