Use of io.cdap.cdap.proto.metadata.lineage.LineageRecord in project cdap by cdapio.
The class LineageSerializer, method toLineageRecord.
public static LineageRecord toLineageRecord(long start, long end, Lineage lineage,
                                            Set<CollapseType> collapseTypes) {
  Set<RelationRecord> relationBuilder = new HashSet<>();
  Map<String, ProgramRecord> programBuilder = new HashMap<>();
  Map<String, DataRecord> dataBuilder = new HashMap<>();
  // Collapse the raw relations along the requested dimensions (e.g. access type)
  // before serializing them.
  Set<CollapsedRelation> collapsedRelations =
    LineageCollapser.collapseRelations(lineage.getRelations(), collapseTypes);
  for (CollapsedRelation relation : collapsedRelations) {
    String dataKey = makeDataKey(relation.getData());
    String programKey = makeProgramKey(relation.getProgram());
    RelationRecord relationRecord =
      new RelationRecord(dataKey, programKey, convertAccessType(relation.getAccess()),
                         convertRuns(relation.getRuns()), convertComponents(relation.getComponents()));
    relationBuilder.add(relationRecord);
    // Index programs and datasets by the same keys the relation records reference.
    programBuilder.put(programKey, new ProgramRecord(relation.getProgram()));
    dataBuilder.put(dataKey, new DataRecord(relation.getData()));
  }
  return new LineageRecord(start, end, relationBuilder, programBuilder, dataBuilder);
}
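A minimal sketch of calling this serializer directly. The entity IDs and names below are hypothetical, and the run ID is generated via CDAP's RunIds utility (an assumption, not shown in the snippet above); only the Lineage, Relation, and toLineageRecord usages are taken from this page.

DatasetId dataset = new NamespaceId("default").dataset("purchases"); // hypothetical dataset
ProgramId program = new NamespaceId("default").app("PurchaseApp").mr("PurchaseMR"); // hypothetical program
RunId run = RunIds.generate(); // assumes io.cdap.cdap.common.app.RunIds
long end = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
long start = end - TimeUnit.HOURS.toSeconds(1);
Lineage lineage = new Lineage(ImmutableSet.of(new Relation(dataset, program, AccessType.READ, run)));
// With no collapse types, each relation should pass through uncollapsed.
LineageRecord record = LineageSerializer.toLineageRecord(start, end, lineage, Collections.emptySet());

The resulting record keys programs and datasets by the string keys embedded in its relation records, so a consumer can resolve a RelationRecord back to its full ProgramRecord and DataRecord.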
Use of io.cdap.cdap.proto.metadata.lineage.LineageRecord in project cdap by cdapio.
The class GetDatasetLineageCommand, method perform.
@Override
public void perform(Arguments arguments, PrintStream output) throws Exception {
  long currentTime = System.currentTimeMillis();
  DatasetId dataset = cliConfig.getCurrentNamespace()
    .dataset(arguments.get(ArgumentName.DATASET.toString()));
  // "start" and "end" default to the open-ended "min"/"max" bounds.
  long start = getTimestamp(arguments.getOptional("start", "min"), currentTime);
  long end = getTimestamp(arguments.getOptional("end", "max"), currentTime);
  Integer levels = arguments.getIntOptional("levels", null);
  LineageRecord lineage = client.getLineage(dataset, start, end, levels);
  Table table = Table.builder()
    .setHeader("start", "end", "relations", "programs", "data")
    .setRows(Collections.<List<String>>singletonList(Lists.newArrayList(
      Long.toString(lineage.getStart()),
      Long.toString(lineage.getEnd()),
      GSON.toJson(lineage.getRelations()),
      GSON.toJson(lineage.getPrograms()),
      GSON.toJson(lineage.getData()))))
    .build();
  cliConfig.getTableRenderer().render(cliConfig, output, table);
}
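For reference, the same lookup sketched programmatically against a LineageClient. Only the client.getLineage(dataset, start, end, levels) call is taken from the command above; the ClientConfig/ConnectionConfig builders, hostname, port, namespace, and dataset name are assumptions for illustration.

ClientConfig config = ClientConfig.builder()
  .setConnectionConfig(ConnectionConfig.builder()
    .setHostname("localhost") // illustrative CDAP router host
    .setPort(11015)           // illustrative router port
    .build())
  .build();
LineageClient client = new LineageClient(config);
DatasetId dataset = new NamespaceId("default").dataset("purchases"); // hypothetical dataset
long end = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
long start = end - TimeUnit.HOURS.toSeconds(1);
// levels is optional in the command above; null mirrors its default.
LineageRecord lineage = client.getLineage(dataset, start, end, null);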
Use of io.cdap.cdap.proto.metadata.lineage.LineageRecord in project cdap by cdapio.
The class LineageHttpHandlerTestRun, method testAllProgramsLineage.
@Test
public void testAllProgramsLineage() throws Exception {
  NamespaceId namespace = new NamespaceId("testAllProgramsLineage");
  ApplicationId app = namespace.app(AllProgramsApp.NAME);
  ProgramId mapreduce = app.mr(AllProgramsApp.NoOpMR.NAME);
  ProgramId mapreduce2 = app.mr(AllProgramsApp.NoOpMR2.NAME);
  ProgramId spark = app.spark(AllProgramsApp.NoOpSpark.NAME);
  ProgramId service = app.service(AllProgramsApp.NoOpService.NAME);
  ProgramId worker = app.worker(AllProgramsApp.NoOpWorker.NAME);
  ProgramId workflow = app.workflow(AllProgramsApp.NoOpWorkflow.NAME);
  DatasetId dataset = namespace.dataset(AllProgramsApp.DATASET_NAME);
  DatasetId dataset2 = namespace.dataset(AllProgramsApp.DATASET_NAME2);
  DatasetId dataset3 = namespace.dataset(AllProgramsApp.DATASET_NAME3);
  namespaceClient.create(new NamespaceMeta.Builder().setName(namespace.getNamespace()).build());
  try {
    appClient.deploy(namespace, createAppJarFile(AllProgramsApp.class));
    // Add metadata
    ImmutableSet<String> sparkTags = ImmutableSet.of("spark-tag1", "spark-tag2");
    addTags(spark, sparkTags);
    Assert.assertEquals(sparkTags, getTags(spark, MetadataScope.USER));
    ImmutableSet<String> workerTags = ImmutableSet.of("worker-tag1");
    addTags(worker, workerTags);
    Assert.assertEquals(workerTags, getTags(worker, MetadataScope.USER));
    ImmutableMap<String, String> datasetProperties = ImmutableMap.of("data-key1", "data-value1");
    addProperties(dataset, datasetProperties);
    Assert.assertEquals(datasetProperties, getProperties(dataset, MetadataScope.USER));
    // Start all programs
    RunId mrRunId = runAndWait(mapreduce);
    RunId mrRunId2 = runAndWait(mapreduce2);
    RunId sparkRunId = runAndWait(spark);
    runAndWait(workflow);
    RunId workflowMrRunId = getRunId(mapreduce, mrRunId);
    RunId serviceRunId = runAndWait(service);
    // The worker calls the service to make it access datasets, so the service
    // must start before the worker and stop after it.
    RunId workerRunId = runAndWait(worker);
    // Wait for programs to finish
    waitForStop(mapreduce, false);
    waitForStop(mapreduce2, false);
    waitForStop(spark, false);
    waitForStop(workflow, false);
    waitForStop(worker, false);
    waitForStop(service, true);
    long now = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
    long oneHour = TimeUnit.HOURS.toSeconds(1);
    // Fetch dataset lineage
    LineageRecord lineage = fetchLineage(dataset, now - oneHour, now + oneHour,
                                         toSet(CollapseType.ACCESS), 10);
    // The dataset is accessed by all programs
    LineageRecord expected = LineageSerializer.toLineageRecord(
      now - oneHour, now + oneHour,
      new Lineage(ImmutableSet.of(
        // Dataset access
        new Relation(dataset, mapreduce, AccessType.WRITE, mrRunId),
        new Relation(dataset3, mapreduce, AccessType.READ, mrRunId),
        new Relation(dataset, mapreduce2, AccessType.WRITE, mrRunId2),
        new Relation(dataset2, mapreduce2, AccessType.READ, mrRunId2),
        new Relation(dataset, spark, AccessType.READ, sparkRunId),
        new Relation(dataset2, spark, AccessType.WRITE, sparkRunId),
        new Relation(dataset3, spark, AccessType.READ, sparkRunId),
        new Relation(dataset3, spark, AccessType.WRITE, sparkRunId),
        new Relation(dataset, mapreduce, AccessType.WRITE, workflowMrRunId),
        new Relation(dataset3, mapreduce, AccessType.READ, workflowMrRunId),
        new Relation(dataset, service, AccessType.WRITE, serviceRunId),
        new Relation(dataset, worker, AccessType.WRITE, workerRunId))),
      toSet(CollapseType.ACCESS));
    Assert.assertEquals(expected, lineage);
  } finally {
    namespaceClient.delete(namespace);
  }
}
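The CollapseType.ACCESS argument used in this test merges relations that differ only in access type. A small isolated sketch of that behavior; the entity IDs are hypothetical and the run ID again assumes CDAP's RunIds utility, while the Relation constructor and LineageCollapser.collapseRelations signature match their uses on this page.

DatasetId data = new NamespaceId("default").dataset("events"); // hypothetical dataset
ProgramId program = new NamespaceId("default").app("EventsApp").mr("EventsMR"); // hypothetical program
RunId run = RunIds.generate(); // assumes io.cdap.cdap.common.app.RunIds
Set<Relation> relations = ImmutableSet.of(
  new Relation(data, program, AccessType.READ, run),
  new Relation(data, program, AccessType.WRITE, run));
// Collapsing on ACCESS should merge the two relations into a single
// CollapsedRelation whose access set is {READ, WRITE}.
Set<CollapsedRelation> collapsed =
  LineageCollapser.collapseRelations(relations, ImmutableSet.of(CollapseType.ACCESS));

This is also why the test builds its expected record with toSet(CollapseType.ACCESS) on both sides: the fetched lineage and the locally serialized lineage are collapsed identically before comparison.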