use of io.cdap.cdap.proto.id.ProgramRunId in project cdap by caskdata.
the class LineageAdmin method doComputeLineage.
private Lineage doComputeLineage(DatasetId sourceData, long startMillis, long endMillis,
                                 int levels, @Nullable String rollup) {
  LOG.trace("Computing lineage for data {}, startMillis {}, endMillis {}, levels {}",
            sourceData, startMillis, endMillis, levels);
  boolean rollUpWorkflow = rollup != null && rollup.contains("workflow");
  // Convert the start and end of the time period into scan keys in terms of program start times.
  Set<RunId> runningInRange = store.getRunningInRange(TimeUnit.MILLISECONDS.toSeconds(startMillis),
                                                      TimeUnit.MILLISECONDS.toSeconds(endMillis));
  LOG.trace("Got {} runIds in time range ({}, {})", runningInRange.size(), startMillis, endMillis);
  ScanRangeWithFilter scanRange = getScanRange(runningInRange);
  LOG.trace("Using scan start = {}, scan end = {}", scanRange.getStart(), scanRange.getEnd());
  Multimap<RelationKey, Relation> relations = HashMultimap.create();
  Set<DatasetId> visitedDatasets = new HashSet<>();
  Set<DatasetId> toVisitDatasets = new HashSet<>();
  Set<ProgramId> visitedPrograms = new HashSet<>();
  Set<ProgramId> toVisitPrograms = new HashSet<>();
  // This map maps an inner program run id to its workflow run id. It is needed to collapse the
  // inner programs and local datasets into the workflow.
  Map<ProgramRunId, ProgramRunId> programWorkflowMap = new HashMap<>();
  toVisitDatasets.add(sourceData);
  for (int i = 0; i < levels; ++i) {
    LOG.trace("Level {}", i);
    toVisitPrograms.clear();
    for (DatasetId d : toVisitDatasets) {
      if (visitedDatasets.add(d)) {
        LOG.trace("Visiting dataset {}", d);
        // Fetch related programs. These will be the inner programs that access the dataset,
        // for example, a MapReduce or Spark program inside a workflow.
        Set<Relation> programRelations = lineageStoreReader.getRelations(d, scanRange.getStart(),
                                                                         scanRange.getEnd(),
                                                                         scanRange.getFilter());
        LOG.trace("Got program relations {}", programRelations);
        // Determine whether a dataset is a local dataset. A local dataset name always ends with
        // the workflow run id.
        if (rollUpWorkflow) {
          computeWorkflowInnerPrograms(toVisitPrograms, programWorkflowMap, programRelations);
        }
        // Add to the relations, replacing each inner program with its workflow using the map, and
        // ignore relations on local datasets, whose names always end with the workflow run id.
        filterAndAddRelations(rollUpWorkflow, relations, programWorkflowMap, programRelations);
        toVisitPrograms.addAll(programRelations.stream()
                                 .map(Relation::getProgram)
                                 .collect(Collectors.toSet()));
      }
    }
    toVisitDatasets.clear();
    for (ProgramId p : toVisitPrograms) {
      if (visitedPrograms.add(p)) {
        LOG.trace("Visiting program {}", p);
        // Fetch related datasets
        Set<Relation> datasetRelations = lineageStoreReader.getRelations(p, scanRange.getStart(),
                                                                         scanRange.getEnd(),
                                                                         scanRange.getFilter());
        LOG.trace("Got data relations {}", datasetRelations);
        Set<DatasetId> localDatasets = filterAndAddRelations(rollUpWorkflow, relations,
                                                             programWorkflowMap, datasetRelations);
        toVisitDatasets.addAll(datasetRelations.stream()
                                 .map(relation -> (DatasetId) relation.getData())
                                 .filter(datasetId -> !localDatasets.contains(datasetId))
                                 .collect(Collectors.toSet()));
      }
    }
  }
  Lineage lineage = new Lineage(
    Iterables.concat(Maps.transformValues(relations.asMap(), COLLAPSE_UNKNOWN_TYPE_FUNCTION::apply).values()));
  LOG.trace("Got lineage {}", lineage);
  return lineage;
}
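The method walks the lineage graph breadth-first, alternating between datasets and programs for the requested number of levels. A minimal usage sketch follows; the public computeLineage wrapper, its exact signature, and the dataset name used here are assumptions for illustration, not confirmed from the CDAP source.
// Hedged sketch: assumes LineageAdmin exposes a public computeLineage wrapper
// around doComputeLineage; verify the actual API before relying on it.
DatasetId sourceData = new DatasetId("default", "purchases"); // hypothetical dataset
long endMillis = System.currentTimeMillis();
long startMillis = endMillis - TimeUnit.DAYS.toMillis(1); // look back 24 hours
// Follow dataset -> program -> dataset edges for up to 2 levels, with no workflow roll-up.
Lineage lineage = lineageAdmin.computeLineage(sourceData, startMillis, endMillis, 2);
for (Relation relation : lineage.getRelations()) {
  System.out.println(relation.getData() + " <-> " + relation.getProgram());
}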
use of io.cdap.cdap.proto.id.ProgramRunId in project cdap by caskdata.
the class SparkTwillRunnableModuleTest method testSpark.
@Test
public void testSpark() {
  ProgramRunId programRunId = NamespaceId.DEFAULT.app("test").spark("spark").run(RunIds.generate());
  for (ClusterMode mode : ClusterMode.values()) {
    Module module = new SparkTwillRunnable("spark") {
      @Override
      protected ServiceAnnouncer getServiceAnnouncer() {
        return new MockTwillContext();
      }
    }.createModule(CConfiguration.create(), new Configuration(),
                   createProgramOptions(programRunId, mode), programRunId);
    Injector injector = Guice.createInjector(module);
    injector.getInstance(SparkProgramRunner.class);
    injector.getInstance(ExploreClient.class);
    Injector contextInjector = SparkRuntimeContextProvider.createInjector(
      CConfiguration.create(), new Configuration(), programRunId.getParent(),
      createProgramOptions(programRunId, mode));
    contextInjector.getInstance(PluginFinder.class);
    contextInjector.getInstance(ExploreClient.class);
  }
}
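Every test on this page calls a createProgramOptions(programRunId, mode) helper that the snippets do not show. Below is a minimal reconstruction under stated assumptions: SimpleProgramOptions and BasicArguments are the standard CDAP implementations, and the ProgramOptionConstants keys are a guess at what the real helper sets.
// Hypothetical reconstruction of the unshown helper; the real test may set
// additional system arguments (e.g. instance id, principal).
private ProgramOptions createProgramOptions(ProgramRunId programRunId, ClusterMode mode) {
  Map<String, String> systemArgs = ImmutableMap.of(
    ProgramOptionConstants.RUN_ID, programRunId.getRun(),
    ProgramOptionConstants.CLUSTER_MODE, mode.name());
  return new SimpleProgramOptions(programRunId.getParent(),
                                  new BasicArguments(systemArgs), new BasicArguments());
}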
use of io.cdap.cdap.proto.id.ProgramRunId in project cdap by caskdata.
the class ProgramTwillRunnableModuleTest method testService.
@Test
public void testService() {
  ProgramRunId programRunId = NamespaceId.DEFAULT.app("test").service("service").run(RunIds.generate());
  for (ClusterMode mode : ClusterMode.values()) {
    Module module = new ServiceTwillRunnable("service") {
      @Override
      protected ServiceAnnouncer getServiceAnnouncer() {
        return new MockTwillContext();
      }
    }.createModule(CConfiguration.create(), new Configuration(),
                   createProgramOptions(programRunId, mode), programRunId);
    Injector injector = Guice.createInjector(module);
    injector.getInstance(ServiceProgramRunner.class);
    injector.getInstance(ExploreClient.class);
  }
}
use of io.cdap.cdap.proto.id.ProgramRunId in project cdap by caskdata.
the class ProgramTwillRunnableModuleTest method testWorkflow.
@Test
public void testWorkflow() {
  ProgramRunId programRunId = NamespaceId.DEFAULT.app("test").workflow("workflow").run(RunIds.generate());
  for (ClusterMode mode : ClusterMode.values()) {
    Module module = new WorkflowTwillRunnable("workflow")
      .createModule(CConfiguration.create(), new Configuration(),
                    createProgramOptions(programRunId, mode), programRunId);
    Injector injector = Guice.createInjector(module);
    injector.getInstance(WorkflowProgramRunner.class);
    // Workflow supports Spark, which supports PluginFinder
    injector.getInstance(PluginFinder.class);
    injector.getInstance(ExploreClient.class);
  }
}
use of io.cdap.cdap.proto.id.ProgramRunId in project cdap by caskdata.
the class ProgramTwillRunnableModuleTest method testMapReduce.
@Test
public void testMapReduce() {
  ProgramRunId programRunId = NamespaceId.DEFAULT.app("test").mr("mapreduce").run(RunIds.generate());
  for (ClusterMode mode : ClusterMode.values()) {
    Module module = new MapReduceTwillRunnable("mapreduce")
      .createModule(CConfiguration.create(), new Configuration(),
                    createProgramOptions(programRunId, mode), programRunId);
    Injector injector = Guice.createInjector(module);
    injector.getInstance(MapReduceProgramRunner.class);
    injector.getInstance(ExploreClient.class);
  }
}
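All four tests construct the ProgramRunId the same way, through the fluent builder chain on NamespaceId. A short sketch of how the resulting id decomposes, using only accessors already exercised in the snippets above (getParent() and getRun()); the printed string forms are illustrative.
ProgramRunId runId = NamespaceId.DEFAULT.app("test").mr("mapreduce").run(RunIds.generate());
ProgramId program = runId.getParent(); // namespace + application + program, without the run part
String run = runId.getRun();           // the generated run id string
System.out.println(program + " run " + run);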