use of org.apache.twill.api.RunId in project cdap by caskdata.
the class LineageAdmin method doComputeLineage.
/**
 * Computes the lineage of {@code sourceData} by repeatedly expanding from data to the programs
 * related to it, and from those programs to the data related to them.
 *
 * <p>The time window is converted to seconds and used to look up the runs that were running in
 * that window; those runs are then turned into a scan range plus filter via {@code getScanRange}.
 * Each round visits only datasets and programs that have not been visited in an earlier round.
 *
 * @param sourceData the entity to start the lineage computation from
 * @param startMillis start of the time window, in milliseconds
 * @param endMillis end of the time window, in milliseconds
 * @param levels maximum number of expansion rounds to perform
 * @param rollup if non-null and containing {@code "workflow"}, the collected relations are passed
 *               through {@code doComputeRollupLineage} before the lineage is built
 * @return the lineage built from all relations collected during the expansion
 * @throws NotFoundException declared by this method; presumably propagated from the relation
 *                           lookups -- TODO confirm against the callees
 */
private Lineage doComputeLineage(final NamespacedEntityId sourceData, long startMillis, long endMillis, int levels, @Nullable String rollup) throws NotFoundException {
  LOG.trace("Computing lineage for data {}, startMillis {}, endMillis {}, levels {}", sourceData, startMillis, endMillis, levels);

  // Convert start time and end time period into scan keys in terms of program start times.
  long startSecs = TimeUnit.MILLISECONDS.toSeconds(startMillis);
  long endSecs = TimeUnit.MILLISECONDS.toSeconds(endMillis);
  Set<RunId> runsInWindow = store.getRunningInRange(startSecs, endSecs);
  if (LOG.isTraceEnabled()) {
    LOG.trace("Got {} rundIds in time range ({}, {})", runsInWindow.size(), startMillis, endMillis);
  }

  ScanRangeWithFilter scanRange = getScanRange(runsInWindow);
  LOG.trace("Using scan start = {}, scan end = {}", scanRange.getStart(), scanRange.getEnd());

  Multimap<RelationKey, Relation> relations = HashMultimap.create();
  Set<NamespacedEntityId> seenData = new HashSet<>();
  Set<ProgramId> seenPrograms = new HashSet<>();
  // Frontiers hold the entities discovered in the previous half-round that are still to be expanded.
  Set<NamespacedEntityId> dataFrontier = new HashSet<>();
  Set<ProgramId> programFrontier = new HashSet<>();
  dataFrontier.add(sourceData);

  int level = 0;
  while (level < levels) {
    LOG.trace("Level {}", level);

    // Half-round 1: data -> programs
    programFrontier.clear();
    for (NamespacedEntityId data : dataFrontier) {
      if (!seenData.add(data)) {
        // Already expanded in an earlier round
        continue;
      }
      LOG.trace("Visiting dataset {}", data);
      Iterable<Relation> programRelations = getProgramRelations(data, scanRange.getStart(), scanRange.getEnd(), scanRange.getFilter());
      LOG.trace("Got program relations {}", programRelations);
      for (Relation relation : programRelations) {
        relations.put(new RelationKey(relation), relation);
      }
      Iterables.addAll(programFrontier, Iterables.transform(programRelations, RELATION_TO_PROGRAM_FUNCTION));
    }

    // Half-round 2: programs -> data
    dataFrontier.clear();
    for (ProgramId program : programFrontier) {
      if (!seenPrograms.add(program)) {
        // Already expanded in an earlier round
        continue;
      }
      LOG.trace("Visiting program {}", program);
      Iterable<Relation> dataRelations = lineageStoreReader.getRelations(program, scanRange.getStart(), scanRange.getEnd(), scanRange.getFilter());
      LOG.trace("Got data relations {}", dataRelations);
      for (Relation relation : dataRelations) {
        relations.put(new RelationKey(relation), relation);
      }
      Iterables.addAll(dataFrontier, Iterables.transform(dataRelations, RELATION_TO_DATA_FUNCTION));
    }

    level++;
  }

  boolean rollUpWorkflow = rollup != null && rollup.contains("workflow");
  if (rollUpWorkflow) {
    relations = doComputeRollupLineage(relations);
  }

  // Apply COLLAPSE_UNKNOWN_TYPE_FUNCTION to each key's group of relations and flatten the result.
  Lineage result = new Lineage(
    Iterables.concat(Maps.transformValues(relations.asMap(), COLLAPSE_UNKNOWN_TYPE_FUNCTION).values()));
  LOG.trace("Got lineage {}", result);
  return result;
}
use of org.apache.twill.api.RunId in project cdap by caskdata.
the class LineageAdmin method getScanRange.
/**
 * Derives a scan range from a set of runIds: the range spans from the earliest to the latest
 * start time (in milliseconds) found among the runIds, and carries a filter that accepts only
 * relations whose run is in the given set.
 *
 * <p>For an empty set the result is the range {@code [0, 0)} with a filter that rejects everything.
 *
 * @param runIds input runIds set; the returned filter keeps a reference to this set
 * @return scan range with the accompanying filter
 */
@VisibleForTesting
static ScanRangeWithFilter getScanRange(final Set<RunId> runIds) {
  if (runIds.isEmpty()) {
    return new ScanRangeWithFilter(0, 0, Predicates.<Relation>alwaysFalse());
  }

  // Track the smallest and largest run start time seen across all runIds
  long minStart = Long.MAX_VALUE;
  long maxStart = 0;
  for (RunId id : runIds) {
    long started = RunIds.getTime(id, TimeUnit.MILLISECONDS);
    minStart = Math.min(minStart, started);
    maxStart = Math.max(maxStart, started);
  }

  Predicate<Relation> onlyGivenRuns = new Predicate<Relation>() {
    @Override
    public boolean apply(Relation input) {
      return runIds.contains(input.getRun());
    }
  };

  // scan end key is exclusive, so add 1 to include the last runId
  return new ScanRangeWithFilter(minStart, maxStart + 1, onlyGivenRuns);
}
use of org.apache.twill.api.RunId in project cdap by caskdata.
the class WorkflowDataset method getRecord.
/**
 * Reads the stored record for one run of a workflow.
 *
 * <p>The row key is built from the workflow id and the start time (in seconds) extracted from
 * the run id string.
 *
 * @param id the workflow the run belongs to
 * @param pid the run id, in the string form accepted by {@code RunIds.fromString}
 * @return the record assembled from the row's RUNID, TIME_TAKEN and NODES columns,
 *         or {@code null} if the row is empty
 */
@Nullable
WorkflowRunRecord getRecord(WorkflowId id, String pid) {
  long startTimeSecs = RunIds.getTime(RunIds.fromString(pid), TimeUnit.SECONDS);
  byte[] rowKey = getRowKeyBuilder(id, startTimeSecs).build().getKey();

  Row row = table.get(rowKey);
  if (row.isEmpty()) {
    return null;
  }

  Map<byte[], byte[]> cols = row.getColumns();
  String storedRunId = Bytes.toString(cols.get(RUNID));
  long elapsed = Bytes.toLong(cols.get(TIME_TAKEN));
  // NODES holds JSON that is deserialized using PROGRAM_RUNS_TYPE
  String nodesJson = Bytes.toString(cols.get(NODES));
  List<ProgramRun> nodeRuns = GSON.fromJson(nodesJson, PROGRAM_RUNS_TYPE);
  return new WorkflowRunRecord(storedRunId, elapsed, nodeRuns);
}
use of org.apache.twill.api.RunId in project cdap by caskdata.
the class WorkerProgramRunner method run.
/**
 * Starts one instance of a worker program and returns a controller for it.
 *
 * <p>The instance id and instance count are read from the program options and validated, the
 * worker specification is looked up by program name and copied with the instance count taken
 * from the options, and a {@code WorkerDriver} is created and started. The plugin instantiator
 * is closed when the driver terminates or fails, or right away if set-up throws.
 *
 * @param program the program to run; its type must be {@code ProgramType.WORKER}
 * @param options program options carrying the instance id, instance count and run id
 * @return the controller wrapping the started worker driver
 */
@Override
public ProgramController run(Program program, ProgramOptions options) {
  ApplicationSpecification appSpec = program.getApplicationSpecification();
  Preconditions.checkNotNull(appSpec, "Missing application specification.");

  String instanceIdOption = options.getArguments().getOption(ProgramOptionConstants.INSTANCE_ID, "-1");
  int instanceId = Integer.parseInt(instanceIdOption);
  Preconditions.checkArgument(instanceId >= 0, "Missing instance Id");

  String instanceCountOption = options.getArguments().getOption(ProgramOptionConstants.INSTANCES, "0");
  int instanceCount = Integer.parseInt(instanceCountOption);
  Preconditions.checkArgument(instanceCount > 0, "Invalid or missing instance count");

  RunId runId = ProgramRunners.getRunId(options);

  ProgramType programType = program.getType();
  Preconditions.checkNotNull(programType, "Missing processor type.");
  Preconditions.checkArgument(programType == ProgramType.WORKER, "Only Worker process type is supported.");

  WorkerSpecification workerSpec = appSpec.getWorkers().get(program.getName());
  Preconditions.checkArgument(workerSpec != null, "Missing Worker specification for %s", program.getId());

  // NOTE(review): INSTANCES was already required to parse to a positive number above, so the
  // spec-based default here looks unreachable -- confirm before simplifying.
  String instancesOption = options.getArguments().getOption(ProgramOptionConstants.INSTANCES,
                                                            String.valueOf(workerSpec.getInstances()));
  WorkerSpecification effectiveSpec = new WorkerSpecification(
    workerSpec.getClassName(),
    workerSpec.getName(),
    workerSpec.getDescription(),
    workerSpec.getProperties(),
    workerSpec.getDatasets(),
    workerSpec.getResources(),
    Integer.valueOf(instancesOption));

  // Setup dataset framework context, if required
  if (datasetFramework instanceof ProgramContextAware) {
    ProgramContextAware contextAware = (ProgramContextAware) datasetFramework;
    contextAware.setContext(new BasicProgramContext(program.getId().run(runId)));
  }

  final PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
  try {
    BasicWorkerContext workerContext = new BasicWorkerContext(effectiveSpec, program, options, cConf, instanceId, instanceCount, metricsCollectionService, datasetFramework, txClient, discoveryServiceClient, streamWriterFactory, pluginInstantiator, secureStore, secureStoreManager, messagingService);
    WorkerDriver driver = new WorkerDriver(program, effectiveSpec, workerContext);

    // Make sure the plugin instantiator is closed once the worker driver has finished,
    // whether it terminated normally or failed.
    ServiceListenerAdapter closePluginsWhenDone = new ServiceListenerAdapter() {
      @Override
      public void terminated(Service.State from) {
        Closeables.closeQuietly(pluginInstantiator);
      }

      @Override
      public void failed(Service.State from, Throwable failure) {
        Closeables.closeQuietly(pluginInstantiator);
      }
    };
    driver.addListener(closePluginsWhenDone, Threads.SAME_THREAD_EXECUTOR);

    String componentName = workerSpec.getName() + "-" + instanceId;
    // The controller is created before the driver is started, as in the original ordering
    ProgramController controller = new WorkerControllerServiceAdapter(driver, program.getId(), runId, componentName);
    driver.start();
    return controller;
  } catch (Throwable t) {
    // Set-up failed before the listener could take over clean-up, so close here and rethrow
    Closeables.closeQuietly(pluginInstantiator);
    throw t;
  }
}
use of org.apache.twill.api.RunId in project cdap by caskdata.
the class ProgramLifecycleService method findRuntimeInfo.
/**
 * Finds runtime information for a program, optionally narrowed to a single run.
 *
 * @param programId the program to look up
 * @param runId the run to look up, or {@code null} to return the runtime info of everything
 *              listed by the runtime service for the program
 * @return when {@code runId} is {@code null}, a new list holding all listed runtime infos;
 *         otherwise a single-element list with the matching runtime info, or an empty list
 *         if the lookup found nothing
 * @throws BadRequestException if {@code runId} cannot be parsed into a run id
 */
private List<ProgramRuntimeService.RuntimeInfo> findRuntimeInfo(ProgramId programId, @Nullable String runId) throws BadRequestException {
  if (runId == null) {
    // No specific run requested: return everything the runtime service lists for the program
    return new ArrayList<>(runtimeService.list(programId).values());
  }

  RunId parsedRunId;
  try {
    parsedRunId = RunIds.fromString(runId);
  } catch (IllegalArgumentException e) {
    throw new BadRequestException("Error parsing run-id.", e);
  }

  ProgramRuntimeService.RuntimeInfo match = runtimeService.lookup(programId, parsedRunId);
  if (match == null) {
    return Collections.<RuntimeInfo>emptyList();
  }
  return Collections.singletonList(match);
}
Aggregations