Search in sources :

Example 31 with RunId

use of org.apache.twill.api.RunId in project cdap by caskdata.

the class LineageAdmin method doComputeLineage.

private Lineage doComputeLineage(final NamespacedEntityId sourceData, long startMillis, long endMillis, int levels, @Nullable String rollup) throws NotFoundException {
    LOG.trace("Computing lineage for data {}, startMillis {}, endMillis {}, levels {}", sourceData, startMillis, endMillis, levels);
    // Convert start time and end time period into scan keys in terms of program start times.
    Set<RunId> runningInRange = store.getRunningInRange(TimeUnit.MILLISECONDS.toSeconds(startMillis), TimeUnit.MILLISECONDS.toSeconds(endMillis));
    if (LOG.isTraceEnabled()) {
        LOG.trace("Got {} rundIds in time range ({}, {})", runningInRange.size(), startMillis, endMillis);
    }
    ScanRangeWithFilter scanRange = getScanRange(runningInRange);
    LOG.trace("Using scan start = {}, scan end = {}", scanRange.getStart(), scanRange.getEnd());
    Multimap<RelationKey, Relation> relations = HashMultimap.create();
    Set<NamespacedEntityId> visitedDatasets = new HashSet<>();
    Set<NamespacedEntityId> toVisitDatasets = new HashSet<>();
    Set<ProgramId> visitedPrograms = new HashSet<>();
    Set<ProgramId> toVisitPrograms = new HashSet<>();
    toVisitDatasets.add(sourceData);
    for (int i = 0; i < levels; ++i) {
        LOG.trace("Level {}", i);
        toVisitPrograms.clear();
        for (NamespacedEntityId d : toVisitDatasets) {
            if (visitedDatasets.add(d)) {
                LOG.trace("Visiting dataset {}", d);
                // Fetch related programs
                Iterable<Relation> programRelations = getProgramRelations(d, scanRange.getStart(), scanRange.getEnd(), scanRange.getFilter());
                LOG.trace("Got program relations {}", programRelations);
                for (Relation relation : programRelations) {
                    relations.put(new RelationKey(relation), relation);
                }
                Iterables.addAll(toVisitPrograms, Iterables.transform(programRelations, RELATION_TO_PROGRAM_FUNCTION));
            }
        }
        toVisitDatasets.clear();
        for (ProgramId p : toVisitPrograms) {
            if (visitedPrograms.add(p)) {
                LOG.trace("Visiting program {}", p);
                // Fetch related datasets
                Iterable<Relation> datasetRelations = lineageStoreReader.getRelations(p, scanRange.getStart(), scanRange.getEnd(), scanRange.getFilter());
                LOG.trace("Got data relations {}", datasetRelations);
                for (Relation relation : datasetRelations) {
                    relations.put(new RelationKey(relation), relation);
                }
                Iterables.addAll(toVisitDatasets, Iterables.transform(datasetRelations, RELATION_TO_DATA_FUNCTION));
            }
        }
    }
    if (rollup != null && rollup.contains("workflow")) {
        relations = doComputeRollupLineage(relations);
    }
    Lineage lineage = new Lineage(Iterables.concat(Maps.transformValues(relations.asMap(), COLLAPSE_UNKNOWN_TYPE_FUNCTION).values()));
    LOG.trace("Got lineage {}", lineage);
    return lineage;
}
Also used : Lineage(co.cask.cdap.data2.metadata.lineage.Lineage) ProgramId(co.cask.cdap.proto.id.ProgramId) Relation(co.cask.cdap.data2.metadata.lineage.Relation) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) HashSet(java.util.HashSet)

Example 32 with RunId

use of org.apache.twill.api.RunId in project cdap by caskdata.

the class LineageAdmin method getScanRange.

/**
   * Convert a set of runIds into a scan range based on earliest runtime and latest runtime of runIds.
   * Also, add a scan filter to include only runIds in the given set.
   * @param runIds input runIds set
   * @return scan range
   */
@VisibleForTesting
static ScanRangeWithFilter getScanRange(final Set<RunId> runIds) {
    if (runIds.isEmpty()) {
        return new ScanRangeWithFilter(0, 0, Predicates.<Relation>alwaysFalse());
    }
    // Pick the earliest start time and latest start time for lineage range
    long earliest = Long.MAX_VALUE;
    long latest = 0;
    for (RunId runId : runIds) {
        long runStartTime = RunIds.getTime(runId, TimeUnit.MILLISECONDS);
        if (runStartTime < earliest) {
            earliest = runStartTime;
        }
        if (runStartTime > latest) {
            latest = runStartTime;
        }
    }
    // scan end key is exclusive, so need to add 1 to  to include the last runid
    return new ScanRangeWithFilter(earliest, latest + 1, new Predicate<Relation>() {

        @Override
        public boolean apply(Relation input) {
            return runIds.contains(input.getRun());
        }
    });
}
Also used : Relation(co.cask.cdap.data2.metadata.lineage.Relation) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 33 with RunId

use of org.apache.twill.api.RunId in project cdap by caskdata.

the class WorkflowDataset method getRecord.

@Nullable
WorkflowRunRecord getRecord(WorkflowId id, String pid) {
    RunId runId = RunIds.fromString(pid);
    long startTime = RunIds.getTime(runId, TimeUnit.SECONDS);
    MDSKey mdsKey = getRowKeyBuilder(id, startTime).build();
    byte[] startRowKey = mdsKey.getKey();
    Row indexRow = table.get(startRowKey);
    if (indexRow.isEmpty()) {
        return null;
    }
    Map<byte[], byte[]> columns = indexRow.getColumns();
    String workflowRunId = Bytes.toString(columns.get(RUNID));
    long timeTaken = Bytes.toLong(columns.get(TIME_TAKEN));
    List<ProgramRun> actionRunsList = GSON.fromJson(Bytes.toString(columns.get(NODES)), PROGRAM_RUNS_TYPE);
    return new WorkflowRunRecord(workflowRunId, timeTaken, actionRunsList);
}
Also used : MDSKey(co.cask.cdap.data2.dataset2.lib.table.MDSKey) Row(co.cask.cdap.api.dataset.table.Row) RunId(org.apache.twill.api.RunId) Nullable(javax.annotation.Nullable)

Example 34 with RunId

use of org.apache.twill.api.RunId in project cdap by caskdata.

the class WorkerProgramRunner method run.

@Override
public ProgramController run(Program program, ProgramOptions options) {
    ApplicationSpecification appSpec = program.getApplicationSpecification();
    Preconditions.checkNotNull(appSpec, "Missing application specification.");
    int instanceId = Integer.parseInt(options.getArguments().getOption(ProgramOptionConstants.INSTANCE_ID, "-1"));
    Preconditions.checkArgument(instanceId >= 0, "Missing instance Id");
    int instanceCount = Integer.parseInt(options.getArguments().getOption(ProgramOptionConstants.INSTANCES, "0"));
    Preconditions.checkArgument(instanceCount > 0, "Invalid or missing instance count");
    RunId runId = ProgramRunners.getRunId(options);
    ProgramType programType = program.getType();
    Preconditions.checkNotNull(programType, "Missing processor type.");
    Preconditions.checkArgument(programType == ProgramType.WORKER, "Only Worker process type is supported.");
    WorkerSpecification workerSpec = appSpec.getWorkers().get(program.getName());
    Preconditions.checkArgument(workerSpec != null, "Missing Worker specification for %s", program.getId());
    String instances = options.getArguments().getOption(ProgramOptionConstants.INSTANCES, String.valueOf(workerSpec.getInstances()));
    WorkerSpecification newWorkerSpec = new WorkerSpecification(workerSpec.getClassName(), workerSpec.getName(), workerSpec.getDescription(), workerSpec.getProperties(), workerSpec.getDatasets(), workerSpec.getResources(), Integer.valueOf(instances));
    // Setup dataset framework context, if required
    if (datasetFramework instanceof ProgramContextAware) {
        ProgramId programId = program.getId();
        ((ProgramContextAware) datasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
    }
    final PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
    try {
        BasicWorkerContext context = new BasicWorkerContext(newWorkerSpec, program, options, cConf, instanceId, instanceCount, metricsCollectionService, datasetFramework, txClient, discoveryServiceClient, streamWriterFactory, pluginInstantiator, secureStore, secureStoreManager, messagingService);
        WorkerDriver worker = new WorkerDriver(program, newWorkerSpec, context);
        // Add a service listener to make sure the plugin instantiator is closed when the worker driver finished.
        worker.addListener(new ServiceListenerAdapter() {

            @Override
            public void terminated(Service.State from) {
                Closeables.closeQuietly(pluginInstantiator);
            }

            @Override
            public void failed(Service.State from, Throwable failure) {
                Closeables.closeQuietly(pluginInstantiator);
            }
        }, Threads.SAME_THREAD_EXECUTOR);
        ProgramController controller = new WorkerControllerServiceAdapter(worker, program.getId(), runId, workerSpec.getName() + "-" + instanceId);
        worker.start();
        return controller;
    } catch (Throwable t) {
        Closeables.closeQuietly(pluginInstantiator);
        throw t;
    }
}
Also used : ApplicationSpecification(co.cask.cdap.api.app.ApplicationSpecification) ProgramController(co.cask.cdap.app.runtime.ProgramController) WorkerSpecification(co.cask.cdap.api.worker.WorkerSpecification) MessagingService(co.cask.cdap.messaging.MessagingService) MetricsCollectionService(co.cask.cdap.api.metrics.MetricsCollectionService) Service(com.google.common.util.concurrent.Service) ServiceListenerAdapter(org.apache.twill.internal.ServiceListenerAdapter) ProgramId(co.cask.cdap.proto.id.ProgramId) BasicProgramContext(co.cask.cdap.internal.app.runtime.BasicProgramContext) PluginInstantiator(co.cask.cdap.internal.app.runtime.plugin.PluginInstantiator) ProgramType(co.cask.cdap.proto.ProgramType) RunId(org.apache.twill.api.RunId) ProgramContextAware(co.cask.cdap.data.ProgramContextAware)

Example 35 with RunId

use of org.apache.twill.api.RunId in project cdap by caskdata.

the class ProgramLifecycleService method findRuntimeInfo.

private List<ProgramRuntimeService.RuntimeInfo> findRuntimeInfo(ProgramId programId, @Nullable String runId) throws BadRequestException {
    if (runId != null) {
        RunId run;
        try {
            run = RunIds.fromString(runId);
        } catch (IllegalArgumentException e) {
            throw new BadRequestException("Error parsing run-id.", e);
        }
        ProgramRuntimeService.RuntimeInfo runtimeInfo = runtimeService.lookup(programId, run);
        return runtimeInfo == null ? Collections.<RuntimeInfo>emptyList() : Collections.singletonList(runtimeInfo);
    }
    return new ArrayList<>(runtimeService.list(programId).values());
}
Also used : ArrayList(java.util.ArrayList) BadRequestException(co.cask.cdap.common.BadRequestException) RuntimeInfo(co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ProgramRuntimeService(co.cask.cdap.app.runtime.ProgramRuntimeService)

Aggregations

RunId (org.apache.twill.api.RunId)49 ProgramId (co.cask.cdap.proto.id.ProgramId)35 ProgramRunId (co.cask.cdap.proto.id.ProgramRunId)21 Test (org.junit.Test)19 ApplicationId (co.cask.cdap.proto.id.ApplicationId)13 ProgramType (co.cask.cdap.proto.ProgramType)12 ApplicationSpecification (co.cask.cdap.api.app.ApplicationSpecification)10 DatasetId (co.cask.cdap.proto.id.DatasetId)9 ProgramContextAware (co.cask.cdap.data.ProgramContextAware)6 BasicProgramContext (co.cask.cdap.internal.app.runtime.BasicProgramContext)6 NamespaceId (co.cask.cdap.proto.id.NamespaceId)6 Service (com.google.common.util.concurrent.Service)6 HashSet (java.util.HashSet)6 MetricsCollectionService (co.cask.cdap.api.metrics.MetricsCollectionService)5 ProgramController (co.cask.cdap.app.runtime.ProgramController)5 MessagingService (co.cask.cdap.messaging.MessagingService)5 NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId)5 StreamId (co.cask.cdap.proto.id.StreamId)5 ArrayList (java.util.ArrayList)5 Map (java.util.Map)5