Search in sources :

Example 1 with RuntimeInfo

use of io.cdap.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo in project cdap by caskdata.

the class ProgramLifecycleService method updateLogLevels.

/**
 * Pushes the given log levels to a running Worker or Service.
 *
 * <p>Only the first runtime info returned by {@code findRuntimeInfo(programId, runId)} is used. When no
 * runtime info is found, this method does nothing.
 *
 * @param programId the program whose log levels should be updated
 * @param logLevels the log levels handed to the {@link LogLevelUpdater}
 * @param runId the run to look up, may be {@code null}
 * @throws Exception if the lookup or the update fails
 */
private void updateLogLevels(ProgramId programId, Map<String, LogEntry.Level> logLevels, @Nullable String runId) throws Exception {
    ProgramRuntimeService.RuntimeInfo firstMatch = findRuntimeInfo(programId, runId)
        .values()
        .stream()
        .findFirst()
        .orElse(null);
    if (firstMatch == null) {
        // No runtime info was found, so there is nothing to update.
        return;
    }
    getLogLevelUpdater(firstMatch).updateLogLevels(logLevels, null);
}
Also used : RuntimeInfo(io.cdap.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo) LogLevelUpdater(io.cdap.cdap.app.runtime.LogLevelUpdater) ProgramRuntimeService(io.cdap.cdap.app.runtime.ProgramRuntimeService)

Example 2 with RuntimeInfo

use of io.cdap.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo in project cdap by caskdata.

the class ProgramLifecycleService method resetLogLevels.

/**
 * Resets the log levels of the given loggers on a running Worker or Service.
 *
 * <p>Only the first runtime info returned by {@code findRuntimeInfo(programId, runId)} is used. When no
 * runtime info is found, this method does nothing.
 *
 * @param programId the program whose log levels should be reset
 * @param loggerNames the logger names handed to the {@link LogLevelUpdater}
 * @param runId the run to look up, may be {@code null}
 * @throws Exception if the lookup or the reset fails
 */
private void resetLogLevels(ProgramId programId, Set<String> loggerNames, @Nullable String runId) throws Exception {
    ProgramRuntimeService.RuntimeInfo firstMatch = findRuntimeInfo(programId, runId)
        .values()
        .stream()
        .findFirst()
        .orElse(null);
    if (firstMatch == null) {
        // No runtime info was found, so there is nothing to reset.
        return;
    }
    getLogLevelUpdater(firstMatch).resetLogLevels(loggerNames, null);
}
Also used : RuntimeInfo(io.cdap.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo) LogLevelUpdater(io.cdap.cdap.app.runtime.LogLevelUpdater) ProgramRuntimeService(io.cdap.cdap.app.runtime.ProgramRuntimeService)

Example 3 with RuntimeInfo

use of io.cdap.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo in project cdap by caskdata.

the class ProgramLifecycleService method setWorkerInstances.

/**
 * Changes the number of instances recorded for a worker and, if a runtime info is found for the program,
 * sends it an {@code INSTANCES} command.
 *
 * <p>Nothing happens when the requested count equals the count currently held in the store. Otherwise the
 * store is updated first; if a runtime info exists, the command is then sent to its controller and this
 * method waits on the command's result via {@code get()}.
 *
 * @param programId the worker program
 * @param instances the requested number of instances
 * @throws ExecutionException if waiting on the command result fails
 * @throws InterruptedException if the wait on the command result is interrupted
 * @throws BadRequestException declared by this method; not thrown directly in this body, presumably raised
 *                             by a callee (TODO confirm)
 */
private void setWorkerInstances(ProgramId programId, int instances) throws ExecutionException, InterruptedException, BadRequestException {
    int oldInstances = store.getWorkerInstances(programId);
    if (oldInstances == instances) {
        // Already at the requested count.
        return;
    }
    store.setWorkerInstances(programId, instances);

    ProgramRuntimeService.RuntimeInfo liveInfo = findRuntimeInfo(programId);
    if (liveInfo == null) {
        // No runtime info found; only the stored count needed changing.
        return;
    }

    String requestedCount = String.valueOf(instances);
    String previousCount = String.valueOf(oldInstances);
    liveInfo.getController()
        .command(ProgramOptionConstants.INSTANCES,
                 ImmutableMap.of("runnable", programId.getProgram(),
                                 "newInstances", requestedCount,
                                 "oldInstances", previousCount))
        .get();
}
Also used : RuntimeInfo(io.cdap.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo) ProgramRuntimeService(io.cdap.cdap.app.runtime.ProgramRuntimeService)

Example 4 with RuntimeInfo

use of io.cdap.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo in project cdap by caskdata.

the class ProgramLifecycleService method setServiceInstances.

/**
 * Changes the number of instances recorded for a service and, if a runtime info is found for the program,
 * sends it an {@code INSTANCES} command.
 *
 * <p>Nothing happens when the requested count equals the count currently held in the store. Otherwise the
 * store is updated first; if a runtime info exists, the command is then sent to its controller and this
 * method waits on the command's result via {@code get()}.
 *
 * @param programId the service program
 * @param instances the requested number of instances
 * @throws ExecutionException if waiting on the command result fails
 * @throws InterruptedException if the wait on the command result is interrupted
 * @throws BadRequestException declared by this method; not thrown directly in this body, presumably raised
 *                             by a callee (TODO confirm)
 */
private void setServiceInstances(ProgramId programId, int instances) throws ExecutionException, InterruptedException, BadRequestException {
    int oldInstances = store.getServiceInstances(programId);
    if (oldInstances == instances) {
        // Already at the requested count.
        return;
    }
    store.setServiceInstances(programId, instances);

    ProgramRuntimeService.RuntimeInfo liveInfo = findRuntimeInfo(programId);
    if (liveInfo == null) {
        // No runtime info found; only the stored count needed changing.
        return;
    }

    String requestedCount = String.valueOf(instances);
    String previousCount = String.valueOf(oldInstances);
    liveInfo.getController()
        .command(ProgramOptionConstants.INSTANCES,
                 ImmutableMap.of("runnable", programId.getProgram(),
                                 "newInstances", requestedCount,
                                 "oldInstances", previousCount))
        .get();
}
Also used : RuntimeInfo(io.cdap.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo) ProgramRuntimeService(io.cdap.cdap.app.runtime.ProgramRuntimeService)

Example 5 with RuntimeInfo

use of io.cdap.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo in project cdap by caskdata.

the class ProgramLifecycleService method issueStop.

/**
 * Issues a command to stop the specified {@link RunId} of the specified {@link ProgramId} and returns a list of
 * {@link ListenableFuture}s, each carrying the {@link ProgramRunId} of a run for which a stop was issued.
 * Clients can wait for completion of each {@link ListenableFuture}.
 *
 * <p>Runs are collected from both the runtime service and the active run records. Stopping is retried in a loop
 * for up to 3 seconds, sleeping 200 ms between attempts, to cover runs that are not yet visible in the runtime
 * service. Runs that are still pending when that time elapses are not represented in the returned list; no error
 * is raised for them.
 *
 * <p>Runs whose provisioning task was cancelled, and that were not subsequently stopped through the runtime
 * service, are reported with futures that are already completed.
 *
 * @param programId the {@link ProgramId program} to issue a stop for
 * @param runId the runId of the program run to stop. If null, all runs of the program as returned by
 *              {@link ProgramRuntimeService} are stopped.
 * @return a list of {@link ListenableFuture} with the {@link ProgramRunId} that clients can wait on for stop
 *         to complete.
 * @throws NotFoundException if the app, program or run was not found
 * @throws BadRequestException if an attempt is made to stop a program that is either not running or
 *                             was started by a workflow. Note that the workflow check happens per run inside
 *                             the loop, so stops may already have been issued for other runs when it is thrown.
 * @throws UnauthorizedException if the user issuing the command is not authorized to stop the program. To stop a
 *                               program, a user requires {@link ApplicationPermission#EXECUTE} permission on
 *                               the program.
 * @throws InterruptedException if the thread is interrupted while sleeping between retries
 */
public List<ListenableFuture<ProgramRunId>> issueStop(ProgramId programId, @Nullable String runId) throws Exception {
    accessEnforcer.enforce(programId, authenticationContext.getPrincipal(), ApplicationPermission.EXECUTE);
    // See if the program is running as per the runtime service
    Map<RunId, RuntimeInfo> runtimeInfos = findRuntimeInfo(programId, runId);
    Map<ProgramRunId, RunRecordDetail> activeRunRecords = getActiveRuns(programId, runId);
    if (runtimeInfos.isEmpty() && activeRunRecords.isEmpty()) {
        // Error out if no run information from runtime service and from run record
        Store.ensureProgramExists(programId, store.getApplication(programId.getParent()));
        throw new BadRequestException(String.format("Program '%s' is not running.", programId));
    }
    // Stop the running program based on a combination of runtime info and run record
    // It's possible that some of them are not yet available from the runtimeService due to timing
    // differences between the run record was created vs being added to runtimeService
    // So we retry in a loop for up to 3 seconds max to cater for those cases
    // pendingStops is the union of run id strings known to either source.
    Set<String> pendingStops = Stream.concat(runtimeInfos.keySet().stream().map(RunId::getId), activeRunRecords.keySet().stream().map(ProgramRunId::getRun)).collect(Collectors.toSet());
    List<ListenableFuture<ProgramRunId>> futures = new ArrayList<>();
    Stopwatch stopwatch = new Stopwatch().start();
    // Runs whose provisioning task was cancelled and that have not (yet) been stopped via the runtime service.
    Set<ProgramRunId> cancelledProvisionRuns = new HashSet<>();
    while (!pendingStops.isEmpty() && stopwatch.elapsedTime(TimeUnit.SECONDS) < 3L) {
        Iterator<String> iterator = pendingStops.iterator();
        while (iterator.hasNext()) {
            ProgramRunId activeRunId = programId.run(iterator.next());
            RunRecordDetail runRecord = activeRunRecords.get(activeRunId);
            if (runRecord == null) {
                // Not among the active run records fetched (e.g. the run id came only from the runtime service),
                // so fall back to reading the run from the store.
                runRecord = store.getRun(activeRunId);
            }
            // Check if the program is actually started from workflow and the workflow is running
            if (runRecord != null && runRecord.getProperties().containsKey("workflowrunid") && runRecord.getStatus().equals(ProgramRunStatus.RUNNING)) {
                String workflowRunId = runRecord.getProperties().get("workflowrunid");
                throw new BadRequestException(String.format("Cannot stop the program '%s' started by the Workflow " + "run '%s'. Please stop the Workflow.", activeRunId, workflowRunId));
            }
            RuntimeInfo runtimeInfo = runtimeService.lookup(programId, RunIds.fromString(activeRunId.getRun()));
            // if there is a runtimeInfo, the run is in the 'starting' state or later
            if (runtimeInfo != null) {
                ListenableFuture<ProgramController> future = runtimeInfo.getController().stop();
                // Expose the stop result as the run id of the stopped controller.
                futures.add(Futures.transform(future, ProgramController::getProgramRunId));
                iterator.remove();
                // if it was in this set, it means we cancelled a task, but it had already sent a PROVISIONED message
                // by the time we cancelled it. We then waited for it to show up in the runtime service and got here.
                // We added a future for this run in the lines above, but we don't want to add another duplicate future
                // at the end of this loop, so remove this run from the cancelled provision runs.
                cancelledProvisionRuns.remove(activeRunId);
            } else {
                // if there is no runtimeInfo, the run could be in the provisioning state.
                Optional<ProvisioningTaskInfo> cancelledInfo = provisioningService.cancelProvisionTask(activeRunId);
                cancelledInfo.ifPresent(taskInfo -> {
                    cancelledProvisionRuns.add(activeRunId);
                    // This state check is to handle a race condition where we cancel the provision task, but not in time
                    // to prevent it from sending the PROVISIONED notification.
                    // If the notification was sent, but not yet consumed, we are *not* done stopping the run.
                    // We have to wait for the notification to be consumed, which will start the run, and place the controller
                    // in the runtimeService. The next time we loop, we can find it in the runtimeService and tell it to stop.
                    // If the notification was not sent, then we *are* done stopping the run.
                    // Therefore, if the state is CREATED, we don't remove it from the iterator so that the run will get
                    // checked again in the next loop, when we may get the controller from the runtimeService to stop it.
                    // No other task states have this race condition, as the PROVISIONED notification is only sent
                    // after the state transitions to CREATED. Therefore it is safe to remove the runId from the iterator,
                    // as we know we are done stopping it.
                    ProvisioningOp.Status taskState = taskInfo.getProvisioningOp().getStatus();
                    if (taskState != ProvisioningOp.Status.CREATED) {
                        iterator.remove();
                    }
                });
            }
        }
        if (!pendingStops.isEmpty()) {
            // If not able to stop all of them, it means there were some runs that didn't have a runtime info and
            // didn't have a provisioning task. This can happen if the run was already finished, or the run transitioned
            // from the provisioning state to the starting state during this stop operation.
            // We'll get the active runs again and filter it by the pending stops. Stop will be retried for those.
            // pendingStops is reassigned below, so capture the current set in an effectively final local for the lambda.
            Set<String> finalPendingStops = pendingStops;
            activeRunRecords = getActiveRuns(programId, runId).entrySet().stream().filter(e -> finalPendingStops.contains(e.getKey().getRun())).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
            // Pending runs that are no longer active drop out here and are not retried.
            pendingStops = activeRunRecords.keySet().stream().map(ProgramRunId::getRun).collect(Collectors.toSet());
            if (!pendingStops.isEmpty()) {
                // Back off briefly before the next attempt.
                TimeUnit.MILLISECONDS.sleep(200);
            }
        }
    }
    // Runs left in cancelledProvisionRuns had their provisioning task cancelled and were not stopped through a
    // controller, so report each with an already-completed future.
    for (ProgramRunId cancelledProvisionRun : cancelledProvisionRuns) {
        SettableFuture<ProgramRunId> future = SettableFuture.create();
        future.set(cancelledProvisionRun);
        futures.add(future);
    }
    return futures;
}
Also used : ProgramController(io.cdap.cdap.app.runtime.ProgramController) RuntimeInfo(io.cdap.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo) RunRecordDetail(io.cdap.cdap.internal.app.store.RunRecordDetail) ArrayList(java.util.ArrayList) Stopwatch(com.google.common.base.Stopwatch) ProvisioningTaskInfo(io.cdap.cdap.internal.provision.ProvisioningTaskInfo) LogEntry(org.apache.twill.api.logging.LogEntry) BadRequestException(io.cdap.cdap.common.BadRequestException) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) ProvisioningOp(io.cdap.cdap.internal.provision.ProvisioningOp) RunId(org.apache.twill.api.RunId) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Aggregations

RuntimeInfo (io.cdap.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo)6 ProgramRuntimeService (io.cdap.cdap.app.runtime.ProgramRuntimeService)5 LogLevelUpdater (io.cdap.cdap.app.runtime.LogLevelUpdater)2 BadRequestException (io.cdap.cdap.common.BadRequestException)2 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)2 RunId (org.apache.twill.api.RunId)2 Stopwatch (com.google.common.base.Stopwatch)1 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)1 ProgramController (io.cdap.cdap.app.runtime.ProgramController)1 RunRecordDetail (io.cdap.cdap.internal.app.store.RunRecordDetail)1 ProvisioningOp (io.cdap.cdap.internal.provision.ProvisioningOp)1 ProvisioningTaskInfo (io.cdap.cdap.internal.provision.ProvisioningTaskInfo)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 LinkedHashSet (java.util.LinkedHashSet)1 LogEntry (org.apache.twill.api.logging.LogEntry)1