Search in sources :

Example 1 with RuntimeInfo

use of co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo in project cdap by caskdata.

the class ProgramLifecycleServiceTest method testInvalidFlowRunRecord.

@Test
public void testInvalidFlowRunRecord() throws Exception {
    // Deploy the WordCount application; the deploy request is expected to answer with HTTP 200.
    HttpResponse deployResponse = deploy(WordCountApp.class, Constants.Gateway.API_VERSION_3_TOKEN, TEST_NAMESPACE1);
    Assert.assertEquals(200, deployResponse.getStatusLine().getStatusCode());

    final Id.Program flowId = Id.Program.from(TEST_NAMESPACE1, "WordCountApp", ProgramType.FLOW, "WordCountFlow");
    final String runningStatus = ProgramRunStatus.RUNNING.toString();
    final String failedStatus = ProgramRunStatus.FAILED.toString();

    // Nothing has been started yet, so the flow must report STOPPED.
    Assert.assertEquals("STOPPED", getProgramStatus(flowId));

    // Start the flow and wait for it to report RUNNING.
    startProgram(flowId);
    waitState(flowId, runningStatus);

    // Poll for up to 5 seconds until exactly one RUNNING run record is returned.
    Callable<Integer> runningRunCount = new Callable<Integer>() {

        @Override
        public Integer call() throws Exception {
            return getProgramRuns(flowId, runningStatus).size();
        }
    };
    Tasks.waitFor(1, runningRunCount, 5, TimeUnit.SECONDS);

    // Fetch that single run record and confirm its status.
    List<RunRecord> runningRuns = getProgramRuns(flowId, runningStatus);
    Assert.assertEquals(1, runningRuns.size());
    final RunRecord activeRun = runningRuns.get(0);
    Assert.assertEquals(ProgramRunStatus.RUNNING, activeRun.getStatus());

    // Stop the run directly through the controller obtained from the runtime service.
    RuntimeInfo activeRuntimeInfo = runtimeService.lookup(flowId.toEntityId(), RunIds.fromString(activeRun.getPid()));
    ProgramController activeController = activeRuntimeInfo.getController();
    activeController.stop();

    // Poll the store (every 100 ms, up to 5 seconds) until the run is recorded as KILLED.
    Callable<ProgramRunStatus> storedStatus = new Callable<ProgramRunStatus>() {

        @Override
        public ProgramRunStatus call() throws Exception {
            RunRecordMeta storedRun = store.getRun(flowId.toEntityId(), activeRun.getPid());
            if (storedRun == null) {
                return null;
            }
            return storedRun.getStatus();
        }
    };
    Tasks.waitFor(ProgramRunStatus.KILLED, storedStatus, 5, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);

    // Write a start entry for the same run straight into the store; the assertions below expect the
    // record to read RUNNING again even though the program has been stopped.
    long startSecs = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
    store.setStart(flowId.toEntityId(), activeRun.getPid(), startSecs);
    RunRecord tamperedRun = store.getRun(flowId.toEntityId(), activeRun.getPid());
    Assert.assertNotNull(tamperedRun);
    Assert.assertEquals(ProgramRunStatus.RUNNING, tamperedRun.getStatus());

    // Before the correction there must be no FAILED run record for the flow.
    List<RunRecord> failedBeforeFix = getProgramRuns(flowId, failedStatus);
    Assert.assertEquals(0, failedBeforeFix.size());

    // Run the correction for FLOW programs.
    Set<String> processedRunIds = Sets.newHashSet();
    programLifecycleService.validateAndCorrectRunningRunRecords(ProgramType.FLOW, processedRunIds);

    // After the correction exactly one FAILED run record is expected.
    List<RunRecord> failedAfterFix = getProgramRuns(flowId, failedStatus);
    Assert.assertEquals(1, failedAfterFix.size());
    Assert.assertEquals(ProgramRunStatus.FAILED, failedAfterFix.get(0).getStatus());
}
Also used : ProgramController(co.cask.cdap.app.runtime.ProgramController) RuntimeInfo(co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo) RunRecordMeta(co.cask.cdap.internal.app.store.RunRecordMeta) HttpResponse(org.apache.http.HttpResponse) RunRecord(co.cask.cdap.proto.RunRecord) ProgramRunStatus(co.cask.cdap.proto.ProgramRunStatus) Id(co.cask.cdap.proto.Id) Test(org.junit.Test)

Example 2 with RuntimeInfo

use of co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo in project cdap by caskdata.

the class ProgramLifecycleService method validateAndCorrectRunningRunRecords.

/**
 * Corrects run records of one program type that are stored with RUNNING status although
 * {@link ProgramRuntimeService} lists no runtime info for their run id.
 *
 * <p>Each such record is moved, via a compare-and-set on the store, from the run status of
 * {@code ProgramController.State.ALIVE} to the run status of {@code ProgramController.State.ERROR}.
 * Records for which {@code shouldCorrectForWorkflowChildren} returns false are left untouched.
 *
 * @param programType The type of program whose run records are validated and updated.
 * @param processedInvalidRunRecordIds the {@link Set} of processed invalid run record ids; the id of every
 *                                     record updated by this call is added to it.
 */
@VisibleForTesting
void validateAndCorrectRunningRunRecords(final ProgramType programType, final Set<String> processedInvalidRunRecordIds) {
    final Map<RunId, RuntimeInfo> liveRuns = runtimeService.list(programType);

    // Accepts records whose run id is unknown to the runtime service, i.e. not actually running.
    com.google.common.base.Predicate<RunRecordMeta> hasNoRuntimeInfo = new com.google.common.base.Predicate<RunRecordMeta>() {

        @Override
        public boolean apply(RunRecordMeta input) {
            return !liveRuns.containsKey(RunIds.fromString(input.getPid()));
        }
    };

    // Accepts records for which a program id of the requested type can be resolved, and remembers
    // that program id so the correction loop below can look it up by run id.
    final Map<String, ProgramId> programIdsByRunId = new HashMap<>();
    com.google.common.base.Predicate<RunRecordMeta> belongsToProgramType = new com.google.common.base.Predicate<RunRecordMeta>() {

        @Override
        public boolean apply(RunRecordMeta input) {
            String pid = input.getPid();
            ProgramId resolved = retrieveProgramIdForRunRecord(programType, pid);
            if (resolved == null) {
                // No program id for this run record: it is not of the requested program type.
                return false;
            }
            programIdsByRunId.put(pid, resolved);
            return true;
        }
    };

    // Rejects records of programs running inside a workflow; for instance, a MapReduce running in a
    // Workflow will not be contained in the runtime info in this class.
    com.google.common.base.Predicate<RunRecordMeta> correctableOutsideWorkflow = new com.google.common.base.Predicate<RunRecordMeta>() {

        @Override
        public boolean apply(RunRecordMeta invalidRunRecordMeta) {
            if (shouldCorrectForWorkflowChildren(invalidRunRecordMeta, processedInvalidRunRecordIds)) {
                return true;
            }
            LOG.trace("Will not correct invalid run record {} since it's parent workflow still running.", invalidRunRecordMeta);
            return false;
        }
    };

    LOG.trace("Start getting run records not actually running ...");
    Collection<RunRecordMeta> notActuallyRunning = store.getRuns(ProgramRunStatus.RUNNING, hasNoRuntimeInfo).values();
    LOG.trace("End getting {} run records not actually running.", notActuallyRunning.size());

    LOG.trace("Start getting invalid run records  ...");
    // Collections2.filter yields lazily evaluated views, so both predicates are applied again each time
    // the collection is queried or iterated below.
    Collection<RunRecordMeta> invalidRunRecords = Collections2.filter(Collections2.filter(notActuallyRunning, belongsToProgramType), correctableOutsideWorkflow);
    LOG.trace("End getting invalid run records.");

    if (invalidRunRecords.isEmpty()) {
        LOG.trace("No RunRecords found with RUNNING status and the program not actually running for program type {}", programType.getPrettyName());
    } else {
        LOG.warn("Found {} RunRecords with RUNNING status and the program not actually running for program type {}", invalidRunRecords.size(), programType.getPrettyName());
    }

    // Correct every invalid record; only records the store actually updated count as processed.
    for (RunRecordMeta staleRecord : invalidRunRecords) {
        String pid = staleRecord.getPid();
        ProgramId owner = programIdsByRunId.get(pid);
        boolean corrected = store.compareAndSetStatus(owner, pid, ProgramController.State.ALIVE.getRunStatus(), ProgramController.State.ERROR.getRunStatus());
        if (!corrected) {
            continue;
        }
        LOG.warn("Fixed RunRecord {} for program {} with RUNNING status because the program was not " + "actually running", pid, owner);
        processedInvalidRunRecordIds.add(pid);
    }
}
Also used : RuntimeInfo(co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo) HashMap(java.util.HashMap) RunRecordMeta(co.cask.cdap.internal.app.store.RunRecordMeta) ProgramId(co.cask.cdap.proto.id.ProgramId) Predicate(co.cask.cdap.api.Predicate) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 3 with RuntimeInfo

use of co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo in project cdap by caskdata.

the class ProgramLifecycleService method resetLogLevels.

/**
 * Helper method to reset log levels for Worker, Flow or Service.
 *
 * <p>Only the first runtime info returned by {@code findRuntimeInfo} is used; when none is found the call
 * does nothing.
 *
 * @param programId the program whose log levels are reset
 * @param loggerNames the logger names handed to the {@link LogLevelUpdater}
 * @param component optional component handed to the {@link LogLevelUpdater}
 * @param runId optional run id used to look up the runtime info
 * @throws Exception propagated from the runtime lookup or the log level updater
 */
private void resetLogLevels(ProgramId programId, Set<String> loggerNames, @Nullable String component, @Nullable String runId) throws Exception {
    List<ProgramRuntimeService.RuntimeInfo> candidates = findRuntimeInfo(programId, runId);
    if (candidates.isEmpty()) {
        return;
    }
    ProgramRuntimeService.RuntimeInfo firstMatch = candidates.get(0);
    if (firstMatch == null) {
        return;
    }
    getLogLevelUpdater(firstMatch).resetLogLevels(loggerNames, component);
}
Also used : RuntimeInfo(co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo) RuntimeInfo(co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo) LogLevelUpdater(co.cask.cdap.app.runtime.LogLevelUpdater) ProgramRuntimeService(co.cask.cdap.app.runtime.ProgramRuntimeService)

Example 4 with RuntimeInfo

use of co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo in project cdap by caskdata.

the class ProgramLifecycleService method issueStop.

/**
 * Requests a stop of the given {@link ProgramId}, optionally limited to a single run, and hands back one
 * {@link ListenableFuture} per stopped run, each carrying the {@link ProgramController} of that run.
 * Clients can wait for completion of the returned futures.
 *
 * @param programId the {@link ProgramId program} to issue a stop for
 * @param runId the runId of the program run to stop. If null, all runs of the program as returned by
 *              {@link ProgramRuntimeService} are stopped.
 * @return a list of {@link ListenableFuture} with a {@link ProgramController} that clients can wait on for stop
 *         to complete.
 * @throws NotFoundException if the app, program or run was not found
 * @throws BadRequestException if an attempt is made to stop a program that is either not running or
 *                             was started by a workflow
 * @throws UnauthorizedException if the user issuing the command is not authorized to stop the program. To stop a
 *                               program, a user requires {@link Action#EXECUTE} permission on the program.
 */
public List<ListenableFuture<ProgramController>> issueStop(ProgramId programId, @Nullable String runId) throws Exception {
    authorizationEnforcer.enforce(programId, authenticationContext.getPrincipal(), Action.EXECUTE);
    List<ProgramRuntimeService.RuntimeInfo> matchingRuns = findRuntimeInfo(programId, runId);

    // Normal case: at least one matching run was found, so ask each controller to stop.
    if (!matchingRuns.isEmpty()) {
        List<ListenableFuture<ProgramController>> stopFutures = new ArrayList<>();
        for (ProgramRuntimeService.RuntimeInfo matchingRun : matchingRuns) {
            ProgramController controller = matchingRun.getController();
            stopFutures.add(controller.stop());
        }
        return stopFutures;
    }

    // Nothing to stop: work out the most specific reason to report.
    if (!store.applicationExists(programId.getParent())) {
        throw new ApplicationNotFoundException(programId.getParent());
    }
    if (!store.programExists(programId)) {
        throw new ProgramNotFoundException(programId);
    }
    if (runId == null) {
        throw new BadRequestException(String.format("Program '%s' is not running.", programId));
    }

    ProgramRunId programRunId = programId.run(runId);
    // Check if the program is running and is started by the Workflow
    RunRecordMeta runRecord = store.getRun(programId, runId);
    boolean runningUnderWorkflow = runRecord != null
        && runRecord.getProperties().containsKey("workflowrunid")
        && runRecord.getStatus().equals(ProgramRunStatus.RUNNING);
    if (runningUnderWorkflow) {
        String workflowRunId = runRecord.getProperties().get("workflowrunid");
        throw new BadRequestException(String.format("Cannot stop the program '%s' started by the Workflow " + "run '%s'. Please stop the Workflow.", programRunId, workflowRunId));
    }
    throw new NotFoundException(programRunId);
}
Also used : RuntimeInfo(co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo) RunRecordMeta(co.cask.cdap.internal.app.store.RunRecordMeta) ArrayList(java.util.ArrayList) ProgramNotFoundException(co.cask.cdap.common.ProgramNotFoundException) ApplicationNotFoundException(co.cask.cdap.common.ApplicationNotFoundException) NotFoundException(co.cask.cdap.common.NotFoundException) RuntimeInfo(co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo) ApplicationNotFoundException(co.cask.cdap.common.ApplicationNotFoundException) BadRequestException(co.cask.cdap.common.BadRequestException) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ProgramNotFoundException(co.cask.cdap.common.ProgramNotFoundException) ProgramRuntimeService(co.cask.cdap.app.runtime.ProgramRuntimeService)

Example 5 with RuntimeInfo

use of co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo in project cdap by caskdata.

the class ProgramLifecycleService method setWorkerInstances.

/**
 * Stores a new instance count for a worker and, when runtime info is found for the program, sends the
 * instances command to its controller and waits for the command to complete.
 * Does nothing when the stored count already equals {@code instances}.
 *
 * @param programId the worker program to update
 * @param instances the requested number of instances
 */
private void setWorkerInstances(ProgramId programId, int instances) throws ExecutionException, InterruptedException, BadRequestException {
    int oldInstances = store.getWorkerInstances(programId);
    if (oldInstances == instances) {
        return;
    }
    store.setWorkerInstances(programId, instances);

    ProgramRuntimeService.RuntimeInfo runtimeInfo = findRuntimeInfo(programId);
    if (runtimeInfo == null) {
        // No runtime info was found, so there is no controller to notify.
        return;
    }

    ProgramController controller = runtimeInfo.getController();
    String requestedCount = String.valueOf(instances);
    String previousCount = String.valueOf(oldInstances);
    controller.command(ProgramOptionConstants.INSTANCES, ImmutableMap.of("runnable", programId.getProgram(), "newInstances", requestedCount, "oldInstances", previousCount)).get();
}
Also used : RuntimeInfo(co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo) ProgramRuntimeService(co.cask.cdap.app.runtime.ProgramRuntimeService)

Aggregations

RuntimeInfo (co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo): 12, ProgramRuntimeService (co.cask.cdap.app.runtime.ProgramRuntimeService): 9, RunRecordMeta (co.cask.cdap.internal.app.store.RunRecordMeta): 4, ProgramRunId (co.cask.cdap.proto.id.ProgramRunId): 3, LogLevelUpdater (co.cask.cdap.app.runtime.LogLevelUpdater): 2, ProgramController (co.cask.cdap.app.runtime.ProgramController): 2, ApplicationNotFoundException (co.cask.cdap.common.ApplicationNotFoundException): 2, BadRequestException (co.cask.cdap.common.BadRequestException): 2, NotFoundException (co.cask.cdap.common.NotFoundException): 2, ProgramNotFoundException (co.cask.cdap.common.ProgramNotFoundException): 2, ProgramId (co.cask.cdap.proto.id.ProgramId): 2, ArrayList (java.util.ArrayList): 2, RunId (org.apache.twill.api.RunId): 2, Predicate (co.cask.cdap.api.Predicate): 1, ProgramSpecification (co.cask.cdap.api.ProgramSpecification): 1, FlowSpecification (co.cask.cdap.api.flow.FlowSpecification): 1, ProgramDescriptor (co.cask.cdap.app.program.ProgramDescriptor): 1, AbstractListener (co.cask.cdap.internal.app.runtime.AbstractListener): 1, BasicArguments (co.cask.cdap.internal.app.runtime.BasicArguments): 1, SimpleProgramOptions (co.cask.cdap.internal.app.runtime.SimpleProgramOptions): 1