Search in sources :

Example 6 with RunRecordMeta

use of co.cask.cdap.internal.app.store.RunRecordMeta in project cdap by caskdata.

the class ProgramLifecycleService method validateAndCorrectRunningRunRecords.

/**
 * Looks for run records that the store reports as RUNNING although {@link ProgramRuntimeService} lists no
 * runtime info for them, and attempts to switch each such record from the ALIVE run status to the ERROR
 * run status.
 *
 * <p>The candidate collections below are lazy Guava views: their predicates run again on every traversal
 * ({@code isEmpty()}, {@code size()} and the final loop).
 *
 * @param programType the type of program whose run records are validated and updated
 * @param processedInvalidRunRecordIds the {@link Set} of already processed invalid run record ids; the id of
 *                                     every record updated by this call is added to it
 */
@VisibleForTesting
void validateAndCorrectRunningRunRecords(final ProgramType programType, final Set<String> processedInvalidRunRecordIds) {
    final Map<RunId, RuntimeInfo> liveRuntimes = runtimeService.list(programType);
    LOG.trace("Start getting run records not actually running ...");
    com.google.common.base.Predicate<RunRecordMeta> hasNoRuntimeInfo = new com.google.common.base.Predicate<RunRecordMeta>() {

        @Override
        public boolean apply(RunRecordMeta input) {
            // A record counts as "not actually running" when the runtime service does not know its run id.
            return !liveRuntimes.containsKey(RunIds.fromString(input.getPid()));
        }
    };
    Collection<RunRecordMeta> staleRecords = store.getRuns(ProgramRunStatus.RUNNING, hasNoRuntimeInfo).values();
    LOG.trace("End getting {} run records not actually running.", staleRecords.size());

    final Map<String, ProgramId> programIdsByRunId = new HashMap<>();
    LOG.trace("Start getting invalid run records  ...");
    Collection<RunRecordMeta> recordsOfThisType = Collections2.filter(staleRecords, new com.google.common.base.Predicate<RunRecordMeta>() {

        @Override
        public boolean apply(RunRecordMeta input) {
            String pid = input.getPid();
            // A null program id means the run record does not belong to the requested program type.
            ProgramId resolved = retrieveProgramIdForRunRecord(programType, pid);
            if (resolved == null) {
                return false;
            }
            // Remember the mapping so the correction loop does not have to resolve it again.
            programIdsByRunId.put(pid, resolved);
            return true;
        }
    });

    // Programs running inside a workflow must not be corrected: for instance, a MapReduce running in a
    // Workflow will not be contained in the runtime info in this class.
    Collection<RunRecordMeta> invalidRunRecords = Collections2.filter(recordsOfThisType, new com.google.common.base.Predicate<RunRecordMeta>() {

        @Override
        public boolean apply(RunRecordMeta candidate) {
            if (shouldCorrectForWorkflowChildren(candidate, processedInvalidRunRecordIds)) {
                return true;
            }
            LOG.trace("Will not correct invalid run record {} since it's parent workflow still running.", candidate);
            return false;
        }
    });
    LOG.trace("End getting invalid run records.");

    if (invalidRunRecords.isEmpty()) {
        LOG.trace("No RunRecords found with RUNNING status and the program not actually running for program type {}", programType.getPrettyName());
    } else {
        LOG.warn("Found {} RunRecords with RUNNING status and the program not actually running for program type {}", invalidRunRecords.size(), programType.getPrettyName());
    }

    // Correct every invalid run record that is still in the expected state.
    for (RunRecordMeta staleRecord : invalidRunRecords) {
        String pid = staleRecord.getPid();
        ProgramId owner = programIdsByRunId.get(pid);
        if (!store.compareAndSetStatus(owner, pid, ProgramController.State.ALIVE.getRunStatus(), ProgramController.State.ERROR.getRunStatus())) {
            continue;
        }
        LOG.warn("Fixed RunRecord {} for program {} with RUNNING status because the program was not "
            + "actually running", pid, owner);
        processedInvalidRunRecordIds.add(pid);
    }
}
Also used : RuntimeInfo(co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo) HashMap(java.util.HashMap) RunRecordMeta(co.cask.cdap.internal.app.store.RunRecordMeta) ProgramId(co.cask.cdap.proto.id.ProgramId) Predicate(co.cask.cdap.api.Predicate) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 7 with RunRecordMeta

use of co.cask.cdap.internal.app.store.RunRecordMeta in project cdap by caskdata.

the class ProgramLifecycleService method issueStop.

/**
 * Requests a stop of the given run (or, when {@code runId} is null, of every run that
 * {@link ProgramRuntimeService} reports) of the given {@link ProgramId}.
 * One {@link ListenableFuture} carrying the {@link ProgramController} is returned per stopped run, so
 * clients can wait for each stop to complete.
 *
 * @param programId the {@link ProgramId program} to issue a stop for
 * @param runId the runId of the program run to stop. If null, all runs of the program as returned by
 *              {@link ProgramRuntimeService} are stopped.
 * @return a list of {@link ListenableFuture} with a {@link ProgramController} that clients can wait on for stop
 *         to complete.
 * @throws NotFoundException if the app, program or run was not found
 * @throws BadRequestException if an attempt is made to stop a program that is either not running or
 *                             was started by a workflow
 * @throws UnauthorizedException if the user issuing the command is not authorized to stop the program. To stop a
 *                               program, a user requires {@link Action#EXECUTE} permission on the program.
 */
public List<ListenableFuture<ProgramController>> issueStop(ProgramId programId, @Nullable String runId) throws Exception {
    authorizationEnforcer.enforce(programId, authenticationContext.getPrincipal(), Action.EXECUTE);
    List<ProgramRuntimeService.RuntimeInfo> runtimeInfos = findRuntimeInfo(programId, runId);

    // Normal case: at least one matching run is known to the runtime, so ask each controller to stop.
    if (!runtimeInfos.isEmpty()) {
        List<ListenableFuture<ProgramController>> stopFutures = new ArrayList<>();
        for (ProgramRuntimeService.RuntimeInfo info : runtimeInfos) {
            stopFutures.add(info.getController().stop());
        }
        return stopFutures;
    }

    // Nothing to stop: work out the most specific reason to report to the caller.
    if (!store.applicationExists(programId.getParent())) {
        throw new ApplicationNotFoundException(programId.getParent());
    }
    if (!store.programExists(programId)) {
        throw new ProgramNotFoundException(programId);
    }
    if (runId == null) {
        throw new BadRequestException(String.format("Program '%s' is not running.", programId));
    }

    ProgramRunId programRunId = programId.run(runId);
    // A run recorded as RUNNING and carrying a workflow run id has to be stopped through its Workflow.
    RunRecordMeta runRecord = store.getRun(programId, runId);
    boolean runningUnderWorkflow = runRecord != null
        && runRecord.getProperties().containsKey("workflowrunid")
        && runRecord.getStatus().equals(ProgramRunStatus.RUNNING);
    if (runningUnderWorkflow) {
        String workflowRunId = runRecord.getProperties().get("workflowrunid");
        throw new BadRequestException(String.format("Cannot stop the program '%s' started by the Workflow "
            + "run '%s'. Please stop the Workflow.", programRunId, workflowRunId));
    }
    throw new NotFoundException(programRunId);
}
Also used : RuntimeInfo(co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo) RunRecordMeta(co.cask.cdap.internal.app.store.RunRecordMeta) ArrayList(java.util.ArrayList) ProgramNotFoundException(co.cask.cdap.common.ProgramNotFoundException) ApplicationNotFoundException(co.cask.cdap.common.ApplicationNotFoundException) NotFoundException(co.cask.cdap.common.NotFoundException) RuntimeInfo(co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo) ApplicationNotFoundException(co.cask.cdap.common.ApplicationNotFoundException) BadRequestException(co.cask.cdap.common.BadRequestException) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ProgramNotFoundException(co.cask.cdap.common.ProgramNotFoundException) ProgramRuntimeService(co.cask.cdap.app.runtime.ProgramRuntimeService)

Example 8 with RunRecordMeta

use of co.cask.cdap.internal.app.store.RunRecordMeta in project cdap by caskdata.

the class ProgramLifecycleHttpHandler method programRunRecord.

/**
 * Returns run record for a particular run of a program of an app version.
 *
 * <p>Responds with the run record as JSON and status OK when the store has a record for the run.
 *
 * @throws NotFoundException if the program type is unrecognized or is {@code WEBAPP}, or if the store has no
 *                           record for the requested run
 */
@GET
@Path("/apps/{app-name}/versions/{app-version}/{program-type}/{program-name}/runs/{run-id}")
public void programRunRecord(HttpRequest request, HttpResponder responder, @PathParam("namespace-id") String namespaceId, @PathParam("app-name") String appName, @PathParam("app-version") String appVersion, @PathParam("program-type") String type, @PathParam("program-name") String programName, @PathParam("run-id") String runid) throws NotFoundException {
    ProgramType programType = getProgramType(type);
    if (programType == null || programType == ProgramType.WEBAPP) {
        // Report the type as it was requested: programType can be null on this path, which would
        // otherwise be rendered as the unhelpful text 'null'.
        throw new NotFoundException(String.format("Program run record is not supported for program type '%s'.", type));
    }
    ProgramId progId = new ApplicationId(namespaceId, appName, appVersion).program(programType, programName);
    RunRecordMeta runRecordMeta = store.getRun(progId, runid);
    if (runRecordMeta == null) {
        throw new NotFoundException(progId.run(runid));
    }
    RunRecord runRecord = CONVERT_TO_RUN_RECORD.apply(runRecordMeta);
    responder.sendJson(HttpResponseStatus.OK, runRecord);
}
Also used : RunRecord(co.cask.cdap.proto.RunRecord) RunRecordMeta(co.cask.cdap.internal.app.store.RunRecordMeta) NamespaceNotFoundException(co.cask.cdap.common.NamespaceNotFoundException) NotFoundException(co.cask.cdap.common.NotFoundException) ProgramType(co.cask.cdap.proto.ProgramType) ProgramId(co.cask.cdap.proto.id.ProgramId) ApplicationId(co.cask.cdap.proto.id.ApplicationId) Path(javax.ws.rs.Path) GET(javax.ws.rs.GET)

Example 9 with RunRecordMeta

use of co.cask.cdap.internal.app.store.RunRecordMeta in project cdap by caskdata.

the class ProgramLifecycleHttpHandler method getMapReduceInfo.

/**
 * Relays job-level and task-level information about a particular MapReduce program run.
 *
 * <p>The job info obtained from the fetcher is completed with the state and the start/stop times taken from
 * the stored run record before it is sent back as JSON.
 *
 * @throws NotFoundException if the application, the MapReduce program or the run does not exist in the store
 * @throws IOException declared by this handler; presumably raised while fetching the job info or responding
 */
@GET
@Path("/apps/{app-id}/mapreduce/{mapreduce-id}/runs/{run-id}/info")
public void getMapReduceInfo(HttpRequest request, HttpResponder responder, @PathParam("namespace-id") String namespaceId, @PathParam("app-id") String appId, @PathParam("mapreduce-id") String mapreduceId, @PathParam("run-id") String runId) throws IOException, NotFoundException {
    ProgramId mapReduceProgram = new ProgramId(namespaceId, appId, ProgramType.MAPREDUCE, mapreduceId);
    ProgramRunId programRun = mapReduceProgram.run(runId);

    // Validate from the outside in: application, then program, then the run itself.
    ApplicationSpecification spec = store.getApplication(mapReduceProgram.getParent());
    if (spec == null) {
        throw new NotFoundException(mapReduceProgram.getApplication());
    }
    boolean programKnown = spec.getMapReduce().containsKey(mapreduceId);
    if (!programKnown) {
        throw new NotFoundException(mapReduceProgram);
    }
    RunRecordMeta storedRun = store.getRun(mapReduceProgram, runId);
    if (storedRun == null) {
        throw new NotFoundException(programRun);
    }

    MRJobInfo jobInfo = mrJobInfoFetcher.getMRJobInfo(programRun.toId());
    jobInfo.setState(storedRun.getStatus().name());
    // Multiply startTs / endTs by 1000, to be consistent with Task-level start/stop times returned by
    // JobClient in milliseconds. RunRecord returns seconds value.
    long startMillis = TimeUnit.SECONDS.toMillis(storedRun.getStartTs());
    jobInfo.setStartTime(startMillis);
    // The stop time is only set when the run record has one.
    Long stopSeconds = storedRun.getStopTs();
    if (stopSeconds != null) {
        jobInfo.setStopTime(TimeUnit.SECONDS.toMillis(stopSeconds));
    }

    // JobClient (in DistributedMRJobInfoFetcher) can return NaN as some of the values, and GSON otherwise fails
    GsonBuilder nanTolerantBuilder = new GsonBuilder().serializeSpecialFloatingPointValues();
    Gson nanTolerantGson = nanTolerantBuilder.create();
    responder.sendJson(HttpResponseStatus.OK, jobInfo, jobInfo.getClass(), nanTolerantGson);
}
Also used : ApplicationSpecification(co.cask.cdap.api.app.ApplicationSpecification) MRJobInfo(co.cask.cdap.proto.MRJobInfo) GsonBuilder(com.google.gson.GsonBuilder) RunRecordMeta(co.cask.cdap.internal.app.store.RunRecordMeta) NamespaceNotFoundException(co.cask.cdap.common.NamespaceNotFoundException) NotFoundException(co.cask.cdap.common.NotFoundException) Gson(com.google.gson.Gson) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ProgramId(co.cask.cdap.proto.id.ProgramId) Path(javax.ws.rs.Path) GET(javax.ws.rs.GET)

Example 10 with RunRecordMeta

use of co.cask.cdap.internal.app.store.RunRecordMeta in project cdap by caskdata.

the class RemoteRuntimeStoreTest method testWorkflowMethods.

/**
 * Starts a workflow run and a MapReduce run tagged as a node of that workflow through the runtime store,
 * fails both, and then verifies the resulting workflow run record and workflow node state read back from
 * the store.
 */
@Test
public void testWorkflowMethods() {
    // --- workflow run: start it and check the freshly written record ---
    ProgramId workflow = new ProgramId(Id.Namespace.DEFAULT.getId(), "test_app", ProgramType.WORKFLOW, "test_workflow");
    long stopSecs = System.currentTimeMillis() / 1000;
    long startSecs = stopSecs - 20;
    String workflowPid = RunIds.generate(startSecs * 1000).getId();
    String twillId = "twill_run_id";
    Map<String, String> userArgs = ImmutableMap.of();
    Map<String, String> expectedProps = ImmutableMap.of("runtimeArgs", GSON.toJson(userArgs));
    Map<String, String> workflowSystemArgs = ImmutableMap.of();
    RunRecordMeta expectedInitial = new RunRecordMeta(workflowPid, startSecs, null, ProgramRunStatus.RUNNING, expectedProps, workflowSystemArgs, twillId);
    runtimeStore.setStart(workflow, workflowPid, startSecs, twillId, userArgs, workflowSystemArgs);
    Assert.assertEquals(expectedInitial, store.getRun(workflow, workflowPid));

    // --- MapReduce run inside the workflow ---
    ProgramId mapReduce = new ProgramId(workflow.getNamespace(), workflow.getApplication(), ProgramType.MAPREDUCE, "test_mr");
    String mapReducePid = RunIds.generate(startSecs * 1000).getId();
    // these system properties just have to be set on the system arguments of the program, in order for it to be
    // understood as a program in a workflow node
    Map<String, String> nodeSystemArgs = ImmutableMap.of(
        ProgramOptionConstants.WORKFLOW_NODE_ID, "test_node_id",
        ProgramOptionConstants.WORKFLOW_NAME, workflow.getProgram(),
        ProgramOptionConstants.WORKFLOW_RUN_ID, workflowPid);
    runtimeStore.setStart(mapReduce, mapReducePid, startSecs, twillId, userArgs, nodeSystemArgs);

    // --- fail the MapReduce (with a cause) and then the workflow ---
    RuntimeException rootCause = new RuntimeException("oops");
    BasicThrowable expectedFailure = new BasicThrowable(new IllegalArgumentException("failure", rootCause));
    runtimeStore.setStop(mapReduce, mapReducePid, stopSecs, ProgramRunStatus.FAILED, expectedFailure);
    runtimeStore.setStop(workflow, workflowPid, stopSecs, ProgramRunStatus.FAILED);

    // --- verify the final workflow record ---
    RunRecordMeta finalRecord = store.getRun(workflow, workflowPid);
    // we're not comparing properties, since runtime (such as starting/stopping inner programs) modifies it
    Assert.assertEquals(workflowPid, finalRecord.getPid());
    Assert.assertEquals(expectedInitial.getStartTs(), finalRecord.getStartTs());
    Assert.assertEquals((Long) stopSecs, finalRecord.getStopTs());
    Assert.assertEquals(ProgramRunStatus.FAILED, finalRecord.getStatus());
    Assert.assertEquals(twillId, finalRecord.getTwillRunId());
    Assert.assertEquals(workflowSystemArgs, finalRecord.getSystemArgs());

    // test that the BasicThrowable was serialized properly by RemoteRuntimeStore
    ProgramRunId workflowRun = workflow.run(workflowPid);
    List<WorkflowNodeStateDetail> nodeStates = store.getWorkflowNodeStates(workflowRun);
    Assert.assertEquals(1, nodeStates.size());
    WorkflowNodeStateDetail onlyNode = nodeStates.get(0);
    Assert.assertEquals("test_node_id", onlyNode.getNodeId());
    Assert.assertEquals(mapReducePid, onlyNode.getRunId());
    Assert.assertEquals(NodeStatus.FAILED, onlyNode.getNodeStatus());
    Assert.assertEquals(expectedFailure, onlyNode.getFailureCause());
}
Also used : RunRecordMeta(co.cask.cdap.internal.app.store.RunRecordMeta) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ProgramId(co.cask.cdap.proto.id.ProgramId) BasicThrowable(co.cask.cdap.proto.BasicThrowable) WorkflowNodeStateDetail(co.cask.cdap.proto.WorkflowNodeStateDetail) Test(org.junit.Test)

Aggregations

RunRecordMeta (co.cask.cdap.internal.app.store.RunRecordMeta)25 ProgramRunId (co.cask.cdap.proto.id.ProgramRunId)11 ProgramId (co.cask.cdap.proto.id.ProgramId)9 GET (javax.ws.rs.GET)8 Path (javax.ws.rs.Path)8 LoggingContext (co.cask.cdap.common.logging.LoggingContext)6 RuntimeInfo (co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo)4 NotFoundException (co.cask.cdap.common.NotFoundException)4 ProgramType (co.cask.cdap.proto.ProgramType)4 MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey)3 RunId (org.apache.twill.api.RunId)3 Test (org.junit.Test)3 NamespaceNotFoundException (co.cask.cdap.common.NamespaceNotFoundException)2 Relation (co.cask.cdap.data2.metadata.lineage.Relation)2 SimpleRuntimeInfo (co.cask.cdap.internal.app.runtime.service.SimpleRuntimeInfo)2 RunRecord (co.cask.cdap.proto.RunRecord)2 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 Nullable (javax.annotation.Nullable)2 TwillController (org.apache.twill.api.TwillController)2