use of co.cask.cdap.internal.app.store.RunRecordMeta in project cdap by caskdata.
the class ProgramLifecycleService method validateAndCorrectRunningRunRecords.
/**
* Fix all the possible inconsistent states for RunRecords that shows it is in RUNNING state but actually not
* via check to {@link ProgramRuntimeService} for a type of CDAP program.
*
* @param programType The type of program the run records need to validate and update.
* @param processedInvalidRunRecordIds the {@link Set} of processed invalid run record ids.
*/
@VisibleForTesting
void validateAndCorrectRunningRunRecords(final ProgramType programType, final Set<String> processedInvalidRunRecordIds) {
final Map<RunId, RuntimeInfo> runIdToRuntimeInfo = runtimeService.list(programType);
LOG.trace("Start getting run records not actually running ...");
Collection<RunRecordMeta> notActuallyRunning = store.getRuns(ProgramRunStatus.RUNNING, new com.google.common.base.Predicate<RunRecordMeta>() {
@Override
public boolean apply(RunRecordMeta input) {
String runId = input.getPid();
// Check if it is not actually running.
return !runIdToRuntimeInfo.containsKey(RunIds.fromString(runId));
}
}).values();
LOG.trace("End getting {} run records not actually running.", notActuallyRunning.size());
final Map<String, ProgramId> runIdToProgramId = new HashMap<>();
LOG.trace("Start getting invalid run records ...");
Collection<RunRecordMeta> invalidRunRecords = Collections2.filter(notActuallyRunning, new com.google.common.base.Predicate<RunRecordMeta>() {
@Override
public boolean apply(RunRecordMeta input) {
String runId = input.getPid();
// check for program Id for the run record, if null then it is invalid program type.
ProgramId targetProgramId = retrieveProgramIdForRunRecord(programType, runId);
// Check if run id is for the right program type
if (targetProgramId != null) {
runIdToProgramId.put(runId, targetProgramId);
return true;
} else {
return false;
}
}
});
// don't correct run records for programs running inside a workflow
// for instance, a MapReduce running in a Workflow will not be contained in the runtime info in this class
invalidRunRecords = Collections2.filter(invalidRunRecords, new com.google.common.base.Predicate<RunRecordMeta>() {
@Override
public boolean apply(RunRecordMeta invalidRunRecordMeta) {
boolean shouldCorrect = shouldCorrectForWorkflowChildren(invalidRunRecordMeta, processedInvalidRunRecordIds);
if (!shouldCorrect) {
LOG.trace("Will not correct invalid run record {} since it's parent workflow still running.", invalidRunRecordMeta);
return false;
}
return true;
}
});
LOG.trace("End getting invalid run records.");
if (!invalidRunRecords.isEmpty()) {
LOG.warn("Found {} RunRecords with RUNNING status and the program not actually running for program type {}", invalidRunRecords.size(), programType.getPrettyName());
} else {
LOG.trace("No RunRecords found with RUNNING status and the program not actually running for program type {}", programType.getPrettyName());
}
// Now lets correct the invalid RunRecords
for (RunRecordMeta invalidRunRecordMeta : invalidRunRecords) {
String runId = invalidRunRecordMeta.getPid();
ProgramId targetProgramId = runIdToProgramId.get(runId);
boolean updated = store.compareAndSetStatus(targetProgramId, runId, ProgramController.State.ALIVE.getRunStatus(), ProgramController.State.ERROR.getRunStatus());
if (updated) {
LOG.warn("Fixed RunRecord {} for program {} with RUNNING status because the program was not " + "actually running", runId, targetProgramId);
processedInvalidRunRecordIds.add(runId);
}
}
}
use of co.cask.cdap.internal.app.store.RunRecordMeta in project cdap by caskdata.
the class ProgramLifecycleService method issueStop.
/**
* Issues a command to stop the specified {@link RunId} of the specified {@link ProgramId} and returns a
* {@link ListenableFuture} with the {@link ProgramController} for it.
* Clients can wait for completion of the {@link ListenableFuture}.
*
* @param programId the {@link ProgramId program} to issue a stop for
* @param runId the runId of the program run to stop. If null, all runs of the program as returned by
* {@link ProgramRuntimeService} are stopped.
* @return a list of {@link ListenableFuture} with a {@link ProgramController} that clients can wait on for stop
* to complete.
* @throws NotFoundException if the app, program or run was not found
* @throws BadRequestException if an attempt is made to stop a program that is either not running or
* was started by a workflow
* @throws UnauthorizedException if the user issuing the command is not authorized to stop the program. To stop a
* program, a user requires {@link Action#EXECUTE} permission on the program.
*/
public List<ListenableFuture<ProgramController>> issueStop(ProgramId programId, @Nullable String runId) throws Exception {
authorizationEnforcer.enforce(programId, authenticationContext.getPrincipal(), Action.EXECUTE);
List<ProgramRuntimeService.RuntimeInfo> runtimeInfos = findRuntimeInfo(programId, runId);
if (runtimeInfos.isEmpty()) {
if (!store.applicationExists(programId.getParent())) {
throw new ApplicationNotFoundException(programId.getParent());
} else if (!store.programExists(programId)) {
throw new ProgramNotFoundException(programId);
} else if (runId != null) {
ProgramRunId programRunId = programId.run(runId);
// Check if the program is running and is started by the Workflow
RunRecordMeta runRecord = store.getRun(programId, runId);
if (runRecord != null && runRecord.getProperties().containsKey("workflowrunid") && runRecord.getStatus().equals(ProgramRunStatus.RUNNING)) {
String workflowRunId = runRecord.getProperties().get("workflowrunid");
throw new BadRequestException(String.format("Cannot stop the program '%s' started by the Workflow " + "run '%s'. Please stop the Workflow.", programRunId, workflowRunId));
}
throw new NotFoundException(programRunId);
}
throw new BadRequestException(String.format("Program '%s' is not running.", programId));
}
List<ListenableFuture<ProgramController>> futures = new ArrayList<>();
for (ProgramRuntimeService.RuntimeInfo runtimeInfo : runtimeInfos) {
futures.add(runtimeInfo.getController().stop());
}
return futures;
}
use of co.cask.cdap.internal.app.store.RunRecordMeta in project cdap by caskdata.
the class ProgramLifecycleHttpHandler method programRunRecord.
/**
* Returns run record for a particular run of a program of an app version.
*/
@GET
@Path("/apps/{app-name}/versions/{app-version}/{program-type}/{program-name}/runs/{run-id}")
public void programRunRecord(HttpRequest request, HttpResponder responder, @PathParam("namespace-id") String namespaceId, @PathParam("app-name") String appName, @PathParam("app-version") String appVersion, @PathParam("program-type") String type, @PathParam("program-name") String programName, @PathParam("run-id") String runid) throws NotFoundException {
ProgramType programType = getProgramType(type);
if (programType == null || programType == ProgramType.WEBAPP) {
throw new NotFoundException(String.format("Program run record is not supported for program type '%s'.", programType));
}
ProgramId progId = new ApplicationId(namespaceId, appName, appVersion).program(programType, programName);
RunRecordMeta runRecordMeta = store.getRun(progId, runid);
if (runRecordMeta != null) {
RunRecord runRecord = CONVERT_TO_RUN_RECORD.apply(runRecordMeta);
responder.sendJson(HttpResponseStatus.OK, runRecord);
return;
}
throw new NotFoundException(progId.run(runid));
}
use of co.cask.cdap.internal.app.store.RunRecordMeta in project cdap by caskdata.
the class ProgramLifecycleHttpHandler method getMapReduceInfo.
/**
* Relays job-level and task-level information about a particular MapReduce program run.
*/
@GET
@Path("/apps/{app-id}/mapreduce/{mapreduce-id}/runs/{run-id}/info")
public void getMapReduceInfo(HttpRequest request, HttpResponder responder, @PathParam("namespace-id") String namespaceId, @PathParam("app-id") String appId, @PathParam("mapreduce-id") String mapreduceId, @PathParam("run-id") String runId) throws IOException, NotFoundException {
ProgramId programId = new ProgramId(namespaceId, appId, ProgramType.MAPREDUCE, mapreduceId);
ProgramRunId run = programId.run(runId);
ApplicationSpecification appSpec = store.getApplication(programId.getParent());
if (appSpec == null) {
throw new NotFoundException(programId.getApplication());
}
if (!appSpec.getMapReduce().containsKey(mapreduceId)) {
throw new NotFoundException(programId);
}
RunRecordMeta runRecordMeta = store.getRun(programId, runId);
if (runRecordMeta == null) {
throw new NotFoundException(run);
}
MRJobInfo mrJobInfo = mrJobInfoFetcher.getMRJobInfo(run.toId());
mrJobInfo.setState(runRecordMeta.getStatus().name());
// Multiple startTs / endTs by 1000, to be consistent with Task-level start/stop times returned by JobClient
// in milliseconds. RunRecord returns seconds value.
mrJobInfo.setStartTime(TimeUnit.SECONDS.toMillis(runRecordMeta.getStartTs()));
Long stopTs = runRecordMeta.getStopTs();
if (stopTs != null) {
mrJobInfo.setStopTime(TimeUnit.SECONDS.toMillis(stopTs));
}
// JobClient (in DistributedMRJobInfoFetcher) can return NaN as some of the values, and GSON otherwise fails
Gson gson = new GsonBuilder().serializeSpecialFloatingPointValues().create();
responder.sendJson(HttpResponseStatus.OK, mrJobInfo, mrJobInfo.getClass(), gson);
}
use of co.cask.cdap.internal.app.store.RunRecordMeta in project cdap by caskdata.
the class RemoteRuntimeStoreTest method testWorkflowMethods.
@Test
public void testWorkflowMethods() {
ProgramId workflowId = new ProgramId(Id.Namespace.DEFAULT.getId(), "test_app", ProgramType.WORKFLOW, "test_workflow");
long stopTime = System.currentTimeMillis() / 1000;
long startTime = stopTime - 20;
String pid = RunIds.generate(startTime * 1000).getId();
String twillRunId = "twill_run_id";
Map<String, String> runtimeArgs = ImmutableMap.of();
Map<String, String> properties = ImmutableMap.of("runtimeArgs", GSON.toJson(runtimeArgs));
Map<String, String> systemArgs = ImmutableMap.of();
RunRecordMeta initialRunRecord = new RunRecordMeta(pid, startTime, null, ProgramRunStatus.RUNNING, properties, systemArgs, twillRunId);
runtimeStore.setStart(workflowId, pid, startTime, twillRunId, runtimeArgs, systemArgs);
Assert.assertEquals(initialRunRecord, store.getRun(workflowId, pid));
ProgramId mapreduceId = new ProgramId(workflowId.getNamespace(), workflowId.getApplication(), ProgramType.MAPREDUCE, "test_mr");
String mapreducePid = RunIds.generate(startTime * 1000).getId();
// these system properties just have to be set on the system arguments of the program, in order for it to be
// understood as a program in a workflow node
Map<String, String> mrSystemArgs = ImmutableMap.of(ProgramOptionConstants.WORKFLOW_NODE_ID, "test_node_id", ProgramOptionConstants.WORKFLOW_NAME, workflowId.getProgram(), ProgramOptionConstants.WORKFLOW_RUN_ID, pid);
runtimeStore.setStart(mapreduceId, mapreducePid, startTime, twillRunId, runtimeArgs, mrSystemArgs);
BasicThrowable failureCause = new BasicThrowable(new IllegalArgumentException("failure", new RuntimeException("oops")));
runtimeStore.setStop(mapreduceId, mapreducePid, stopTime, ProgramRunStatus.FAILED, failureCause);
runtimeStore.setStop(workflowId, pid, stopTime, ProgramRunStatus.FAILED);
RunRecordMeta completedWorkflowRecord = store.getRun(workflowId, pid);
// we're not comparing properties, since runtime (such as starting/stopping inner programs) modifies it
Assert.assertEquals(pid, completedWorkflowRecord.getPid());
Assert.assertEquals(initialRunRecord.getStartTs(), completedWorkflowRecord.getStartTs());
Assert.assertEquals((Long) stopTime, completedWorkflowRecord.getStopTs());
Assert.assertEquals(ProgramRunStatus.FAILED, completedWorkflowRecord.getStatus());
Assert.assertEquals(twillRunId, completedWorkflowRecord.getTwillRunId());
Assert.assertEquals(systemArgs, completedWorkflowRecord.getSystemArgs());
// test that the BasicThrowable was serialized properly by RemoteRuntimeStore
ProgramRunId workflowRunId = workflowId.run(pid);
List<WorkflowNodeStateDetail> workflowNodeStates = store.getWorkflowNodeStates(workflowRunId);
Assert.assertEquals(1, workflowNodeStates.size());
WorkflowNodeStateDetail workflowNodeStateDetail = workflowNodeStates.get(0);
Assert.assertEquals("test_node_id", workflowNodeStateDetail.getNodeId());
Assert.assertEquals(mapreducePid, workflowNodeStateDetail.getRunId());
Assert.assertEquals(NodeStatus.FAILED, workflowNodeStateDetail.getNodeStatus());
Assert.assertEquals(failureCause, workflowNodeStateDetail.getFailureCause());
}
Aggregations