use of co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo in project cdap by caskdata.
the class ProgramLifecycleServiceTest method testInvalidFlowRunRecord.
/**
 * Checks that a flow run record which the store reports as RUNNING, while the program has already
 * been stopped, ends up as FAILED after {@code validateAndCorrectRunningRunRecords} is invoked.
 */
@Test
public void testInvalidFlowRunRecord() throws Exception {
  // Deploy the WordCount application; the deployment has to succeed.
  HttpResponse deployResponse = deploy(WordCountApp.class, Constants.Gateway.API_VERSION_3_TOKEN, TEST_NAMESPACE1);
  Assert.assertEquals(200, deployResponse.getStatusLine().getStatusCode());

  final Id.Program flowId =
    Id.Program.from(TEST_NAMESPACE1, "WordCountApp", ProgramType.FLOW, "WordCountFlow");
  final String runningStatus = ProgramRunStatus.RUNNING.toString();
  final String failedStatus = ProgramRunStatus.FAILED.toString();

  // Nothing has been started yet, so the flow reports STOPPED.
  Assert.assertEquals("STOPPED", getProgramStatus(flowId));

  // Start the flow and wait for it to report RUNNING.
  startProgram(flowId);
  waitState(flowId, runningStatus);

  // Poll until exactly one RUNNING run record is visible.
  Tasks.waitFor(1, new Callable<Integer>() {
    @Override
    public Integer call() throws Exception {
      return getProgramRuns(flowId, runningStatus).size();
    }
  }, 5, TimeUnit.SECONDS);

  // Fetch that single run record and double check its status.
  List<RunRecord> runRecords = getProgramRuns(flowId, runningStatus);
  Assert.assertEquals(1, runRecords.size());
  final RunRecord runningRecord = runRecords.get(0);
  Assert.assertEquals(ProgramRunStatus.RUNNING, runningRecord.getStatus());
  final String pid = runningRecord.getPid();

  // Stop the program through its controller, obtained from the runtime service.
  RuntimeInfo runtimeInfo = runtimeService.lookup(flowId.toEntityId(), RunIds.fromString(pid));
  ProgramController controller = runtimeInfo.getController();
  controller.stop();

  // The store should eventually show the run as KILLED.
  Tasks.waitFor(ProgramRunStatus.KILLED, new Callable<ProgramRunStatus>() {
    @Override
    public ProgramRunStatus call() throws Exception {
      RunRecordMeta stored = store.getRun(flowId.toEntityId(), pid);
      if (stored == null) {
        return null;
      }
      return stored.getStatus();
    }
  }, 5, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);

  // Tamper with the store so that the run looks RUNNING again.
  long startSecs = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
  store.setStart(flowId.toEntityId(), pid, startSecs);

  // The store now holds the (deliberately wrong) RUNNING status.
  RunRecord tamperedRecord = store.getRun(flowId.toEntityId(), pid);
  Assert.assertNotNull(tamperedRecord);
  Assert.assertEquals(ProgramRunStatus.RUNNING, tamperedRecord.getStatus());

  // Before the correction there is no FAILED run record for the flow.
  runRecords = getProgramRuns(flowId, failedStatus);
  Assert.assertEquals(0, runRecords.size());

  // Run the correction for flows.
  Set<String> processedInvalidRunRecordIds = Sets.newHashSet();
  programLifecycleService.validateAndCorrectRunningRunRecords(ProgramType.FLOW, processedInvalidRunRecordIds);

  // After the correction exactly one FAILED run record exists.
  runRecords = getProgramRuns(flowId, failedStatus);
  Assert.assertEquals(1, runRecords.size());
  Assert.assertEquals(ProgramRunStatus.FAILED, runRecords.get(0).getStatus());
}
use of co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo in project cdap by caskdata.
the class ProgramLifecycleService method validateAndCorrectRunningRunRecords.
/**
 * Fixes run records of one program type that the store reports as RUNNING although the
 * {@link ProgramRuntimeService} holds no runtime info for them.
 *
 * <p>The work is done in three steps:
 * <ol>
 *   <li>fetch the RUNNING run records whose run id is absent from {@code runtimeService.list(programType)};</li>
 *   <li>keep only those for which a program id of the given type can be resolved and for which
 *       {@code shouldCorrectForWorkflowChildren} allows a correction;</li>
 *   <li>compare-and-set the status of each remaining record from the run status of
 *       {@link ProgramController.State#ALIVE} to the run status of {@link ProgramController.State#ERROR}.</li>
 * </ol>
 *
 * <p>Every candidate is evaluated exactly once and the result is materialized into a list. A lazily
 * filtered view would re-run the program id lookup and the workflow check each time the collection is
 * inspected (emptiness check, size, iteration).
 *
 * @param programType The type of program the run records need to validate and update.
 * @param processedInvalidRunRecordIds the {@link Set} of processed invalid run record ids; the run id of
 *                                     every record updated by this call is added to it.
 */
@VisibleForTesting
void validateAndCorrectRunningRunRecords(final ProgramType programType,
                                         final Set<String> processedInvalidRunRecordIds) {
  final Map<RunId, RuntimeInfo> runIdToRuntimeInfo = runtimeService.list(programType);

  LOG.trace("Start getting run records not actually running ...");
  Collection<RunRecordMeta> notActuallyRunning =
    store.getRuns(ProgramRunStatus.RUNNING, new com.google.common.base.Predicate<RunRecordMeta>() {
      @Override
      public boolean apply(RunRecordMeta input) {
        String runId = input.getPid();
        // Check if it is not actually running.
        return !runIdToRuntimeInfo.containsKey(RunIds.fromString(runId));
      }
    }).values();
  LOG.trace("End getting {} run records not actually running.", notActuallyRunning.size());

  LOG.trace("Start getting invalid run records ...");
  final Map<String, ProgramId> runIdToProgramId = new HashMap<>();
  final List<RunRecordMeta> invalidRunRecords = new ArrayList<>();
  for (RunRecordMeta candidate : notActuallyRunning) {
    String runId = candidate.getPid();

    // A null program id means the run record does not belong to the requested program type.
    ProgramId targetProgramId = retrieveProgramIdForRunRecord(programType, runId);
    if (targetProgramId == null) {
      continue;
    }

    // don't correct run records for programs running inside a workflow
    // for instance, a MapReduce running in a Workflow will not be contained in the runtime info in this class
    // NOTE(review): this check now runs before any id is added to processedInvalidRunRecordIds by this
    // call; confirm shouldCorrectForWorkflowChildren does not rely on ids added during the same call.
    if (!shouldCorrectForWorkflowChildren(candidate, processedInvalidRunRecordIds)) {
      LOG.trace("Will not correct invalid run record {} since it's parent workflow still running.", candidate);
      continue;
    }

    runIdToProgramId.put(runId, targetProgramId);
    invalidRunRecords.add(candidate);
  }
  LOG.trace("End getting invalid run records.");

  if (!invalidRunRecords.isEmpty()) {
    LOG.warn("Found {} RunRecords with RUNNING status and the program not actually running for program type {}",
             invalidRunRecords.size(), programType.getPrettyName());
  } else {
    LOG.trace("No RunRecords found with RUNNING status and the program not actually running for program type {}",
              programType.getPrettyName());
  }

  // Now lets correct the invalid RunRecords
  for (RunRecordMeta invalidRunRecordMeta : invalidRunRecords) {
    String runId = invalidRunRecordMeta.getPid();
    ProgramId targetProgramId = runIdToProgramId.get(runId);

    // Compare-and-set: the update only happens when the stored status still matches the expected one.
    boolean updated = store.compareAndSetStatus(targetProgramId, runId,
                                                ProgramController.State.ALIVE.getRunStatus(),
                                                ProgramController.State.ERROR.getRunStatus());
    if (updated) {
      LOG.warn("Fixed RunRecord {} for program {} with RUNNING status because the program was not "
                 + "actually running", runId, targetProgramId);
      processedInvalidRunRecordIds.add(runId);
    }
  }
}
use of co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo in project cdap by caskdata.
the class ProgramLifecycleService method resetLogLevels.
/**
 * Helper method to reset log levels for Worker, Flow or Service.
 *
 * <p>Looks up the runtime infos for the program (and optional run) and, when at least one is found,
 * resets the given loggers through the {@link LogLevelUpdater} of the first one. Does nothing when no
 * runtime info is available.
 *
 * @param programId the program whose log levels should be reset
 * @param loggerNames the names of the loggers to reset
 * @param component optional component passed through to the {@link LogLevelUpdater}
 * @param runId optional run id used for the runtime info lookup
 */
private void resetLogLevels(ProgramId programId, Set<String> loggerNames, @Nullable String component,
                            @Nullable String runId) throws Exception {
  List<ProgramRuntimeService.RuntimeInfo> candidates = findRuntimeInfo(programId, runId);
  if (candidates.isEmpty()) {
    return;
  }

  // Only the first runtime info is used.
  ProgramRuntimeService.RuntimeInfo target = candidates.get(0);
  if (target == null) {
    return;
  }

  getLogLevelUpdater(target).resetLogLevels(loggerNames, component);
}
use of co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo in project cdap by caskdata.
the class ProgramLifecycleService method issueStop.
/**
 * Requests a stop of the given {@link ProgramId}, optionally restricted to a single {@link RunId}, and
 * hands back one {@link ListenableFuture} per affected run. Each future carries the
 * {@link ProgramController} of that run; callers may block on the futures to wait for the stop to finish.
 *
 * @param programId the {@link ProgramId program} to issue a stop for
 * @param runId the run to stop; when null, every run of the program reported by
 *              {@link ProgramRuntimeService} is stopped
 * @return the list of {@link ListenableFuture}s, one for each run a stop was issued for
 * @throws NotFoundException if the app, program or run was not found
 * @throws BadRequestException if the program is not running, or the run was started by a workflow
 * @throws UnauthorizedException if the caller lacks {@link Action#EXECUTE} permission on the program
 */
public List<ListenableFuture<ProgramController>> issueStop(ProgramId programId,
                                                           @Nullable String runId) throws Exception {
  authorizationEnforcer.enforce(programId, authenticationContext.getPrincipal(), Action.EXECUTE);

  List<ProgramRuntimeService.RuntimeInfo> activeRuns = findRuntimeInfo(programId, runId);

  // Normal case: something is running, so ask every controller to stop.
  if (!activeRuns.isEmpty()) {
    List<ListenableFuture<ProgramController>> stopFutures = new ArrayList<>();
    for (ProgramRuntimeService.RuntimeInfo activeRun : activeRuns) {
      ProgramController controller = activeRun.getController();
      stopFutures.add(controller.stop());
    }
    return stopFutures;
  }

  // Nothing is running: work out the most specific error to report.
  if (!store.applicationExists(programId.getParent())) {
    throw new ApplicationNotFoundException(programId.getParent());
  }
  if (!store.programExists(programId)) {
    throw new ProgramNotFoundException(programId);
  }
  if (runId == null) {
    throw new BadRequestException(String.format("Program '%s' is not running.", programId));
  }

  ProgramRunId programRunId = programId.run(runId);
  RunRecordMeta runRecord = store.getRun(programId, runId);

  // A RUNNING record carrying a "workflowrunid" property belongs to a workflow and must be stopped via it.
  boolean runningInsideWorkflow = runRecord != null
    && runRecord.getProperties().containsKey("workflowrunid")
    && runRecord.getStatus().equals(ProgramRunStatus.RUNNING);
  if (runningInsideWorkflow) {
    String workflowRunId = runRecord.getProperties().get("workflowrunid");
    String message = String.format(
      "Cannot stop the program '%s' started by the Workflow run '%s'. Please stop the Workflow.",
      programRunId, workflowRunId);
    throw new BadRequestException(message);
  }
  throw new NotFoundException(programRunId);
}
use of co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo in project cdap by caskdata.
the class ProgramLifecycleService method setWorkerInstances.
/**
 * Updates the instance count of a worker.
 *
 * <p>When the requested count differs from the one in the store, the new value is written to the store
 * first. If a runtime info exists for the program, an {@code INSTANCES} command carrying the old and new
 * counts is then sent to its controller, and this method waits on the command's result via {@code get()}.
 *
 * @param programId the worker program to resize
 * @param instances the requested number of instances
 */
private void setWorkerInstances(ProgramId programId, int instances)
  throws ExecutionException, InterruptedException, BadRequestException {
  int previousInstances = store.getWorkerInstances(programId);
  if (previousInstances == instances) {
    // Nothing to change.
    return;
  }

  store.setWorkerInstances(programId, instances);

  ProgramRuntimeService.RuntimeInfo liveInfo = findRuntimeInfo(programId);
  if (liveInfo == null) {
    // No runtime info for the program: only the stored value is updated.
    return;
  }

  String requestedCount = String.valueOf(instances);
  String previousCount = String.valueOf(previousInstances);
  liveInfo.getController()
    .command(ProgramOptionConstants.INSTANCES,
             ImmutableMap.of("runnable", programId.getProgram(),
                             "newInstances", requestedCount,
                             "oldInstances", previousCount))
    .get();
}
Aggregations