use of io.cdap.cdap.proto.ProgramRunStatus in project cdap by caskdata.
the class AppMetadataStoreTest method testGetActiveRuns.
/**
 * Records one run in every active state (pending, starting, running, suspended, stopping) and one run in
 * every end state for each of eight programs (two programs in two apps in two namespaces), then verifies
 * that the active-run lookups at namespace, instance, application and program level return exactly the
 * active states, and that the active-run counts agree.
 */
@Test
public void testGetActiveRuns() throws Exception {
  // write a run record for each state for two programs in two apps in two namespaces
  String app1 = "app1";
  String app2 = "app2";
  String program1 = "prog1";
  String program2 = "prog2";
  Collection<NamespaceId> namespaces = Arrays.asList(new NamespaceId("ns1"), new NamespaceId("ns2"));
  Collection<ApplicationId> apps = namespaces.stream()
    .flatMap(ns -> Stream.of(ns.app(app1), ns.app(app2)))
    .collect(Collectors.toList());
  Collection<ProgramId> programs = apps.stream()
    .flatMap(app -> Stream.of(app.mr(program1), app.mr(program2)))
    .collect(Collectors.toList());
  for (ProgramId programId : programs) {
    TransactionRunners.run(transactionRunner, context -> {
      AppMetadataStore store = AppMetadataStore.create(context);
      // one run in pending state
      ProgramRunId runId = programId.run(RunIds.generate());
      store.recordProgramProvisioning(runId, Collections.emptyMap(), SINGLETON_PROFILE_MAP, AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()), ARTIFACT_ID);
      // one run in starting state
      runId = programId.run(RunIds.generate());
      store.recordProgramProvisioning(runId, Collections.emptyMap(), SINGLETON_PROFILE_MAP, AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()), ARTIFACT_ID);
      store.recordProgramProvisioned(runId, 3, AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()));
      store.recordProgramStart(runId, UUID.randomUUID().toString(), Collections.emptyMap(), AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()));
      // one run in running state
      runId = programId.run(RunIds.generate());
      store.recordProgramProvisioning(runId, Collections.emptyMap(), SINGLETON_PROFILE_MAP, AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()), ARTIFACT_ID);
      store.recordProgramProvisioned(runId, 3, AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()));
      String twillRunId = UUID.randomUUID().toString();
      store.recordProgramStart(runId, twillRunId, Collections.emptyMap(), AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()));
      store.recordProgramRunning(runId, System.currentTimeMillis(), twillRunId, AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()));
      // one in suspended state
      runId = programId.run(RunIds.generate());
      store.recordProgramProvisioning(runId, Collections.emptyMap(), SINGLETON_PROFILE_MAP, AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()), ARTIFACT_ID);
      store.recordProgramProvisioned(runId, 3, AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()));
      twillRunId = UUID.randomUUID().toString();
      store.recordProgramStart(runId, twillRunId, Collections.emptyMap(), AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()));
      store.recordProgramRunning(runId, System.currentTimeMillis(), twillRunId, AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()));
      store.recordProgramSuspend(runId, AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()), System.currentTimeMillis());
      // one run in stopping state
      runId = programId.run(RunIds.generate());
      store.recordProgramProvisioning(runId, Collections.emptyMap(), SINGLETON_PROFILE_MAP, AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()), ARTIFACT_ID);
      store.recordProgramProvisioned(runId, 3, AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()));
      twillRunId = UUID.randomUUID().toString();
      store.recordProgramStart(runId, twillRunId, Collections.emptyMap(), AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()));
      store.recordProgramRunning(runId, System.currentTimeMillis(), twillRunId, AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()));
      store.recordProgramStopping(runId, AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()), System.currentTimeMillis(), System.currentTimeMillis() + 1000);
      // one run in each stopped state
      for (ProgramRunStatus runStatus : ProgramRunStatus.values()) {
        if (!runStatus.isEndState()) {
          continue;
        }
        runId = programId.run(RunIds.generate());
        store.recordProgramProvisioning(runId, Collections.emptyMap(), SINGLETON_PROFILE_MAP, AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()), ARTIFACT_ID);
        store.recordProgramProvisioned(runId, 3, AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()));
        twillRunId = UUID.randomUUID().toString();
        store.recordProgramStart(runId, twillRunId, Collections.emptyMap(), AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()));
        store.recordProgramStop(runId, System.currentTimeMillis(), runStatus, null, AppFabricTestHelper.createSourceId(sourceId.incrementAndGet()));
      }
    });
  }
  // the five states recorded above that are not end states
  Set<ProgramRunStatus> activeStates = new HashSet<>();
  activeStates.add(ProgramRunStatus.PENDING);
  activeStates.add(ProgramRunStatus.STARTING);
  activeStates.add(ProgramRunStatus.RUNNING);
  activeStates.add(ProgramRunStatus.SUSPENDED);
  activeStates.add(ProgramRunStatus.STOPPING);
  // test the instance level method and namespace level method
  TransactionRunners.run(transactionRunner, context -> {
    AppMetadataStore store = AppMetadataStore.create(context);
    Map<ProgramId, Set<ProgramRunStatus>> allExpected = new HashMap<>();
    Map<ProgramId, Set<ProgramRunStatus>> allActual = new HashMap<>();
    // check active runs per namespace
    for (NamespaceId namespace : namespaces) {
      Map<ProgramRunId, RunRecordDetail> activeRuns = store.getActiveRuns(namespace);
      // we expect 5 active runs (one per active state) per program, with 4 programs in each namespace
      Map<ProgramId, Set<ProgramRunStatus>> expected = new HashMap<>();
      expected.put(namespace.app(app1).mr(program1), activeStates);
      expected.put(namespace.app(app1).mr(program2), activeStates);
      expected.put(namespace.app(app2).mr(program1), activeStates);
      expected.put(namespace.app(app2).mr(program2), activeStates);
      Map<ProgramId, Set<ProgramRunStatus>> actual = new HashMap<>();
      actual.put(namespace.app(app1).mr(program1), new HashSet<>());
      actual.put(namespace.app(app1).mr(program2), new HashSet<>());
      actual.put(namespace.app(app2).mr(program1), new HashSet<>());
      actual.put(namespace.app(app2).mr(program2), new HashSet<>());
      // Give the instance-level check its own empty sets. Sharing the sets of 'actual' would let the
      // namespace-level results pre-populate 'allActual' and make the instance-level assertion vacuous.
      for (ProgramId expectedProgram : actual.keySet()) {
        allActual.put(expectedProgram, new HashSet<>());
      }
      for (Map.Entry<ProgramRunId, RunRecordDetail> activeRun : activeRuns.entrySet()) {
        ProgramId programId = activeRun.getKey().getParent();
        Assert.assertTrue("Unexpected program returned: " + programId, actual.containsKey(programId));
        actual.get(programId).add(activeRun.getValue().getStatus());
      }
      Assert.assertEquals(expected, actual);
      allExpected.putAll(expected);
    }
    // test the instance level method
    for (Map.Entry<ProgramRunId, RunRecordDetail> activeRun : store.getActiveRuns(x -> true).entrySet()) {
      ProgramId programId = activeRun.getKey().getParent();
      Assert.assertTrue("Unexpected program returned: " + programId, allActual.containsKey(programId));
      allActual.get(programId).add(activeRun.getValue().getStatus());
    }
    Assert.assertEquals(allExpected, allActual);
    // test the count-all method
    Assert.assertEquals(store.getActiveRuns(x -> true).size(), store.countActiveRuns(null));
    Assert.assertEquals(store.getActiveRuns(x -> true).size(), store.countActiveRuns(100));
    Assert.assertEquals(2, store.countActiveRuns(2));
  });
  // check active runs per app
  for (ApplicationId app : apps) {
    TransactionRunners.run(transactionRunner, context -> {
      AppMetadataStore store = AppMetadataStore.create(context);
      Map<ProgramRunId, RunRecordDetail> activeRuns = store.getActiveRuns(app);
      // we expect 5 active runs (one per active state) per program, with 2 programs in each app
      Map<ProgramId, Set<ProgramRunStatus>> expected = new HashMap<>();
      expected.put(app.mr(program1), activeStates);
      expected.put(app.mr(program2), activeStates);
      Map<ProgramId, Set<ProgramRunStatus>> actual = new HashMap<>();
      actual.put(app.mr(program1), new HashSet<>());
      actual.put(app.mr(program2), new HashSet<>());
      for (Map.Entry<ProgramRunId, RunRecordDetail> activeRun : activeRuns.entrySet()) {
        ProgramId programId = activeRun.getKey().getParent();
        Assert.assertTrue("Unexpected program returned: " + programId, actual.containsKey(programId));
        actual.get(programId).add(activeRun.getValue().getStatus());
      }
      Assert.assertEquals(expected, actual);
    });
  }
  // check active runs per program
  for (ProgramId program : programs) {
    TransactionRunners.run(transactionRunner, context -> {
      AppMetadataStore store = AppMetadataStore.create(context);
      Map<ProgramRunId, RunRecordDetail> activeRuns = store.getActiveRuns(program);
      Set<ProgramRunStatus> actual = new HashSet<>();
      for (Map.Entry<ProgramRunId, RunRecordDetail> activeRun : activeRuns.entrySet()) {
        Assert.assertEquals(program, activeRun.getKey().getParent());
        actual.add(activeRun.getValue().getStatus());
      }
      Assert.assertEquals(activeStates, actual);
    });
  }
}
use of io.cdap.cdap.proto.ProgramRunStatus in project cdap by caskdata.
the class AppMetadataStore method recordProgramSuspendResume.
/**
 * Replaces the existing run record of a program run with one reflecting a suspend or resume transition.
 *
 * @param programRunId the run being suspended or resumed
 * @param sourceId the source id stored on the rewritten record
 * @param existing the current run record; it is deleted and used as the template for the new record
 * @param action {@code "resume"} moves the run to RUNNING; any other value moves it to SUSPENDED
 * @param timestamp the resume or suspend time to store, or {@code -1} to leave that time unset
 * @return the run record that was written
 * @throws IOException declared for the table delete and write calls made here
 */
private RunRecordDetail recordProgramSuspendResume(ProgramRunId programRunId, byte[] sourceId, RunRecordDetail existing, String action, long timestamp) throws IOException {
  boolean resuming = action.equals("resume");
  ProgramRunStatus targetStatus = resuming ? ProgramRunStatus.RUNNING : ProgramRunStatus.SUSPENDED;

  // The old record is removed before the replacement is written under the active-run key.
  delete(existing);
  List<Field<?>> activeKey = getProgramRunInvertedTimeKey(TYPE_RUN_RECORD_ACTIVE, programRunId, existing.getStartTs());

  RunRecordDetail.Builder updated = RunRecordDetail.builder(existing)
    .setStatus(targetStatus)
    .setSourceId(sourceId);
  // -1 means "no timestamp supplied": neither the resume time nor the suspend time is touched.
  if (timestamp != -1) {
    if (resuming) {
      updated.setResumeTime(timestamp);
    } else {
      updated.setSuspendTime(timestamp);
    }
  }

  RunRecordDetail written = updated.build();
  writeToRunRecordTableWithPrimaryKeys(activeKey, written);
  LOG.trace("Recorded {} for program {}", targetStatus, programRunId);
  return written;
}
use of io.cdap.cdap.proto.ProgramRunStatus in project cdap by caskdata.
the class DefaultStore method getRuns.
/**
 * Fetches the run history of each given program inside a single transaction.
 *
 * <p>The result holds one {@link ProgramHistory} per requested program, in iteration order. A program that
 * the metadata store does not report as existing gets an empty run list together with a
 * {@link ProgramNotFoundException}; every other program gets its matching runs and a {@code null} exception.
 */
@Override
public List<ProgramHistory> getRuns(Collection<ProgramId> programs, ProgramRunStatus status, long startTime, long endTime, int limitPerProgram) {
  return TransactionRunners.run(transactionRunner, context -> {
    List<ProgramHistory> histories = new ArrayList<>(programs.size());
    AppMetadataStore metadataStore = getAppMetadataStore(context);
    // One existence lookup up front for all requested programs.
    Set<ProgramId> knownPrograms = metadataStore.filterProgramsExistence(programs);
    for (ProgramId program : programs) {
      if (knownPrograms.contains(program)) {
        List<RunRecord> programRuns = metadataStore
          .getRuns(program, status, startTime, endTime, limitPerProgram, null)
          .values()
          .stream()
          .map(detail -> RunRecord.builder(detail).build())
          .collect(Collectors.toList());
        histories.add(new ProgramHistory(program, programRuns, null));
      } else {
        histories.add(new ProgramHistory(program, Collections.emptyList(), new ProgramNotFoundException(program)));
      }
    }
    return histories;
  });
}
use of io.cdap.cdap.proto.ProgramRunStatus in project cdap by caskdata.
the class AppMetadataStore method addWorkflowNodeState.
/**
 * Records the state of a workflow node for a program run that was launched by a workflow, and links the
 * program run to the parent workflow's run record.
 *
 * <p>The workflow name, workflow run id and node id are read from {@code systemArgs}. Nothing is written when
 * the workflow's active run record cannot be found, or when a node state already exists for a different
 * program run and {@code status} is not STARTING.
 *
 * @param programRunId the program run whose status is being recorded
 * @param systemArgs system arguments carrying the workflow node id, workflow name and workflow run id
 * @param status the program run status, converted to a node status before it is stored
 * @param failureCause the failure to store with the node state, or {@code null}
 * @param sourceId the source id stored on the updated workflow run record
 * @throws IOException declared for the table reads and writes made here
 */
private void addWorkflowNodeState(ProgramRunId programRunId, Map<String, String> systemArgs, ProgramRunStatus status, @Nullable BasicThrowable failureCause, byte[] sourceId) throws IOException {
  String nodeId = systemArgs.get(ProgramOptionConstants.WORKFLOW_NODE_ID);
  String workflowName = systemArgs.get(ProgramOptionConstants.WORKFLOW_NAME);
  String workflowRun = systemArgs.get(ProgramOptionConstants.WORKFLOW_RUN_ID);
  ApplicationId application = programRunId.getParent().getParent();
  ProgramRunId workflowRunId = application.workflow(workflowName).run(workflowRun);

  // Look up the active run record of the workflow that started this program.
  List<Field<?>> workflowRecordKey = getProgramRunInvertedTimeKey(TYPE_RUN_RECORD_ACTIVE, workflowRunId, RunIds.getTime(workflowRun, TimeUnit.SECONDS));
  RunRecordDetail workflowRecord = getRunRecordsTable()
    .read(workflowRecordKey)
    .map(AppMetadataStore::deserializeRunRecordMeta)
    .orElse(null);
  if (workflowRecord == null) {
    // The workflow is gone, so the update is ignored.
    return;
  }

  List<Field<?>> nodeStateKey = getWorkflowPrimaryKeys(workflowRunId, nodeId);
  WorkflowNodeStateDetail existingState = getWorkflowNodeStateTable()
    .read(nodeStateKey)
    .map(row -> row.getString(StoreDefinition.AppMetadataStore.NODE_STATE_DATA))
    .map(json -> GSON.fromJson(json, WorkflowNodeStateDetail.class))
    .orElse(null);

  // The node state is only updated if one of the following is true:
  // - the status is STARTING
  // - there is no existing node state
  // - the program runId is the same as the existing workflow state
  // Otherwise the stored state belongs to a different run and is left alone.
  if (status != ProgramRunStatus.STARTING && existingState != null && !programRunId.getRun().equals(existingState.getRunId())) {
    return;
  }

  WorkflowNodeStateDetail updatedState = new WorkflowNodeStateDetail(nodeId, ProgramRunStatus.toNodeStatus(status), programRunId.getRun(), failureCause);
  writeToStructuredTableWithPrimaryKeys(nodeStateKey, updatedState, getWorkflowNodeStateTable(), StoreDefinition.AppMetadataStore.NODE_STATE_DATA);

  // Update the parent workflow run record: map the node id to this program run id in its properties.
  Map<String, String> workflowProperties = new HashMap<>(workflowRecord.getProperties());
  workflowProperties.put(nodeId, programRunId.getRun());
  RunRecordDetail updatedWorkflowRecord = RunRecordDetail.builder(workflowRecord)
    .setProperties(workflowProperties)
    .setSourceId(sourceId)
    .build();
  writeToRunRecordTableWithPrimaryKeys(workflowRecordKey, updatedWorkflowRecord);
}
use of io.cdap.cdap.proto.ProgramRunStatus in project cdap by caskdata.
the class RunRecordCorrectorService method doFixRunRecords.
/**
 * Corrects run records whose stored status is one of {@code NOT_STOPPED_STATUSES} although the run passes
 * the "not actually running" filter built by {@code createFilter}. Each such run is reported as an error
 * through the program state writer.
 *
 * @return the set of fixed {@link ProgramRunId}.
 */
private Set<ProgramRunId> doFixRunRecords() {
  LOG.trace("Start getting run records not actually running ...");
  Set<ProgramRunId> corrected = new HashSet<>();
  // The filter is built from the set of runs corrected so far.
  // NOTE(review): presumably so later batches skip them — confirm in createFilter.
  Predicate<RunRecordDetail> notRunningFilter = createFilter(corrected);

  for (ProgramRunStatus status : NOT_STOPPED_STATUSES) {
    // Work through the records in micro batches of at most txBatchSize to avoid tx timeout.
    Map<ProgramRunId, RunRecordDetail> batch;
    do {
      // Runs are not guaranteed to come back in order of start time, so the entire time range is scanned
      // on every pass. This should not perform worse than a narrower range, because the time range is
      // only used as a read-time filter.
      batch = store.getRuns(status, 0L, Long.MAX_VALUE, txBatchSize, notRunningFilter);
      LOG.trace("{} run records in {} state but are not actually running", batch.size(), status);
      for (RunRecordDetail staleRecord : batch.values()) {
        ProgramRunId staleRunId = staleRecord.getProgramRunId();
        String msg = String.format("Fixed RunRecord for program run %s in %s state because it is actually not running", staleRunId, staleRecord.getStatus());
        programStateWriter.error(staleRunId, new ProgramRunAbortedException(msg));
        corrected.add(staleRunId);
        LOG.warn(msg);
      }
      // An empty batch means nothing is left to fix for this status.
    } while (!batch.isEmpty());
  }

  if (!corrected.isEmpty()) {
    LOG.warn("Fixed {} RunRecords with status in {}, but the programs are not actually running", corrected.size(), NOT_STOPPED_STATUSES);
  } else {
    LOG.trace("No RunRecord found with status in {}, but the program are not actually running", NOT_STOPPED_STATUSES);
  }
  return corrected;
}
Aggregations