Use of io.cdap.cdap.app.program.ProgramDescriptor in project cdap by caskdata.
The class ProgramNotificationSubscriberService, method handleClusterEvent.
/**
* Handles a notification related to cluster operations.
*
* @param programRunId program run id from the event
* @param clusterStatus cluster status from the event
* @param notification the notification to process
* @param messageIdBytes the unique ID for the notification message
* @param appMetadataStore the data table to use
* @param context the table context for performing table operations
* @return an {@link Optional} of {@link Runnable} carrying a task to execute after the handling of this event
*         has completed. See {@link #postProcess()} for details.
* @throws IOException if reading from or writing to the app metadata store fails.
*/
private Optional<Runnable> handleClusterEvent(ProgramRunId programRunId, ProgramRunClusterStatus clusterStatus,
                                              Notification notification, byte[] messageIdBytes,
                                              AppMetadataStore appMetadataStore,
                                              StructuredTableContext context) throws IOException {
  Map<String, String> properties = notification.getProperties();
  ProgramOptions programOptions = ProgramOptions.fromNotification(notification, GSON);
  String userId = properties.get(ProgramOptionConstants.USER_ID);
  long endTs = getTimeSeconds(properties, ProgramOptionConstants.CLUSTER_END_TIME);
  ProgramDescriptor programDescriptor =
    GSON.fromJson(properties.get(ProgramOptionConstants.PROGRAM_DESCRIPTOR), ProgramDescriptor.class);
  switch (clusterStatus) {
    case PROVISIONING:
      appMetadataStore.recordProgramProvisioning(programRunId, programOptions.getUserArguments().asMap(),
                                                 programOptions.getArguments().asMap(), messageIdBytes,
                                                 programDescriptor.getArtifactId().toApiArtifactId());
      ProvisionRequest provisionRequest =
        new ProvisionRequest(programRunId, programOptions, programDescriptor, userId);
      return Optional.of(provisioningService.provision(provisionRequest, context));
    case PROVISIONED:
      Cluster cluster = GSON.fromJson(properties.get(ProgramOptionConstants.CLUSTER), Cluster.class);
      appMetadataStore.recordProgramProvisioned(programRunId, cluster.getNodes().size(), messageIdBytes);
      // Update the ProgramOptions system arguments to include information needed for program execution
      Map<String, String> systemArgs = new HashMap<>(programOptions.getArguments().asMap());
      systemArgs.put(ProgramOptionConstants.USER_ID, properties.get(ProgramOptionConstants.USER_ID));
      systemArgs.put(ProgramOptionConstants.CLUSTER, properties.get(ProgramOptionConstants.CLUSTER));
      systemArgs.put(ProgramOptionConstants.SECURE_KEYS_DIR, properties.get(ProgramOptionConstants.SECURE_KEYS_DIR));
      ProgramOptions newProgramOptions = new SimpleProgramOptions(programOptions.getProgramId(),
                                                                  new BasicArguments(systemArgs),
                                                                  programOptions.getUserArguments());
      // Publish the program STARTING state before starting the program
      programStateWriter.start(programRunId, newProgramOptions, null, programDescriptor);
      // Emit the provisioning time metric
      long provisioningTime = System.currentTimeMillis() / 1000
        - RunIds.getTime(programRunId.getRun(), TimeUnit.SECONDS);
      SystemArguments.getProfileIdFromArgs(programRunId.getNamespaceId(), systemArgs)
        .ifPresent(profileId -> emitProvisioningTimeMetric(programRunId, profileId,
                                                           programOptions, provisioningTime));
      break;
    case DEPROVISIONING:
      RunRecordDetail recordedMeta = appMetadataStore.recordProgramDeprovisioning(programRunId, messageIdBytes);
      // A null record means the status update was skipped, either because this was a duplicate message
      // or an invalid state transition. In both cases, we should not try to deprovision the cluster.
      if (recordedMeta != null) {
        return Optional.of(provisioningService.deprovision(programRunId, context));
      }
      break;
    case DEPROVISIONED:
      appMetadataStore.recordProgramDeprovisioned(programRunId, endTs, messageIdBytes);
      break;
    case ORPHANED:
      appMetadataStore.recordProgramOrphaned(programRunId, endTs, messageIdBytes);
      break;
  }
  return Optional.empty();
}
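The Javadoc above references postProcess(). As a rough sketch of the calling pattern, and not the actual processMessages() implementation, the handler runs inside a transaction and hands back a task that only executes after the state transition is durably recorded. The method name processClusterEvent and the use of a transactionRunner field here are illustrative assumptions:

// Hypothetical caller sketch; the real subscriber's message-processing loop differs.
private void processClusterEvent(ProgramRunId programRunId, ProgramRunClusterStatus clusterStatus,
                                 Notification notification, byte[] messageIdBytes) {
  List<Runnable> postProcessTasks = new ArrayList<>();
  TransactionRunners.run(transactionRunner, context -> {
    AppMetadataStore store = AppMetadataStore.create(context);
    // For a PROVISIONING event, this Optional carries provisioningService.provision(...)
    handleClusterEvent(programRunId, clusterStatus, notification, messageIdBytes, store, context)
      .ifPresent(postProcessTasks::add);
  });
  // postProcess(): side effects such as cluster provisioning run only after the
  // transaction that recorded the state transition has committed.
  postProcessTasks.forEach(Runnable::run);
}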
Use of io.cdap.cdap.app.program.ProgramDescriptor in project cdap by caskdata.
The class ProgramNotificationSubscriberService, method handleProgramEvent.
private void handleProgramEvent(ProgramRunId programRunId, ProgramRunStatus programRunStatus,
                                Notification notification, byte[] messageIdBytes,
                                AppMetadataStore appMetadataStore, ProgramHeartbeatTable programHeartbeatTable,
                                List<Runnable> runnables) throws Exception {
  LOG.trace("Processing program status notification: {}", notification);
  Map<String, String> properties = notification.getProperties();
  String twillRunId = notification.getProperties().get(ProgramOptionConstants.TWILL_RUN_ID);
  RunRecordDetail recordedRunRecord;
  switch (programRunStatus) {
    case STARTING:
      try {
        RunRecordDetail runRecordDetail = appMetadataStore.getRun(programRunId);
        if (runRecordDetail != null
            && runRecordDetail.getStatus() != ProgramRunStatus.PENDING
            && runRecordDetail.getStatus() != ProgramRunStatus.STARTING) {
          // This is an invalid state transition. Valid state transitions are:
          // PENDING => STARTING : normal state transition
          // STARTING => STARTING : state transition after app-fabric restart
          LOG.debug("Ignoring unexpected request to transition program run {} from {} state to program "
                      + "STARTING state.", programRunId, runRecordDetail.getStatus());
          return;
        }
      } catch (IllegalStateException ex) {
        LOG.error("Request to transition program run {} from non-existent state to program STARTING state "
                    + "but multiple run IDs exist.", programRunId);
      }
      String systemArgumentsString = properties.get(ProgramOptionConstants.SYSTEM_OVERRIDES);
      Map<String, String> systemArguments = systemArgumentsString == null
        ? Collections.emptyMap() : GSON.fromJson(systemArgumentsString, STRING_STRING_MAP);
      boolean isInWorkflow = systemArguments.containsKey(ProgramOptionConstants.WORKFLOW_NAME);
      boolean skipProvisioning = Boolean.parseBoolean(systemArguments.get(ProgramOptionConstants.SKIP_PROVISIONING));
      ProgramOptions prgOptions = ProgramOptions.fromNotification(notification, GSON);
      ProgramDescriptor prgDescriptor =
        GSON.fromJson(properties.get(ProgramOptionConstants.PROGRAM_DESCRIPTOR), ProgramDescriptor.class);
      // For runs inside a workflow, or when provisioning is skipped, there is no cluster to provision,
      // so record the provisioning states directly before the program state changes into Starting.
      if (isInWorkflow || skipProvisioning) {
        appMetadataStore.recordProgramProvisioning(programRunId, prgOptions.getUserArguments().asMap(),
                                                   prgOptions.getArguments().asMap(), messageIdBytes,
                                                   prgDescriptor.getArtifactId().toApiArtifactId());
        appMetadataStore.recordProgramProvisioned(programRunId, 0, messageIdBytes);
      } else {
        runnables.add(() -> {
          String oldUser = SecurityRequestContext.getUserId();
          try {
            SecurityRequestContext.setUserId(prgOptions.getArguments().getOption(ProgramOptionConstants.USER_ID));
            try {
              programLifecycleService.startInternal(prgDescriptor, prgOptions, programRunId);
            } catch (Exception e) {
              LOG.error("Failed to start program {}", programRunId, e);
              programStateWriter.error(programRunId, e);
            }
          } finally {
            SecurityRequestContext.setUserId(oldUser);
          }
        });
      }
      recordedRunRecord = appMetadataStore.recordProgramStart(programRunId, twillRunId, systemArguments,
                                                              messageIdBytes);
      writeToHeartBeatTable(recordedRunRecord, RunIds.getTime(programRunId.getRun(), TimeUnit.SECONDS),
                            programHeartbeatTable);
      break;
    case RUNNING:
      long logicalStartTimeSecs = getTimeSeconds(notification.getProperties(),
                                                 ProgramOptionConstants.LOGICAL_START_TIME);
      if (logicalStartTimeSecs == -1) {
        LOG.warn("Ignoring program running notification for program {} without {} specified, {}",
                 programRunId, ProgramOptionConstants.LOGICAL_START_TIME, notification);
        return;
      }
      recordedRunRecord = appMetadataStore.recordProgramRunning(programRunId, logicalStartTimeSecs, twillRunId,
                                                                messageIdBytes);
      writeToHeartBeatTable(recordedRunRecord, logicalStartTimeSecs, programHeartbeatTable);
      runRecordMonitorService.removeRequest(programRunId, true);
      long startDelayTime = logicalStartTimeSecs - RunIds.getTime(programRunId.getRun(), TimeUnit.SECONDS);
      emitStartingTimeMetric(programRunId, startDelayTime);
      break;
    case SUSPENDED:
      long suspendTime = getTimeSeconds(notification.getProperties(), ProgramOptionConstants.SUSPEND_TIME);
      // Since suspend time was added only recently, old suspend notifications may not carry it,
      // in which case the time is -1.
      recordedRunRecord = appMetadataStore.recordProgramSuspend(programRunId, messageIdBytes, suspendTime);
      writeToHeartBeatTable(recordedRunRecord, suspendTime, programHeartbeatTable);
      break;
    case RESUMING:
      long resumeTime = getTimeSeconds(notification.getProperties(), ProgramOptionConstants.RESUME_TIME);
      // Likewise, old resume notifications may not carry a resume time, in which case it is -1.
      recordedRunRecord = appMetadataStore.recordProgramResumed(programRunId, messageIdBytes, resumeTime);
      writeToHeartBeatTable(recordedRunRecord, resumeTime, programHeartbeatTable);
      break;
    case STOPPING:
      Map<String, String> notificationProperties = notification.getProperties();
      long stoppingTsSecs = getTimeSeconds(notificationProperties, ProgramOptionConstants.STOPPING_TIME);
      if (stoppingTsSecs == -1L) {
        LOG.warn("Ignoring program stopping notification for program {} without {} specified, {}",
                 programRunId, ProgramOptionConstants.STOPPING_TIME, notification);
        return;
      }
      long terminateTsSecs = getTimeSeconds(notificationProperties, ProgramOptionConstants.TERMINATE_TIME);
      recordedRunRecord = appMetadataStore.recordProgramStopping(programRunId, messageIdBytes, stoppingTsSecs,
                                                                 terminateTsSecs);
      writeToHeartBeatTable(recordedRunRecord, stoppingTsSecs, programHeartbeatTable);
      break;
    case COMPLETED:
    case KILLED:
    case FAILED:
      recordedRunRecord = handleProgramCompletion(appMetadataStore, programHeartbeatTable, programRunId,
                                                  programRunStatus, notification, messageIdBytes, runnables);
      break;
    case REJECTED:
      ProgramOptions programOptions = ProgramOptions.fromNotification(notification, GSON);
      ProgramDescriptor programDescriptor =
        GSON.fromJson(properties.get(ProgramOptionConstants.PROGRAM_DESCRIPTOR), ProgramDescriptor.class);
      recordedRunRecord = appMetadataStore.recordProgramRejected(
        programRunId, programOptions.getUserArguments().asMap(), programOptions.getArguments().asMap(),
        messageIdBytes, programDescriptor.getArtifactId().toApiArtifactId());
      writeToHeartBeatTable(recordedRunRecord, RunIds.getTime(programRunId.getRun(), TimeUnit.SECONDS),
                            programHeartbeatTable);
      getEmitMetricsRunnable(programRunId, recordedRunRecord, Constants.Metrics.Program.PROGRAM_REJECTED_RUNS, null)
        .ifPresent(runnables::add);
      runRecordMonitorService.removeRequest(programRunId, true);
      break;
    default:
      // This should not happen
      LOG.error("Unsupported program status {} for program {}, {}", programRunStatus, programRunId, notification);
      return;
  }
  if (recordedRunRecord != null) {
    // Publish the recorded status so that the trigger subscriber can pick it up and start the trigger
    // if necessary
    publishRecordedStatus(notification, programRunId, recordedRunRecord.getStatus());
    // Publish the deprovisioning event(s).
    if (programRunStatus.isEndState() && programRunStatus != ProgramRunStatus.REJECTED) {
      // If this is a preview run or a program within a workflow, we don't actually need to de-provision
      // the cluster. Instead, we just record the state as deprovisioned without notifying the provisioner,
      // and we will emit the program status metrics for it.
      boolean isInWorkflow = recordedRunRecord.getSystemArgs().containsKey(ProgramOptionConstants.WORKFLOW_NAME);
      boolean skipProvisioning =
        Boolean.parseBoolean(recordedRunRecord.getSystemArgs().get(ProgramOptionConstants.SKIP_PROVISIONING));
      if (isInWorkflow || skipProvisioning) {
        appMetadataStore.recordProgramDeprovisioning(programRunId, messageIdBytes);
        appMetadataStore.recordProgramDeprovisioned(programRunId, null, messageIdBytes);
      } else {
        provisionerNotifier.deprovisioning(programRunId);
      }
    }
  }
}
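handleProgramEvent leans on two small helpers that are outside this excerpt, getTimeSeconds and writeToHeartBeatTable. Below is a minimal sketch of what they plausibly do, reconstructed only from how they are called above; the millisecond string encoding and the exact write method on ProgramHeartbeatTable are assumptions:

import java.io.IOException;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;

// Returns the property as seconds, or -1 when absent; callers above treat -1 as "missing".
// Assumes the notification encodes timestamps as milliseconds in string form.
private long getTimeSeconds(Map<String, String> properties, String option) {
  String timeString = properties.get(option);
  return timeString == null ? -1 : TimeUnit.MILLISECONDS.toSeconds(Long.parseLong(timeString));
}

// A null run record means the status update was skipped (duplicate message or invalid
// transition), so nothing is written. The write call shown is an assumption.
private void writeToHeartBeatTable(@Nullable RunRecordDetail runRecordMeta, long timestampInSeconds,
                                   ProgramHeartbeatTable programHeartbeatTable) throws IOException {
  if (runRecordMeta == null) {
    return;
  }
  programHeartbeatTable.writeRunRecordMeta(runRecordMeta, TimeUnit.SECONDS.toMillis(timestampInSeconds));
}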
Use of io.cdap.cdap.app.program.ProgramDescriptor in project cdap by caskdata.
The class CoreSchedulerServiceTest, method testProgramEvents.
@Test
@Category(XSlowTests.class)
public void testProgramEvents() throws Exception {
  // Deploy the app
  deploy(AppWithMultipleSchedules.class, 200);
  CConfiguration cConf = getInjector().getInstance(CConfiguration.class);
  TopicId programEventTopic =
    NamespaceId.SYSTEM.topic(cConf.get(Constants.AppFabric.PROGRAM_STATUS_RECORD_EVENT_TOPIC));
  ProgramStateWriter programStateWriter = new MessagingProgramStateWriter(cConf, messagingService);
  // These notifications should not trigger the program
  ProgramRunId anotherWorkflowRun = ANOTHER_WORKFLOW.run(RunIds.generate());
  ArtifactId artifactId = ANOTHER_WORKFLOW.getNamespaceId().artifact("test", "1.0").toApiArtifactId();
  ApplicationSpecification appSpec = new DefaultApplicationSpecification(
    AppWithMultipleSchedules.NAME, ApplicationId.DEFAULT_VERSION, ProjectInfo.getVersion().toString(), "desc",
    null, artifactId, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(),
    Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(),
    Collections.emptyMap(), Collections.emptyMap());
  ProgramDescriptor programDescriptor = new ProgramDescriptor(anotherWorkflowRun.getParent(), appSpec);
  BasicArguments systemArgs =
    new BasicArguments(ImmutableMap.of(ProgramOptionConstants.SKIP_PROVISIONING, Boolean.TRUE.toString()));
  ProgramOptions programOptions =
    new SimpleProgramOptions(anotherWorkflowRun.getParent(), systemArgs, new BasicArguments(), false);
  programStateWriter.start(anotherWorkflowRun, programOptions, null, programDescriptor);
  programStateWriter.running(anotherWorkflowRun, null);
  long lastProcessed = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
  programStateWriter.error(anotherWorkflowRun, null);
  waitUntilProcessed(programEventTopic, lastProcessed);
  ProgramRunId someWorkflowRun = SOME_WORKFLOW.run(RunIds.generate());
  programDescriptor = new ProgramDescriptor(someWorkflowRun.getParent(), appSpec);
  programStateWriter.start(someWorkflowRun,
                           new SimpleProgramOptions(someWorkflowRun.getParent(), systemArgs, new BasicArguments()),
                           null, programDescriptor);
  programStateWriter.running(someWorkflowRun, null);
  lastProcessed = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
  programStateWriter.killed(someWorkflowRun);
  waitUntilProcessed(programEventTopic, lastProcessed);
  Assert.assertEquals(0, getRuns(TRIGGERED_WORKFLOW, ProgramRunStatus.ALL));
  // Enable the schedule
  scheduler.enableSchedule(APP_MULT_ID.schedule(AppWithMultipleSchedules.WORKFLOW_COMPLETED_SCHEDULE));
  // Start a program with user arguments
  startProgram(ANOTHER_WORKFLOW,
               ImmutableMap.of(AppWithMultipleSchedules.ANOTHER_RUNTIME_ARG_KEY,
                               AppWithMultipleSchedules.ANOTHER_RUNTIME_ARG_VALUE),
               200);
  // Wait for a completed run record
  waitForCompleteRuns(1, TRIGGERED_WORKFLOW);
  assertProgramRuns(TRIGGERED_WORKFLOW, ProgramRunStatus.COMPLETED, 1);
  RunRecord run = getProgramRuns(TRIGGERED_WORKFLOW, ProgramRunStatus.COMPLETED).get(0);
  Map<String, List<WorkflowTokenDetail.NodeValueDetail>> tokenData =
    getWorkflowToken(TRIGGERED_WORKFLOW, run.getPid(), null, null).getTokenData();
  // There should be 2 entries in tokenData
  Assert.assertEquals(2, tokenData.size());
  // The value of TRIGGERED_RUNTIME_ARG_KEY should be ANOTHER_RUNTIME_ARG_VALUE from the triggering workflow
  Assert.assertEquals(AppWithMultipleSchedules.ANOTHER_RUNTIME_ARG_VALUE,
                      tokenData.get(AppWithMultipleSchedules.TRIGGERED_RUNTIME_ARG_KEY).get(0).getValue());
  // The value of TRIGGERED_TOKEN_KEY should be ANOTHER_TOKEN_VALUE from the triggering workflow
  Assert.assertEquals(AppWithMultipleSchedules.ANOTHER_TOKEN_VALUE,
                      tokenData.get(AppWithMultipleSchedules.TRIGGERED_TOKEN_KEY).get(0).getValue());
}
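The waitUntilProcessed helper used above is not part of this excerpt. A hedged sketch of the polling it needs to perform, reusing the Tasks.waitFor pattern seen elsewhere in these tests; getLastProcessedTime is a hypothetical accessor standing in for however the test actually reads the subscriber's progress on the topic:

// Hypothetical sketch; the real helper in CoreSchedulerServiceTest may inspect
// the TMS topic or the subscriber's state store differently.
private void waitUntilProcessed(TopicId topic, long lastProcessed) throws Exception {
  // Block until the event subscriber has processed messages published at or after
  // the marker timestamp (in seconds).
  Tasks.waitFor(true, () -> getLastProcessedTime(topic) >= lastProcessed, 10, TimeUnit.SECONDS);
}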
Use of io.cdap.cdap.app.program.ProgramDescriptor in project cdap by caskdata.
The class MapReduceTaskContextProvider, method createProgram.
/**
* Creates a {@link Program} instance based on the information from the {@link MapReduceContextConfig}, using
* the given program ClassLoader.
*/
private Program createProgram(MapReduceContextConfig contextConfig, ClassLoader programClassLoader) {
  Location programLocation;
  LocationFactory locationFactory = new LocalLocationFactory();
  // Use the program jar location regardless of whether the run is local or distributed,
  // since it is valid for both
  programLocation = locationFactory.create(new File(contextConfig.getProgramJarName()).getAbsoluteFile().toURI());
  return new DefaultProgram(new ProgramDescriptor(contextConfig.getProgramId(),
                                                  contextConfig.getApplicationSpecification()),
                            programLocation, programClassLoader);
}
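As a side note on the location handling: resolving the jar through an absolute file URI is what makes the same code path valid for both local and distributed runs. A tiny self-contained illustration using Apache Twill's LocalLocationFactory; the jar name below is made up:

import java.io.File;
import org.apache.twill.filesystem.LocalLocationFactory;
import org.apache.twill.filesystem.Location;
import org.apache.twill.filesystem.LocationFactory;

public class ProgramLocationExample {
  public static void main(String[] args) {
    LocationFactory locationFactory = new LocalLocationFactory();
    // An absolute file URI resolves the same way whether the jar name came from a
    // local runner or from a distributed job's configuration.
    Location programLocation =
      locationFactory.create(new File("wordcount-program.jar").getAbsoluteFile().toURI());
    System.out.println(programLocation.toURI());
  }
}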
Use of io.cdap.cdap.app.program.ProgramDescriptor in project cdap by caskdata.
The class ProgramNotificationSubscriberServiceTest, method testWorkflowInnerPrograms.
@Test
public void testWorkflowInnerPrograms() throws Exception {
  AppFabricTestHelper.deployApplication(Id.Namespace.DEFAULT, ProgramStateWorkflowApp.class, null, cConf);
  ProgramRunId workflowRunId = NamespaceId.DEFAULT.app(ProgramStateWorkflowApp.class.getSimpleName())
    .workflow(ProgramStateWorkflowApp.ProgramStateWorkflow.class.getSimpleName())
    .run(RunIds.generate());
  ApplicationSpecification appSpec = TransactionRunners.run(transactionRunner, context -> {
    return AppMetadataStore.create(context).getApplication(workflowRunId.getParent().getParent()).getSpec();
  });
  ProgramDescriptor programDescriptor = new ProgramDescriptor(workflowRunId.getParent(), appSpec);
  // Start and run the workflow
  Map<String, String> systemArgs = new HashMap<>();
  systemArgs.put(ProgramOptionConstants.SKIP_PROVISIONING, Boolean.TRUE.toString());
  systemArgs.put(SystemArguments.PROFILE_NAME, ProfileId.NATIVE.getScopedName());
  programStateWriter.start(workflowRunId,
                           new SimpleProgramOptions(workflowRunId.getParent(), new BasicArguments(systemArgs),
                                                    new BasicArguments()),
                           null, programDescriptor);
  programStateWriter.running(workflowRunId, null);
  ProgramRunId mrRunId = workflowRunId.getParent().getParent()
    .mr(ProgramStateWorkflowApp.ProgramStateMR.class.getSimpleName())
    .run(RunIds.generate());
  ProgramRunId sparkRunId = workflowRunId.getParent().getParent()
    .spark(ProgramStateWorkflowApp.ProgramStateSpark.class.getSimpleName())
    .run(RunIds.generate());
  ProgramId sparkId2 = workflowRunId.getParent().getParent()
    .spark(ProgramStateWorkflowApp.ProgramStateSpark2.class.getSimpleName());
  // Start and run the MR and Spark programs inside the workflow
  for (ProgramRunId programRunId : Arrays.asList(mrRunId, sparkRunId)) {
    workflowStateWriter.addWorkflowNodeState(workflowRunId,
                                             new WorkflowNodeStateDetail(programRunId.getProgram(),
                                                                         NodeStatus.STARTING));
    workflowStateWriter.addWorkflowNodeState(workflowRunId,
                                             new WorkflowNodeStateDetail(programRunId.getProgram(),
                                                                         NodeStatus.RUNNING));
    systemArgs = new HashMap<>(systemArgs);
    systemArgs.put(ProgramOptionConstants.RUN_ID, programRunId.getRun());
    systemArgs.put(ProgramOptionConstants.WORKFLOW_NAME, workflowRunId.getProgram());
    systemArgs.put(ProgramOptionConstants.WORKFLOW_RUN_ID, workflowRunId.getRun());
    systemArgs.put(ProgramOptionConstants.WORKFLOW_NODE_ID, programRunId.getProgram());
    systemArgs.put(ProgramOptionConstants.PROGRAM_NAME_IN_WORKFLOW, programRunId.getProgram());
    programStateWriter.start(programRunId,
                             new SimpleProgramOptions(programRunId.getParent(), new BasicArguments(systemArgs),
                                                      new BasicArguments()),
                             null, programDescriptor);
    programStateWriter.running(programRunId, null);
    // Wait for the inner program to be running
    Tasks.waitFor(ProgramRunStatus.RUNNING, () -> TransactionRunners.run(transactionRunner, context -> {
      AppMetadataStore metadataStoreDataset = AppMetadataStore.create(context);
      RunRecordDetail meta = metadataStoreDataset.getRun(programRunId);
      if (meta == null) {
        return null;
      }
      return meta.getStatus();
    }), 10, TimeUnit.SECONDS);
  }
  // Stop the Spark program normally
  programStateWriter.completed(sparkRunId);
  // Error out the Workflow without stopping the MR program
  programStateWriter.error(workflowRunId, new IllegalStateException("Explicitly error out"));
  // Wait for the Workflow state to change to FAILED
  Tasks.waitFor(ProgramRunStatus.FAILED, () -> TransactionRunners.run(transactionRunner, context -> {
    AppMetadataStore metadataStoreDataset = AppMetadataStore.create(context);
    RunRecordDetail meta = metadataStoreDataset.getRun(workflowRunId);
    if (meta == null) {
      return null;
    }
    return meta.getStatus();
  }), 10000, TimeUnit.SECONDS);
  // The MR run record should be changed to the FAILED state as well (without race)
  TransactionRunners.run(transactionRunner, context -> {
    AppMetadataStore metadataStoreDataset = AppMetadataStore.create(context);
    RunRecordDetail meta = metadataStoreDataset.getRun(mrRunId);
    Assert.assertNotNull(meta);
    Assert.assertEquals(ProgramRunStatus.FAILED, meta.getStatus());
  });
  // The Spark run record should stay COMPLETED
  TransactionRunners.run(transactionRunner, context -> {
    AppMetadataStore metadataStoreDataset = AppMetadataStore.create(context);
    RunRecordDetail meta = metadataStoreDataset.getRun(sparkRunId);
    Assert.assertNotNull(meta);
    Assert.assertEquals(ProgramRunStatus.COMPLETED, meta.getStatus());
  });
  // Since the Spark2 program hasn't been executed, there should be no run record
  TransactionRunners.run(transactionRunner, context -> {
    AppMetadataStore metadataStoreDataset = AppMetadataStore.create(context);
    Map<ProgramRunId, RunRecordDetail> runs =
      metadataStoreDataset.getRuns(sparkId2, ProgramRunStatus.ALL, 0, Long.MAX_VALUE, 100, null);
    Assert.assertTrue(runs.isEmpty());
  });
}
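The test repeats the same transactional status lookup several times. Purely as a readability suggestion, a small hypothetical helper, not present in the actual test, could fold that pattern into one place:

// Hypothetical helper; mirrors the TransactionRunners.run blocks used repeatedly above.
@Nullable
private ProgramRunStatus getRunStatus(ProgramRunId runId) {
  return TransactionRunners.run(transactionRunner, context -> {
    RunRecordDetail meta = AppMetadataStore.create(context).getRun(runId);
    return meta == null ? null : meta.getStatus();
  });
}

With it, each wait collapses to Tasks.waitFor(ProgramRunStatus.RUNNING, () -> getRunStatus(programRunId), 10, TimeUnit.SECONDS).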