Use of io.cdap.cdap.app.runtime.ProgramOptions in project cdap by caskdata.
Class ProvisioningService, method provision.
/**
* Record that a cluster will be provisioned for a program run, returning a Runnable that will actually perform
* the cluster provisioning. This method must be run within a transaction.
* The task returned should only be executed after the transaction that ran this method has completed.
* Running the returned Runnable will start the actual task using an executor within this service so that it can be
* tracked and optionally cancelled using {@link #cancelProvisionTask(ProgramRunId)}. The caller does not need to
* submit the runnable using their own executor.
*
* @param provisionRequest the provision request
* @param context context for the transaction
* @return runnable that will actually execute the cluster provisioning
*/
public Runnable provision(ProvisionRequest provisionRequest, StructuredTableContext context)
  throws IOException, InterruptedException {
  initializeLatch.await(120, TimeUnit.SECONDS);
  ProgramRunId programRunId = provisionRequest.getProgramRunId();
  ProgramOptions programOptions = provisionRequest.getProgramOptions();
  Map<String, String> args = programOptions.getArguments().asMap();
  String name = SystemArguments.getProfileProvisioner(args);
  Provisioner provisioner = provisionerInfo.get().provisioners.get(name);
  // any errors seen here will transition the state straight to deprovisioned since no cluster create was attempted
  if (provisioner == null) {
    runWithProgramLogging(programRunId, args,
      () -> LOG.error("Could not provision cluster for the run because provisioner {} does not exist.", name));
    programStateWriter.error(programRunId, new IllegalStateException("Provisioner does not exist."));
    provisionerNotifier.deprovisioned(programRunId);
    return () -> { };
  }
  // get plugin requirement information and check for capability to run on the provisioner
  Set<PluginRequirement> requirements = GSON.fromJson(args.get(ProgramOptionConstants.PLUGIN_REQUIREMENTS),
                                                      PLUGIN_REQUIREMENT_SET_TYPE);
  if (requirements != null) {
    Set<PluginRequirement> unfulfilledRequirements =
      getUnfulfilledRequirements(provisioner.getCapabilities(), requirements);
    if (!unfulfilledRequirements.isEmpty()) {
      runWithProgramLogging(programRunId, args, () -> LOG.error(String.format(
        "'%s' cannot be run using profile '%s' because the profile does not meet all plugin requirements. "
          + "The following requirements were not met by the listed plugins: '%s'",
        programRunId.getProgram(), name, groupByRequirement(unfulfilledRequirements))));
      programStateWriter.error(programRunId, new IllegalArgumentException(
        "Provisioner does not meet all the requirements for the program to run."));
      provisionerNotifier.deprovisioned(programRunId);
      return () -> { };
    }
  }
  Map<String, String> properties = SystemArguments.getProfileProperties(args);
  ProvisioningOp provisioningOp = new ProvisioningOp(ProvisioningOp.Type.PROVISION,
                                                     ProvisioningOp.Status.REQUESTING_CREATE);
  ProvisioningTaskInfo provisioningTaskInfo =
    new ProvisioningTaskInfo(programRunId, provisionRequest.getProgramDescriptor(), programOptions, properties,
                             name, provisionRequest.getUser(), provisioningOp,
                             createKeysDirectory(programRunId).toURI(), null);
  ProvisionerTable provisionerTable = new ProvisionerTable(context);
  provisionerTable.putTaskInfo(provisioningTaskInfo);
  return createProvisionTask(provisioningTaskInfo, provisioner);
}
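For context, here is a minimal usage sketch, not taken from the CDAP sources, of how a caller might honor this contract: call provision() inside a transaction, then run the returned task only after the transaction has committed. The transactionRunner, provisioningService, and provisionRequest names are assumed to be in scope.

// Hedged sketch: provision() is called within a transaction, and the returned Runnable is executed
// only after that transaction has completed. It submits itself to the service's internal executor,
// so no external executor is needed.
Runnable provisionTask = TransactionRunners.run(transactionRunner,
  context -> provisioningService.provision(provisionRequest, context));
provisionTask.run();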
Use of io.cdap.cdap.app.runtime.ProgramOptions in project cdap by caskdata.
Class ProvisioningService, method createProvisionTask.
private Runnable createProvisionTask(ProvisioningTaskInfo taskInfo, Provisioner provisioner) {
  ProgramRunId programRunId = taskInfo.getProgramRunId();
  ProgramOptions programOptions = taskInfo.getProgramOptions();
  Map<String, String> systemArgs = programOptions.getArguments().asMap();
  ProvisionerContext context;
  try {
    SSHContext sshContext = new DefaultSSHContext(Networks.getAddress(cConf, Constants.NETWORK_PROXY_ADDRESS),
                                                  locationFactory.create(taskInfo.getSecureKeysDir()),
                                                  createSSHKeyPair(taskInfo));
    context = createContext(cConf, programOptions, programRunId, taskInfo.getUser(),
                            taskInfo.getProvisionerProperties(), sshContext);
  } catch (IOException e) {
    runWithProgramLogging(taskInfo.getProgramRunId(), systemArgs,
      () -> LOG.error("Failed to load ssh key. The run will be marked as failed.", e));
    programStateWriter.error(programRunId, new IllegalStateException("Failed to load ssh key.", e));
    provisionerNotifier.deprovisioning(taskInfo.getProgramRunId());
    return () -> { };
  } catch (InvalidMacroException e) {
    runWithProgramLogging(taskInfo.getProgramRunId(), systemArgs,
      () -> LOG.error("Could not evaluate macros while provisioning. The run will be marked as failed.", e));
    programStateWriter.error(programRunId,
      new IllegalStateException("Could not evaluate macros while provisioning", e));
    provisionerNotifier.deprovisioning(taskInfo.getProgramRunId());
    return () -> { };
  }
  // TODO: (CDAP-13246) pick up timeout from profile instead of hardcoding
  ProvisioningTask task = new ProvisionTask(taskInfo, transactionRunner, provisioner, context,
                                            provisionerNotifier, programStateWriter, 300);
  ProvisioningTaskKey taskKey = new ProvisioningTaskKey(programRunId, ProvisioningOp.Type.PROVISION);
  return () -> taskExecutor.submit(taskKey, () -> callWithProgramLogging(programRunId, systemArgs, () -> {
    try {
      return task.executeOnce();
    } catch (InterruptedException e) {
      LOG.debug("Provision task for program run {} interrupted.", taskInfo.getProgramRunId());
      throw e;
    } catch (Exception e) {
      LOG.info("Provision task for program run {} failed.", taskInfo.getProgramRunId(), e);
      throw e;
    }
  }));
}
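The task is registered with the executor under a ProvisioningTaskKey, which is what makes cancellation via cancelProvisionTask(ProgramRunId) possible. As an illustration of that pattern only (this is not CDAP's executor implementation), a keyed executor can track each submitted task by its key and interrupt it on cancellation:

// Illustrative sketch of the keyed-executor pattern; not the actual CDAP task executor.
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

class KeyedTaskExecutorSketch<K> {
  private final ExecutorService executor = Executors.newCachedThreadPool();
  private final Map<K, Future<?>> running = new ConcurrentHashMap<>();

  // Tracks the task under its key so that it can later be cancelled.
  void submit(K key, Runnable task) {
    running.put(key, executor.submit(task));
  }

  // Interrupts the task; in the provision task above, this surfaces as the InterruptedException branch.
  void cancel(K key) {
    Future<?> future = running.remove(key);
    if (future != null) {
      future.cancel(true);
    }
  }
}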
Use of io.cdap.cdap.app.runtime.ProgramOptions in project cdap by caskdata.
Class RuntimeServiceMainTest, method testRuntimeService.
@Test
public void testRuntimeService() throws Exception {
  ArtifactId artifactId = NamespaceId.DEFAULT.artifact("test", "1.0");
  ProgramRunId programRunId = NamespaceId.DEFAULT.app("app").worker("worker").run(RunIds.generate());
  Map<String, String> systemArgs = ImmutableMap.of(
    SystemArguments.PROFILE_PROVISIONER, NativeProvisioner.SPEC.getName(),
    SystemArguments.PROFILE_NAME, "default");
  ProgramOptions programOptions = new SimpleProgramOptions(programRunId.getParent(),
                                                           new BasicArguments(systemArgs), new BasicArguments());
  ProgramDescriptor programDescriptor = new ProgramDescriptor(programRunId.getParent(), null, artifactId);
  // Write out program state events to simulate program start
  Injector appFabricInjector = getServiceMainInstance(AppFabricServiceMain.class).getInjector();
  CConfiguration cConf = appFabricInjector.getInstance(CConfiguration.class);
  ProgramStatePublisher programStatePublisher = new MessagingProgramStatePublisher(
    appFabricInjector.getInstance(MessagingService.class),
    NamespaceId.SYSTEM.topic(cConf.get(Constants.AppFabric.PROGRAM_STATUS_RECORD_EVENT_TOPIC)),
    RetryStrategies.fromConfiguration(cConf, "system.program.state."));
  new MessagingProgramStateWriter(programStatePublisher).start(programRunId, programOptions, null, programDescriptor);
  Injector injector = getServiceMainInstance(RuntimeServiceMain.class).getInjector();
  TransactionRunner txRunner = injector.getInstance(TransactionRunner.class);
  // Should see a STARTING record in the runtime store
  Tasks.waitFor(ProgramRunStatus.STARTING, () -> {
    RunRecordDetail detail = TransactionRunners.run(txRunner,
      context -> AppMetadataStore.create(context).getRun(programRunId));
    return detail == null ? null : detail.getStatus();
  }, 5, TimeUnit.SECONDS);
  ProgramStateWriter programStateWriter = createProgramStateWriter(injector, programRunId);
  // Write a running state. We should see a RUNNING record in the runtime store
  programStateWriter.running(programRunId, null);
  Tasks.waitFor(ProgramRunStatus.RUNNING, () -> {
    RunRecordDetail detail = TransactionRunners.run(txRunner,
      context -> AppMetadataStore.create(context).getRun(programRunId));
    return detail == null ? null : detail.getStatus();
  }, 5, TimeUnit.SECONDS);
  // Write a complete state. The run record should be removed in the runtime store
  programStateWriter.completed(programRunId);
  Tasks.waitFor(true,
    () -> TransactionRunners.run(txRunner,
      context -> AppMetadataStore.create(context).getRun(programRunId) == null),
    5, TimeUnit.SECONDS);
}
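The two Tasks.waitFor blocks above repeat the same transactional status lookup; a small helper along these lines, not part of the original test, could factor it out:

// Hypothetical helper: reads the current run status inside a short transaction,
// or returns null if there is no run record.
private ProgramRunStatus getRunStatus(TransactionRunner txRunner, ProgramRunId programRunId) {
  return TransactionRunners.run(txRunner, context -> {
    RunRecordDetail detail = AppMetadataStore.create(context).getRun(programRunId);
    return detail == null ? null : detail.getStatus();
  });
}

The waits then reduce to Tasks.waitFor(ProgramRunStatus.RUNNING, () -> getRunStatus(txRunner, programRunId), 5, TimeUnit.SECONDS).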
Use of io.cdap.cdap.app.runtime.ProgramOptions in project cdap by caskdata.
Class AbstractInMemoryProgramRunner, method startAll.
/**
* Starts all instances of a Program component.
* @param program The program to run
* @param options options for the program
* @param numInstances number of component instances to start
*/
protected final ProgramController startAll(Program program, ProgramOptions options, int numInstances) {
  RunId runId = ProgramRunners.getRunId(options);
  Table<String, Integer, ProgramController> components = HashBasedTable.create();
  try {
    for (int instanceId = 0; instanceId < numInstances; instanceId++) {
      ProgramOptions componentOptions = createComponentOptions(instanceId, numInstances, runId, options);
      ProgramController controller = createProgramRunner().run(program, componentOptions);
      components.put(program.getName(), instanceId, controller);
    }
    return new InMemoryProgramController(components, program, options);
  } catch (Throwable t) {
    LOG.error("Failed to start all program instances", t);
    try {
      // Need to stop all started components
      Futures.successfulAsList(Iterables.transform(components.values(),
        new Function<ProgramController, ListenableFuture<?>>() {
          @Override
          public ListenableFuture<?> apply(ProgramController controller) {
            return controller.stop();
          }
        })).get();
      throw Throwables.propagate(t);
    } catch (Exception e) {
      LOG.error("Failed to stop all program instances upon startup failure.", e);
      throw Throwables.propagate(e);
    }
  }
}
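createComponentOptions is not shown in this excerpt. A plausible sketch, assuming it copies the original system arguments and tags each component with its instance id, the total instance count, and the run id (the exact ProgramOptionConstants keys are assumptions):

// Plausible sketch only; the real method and the constant names may differ.
private ProgramOptions createComponentOptions(int instanceId, int instances, RunId runId, ProgramOptions options) {
  Map<String, String> systemArgs = new HashMap<>(options.getArguments().asMap());
  systemArgs.put(ProgramOptionConstants.INSTANCE_ID, Integer.toString(instanceId));
  systemArgs.put(ProgramOptionConstants.INSTANCES, Integer.toString(instances));
  systemArgs.put(ProgramOptionConstants.RUN_ID, runId.getId());
  return new SimpleProgramOptions(options.getProgramId(), new BasicArguments(systemArgs),
                                  options.getUserArguments());
}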
Use of io.cdap.cdap.app.runtime.ProgramOptions in project cdap by caskdata.
Class ProgramNotificationSubscriberService, method handleClusterEvent.
/**
* Handles a notification related to cluster operations.
*
* @param programRunId program run id from the event
* @param clusterStatus cluster status from the event
* @param notification the notification to process
* @param messageIdBytes the unique ID for the notification message
* @param appMetadataStore the data table to use
* @param context the table context for performing table operations
* @return an {@link Optional} of {@link Runnable} carrying a task to execute after the handling of this event has completed.
* See {@link #postProcess()} for details.
* @throws IOException if failed to read/write to the app metadata store.
*/
private Optional<Runnable> handleClusterEvent(ProgramRunId programRunId, ProgramRunClusterStatus clusterStatus,
                                              Notification notification, byte[] messageIdBytes,
                                              AppMetadataStore appMetadataStore,
                                              StructuredTableContext context) throws IOException, InterruptedException {
  Map<String, String> properties = notification.getProperties();
  ProgramOptions programOptions = ProgramOptions.fromNotification(notification, GSON);
  String userId = properties.get(ProgramOptionConstants.USER_ID);
  long endTs = getTimeSeconds(properties, ProgramOptionConstants.CLUSTER_END_TIME);
  ProgramDescriptor programDescriptor = GSON.fromJson(properties.get(ProgramOptionConstants.PROGRAM_DESCRIPTOR),
                                                      ProgramDescriptor.class);
  switch (clusterStatus) {
    case PROVISIONING:
      appMetadataStore.recordProgramProvisioning(programRunId, programOptions.getUserArguments().asMap(),
                                                 programOptions.getArguments().asMap(), messageIdBytes,
                                                 programDescriptor.getArtifactId().toApiArtifactId());
      ProvisionRequest provisionRequest = new ProvisionRequest(programRunId, programOptions,
                                                               programDescriptor, userId);
      return Optional.of(provisioningService.provision(provisionRequest, context));
    case PROVISIONED:
      Cluster cluster = GSON.fromJson(properties.get(ProgramOptionConstants.CLUSTER), Cluster.class);
      RunRecordDetail runRecord = appMetadataStore.recordProgramProvisioned(programRunId,
                                                                            cluster.getNodes().size(),
                                                                            messageIdBytes);
      // The run record can be missing, or already in a STOPPING/KILLED state, if the run was stopped while
      // the cluster was being provisioned. In that case, we end up with a provisioned message, but we don't
      // want to start the program.
      if (runRecord == null || runRecord.getStatus() == ProgramRunStatus.STOPPING
        || runRecord.getStatus() == ProgramRunStatus.KILLED) {
        break;
      }
      // Update the ProgramOptions system arguments to include information needed for program execution
      Map<String, String> systemArgs = new HashMap<>(programOptions.getArguments().asMap());
      systemArgs.put(ProgramOptionConstants.USER_ID, properties.get(ProgramOptionConstants.USER_ID));
      systemArgs.put(ProgramOptionConstants.CLUSTER, properties.get(ProgramOptionConstants.CLUSTER));
      systemArgs.put(ProgramOptionConstants.SECURE_KEYS_DIR, properties.get(ProgramOptionConstants.SECURE_KEYS_DIR));
      ProgramOptions newProgramOptions = new SimpleProgramOptions(programOptions.getProgramId(),
                                                                  new BasicArguments(systemArgs),
                                                                  programOptions.getUserArguments());
      // Publish the program STARTING state before starting the program
      programStateWriter.start(programRunId, newProgramOptions, null, programDescriptor);
      // emit provisioning time metric
      long provisioningTime = System.currentTimeMillis() / 1000
        - RunIds.getTime(programRunId.getRun(), TimeUnit.SECONDS);
      SystemArguments.getProfileIdFromArgs(programRunId.getNamespaceId(), systemArgs)
        .ifPresent(profileId -> emitProvisioningTimeMetric(programRunId, profileId, programOptions, provisioningTime));
      break;
    case DEPROVISIONING:
      RunRecordDetail recordedMeta = appMetadataStore.recordProgramDeprovisioning(programRunId, messageIdBytes);
      // A null record means the deprovisioning state was not recorded, which happens for a duplicate message
      // or an invalid state transition. In both cases, we should not try to deprovision the cluster.
      if (recordedMeta != null) {
        return Optional.of(provisioningService.deprovision(programRunId, context));
      }
      break;
    case DEPROVISIONED:
      appMetadataStore.recordProgramDeprovisioned(programRunId, endTs, messageIdBytes);
      break;
    case ORPHANED:
      appMetadataStore.recordProgramOrphaned(programRunId, endTs, messageIdBytes);
      break;
  }
  return Optional.empty();
}
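The javadoc defers to postProcess() for how the returned task is used. A hedged sketch of that calling pattern follows, with variable names assumed for illustration: the runnable is collected while the notification is handled inside a transaction, then executed only after the transaction has committed, since work such as cluster provisioning must observe the committed state.

// Illustrative sketch, not the actual subscriber code; programRunId, clusterStatus, notification,
// messageIdBytes, and transactionRunner are assumed to be in scope.
List<Runnable> postProcessTasks = new ArrayList<>();
TransactionRunners.run(transactionRunner, context -> {
  AppMetadataStore store = AppMetadataStore.create(context);
  handleClusterEvent(programRunId, clusterStatus, notification, messageIdBytes, store, context)
    .ifPresent(postProcessTasks::add);
});
// Only after the transaction has committed is it safe to run the collected tasks,
// e.g. to actually kick off cluster provisioning for the PROVISIONING case above.
postProcessTasks.forEach(Runnable::run);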