Use of io.cdap.cdap.runtime.spi.provisioner.Cluster in project cdap by caskdata.
From the class DefaultRuntimeJob, method run.
/**
 * Runs a single program inside this runtime job and blocks until the program reaches a terminal state.
 *
 * <p>The method performs the following steps in order:
 * <ol>
 *   <li>Installs a process-wide uncaught exception handler and the SLF4J bridge for java.util.logging.</li>
 *   <li>Reads the {@link ProgramOptions} and the {@link ApplicationSpecification} from the JSON files named by
 *       {@code DistributedProgramRunner.PROGRAM_OPTIONS_FILE_NAME} and
 *       {@code DistributedProgramRunner.APP_SPEC_FILE_NAME}.</li>
 *   <li>Creates the Guice injector, initializes the log appender, sets up monitoring and starts the core
 *       services.</li>
 *   <li>Attempts to regenerate the application specification; a failure is logged as a warning and
 *       execution continues.</li>
 *   <li>Creates the program, runs it through a {@link ProgramRunner}, and waits on a future that is
 *       completed by a listener attached to the {@link ProgramController}.</li>
 *   <li>In all cases, stops the core services, restores the previous default {@link ProxySelector},
 *       clears the default {@link Authenticator} and counts down {@code runCompletedLatch}.</li>
 * </ol>
 *
 * @param runtimeJobEnv the environment of this runtime job; it is passed to {@code createCConf} and
 *                      {@code createModules} when building the injector
 * @throws Exception if reading the input files, creating the injector or starting the services fails, or if
 *                   running the program fails; in the latter case the {@link Throwable} caught around the
 *                   program execution is rethrown unchanged
 */
@Override
public void run(RuntimeJobEnvironment runtimeJobEnv) throws Exception {
// Setup process wide settings
Thread.setDefaultUncaughtExceptionHandler(new UncaughtExceptionHandler());
// Route java.util.logging records through SLF4J
SLF4JBridgeHandler.removeHandlersForRootLogger();
SLF4JBridgeHandler.install();
// Get Program Options
ProgramOptions programOpts = readJsonFile(new File(DistributedProgramRunner.PROGRAM_OPTIONS_FILE_NAME), ProgramOptions.class);
ProgramRunId programRunId = programOpts.getProgramId().run(ProgramRunners.getRunId(programOpts));
ProgramId programId = programRunId.getParent();
Arguments systemArgs = programOpts.getArguments();
// Setup logging context for the program
LoggingContextAccessor.setLoggingContext(LoggingContextHelper.getLoggingContextWithRunId(programRunId, systemArgs.asMap()));
// Get the cluster launch type.
// The cluster is carried as a JSON string in the system arguments under ProgramOptionConstants.CLUSTER.
Cluster cluster = GSON.fromJson(systemArgs.getOption(ProgramOptionConstants.CLUSTER), Cluster.class);
// Get App spec
ApplicationSpecification appSpec = readJsonFile(new File(DistributedProgramRunner.APP_SPEC_FILE_NAME), ApplicationSpecification.class);
ProgramDescriptor programDescriptor = new ProgramDescriptor(programId, appSpec);
// Create injector and get program runner
Injector injector = Guice.createInjector(createModules(runtimeJobEnv, createCConf(runtimeJobEnv, programOpts), programRunId, programOpts));
CConfiguration cConf = injector.getInstance(CConfiguration.class);
// Initialize log appender
LogAppenderInitializer logAppenderInitializer = injector.getInstance(LogAppenderInitializer.class);
logAppenderInitializer.initialize();
SystemArguments.setLogLevel(programOpts.getUserArguments(), logAppenderInitializer);
// Remember the current default ProxySelector so that it can be restored in the outer finally block.
// NOTE(review): presumably setupMonitoring may replace the default ProxySelector/Authenticator - confirm.
ProxySelector oldProxySelector = ProxySelector.getDefault();
RuntimeMonitors.setupMonitoring(injector, programOpts);
// NOTE(review): the services are started before the try block whose finally stops them, so a failure in
// createCoreServices/startCoreServices is not followed by stopCoreServices from this method.
Deque<Service> coreServices = createCoreServices(injector, systemArgs, cluster);
startCoreServices(coreServices);
// regenerate app spec
ConfiguratorFactory configuratorFactory = injector.getInstance(ConfiguratorFactory.class);
try {
// Work on a mutable copy of the system arguments; it becomes the new program options on success
Map<String, String> systemArguments = new HashMap<>(programOpts.getArguments().asMap());
File pluginDir = new File(programOpts.getArguments().getOption(ProgramOptionConstants.PLUGIN_DIR, DistributedProgramRunner.PLUGIN_DIR));
// create a directory to store plugin artifacts for the regeneration of app spec to fetch plugin artifacts
DirUtils.mkdirs(pluginDir);
// Record the default plugin directory in the arguments when none was provided
if (!programOpts.getArguments().hasOption(ProgramOptionConstants.PLUGIN_DIR)) {
systemArguments.put(ProgramOptionConstants.PLUGIN_DIR, DistributedProgramRunner.PLUGIN_DIR);
}
// remember the file names in the artifact folder before app regeneration
List<String> pluginFiles = DirUtils.listFiles(pluginDir, File::isFile).stream().map(File::getName).collect(Collectors.toList());
ApplicationSpecification generatedAppSpec = regenerateAppSpec(systemArguments, programOpts.getUserArguments().asMap(), programId, appSpec, programDescriptor, configuratorFactory);
// Keep the existing app spec when regeneration returns null
appSpec = generatedAppSpec != null ? generatedAppSpec : appSpec;
programDescriptor = new ProgramDescriptor(programDescriptor.getProgramId(), appSpec);
List<String> pluginFilesAfter = DirUtils.listFiles(pluginDir, File::isFile).stream().map(File::getName).collect(Collectors.toList());
// No plugin files in the directory after regeneration: drop the plugin directory argument
if (pluginFilesAfter.isEmpty()) {
systemArguments.remove(ProgramOptionConstants.PLUGIN_DIR);
}
// The list of plugin file names changed during regeneration: drop the plugin archive argument.
// recreate it from the folders
if (!pluginFiles.equals(pluginFilesAfter)) {
systemArguments.remove(ProgramOptionConstants.PLUGIN_ARCHIVE);
}
// update program options
programOpts = new SimpleProgramOptions(programOpts.getProgramId(), new BasicArguments(systemArguments), programOpts.getUserArguments(), programOpts.isDebug());
} catch (Exception e) {
// Regeneration is best effort: the failure is only logged and the run continues.
// NOTE(review): if the exception is thrown after appSpec/programDescriptor were reassigned above,
// the regenerated values stay in effect while programOpts keeps its original arguments.
LOG.warn("Failed to regenerate the app spec for program {}, using the existing app spec", programId, e);
}
ProgramStateWriter programStateWriter = injector.getInstance(ProgramStateWriter.class);
RuntimeClientService runtimeClientService = injector.getInstance(RuntimeClientService.class);
// Completed by the controller listener below; this method blocks on it until the program terminates
CompletableFuture<ProgramController.State> programCompletion = new CompletableFuture<>();
try {
ProgramRunner programRunner = injector.getInstance(ProgramRunnerFactory.class).create(programId.getType());
// Create and run the program. The program files should be present in current working directory.
try (Program program = createProgram(cConf, programRunner, programDescriptor, programOpts)) {
ProgramController controller = programRunner.run(program, programOpts);
// controllerFuture is declared outside this method; it is completed here with the running controller
controllerFuture.complete(controller);
// Stop the controller when the runtime client service reports a stop request
runtimeClientService.onProgramStopRequested(controller::stop);
controller.addListener(new AbstractListener() {
@Override
public void completed() {
programCompletion.complete(ProgramController.State.COMPLETED);
}
@Override
public void killed() {
// Write an extra state to make sure there is always a terminal state even
// if the program application run failed to write out the state.
programStateWriter.killed(programRunId);
programCompletion.complete(ProgramController.State.KILLED);
}
@Override
public void error(Throwable cause) {
// Write an extra state to make sure there is always a terminal state even
// if the program application run failed to write out the state.
programStateWriter.error(programRunId, cause);
programCompletion.completeExceptionally(cause);
}
}, Threads.SAME_THREAD_EXECUTOR);
// stopRequested is declared outside this method.
// NOTE(review): presumably set when a stop was requested before the controller existed - confirm.
if (stopRequested) {
controller.stop();
}
// Block on the completion.
// If the listener completed the future exceptionally, get() throws and the catch block below runs
// with programCompletion already done.
programCompletion.get();
} finally {
// Release the program runner if it holds closeable resources; close failures are ignored
if (programRunner instanceof Closeable) {
Closeables.closeQuietly((Closeable) programRunner);
}
}
} catch (Throwable t) {
// Propagate the failure to anything waiting on the controller future
controllerFuture.completeExceptionally(t);
if (!programCompletion.isDone()) {
// We log here so that the logs would still send back to the program logs collection.
// Only log if the program completion is not done.
// Otherwise the program runner itself should have logged the error.
LOG.error("Failed to execute program {}", programRunId, t);
// If the program completion is not done, then this exception
// is due to systematic failure in which fail to run the program.
// We write out an extra error state for the program to make sure the program state get transited.
programStateWriter.error(programRunId, t);
}
throw t;
} finally {
// Always tear down: stop the core services, restore the proxy selector captured earlier,
// clear the default authenticator and count down the latch declared outside this method.
stopCoreServices(coreServices, logAppenderInitializer);
ProxySelector.setDefault(oldProxySelector);
Authenticator.setDefault(null);
runCompletedLatch.countDown();
}
}
Use of io.cdap.cdap.runtime.spi.provisioner.Cluster in project cdap by caskdata.
From the class SSHRemoteProcessController, method isRunning.
/**
 * Tells whether the remote runtime process of the program run can still be running.
 *
 * <p>The primary check opens an SSH session and runs {@code pgrep} for a process whose command line
 * contains the run id; an exit code of {@code 0} is reported as running. If the SSH interaction fails with
 * an {@link IOException}, the status of the cluster is queried from the provisioning service instead and
 * only a {@code RUNNING} cluster is reported as running.
 *
 * @return {@code true} if the process was found over SSH, or, when SSH failed, if the cluster status is
 *         {@code RUNNING}; {@code false} otherwise
 * @throws Exception if waiting for the remote command or querying the cluster status fails
 */
@Override
public boolean isRunning() throws Exception {
  // First choice: look for the CDAP runtime process directly on the remote host via SSH
  try (SSHSession sshSession = new DefaultSSHSession(sshConfig)) {
    String probeCommand = "pgrep -f -- -Dcdap.runid=" + programRunId.getRun();
    SSHProcess probe = sshSession.execute(probeCommand);

    // Drain both streams. The content is irrelevant, but reading blocks until the command is done
    // and keeps the network buffer from filling up.
    ByteStreams.toByteArray(probe.getInputStream());
    ByteStreams.toByteArray(probe.getErrorStream());

    int status = probe.waitFor();
    boolean processFound = status == 0;
    if (!processFound) {
      LOG.info("Received exit code {} when checking for remote process for program run {}.", status, programRunId);
    }
    return processFound;
  } catch (IOException sshFailure) {
    // SSH is not usable; fall back to asking whether the cluster itself is still up
    LOG.debug("Failed to use SSH to determine if the remote process is running for {}. Check cluster status instead.", programRunId, sshFailure);

    String clusterJson = programOpts.getArguments().getOption(ProgramOptionConstants.CLUSTER);
    Cluster runCluster = GSON.fromJson(clusterJson, Cluster.class);
    String requestingUser = programOpts.getArguments().getOption(ProgramOptionConstants.USER_ID);

    // Only a RUNNING cluster leaves a chance that the remote process is still alive
    return ClusterStatus.RUNNING
      == provisioningService.getClusterStatus(programRunId, programOpts, runCluster, requestingUser);
  }
}
Use of io.cdap.cdap.runtime.spi.provisioner.Cluster in project cdap by caskdata.
From the class ProvisioningService, method createSSHKeyPair.
/**
 * Builds the {@link SSHKeyPair} that belongs to the given {@link ProvisioningTaskInfo}.
 *
 * <p>A key pair is only returned when the cluster in the task info carries an SSH user property and
 * both the public and the private key exist under the secure keys directory of the task.
 *
 * @param taskInfo the task info providing the cluster properties and the secure keys directory
 * @return the {@link SSHKeyPair}, or {@code null} if the SSH user or either key file is missing
 * @throws IOException if resolving or checking the key locations fails
 */
@Nullable
private SSHKeyPair createSSHKeyPair(ProvisioningTaskInfo taskInfo) throws IOException {
  // The SSH user lives in the cluster properties; the cluster or its properties may be absent
  Optional<String> user = Optional.ofNullable(taskInfo.getCluster())
    .map(Cluster::getProperties)
    .map(props -> props.get(Constants.RuntimeMonitor.SSH_USER));
  if (!user.isPresent()) {
    return null;
  }

  Location secureKeysDir = locationFactory.create(taskInfo.getSecureKeysDir());
  Location publicKey = secureKeysDir.append(Constants.RuntimeMonitor.PUBLIC_KEY);
  Location privateKey = secureKeysDir.append(Constants.RuntimeMonitor.PRIVATE_KEY);

  // Both halves of the key pair must be present to be usable
  if (publicKey.exists() && privateKey.exists()) {
    return new LocationBasedSSHKeyPair(secureKeysDir, user.get());
  }
  return null;
}
Aggregations