Search in sources :

Example 16 with Cluster

use of io.cdap.cdap.runtime.spi.provisioner.Cluster in project cdap by caskdata.

the class DefaultRuntimeJob method run.

/**
 * Runs a single program run inside the given runtime job environment and blocks until the
 * program reaches a terminal state.
 *
 * <p>The method, in order: installs process-wide handlers, reads the program options and the
 * application specification from files, builds the Guice injector, initializes logging, starts
 * the core services, attempts to regenerate the application specification (falling back to the
 * one read from file on failure), then creates and runs the program. Core services are stopped
 * and process-wide defaults are reset in the final {@code finally} block regardless of outcome.
 *
 * <p>NOTE(review): {@code controllerFuture}, {@code stopRequested} and {@code runCompletedLatch}
 * are declared outside this method; their exact types and how other threads use them are not
 * visible here.
 *
 * @param runtimeJobEnv the environment passed to module and configuration creation
 * @throws Exception any failure raised while setting up or running the program; failures from
 *                   the program-run phase are rethrown after being recorded
 */
@Override
public void run(RuntimeJobEnvironment runtimeJobEnv) throws Exception {
    // Setup process wide settings
    Thread.setDefaultUncaughtExceptionHandler(new UncaughtExceptionHandler());
    // Route java.util.logging records through SLF4J
    SLF4JBridgeHandler.removeHandlersForRootLogger();
    SLF4JBridgeHandler.install();
    // Get Program Options
    ProgramOptions programOpts = readJsonFile(new File(DistributedProgramRunner.PROGRAM_OPTIONS_FILE_NAME), ProgramOptions.class);
    ProgramRunId programRunId = programOpts.getProgramId().run(ProgramRunners.getRunId(programOpts));
    ProgramId programId = programRunId.getParent();
    Arguments systemArgs = programOpts.getArguments();
    // Setup logging context for the program
    LoggingContextAccessor.setLoggingContext(LoggingContextHelper.getLoggingContextWithRunId(programRunId, systemArgs.asMap()));
    // Deserialize the cluster description carried as JSON in the system arguments
    Cluster cluster = GSON.fromJson(systemArgs.getOption(ProgramOptionConstants.CLUSTER), Cluster.class);
    // Get App spec
    ApplicationSpecification appSpec = readJsonFile(new File(DistributedProgramRunner.APP_SPEC_FILE_NAME), ApplicationSpecification.class);
    ProgramDescriptor programDescriptor = new ProgramDescriptor(programId, appSpec);
    // Create injector and get program runner
    Injector injector = Guice.createInjector(createModules(runtimeJobEnv, createCConf(runtimeJobEnv, programOpts), programRunId, programOpts));
    CConfiguration cConf = injector.getInstance(CConfiguration.class);
    // Initialize log appender
    LogAppenderInitializer logAppenderInitializer = injector.getInstance(LogAppenderInitializer.class);
    logAppenderInitializer.initialize();
    SystemArguments.setLogLevel(programOpts.getUserArguments(), logAppenderInitializer);
    // Remember the current default ProxySelector so it can be restored in the finally block below
    ProxySelector oldProxySelector = ProxySelector.getDefault();
    RuntimeMonitors.setupMonitoring(injector, programOpts);
    Deque<Service> coreServices = createCoreServices(injector, systemArgs, cluster);
    startCoreServices(coreServices);
    // Regenerate the app spec; any failure here is logged and the app spec read from file is kept
    ConfiguratorFactory configuratorFactory = injector.getInstance(ConfiguratorFactory.class);
    try {
        // Work on a mutable copy of the system arguments; it replaces programOpts only on success
        Map<String, String> systemArguments = new HashMap<>(programOpts.getArguments().asMap());
        File pluginDir = new File(programOpts.getArguments().getOption(ProgramOptionConstants.PLUGIN_DIR, DistributedProgramRunner.PLUGIN_DIR));
        // create a directory to store plugin artifacts for the regeneration of app spec to fetch plugin artifacts
        DirUtils.mkdirs(pluginDir);
        // Make the default plugin directory explicit when the option was not provided
        if (!programOpts.getArguments().hasOption(ProgramOptionConstants.PLUGIN_DIR)) {
            systemArguments.put(ProgramOptionConstants.PLUGIN_DIR, DistributedProgramRunner.PLUGIN_DIR);
        }
        // remember the file names in the artifact folder before app regeneration
        List<String> pluginFiles = DirUtils.listFiles(pluginDir, File::isFile).stream().map(File::getName).collect(Collectors.toList());
        ApplicationSpecification generatedAppSpec = regenerateAppSpec(systemArguments, programOpts.getUserArguments().asMap(), programId, appSpec, programDescriptor, configuratorFactory);
        // A null result keeps the app spec that was read from file
        appSpec = generatedAppSpec != null ? generatedAppSpec : appSpec;
        programDescriptor = new ProgramDescriptor(programDescriptor.getProgramId(), appSpec);
        List<String> pluginFilesAfter = DirUtils.listFiles(pluginDir, File::isFile).stream().map(File::getName).collect(Collectors.toList());
        // Drop the plugin directory option when the directory holds no files after regeneration
        if (pluginFilesAfter.isEmpty()) {
            systemArguments.remove(ProgramOptionConstants.PLUGIN_DIR);
        }
        // If the set of plugin files changed, drop the plugin archive option
        // (presumably so that it is recreated from the folder contents - confirm)
        if (!pluginFiles.equals(pluginFilesAfter)) {
            systemArguments.remove(ProgramOptionConstants.PLUGIN_ARCHIVE);
        }
        // update program options
        programOpts = new SimpleProgramOptions(programOpts.getProgramId(), new BasicArguments(systemArguments), programOpts.getUserArguments(), programOpts.isDebug());
    } catch (Exception e) {
        LOG.warn("Failed to regenerate the app spec for program {}, using the existing app spec", programId, e);
    }
    ProgramStateWriter programStateWriter = injector.getInstance(ProgramStateWriter.class);
    RuntimeClientService runtimeClientService = injector.getInstance(RuntimeClientService.class);
    // Completed by the controller listener below with the terminal state, or exceptionally on error
    CompletableFuture<ProgramController.State> programCompletion = new CompletableFuture<>();
    try {
        ProgramRunner programRunner = injector.getInstance(ProgramRunnerFactory.class).create(programId.getType());
        // Create and run the program. The program files should be present in current working directory.
        try (Program program = createProgram(cConf, programRunner, programDescriptor, programOpts)) {
            ProgramController controller = programRunner.run(program, programOpts);
            // Publish the controller and forward stop requests from the runtime client to it
            controllerFuture.complete(controller);
            runtimeClientService.onProgramStopRequested(controller::stop);
            controller.addListener(new AbstractListener() {

                @Override
                public void completed() {
                    programCompletion.complete(ProgramController.State.COMPLETED);
                }

                @Override
                public void killed() {
                    // Write an extra state to make sure there is always a terminal state even
                    // if the program application run failed to write out the state.
                    programStateWriter.killed(programRunId);
                    programCompletion.complete(ProgramController.State.KILLED);
                }

                @Override
                public void error(Throwable cause) {
                    // Write an extra state to make sure there is always a terminal state even
                    // if the program application run failed to write out the state.
                    programStateWriter.error(programRunId, cause);
                    programCompletion.completeExceptionally(cause);
                }
            }, Threads.SAME_THREAD_EXECUTOR);
            // NOTE(review): stopRequested is a field set elsewhere; presumably it records a stop
            // request that arrived before the controller was available - confirm.
            if (stopRequested) {
                controller.stop();
            }
            // Block on the completion
            programCompletion.get();
        } finally {
            // Close the program runner if it holds closeable resources; close failures are ignored
            if (programRunner instanceof Closeable) {
                Closeables.closeQuietly((Closeable) programRunner);
            }
        }
    } catch (Throwable t) {
        // Fail the controller future; this has no effect if it was already completed above
        controllerFuture.completeExceptionally(t);
        if (!programCompletion.isDone()) {
            // We log here so that the logs are still sent back to the program logs collection.
            // Only log if the program completion is not done.
            // Otherwise the program runner itself should have logged the error.
            LOG.error("Failed to execute program {}", programRunId, t);
            // If the program completion is not done, then this exception
            // is due to a systemic failure that prevented the program from running.
            // We write out an extra error state for the program to make sure the program state transitions.
            programStateWriter.error(programRunId, t);
        }
        throw t;
    } finally {
        stopCoreServices(coreServices, logAppenderInitializer);
        // Reset process-wide network defaults: restore the saved ProxySelector and clear the Authenticator
        ProxySelector.setDefault(oldProxySelector);
        Authenticator.setDefault(null);
        // Signal that this run has finished (the latch is declared outside this method)
        runCompletedLatch.countDown();
    }
}
Also used : ApplicationSpecification(io.cdap.cdap.api.app.ApplicationSpecification) ConfiguratorFactory(io.cdap.cdap.internal.app.deploy.ConfiguratorFactory) HashMap(java.util.HashMap) Closeable(java.io.Closeable) ProgramRunnerFactory(io.cdap.cdap.app.runtime.ProgramRunnerFactory) DefaultProgramRunnerFactory(io.cdap.cdap.app.guice.DefaultProgramRunnerFactory) ProxySelector(java.net.ProxySelector) LogAppenderInitializer(io.cdap.cdap.logging.appender.LogAppenderInitializer) CompletableFuture(java.util.concurrent.CompletableFuture) ProgramStateWriter(io.cdap.cdap.app.runtime.ProgramStateWriter) MessagingProgramStateWriter(io.cdap.cdap.internal.app.program.MessagingProgramStateWriter) Injector(com.google.inject.Injector) AbstractListener(io.cdap.cdap.internal.app.runtime.AbstractListener) ProgramDescriptor(io.cdap.cdap.app.program.ProgramDescriptor) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) UncaughtExceptionHandler(io.cdap.cdap.common.logging.common.UncaughtExceptionHandler) DistributedProgramRunner(io.cdap.cdap.internal.app.runtime.distributed.DistributedProgramRunner) DistributedMapReduceProgramRunner(io.cdap.cdap.internal.app.runtime.distributed.DistributedMapReduceProgramRunner) DistributedWorkerProgramRunner(io.cdap.cdap.internal.app.runtime.distributed.DistributedWorkerProgramRunner) ProgramRunner(io.cdap.cdap.app.runtime.ProgramRunner) DistributedWorkflowProgramRunner(io.cdap.cdap.internal.app.runtime.distributed.DistributedWorkflowProgramRunner) RuntimeClientService(io.cdap.cdap.internal.app.runtime.monitor.RuntimeClientService) ProgramController(io.cdap.cdap.app.runtime.ProgramController) Program(io.cdap.cdap.app.program.Program) Arguments(io.cdap.cdap.app.runtime.Arguments) SystemArguments(io.cdap.cdap.internal.app.runtime.SystemArguments) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster) 
RuntimeClientService(io.cdap.cdap.internal.app.runtime.monitor.RuntimeClientService) Service(com.google.common.util.concurrent.Service) ProfileMetricService(io.cdap.cdap.internal.profile.ProfileMetricService) LogAppenderLoaderService(io.cdap.cdap.logging.appender.loader.LogAppenderLoaderService) MessagingService(io.cdap.cdap.messaging.MessagingService) AbstractIdleService(com.google.common.util.concurrent.AbstractIdleService) MessagingHttpService(io.cdap.cdap.messaging.server.MessagingHttpService) MetricsCollectionService(io.cdap.cdap.api.metrics.MetricsCollectionService) ProgramId(io.cdap.cdap.proto.id.ProgramId) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) ProgramOptions(io.cdap.cdap.app.runtime.ProgramOptions) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) File(java.io.File)

Example 17 with Cluster

use of io.cdap.cdap.runtime.spi.provisioner.Cluster in project cdap by caskdata.

the class SSHRemoteProcessController method isRunning.

/**
 * Determines whether the remote runtime process for this program run may still be running.
 *
 * <p>The primary check executes {@code pgrep} over SSH for a process whose command line carries
 * this run's id; exit code {@code 0} is reported as running. If the SSH interaction fails with
 * an {@link IOException}, the result falls back to whether the cluster status is
 * {@code RUNNING}.
 *
 * @return {@code true} if the process was found, or, when SSH failed, if the cluster is running
 * @throws Exception if the check fails for a reason other than an SSH {@link IOException}
 */
@Override
public boolean isRunning() throws Exception {
    try (SSHSession sshSession = new DefaultSSHSession(sshConfig)) {
        // Look for a process whose command line contains this run's id
        String probeCommand = "pgrep -f -- -Dcdap.runid=" + programRunId.getRun();
        SSHProcess probe = sshSession.execute(probeCommand);
        // The output itself is irrelevant; both streams are drained so the network buffer
        // does not fill up. Each read blocks until the remote process has finished.
        ByteStreams.toByteArray(probe.getInputStream());
        ByteStreams.toByteArray(probe.getErrorStream());
        int status = probe.waitFor();
        if (status == 0) {
            return true;
        }
        LOG.info("Received exit code {} when checking for remote process for program run {}.", status, programRunId);
        return false;
    } catch (IOException sshFailure) {
        // SSH could not be used, so fall back to asking whether the cluster is still running
        LOG.debug("Failed to use SSH to determine if the remote process is running for {}. Check cluster status instead.", programRunId, sshFailure);
        String clusterJson = programOpts.getArguments().getOption(ProgramOptionConstants.CLUSTER);
        Cluster cluster = GSON.fromJson(clusterJson, Cluster.class);
        String userId = programOpts.getArguments().getOption(ProgramOptionConstants.USER_ID);
        // The remote process can only still be running if the cluster status is RUNNING
        return ClusterStatus.RUNNING == provisioningService.getClusterStatus(programRunId, programOpts, cluster, userId);
    }
}
Also used : SSHSession(io.cdap.cdap.runtime.spi.ssh.SSHSession) DefaultSSHSession(io.cdap.cdap.common.ssh.DefaultSSHSession) Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster) IOException(java.io.IOException) DefaultSSHSession(io.cdap.cdap.common.ssh.DefaultSSHSession) SSHProcess(io.cdap.cdap.runtime.spi.ssh.SSHProcess) ClusterStatus(io.cdap.cdap.runtime.spi.provisioner.ClusterStatus)

Example 18 with Cluster

use of io.cdap.cdap.runtime.spi.provisioner.Cluster in project cdap by caskdata.

the class ProvisioningService method createSSHKeyPair.

/**
 * Builds the {@link SSHKeyPair} described by the given {@link ProvisioningTaskInfo}.
 *
 * <p>A key pair is only produced when the task's cluster carries an SSH user property and both
 * the public and the private key locations exist under the task's secure keys directory.
 *
 * @param taskInfo the task info containing information about the ssh keys
 * @return a {@link SSHKeyPair}, or {@code null} if the ssh user or either key file is missing
 * @throws IOException if resolving or checking the key locations fails
 */
@Nullable
private SSHKeyPair createSSHKeyPair(ProvisioningTaskInfo taskInfo) throws IOException {
    // Without a cluster, or without cluster properties, there is no ssh user to look up
    Cluster cluster = taskInfo.getCluster();
    if (cluster == null) {
        return null;
    }
    Map<String, String> clusterProperties = cluster.getProperties();
    if (clusterProperties == null) {
        return null;
    }
    String sshUser = clusterProperties.get(Constants.RuntimeMonitor.SSH_USER);
    if (sshUser == null) {
        return null;
    }

    // Resolve both key locations inside the secure keys directory
    Location keysDir = locationFactory.create(taskInfo.getSecureKeysDir());
    Location publicKey = keysDir.append(Constants.RuntimeMonitor.PUBLIC_KEY);
    Location privateKey = keysDir.append(Constants.RuntimeMonitor.PRIVATE_KEY);

    // The private key is only checked when the public key exists
    boolean bothKeysPresent = publicKey.exists() && privateKey.exists();
    return bothKeysPresent ? new LocationBasedSSHKeyPair(keysDir, sshUser) : null;
}
Also used : TypeToken(com.google.gson.reflect.TypeToken) Provisioner(io.cdap.cdap.runtime.spi.provisioner.Provisioner) Inject(com.google.inject.Inject) LoggerFactory(org.slf4j.LoggerFactory) RetryStrategies(io.cdap.cdap.common.service.RetryStrategies) ProvisionerDetail(io.cdap.cdap.proto.provisioner.ProvisionerDetail) SparkCompatReader(io.cdap.cdap.internal.app.spark.SparkCompatReader) Future(java.util.concurrent.Future) ProgramStateWriter(io.cdap.cdap.app.runtime.ProgramStateWriter) Gson(com.google.gson.Gson) InvalidMacroException(io.cdap.cdap.api.macro.InvalidMacroException) Map(java.util.Map) MacroParserOptions(io.cdap.cdap.api.macro.MacroParserOptions) Cancellable(org.apache.twill.common.Cancellable) Capabilities(io.cdap.cdap.runtime.spi.provisioner.Capabilities) Threads(org.apache.twill.common.Threads) ProvisionerSpecification(io.cdap.cdap.runtime.spi.provisioner.ProvisionerSpecification) Collection(java.util.Collection) Set(java.util.Set) KeyedExecutor(io.cdap.cdap.common.async.KeyedExecutor) LoggingContextAccessor(io.cdap.cdap.common.logging.LoggingContextAccessor) ProvisioningTask(io.cdap.cdap.internal.provision.task.ProvisioningTask) StructuredTableContext(io.cdap.cdap.spi.data.StructuredTableContext) SecureStore(io.cdap.cdap.api.security.store.SecureStore) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) Executors(java.util.concurrent.Executors) LoggingContext(io.cdap.cdap.common.logging.LoggingContext) List(java.util.List) SecurityRequestContext(io.cdap.cdap.security.spi.authentication.SecurityRequestContext) Type(java.lang.reflect.Type) TransactionRunner(io.cdap.cdap.spi.data.transaction.TransactionRunner) Optional(java.util.Optional) Constants(io.cdap.cdap.common.conf.Constants) ProfileId(io.cdap.cdap.proto.id.ProfileId) ProgramOptionConstants(io.cdap.cdap.internal.app.runtime.ProgramOptionConstants) 
DeprovisionTask(io.cdap.cdap.internal.provision.task.DeprovisionTask) NotFoundException(io.cdap.cdap.common.NotFoundException) RuntimeJobManager(io.cdap.cdap.runtime.spi.runtimejob.RuntimeJobManager) Retries(io.cdap.cdap.common.service.Retries) RuntimeMonitorType(io.cdap.cdap.runtime.spi.RuntimeMonitorType) Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster) SSHKeyPair(io.cdap.cdap.runtime.spi.ssh.SSHKeyPair) Location(org.apache.twill.filesystem.Location) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) ProvisionerContext(io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext) Loggers(io.cdap.cdap.common.logging.Loggers) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) ClusterStatus(io.cdap.cdap.runtime.spi.provisioner.ClusterStatus) SSHContext(io.cdap.cdap.runtime.spi.ssh.SSHContext) AtomicReference(java.util.concurrent.atomic.AtomicReference) MacroParser(io.cdap.cdap.internal.app.runtime.plugin.MacroParser) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) ProvisionTask(io.cdap.cdap.internal.provision.task.ProvisionTask) ProgramOptions(io.cdap.cdap.app.runtime.ProgramOptions) PluginRequirement(io.cdap.cdap.internal.pipeline.PluginRequirement) AbstractIdleService(com.google.common.util.concurrent.AbstractIdleService) SocketTimeoutException(java.net.SocketTimeoutException) ConnectException(java.net.ConnectException) SystemArguments(io.cdap.cdap.internal.app.runtime.SystemArguments) VersionInfo(io.cdap.cdap.runtime.spi.VersionInfo) ExecutorService(java.util.concurrent.ExecutorService) Nullable(javax.annotation.Nullable) RetryableProvisionException(io.cdap.cdap.runtime.spi.provisioner.RetryableProvisionException) LoggingContextHelper(io.cdap.cdap.logging.context.LoggingContextHelper) Logger(org.slf4j.Logger) SparkCompat(io.cdap.cdap.runtime.spi.SparkCompat) Networks(io.cdap.cdap.common.utils.Networks) Throwables(com.google.common.base.Throwables) IOException(java.io.IOException) 
LocationFactory(org.apache.twill.filesystem.LocationFactory) ProjectInfo(io.cdap.cdap.common.utils.ProjectInfo) ProvisionerSystemContext(io.cdap.cdap.runtime.spi.provisioner.ProvisionerSystemContext) MetricsCollectionService(io.cdap.cdap.api.metrics.MetricsCollectionService) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) Requirements(io.cdap.cdap.api.plugin.Requirements) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) LogSamplers(io.cdap.cdap.common.logging.LogSamplers) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Collections(java.util.Collections) MacroEvaluator(io.cdap.cdap.api.macro.MacroEvaluator) Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster) Location(org.apache.twill.filesystem.Location) Nullable(javax.annotation.Nullable)

Aggregations

Cluster (io.cdap.cdap.runtime.spi.provisioner.Cluster)18 HashMap (java.util.HashMap)8 IOException (java.io.IOException)6 SSHKeyPair (io.cdap.cdap.runtime.spi.ssh.SSHKeyPair)5 Test (org.junit.Test)5 ClusterStatus (io.cdap.cdap.runtime.spi.provisioner.ClusterStatus)4 RetryableProvisionException (io.cdap.cdap.runtime.spi.provisioner.RetryableProvisionException)4 SSHContext (io.cdap.cdap.runtime.spi.ssh.SSHContext)4 ProgramOptions (io.cdap.cdap.app.runtime.ProgramOptions)3 CConfiguration (io.cdap.cdap.common.conf.CConfiguration)3 SystemArguments (io.cdap.cdap.internal.app.runtime.SystemArguments)3 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)3 RuntimeMonitorType (io.cdap.cdap.runtime.spi.RuntimeMonitorType)3 ProvisionerContext (io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext)3 ProvisionerSpecification (io.cdap.cdap.runtime.spi.provisioner.ProvisionerSpecification)3 GeneralSecurityException (java.security.GeneralSecurityException)3 Map (java.util.Map)3 Optional (java.util.Optional)3 TimeUnit (java.util.concurrent.TimeUnit)3 Logger (org.slf4j.Logger)3