Search in sources :

Example 6 with ProcessResult

use of io.hops.hopsworks.common.util.ProcessResult in project hopsworks by logicalclocks.

the class LibraryInstaller method exportEnvironment.

/**
 * Runs the {@code export} command of the sudo script against the project's docker image and uploads
 * the command output as a yml file into the project.
 *
 * <p>The command's stderr is redirected into stdout and the call waits up to 30 minutes. The captured
 * stdout is handed to {@code environmentController.uploadYmlInProject} only when the exit code is 0.
 *
 * @param project the project whose docker image name is passed to the export command
 * @param user the user passed on to the yml upload
 * @param exportPath the path passed on to the yml upload
 * @throws IOException if the command cannot be executed or exits with a non-zero code
 * @throws ServiceException propagated from the helpers called here
 * @throws ServiceDiscoveryException propagated from the helpers called here
 */
public void exportEnvironment(Project project, Users user, String exportPath) throws IOException, ServiceException, ServiceDiscoveryException {
    ProcessDescriptor processDescriptor = new ProcessDescriptor.Builder()
        .addCommand("/usr/bin/sudo")
        .addCommand(prog)
        .addCommand("export")
        .addCommand(projectUtils.getFullDockerImageName(project, false))
        .redirectErrorStream(true)
        .setWaitTimeout(30, TimeUnit.MINUTES)
        .build();
    ProcessResult processResult = osProcessExecutor.execute(processDescriptor);
    if (processResult.getExitCode() != 0) {
        // This method exports an environment; the message previously claimed an image creation failure.
        String errorMsg = "Could not export the environment of the docker image. Exit code: " + processResult.getExitCode() + " out: " + processResult.getStdout() + "\n err: " + processResult.getStderr() + "||\n";
        throw new IOException(errorMsg);
    }
    environmentController.uploadYmlInProject(project, user, processResult.getStdout(), exportPath);
}
Also used : ProcessResult(io.hops.hopsworks.common.util.ProcessResult) ProcessDescriptor(io.hops.hopsworks.common.util.ProcessDescriptor) IOException(java.io.IOException)

Example 7 with ProcessResult

use of io.hops.hopsworks.common.util.ProcessResult in project hopsworks by logicalclocks.

the class LocalhostServingMonitor method monitor.

/**
 * Timer callback that checks every running localhost serving instance and cleans up the dead ones.
 *
 * <p>For each serving the entry is locked, the model-server script is invoked with {@code alive}, and
 * on a non-zero exit code the script is invoked again with {@code kill} before the db entry is marked
 * as stopped. The lock is always released once it has been acquired, even if an unexpected exception
 * is thrown while the serving is being processed.
 *
 * @param timer the timer that triggered this invocation (not used by the method body)
 */
@Timeout
public void monitor(Timer timer) {
    try {
        // Get the list of running Localhost Serving instances
        List<Serving> servingList = servingFacade.getLocalhostRunning();
        for (Serving serving : servingList) {
            try {
                Serving dbServing = servingFacade.acquireLock(serving.getProject(), serving.getId());
                try {
                    ProcessDescriptor.Builder builder = new ProcessDescriptor.Builder().addCommand("/usr/bin/sudo");
                    if (serving.getModelServer() == ModelServer.TENSORFLOW_SERVING) {
                        builder.addCommand(tfScript);
                    }
                    if (serving.getModelServer() == ModelServer.PYTHON) {
                        builder.addCommand(sklearnScript);
                    }
                    ProcessDescriptor processDescriptor = builder
                        .addCommand("alive")
                        .addCommand(dbServing.getProject().getName().toLowerCase())
                        .addCommand(dbServing.getName())
                        .ignoreOutErrStreams(true)
                        .build();
                    LOGGER.log(Level.FINE, processDescriptor.toString());
                    ProcessResult processResult = osProcessExecutor.execute(processDescriptor);
                    if (processResult.getExitCode() != 0) {
                        // The processes is dead, run the kill script to delete the directory
                        // and update the value in the db
                        Path secretDir = Paths.get(settings.getStagingDir(), SERVING_DIRS + serving.getLocalDir());
                        builder = new ProcessDescriptor.Builder().addCommand("/usr/bin/sudo");
                        if (serving.getModelServer() == ModelServer.TENSORFLOW_SERVING) {
                            builder.addCommand(tfScript);
                        }
                        if (serving.getModelServer() == ModelServer.PYTHON) {
                            builder.addCommand(sklearnScript);
                        }
                        processDescriptor = builder
                            .addCommand("kill")
                            .addCommand(dbServing.getCid())
                            .addCommand(dbServing.getName())
                            .addCommand(dbServing.getProject().getName().toLowerCase())
                            .addCommand(secretDir.toString())
                            .ignoreOutErrStreams(true)
                            .build();
                        LOGGER.log(Level.FINE, processDescriptor.toString());
                        osProcessExecutor.execute(processDescriptor);
                        // The kill script's exit code is not inspected: once it has been executed
                        // without an IOException the db entry is marked as stopped.
                        dbServing.setCid(CID_STOPPED);
                        dbServing.setLocalPort(-1);
                        servingFacade.updateDbObject(dbServing, dbServing.getProject());
                    }
                } catch (IOException e) {
                    LOGGER.log(Level.SEVERE, "Could not clean up serving instance with id: " + serving.getId(), e);
                } finally {
                    // Release in finally so an unchecked exception cannot leave the entry locked.
                    servingFacade.releaseLock(serving.getProject(), serving.getId());
                }
            } catch (ServingException e) {
                LOGGER.log(Level.INFO, "Error processing serving instance with id: " + serving.getId(), e);
            }
        }
    } catch (Exception e) {
        LOGGER.log(Level.SEVERE, "Got an exception while monitoring servings", e);
    }
}
Also used : Serving(io.hops.hopsworks.persistence.entity.serving.Serving) Path(java.nio.file.Path) ServingException(io.hops.hopsworks.exceptions.ServingException) ProcessResult(io.hops.hopsworks.common.util.ProcessResult) ProcessDescriptor(io.hops.hopsworks.common.util.ProcessDescriptor) IOException(java.io.IOException) IOException(java.io.IOException) ServingException(io.hops.hopsworks.exceptions.ServingException) Timeout(javax.ejb.Timeout)

Example 8 with ProcessResult

use of io.hops.hopsworks.common.util.ProcessResult in project hopsworks by logicalclocks.

the class LocalhostTfServingController method startServingInstance.

/**
 * Starts a Tensorflow serving instance. Executes the tfserving bash script to launch a tensorflow serving
 * server as serving-user and localize the tf-model from HDFS server. It records the PID of the server for monitoring.
 *
 * <p>The method releases the lock on the serving entry exactly once on every exit path: right before
 * throwing when the preparation steps fail, otherwise after the start script has been executed and the
 * db entry has been updated.
 *
 * @param project the project to start the serving in
 * @param user the user starting the serving
 * @param serving the serving instance to start (tfserving modelserver)
 * @throws ServingException if the process descriptor cannot be built, the TLS certificates cannot be
 *     materialized, or the start script fails or exits with a non-zero code
 */
public void startServingInstance(Project project, Users user, Serving serving) throws ServingException {
    String script = settings.getSudoersDir() + "/tfserving.sh";
    // TODO(Fabio) this is bad as we don't know if the port is used or not
    int grpcPort = ThreadLocalRandom.current().nextInt(40000, 59999);
    int restPort;
    do {
        // The two ports are drawn independently; redraw so the REST port never equals the gRPC port.
        restPort = ThreadLocalRandom.current().nextInt(40000, 59999);
    } while (restPort == grpcPort);
    Path secretDir = Paths.get(settings.getStagingDir(), SERVING_DIRS + serving.getLocalDir());
    ProcessDescriptor processDescriptor;
    try {
        processDescriptor = new ProcessDescriptor.Builder()
            .addCommand("/usr/bin/sudo")
            .addCommand(script)
            .addCommand("start")
            .addCommand(serving.getName())
            .addCommand(Paths.get(serving.getModelPath(), serving.getModelVersion().toString()).toString())
            .addCommand(String.valueOf(grpcPort))
            .addCommand(String.valueOf(restPort))
            .addCommand(secretDir.toString())
            .addCommand(project.getName() + USER_NAME_DELIMITER + user.getUsername())
            .addCommand(serving.getBatchingConfiguration().isBatchingEnabled() ? "1" : "0")
            .addCommand(project.getName().toLowerCase())
            .addCommand(projectUtils.getFullDockerImageName(project, true))
            .setWaitTimeout(2L, TimeUnit.MINUTES)
            .ignoreOutErrStreams(false)
            .build();
        logger.log(Level.INFO, processDescriptor.toString());
    } catch (ServiceDiscoveryException ex) {
        // Release the lock here as well, like every other failure path of this method does.
        servingFacade.releaseLock(project, serving.getId());
        throw new ServingException(RESTCodes.ServingErrorCode.LIFECYCLEERRORINT, Level.SEVERE, null, ex.getMessage(), ex);
    }
    // Materialized TLS certificates to be able to read the model
    if (settings.getHopsRpcTls()) {
        try {
            certificateMaterializer.materializeCertificatesLocal(user.getUsername(), project.getName());
        } catch (IOException e) {
            // Only release on failure: on success the lock must stay held until the start script has
            // run, and it is released by the finally block below.
            servingFacade.releaseLock(project, serving.getId());
            throw new ServingException(RESTCodes.ServingErrorCode.LIFECYCLEERRORINT, Level.SEVERE, null, e.getMessage(), e);
        }
    }
    try {
        ProcessResult processResult = osProcessExecutor.execute(processDescriptor);
        if (processResult.getExitCode() != 0) {
            // Startup process failed for some reason. The catch block below marks the serving as
            // stopped in the db and wraps this exception.
            throw new ServingException(RESTCodes.ServingErrorCode.LIFECYCLEERRORINT, Level.INFO);
        }
        // Read the pid for TensorFlow Serving server
        Path cidFilePath = Paths.get(secretDir.toString(), "tfserving.pid");
        String cid = Files.readFirstLine(cidFilePath.toFile(), Charset.defaultCharset());
        // Update the info in the db
        serving.setCid(cid);
        serving.setLocalPort(restPort);
        serving.setDeployed(new Date());
        servingFacade.updateDbObject(serving, project);
    } catch (Exception ex) {
        // Startup process failed for some reason
        serving.setCid(CID_STOPPED);
        servingFacade.updateDbObject(serving, project);
        throw new ServingException(RESTCodes.ServingErrorCode.LIFECYCLEERRORINT, Level.SEVERE, null, ex.getMessage(), ex);
    } finally {
        try {
            if (settings.getHopsRpcTls()) {
                certificateMaterializer.removeCertificatesLocal(user.getUsername(), project.getName());
            }
        } finally {
            // release lock on the serving entry, even if removing the certificates failed
            servingFacade.releaseLock(project, serving.getId());
        }
    }
}
Also used : Path(java.nio.file.Path) ServingException(io.hops.hopsworks.exceptions.ServingException) ProcessResult(io.hops.hopsworks.common.util.ProcessResult) ProcessDescriptor(io.hops.hopsworks.common.util.ProcessDescriptor) ServiceDiscoveryException(com.logicalclocks.servicediscoverclient.exceptions.ServiceDiscoveryException) IOException(java.io.IOException) Date(java.util.Date) IOException(java.io.IOException) ServiceDiscoveryException(com.logicalclocks.servicediscoverclient.exceptions.ServiceDiscoveryException) ServingException(io.hops.hopsworks.exceptions.ServingException)

Example 9 with ProcessResult

use of io.hops.hopsworks.common.util.ProcessResult in project hopsworks by logicalclocks.

the class GitCommandOperationUtil method shutdownCommandService.

/**
 * Shuts down the git command service of a repository and cleans up afterwards.
 *
 * <p>The repository's cid is read first and then cleared via {@code updateRepositoryCid}; the git
 * script is then invoked with {@code kill} and the previously read cid, waiting up to 10 seconds.
 * Failures are logged and never propagated, and {@code cleanUp} is called in every case.
 *
 * @param repository the repository whose command service is shut down
 * @param execution the execution providing the config secret and the user
 */
public void shutdownCommandService(GitRepository repository, GitOpExecution execution) {
    // Read the cid before it is cleared on the repository; the kill command below still needs it.
    String cid = repository.getCid();
    try {
        gitRepositoryFacade.updateRepositoryCid(repository, null);
    } catch (Exception e) {
        LOGGER.log(Level.SEVERE, "Failed to update repository pid", e);
    }
    String gitHomePath = getGitHome(execution.getConfigSecret());
    String hdfsUsername = hdfsUsersController.getHdfsUserName(repository.getProject(), execution.getUser());
    String prog = settings.getSudoersDir() + "/git.sh";
    ProcessDescriptor.Builder pdBuilder = new ProcessDescriptor.Builder()
        .addCommand("/usr/bin/sudo")
        .addCommand(prog)
        .addCommand("kill")
        .addCommand(gitHomePath)
        .addCommand(cid)
        .addCommand(hdfsUsername)
        .redirectErrorStream(true)
        .setWaitTimeout(10L, TimeUnit.SECONDS);
    // Stays 0 when the command could not be executed; that case is logged by the catch block.
    int exitValue = 0;
    try {
        ProcessResult processResult = osProcessExecutor.execute(pdBuilder.build());
        LOGGER.log(Level.FINE, processResult.getStdout());
        exitValue = processResult.getExitCode();
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, "Failed to shutdown git container executing command for user " + hdfsUsername, ex);
    }
    if (exitValue != 0) {
        // Separator added: the exit code used to run straight into the following sentence.
        LOGGER.log(Level.SEVERE, "Exited with " + exitValue + ". Failed to shutdown git container executing command for user " + hdfsUsername);
    }
    cleanUp(repository.getProject(), execution.getUser(), gitHomePath);
}
Also used : ProcessResult(io.hops.hopsworks.common.util.ProcessResult) ProcessDescriptor(io.hops.hopsworks.common.util.ProcessDescriptor) IOException(java.io.IOException) JSONException(org.json.JSONException) GitOpException(io.hops.hopsworks.exceptions.GitOpException) IOException(java.io.IOException) UserException(io.hops.hopsworks.exceptions.UserException)

Example 10 with ProcessResult

use of io.hops.hopsworks.common.util.ProcessResult in project hopsworks by logicalclocks.

the class AsynchronousGitCommandExecutor method execute.

/**
 * Starts the git command service for an execution, trying at most five times.
 *
 * <p>Each attempt runs the git script with {@code start} and the arguments file, waiting up to 60
 * seconds, then reads the pid from {@code git.pid} in the run directory and stores it on the
 * repository. An attempt counts as failed when the script exits with a non-zero code, when the pid
 * file is empty, or when any exception is thrown. If no pid has been obtained once the loop ends, the
 * execution is marked as failed.
 *
 * @param gitOpExecution the execution whose git command is started
 * @param gitPaths the paths (conf dir, run dir, git working dir) used to launch the service
 */
@Asynchronous
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
public void execute(GitOpExecution gitOpExecution, GitPaths gitPaths) {
    int maxTries = 5;
    String pid = "";
    String gitCommand = gitOpExecution.getGitCommandConfiguration().getCommandType().getGitCommand();
    String prog = settings.getSudoersDir() + "/git.sh";
    String commandArgumentsFile = gitPaths.getConfDirPath() + File.separator + GitContainerLaunchScriptArgumentsTemplate.FILE_NAME;
    while (maxTries > 0 && Strings.isNullOrEmpty(pid)) {
        // Count every attempt, not only those ending in an exception; otherwise an empty pid file
        // (readFirstLine returns null without throwing) would keep this loop running forever.
        maxTries--;
        try {
            ProcessDescriptor processDescriptor = new ProcessDescriptor.Builder()
                .addCommand("/usr/bin/sudo")
                .addCommand(prog)
                .addCommand("start")
                .addCommand(commandArgumentsFile)
                .redirectErrorStream(true)
                .setCurrentWorkingDirectory(new File(gitPaths.getGitPath()))
                .setWaitTimeout(60L, TimeUnit.SECONDS)
                .build();
            String pidFile = gitPaths.getRunDirPath() + "/git.pid";
            ProcessResult processResult = osProcessExecutor.execute(processDescriptor);
            if (processResult.getExitCode() != 0) {
                // Logged once, with its stack trace, by the catch block below.
                throw new IOException("Could not start git service to execute command " + gitCommand + " . " + "Exit code: " + processResult.getExitCode() + " Error: stdout: " + processResult.getStdout() + " stderr: " + processResult.getStderr());
            }
            pid = com.google.common.io.Files.readFirstLine(new File(pidFile), Charset.defaultCharset());
            if (Strings.isNullOrEmpty(pid)) {
                // Do not store an empty cid on the repository; treat it as a failed attempt.
                throw new IOException("Git service for command " + gitCommand + " started but pid file " + pidFile + " is empty");
            }
            // Get the updated repository
            Optional<GitRepository> optional = gitRepositoryFacade.findById(gitOpExecution.getRepository().getId());
            GitRepository repository = optional.orElseThrow(() -> new IllegalStateException("Git repository with id " + gitOpExecution.getRepository().getId() + " not found"));
            gitRepositoryFacade.updateRepositoryCid(repository, pid);
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, "Problem executing shell script to start git command service", ex);
        }
    }
    if (Strings.isNullOrEmpty(pid)) {
        updateExecutionStateToFail(gitOpExecution);
    }
}
Also used : GitRepository(io.hops.hopsworks.persistence.entity.git.GitRepository) ProcessResult(io.hops.hopsworks.common.util.ProcessResult) ProcessDescriptor(io.hops.hopsworks.common.util.ProcessDescriptor) IOException(java.io.IOException) File(java.io.File) IOException(java.io.IOException) Asynchronous(javax.ejb.Asynchronous) TransactionAttribute(javax.ejb.TransactionAttribute)

Aggregations

ProcessDescriptor (io.hops.hopsworks.common.util.ProcessDescriptor)24 ProcessResult (io.hops.hopsworks.common.util.ProcessResult)24 IOException (java.io.IOException)22 File (java.io.File)9 TransactionAttribute (javax.ejb.TransactionAttribute)5 ServiceException (io.hops.hopsworks.exceptions.ServiceException)4 ProjectException (io.hops.hopsworks.exceptions.ProjectException)3 ServingException (io.hops.hopsworks.exceptions.ServingException)3 Project (io.hops.hopsworks.persistence.entity.project.Project)3 BufferedWriter (java.io.BufferedWriter)3 FileWriter (java.io.FileWriter)3 Path (java.nio.file.Path)3 ServiceDiscoveryException (com.logicalclocks.servicediscoverclient.exceptions.ServiceDiscoveryException)2 CompressionInfo (io.hops.hopsworks.common.dataset.util.CompressionInfo)2 DatasetException (io.hops.hopsworks.exceptions.DatasetException)2 JobException (io.hops.hopsworks.exceptions.JobException)2 PythonException (io.hops.hopsworks.exceptions.PythonException)2 TensorBoardException (io.hops.hopsworks.exceptions.TensorBoardException)2 FileNotFoundException (java.io.FileNotFoundException)2 URISyntaxException (java.net.URISyntaxException)2