Search in sources :

Example 16 with ProcessResult

use of io.hops.hopsworks.common.util.ProcessResult in project hopsworks by logicalclocks.

the class TensorBoardProcessMgr method killTensorBoard.

/**
 * Kills the process of a TensorBoard by running the sudoers {@code tensorboard.sh} script
 * with the {@code kill} command and the TensorBoard's cid.
 *
 * @param tb the TensorBoard whose cid is handed to the kill script
 * @return the exit code reported for the script, or {@code 2} if executing it raised an {@link IOException}
 */
public int killTensorBoard(TensorBoard tb) {
    final String killScript = settings.getSudoersDir() + "/tensorboard.sh";
    final ProcessDescriptor descriptor = new ProcessDescriptor.Builder()
        .addCommand("/usr/bin/sudo")
        .addCommand(killScript)
        .addCommand("kill")
        .addCommand(tb.getCid())
        .ignoreOutErrStreams(true)
        .build();
    LOGGER.log(Level.FINE, descriptor.toString());

    try {
        final ProcessResult killResult = osProcessExecutor.execute(descriptor);
        // A process that did not exit is only reported; its exit code is still returned to the caller.
        if (!killResult.processExited()) {
            LOGGER.log(Level.SEVERE, "Failed to kill TensorBoard, process time-out");
        }
        return killResult.getExitCode();
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, "Failed to kill TensorBoard", ex);
        return 2;
    }
}
Also used : ProcessResult(io.hops.hopsworks.common.util.ProcessResult) ProcessDescriptor(io.hops.hopsworks.common.util.ProcessDescriptor) IOException(java.io.IOException)

Example 17 with ProcessResult

use of io.hops.hopsworks.common.util.ProcessResult in project hopsworks by logicalclocks.

the class DatasetController method zip.

/**
 * Compresses a dataset path by running the {@code zip-background.sh} script.
 *
 * The script receives the staging directory, the source path, the HDFS user name and,
 * when {@code destPath} is not null, the destination path.
 *
 * @param project the project used to resolve the HDFS user name
 * @param user the user used to resolve the HDFS user name
 * @param path the path to compress
 * @param destPath the destination path; may be null, in which case it is not passed to the script
 * @throws DatasetException with COMPRESSION_SIZE_ERROR when the script exits with code 2, and with
 *         COMPRESSION_ERROR for any other non-zero exit code or when executing the script fails
 */
public void zip(Project project, Users user, Path path, Path destPath) throws DatasetException {
    final String hdfsUser = hdfsUsersController.getHdfsUserName(project, user);
    checkFileExists(path, hdfsUser);

    final CompressionInfo compression = new CompressionInfo(path, destPath);
    final String stagingDir = settings.getStagingDir() + File.separator + compression.getStagingDirectory();
    new File(stagingDir).mkdirs();
    settings.addZippingState(compression);

    final ProcessDescriptor.Builder builder = new ProcessDescriptor.Builder()
        .addCommand(settings.getHopsworksDomainDir() + "/bin/zip-background.sh")
        .addCommand(stagingDir)
        .addCommand(path.toString())
        .addCommand(hdfsUser);
    if (destPath != null) {
        builder.addCommand(destPath.toString());
    }
    final ProcessDescriptor descriptor = builder.ignoreOutErrStreams(true).build();

    final int exitCode;
    try {
        exitCode = osProcessExecutor.execute(descriptor).getExitCode();
    } catch (IOException ex) {
        throw new DatasetException(RESTCodes.DatasetErrorCode.COMPRESSION_ERROR, Level.SEVERE, "path: " + path.toString(), ex.getMessage(), ex);
    }

    if (exitCode == 0) {
        return;
    }
    // Exit code 2 is reported as a size error; every other failure is a generic compression error.
    if (exitCode == 2) {
        throw new DatasetException(RESTCodes.DatasetErrorCode.COMPRESSION_SIZE_ERROR, Level.WARNING);
    }
    throw new DatasetException(RESTCodes.DatasetErrorCode.COMPRESSION_ERROR, Level.WARNING, "path: " + path.toString() + ", result: " + exitCode);
}
Also used : ProcessResult(io.hops.hopsworks.common.util.ProcessResult) ProcessDescriptor(io.hops.hopsworks.common.util.ProcessDescriptor) IOException(java.io.IOException) CompressionInfo(io.hops.hopsworks.common.dataset.util.CompressionInfo) File(java.io.File) DatasetException(io.hops.hopsworks.exceptions.DatasetException)

Example 18 with ProcessResult

use of io.hops.hopsworks.common.util.ProcessResult in project hopsworks by logicalclocks.

the class LocalhostSkLearnServingController method startServingInstance.

/**
 * Starts a SkLearn serving instance. Executes the sklearn bash script to launch a Flask server as serving-user
 * in the project's anaconda environment. It records the PID of the server for monitoring.
 *
 * On any failure the serving is marked as stopped and persisted exactly once before the exception is
 * propagated. A {@link ServingException} raised inside this method is rethrown as-is so that its level and
 * message are preserved; any other exception is wrapped in a {@link ServingException}.
 * The lock on the serving entry is always released before returning.
 *
 * @param project the project to start the serving in
 * @param user the user starting the serving
 * @param serving the serving instance to start (flask server)
 * @throws ServingException if the start script exits with a non-zero code, the pid file stays missing or
 *         empty after the wait period, or any other error occurs during startup
 */
public void startServingInstance(Project project, Users user, Serving serving) throws ServingException {
    String script = settings.getSudoersDir() + "/sklearn_serving.sh";
    Integer port = ThreadLocalRandom.current().nextInt(40000, 59999);
    Path secretDir = Paths.get(settings.getStagingDir(), SERVING_DIRS + serving.getLocalDir());
    // Only the last path component of the predictor is passed as the predictor file name
    String predictorFilename = serving.getPredictor();
    if (serving.getPredictor().contains("/")) {
        String[] splits = serving.getPredictor().split("/");
        predictorFilename = splits[splits.length - 1];
    }
    try {
        ProcessDescriptor processDescriptor = new ProcessDescriptor.Builder()
            .addCommand("/usr/bin/sudo")
            .addCommand(script)
            .addCommand("start")
            .addCommand(predictorFilename)
            .addCommand(Paths.get(serving.getPredictor()).toString())
            .addCommand(String.valueOf(port))
            .addCommand(secretDir.toString())
            .addCommand(project.getName() + USER_NAME_DELIMITER + user.getUsername())
            .addCommand(project.getName().toLowerCase())
            .addCommand(settings.getAnacondaProjectDir() + "/bin/python")
            .addCommand(certificateMaterializer.getUserTransientKeystorePath(project, user))
            .addCommand(certificateMaterializer.getUserTransientTruststorePath(project, user))
            .addCommand(certificateMaterializer.getUserTransientPasswordPath(project, user))
            .addCommand(serving.getName())
            .addCommand(projectUtils.getFullDockerImageName(project, false))
            .setWaitTimeout(2L, TimeUnit.MINUTES)
            .ignoreOutErrStreams(true)
            .build();
        logger.log(Level.FINE, processDescriptor.toString());
        // Materialized TLS certificates so that user can read from HDFS inside python script
        certificateMaterializer.materializeCertificatesLocal(user.getUsername(), project.getName());
        ProcessResult processResult = osProcessExecutor.execute(processDescriptor);
        if (processResult.getExitCode() != 0) {
            // Startup process failed for some reason; the serving is marked as stopped in the catch block below
            throw new ServingException(RESTCodes.ServingErrorCode.LIFECYCLEERRORINT, Level.WARNING, "Could not start sklearn serving", "out:" + processResult.getStdout() + ", err:" + processResult.getStderr());
        }
        // Read the pid for SkLearn Serving Flask server
        Path pidFilePath = Paths.get(secretDir.toString(), "sklearn_flask_server.pid");
        // Pid file is created by sklearn server inside the docker container.
        // That means the process that started the container returned with exit code 0 but the file might not have been
        // created yet. Therefore, we wait until the file is created
        String pidContents = readSkLearnPidFile(pidFilePath);
        int pidReadCounter = 0;
        while (Strings.isNullOrEmpty(pidContents) && pidReadCounter < 10) {
            logger.log(Level.FINE, "Waiting for sklearn to start...");
            Thread.sleep(1000);
            pidContents = readSkLearnPidFile(pidFilePath);
            pidReadCounter++;
        }
        if (Strings.isNullOrEmpty(pidContents)) {
            throw new ServingException(RESTCodes.ServingErrorCode.LIFECYCLEERRORINT, Level.WARNING, "Could not start sklearn serving because pid file could not be read or was empty");
        }
        logger.log(Level.FINE, "sklearn pidContents:" + pidContents);
        // Update the info in the db
        serving.setCid(pidContents);
        serving.setLocalPort(port);
        serving.setDeployed(new Date());
        servingFacade.updateDbObject(serving, project);
    } catch (ServingException ex) {
        // Already a ServingException with the right level and message: mark as stopped and rethrow unchanged
        serving.setCid(CID_STOPPED);
        servingFacade.updateDbObject(serving, project);
        throw ex;
    } catch (Exception ex) {
        if (ex instanceof InterruptedException) {
            // Restore the interrupt flag so callers further up the stack can observe the interruption
            Thread.currentThread().interrupt();
        }
        // Startup process failed for some reason
        serving.setCid(CID_STOPPED);
        servingFacade.updateDbObject(serving, project);
        throw new ServingException(RESTCodes.ServingErrorCode.LIFECYCLEERRORINT, Level.SEVERE, null, ex.getMessage(), ex);
    } finally {
        if (settings.getHopsRpcTls()) {
            certificateMaterializer.removeCertificatesLocal(user.getUsername(), project.getName());
        }
        // release lock on the serving entry
        servingFacade.releaseLock(project, serving.getId());
    }
}

/**
 * Reads the first line of the sklearn pid file, treating a file that does not exist yet as "no content"
 * so that the caller can keep waiting for it instead of failing on the first read.
 *
 * @param pidFilePath path of the pid file
 * @return the first line of the file, or null if the file does not exist or is empty
 * @throws IOException if the file exists but cannot be read
 */
private String readSkLearnPidFile(Path pidFilePath) throws IOException {
    if (!pidFilePath.toFile().exists()) {
        return null;
    }
    return Files.readFirstLine(pidFilePath.toFile(), Charset.defaultCharset());
}
Also used : Path(java.nio.file.Path) ServingException(io.hops.hopsworks.exceptions.ServingException) ProcessResult(io.hops.hopsworks.common.util.ProcessResult) ProcessDescriptor(io.hops.hopsworks.common.util.ProcessDescriptor) Date(java.util.Date) IOException(java.io.IOException) ServingException(io.hops.hopsworks.exceptions.ServingException)

Example 19 with ProcessResult

use of io.hops.hopsworks.common.util.ProcessResult in project hopsworks by logicalclocks.

the class DockerRegistryMngrImpl method deleteProjectImagesOnRegistry.

/**
 * Fetches the tag list of a project's docker image from the registry and runs the sudoers
 * {@code dockerImage.sh delete} script once for every tag found.
 *
 * @param projectDockerImage the docker image name of the project; the project image name without tags is
 *        derived from it
 * @return the tags that were listed by the registry; empty if the response has no "tags" entry or it is null
 * @throws ServiceDiscoveryException declared for the registry URL lookup
 * @throws IOException if the registry answers with a status code of 400 or above, or if the delete script
 *         exits with a non-zero code
 */
public List<String> deleteProjectImagesOnRegistry(String projectDockerImage) throws ServiceDiscoveryException, IOException {
    final String projectDockerImageNoTags = projectUtils.getProjectNameFromDockerImageName(projectDockerImage);
    URI registryURL = URI.create("https://" + projectUtils.getRegistryURL() + "/v2/" + projectDockerImageNoTags + "/tags" + "/list");
    HttpGet request = new HttpGet(registryURL);
    request.setHeader(HttpHeaders.CONTENT_TYPE, "application/json");
    HttpHost host = new HttpHost(registryURL.getHost(), registryURL.getPort(), registryURL.getScheme());
    String httpResp = httpClient.execute(host, request, httpResponse -> {
        if (httpResponse.getStatusLine().getStatusCode() >= 400) {
            throw new IOException("Could not fetch tags from registry: " + httpResponse.getStatusLine().toString());
        }
        return EntityUtils.toString(httpResponse.getEntity());
    });
    List<String> projectImageTags = new ArrayList<>();
    JSONObject respJson = new JSONObject(httpResp);
    // A JSON null must be detected with isNull: comparing the value to the string "null" by reference never
    // matches, which would let a null "tags" entry reach getJSONArray and fail there.
    if (respJson.has("tags") && !respJson.isNull("tags")) {
        JSONArray tagsJSON = respJson.getJSONArray("tags");
        String prog = settings.getSudoersDir() + "/dockerImage.sh";
        for (int i = 0; i < tagsJSON.length(); i++) {
            String tag = tagsJSON.get(i).toString();
            projectImageTags.add(tag);
            // NOTE(review): the command receives the untagged image name rather than `tag`, so every iteration
            // issues the same command - confirm this is what dockerImage.sh expects.
            ProcessDescriptor processDescriptor = new ProcessDescriptor.Builder()
                .addCommand("/usr/bin/sudo")
                .addCommand(prog)
                .addCommand("delete")
                .addCommand(projectDockerImageNoTags)
                .redirectErrorStream(true)
                .setWaitTimeout(1, TimeUnit.MINUTES)
                .build();
            ProcessResult processResult = osProcessExecutor.execute(processDescriptor);
            if (processResult.getExitCode() != 0) {
                throw new IOException("Could not delete the docker image. Exit code: " + processResult.getExitCode() + " out: " + processResult.getStdout());
            }
        }
    }
    return projectImageTags;
}
Also used : HttpGet(org.apache.http.client.methods.HttpGet) ArrayList(java.util.ArrayList) JSONArray(org.json.JSONArray) ProcessResult(io.hops.hopsworks.common.util.ProcessResult) IOException(java.io.IOException) URI(java.net.URI) JSONObject(org.json.JSONObject) HttpHost(org.apache.http.HttpHost) ProcessDescriptor(io.hops.hopsworks.common.util.ProcessDescriptor)

Example 20 with ProcessResult

use of io.hops.hopsworks.common.util.ProcessResult in project hopsworks by logicalclocks.

the class LibraryController method condaList.

/**
 * Lists the libraries of a docker image by running the sudoers {@code dockerImage.sh} script with the
 * {@code list} command.
 *
 * @param dockerImage the docker image name passed to the script
 * @return the standard output of the script
 * @throws IOException if the script exits with a non-zero code; the message carries the exit code and the
 *         captured output
 */
public String condaList(String dockerImage) throws IOException {
    String prog = settings.getSudoersDir() + "/dockerImage.sh";
    ProcessDescriptor processDescriptor = new ProcessDescriptor.Builder()
        .addCommand("/usr/bin/sudo")
        .addCommand(prog)
        .addCommand("list")
        .addCommand(dockerImage)
        .redirectErrorStream(true)
        .setWaitTimeout(30, TimeUnit.MINUTES)
        .build();
    ProcessResult processResult = osProcessExecutor.execute(processDescriptor);
    if (processResult.getExitCode() != 0) {
        // The message previously read "Could list libraries", which stated the opposite of the failure
        String errorMsg = "Could not list libraries in the docker image. " + "Try to retry the command or recreate the environment" + "\n Exit code: " + processResult.getExitCode() + "\nout: " + processResult.getStdout() + "\nerr: " + processResult.getStderr() + "||\n";
        throw new IOException(errorMsg);
    }
    return processResult.getStdout();
}
Also used : ProcessResult(io.hops.hopsworks.common.util.ProcessResult) ProcessDescriptor(io.hops.hopsworks.common.util.ProcessDescriptor) IOException(java.io.IOException)

Aggregations

ProcessDescriptor (io.hops.hopsworks.common.util.ProcessDescriptor)24 ProcessResult (io.hops.hopsworks.common.util.ProcessResult)24 IOException (java.io.IOException)22 File (java.io.File)9 TransactionAttribute (javax.ejb.TransactionAttribute)5 ServiceException (io.hops.hopsworks.exceptions.ServiceException)4 ProjectException (io.hops.hopsworks.exceptions.ProjectException)3 ServingException (io.hops.hopsworks.exceptions.ServingException)3 Project (io.hops.hopsworks.persistence.entity.project.Project)3 BufferedWriter (java.io.BufferedWriter)3 FileWriter (java.io.FileWriter)3 Path (java.nio.file.Path)3 ServiceDiscoveryException (com.logicalclocks.servicediscoverclient.exceptions.ServiceDiscoveryException)2 CompressionInfo (io.hops.hopsworks.common.dataset.util.CompressionInfo)2 DatasetException (io.hops.hopsworks.exceptions.DatasetException)2 JobException (io.hops.hopsworks.exceptions.JobException)2 PythonException (io.hops.hopsworks.exceptions.PythonException)2 TensorBoardException (io.hops.hopsworks.exceptions.TensorBoardException)2 FileNotFoundException (java.io.FileNotFoundException)2 URISyntaxException (java.net.URISyntaxException)2