use of io.hops.hopsworks.common.util.ProcessResult in project hopsworks by logicalclocks.
the class TensorBoardProcessMgr method killTensorBoard.
/**
 * Kills a running TensorBoard by invoking the {@code tensorboard.sh} script from the sudoers
 * directory through {@code /usr/bin/sudo} with the {@code kill} sub-command.
 *
 * @param tb the TensorBoard whose cid is handed to the kill script
 * @return the exit code reported for the executed command, or {@code 2} when launching the
 *         command raised an {@link IOException}
 */
public int killTensorBoard(TensorBoard tb) {
  final String script = settings.getSudoersDir() + "/tensorboard.sh";
  final ProcessDescriptor killCommand = new ProcessDescriptor.Builder()
      .addCommand("/usr/bin/sudo")
      .addCommand(script)
      .addCommand("kill")
      .addCommand(tb.getCid())
      .ignoreOutErrStreams(true)
      .build();
  LOGGER.log(Level.FINE, killCommand.toString());

  try {
    final ProcessResult outcome = osProcessExecutor.execute(killCommand);
    // A process that has not exited is only reported; its exit code is still returned below.
    if (!outcome.processExited()) {
      LOGGER.log(Level.SEVERE, "Failed to kill TensorBoard, process time-out");
    }
    return outcome.getExitCode();
  } catch (IOException ex) {
    LOGGER.log(Level.SEVERE, "Failed to kill TensorBoard", ex);
    return 2;
  }
}
use of io.hops.hopsworks.common.util.ProcessResult in project hopsworks by logicalclocks.
the class DatasetController method zip.
/**
 * Compresses {@code path} by running the {@code zip-background.sh} script of the Hopsworks domain.
 *
 * <p>The method resolves the HDFS user for the project/user pair, verifies the source exists,
 * creates the staging directory, registers the compression with the settings, and then executes
 * the script with the staging directory, the source path, the HDFS user and, when given, the
 * destination path.
 *
 * @param project the project used to resolve the HDFS user name
 * @param user the user used to resolve the HDFS user name
 * @param path the path to compress
 * @param destPath optional destination path; omitted from the command when {@code null}
 * @throws DatasetException with {@code COMPRESSION_SIZE_ERROR} when the script exits with code 2,
 *         or with {@code COMPRESSION_ERROR} for any other non-zero exit code or when executing
 *         the script raises an {@link IOException}
 */
public void zip(Project project, Users user, Path path, Path destPath) throws DatasetException {
  final String hdfsUsername = hdfsUsersController.getHdfsUserName(project, user);
  checkFileExists(path, hdfsUsername);

  final CompressionInfo info = new CompressionInfo(path, destPath);
  final String stagingPath = settings.getStagingDir() + File.separator + info.getStagingDirectory();
  new File(stagingPath).mkdirs();
  settings.addZippingState(info);

  final ProcessDescriptor.Builder command = new ProcessDescriptor.Builder()
      .addCommand(settings.getHopsworksDomainDir() + "/bin/zip-background.sh")
      .addCommand(stagingPath)
      .addCommand(path.toString())
      .addCommand(hdfsUsername);
  if (destPath != null) {
    command.addCommand(destPath.toString());
  }
  final ProcessDescriptor descriptor = command.ignoreOutErrStreams(true).build();

  final int exitCode;
  try {
    exitCode = osProcessExecutor.execute(descriptor).getExitCode();
  } catch (IOException ex) {
    throw new DatasetException(RESTCodes.DatasetErrorCode.COMPRESSION_ERROR, Level.SEVERE,
        "path: " + path.toString(), ex.getMessage(), ex);
  }

  // Exit code 2 is mapped to its own size error; every other non-zero code is a generic failure.
  if (exitCode == 2) {
    throw new DatasetException(RESTCodes.DatasetErrorCode.COMPRESSION_SIZE_ERROR, Level.WARNING);
  }
  if (exitCode != 0) {
    throw new DatasetException(RESTCodes.DatasetErrorCode.COMPRESSION_ERROR, Level.WARNING,
        "path: " + path.toString() + ", result: " + exitCode);
  }
}
use of io.hops.hopsworks.common.util.ProcessResult in project hopsworks by logicalclocks.
the class LocalhostSkLearnServingController method startServingInstance.
/**
 * Starts a SkLearn serving instance. Executes the sklearn bash script to launch a Flask server as serving-user
 * in the project's anaconda environment. It records the PID of the server for monitoring.
 *
 * <p>On any failure the serving is marked with {@code CID_STOPPED} and persisted before the exception
 * propagates. The lock on the serving entry is released in all cases.
 *
 * @param project the project to start the serving in
 * @param user the user starting the serving
 * @param serving the serving instance to start (flask server)
 * @throws ServingException if the start script exits with a non-zero code, if the pid file is still
 *         missing or empty after the retries, or if any other error occurs during startup
 */
public void startServingInstance(Project project, Users user, Serving serving) throws ServingException {
  String script = settings.getSudoersDir() + "/sklearn_serving.sh";
  Integer port = ThreadLocalRandom.current().nextInt(40000, 59999);
  Path secretDir = Paths.get(settings.getStagingDir(), SERVING_DIRS + serving.getLocalDir());

  // Only the last path segment of the predictor is passed as the file name
  String predictorFilename = serving.getPredictor();
  if (serving.getPredictor().contains("/")) {
    String[] splits = serving.getPredictor().split("/");
    predictorFilename = splits[splits.length - 1];
  }

  try {
    ProcessDescriptor processDescriptor = new ProcessDescriptor.Builder()
        .addCommand("/usr/bin/sudo")
        .addCommand(script)
        .addCommand("start")
        .addCommand(predictorFilename)
        .addCommand(Paths.get(serving.getPredictor()).toString())
        .addCommand(String.valueOf(port))
        .addCommand(secretDir.toString())
        .addCommand(project.getName() + USER_NAME_DELIMITER + user.getUsername())
        .addCommand(project.getName().toLowerCase())
        .addCommand(settings.getAnacondaProjectDir() + "/bin/python")
        .addCommand(certificateMaterializer.getUserTransientKeystorePath(project, user))
        .addCommand(certificateMaterializer.getUserTransientTruststorePath(project, user))
        .addCommand(certificateMaterializer.getUserTransientPasswordPath(project, user))
        .addCommand(serving.getName())
        .addCommand(projectUtils.getFullDockerImageName(project, false))
        .setWaitTimeout(2L, TimeUnit.MINUTES)
        .ignoreOutErrStreams(true)
        .build();
    logger.log(Level.FINE, processDescriptor.toString());

    // Materialized TLS certificates so that user can read from HDFS inside python script
    certificateMaterializer.materializeCertificatesLocal(user.getUsername(), project.getName());

    ProcessResult processResult = osProcessExecutor.execute(processDescriptor);
    if (processResult.getExitCode() != 0) {
      // Startup process failed for some reason; the serving is marked as stopped in the catch block.
      // NOTE(review): the descriptor is built with ignoreOutErrStreams(true), so stdout/stderr are
      // presumably empty here - confirm against ProcessDescriptor.
      throw new ServingException(RESTCodes.ServingErrorCode.LIFECYCLEERRORINT, Level.WARNING,
          "Could not start sklearn serving",
          "out:" + processResult.getStdout() + ", err:" + processResult.getStderr());
    }

    // Read the pid for SkLearn Serving Flask server
    Path pidFilePath = Paths.get(secretDir.toString(), "sklearn_flask_server.pid");
    // Pid file is created by sklearn server inside the docker container.
    // That means the process that started the container returned with exit code 0 but the file might not have been
    // created yet. Therefore, we wait until the file is created
    String pidContents = readSkLearnPidIfPresent(pidFilePath);
    int pidReadCounter = 0;
    while (Strings.isNullOrEmpty(pidContents) && pidReadCounter < 10) {
      logger.log(Level.FINE, "Waiting for sklearn to start...");
      Thread.sleep(1000);
      pidContents = readSkLearnPidIfPresent(pidFilePath);
      pidReadCounter++;
    }
    if (Strings.isNullOrEmpty(pidContents)) {
      throw new ServingException(RESTCodes.ServingErrorCode.LIFECYCLEERRORINT, Level.WARNING,
          "Could not start sklearn serving because pid file could not be read or was empty");
    }
    logger.log(Level.FINE, "sklearn pidContents:" + pidContents);

    // Update the info in the db
    serving.setCid(pidContents);
    serving.setLocalPort(port);
    serving.setDeployed(new Date());
    servingFacade.updateDbObject(serving, project);
  } catch (ServingException ex) {
    // Already a well-formed domain error: mark the serving as stopped and propagate it unchanged
    // so its user message and log level are not lost by re-wrapping.
    serving.setCid(CID_STOPPED);
    servingFacade.updateDbObject(serving, project);
    throw ex;
  } catch (Exception ex) {
    if (ex instanceof InterruptedException) {
      // Restore the interrupt flag so callers further up the stack can observe the interruption
      Thread.currentThread().interrupt();
    }
    // Startup process failed for some reason
    serving.setCid(CID_STOPPED);
    servingFacade.updateDbObject(serving, project);
    throw new ServingException(RESTCodes.ServingErrorCode.LIFECYCLEERRORINT, Level.SEVERE, null,
        ex.getMessage(), ex);
  } finally {
    if (settings.getHopsRpcTls()) {
      certificateMaterializer.removeCertificatesLocal(user.getUsername(), project.getName());
    }
    // release lock on the serving entry
    servingFacade.releaseLock(project, serving.getId());
  }
}

/**
 * Reads the first line of the pid file, treating a file that does not exist yet as "no pid yet"
 * so the caller's retry loop can keep waiting instead of failing on the first attempt.
 *
 * @param pidFilePath location of the pid file
 * @return the first line of the file, or {@code null} when the file does not exist
 * @throws java.io.IOException if the file exists but cannot be read
 */
private String readSkLearnPidIfPresent(Path pidFilePath) throws java.io.IOException {
  if (!pidFilePath.toFile().exists()) {
    return null;
  }
  return Files.readFirstLine(pidFilePath.toFile(), Charset.defaultCharset());
}
use of io.hops.hopsworks.common.util.ProcessResult in project hopsworks by logicalclocks.
the class DockerRegistryMngrImpl method deleteProjectImagesOnRegistry.
/**
 * Fetches the tag list of a project's docker image from the registry
 * ({@code https://<registry>/v2/<image>/tags/list}) and, for every tag returned, runs the
 * {@code dockerImage.sh delete} command from the sudoers directory through {@code /usr/bin/sudo}.
 *
 * @param projectDockerImage the project's docker image name; its project-name part is derived via
 *        {@code projectUtils.getProjectNameFromDockerImageName}
 * @return the tags reported by the registry; empty when the response has no {@code tags} entry or
 *         the entry is JSON {@code null}
 * @throws ServiceDiscoveryException declared by the method; presumably raised while resolving the
 *         registry URL - confirm against {@code projectUtils.getRegistryURL()}
 * @throws IOException if the registry answers with a status code of 400 or above, or if the delete
 *         command exits with a non-zero code
 */
public List<String> deleteProjectImagesOnRegistry(String projectDockerImage) throws ServiceDiscoveryException, IOException {
  final String projectDockerImageNoTags = projectUtils.getProjectNameFromDockerImageName(projectDockerImage);
  URI registryURL = URI.create("https://" + projectUtils.getRegistryURL() + "/v2/" + projectDockerImageNoTags + "/tags" + "/list");
  HttpGet request = new HttpGet(registryURL);
  request.setHeader(HttpHeaders.CONTENT_TYPE, "application/json");
  HttpHost host = new HttpHost(registryURL.getHost(), registryURL.getPort(), registryURL.getScheme());
  String httpResp = httpClient.execute(host, request, httpResponse -> {
    if (httpResponse.getStatusLine().getStatusCode() >= 400) {
      throw new IOException("Could not fetch tags from registry: " + httpResponse.getStatusLine().toString());
    }
    return EntityUtils.toString(httpResponse.getEntity());
  });

  List<String> projectImageTags = new ArrayList<>();
  JSONObject respJson = new JSONObject(httpResp);
  // isNull() is true both when the key is absent and when its value is JSON null. The previous
  // reference comparison against the string literal "null" could never detect a JSON null.
  if (!respJson.isNull("tags")) {
    JSONArray tagsJSON = respJson.getJSONArray("tags");
    String prog = settings.getSudoersDir() + "/dockerImage.sh";
    for (int i = 0; i < tagsJSON.length(); i++) {
      String tag = tagsJSON.get(i).toString();
      projectImageTags.add(tag);
      // NOTE(review): the same delete command is issued once per tag and the tag itself is not
      // passed to the script - confirm this is what dockerImage.sh expects.
      ProcessDescriptor processDescriptor = new ProcessDescriptor.Builder()
          .addCommand("/usr/bin/sudo")
          .addCommand(prog)
          .addCommand("delete")
          .addCommand(projectDockerImageNoTags)
          .redirectErrorStream(true)
          .setWaitTimeout(1, TimeUnit.MINUTES)
          .build();
      ProcessResult processResult = osProcessExecutor.execute(processDescriptor);
      if (processResult.getExitCode() != 0) {
        throw new IOException("Could not delete the docker image. Exit code: " + processResult.getExitCode() + " out: " + processResult.getStdout());
      }
    }
  }
  return projectImageTags;
}
use of io.hops.hopsworks.common.util.ProcessResult in project hopsworks by logicalclocks.
the class LibraryController method condaList.
/**
 * Runs the {@code dockerImage.sh list} command from the sudoers directory through
 * {@code /usr/bin/sudo} for the given docker image and returns what the command printed.
 *
 * @param dockerImage the docker image passed to the {@code list} command
 * @return the standard output of the command (the error stream is redirected into it)
 * @throws IOException if the command cannot be executed or exits with a non-zero code; in the
 *         latter case the message carries the exit code and the captured output
 */
public String condaList(String dockerImage) throws IOException {
  String prog = settings.getSudoersDir() + "/dockerImage.sh";
  ProcessDescriptor processDescriptor = new ProcessDescriptor.Builder()
      .addCommand("/usr/bin/sudo")
      .addCommand(prog)
      .addCommand("list")
      .addCommand(dockerImage)
      .redirectErrorStream(true)
      .setWaitTimeout(30, TimeUnit.MINUTES)
      .build();
  ProcessResult processResult = osProcessExecutor.execute(processDescriptor);
  if (processResult.getExitCode() != 0) {
    // The message previously read "Could list libraries", which stated the opposite of the failure.
    String errorMsg = "Could not list libraries in the docker image. " + "Try to retry the command or recreate the environment" + "\n Exit code: " + processResult.getExitCode() + "\nout: " + processResult.getStdout() + "\nerr: " + processResult.getStderr() + "||\n";
    throw new IOException(errorMsg);
  }
  return processResult.getStdout();
}
Aggregations