Use of io.hops.hopsworks.exceptions.ServingException in project hopsworks by logicalclocks.
In class LocalhostTfServingController, method startServingInstance.
/**
 * Starts a TensorFlow Serving instance. Executes the tfserving bash script, which launches a TensorFlow
 * Serving server as the serving user and localizes the TF model from HDFS. The PID of the server is
 * recorded for monitoring.
 *
 * @param project the project to start the serving in
 * @param user the user starting the serving
 * @param serving the serving instance to start (tfserving modelserver)
 * @throws ServingException
 */
public void startServingInstance(Project project, Users user, Serving serving) throws ServingException {
  String script = settings.getSudoersDir() + "/tfserving.sh";
  // TODO(Fabio): this is bad as we don't know if the port is used or not (see the port-probing sketch after this method)
  Integer grpcPort = ThreadLocalRandom.current().nextInt(40000, 59999);
  Integer restPort = ThreadLocalRandom.current().nextInt(40000, 59999);
  Path secretDir = Paths.get(settings.getStagingDir(), SERVING_DIRS + serving.getLocalDir());
  ProcessDescriptor processDescriptor;
  try {
    processDescriptor = new ProcessDescriptor.Builder()
        .addCommand("/usr/bin/sudo")
        .addCommand(script)
        .addCommand("start")
        .addCommand(serving.getName())
        .addCommand(Paths.get(serving.getModelPath(), serving.getModelVersion().toString()).toString())
        .addCommand(String.valueOf(grpcPort))
        .addCommand(String.valueOf(restPort))
        .addCommand(secretDir.toString())
        .addCommand(project.getName() + USER_NAME_DELIMITER + user.getUsername())
        .addCommand(serving.isBatchingEnabled() ? "1" : "0")
        .addCommand(project.getName().toLowerCase())
        .addCommand(projectUtils.getFullDockerImageName(project, true))
        .setWaitTimeout(2L, TimeUnit.MINUTES)
        .ignoreOutErrStreams(false)
        .build();
    logger.log(Level.INFO, processDescriptor.toString());
  } catch (ServiceDiscoveryException ex) {
    throw new ServingException(RESTCodes.ServingErrorCode.LIFECYCLEERRORINT, Level.SEVERE, null, ex.getMessage(), ex);
  }
  // Materialize TLS certificates so the serving process can read the model
  if (settings.getHopsRpcTls()) {
    try {
      certificateMaterializer.materializeCertificatesLocal(user.getUsername(), project.getName());
    } catch (IOException e) {
      throw new ServingException(RESTCodes.ServingErrorCode.LIFECYCLEERRORINT, Level.SEVERE, null, e.getMessage(), e);
    } finally {
      // Release the lock on the serving entry
      servingFacade.releaseLock(project, serving.getId());
    }
  }
  try {
    ProcessResult processResult = osProcessExecutor.execute(processDescriptor);
    if (processResult.getExitCode() != 0) {
      // The startup process failed for some reason
      serving.setCid(CID_STOPPED);
      servingFacade.updateDbObject(serving, project);
      throw new ServingException(RESTCodes.ServingErrorCode.LIFECYCLEERRORINT, Level.INFO);
    }
    // Read the PID of the TensorFlow Serving server
    Path cidFilePath = Paths.get(secretDir.toString(), "tfserving.pid");
    String cid = Files.readFirstLine(cidFilePath.toFile(), Charset.defaultCharset());
    // Update the info in the db
    serving.setCid(cid);
    serving.setLocalPort(restPort);
    serving.setDeployed(new Date());
    servingFacade.updateDbObject(serving, project);
  } catch (Exception ex) {
    // The startup process failed for some reason
    serving.setCid(CID_STOPPED);
    servingFacade.updateDbObject(serving, project);
    throw new ServingException(RESTCodes.ServingErrorCode.LIFECYCLEERRORINT, Level.SEVERE, null, ex.getMessage(), ex);
  } finally {
    if (settings.getHopsRpcTls()) {
      certificateMaterializer.removeCertificatesLocal(user.getUsername(), project.getName());
    }
    // Release the lock on the serving entry
    servingFacade.releaseLock(project, serving.getId());
  }
}
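The TODO above flags a real weakness: nextInt may hand back a port that is already bound. A minimal sketch of a safer pick, assuming nothing about Hopsworks internals (PortProbe and findFreePort are hypothetical names, not part of LocalhostTfServingController); it only uses java.net.ServerSocket to probe candidates before passing one to the script. This is still racy between probe and use, but it rejects ports that are occupied at pick time.

import java.io.IOException;
import java.net.ServerSocket;
import java.util.concurrent.ThreadLocalRandom;

public class PortProbe {

  // Hypothetical helper: bind briefly to verify a candidate port is free.
  // The window between closing the probe socket and starting the server
  // remains racy, but obviously busy ports are filtered out.
  static int findFreePort(int lo, int hi, int attempts) throws IOException {
    for (int i = 0; i < attempts; i++) {
      int candidate = ThreadLocalRandom.current().nextInt(lo, hi);
      try (ServerSocket ignored = new ServerSocket(candidate)) {
        return candidate; // bind succeeded, so the port was free just now
      } catch (IOException e) {
        // port in use; try another candidate
      }
    }
    throw new IOException("No free port found in [" + lo + ", " + hi + ") after " + attempts + " attempts");
  }

  public static void main(String[] args) throws IOException {
    System.out.println(findFreePort(40000, 59999, 50));
  }
}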
Use of io.hops.hopsworks.exceptions.ServingException in project hopsworks by logicalclocks.
In class ServingUtil, method validatePythonUserInput.
private void validatePythonUserInput(Serving serving) throws IllegalArgumentException, ServingException {
  // Check model files and/or python scripts
  try {
    List<Inode> children = inodeController.getChildren(serving.getModelVersionPath());
    long modelFiles = children.stream().filter(c -> {
      String name = c.getInodePK().getName();
      return MODEL_FILE_EXTS.stream().anyMatch(name::endsWith);
    }).count();
    if (modelFiles == 0) {
      // if no model files found
      if (children.stream().noneMatch(c -> c.getInodePK().getName().endsWith(".py"))) {
        // and no python script
        throw new ServingException(RESTCodes.ServingErrorCode.MODEL_FILES_STRUCTURE_NOT_VALID, Level.FINE,
          "Model path requires either a python script or model file (i.e., joblib or pickle files)");
      }
    }
  } catch (FileNotFoundException e) {
    throw new ServingException(RESTCodes.ServingErrorCode.MODEL_PATH_NOT_FOUND, Level.FINE, null);
  }
  if (serving.isBatchingEnabled()) {
    throw new ServingException(RESTCodes.ServingErrorCode.REQUEST_BATCHING_NOT_SUPPORTED, Level.SEVERE,
      "Request batching is not supported in Python deployments");
  }
}
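The extension filter is easy to exercise in isolation. A minimal self-contained sketch: the extension list here (.joblib, .pkl, .pickle) is an assumption based on the error message above; the real MODEL_FILE_EXTS constant is defined elsewhere in ServingUtil and may differ.

import java.util.List;

public class ModelFileCheck {

  // Assumed extension list for illustration only.
  private static final List<String> MODEL_FILE_EXTS = List.of(".joblib", ".pkl", ".pickle");

  // Same stream filter as above, applied to plain file names: count artifacts
  // whose names end with a known model extension.
  static long countModelFiles(List<String> fileNames) {
    return fileNames.stream()
        .filter(name -> MODEL_FILE_EXTS.stream().anyMatch(name::endsWith))
        .count();
  }

  public static void main(String[] args) {
    System.out.println(countModelFiles(List.of("model.joblib", "predict.py"))); // 1 -> model file present
    System.out.println(countModelFiles(List.of("predict.py")));                 // 0 -> falls back to the .py check
  }
}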
Use of io.hops.hopsworks.exceptions.ServingException in project hopsworks by logicalclocks.
In class ServingUtil, method validateServingName.
private void validateServingName(Serving serving, Serving dbServing) throws ServingException {
  if (Strings.isNullOrEmpty(serving.getName())) {
    throw new IllegalArgumentException("Serving name not provided");
  } else if (serving.getName().contains(" ")) {
    throw new IllegalArgumentException("Serving name cannot contain spaces");
  }
  // Check for duplicated entries
  if (dbServing != null && !dbServing.getId().equals(serving.getId())) {
    // There is already an entry for this project
    throw new ServingException(RESTCodes.ServingErrorCode.DUPLICATEDENTRY, Level.FINE);
  }
  // Check that the serving name matches the allowed regex, as required by the
  // InferenceResource to use it as a REST endpoint
  Pattern urlPattern = Pattern.compile("[a-zA-Z0-9]+");
  Matcher urlMatcher = urlPattern.matcher(serving.getName());
  if (!urlMatcher.matches()) {
    throw new IllegalArgumentException("Serving name must follow regex: \"[a-zA-Z0-9]+\"");
  }
}
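As a quick sanity check, the regex actually subsumes the earlier guards: an empty name and a name containing a space both fail matches() too, since the character class admits only letters and digits and the + quantifier requires at least one of them. A self-contained example:

import java.util.regex.Pattern;

public class ServingNameCheck {

  // Same pattern the validator compiles above.
  private static final Pattern URL_PATTERN = Pattern.compile("[a-zA-Z0-9]+");

  public static void main(String[] args) {
    System.out.println(URL_PATTERN.matcher("mnistClassifier2").matches()); // true
    System.out.println(URL_PATTERN.matcher("my model").matches());        // false: space rejected
    System.out.println(URL_PATTERN.matcher("my-model").matches());        // false: '-' not in the character class
    System.out.println(URL_PATTERN.matcher("").matches());                // false: '+' needs at least one char
  }
}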
Use of io.hops.hopsworks.exceptions.ServingException in project hopsworks by logicalclocks.
In class ServingUtil, method validateKafkaTopicSchema.
private void validateKafkaTopicSchema(Project project, Serving serving, TopicDTO topic) throws ServingException {
  // If an existing topic is provided, check its schema
  if (topic != null && !topic.getName().equals("NONE") && !topic.getName().equals("CREATE")) {
    ProjectTopics projectTopic = projectTopicsFacade.findTopicByNameAndProject(project, topic.getName())
      .orElseThrow(() -> new ServingException(RESTCodes.ServingErrorCode.KAFKA_TOPIC_NOT_FOUND, Level.SEVERE, null));
    Subjects subjects = projectTopic.getSubjects();
    if (!subjects.getSubject().equalsIgnoreCase(Settings.INFERENCE_SCHEMANAME)) {
      throw new ServingException(RESTCodes.ServingErrorCode.KAFKA_TOPIC_NOT_VALID, Level.FINE,
        "Inference logging requires a Kafka topic with schema '" + Settings.INFERENCE_SCHEMANAME + "'");
    }
  }
}
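A small standalone sketch makes the gating explicit. The helper name is hypothetical, and the reading of the sentinels is an assumption inferred from the condition above: "NONE" appears to mean inference logging is disabled, and "CREATE" that a fresh topic will be created, so only pre-existing topics get schema-checked.

public class TopicGateCheck {

  // Hypothetical helper, not part of ServingUtil: mirrors the condition above
  // on a plain topic name instead of a TopicDTO.
  static boolean requiresSchemaValidation(String topicName) {
    return topicName != null && !"NONE".equals(topicName) && !"CREATE".equals(topicName);
  }

  public static void main(String[] args) {
    System.out.println(requiresSchemaValidation("NONE"));         // false: logging disabled (assumed)
    System.out.println(requiresSchemaValidation("CREATE"));       // false: topic created fresh (assumed)
    System.out.println(requiresSchemaValidation("inferencelog")); // true: existing topic, schema is checked
  }
}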
Use of io.hops.hopsworks.exceptions.ServingException in project hopsworks by logicalclocks.
In class ProjectController, method removeProjectInt.
private void removeProjectInt(Project project, List<HdfsUsers> usersToClean, List<HdfsGroups> groupsToClean,
    List<Future<?>> projectCreationFutures, boolean decreaseCreatedProj, Users owner)
    throws IOException, InterruptedException, HopsSecurityException, ServiceException, ProjectException,
    GenericException, TensorBoardException, FeaturestoreException {
  DistributedFileSystemOps dfso = null;
  try {
    dfso = dfs.getDfsOps();
    // Run custom handlers for project deletion
    ProjectHandler.runProjectPreDeleteHandlers(projectHandlers, project);
    // Log removal to notify Elasticsearch
    logProject(project, OperationType.Delete);
    // Change the owner and group of the project folder to the HDFS superuser
    Path location = new Path(Utils.getProjectPath(project.getName()));
    changeOwnershipToSuperuser(location, dfso);
    Path dumy = new Path("/tmp/" + project.getName());
    changeOwnershipToSuperuser(dumy, dfso);
    // Remove kafka topics
    removeKafkaTopics(project);
    // projectCreationFutures will be null during project deletion.
    if (projectCreationFutures != null) {
      for (Future f : projectCreationFutures) {
        if (f != null) {
          try {
            f.get();
          } catch (ExecutionException ex) {
            LOGGER.log(Level.SEVERE,
              "Error while waiting for ProjectCreationFutures to finish for Project " + project.getName(), ex);
          }
        }
      }
    }
    try {
      certificatesController.revokeProjectCertificates(project, owner);
    } catch (HopsSecurityException ex) {
      if (ex.getErrorCode() != RESTCodes.SecurityErrorCode.CERTIFICATE_NOT_FOUND) {
        LOGGER.log(Level.SEVERE, "Could not delete certificates during cleanup for project "
          + project.getName() + ". Manual cleanup is needed!!!", ex);
        throw ex;
      }
    } catch (IOException | GenericException ex) {
      LOGGER.log(Level.SEVERE, "Could not delete certificates during cleanup for project "
        + project.getName() + ". Manual cleanup is needed!!!", ex);
      throw ex;
    }
    // Remove running TensorBoards
    removeTensorBoard(project);
    // Remove Jupyter
    removeJupyter(project);
    removeProjectRelatedFiles(usersToClean, dfso);
    // Remove quota
    removeQuotas(project);
    // Change owner for files in shared datasets
    fixSharedDatasets(project, dfso);
    // Delete the online featurestore database
    onlineFeaturestoreController.removeOnlineFeatureStore(project);
    // Delete the Hive database - this automatically cleans up all of Hive's metadata
    hiveController.dropDatabases(project, dfso, false);
    try {
      // Delete the Elasticsearch template for this project
      removeElasticsearch(project);
    } catch (ElasticException ex) {
      LOGGER.log(Level.WARNING, "Failure while removing elasticsearch indices", ex);
    }
    // Delete project group and users
    removeGroupAndUsers(groupsToClean, usersToClean);
    // Remove the dumy inode
    dfso.rm(dumy, true);
    // Remove servings
    try {
      servingController.deleteAll(project);
    } catch (ServingException e) {
      throw new IOException(e);
    }
    // Remove Airflow DAGs from the local filesystem,
    // JWT renewal monitors and materialized X.509 certificates
    airflowManager.onProjectRemoval(project);
    // Remove the project folder
    removeProjectFolder(project.getName(), dfso);
    if (decreaseCreatedProj) {
      usersController.decrementNumProjectsCreated(project.getOwner().getUid());
    }
    usersController.decrementNumActiveProjects(project.getOwner().getUid());
    // Run custom handlers for project deletion
    ProjectHandler.runProjectPostDeleteHandlers(projectHandlers, project);
    LOGGER.log(Level.INFO, "{0} - project removed.", project.getName());
  } finally {
    if (dfso != null) {
      dfso.close();
    }
  }
}
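The future-draining step in the middle of the method is worth isolating: deletion blocks on any pending creation tasks so it does not race a half-finished project setup, and an ExecutionException is logged rather than propagated. A minimal self-contained sketch of the same pattern (FutureDrain and drain are hypothetical names), using only java.util.concurrent and java.util.logging:

import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.logging.Level;
import java.util.logging.Logger;

public class FutureDrain {

  private static final Logger LOGGER = Logger.getLogger(FutureDrain.class.getName());

  // Block on each pending task; log failures instead of aborting the cleanup.
  // The null checks mirror the code above: the list is null during plain
  // project deletion, and individual entries may be null.
  static void drain(List<Future<?>> futures, String projectName) throws InterruptedException {
    if (futures == null) {
      return;
    }
    for (Future<?> f : futures) {
      if (f == null) {
        continue;
      }
      try {
        f.get();
      } catch (ExecutionException ex) {
        LOGGER.log(Level.SEVERE, "Error while waiting for creation futures of " + projectName, ex);
      }
    }
  }
}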