Use of io.hops.hopsworks.exceptions.GenericException in project hopsworks by logicalclocks.
The class ProjectService, method example:
@POST
@Path("starterProject/{type}")
@Produces(MediaType.APPLICATION_JSON)
public Response example(@PathParam("type") String type, @Context HttpServletRequest req,
    @Context SecurityContext sc)
    throws DatasetException, GenericException, KafkaException, ProjectException, UserException,
    ServiceException, HopsSecurityException, FeaturestoreException, JobException, IOException,
    ElasticException, SchemaException, ProvenanceException {
  TourProjectType demoType;
  try {
    demoType = TourProjectType.fromString(type);
  } catch (IllegalArgumentException e) {
    throw new IllegalArgumentException("Type must be one of: " + Arrays.toString(TourProjectType.values()));
  }
  ProjectDTO projectDTO = new ProjectDTO();
  Project project = null;
  projectDTO.setDescription("A demo project for getting started with " + demoType.getDescription());
  Users user = jWTHelper.getUserPrincipal(sc);
  String username = user.getUsername();
  List<String> projectServices = new ArrayList<>();
  // save the project
  String readMeMessage = null;
  switch (demoType) {
    case KAFKA:
      // It's a Kafka guide
      projectDTO.setProjectName("demo_" + TourProjectType.KAFKA.getTourName() + "_" + username);
      populateActiveServices(projectServices, TourProjectType.KAFKA);
      readMeMessage = "jar file to demonstrate Kafka streaming";
      break;
    case SPARK:
      // It's a Spark guide
      projectDTO.setProjectName("demo_" + TourProjectType.SPARK.getTourName() + "_" + username);
      populateActiveServices(projectServices, TourProjectType.SPARK);
      readMeMessage = "jar file to demonstrate the creation of a spark batch job";
      break;
    case FS:
      // It's a Feature Store guide
      projectDTO.setProjectName("demo_" + TourProjectType.FS.getTourName() + "_" + username);
      populateActiveServices(projectServices, TourProjectType.FS);
      readMeMessage = "Dataset containing a jar file and data that can be used to run a sample spark-job for "
          + "inserting data in the feature store.";
      break;
    case ML:
      // It's a TensorFlow guide
      projectDTO.setProjectName("demo_" + TourProjectType.ML.getTourName() + "_" + username);
      populateActiveServices(projectServices, TourProjectType.ML);
      readMeMessage = "Jupyter notebooks and training data for demonstrating how to run Deep Learning";
      break;
    default:
      throw new IllegalArgumentException("Type must be one of: " + Arrays.toString(TourProjectType.values()));
  }
  projectDTO.setServices(projectServices);
  DistributedFileSystemOps dfso = null;
  DistributedFileSystemOps udfso = null;
  try {
    project = projectController.createProject(projectDTO, user, req.getSession().getId());
    dfso = dfs.getDfsOps();
    username = hdfsUsersBean.getHdfsUserName(project, user);
    udfso = dfs.getDfsOps(username);
    ProvTypeDTO projectMetaStatus = fsProvenanceController.getProjectProvType(user, project);
    String tourFilesDataset = projectController.addTourFilesToProject(user.getEmail(), project, dfso, dfso,
        demoType, projectMetaStatus);
    // generate a README for the TestJob dataset
    datasetController.generateReadme(udfso, tourFilesDataset, readMeMessage, project.getName());
  } catch (Exception ex) {
    // best-effort rollback: remove the partially created project before rethrowing
    projectController.cleanup(project, req.getSession().getId());
    throw ex;
  } finally {
    if (dfso != null) {
      dfso.close();
    }
    if (udfso != null) {
      dfs.closeDfsClient(udfso);
    }
  }
  return noCacheResponse.getNoCacheResponseBuilder(Response.Status.CREATED).entity(project).build();
}
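Since this is a plain JAX-RS resource method, any HTTP client can exercise it. Below is a minimal sketch of calling the endpoint with Java 11's built-in HttpClient; the host name, API path prefix, and bearer token are illustrative assumptions, not values confirmed by the source.

// Minimal sketch: POST to the starterProject endpoint to create a Spark demo project.
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class StarterProjectClient {
  public static void main(String[] args) throws Exception {
    HttpClient client = HttpClient.newHttpClient();
    HttpRequest request = HttpRequest.newBuilder()
        // host and path prefix are assumptions for illustration
        .uri(URI.create("https://hopsworks.example.com/hopsworks-api/api/project/starterProject/spark"))
        .header("Authorization", "Bearer <api-token>")  // hypothetical token
        .POST(HttpRequest.BodyPublishers.noBody())
        .build();
    HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
    // the method above returns 201 Created with the project entity on success
    System.out.println(response.statusCode() + ": " + response.body());
  }
}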
Use of io.hops.hopsworks.exceptions.GenericException in project hopsworks by logicalclocks.
The class ProjectController, method createProject:
/**
 * Creates a new project, the related directory, the different services in the
 * project, and the master of the project.
 * <p>
 * This needs to be an atomic operation (all or nothing). REQUIRES_NEW will
 * make sure a new transaction is created even if this method is called from
 * within a transaction.
 *
 * @param projectDTO
 * @param owner
 * @param sessionId
 * @return
 */
public Project createProject(ProjectDTO projectDTO, Users owner, String sessionId)
    throws DatasetException, GenericException, KafkaException, ProjectException, UserException,
    HopsSecurityException, ServiceException, FeaturestoreException, ElasticException, SchemaException,
    IOException {
  Long startTime = System.currentTimeMillis();
  // check that the project name is ok
  String projectName = projectDTO.getProjectName();
  FolderNameValidator.isValidProjectName(projectUtils, projectName);
  List<ProjectServiceEnum> projectServices = new ArrayList<>();
  if (projectDTO.getServices() != null) {
    for (String s : projectDTO.getServices()) {
      ProjectServiceEnum se = ProjectServiceEnum.valueOf(s.toUpperCase());
      projectServices.add(se);
    }
  }
  LOGGER.log(Level.FINE, () -> "PROJECT CREATION TIME. Step 1: " + (System.currentTimeMillis() - startTime));
  DistributedFileSystemOps dfso = null;
  Project project = null;
  try {
    dfso = dfs.getDfsOps();
    /*
     * Create the project in the database. If the creation goes through, it means
     * that there is no other project with the same name. This project row acts as
     * a lock: no other project can be created with the same name until this
     * project is removed from the database.
     */
    try {
      project = createProject(projectName, owner, projectDTO.getDescription(), dfso);
    } catch (EJBException ex) {
      LOGGER.log(Level.WARNING, null, ex);
      Path dummy = new Path("/tmp/" + projectName);
      try {
        dfso.rm(dummy, true);
      } catch (IOException e) {
        LOGGER.log(Level.SEVERE, null, e);
      }
      throw new ProjectException(RESTCodes.ProjectErrorCode.PROJECT_EXISTS, Level.SEVERE,
          "project: " + projectName, ex.getMessage(), ex);
    }
    LOGGER.log(Level.FINE, "PROJECT CREATION TIME. Step 2 (hdfs): {0}", System.currentTimeMillis() - startTime);
    verifyProject(project, dfso, sessionId);
    LOGGER.log(Level.FINE, "PROJECT CREATION TIME. Step 3 (verify): {0}", System.currentTimeMillis() - startTime);
    // Run the pre-create handlers.
    try {
      ProjectHandler.runProjectPreCreateHandlers(projectHandlers, project);
    } catch (ProjectException ex) {
      cleanup(project, sessionId, null, true, owner);
      throw ex;
    }
    List<Future<?>> projectCreationFutures = new ArrayList<>();
    // This is an async call
    try {
      projectCreationFutures.add(certificatesController.generateCertificates(project, owner));
    } catch (Exception ex) {
      cleanup(project, sessionId, projectCreationFutures, true, owner);
      throw new HopsSecurityException(RESTCodes.SecurityErrorCode.CERT_CREATION_ERROR, Level.SEVERE,
          "project: " + project.getName() + "owner: " + owner.getUsername(), ex.getMessage(), ex);
    }
    String username = hdfsUsersController.getHdfsUserName(project, owner);
    if (username == null || username.isEmpty()) {
      cleanup(project, sessionId, projectCreationFutures, true, owner);
      throw new UserException(RESTCodes.UserErrorCode.USER_WAS_NOT_FOUND, Level.SEVERE,
          "project: " + project.getName() + "owner: " + owner.getUsername());
    }
    LOGGER.log(Level.FINE, "PROJECT CREATION TIME. Step 4 (certs): {0}", System.currentTimeMillis() - startTime);
    // all the verifications have passed, we can now create the project folder
    ProvTypeDTO provType = settings.getProvType().dto;
    try {
      mkProjectDIR(projectName, dfso);
      fsProvController.updateProjectProvType(project, provType, dfso);
    } catch (IOException | EJBException | ProvenanceException ex) {
      cleanup(project, sessionId, projectCreationFutures, true, owner);
      throw new ProjectException(RESTCodes.ProjectErrorCode.PROJECT_FOLDER_NOT_CREATED, Level.SEVERE,
          "project: " + projectName, ex.getMessage(), ex);
    }
    LOGGER.log(Level.FINE, "PROJECT CREATION TIME. Step 5 (folders): {0}", System.currentTimeMillis() - startTime);
    // update the project with the project folder inode
    try {
      setProjectInode(project, dfso);
    } catch (IOException | EJBException ex) {
      cleanup(project, sessionId, projectCreationFutures, true, owner);
      throw new ProjectException(RESTCodes.ProjectErrorCode.PROJECT_INODE_CREATION_ERROR, Level.SEVERE,
          "project: " + projectName, ex.getMessage(), ex);
    }
    LOGGER.log(Level.FINE, "PROJECT CREATION TIME. Step 6 (inodes): {0}", System.currentTimeMillis() - startTime);
    // set payment and quotas
    try {
      setProjectOwnerAndQuotas(project, dfso, owner);
    } catch (IOException | EJBException ex) {
      cleanup(project, sessionId, projectCreationFutures, true, owner);
      throw new ProjectException(RESTCodes.ProjectErrorCode.QUOTA_ERROR, Level.SEVERE,
          "project: " + project.getName(), ex.getMessage(), ex);
    }
    LOGGER.log(Level.FINE, "PROJECT CREATION TIME. Step 7 (quotas): {0}", System.currentTimeMillis() - startTime);
    try {
      hdfsUsersController.addProjectFolderOwner(project, dfso);
      createProjectLogResources(owner, project, dfso);
    } catch (IOException | EJBException ex) {
      cleanup(project, sessionId, projectCreationFutures);
      throw new ProjectException(RESTCodes.ProjectErrorCode.PROJECT_SET_PERMISSIONS_ERROR, Level.SEVERE,
          "project: " + projectName, ex.getMessage(), ex);
    }
    LOGGER.log(Level.FINE, "PROJECT CREATION TIME. Step 8 (logs): {0}", System.currentTimeMillis() - startTime);
    // delete any old Elasticsearch indices and saved objects for this project name to avoid inconsistencies
    try {
      elasticController.deleteProjectIndices(project);
      elasticController.deleteProjectSavedObjects(projectName);
      LOGGER.log(Level.FINE, "PROJECT CREATION TIME. Step 9 (elastic cleanup): {0}",
          System.currentTimeMillis() - startTime);
    } catch (ElasticException ex) {
      LOGGER.log(Level.FINE, "Error while cleaning old project indices", ex);
    }
    logProject(project, OperationType.Add);
    // enable services
    for (ProjectServiceEnum service : projectServices) {
      try {
        projectCreationFutures.addAll(addService(project, service, owner, dfso, provType));
      } catch (RESTException | IOException ex) {
        cleanup(project, sessionId, projectCreationFutures);
        throw ex;
      }
    }
    try {
      for (Future f : projectCreationFutures) {
        if (f != null) {
          f.get();
        }
      }
    } catch (InterruptedException | ExecutionException ex) {
      LOGGER.log(Level.SEVERE, "Error while waiting for the certificate generation thread to finish. Will try to "
          + "cleanup...", ex);
      cleanup(project, sessionId, projectCreationFutures);
      throw new HopsSecurityException(RESTCodes.SecurityErrorCode.CERT_CREATION_ERROR, Level.SEVERE);
    }
    // Run the post-create handlers.
    try {
      ProjectHandler.runProjectPostCreateHandlers(projectHandlers, project);
    } catch (ProjectException ex) {
      cleanup(project, sessionId, projectCreationFutures);
      throw ex;
    }
    try {
      project = environmentController.createEnv(project, owner);
    } catch (PythonException | EJBException ex) {
      cleanup(project, sessionId, projectCreationFutures);
      throw new ProjectException(RESTCodes.ProjectErrorCode.PROJECT_ANACONDA_ENABLE_ERROR, Level.SEVERE,
          "project: " + projectName, ex.getMessage(), ex);
    }
    LOGGER.log(Level.FINE, "PROJECT CREATION TIME. Step 10 (env): {0}", System.currentTimeMillis() - startTime);
    return project;
  } finally {
    if (dfso != null) {
      dfso.close();
    }
    LOGGER.log(Level.FINE, "PROJECT CREATION TIME. Step 11 (close): {0}", System.currentTimeMillis() - startTime);
  }
}
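The method above follows a consistent pattern: every provisioning step gets its own try block, and any failure calls cleanup(...) before rethrowing so no half-created project is left behind. A minimal, generic sketch of that cleanup-on-failure pattern follows; the Step interface and rollback parameter are hypothetical names, not Hopsworks APIs.

// Minimal sketch: run provisioning steps in order, rolling back on the first failure.
import java.util.List;

public class ProvisioningPipeline {
  interface Step { void run() throws Exception; }  // hypothetical step abstraction

  public void provision(List<Step> steps, Runnable rollback) throws Exception {
    for (Step step : steps) {
      try {
        step.run();
      } catch (Exception ex) {
        rollback.run();  // undo everything created so far (analogous to cleanup(...))
        throw ex;        // surface the original failure to the caller
      }
    }
  }
}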
Use of io.hops.hopsworks.exceptions.GenericException in project hopsworks by logicalclocks.
The class ProjectController, method cleanup:
public void cleanup(Project project, String sessionId, List<Future<?>> projectCreationFutures,
    boolean decreaseCreatedProj, Users owner) throws GenericException {
  if (project == null) {
    return;
  }
  int nbTry = 0;
  while (nbTry < 2) {
    YarnClientWrapper yarnClientWrapper = ycs.getYarnClientSuper(settings.getConfiguration());
    YarnClient client = yarnClientWrapper.getYarnClient();
    try {
      // remove from project_team so that nobody can see the project anymore
      updateProjectTeamRole(project, ProjectRoleTypes.UNDER_REMOVAL);
      /*
       * Get all running YARN applications owned by any of the project members. We
       * will check later whether these applications have been stopped and their log
       * aggregation has finished. It would be better to check all applications
       * (even the ones that have finished running), but the log aggregation status
       * is not recovered when the resource manager restarts. As a result, we cannot
       * know whether the status is "NOT_START" because we should wait for it or
       * because the resource manager restarted.
       */
      Collection<ProjectTeam> team = project.getProjectTeamCollection();
      Set<String> hdfsUsers = new HashSet<>();
      for (ProjectTeam pt : team) {
        String hdfsUsername = hdfsUsersController.getHdfsUserName(project, pt.getUser());
        hdfsUsers.add(hdfsUsername);
      }
      List<ApplicationReport> projectsApps = getYarnApplications(hdfsUsers, client);
      // try to close all the Jupyter jobs
      removeJupyter(project);
      removeAnacondaEnv(project);
      removeAlertConfigs(project);
      // kill jobs
      killYarnJobs(project);
      waitForJobLogs(projectsApps, client);
      List<HdfsUsers> usersToClean = getUsersToClean(project);
      List<HdfsGroups> groupsToClean = getGroupsToClean(project);
      removeProjectInt(project, usersToClean, groupsToClean, projectCreationFutures, decreaseCreatedProj, owner);
      removeCertificatesFromMaterializer(project);
      // Delete the online feature store database
      onlineFeaturestoreController.removeOnlineFeatureStore(project);
      break;
    } catch (Exception ex) {
      nbTry++;
      if (nbTry < 2) {
        try {
          // linear backoff before the next attempt
          Thread.sleep(nbTry * 1000);
        } catch (InterruptedException ex1) {
          LOGGER.log(Level.SEVERE, null, ex1);
        }
      } else {
        throw new GenericException(RESTCodes.GenericErrorCode.UNKNOWN_ERROR, Level.SEVERE, null,
            ex.getMessage(), ex);
      }
    } finally {
      ycs.closeYarnClient(yarnClientWrapper);
    }
  }
}
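Note the control flow: cleanup() attempts the whole removal sequence up to twice, sleeping nbTry * 1000 ms between attempts, and only surfaces a GenericException when the final attempt fails. A minimal sketch of that bounded-retry-with-backoff pattern, with hypothetical Retry and Action names:

// Minimal sketch: retry an action up to maxAttempts times with linear backoff.
public final class Retry {
  @FunctionalInterface
  public interface Action { void run() throws Exception; }  // hypothetical

  public static void run(Action action, int maxAttempts) throws Exception {
    int attempt = 0;
    while (attempt < maxAttempts) {
      try {
        action.run();
        return;                        // success: stop retrying
      } catch (Exception ex) {
        attempt++;
        if (attempt >= maxAttempts) {
          throw ex;                    // out of attempts: surface the failure
        }
        Thread.sleep(attempt * 1000L); // linear backoff, as in cleanup()
      }
    }
  }
}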
Use of io.hops.hopsworks.exceptions.GenericException in project hopsworks by logicalclocks.
The class ProjectController, method removeProjectInt:
private void removeProjectInt(Project project, List<HdfsUsers> usersToClean, List<HdfsGroups> groupsToClean,
    List<Future<?>> projectCreationFutures, boolean decreaseCreatedProj, Users owner)
    throws IOException, InterruptedException, HopsSecurityException, ServiceException, ProjectException,
    GenericException, TensorBoardException, FeaturestoreException {
  DistributedFileSystemOps dfso = null;
  try {
    dfso = dfs.getDfsOps();
    // Run custom handlers for project deletion
    ProjectHandler.runProjectPreDeleteHandlers(projectHandlers, project);
    // log removal to notify Elasticsearch
    logProject(project, OperationType.Delete);
    // change the owner and group of the project folder to the HDFS superuser
    Path location = new Path(Utils.getProjectPath(project.getName()));
    changeOwnershipToSuperuser(location, dfso);
    Path dumy = new Path("/tmp/" + project.getName());
    changeOwnershipToSuperuser(dumy, dfso);
    // remove Kafka topics
    removeKafkaTopics(project);
    // projectCreationFutures will be null during project deletion.
    if (projectCreationFutures != null) {
      for (Future f : projectCreationFutures) {
        if (f != null) {
          try {
            f.get();
          } catch (ExecutionException ex) {
            LOGGER.log(Level.SEVERE, "Error while waiting for ProjectCreationFutures to finish for Project "
                + project.getName(), ex);
          }
        }
      }
    }
    try {
      certificatesController.revokeProjectCertificates(project, owner);
    } catch (HopsSecurityException ex) {
      if (ex.getErrorCode() != RESTCodes.SecurityErrorCode.CERTIFICATE_NOT_FOUND) {
        LOGGER.log(Level.SEVERE, "Could not delete certificates during cleanup for project " + project.getName()
            + ". Manual cleanup is needed!!!", ex);
        throw ex;
      }
    } catch (IOException | GenericException ex) {
      LOGGER.log(Level.SEVERE, "Could not delete certificates during cleanup for project " + project.getName()
          + ". Manual cleanup is needed!!!", ex);
      throw ex;
    }
    // remove running TensorBoards
    removeTensorBoard(project);
    // remove Jupyter
    removeJupyter(project);
    removeProjectRelatedFiles(usersToClean, dfso);
    // remove quota
    removeQuotas(project);
    // change owner for files in shared datasets
    fixSharedDatasets(project, dfso);
    // Delete the online feature store database
    onlineFeaturestoreController.removeOnlineFeatureStore(project);
    // Delete the Hive database - this automatically cleans up all of Hive's metadata
    hiveController.dropDatabases(project, dfso, false);
    try {
      // Delete the Elasticsearch template for this project
      removeElasticsearch(project);
    } catch (ElasticException ex) {
      LOGGER.log(Level.WARNING, "Failure while removing elasticsearch indices", ex);
    }
    // delete project group and users
    removeGroupAndUsers(groupsToClean, usersToClean);
    // remove the dummy inode
    dfso.rm(dumy, true);
    // Remove servings
    try {
      servingController.deleteAll(project);
    } catch (ServingException e) {
      throw new IOException(e);
    }
    // Remove Airflow DAGs from the local filesystem,
    // JWT renewal monitors and materialized X.509 certificates
    airflowManager.onProjectRemoval(project);
    // remove the project folder
    removeProjectFolder(project.getName(), dfso);
    if (decreaseCreatedProj) {
      usersController.decrementNumProjectsCreated(project.getOwner().getUid());
    }
    usersController.decrementNumActiveProjects(project.getOwner().getUid());
    // Run custom handlers for project deletion
    ProjectHandler.runProjectPostDeleteHandlers(projectHandlers, project);
    LOGGER.log(Level.INFO, "{0} - project removed.", project.getName());
  } finally {
    if (dfso != null) {
      dfso.close();
    }
  }
}
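As in the other methods, the DistributedFileSystemOps handle is acquired up front and released in a finally block. A minimal sketch of the same guarantee expressed with an AutoCloseable wrapper and try-with-resources; FsHandle and acquire() are hypothetical stand-ins for dfs.getDfsOps(), not Hopsworks APIs.

// Minimal sketch: guarantee the filesystem handle is closed, even on failure.
public class FsExample {
  static class FsHandle implements AutoCloseable {
    void rm(String path, boolean recursive) { /* delete the path */ }
    @Override public void close() { /* release the underlying client */ }
  }

  static FsHandle acquire() { return new FsHandle(); }  // stand-in for dfs.getDfsOps()

  public static void removeTempDir(String projectName) {
    try (FsHandle fs = acquire()) {        // closed even if rm() throws
      fs.rm("/tmp/" + projectName, true);
    }
  }
}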
Use of io.hops.hopsworks.exceptions.GenericException in project hopsworks by logicalclocks.
The class FlinkController, method startJob:
public Execution startJob(final Jobs job, final Users user) throws GenericException, JobException, ServiceException {
  // First: some parameter checking.
  if (job == null) {
    throw new NullPointerException("Cannot run a null job.");
  } else if (user == null) {
    throw new NullPointerException("Cannot run a job as a null user.");
  } else if (job.getJobType() != JobType.FLINK) {
    throw new IllegalArgumentException("Job configuration is not a Flink job configuration.");
  }
  // Set the Hopsworks Consul service domain; don't use the address, use the name
  String username = hdfsUsersBean.getHdfsUserName(job.getProject(), user);
  FlinkJob flinkjob = null;
  try {
    String hopsworksRestEndpoint = "https://"
        + serviceDiscoveryController.constructServiceFQDNWithPort(
            ServiceDiscoveryController.HopsworksService.HOPSWORKS_APP);
    UserGroupInformation proxyUser = ugiService.getProxyUser(username);
    try {
      // build the job as the project-specific HDFS user, not as the Hopsworks service user
      flinkjob = proxyUser.doAs((PrivilegedExceptionAction<FlinkJob>) () ->
          new FlinkJob(job, submitter, user, hdfsUsersBean.getHdfsUserName(job.getProject(), job.getCreator()),
              settings, kafkaBrokers.getKafkaBrokersString(), hopsworksRestEndpoint, servingConfig,
              serviceDiscoveryController));
    } catch (InterruptedException ex) {
      LOGGER.log(Level.SEVERE, null, ex);
    }
  } catch (IOException ex) {
    throw new JobException(RESTCodes.JobErrorCode.PROXY_ERROR, Level.SEVERE,
        "job: " + job.getId() + ", user:" + user.getUsername(), ex.getMessage(), ex);
  } catch (ServiceDiscoveryException ex) {
    throw new ServiceException(RESTCodes.ServiceErrorCode.SERVICE_NOT_FOUND, Level.SEVERE,
        "job: " + job.getId() + ", user:" + user.getUsername(), ex.getMessage(), ex);
  }
  if (flinkjob == null) {
    throw new GenericException(RESTCodes.GenericErrorCode.UNKNOWN_ERROR, Level.WARNING,
        "Could not instantiate job with name: " + job.getName() + " and id: " + job.getId(),
        "flinkjob object was null");
  }
  Execution execution = flinkjob.requestExecutionId();
  submitter.startExecution(flinkjob);
  activityFacade.persistActivity(ActivityFacade.RAN_JOB, job.getProject(), user.asUser(), ActivityFlag.JOB);
  return execution;
}
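The key mechanism here is Hadoop user impersonation: the FlinkJob is constructed inside proxyUser.doAs(...) so the work runs with the project-specific HDFS user's credentials rather than the service account's. A minimal sketch of that proxy-user pattern using Hadoop's UserGroupInformation API; the user name and returned identifier are illustrative assumptions.

// Minimal sketch: run an action as a proxy (impersonated) Hadoop user.
import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.security.UserGroupInformation;

public class ProxySubmit {
  public static String submitAs(String hdfsUsername) throws Exception {
    // create a proxy UGI backed by the service's own login credentials
    UserGroupInformation proxy =
        UserGroupInformation.createProxyUser(hdfsUsername, UserGroupInformation.getLoginUser());
    return proxy.doAs((PrivilegedExceptionAction<String>) () -> {
      // everything in this block executes as hdfsUsername;
      // a real caller would build and submit the job here
      return "submitted-as-" + UserGroupInformation.getCurrentUser().getShortUserName();
    });
  }
}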