Search in sources :

Example 1 with TensorBoard

use of io.hops.hopsworks.persistence.entity.tensorflow.TensorBoard in project hopsworks by logicalclocks.

the class TensorBoardProxyServlet method service.

/**
 * Authorizes an incoming TensorBoard request and forwards it to the TensorBoard endpoint
 * named in the request URI.
 *
 * <p>A request comes in with the format:
 * {@code http://127.0.0.1:8080/hopsworks-api/tensorboard/application_1507065031551_0005/hopsworks0:59460/#graphs}
 *
 * <p>The {@code host:port} pair and the remainder of the path are extracted from the request URI.
 * Two kinds of requests are accepted:
 * <ul>
 *   <li>URIs containing {@code experiments}: the caller must have a TensorBoard whose endpoint
 *       equals the requested {@code host:port};</li>
 *   <li>URIs containing a YARN application id: the caller must be a member of the project that
 *       owns the application, and the application must not be FINISHED or KILLED.</li>
 * </ul>
 * Any other request is rejected with 403.
 *
 * <p>NOTE(review): {@code targetUri}, {@code targetUriObj} and {@code targetHost} are not declared
 * in this method, so they appear to be instance fields. If this servlet instance serves concurrent
 * requests they can race; confirm against the parent class.
 *
 * @param servletRequest the incoming request; must carry an authenticated principal
 * @param servletResponse the response used for error reporting and proxying
 * @throws ServletException if the proxied request fails in the parent servlet
 * @throws IOException if writing an error response fails
 */
@Override
protected void service(HttpServletRequest servletRequest, HttpServletResponse servletResponse) throws ServletException, IOException {
    // getUserPrincipal() returns null for unauthenticated requests; treat that like a missing email.
    java.security.Principal principal = servletRequest.getUserPrincipal();
    String email = principal == null ? null : principal.getName();
    if (Strings.isNullOrEmpty(email)) {
        servletResponse.sendError(Response.Status.FORBIDDEN.getStatusCode(), "You don't have access to this TensorBoard");
        return;
    }
    LOGGER.log(Level.FINE, "Request URL: {0}", servletRequest.getRequestURL());
    String uri = servletRequest.getRequestURI();
    // valid hostname regex:
    // https://stackoverflow.com/questions/106179/regular-expression-to-match-dns-hostname-or-ip-address
    Pattern urlPattern = Pattern.compile("([a-zA-Z0-9\\-\\.]{2,255}:[0-9]{4,6})(/.*$)");
    Matcher urlMatcher = urlPattern.matcher(uri);
    String hostPortPair = "";
    String uriToFinish = "/";
    if (urlMatcher.find()) {
        hostPortPair = urlMatcher.group(1);
        uriToFinish = urlMatcher.group(2);
    }
    if (hostPortPair.isEmpty()) {
        servletResponse.sendError(Response.Status.FORBIDDEN.getStatusCode(), "This TensorBoard is not accessible right now");
        return;
    }
    Matcher appMatcher = Pattern.compile("(application_.*?_\\d*)").matcher(uri);
    Matcher elasticMatcher = Pattern.compile("(experiments)").matcher(uri);
    if (elasticMatcher.find()) {
        List<TensorBoard> userTensorBoards = tensorBoardFacade.findByUserEmail(email);
        boolean foundTB = false;
        if (userTensorBoards != null) {
            for (TensorBoard tb : userTensorBoards) {
                // hostPortPair is known to be non-null here, so compare from its side.
                if (hostPortPair.equals(tb.getEndpoint())) {
                    foundTB = true;
                    break;
                }
            }
        }
        if (!foundTB) {
            servletResponse.sendError(Response.Status.FORBIDDEN.getStatusCode(), "This TensorBoard is not running right now");
            return;
        }
        forwardToTensorBoard(servletRequest, servletResponse, hostPortPair, uriToFinish, "This TensorBoard is not ready to serve requests right now, " + "try refreshing the page");
    } else if (appMatcher.find()) {
        String appId = appMatcher.group(1);
        YarnApplicationstate appState = yarnApplicationstateFacade.findByAppId(appId);
        if (appState == null) {
            servletResponse.sendError(Response.Status.FORBIDDEN.getStatusCode(), "You don't have the access right for this application");
            return;
        }
        Users user = userFacade.findByEmail(email);
        String projectName = hdfsUsersBean.getProjectName(appState.getAppuser());
        Project project = projectFacade.findByName(projectName);
        if (project == null) {
            servletResponse.sendError(Response.Status.BAD_REQUEST.getStatusCode(), "Project does not exists");
            return;
        }
        if (!projectTeamFacade.isUserMemberOfProject(project, user)) {
            servletResponse.sendError(Response.Status.BAD_REQUEST.getStatusCode(), "You don't have the access right for this application");
            return;
        }
        String smState = appState.getAppsmstate();
        if (smState != null && (smState.equalsIgnoreCase(YarnApplicationState.FINISHED.toString()) || smState.equalsIgnoreCase(YarnApplicationState.KILLED.toString()))) {
            servletResponse.sendError(Response.Status.NOT_FOUND.getStatusCode(), "This TensorBoard has finished running.");
            return;
        }
        forwardToTensorBoard(servletRequest, servletResponse, hostPortPair, uriToFinish, "This TensorBoard is not running right now.");
    } else {
        servletResponse.sendError(Response.Status.FORBIDDEN.getStatusCode(), "You don't have the access right for this application");
    }
}

/**
 * Sets the proxy target for an already-authorized request and delegates to the parent servlet.
 *
 * @param servletRequest the authorized request
 * @param servletResponse the response to proxy into
 * @param hostPortPair the {@code host:port} of the TensorBoard endpoint
 * @param uriToFinish the path (and anything after it) to request from the endpoint
 * @param unavailableMessage message sent with a 404 when the parent servlet fails with an IOException
 * @throws ServletException if the parent servlet fails
 * @throws IOException if writing an error response fails
 */
private void forwardToTensorBoard(HttpServletRequest servletRequest, HttpServletResponse servletResponse, String hostPortPair, String uriToFinish, String unavailableMessage) throws ServletException, IOException {
    targetUri = uriToFinish;
    URI targetUriHost;
    try {
        targetUriObj = new URI(targetUri);
        targetUriHost = new URI("http://" + hostPortPair);
    } catch (Exception e) {
        // Always answer the client; returning silently would leave an empty 200 response.
        LOGGER.log(Level.FINE, "An error occurred serving the request", e);
        servletResponse.sendError(Response.Status.INTERNAL_SERVER_ERROR.getStatusCode(), "An error occurred serving the request.");
        return;
    }
    targetHost = URIUtils.extractHost(targetUriHost);
    servletRequest.setAttribute(ATTR_TARGET_URI, targetUri);
    servletRequest.setAttribute(ATTR_TARGET_HOST, targetHost);
    servletRequest.setAttribute(ATTR_URI_FINISH, uriToFinish);
    servletRequest.setAttribute(ATTR_HOST_PORT, hostPortPair);
    try {
        super.service(servletRequest, servletResponse);
    } catch (IOException ex) {
        if (servletResponse.isCommitted()) {
            // sendError would throw IllegalStateException once part of the response has been written.
            LOGGER.log(Level.FINE, "TensorBoard request failed after the response was committed", ex);
            return;
        }
        servletResponse.sendError(Response.Status.NOT_FOUND.getStatusCode(), unavailableMessage);
    }
}
Also used : Pattern(java.util.regex.Pattern) Project(io.hops.hopsworks.persistence.entity.project.Project) Matcher(java.util.regex.Matcher) YarnApplicationstate(io.hops.hopsworks.persistence.entity.jobs.history.YarnApplicationstate) TensorBoard(io.hops.hopsworks.persistence.entity.tensorflow.TensorBoard) IOException(java.io.IOException) Users(io.hops.hopsworks.persistence.entity.user.Users) URI(java.net.URI) ServletException(javax.servlet.ServletException) IOException(java.io.IOException)

Example 2 with TensorBoard

use of io.hops.hopsworks.persistence.entity.tensorflow.TensorBoard in project hopsworks by logicalclocks.

the class TensorBoardKillTimer method rotate.

/**
 * Periodic cleanup of TensorBoards, scheduled every 10 minutes.
 *
 * <p>First cleans up every TensorBoard whose last-accessed time is older than the configured
 * maximum (in milliseconds), then sweeps the TensorBoard staging directories for {@code .pid}
 * files that have no matching TensorBoard row and kills those stray processes.
 *
 * <p>No exception escapes this method: failures are logged so that one bad run does not
 * propagate out of the timer callback.
 *
 * @param timer the timer that triggered this invocation (unused)
 */
@Schedule(persistent = false, minute = "*/10", hour = "*")
public void rotate(Timer timer) {
    try {
        LOGGER.log(Level.INFO, "Running TensorBoardKillTimer.");
        int tensorBoardMaxLastAccessed = settings.getTensorBoardMaxLastAccessed();
        Collection<TensorBoard> tensorBoardCollection = tensorBoardFacade.findAll();
        for (TensorBoard tensorBoard : tensorBoardCollection) {
            // Standard case, TB have been idle for a given amount of time
            Date accessed = tensorBoard.getLastAccessed();
            Date current = Calendar.getInstance().getTime();
            if ((current.getTime() - accessed.getTime()) > tensorBoardMaxLastAccessed) {
                try {
                    tensorBoardController.cleanup(tensorBoard);
                    LOGGER.log(Level.FINE, "Killed TensorBoard " + tensorBoard.toString() + " not accessed in the last " + tensorBoardMaxLastAccessed + " milliseconds");
                } catch (TensorBoardException ex) {
                    LOGGER.log(Level.SEVERE, "Failed to clean up running TensorBoard", ex);
                }
            }
        }
        // sanity check to make sure that all .pid files have a corresponding TB
        cleanupStrayTensorBoards();
    } catch (Exception e) {
        LOGGER.log(Level.SEVERE, "An error occurred while checking for expired TensorBoards to be cleaned up", e);
    }
}

/**
 * Kills TensorBoard processes that have a {@code .pid} file on disk but no row in the database,
 * and removes their directories.
 */
private void cleanupStrayTensorBoards() {
    // Re-read the table so TensorBoards cleaned up by the caller are no longer treated as known.
    List<TensorBoard> knownTensorBoards = tensorBoardFacade.findAll();
    String tbDirPath = settings.getStagingDir() + Settings.TENSORBOARD_DIRS;
    // File.listFiles() returns null when the path is not a directory or cannot be read.
    File[] tbDirs = new File(tbDirPath).listFiles();
    if (tbDirs == null) {
        return;
    }
    // For each project_projectmember directory try to find .pid file
    for (File currentTbDir : tbDirs) {
        File[] candidates = currentTbDir.listFiles();
        if (candidates == null) {
            continue;
        }
        for (File possiblePidFile : candidates) {
            if (!possiblePidFile.getName().endsWith(".pid")) {
                continue;
            }
            // Handle failures per file so one unreadable .pid does not stop the whole sweep.
            try {
                String cid = com.google.common.io.Files.readFirstLine(possiblePidFile, Charset.defaultCharset());
                if (cid == null) {
                    continue;
                }
                // do not kill TBs which are in the DB
                boolean tbExists = false;
                for (TensorBoard tb : knownTensorBoards) {
                    if (cid.equals(tb.getCid())) {
                        tbExists = true;
                        break;
                    }
                }
                if (!tbExists) {
                    LOGGER.log(Level.WARNING, "Detected a stray TensorBoard with pid " + cid + " in directory " + currentTbDir.getAbsolutePath() + ", cleaning up...");
                    tensorBoardProcessMgr.killTensorBoard(cid);
                    tensorBoardProcessMgr.removeTensorBoardDirectory(currentTbDir.getAbsolutePath());
                }
            } catch (IOException | NumberFormatException e) {
                LOGGER.log(Level.SEVERE, "Exception while reading .pid files", e);
            }
        }
    }
}
Also used : TensorBoard(io.hops.hopsworks.persistence.entity.tensorflow.TensorBoard) IOException(java.io.IOException) Date(java.util.Date) TensorBoardException(io.hops.hopsworks.exceptions.TensorBoardException) IOException(java.io.IOException) File(java.io.File) TensorBoardException(io.hops.hopsworks.exceptions.TensorBoardException) Schedule(javax.ejb.Schedule)

Example 3 with TensorBoard

use of io.hops.hopsworks.persistence.entity.tensorflow.TensorBoard in project hopsworks by logicalclocks.

the class TensorBoardFacade method findForProjectAndUser.

/**
 * Looks up the TensorBoard registered for the given project and user via the
 * {@code TensorBoard.findByProjectAndUser} named query.
 *
 * @param project the project whose id is bound to {@code projectId}
 * @param user the user whose uid is bound to {@code userId}
 * @return the matching TensorBoard, or {@code null} when the query yields no result
 */
public TensorBoard findForProjectAndUser(Project project, Users user) {
    try {
        return em.createNamedQuery("TensorBoard.findByProjectAndUser", TensorBoard.class)
            .setParameter("projectId", project.getId())
            .setParameter("userId", user.getUid())
            .getSingleResult();
    } catch (NoResultException ignored) {
        // Absence of a TensorBoard is an expected outcome, reported to the caller as null.
        return null;
    }
}
Also used : TensorBoard(io.hops.hopsworks.persistence.entity.tensorflow.TensorBoard) NoResultException(javax.persistence.NoResultException)

Example 4 with TensorBoard

use of io.hops.hopsworks.persistence.entity.tensorflow.TensorBoard in project hopsworks by logicalclocks.

the class TensorBoardFacade method remove.

/**
 * Deletes the persisted TensorBoard identified by the primary key of the given instance.
 *
 * <p>If no row exists for that key, the call is a no-op: the entity is already gone, and passing
 * {@code null} on to {@code merge} would throw an {@code IllegalArgumentException}.
 *
 * @param tensorBoard instance carrying the primary key of the TensorBoard to delete
 * @throws SecurityException rethrown after logging
 * @throws IllegalStateException rethrown after logging
 */
public void remove(TensorBoard tensorBoard) {
    try {
        TensorBoard managedTensorBoard = em.find(TensorBoard.class, tensorBoard.getTensorBoardPK());
        if (managedTensorBoard == null) {
            LOGGER.log(Level.FINE, "TensorBoard to remove was not found, nothing to do");
            return;
        }
        em.remove(em.merge(managedTensorBoard));
        em.flush();
    } catch (SecurityException | IllegalStateException ex) {
        LOGGER.log(Level.SEVERE, "Could not remove TensorBoard", ex);
        throw ex;
    }
}
Also used : TensorBoard(io.hops.hopsworks.persistence.entity.tensorflow.TensorBoard)

Example 5 with TensorBoard

use of io.hops.hopsworks.persistence.entity.tensorflow.TensorBoard in project hopsworks by logicalclocks.

the class TensorBoardFacade method findByMlId.

/**
 * Looks up a TensorBoard by its ML id via the {@code TensorBoard.findByMlId} named query.
 *
 * @param mlId value bound to the query's {@code mlId} parameter
 * @return the matching TensorBoard, or {@code null} when the query yields no result
 */
public TensorBoard findByMlId(String mlId) {
    try {
        return em.createNamedQuery("TensorBoard.findByMlId", TensorBoard.class)
            .setParameter("mlId", mlId)
            .getSingleResult();
    } catch (NoResultException ignored) {
        // Absence of a TensorBoard is an expected outcome, reported to the caller as null.
        return null;
    }
}
Also used : TensorBoard(io.hops.hopsworks.persistence.entity.tensorflow.TensorBoard) NoResultException(javax.persistence.NoResultException)

Aggregations

TensorBoard (io.hops.hopsworks.persistence.entity.tensorflow.TensorBoard)9 TensorBoardDTO (io.hops.hopsworks.common.dao.tensorflow.config.TensorBoardDTO)3 Date (java.util.Date)3 IOException (java.io.IOException)2 NoResultException (javax.persistence.NoResultException)2 TensorBoardException (io.hops.hopsworks.exceptions.TensorBoardException)1 HdfsUsers (io.hops.hopsworks.persistence.entity.hdfs.user.HdfsUsers)1 YarnApplicationstate (io.hops.hopsworks.persistence.entity.jobs.history.YarnApplicationstate)1 Project (io.hops.hopsworks.persistence.entity.project.Project)1 TensorBoardPK (io.hops.hopsworks.persistence.entity.tensorflow.TensorBoardPK)1 Users (io.hops.hopsworks.persistence.entity.user.Users)1 File (java.io.File)1 URI (java.net.URI)1 Matcher (java.util.regex.Matcher)1 Pattern (java.util.regex.Pattern)1 Schedule (javax.ejb.Schedule)1 TransactionAttribute (javax.ejb.TransactionAttribute)1 ServletException (javax.servlet.ServletException)1