Search in sources :

Example 1 with YarnMonitor

Use of io.hops.hopsworks.common.jobs.yarn.YarnMonitor in project hopsworks by logicalclocks.

From the class AbstractExecutionController, method retryLogAggregation.

/**
 * Retries copying the YARN-aggregated logs (stdout or stderr) of a finished
 * execution from the aggregated-log location into the job's HDFS log path,
 * then returns the log content via {@code getLog}.
 *
 * @param execution the execution whose logs should be recovered; must be in a final state
 * @param type      which log stream to recover ({@code OUT} or {@code ERR})
 * @return the log DTO for the requested log type
 * @throws JobException if the execution is still running, log aggregation is
 *         disabled, the aggregated logs no longer exist, or the copy fails
 */
@Override
public JobLogDTO retryLogAggregation(Execution execution, JobLogDTO.LogType type) throws JobException {
    if (!execution.getState().isFinalState()) {
        throw new JobException(RESTCodes.JobErrorCode.JOB_EXECUTION_INVALID_STATE, Level.FINE, "Job still running.");
    }
    DistributedFileSystemOps dfso = null;
    DistributedFileSystemOps udfso = null;
    Users user = execution.getUser();
    String hdfsUser = hdfsUsersController.getHdfsUserName(execution.getJob().getProject(), user);
    String aggregatedLogPath = settings.getAggregatedLogPath(hdfsUser, execution.getAppId());
    if (aggregatedLogPath == null) {
        throw new JobException(RESTCodes.JobErrorCode.JOB_LOG, Level.INFO, "Log aggregation is not enabled");
    }
    try {
        // dfso: superuser ops used only for the existence check;
        // udfso: ops impersonating the project's HDFS user, used for the copy itself.
        dfso = dfs.getDfsOps();
        udfso = dfs.getDfsOps(hdfsUser);
        if (!dfso.exists(aggregatedLogPath)) {
            throw new JobException(RESTCodes.JobErrorCode.JOB_LOG, Level.WARNING, "Logs not available. This could be caused by the retention policy.");
        }
        String hdfsLogPath = null;
        String[] desiredLogTypes = null;
        switch(type) {
            case OUT:
                hdfsLogPath = REMOTE_PROTOCOL + execution.getStdoutPath();
                desiredLogTypes = new String[] { type.name() };
                break;
            case ERR:
                hdfsLogPath = REMOTE_PROTOCOL + execution.getStderrPath();
                // ".log" entries are also harvested when recovering stderr.
                desiredLogTypes = new String[] { type.name(), ".log" };
                break;
            default:
                // Unsupported log type: nothing to copy; fall through to getLog below.
                break;
        }
        if (!Strings.isNullOrEmpty(hdfsLogPath)) {
            YarnClientWrapper yarnClientWrapper = ycs.getYarnClientSuper(settings.getConfiguration());
            ApplicationId applicationId = ConverterUtils.toApplicationId(execution.getAppId());
            YarnMonitor monitor = new YarnMonitor(applicationId, yarnClientWrapper, ycs);
            try {
                YarnLogUtil.copyAggregatedYarnLogs(udfso, aggregatedLogPath, hdfsLogPath, desiredLogTypes, monitor);
            } catch (InterruptedException ex) {
                // Restore the interrupt flag so callers up the stack can observe the interruption.
                Thread.currentThread().interrupt();
                LOGGER.log(Level.SEVERE, "Interrupted while copying aggregated YARN logs for appId: " + execution.getAppId(), ex);
                throw new JobException(RESTCodes.JobErrorCode.JOB_LOG, Level.SEVERE, ex.getMessage());
            } catch (IOException | YarnException ex) {
                // Fix: log with a real message (was null) and raise with a real Level (was null).
                LOGGER.log(Level.SEVERE, "Failed to copy aggregated YARN logs for appId: " + execution.getAppId(), ex);
                throw new JobException(RESTCodes.JobErrorCode.JOB_LOG, Level.SEVERE, ex.getMessage());
            } finally {
                monitor.close();
            }
        }
    } catch (IOException ex) {
        // Best-effort: log the failure and still return whatever log content is currently available.
        LOGGER.log(Level.SEVERE, "Error while retrying log aggregation for appId: " + execution.getAppId(), ex);
    } finally {
        if (dfso != null) {
            dfso.close();
        }
        if (udfso != null) {
            dfs.closeDfsClient(udfso);
        }
    }
    return getLog(execution, type);
}
Also used : JobException(io.hops.hopsworks.exceptions.JobException) DistributedFileSystemOps(io.hops.hopsworks.common.hdfs.DistributedFileSystemOps) Users(io.hops.hopsworks.persistence.entity.user.Users) IOException(java.io.IOException) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) YarnMonitor(io.hops.hopsworks.common.jobs.yarn.YarnMonitor) YarnClientWrapper(io.hops.hopsworks.common.yarn.YarnClientWrapper) YarnException(org.apache.hadoop.yarn.exceptions.YarnException)

Aggregations

DistributedFileSystemOps (io.hops.hopsworks.common.hdfs.DistributedFileSystemOps)1 YarnMonitor (io.hops.hopsworks.common.jobs.yarn.YarnMonitor)1 YarnClientWrapper (io.hops.hopsworks.common.yarn.YarnClientWrapper)1 JobException (io.hops.hopsworks.exceptions.JobException)1 Users (io.hops.hopsworks.persistence.entity.user.Users)1 IOException (java.io.IOException)1 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)1 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)1