Search in sources :

Example 1 with JobFiles

use of org.apache.hadoop.mapreduce.JobHistoryFileReplayHelper.JobFiles in project hadoop by apache.

the class JobHistoryFileReplayMapperV1 method map.

public void map(IntWritable key, IntWritable val, Context context) throws IOException {
    // collect the apps it needs to process
    TimelineClient tlc = new TimelineClientImpl();
    TimelineEntityConverterV1 converter = new TimelineEntityConverterV1();
    JobHistoryFileReplayHelper helper = new JobHistoryFileReplayHelper(context);
    int replayMode = helper.getReplayMode();
    Collection<JobFiles> jobs = helper.getJobFiles();
    JobHistoryFileParser parser = helper.getParser();
    if (jobs.isEmpty()) {
        LOG.info(context.getTaskAttemptID().getTaskID() + " will process no jobs");
    } else {
        LOG.info(context.getTaskAttemptID().getTaskID() + " will process " + jobs.size() + " jobs");
    }
    for (JobFiles job : jobs) {
        // process each job
        String jobIdStr = job.getJobId();
        LOG.info("processing " + jobIdStr + "...");
        JobId jobId = TypeConverter.toYarn(JobID.forName(jobIdStr));
        ApplicationId appId = jobId.getAppId();
        try {
            // parse the job info and configuration
            Path historyFilePath = job.getJobHistoryFilePath();
            Path confFilePath = job.getJobConfFilePath();
            if ((historyFilePath == null) || (confFilePath == null)) {
                continue;
            }
            JobInfo jobInfo = parser.parseHistoryFile(historyFilePath);
            Configuration jobConf = parser.parseConfiguration(confFilePath);
            LOG.info("parsed the job history file and the configuration file for job " + jobIdStr);
            // create entities from job history and write them
            long totalTime = 0;
            Set<TimelineEntity> entitySet = converter.createTimelineEntities(jobInfo, jobConf);
            LOG.info("converted them into timeline entities for job " + jobIdStr);
            // use the current user for this purpose
            UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
            long startWrite = System.nanoTime();
            try {
                switch(replayMode) {
                    case JobHistoryFileReplayHelper.WRITE_ALL_AT_ONCE:
                        writeAllEntities(tlc, entitySet, ugi);
                        break;
                    case JobHistoryFileReplayHelper.WRITE_PER_ENTITY:
                        writePerEntity(tlc, entitySet, ugi);
                        break;
                    default:
                        break;
                }
            } catch (Exception e) {
                context.getCounter(PerfCounters.TIMELINE_SERVICE_WRITE_FAILURES).increment(1);
                LOG.error("writing to the timeline service failed", e);
            }
            long endWrite = System.nanoTime();
            totalTime += TimeUnit.NANOSECONDS.toMillis(endWrite - startWrite);
            int numEntities = entitySet.size();
            LOG.info("wrote " + numEntities + " entities in " + totalTime + " ms");
            context.getCounter(PerfCounters.TIMELINE_SERVICE_WRITE_TIME).increment(totalTime);
            context.getCounter(PerfCounters.TIMELINE_SERVICE_WRITE_COUNTER).increment(numEntities);
        } finally {
            // move it along
            context.progress();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) JobFiles(org.apache.hadoop.mapreduce.JobHistoryFileReplayHelper.JobFiles) TimelineEntity(org.apache.hadoop.yarn.api.records.timeline.TimelineEntity) IOException(java.io.IOException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) TimelineClient(org.apache.hadoop.yarn.client.api.TimelineClient) JobInfo(org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo) TimelineClientImpl(org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)

Example 2 with JobFiles

use of org.apache.hadoop.mapreduce.JobHistoryFileReplayHelper.JobFiles in project hadoop by apache.

the class JobHistoryFileReplayMapperV2 method writeEntities.

@Override
protected void writeEntities(Configuration tlConf, TimelineCollectorManager manager, Context context) throws IOException {
    JobHistoryFileReplayHelper helper = new JobHistoryFileReplayHelper(context);
    int replayMode = helper.getReplayMode();
    JobHistoryFileParser parser = helper.getParser();
    TimelineEntityConverterV2 converter = new TimelineEntityConverterV2();
    // collect the apps it needs to process
    Collection<JobFiles> jobs = helper.getJobFiles();
    if (jobs.isEmpty()) {
        LOG.info(context.getTaskAttemptID().getTaskID() + " will process no jobs");
    } else {
        LOG.info(context.getTaskAttemptID().getTaskID() + " will process " + jobs.size() + " jobs");
    }
    for (JobFiles job : jobs) {
        // process each job
        String jobIdStr = job.getJobId();
        // skip if either of the file is missing
        if (job.getJobConfFilePath() == null || job.getJobHistoryFilePath() == null) {
            LOG.info(jobIdStr + " missing either the job history file or the " + "configuration file. Skipping.");
            continue;
        }
        LOG.info("processing " + jobIdStr + "...");
        JobId jobId = TypeConverter.toYarn(JobID.forName(jobIdStr));
        ApplicationId appId = jobId.getAppId();
        // create the app level timeline collector and start it
        AppLevelTimelineCollector collector = new AppLevelTimelineCollector(appId);
        manager.putIfAbsent(appId, collector);
        try {
            // parse the job info and configuration
            JobInfo jobInfo = parser.parseHistoryFile(job.getJobHistoryFilePath());
            Configuration jobConf = parser.parseConfiguration(job.getJobConfFilePath());
            LOG.info("parsed the job history file and the configuration file " + "for job " + jobIdStr);
            // set the context
            // flow id: job name, flow run id: timestamp, user id
            TimelineCollectorContext tlContext = collector.getTimelineEntityContext();
            tlContext.setFlowName(jobInfo.getJobname());
            tlContext.setFlowRunId(jobInfo.getSubmitTime());
            tlContext.setUserId(jobInfo.getUsername());
            // create entities from job history and write them
            long totalTime = 0;
            List<TimelineEntity> entitySet = converter.createTimelineEntities(jobInfo, jobConf);
            LOG.info("converted them into timeline entities for job " + jobIdStr);
            // use the current user for this purpose
            UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
            long startWrite = System.nanoTime();
            try {
                switch(replayMode) {
                    case JobHistoryFileReplayHelper.WRITE_ALL_AT_ONCE:
                        writeAllEntities(collector, entitySet, ugi);
                        break;
                    case JobHistoryFileReplayHelper.WRITE_PER_ENTITY:
                        writePerEntity(collector, entitySet, ugi);
                        break;
                    default:
                        break;
                }
            } catch (Exception e) {
                context.getCounter(PerfCounters.TIMELINE_SERVICE_WRITE_FAILURES).increment(1);
                LOG.error("writing to the timeline service failed", e);
            }
            long endWrite = System.nanoTime();
            totalTime += TimeUnit.NANOSECONDS.toMillis(endWrite - startWrite);
            int numEntities = entitySet.size();
            LOG.info("wrote " + numEntities + " entities in " + totalTime + " ms");
            context.getCounter(PerfCounters.TIMELINE_SERVICE_WRITE_TIME).increment(totalTime);
            context.getCounter(PerfCounters.TIMELINE_SERVICE_WRITE_COUNTER).increment(numEntities);
        } finally {
            manager.remove(appId);
            // move it along
            context.progress();
        }
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) AppLevelTimelineCollector(org.apache.hadoop.yarn.server.timelineservice.collector.AppLevelTimelineCollector) JobFiles(org.apache.hadoop.mapreduce.JobHistoryFileReplayHelper.JobFiles) TimelineEntity(org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity) IOException(java.io.IOException) JobInfo(org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo) TimelineCollectorContext(org.apache.hadoop.yarn.server.timelineservice.collector.TimelineCollectorContext) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)

Aggregations

IOException (java.io.IOException)2 Configuration (org.apache.hadoop.conf.Configuration)2 JobFiles (org.apache.hadoop.mapreduce.JobHistoryFileReplayHelper.JobFiles)2 JobInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo)2 JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)2 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)2 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)2 Path (org.apache.hadoop.fs.Path)1 TimelineEntity (org.apache.hadoop.yarn.api.records.timeline.TimelineEntity)1 TimelineEntity (org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity)1 TimelineClient (org.apache.hadoop.yarn.client.api.TimelineClient)1 TimelineClientImpl (org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl)1 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)1 AppLevelTimelineCollector (org.apache.hadoop.yarn.server.timelineservice.collector.AppLevelTimelineCollector)1 TimelineCollectorContext (org.apache.hadoop.yarn.server.timelineservice.collector.TimelineCollectorContext)1