Example 11 with JobInfo

use of org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo in project hadoop by apache.

the class TestJobHistoryParsing method testMultipleFailedTasks.

@Test
public void testMultipleFailedTasks() throws Exception {
    JobHistoryParser parser = new JobHistoryParser(Mockito.mock(FSDataInputStream.class));
    EventReader reader = Mockito.mock(EventReader.class);
    // Hack! Use a shared counter to script the sequence of events the mock returns.
    final AtomicInteger numEventsRead = new AtomicInteger(0);
    final org.apache.hadoop.mapreduce.TaskType taskType = org.apache.hadoop.mapreduce.TaskType.MAP;
    final TaskID[] tids = new TaskID[2];
    final JobID jid = new JobID("1", 1);
    tids[0] = new TaskID(jid, taskType, 0);
    tids[1] = new TaskID(jid, taskType, 1);
    Mockito.when(reader.getNextEvent()).thenAnswer(new Answer<HistoryEvent>() {

        @Override
        public HistoryEvent answer(InvocationOnMock invocation) throws IOException {
            // send two task start and two task fail events for tasks 0 and 1
            int eventId = numEventsRead.getAndIncrement();
            TaskID tid = tids[eventId & 0x1];
            if (eventId < 2) {
                return new TaskStartedEvent(tid, 0, taskType, "");
            }
            if (eventId < 4) {
                TaskFailedEvent tfe = new TaskFailedEvent(tid, 0, taskType, "failed", "FAILED", null, new Counters());
                // round-trip the Avro datum to exercise setDatum()/getDatum()
                tfe.setDatum(tfe.getDatum());
                return tfe;
            }
            if (eventId < 5) {
                JobUnsuccessfulCompletionEvent juce = new JobUnsuccessfulCompletionEvent(jid, 100L, 2, 0, "JOB_FAILED", Collections.singletonList("Task failed: " + tids[0].toString()));
                return juce;
            }
            return null;
        }
    });
    JobInfo info = parser.parse(reader);
    assertTrue("Task 0 not implicated", info.getErrorInfo().contains(tids[0].toString()));
}
Also used : EventReader (org.apache.hadoop.mapreduce.jobhistory.EventReader), TaskID (org.apache.hadoop.mapreduce.TaskID), JobUnsuccessfulCompletionEvent (org.apache.hadoop.mapreduce.jobhistory.JobUnsuccessfulCompletionEvent), IOException (java.io.IOException), HistoryEvent (org.apache.hadoop.mapreduce.jobhistory.HistoryEvent), TaskStartedEvent (org.apache.hadoop.mapreduce.jobhistory.TaskStartedEvent), JobHistoryParser (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), JobInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo), InvocationOnMock (org.mockito.invocation.InvocationOnMock), TaskFailedEvent (org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), Counters (org.apache.hadoop.mapreduce.Counters), JobID (org.apache.hadoop.mapreduce.JobID), Test (org.junit.Test)
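
The same stubbing reads more compactly with a Java 8 lambda in place of the anonymous Answer class. A minimal sketch (not from the Hadoop source), assuming Java 8+ and the same imports as the test above:

final org.apache.hadoop.mapreduce.TaskType taskType = org.apache.hadoop.mapreduce.TaskType.MAP;
final JobID jid = new JobID("1", 1);
final TaskID[] tids = { new TaskID(jid, taskType, 0), new TaskID(jid, taskType, 1) };
final AtomicInteger numEventsRead = new AtomicInteger(0);
EventReader reader = Mockito.mock(EventReader.class);
// script the same sequence: two task starts, two task failures, one job-failed event, then end-of-stream
Mockito.when(reader.getNextEvent()).thenAnswer(invocation -> {
    int eventId = numEventsRead.getAndIncrement();
    TaskID tid = tids[eventId & 0x1];
    if (eventId < 2) {
        return new TaskStartedEvent(tid, 0, taskType, "");
    }
    if (eventId < 4) {
        return new TaskFailedEvent(tid, 0, taskType, "failed", "FAILED", null, new Counters());
    }
    if (eventId < 5) {
        return new JobUnsuccessfulCompletionEvent(jid, 100L, 2, 0, "JOB_FAILED",
                Collections.singletonList("Task failed: " + tids[0].toString()));
    }
    // returning null tells JobHistoryParser.parse(EventReader) the event stream is exhausted
    return null;
});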

Example 12 with JobInfo

use of org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo in project hadoop by apache.

the class TestJobHistoryParsing method testFailedJobHistoryWithoutDiagnostics.

@Test
public void testFailedJobHistoryWithoutDiagnostics() throws Exception {
    final Path histPath = new Path(getClass().getClassLoader().getResource("job_1393307629410_0001-1393307687476-user-Sleep+job-1393307723835-0-0-FAILED-default-1393307693920.jhist").getFile());
    final FileSystem lfs = FileSystem.getLocal(new Configuration());
    final FSDataInputStream fsdis = lfs.open(histPath);
    try {
        JobHistoryParser parser = new JobHistoryParser(fsdis);
        JobInfo info = parser.parse();
        assertEquals("History parsed jobId incorrectly", info.getJobId(), JobID.forName("job_1393307629410_0001"));
        assertEquals("Default diagnostics incorrect ", "", info.getErrorInfo());
    } finally {
        fsdis.close();
    }
}
Also used : Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), JobHistoryParser (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser), JobInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo), FileSystem (org.apache.hadoop.fs.FileSystem), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), Test (org.junit.Test)
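
Because FSDataInputStream is Closeable, the try/finally in this example collapses to try-with-resources on Java 7+. A minimal sketch of the same parse, assuming the same bundled .jhist test resource:

final Path histPath = new Path(getClass().getClassLoader().getResource("job_1393307629410_0001-1393307687476-user-Sleep+job-1393307723835-0-0-FAILED-default-1393307693920.jhist").getFile());
final FileSystem lfs = FileSystem.getLocal(new Configuration());
// the stream is closed automatically, even if parse() throws
try (FSDataInputStream fsdis = lfs.open(histPath)) {
    JobInfo info = new JobHistoryParser(fsdis).parse();
    assertEquals("History parsed jobId incorrectly", JobID.forName("job_1393307629410_0001"), info.getJobId());
    assertEquals("Default diagnostics incorrect ", "", info.getErrorInfo());
}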

Example 13 with JobInfo

use of org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo in project hadoop by apache.

the class TestJobHistoryParsing method testHistoryParsingForFailedAttempts.

@Test(timeout = 30000)
public void testHistoryParsingForFailedAttempts() throws Exception {
    LOG.info("STARTING testHistoryParsingForFailedAttempts");
    try {
        Configuration conf = new Configuration();
        conf.setClass(NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, MyResolver.class, DNSToSwitchMapping.class);
        RackResolver.init(conf);
        MRApp app = new MRAppWithHistoryWithFailedAttempt(2, 1, true, this.getClass().getName(), true);
        app.submit(conf);
        Job job = app.getContext().getAllJobs().values().iterator().next();
        JobId jobId = job.getID();
        app.waitForState(job, JobState.SUCCEEDED);
        // make sure all events are flushed
        app.waitForState(Service.STATE.STOPPED);
        JobHistory jobHistory = new JobHistory();
        jobHistory.init(conf);
        HistoryFileInfo fileInfo = jobHistory.getJobFileInfo(jobId);
        JobHistoryParser parser;
        JobInfo jobInfo;
        synchronized (fileInfo) {
            Path historyFilePath = fileInfo.getHistoryFile();
            FSDataInputStream in = null;
            FileContext fc = null;
            try {
                fc = FileContext.getFileContext(conf);
                in = fc.open(fc.makeQualified(historyFilePath));
            } catch (IOException ioe) {
                LOG.info("Can not open history file: " + historyFilePath, ioe);
                throw new Exception("Can not open History File");
            }
            parser = new JobHistoryParser(in);
            jobInfo = parser.parse();
        }
        Exception parseException = parser.getParseException();
        Assert.assertNull("Caught an unexpected exception " + parseException, parseException);
        int numFailedAttempts = 0;
        Map<TaskID, TaskInfo> allTasks = jobInfo.getAllTasks();
        for (Task task : job.getTasks().values()) {
            TaskInfo taskInfo = allTasks.get(TypeConverter.fromYarn(task.getID()));
            for (TaskAttempt taskAttempt : task.getAttempts().values()) {
                TaskAttemptInfo taskAttemptInfo = taskInfo.getAllTaskAttempts().get(TypeConverter.fromYarn((taskAttempt.getID())));
                // Verify rack-name for all task attempts
                Assert.assertEquals("rack-name is incorrect", taskAttemptInfo.getRackname(), RACK_NAME);
                if (taskAttemptInfo.getTaskStatus().equals("FAILED")) {
                    numFailedAttempts++;
                }
            }
        }
        Assert.assertEquals("No of Failed tasks doesn't match.", 2, noOffailedAttempts);
    } finally {
        LOG.info("FINISHED testHistoryParsingForFailedAttempts");
    }
}
Also used : Path (org.apache.hadoop.fs.Path), HistoryFileInfo (org.apache.hadoop.mapreduce.v2.hs.HistoryFileManager.HistoryFileInfo), Task (org.apache.hadoop.mapreduce.v2.app.job.Task), TaskID (org.apache.hadoop.mapreduce.TaskID), Configuration (org.apache.hadoop.conf.Configuration), IOException (java.io.IOException), TaskInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo), JobHistoryParser (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser), JobInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo), TaskAttemptInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt), Job (org.apache.hadoop.mapreduce.v2.app.job.Job), JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId), FileContext (org.apache.hadoop.fs.FileContext), MRApp (org.apache.hadoop.mapreduce.v2.app.MRApp), Test (org.junit.Test)
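
The attempt-counting loop in the middle of this test factors cleanly into a helper that works on the parsed JobInfo alone. A minimal sketch (not from the Hadoop source), using only the TaskInfo and TaskAttemptInfo accessors shown above:

// count task attempts whose recorded status equals the given one, e.g. "FAILED"
private static int countAttemptsWithStatus(JobInfo jobInfo, String status) {
    int count = 0;
    for (TaskInfo taskInfo : jobInfo.getAllTasks().values()) {
        for (TaskAttemptInfo attemptInfo : taskInfo.getAllTaskAttempts().values()) {
            if (status.equals(attemptInfo.getTaskStatus())) {
                count++;
            }
        }
    }
    return count;
}

With that helper, the final assertion reduces to a one-liner; the rack-name check still needs the per-attempt loop against the live Job.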

Example 14 with JobInfo

use of org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo in project hadoop by apache.

the class JobHistoryFileReplayMapperV2 method writeEntities.

@Override
protected void writeEntities(Configuration tlConf, TimelineCollectorManager manager, Context context) throws IOException {
    JobHistoryFileReplayHelper helper = new JobHistoryFileReplayHelper(context);
    int replayMode = helper.getReplayMode();
    JobHistoryFileParser parser = helper.getParser();
    TimelineEntityConverterV2 converter = new TimelineEntityConverterV2();
    // collect the apps it needs to process
    Collection<JobFiles> jobs = helper.getJobFiles();
    if (jobs.isEmpty()) {
        LOG.info(context.getTaskAttemptID().getTaskID() + " will process no jobs");
    } else {
        LOG.info(context.getTaskAttemptID().getTaskID() + " will process " + jobs.size() + " jobs");
    }
    for (JobFiles job : jobs) {
        // process each job
        String jobIdStr = job.getJobId();
        // skip if either of the files is missing
        if (job.getJobConfFilePath() == null || job.getJobHistoryFilePath() == null) {
            LOG.info(jobIdStr + " missing either the job history file or the " + "configuration file. Skipping.");
            continue;
        }
        LOG.info("processing " + jobIdStr + "...");
        JobId jobId = TypeConverter.toYarn(JobID.forName(jobIdStr));
        ApplicationId appId = jobId.getAppId();
        // create the app level timeline collector and start it
        AppLevelTimelineCollector collector = new AppLevelTimelineCollector(appId);
        manager.putIfAbsent(appId, collector);
        try {
            // parse the job info and configuration
            JobInfo jobInfo = parser.parseHistoryFile(job.getJobHistoryFilePath());
            Configuration jobConf = parser.parseConfiguration(job.getJobConfFilePath());
            LOG.info("parsed the job history file and the configuration file " + "for job " + jobIdStr);
            // set the context
            // flow id: job name, flow run id: timestamp, user id
            TimelineCollectorContext tlContext = collector.getTimelineEntityContext();
            tlContext.setFlowName(jobInfo.getJobname());
            tlContext.setFlowRunId(jobInfo.getSubmitTime());
            tlContext.setUserId(jobInfo.getUsername());
            // create entities from job history and write them
            long totalTime = 0;
            List<TimelineEntity> entitySet = converter.createTimelineEntities(jobInfo, jobConf);
            LOG.info("converted them into timeline entities for job " + jobIdStr);
            // use the current user for this purpose
            UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
            long startWrite = System.nanoTime();
            try {
                switch(replayMode) {
                    case JobHistoryFileReplayHelper.WRITE_ALL_AT_ONCE:
                        writeAllEntities(collector, entitySet, ugi);
                        break;
                    case JobHistoryFileReplayHelper.WRITE_PER_ENTITY:
                        writePerEntity(collector, entitySet, ugi);
                        break;
                    default:
                        break;
                }
            } catch (Exception e) {
                context.getCounter(PerfCounters.TIMELINE_SERVICE_WRITE_FAILURES).increment(1);
                LOG.error("writing to the timeline service failed", e);
            }
            long endWrite = System.nanoTime();
            totalTime += TimeUnit.NANOSECONDS.toMillis(endWrite - startWrite);
            int numEntities = entitySet.size();
            LOG.info("wrote " + numEntities + " entities in " + totalTime + " ms");
            context.getCounter(PerfCounters.TIMELINE_SERVICE_WRITE_TIME).increment(totalTime);
            context.getCounter(PerfCounters.TIMELINE_SERVICE_WRITE_COUNTER).increment(numEntities);
        } finally {
            manager.remove(appId);
            // move it along
            context.progress();
        }
    }
}
Also used : Configuration (org.apache.hadoop.conf.Configuration), AppLevelTimelineCollector (org.apache.hadoop.yarn.server.timelineservice.collector.AppLevelTimelineCollector), JobFiles (org.apache.hadoop.mapreduce.JobHistoryFileReplayHelper.JobFiles), TimelineEntity (org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity), IOException (java.io.IOException), JobInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo), TimelineCollectorContext (org.apache.hadoop.yarn.server.timelineservice.collector.TimelineCollectorContext), ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId), JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId), UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)
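
The flow-context setup in the middle of writeEntities is self-contained enough to pull out. A minimal sketch (not from the Hadoop source) of the same mapping from a parsed job onto the collector's context, with the same imports as above:

// flow name := job name, flow run id := submit time, user id := submitting user,
// exactly as the mapper above sets them
static void setFlowContext(AppLevelTimelineCollector collector, JobInfo jobInfo) {
    TimelineCollectorContext tlContext = collector.getTimelineEntityContext();
    tlContext.setFlowName(jobInfo.getJobname());
    tlContext.setFlowRunId(jobInfo.getSubmitTime());
    tlContext.setUserId(jobInfo.getUsername());
}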

Aggregations

JobInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo) - 14 usages
JobHistoryParser (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser) - 11 usages
Configuration (org.apache.hadoop.conf.Configuration) - 10 usages
Path (org.apache.hadoop.fs.Path) - 10 usages
Test (org.junit.Test) - 10 usages
IOException (java.io.IOException) - 8 usages
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream) - 7 usages
JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId) - 6 usages
HistoryFileInfo (org.apache.hadoop.mapreduce.v2.hs.HistoryFileManager.HistoryFileInfo) - 5 usages
FileContext (org.apache.hadoop.fs.FileContext) - 4 usages
TaskID (org.apache.hadoop.mapreduce.TaskID) - 4 usages
TaskInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo) - 4 usages
MRApp (org.apache.hadoop.mapreduce.v2.app.MRApp) - 4 usages
Job (org.apache.hadoop.mapreduce.v2.app.job.Job) - 4 usages
HashMap (java.util.HashMap) - 3 usages
TaskAttemptInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo) - 3 usages
Map (java.util.Map) - 2 usages
AtomicInteger (java.util.concurrent.atomic.AtomicInteger) - 2 usages
JobFiles (org.apache.hadoop.mapreduce.JobHistoryFileReplayHelper.JobFiles) - 2 usages
EventReader (org.apache.hadoop.mapreduce.jobhistory.EventReader) - 2 usages
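
Taken together, the aggregated types cover a small end-to-end read path: open the history file, parse it, surface any stored parse error, and read the summary off the JobInfo. A minimal sketch (the history file path here is hypothetical):

Configuration conf = new Configuration();
FileContext fc = FileContext.getFileContext(conf);
// hypothetical location; substitute a real .jhist path
Path historyFilePath = new Path("/tmp/history/job_1393307629410_0001.jhist");
FSDataInputStream in = fc.open(fc.makeQualified(historyFilePath));
try {
    JobHistoryParser parser = new JobHistoryParser(in);
    JobInfo jobInfo = parser.parse();
    // as in Example 13, parse errors are stored rather than thrown, so check explicitly
    Exception parseException = parser.getParseException();
    if (parseException != null) {
        throw new IOException("failed to parse " + historyFilePath, parseException);
    }
    System.out.println(jobInfo.getJobId() + " (" + jobInfo.getJobname() + ") ran "
            + jobInfo.getAllTasks().size() + " tasks for user " + jobInfo.getUsername());
} finally {
    in.close();
}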