
Example 36 with Job

use of org.apache.hadoop.mapreduce.v2.app.job.Job in project hadoop by apache.

the class TestSpeculativeExecutionWithMRApp method testSepculateSuccessfulWithUpdateEvents.

@Test
public void testSepculateSuccessfulWithUpdateEvents() throws Exception {
    Clock actualClock = SystemClock.getInstance();
    final ControlledClock clock = new ControlledClock(actualClock);
    clock.setTime(System.currentTimeMillis());
    MRApp app = new MRApp(NUM_MAPPERS, NUM_REDUCERS, false, "test", true, clock);
    Job job = app.submit(new Configuration(), true, true);
    app.waitForState(job, JobState.RUNNING);
    Map<TaskId, Task> tasks = job.getTasks();
    Assert.assertEquals("Num tasks is not correct", NUM_MAPPERS + NUM_REDUCERS, tasks.size());
    Iterator<Task> taskIter = tasks.values().iterator();
    while (taskIter.hasNext()) {
        app.waitForState(taskIter.next(), TaskState.RUNNING);
    }
    // Process the update events
    clock.setTime(System.currentTimeMillis() + 1000);
    EventHandler appEventHandler = app.getContext().getEventHandler();
    for (Map.Entry<TaskId, Task> mapTask : tasks.entrySet()) {
        for (Map.Entry<TaskAttemptId, TaskAttempt> taskAttempt : mapTask.getValue().getAttempts().entrySet()) {
            TaskAttemptStatus status = createTaskAttemptStatus(taskAttempt.getKey(), (float) 0.5, TaskAttemptState.RUNNING);
            TaskAttemptStatusUpdateEvent event = new TaskAttemptStatusUpdateEvent(taskAttempt.getKey(), status);
            appEventHandler.handle(event);
        }
    }
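    // Let all but one attempt finish; the remaining slow attempt becomes
    // the candidate for speculation.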
    Task speculatedTask = null;
    int numTasksToFinish = NUM_MAPPERS + NUM_REDUCERS - 1;
    clock.setTime(System.currentTimeMillis() + 1000);
    for (Map.Entry<TaskId, Task> task : tasks.entrySet()) {
        for (Map.Entry<TaskAttemptId, TaskAttempt> taskAttempt : task.getValue().getAttempts().entrySet()) {
            if (numTasksToFinish > 0) {
                appEventHandler.handle(new TaskAttemptEvent(taskAttempt.getKey(), TaskAttemptEventType.TA_DONE));
                appEventHandler.handle(new TaskAttemptEvent(taskAttempt.getKey(), TaskAttemptEventType.TA_CONTAINER_COMPLETED));
                numTasksToFinish--;
                app.waitForState(taskAttempt.getValue(), TaskAttemptState.SUCCEEDED);
            } else {
                // The last task is chosen for speculation
                TaskAttemptStatus status = createTaskAttemptStatus(taskAttempt.getKey(), (float) 0.75, TaskAttemptState.RUNNING);
                speculatedTask = task.getValue();
                TaskAttemptStatusUpdateEvent event = new TaskAttemptStatusUpdateEvent(taskAttempt.getKey(), status);
                appEventHandler.handle(event);
            }
        }
    }
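    // Jump the clock ahead and report the unfinished attempt as still
    // running at 75% so the speculator flags it as a straggler.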
    clock.setTime(System.currentTimeMillis() + 15000);
    for (Map.Entry<TaskId, Task> task : tasks.entrySet()) {
        for (Map.Entry<TaskAttemptId, TaskAttempt> taskAttempt : task.getValue().getAttempts().entrySet()) {
            if (taskAttempt.getValue().getState() != TaskAttemptState.SUCCEEDED) {
                TaskAttemptStatus status = createTaskAttemptStatus(taskAttempt.getKey(), (float) 0.75, TaskAttemptState.RUNNING);
                TaskAttemptStatusUpdateEvent event = new TaskAttemptStatusUpdateEvent(taskAttempt.getKey(), status);
                appEventHandler.handle(event);
            }
        }
    }
    final Task speculatedTaskConst = speculatedTask;
    GenericTestUtils.waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            if (speculatedTaskConst.getAttempts().size() != 2) {
                clock.setTime(System.currentTimeMillis() + 1000);
                return false;
            } else {
                return true;
            }
        }
    }, 1000, 60000);
    TaskAttempt[] ta = makeFirstAttemptWin(appEventHandler, speculatedTask);
    verifySpeculationMessage(app, ta);
    app.waitForState(Service.STATE.STOPPED);
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) EventHandler(org.apache.hadoop.yarn.event.EventHandler) TaskAttemptStatus(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus) TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) Clock(org.apache.hadoop.yarn.util.Clock) ControlledClock(org.apache.hadoop.yarn.util.ControlledClock) SystemClock(org.apache.hadoop.yarn.util.SystemClock) TaskAttemptStatusUpdateEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) Map(java.util.Map) MRApp(org.apache.hadoop.mapreduce.v2.app.MRApp) Test(org.junit.Test)
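
The helpers called above (createTaskAttemptStatus, makeFirstAttemptWin, verifySpeculationMessage) are private to the test class and not shown in this excerpt. As a minimal sketch, createTaskAttemptStatus could look like the following; the public fields on TaskAttemptStatus are inferred from the call sites and should be treated as assumptions:

private TaskAttemptStatus createTaskAttemptStatus(TaskAttemptId id, float progress, TaskAttemptState state) {
    // TaskAttemptStatus is a simple holder; populate only what the
    // speculator needs: which attempt, how far along, and its state.
    TaskAttemptStatus status = new TaskAttemptStatus();
    status.id = id;
    status.progress = progress;
    status.taskState = state;
    return status;
}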

Example 37 with Job

use of org.apache.hadoop.mapreduce.v2.app.job.Job in project hadoop by apache.

the class TestMRJobs method testThreadDumpOnTaskTimeout.

@Test(timeout = 120000)
public void testThreadDumpOnTaskTimeout() throws IOException, InterruptedException, ClassNotFoundException {
    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
        return;
    }
    final SleepJob sleepJob = new SleepJob();
    final JobConf sleepConf = new JobConf(mrCluster.getConfig());
    sleepConf.setLong(MRJobConfig.TASK_TIMEOUT, 3 * 1000L);
    sleepConf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 1);
    sleepJob.setConf(sleepConf);
    if (this instanceof TestUberAM) {
        sleepConf.setInt(MRJobConfig.MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS, 30 * 1000);
    }
    // the map sleeps for 10 minutes, far longer than the 3-second task
    // timeout, so the attempt times out and is killed after a thread dump
    final Job job = sleepJob.createJob(1, 0, 10 * 60 * 1000L, 1, 0L, 0);
    job.setJarByClass(SleepJob.class);
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.waitForCompletion(true);
    final JobId jobId = TypeConverter.toYarn(job.getJobID());
    final ApplicationId appID = jobId.getAppId();
    int pollElapsed = 0;
    while (true) {
        Thread.sleep(1000);
        pollElapsed += 1000;
        if (TERMINAL_RM_APP_STATES.contains(mrCluster.getResourceManager().getRMContext().getRMApps().get(appID).getState())) {
            break;
        }
        if (pollElapsed >= 60000) {
            LOG.warn("application did not reach terminal state within 60 seconds");
            break;
        }
    }
    // Job finished, verify logs
    //
    final String appIdStr = appID.toString();
    final String appIdSuffix = appIdStr.substring("application_".length());
    final String containerGlob = "container_" + appIdSuffix + "_*_*";
    final String syslogGlob = appIdStr + Path.SEPARATOR + containerGlob + Path.SEPARATOR + TaskLog.LogName.SYSLOG;
    int numAppMasters = 0;
    int numMapTasks = 0;
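    // Scan each NodeManager's log directories for this app's container logs.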
    for (int i = 0; i < NUM_NODE_MGRS; i++) {
        final Configuration nmConf = mrCluster.getNodeManager(i).getConfig();
        for (String logDir : nmConf.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)) {
            final Path absSyslogGlob = new Path(logDir + Path.SEPARATOR + syslogGlob);
            LOG.info("Checking for glob: " + absSyslogGlob);
            for (FileStatus syslog : localFs.globStatus(absSyslogGlob)) {
                boolean foundAppMaster = false;
                boolean foundThreadDump = false;
                // Determine the container type
                final BufferedReader syslogReader = new BufferedReader(new InputStreamReader(localFs.open(syslog.getPath())));
                try {
                    for (String line; (line = syslogReader.readLine()) != null; ) {
                        if (line.contains(MRAppMaster.class.getName())) {
                            foundAppMaster = true;
                            break;
                        }
                    }
                } finally {
                    syslogReader.close();
                }
                // Check for thread dump in stdout
                final Path stdoutPath = new Path(syslog.getPath().getParent(), TaskLog.LogName.STDOUT.toString());
                final BufferedReader stdoutReader = new BufferedReader(new InputStreamReader(localFs.open(stdoutPath)));
                try {
                    for (String line; (line = stdoutReader.readLine()) != null; ) {
                        if (line.contains("Full thread dump")) {
                            foundThreadDump = true;
                            break;
                        }
                    }
                } finally {
                    stdoutReader.close();
                }
                if (foundAppMaster) {
                    numAppMasters++;
                    if (this instanceof TestUberAM) {
                        Assert.assertTrue("No thread dump", foundThreadDump);
                    } else {
                        Assert.assertFalse("Unexpected thread dump", foundThreadDump);
                    }
                } else {
                    numMapTasks++;
                    Assert.assertTrue("No thread dump", foundThreadDump);
                }
            }
        }
    }
    // Make sure we checked non-empty set
    //
    Assert.assertEquals("No AppMaster log found!", 1, numAppMasters);
    if (sleepConf.getBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false)) {
        Assert.assertSame("MapTask log with uber found!", 0, numMapTasks);
    } else {
        Assert.assertSame("No MapTask log found!", 1, numMapTasks);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MRAppMaster(org.apache.hadoop.mapreduce.v2.app.MRAppMaster) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) SleepJob(org.apache.hadoop.mapreduce.SleepJob) RunningJob(org.apache.hadoop.mapred.RunningJob) Job(org.apache.hadoop.mapreduce.Job) RandomTextWriterJob(org.apache.hadoop.RandomTextWriterJob) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) File(java.io.File) JobConf(org.apache.hadoop.mapred.JobConf) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)
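
Both this test and the history-service test below wait for the application to reach a terminal ResourceManager state with the same one-second poll and 60-second cap. A minimal sketch of that pattern as a reusable helper, assuming the mrCluster, TERMINAL_RM_APP_STATES, and LOG members that the test class already provides:

private void waitForAppTerminalState(ApplicationId appID) throws InterruptedException {
    int pollElapsed = 0;
    while (true) {
        // Poll the ResourceManager once per second for the app's state.
        Thread.sleep(1000);
        pollElapsed += 1000;
        if (TERMINAL_RM_APP_STATES.contains(mrCluster.getResourceManager().getRMContext().getRMApps().get(appID).getState())) {
            break;
        }
        if (pollElapsed >= 60000) {
            LOG.warn("application did not reach terminal state within 60 seconds");
            break;
        }
    }
}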

Example 38 with Job

use of org.apache.hadoop.mapreduce.v2.app.job.Job in project hadoop by apache.

the class TestMRJobsWithHistoryService method testJobHistoryData.

@Test(timeout = 90000)
public void testJobHistoryData() throws IOException, InterruptedException, AvroRemoteException, ClassNotFoundException {
    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
        return;
    }
    SleepJob sleepJob = new SleepJob();
    sleepJob.setConf(mrCluster.getConfig());
    // Job with 3 maps and 2 reduces
    Job job = sleepJob.createJob(3, 2, 1000, 1, 500, 1);
    job.setJarByClass(SleepJob.class);
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.waitForCompletion(true);
    Counters counterMR = job.getCounters();
    JobId jobId = TypeConverter.toYarn(job.getJobID());
    ApplicationId appID = jobId.getAppId();
    int pollElapsed = 0;
    while (true) {
        Thread.sleep(1000);
        pollElapsed += 1000;
        if (TERMINAL_RM_APP_STATES.contains(mrCluster.getResourceManager().getRMContext().getRMApps().get(appID).getState())) {
            break;
        }
        if (pollElapsed >= 60000) {
            LOG.warn("application did not reach terminal state within 60 seconds");
            break;
        }
    }
    Assert.assertEquals(RMAppState.FINISHED, mrCluster.getResourceManager().getRMContext().getRMApps().get(appID).getState());
    Counters counterHS = job.getCounters();
    // TODO: the assert below has been passing; decide whether to compare
    // each field individually or convert to v2 counters before comparing
    LOG.info("CounterHS " + counterHS);
    LOG.info("CounterMR " + counterMR);
    Assert.assertEquals(counterHS, counterMR);
    HSClientProtocol historyClient = instantiateHistoryProxy();
    GetJobReportRequest gjReq = Records.newRecord(GetJobReportRequest.class);
    gjReq.setJobId(jobId);
    JobReport jobReport = historyClient.getJobReport(gjReq).getJobReport();
    verifyJobReport(jobReport, jobId);
}
Also used : HSClientProtocol(org.apache.hadoop.mapreduce.v2.api.HSClientProtocol) SleepJob(org.apache.hadoop.mapreduce.SleepJob) Counters(org.apache.hadoop.mapreduce.Counters) Job(org.apache.hadoop.mapreduce.Job) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) File(java.io.File) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) GetJobReportRequest(org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest) JobReport(org.apache.hadoop.mapreduce.v2.api.records.JobReport) Test(org.junit.Test)
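
instantiateHistoryProxy() is not shown in this excerpt. Given the HSProxies-based construction used in the HSAdmin examples below, a plausible sketch is the following; the JHAdminConfig keys chosen here are assumptions about how the test resolves the history server address:

private HSClientProtocol instantiateHistoryProxy() throws IOException {
    final Configuration conf = mrCluster.getConfig();
    // Resolve the job history server's client-facing address from config;
    // the exact keys are an assumption based on standard JHS settings.
    final InetSocketAddress address = conf.getSocketAddr(JHAdminConfig.MR_HISTORY_ADDRESS, JHAdminConfig.DEFAULT_MR_HISTORY_ADDRESS, JHAdminConfig.DEFAULT_MR_HISTORY_PORT);
    return HSProxies.createProxy(conf, address, HSClientProtocol.class, UserGroupInformation.getCurrentUser());
}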

Example 39 with Job

use of org.apache.hadoop.mapreduce.v2.app.job.Job in project hadoop by apache.

the class HSAdmin method refreshJobRetentionSettings.

private int refreshJobRetentionSettings() throws IOException {
    // Refresh job retention settings
    Configuration conf = getConf();
    InetSocketAddress address = conf.getSocketAddr(JHAdminConfig.JHS_ADMIN_ADDRESS, JHAdminConfig.DEFAULT_JHS_ADMIN_ADDRESS, JHAdminConfig.DEFAULT_JHS_ADMIN_PORT);
    HSAdminRefreshProtocol refreshProtocol = HSProxies.createProxy(conf, address, HSAdminRefreshProtocol.class, UserGroupInformation.getCurrentUser());
    refreshProtocol.refreshJobRetentionSettings();
    return 0;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) InetSocketAddress(java.net.InetSocketAddress) HSAdminRefreshProtocol(org.apache.hadoop.mapreduce.v2.api.HSAdminRefreshProtocol)

Example 40 with Job

use of org.apache.hadoop.mapreduce.v2.app.job.Job in project hadoop by apache.

the class HSAdmin method refreshLoadedJobCache.

private int refreshLoadedJobCache() throws IOException {
    // Refresh the loaded job cache
    Configuration conf = getConf();
    InetSocketAddress address = conf.getSocketAddr(JHAdminConfig.JHS_ADMIN_ADDRESS, JHAdminConfig.DEFAULT_JHS_ADMIN_ADDRESS, JHAdminConfig.DEFAULT_JHS_ADMIN_PORT);
    HSAdminRefreshProtocol refreshProtocol = HSProxies.createProxy(conf, address, HSAdminRefreshProtocol.class, UserGroupInformation.getCurrentUser());
    refreshProtocol.refreshLoadedJobCache();
    return 0;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) InetSocketAddress(java.net.InetSocketAddress) HSAdminRefreshProtocol(org.apache.hadoop.mapreduce.v2.api.HSAdminRefreshProtocol)
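
Both refresh methods follow the same shape: read the JHS admin address from configuration, build an HSAdminRefreshProtocol proxy as the current user, and invoke the matching RPC. A hypothetical sketch of how a command-line entry point might dispatch to them; the flag names mirror the method names and are illustrative, not necessarily HSAdmin's actual CLI flags:

public int run(String[] args) throws Exception {
    if (args.length < 1) {
        return -1;
    }
    // Route the admin flag to the corresponding refresh RPC.
    if ("-refreshJobRetentionSettings".equals(args[0])) {
        return refreshJobRetentionSettings();
    } else if ("-refreshLoadedJobCache".equals(args[0])) {
        return refreshLoadedJobCache();
    }
    System.err.println("Unknown command: " + args[0]);
    return -1;
}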

Aggregations

Job (org.apache.hadoop.mapreduce.v2.app.job.Job): 291
Test (org.junit.Test): 266
JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId): 221
Configuration (org.apache.hadoop.conf.Configuration): 145
Task (org.apache.hadoop.mapreduce.v2.app.job.Task): 141
ClientResponse (com.sun.jersey.api.client.ClientResponse): 110
WebResource (com.sun.jersey.api.client.WebResource): 110
JSONObject (org.codehaus.jettison.json.JSONObject): 90
TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt): 80
TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId): 49
TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId): 49
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 44
IOException (java.io.IOException): 34
Path (org.apache.hadoop.fs.Path): 31
JobEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent): 30
ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 30
AppContext (org.apache.hadoop.mapreduce.v2.app.AppContext): 28
TaskAttemptEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent): 28
DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher): 25
Path (javax.ws.rs.Path): 23