Search in sources :

Example 1 with FSError

use of org.apache.hadoop.fs.FSError in project hadoop by apache.

the class YarnChild method main.

public static void main(String[] args) throws Throwable {
    Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
    LOG.debug("Child starting");
    final JobConf job = new JobConf(MRJobConfig.JOB_CONF_FILE);
    // Initing with our JobConf allows us to avoid loading confs twice
    Limits.init(job);
    UserGroupInformation.setConfiguration(job);
    // MAPREDUCE-6565: need to set configuration for SecurityUtil.
    SecurityUtil.setConfiguration(job);
    String host = args[0];
    int port = Integer.parseInt(args[1]);
    final InetSocketAddress address = NetUtils.createSocketAddrForHost(host, port);
    final TaskAttemptID firstTaskid = TaskAttemptID.forName(args[2]);
    long jvmIdLong = Long.parseLong(args[3]);
    JVMId jvmId = new JVMId(firstTaskid.getJobID(), firstTaskid.getTaskType() == TaskType.MAP, jvmIdLong);
    CallerContext.setCurrent(new CallerContext.Builder("mr_" + firstTaskid.toString()).build());
    // initialize metrics
    DefaultMetricsSystem.initialize(StringUtils.camelize(firstTaskid.getTaskType().name()) + "Task");
    // Security framework already loaded the tokens into current ugi
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    LOG.info("Executing with tokens:");
    for (Token<?> token : credentials.getAllTokens()) {
        LOG.info(token);
    }
    // Create TaskUmbilicalProtocol as actual task owner.
    UserGroupInformation taskOwner = UserGroupInformation.createRemoteUser(firstTaskid.getJobID().toString());
    Token<JobTokenIdentifier> jt = TokenCache.getJobToken(credentials);
    SecurityUtil.setTokenService(jt, address);
    taskOwner.addToken(jt);
    final TaskUmbilicalProtocol umbilical = taskOwner.doAs(new PrivilegedExceptionAction<TaskUmbilicalProtocol>() {

        @Override
        public TaskUmbilicalProtocol run() throws Exception {
            return (TaskUmbilicalProtocol) RPC.getProxy(TaskUmbilicalProtocol.class, TaskUmbilicalProtocol.versionID, address, job);
        }
    });
    // report non-pid to application master
    JvmContext context = new JvmContext(jvmId, "-1000");
    LOG.debug("PID: " + System.getenv().get("JVM_PID"));
    Task task = null;
    UserGroupInformation childUGI = null;
    ScheduledExecutorService logSyncer = null;
    try {
        int idleLoopCount = 0;
        JvmTask myTask = null;
        ;
        // poll for new task
        for (int idle = 0; null == myTask; ++idle) {
            long sleepTimeMilliSecs = Math.min(idle * 500, 1500);
            LOG.info("Sleeping for " + sleepTimeMilliSecs + "ms before retrying again. Got null now.");
            MILLISECONDS.sleep(sleepTimeMilliSecs);
            myTask = umbilical.getTask(context);
        }
        if (myTask.shouldDie()) {
            return;
        }
        task = myTask.getTask();
        YarnChild.taskid = task.getTaskID();
        // Create the job-conf and set credentials
        configureTask(job, task, credentials, jt);
        // log the system properties
        String systemPropsToLog = MRApps.getSystemPropertiesToLog(job);
        if (systemPropsToLog != null) {
            LOG.info(systemPropsToLog);
        }
        // Initiate Java VM metrics
        JvmMetrics.initSingleton(jvmId.toString(), job.getSessionId());
        childUGI = UserGroupInformation.createRemoteUser(System.getenv(ApplicationConstants.Environment.USER.toString()));
        // Add tokens to new user so that it may execute its task correctly.
        childUGI.addCredentials(credentials);
        // set job classloader if configured before invoking the task
        MRApps.setJobClassLoader(job);
        logSyncer = TaskLog.createLogSyncer();
        // Create a final reference to the task for the doAs block
        final Task taskFinal = task;
        childUGI.doAs(new PrivilegedExceptionAction<Object>() {

            @Override
            public Object run() throws Exception {
                // use job-specified working directory
                setEncryptedSpillKeyIfRequired(taskFinal);
                FileSystem.get(job).setWorkingDirectory(job.getWorkingDirectory());
                // run the task
                taskFinal.run(job, umbilical);
                return null;
            }
        });
    } catch (FSError e) {
        LOG.fatal("FSError from child", e);
        if (!ShutdownHookManager.get().isShutdownInProgress()) {
            umbilical.fsError(taskid, e.getMessage());
        }
    } catch (Exception exception) {
        LOG.warn("Exception running child : " + StringUtils.stringifyException(exception));
        try {
            if (task != null) {
                // do cleanup for the task
                if (childUGI == null) {
                    // no need to job into doAs block
                    task.taskCleanup(umbilical);
                } else {
                    final Task taskFinal = task;
                    childUGI.doAs(new PrivilegedExceptionAction<Object>() {

                        @Override
                        public Object run() throws Exception {
                            taskFinal.taskCleanup(umbilical);
                            return null;
                        }
                    });
                }
            }
        } catch (Exception e) {
            LOG.info("Exception cleaning up: " + StringUtils.stringifyException(e));
        }
        // Report back any failures, for diagnostic purposes
        if (taskid != null) {
            if (!ShutdownHookManager.get().isShutdownInProgress()) {
                umbilical.fatalError(taskid, StringUtils.stringifyException(exception));
            }
        }
    } catch (Throwable throwable) {
        LOG.fatal("Error running child : " + StringUtils.stringifyException(throwable));
        if (taskid != null) {
            if (!ShutdownHookManager.get().isShutdownInProgress()) {
                Throwable tCause = throwable.getCause();
                String cause = tCause == null ? throwable.getMessage() : StringUtils.stringifyException(tCause);
                umbilical.fatalError(taskid, cause);
            }
        }
    } finally {
        RPC.stopProxy(umbilical);
        DefaultMetricsSystem.shutdown();
        TaskLog.syncLogsShutdown(logSyncer);
    }
}
Also used : YarnUncaughtExceptionHandler(org.apache.hadoop.yarn.YarnUncaughtExceptionHandler) InetSocketAddress(java.net.InetSocketAddress) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) FSError(org.apache.hadoop.fs.FSError) JobTokenIdentifier(org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier) PrivilegedExceptionAction(java.security.PrivilegedExceptionAction) IOException(java.io.IOException) DiskErrorException(org.apache.hadoop.util.DiskChecker.DiskErrorException) Credentials(org.apache.hadoop.security.Credentials)

Example 2 with FSError

use of org.apache.hadoop.fs.FSError in project hadoop by apache.

the class TestResourceLocalizationService method testLocalizerRunnerException.

@Test(timeout = 10000)
// mocked generics
@SuppressWarnings("unchecked")
public void testLocalizerRunnerException() throws Exception {
    DrainDispatcher dispatcher = new DrainDispatcher();
    dispatcher.init(conf);
    dispatcher.start();
    EventHandler<ApplicationEvent> applicationBus = mock(EventHandler.class);
    dispatcher.register(ApplicationEventType.class, applicationBus);
    EventHandler<ContainerEvent> containerBus = mock(EventHandler.class);
    dispatcher.register(ContainerEventType.class, containerBus);
    ContainerExecutor exec = mock(ContainerExecutor.class);
    LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
    LocalDirsHandlerService dirsHandlerSpy = spy(dirsHandler);
    dirsHandlerSpy.init(conf);
    DeletionService delServiceReal = new DeletionService(exec);
    DeletionService delService = spy(delServiceReal);
    delService.init(new Configuration());
    delService.start();
    ResourceLocalizationService rawService = new ResourceLocalizationService(dispatcher, exec, delService, dirsHandlerSpy, nmContext);
    ResourceLocalizationService spyService = spy(rawService);
    doReturn(mockServer).when(spyService).createServer();
    try {
        spyService.init(conf);
        spyService.start();
        // init application
        final Application app = mock(Application.class);
        final ApplicationId appId = BuilderUtils.newApplicationId(314159265358979L, 3);
        when(app.getUser()).thenReturn("user0");
        when(app.getAppId()).thenReturn(appId);
        spyService.handle(new ApplicationLocalizationEvent(LocalizationEventType.INIT_APPLICATION_RESOURCES, app));
        dispatcher.await();
        Random r = new Random();
        long seed = r.nextLong();
        System.out.println("SEED: " + seed);
        r.setSeed(seed);
        final Container c = getMockContainer(appId, 42, "user0");
        final LocalResource resource1 = getPrivateMockedResource(r);
        System.out.println("Here 4");
        final LocalResourceRequest req1 = new LocalResourceRequest(resource1);
        Map<LocalResourceVisibility, Collection<LocalResourceRequest>> rsrcs = new HashMap<LocalResourceVisibility, Collection<LocalResourceRequest>>();
        List<LocalResourceRequest> privateResourceList = new ArrayList<LocalResourceRequest>();
        privateResourceList.add(req1);
        rsrcs.put(LocalResourceVisibility.PRIVATE, privateResourceList);
        final Constructor<?>[] constructors = FSError.class.getDeclaredConstructors();
        constructors[0].setAccessible(true);
        FSError fsError = (FSError) constructors[0].newInstance(new IOException("Disk Error"));
        Mockito.doThrow(fsError).when(dirsHandlerSpy).getLocalPathForWrite(isA(String.class));
        spyService.handle(new ContainerLocalizationRequestEvent(c, rsrcs));
        Thread.sleep(1000);
        dispatcher.await();
        // Verify if ContainerResourceFailedEvent is invoked on FSError
        verify(containerBus).handle(isA(ContainerResourceFailedEvent.class));
    } finally {
        spyService.stop();
        dispatcher.stop();
        delService.stop();
    }
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) ContainerExecutor(org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor) DefaultContainerExecutor(org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor) ContainerLocalizationRequestEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ContainerLocalizationRequestEvent) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LocalResourceVisibility(org.apache.hadoop.yarn.api.records.LocalResourceVisibility) Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) Random(java.util.Random) ContainerResourceFailedEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerResourceFailedEvent) ApplicationLocalizationEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ApplicationLocalizationEvent) ContainerEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent) FSError(org.apache.hadoop.fs.FSError) Constructor(java.lang.reflect.Constructor) ApplicationEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent) DeletionService(org.apache.hadoop.yarn.server.nodemanager.DeletionService) IOException(java.io.IOException) LocalDirsHandlerService(org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) Collection(java.util.Collection) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Application(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application) Test(org.junit.Test)

Aggregations

IOException (java.io.IOException)2 FSError (org.apache.hadoop.fs.FSError)2 Constructor (java.lang.reflect.Constructor)1 InetSocketAddress (java.net.InetSocketAddress)1 PrivilegedExceptionAction (java.security.PrivilegedExceptionAction)1 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1 HashMap (java.util.HashMap)1 Random (java.util.Random)1 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)1 Configuration (org.apache.hadoop.conf.Configuration)1 JobTokenIdentifier (org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier)1 Credentials (org.apache.hadoop.security.Credentials)1 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)1 DiskErrorException (org.apache.hadoop.util.DiskChecker.DiskErrorException)1 YarnUncaughtExceptionHandler (org.apache.hadoop.yarn.YarnUncaughtExceptionHandler)1 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)1 LocalResource (org.apache.hadoop.yarn.api.records.LocalResource)1 LocalResourceVisibility (org.apache.hadoop.yarn.api.records.LocalResourceVisibility)1 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)1