Search in sources :

Example 36 with YarnRuntimeException

use of org.apache.hadoop.yarn.exceptions.YarnRuntimeException in project hadoop by apache.

the class ClientServiceDelegate method invoke.

/**
 * Reflectively calls the named {@link MRClientProtocol} method on the proxy
 * returned by {@code getProxy()}, retrying after failures until the retry
 * budget is exhausted.
 *
 * <p>The retry budget is re-read from {@code MRJobConfig.MR_CLIENT_MAX_RETRIES}
 * on every call. After each failed attempt the cached proxy is dropped so that
 * the next attempt reconnects, and the thread pauses for 100 ms.
 *
 * @param method   name of the {@code MRClientProtocol} method to call
 * @param argClass declared type of that method's single parameter
 * @param args     the argument handed to the method
 * @return the value returned by the invoked method
 * @throws IOException if the target method failed with an
 *         {@code AuthorizationException}, if all retries were used up (the
 *         last failure is wrapped), or if no attempt could be made because the
 *         configured retry count is not positive
 * @throws YarnRuntimeException if the method cannot be looked up, or if the
 *         thread is interrupted while pausing between attempts
 */
private synchronized Object invoke(String method, Class argClass, Object args) throws IOException {
    Method methodOb = null;
    try {
        methodOb = MRClientProtocol.class.getMethod(method, argClass);
    } catch (SecurityException e) {
        throw new YarnRuntimeException(e);
    } catch (NoSuchMethodException e) {
        throw new YarnRuntimeException("Method name mismatch", e);
    }
    maxClientRetry = this.conf.getInt(MRJobConfig.MR_CLIENT_MAX_RETRIES, MRJobConfig.DEFAULT_MR_CLIENT_MAX_RETRIES);
    IOException lastException = null;
    while (maxClientRetry > 0) {
        try {
            MRClientProtocol mrClientProxy = getProxy();
            return methodOb.invoke(mrClientProxy, args);
        } catch (InvocationTargetException e) {
            // Will not throw out YarnException anymore
            LOG.debug("Failed to contact AM/History for job " + jobId + " retrying..", e.getTargetException());
            // Force reconnection by setting the proxy to null.
            realProxy = null;
            // Authorization failures are not retried; surface them immediately.
            if (e.getCause() instanceof AuthorizationException) {
                throw new IOException(e.getTargetException());
            }
            // A retry is only consumed when the failed call was not made
            // through the AM proxy; the flag is cleared for the next attempt.
            if (!usingAMProxy.get()) {
                maxClientRetry--;
            }
            usingAMProxy.set(false);
            lastException = new IOException(e.getTargetException());
            try {
                Thread.sleep(100);
            } catch (InterruptedException ie) {
                // Keep the interrupt visible to callers further up the stack.
                Thread.currentThread().interrupt();
                LOG.warn("ClientServiceDelegate invoke call interrupted", ie);
                throw new YarnRuntimeException(ie);
            }
        } catch (Exception e) {
            LOG.debug("Failed to contact AM/History for job " + jobId + "  Will retry..", e);
            // Force reconnection by setting the proxy to null.
            realProxy = null;
            // RM shutdown
            maxClientRetry--;
            // Same message as before, but keep the cause so its stack trace is not lost.
            lastException = new IOException(e.getMessage(), e);
            try {
                Thread.sleep(100);
            } catch (InterruptedException ie) {
                // Keep the interrupt visible to callers further up the stack.
                Thread.currentThread().interrupt();
                LOG.warn("ClientServiceDelegate invoke call interrupted", ie);
                throw new YarnRuntimeException(ie);
            }
        }
    }
    if (lastException == null) {
        // The loop never ran (retry count <= 0); throwing a null reference
        // here would surface as an unexplained NullPointerException.
        throw new IOException("Failed to contact AM/History for job " + jobId + ": no attempt was made because " + MRJobConfig.MR_CLIENT_MAX_RETRIES + " is not positive");
    }
    throw lastException;
}
Also used : YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) AuthorizationException(org.apache.hadoop.security.authorize.AuthorizationException) Method(java.lang.reflect.Method) IOException(java.io.IOException) InvocationTargetException(java.lang.reflect.InvocationTargetException) AuthorizationException(org.apache.hadoop.security.authorize.AuthorizationException) InvocationTargetException(java.lang.reflect.InvocationTargetException) ApplicationNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) MRClientProtocol(org.apache.hadoop.mapreduce.v2.api.MRClientProtocol)

Example 37 with YarnRuntimeException

use of org.apache.hadoop.yarn.exceptions.YarnRuntimeException in project hadoop by apache.

the class ClientServiceDelegate method getProxy.

/**
 * Returns the protocol proxy to use for this job, creating one if none is
 * cached in {@code realProxy}.
 *
 * <p>The application report is fetched from the RM. If the application is
 * unknown, the result of {@code checkAndGetHSProxy} is returned. While the
 * application is reported as RUNNING, this method waits for a host to be
 * assigned, then connects to the AM at the reported host and RPC port (unless
 * {@code MRJobConfig.JOB_AM_ACCESS_DISABLED} is set). A connection failure
 * causes a 2 second pause and a fresh report lookup. Once the application is
 * no longer RUNNING, the remaining states are mapped to
 * {@code getNotRunningJob} or {@code checkAndGetHSProxy}.
 *
 * @return the proxy to use; on the final fall-through path this is the current
 *         value of {@code realProxy}, which this method has not assigned on
 *         that path
 * @throws IOException if the RM lookup fails with a {@code YarnException}, or
 *         if the application report carries no user
 * @throws YarnRuntimeException if the thread is interrupted while waiting
 */
private MRClientProtocol getProxy() throws IOException {
    if (realProxy != null) {
        return realProxy;
    }
    // Possibly allow nulls through the PB tunnel, otherwise deal with an exception
    // and redirect to the history server.
    ApplicationReport application = null;
    try {
        application = rm.getApplicationReport(appId);
    } catch (ApplicationNotFoundException e) {
        application = null;
    } catch (YarnException e2) {
        throw new IOException(e2);
    }
    if (application != null) {
        trackingUrl = application.getTrackingUrl();
    }
    InetSocketAddress serviceAddr = null;
    while (application == null || YarnApplicationState.RUNNING == application.getYarnApplicationState()) {
        if (application == null) {
            LOG.info("Could not get Job info from RM for job " + jobId + ". Redirecting to job history server.");
            return checkAndGetHSProxy(null, JobState.NEW);
        }
        try {
            if (application.getHost() == null || "".equals(application.getHost())) {
                // No host reported yet: pause, refresh the report and re-evaluate.
                LOG.debug("AM not assigned to Job. Waiting to get the AM ...");
                Thread.sleep(2000);
                LOG.debug("Application state is " + application.getYarnApplicationState());
                application = rm.getApplicationReport(appId);
                continue;
            } else if (UNAVAILABLE.equals(application.getHost())) {
                // Log the access hint only once per delegate.
                if (!amAclDisabledStatusLogged) {
                    LOG.info("Job " + jobId + " is running, but the host is unknown." + " Verify user has VIEW_JOB access.");
                    amAclDisabledStatusLogged = true;
                }
                return getNotRunningJob(application, JobState.RUNNING);
            }
            if (!conf.getBoolean(MRJobConfig.JOB_AM_ACCESS_DISABLED, false)) {
                UserGroupInformation newUgi = UserGroupInformation.createRemoteUser(UserGroupInformation.getCurrentUser().getUserName());
                serviceAddr = NetUtils.createSocketAddrForHost(application.getHost(), application.getRpcPort());
                if (UserGroupInformation.isSecurityEnabled()) {
                    // With security on, attach the client-to-AM token from the report.
                    org.apache.hadoop.yarn.api.records.Token clientToAMToken = application.getClientToAMToken();
                    Token<ClientToAMTokenIdentifier> token = ConverterUtils.convertFromYarn(clientToAMToken, serviceAddr);
                    newUgi.addToken(token);
                }
                LOG.debug("Connecting to " + serviceAddr);
                // The anonymous class below can only capture a final local.
                final InetSocketAddress finalServiceAddr = serviceAddr;
                realProxy = newUgi.doAs(new PrivilegedExceptionAction<MRClientProtocol>() {

                    @Override
                    public MRClientProtocol run() throws IOException {
                        return instantiateAMProxy(finalServiceAddr);
                    }
                });
            } else {
                if (!amAclDisabledStatusLogged) {
                    LOG.info("Network ACL closed to AM for job " + jobId + ". Not going to try to reach the AM.");
                    amAclDisabledStatusLogged = true;
                }
                return getNotRunningJob(null, JobState.RUNNING);
            }
            return realProxy;
        } catch (IOException e) {
            //possibly the AM has crashed
            //there may be some time before AM is restarted
            //keep retrying by getting the address from RM
            LOG.info("Could not connect to " + serviceAddr + ". Waiting for getting the latest AM address...");
            try {
                Thread.sleep(2000);
            } catch (InterruptedException e1) {
                // Restore the interrupt status before converting to an unchecked exception.
                Thread.currentThread().interrupt();
                LOG.warn("getProxy() call interruped", e1);
                throw new YarnRuntimeException(e1);
            }
            try {
                // NOTE(review): unlike the initial lookup, a not-found failure
                // here is presumably wrapped as IOException rather than mapped
                // to a null report -- confirm this is intended.
                application = rm.getApplicationReport(appId);
            } catch (YarnException e1) {
                throw new IOException(e1);
            }
            if (application == null) {
                LOG.info("Could not get Job info from RM for job " + jobId + ". Redirecting to job history server.");
                return checkAndGetHSProxy(null, JobState.RUNNING);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt status before converting to an unchecked exception.
            Thread.currentThread().interrupt();
            LOG.warn("getProxy() call interruped", e);
            throw new YarnRuntimeException(e);
        } catch (YarnException e) {
            throw new IOException(e);
        }
    }
    /*
     * we just want to return if its allocating, so that we don't
     * block on it. This is to be able to return job status
     * on an allocating Application.
     */
    String user = application.getUser();
    if (user == null) {
        throw new IOException("User is not set in the application report");
    }
    if (application.getYarnApplicationState() == YarnApplicationState.NEW || application.getYarnApplicationState() == YarnApplicationState.NEW_SAVING || application.getYarnApplicationState() == YarnApplicationState.SUBMITTED || application.getYarnApplicationState() == YarnApplicationState.ACCEPTED) {
        realProxy = null;
        return getNotRunningJob(application, JobState.NEW);
    }
    if (application.getYarnApplicationState() == YarnApplicationState.FAILED) {
        realProxy = null;
        return getNotRunningJob(application, JobState.FAILED);
    }
    if (application.getYarnApplicationState() == YarnApplicationState.KILLED) {
        realProxy = null;
        return getNotRunningJob(application, JobState.KILLED);
    }
    //succeeded.
    if (application.getYarnApplicationState() == YarnApplicationState.FINISHED) {
        LOG.info("Application state is completed. FinalApplicationStatus=" + application.getFinalApplicationStatus().toString() + ". Redirecting to job history server");
        realProxy = checkAndGetHSProxy(application, JobState.SUCCEEDED);
    }
    return realProxy;
}
Also used : InetSocketAddress(java.net.InetSocketAddress) IOException(java.io.IOException) PrivilegedExceptionAction(java.security.PrivilegedExceptionAction) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) ClientToAMTokenIdentifier(org.apache.hadoop.yarn.security.client.ClientToAMTokenIdentifier) ApplicationNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)

Example 38 with YarnRuntimeException

use of org.apache.hadoop.yarn.exceptions.YarnRuntimeException in project hadoop by apache.

the class TestNetworkedJob method testNetworkedJob.

/**
 * Submits a job with zero reduce tasks to a mini cluster and checks the
 * values reported by {@code NetworkedJob}, {@code ClusterStatus} (including a
 * write/read round trip) and the queue-related {@code JobClient} calls.
 *
 * @throws Exception if cluster setup, job submission or any client call fails
 */
@SuppressWarnings("deprecation")
@Test(timeout = 500000)
public void testNetworkedJob() throws Exception {
    // mini cluster and file system are released in the finally block below
    MiniMRClientCluster mr = null;
    FileSystem fileSys = null;
    try {
        mr = createMiniClusterWithCapacityScheduler();
        JobConf job = new JobConf(mr.getConfig());
        fileSys = FileSystem.get(job);
        fileSys.delete(testDir, true);
        FSDataOutputStream out = fileSys.create(inFile, true);
        try {
            out.writeBytes("This is a test file");
        } finally {
            // close the stream even when the write fails
            out.close();
        }
        FileInputFormat.setInputPaths(job, inFile);
        FileOutputFormat.setOutputPath(job, outDir);
        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);
        job.setNumReduceTasks(0);
        JobClient client = new JobClient(mr.getConfig());
        RunningJob rj = client.submitJob(job);
        JobID jobId = rj.getID();
        NetworkedJob runningJob = (NetworkedJob) client.getJob(jobId);
        runningJob.setJobPriority(JobPriority.HIGH.name());
        // test getters
        assertTrue(runningJob.getConfiguration().toString().endsWith("0001/job.xml"));
        assertEquals(jobId, runningJob.getID());
        assertEquals(jobId.toString(), runningJob.getJobID());
        assertEquals("N/A", runningJob.getJobName());
        assertTrue(runningJob.getJobFile().endsWith(".staging/" + runningJob.getJobID() + "/job.xml"));
        assertTrue(runningJob.getTrackingURL().length() > 0);
        // a zero delta keeps the exact comparison but reports the actual value on failure
        assertEquals(0.0f, runningJob.mapProgress(), 0.0f);
        assertEquals(0.0f, runningJob.reduceProgress(), 0.0f);
        assertEquals(0.0f, runningJob.cleanupProgress(), 0.0f);
        assertEquals(0.0f, runningJob.setupProgress(), 0.0f);
        TaskCompletionEvent[] tce = runningJob.getTaskCompletionEvents(0);
        assertEquals(0, tce.length);
        assertEquals("", runningJob.getHistoryUrl());
        assertFalse(runningJob.isRetired());
        assertEquals("", runningJob.getFailureInfo());
        assertEquals("N/A", runningJob.getJobStatus().getJobName());
        assertEquals(0, client.getMapTaskReports(jobId).length);
        // The message is only checked when the call throws; a call that
        // returns normally is tolerated.
        try {
            client.getSetupTaskReports(jobId);
        } catch (YarnRuntimeException e) {
            assertEquals("Unrecognized task type: JOB_SETUP", e.getMessage());
        }
        try {
            client.getCleanupTaskReports(jobId);
        } catch (YarnRuntimeException e) {
            assertEquals("Unrecognized task type: JOB_CLEANUP", e.getMessage());
        }
        assertEquals(0, client.getReduceTaskReports(jobId).length);
        // test ClusterStatus
        ClusterStatus status = client.getClusterStatus(true);
        assertEquals(2, status.getActiveTrackerNames().size());
        // the blacklist-related getters are expected to report nothing here
        assertEquals(0, status.getBlacklistedTrackers());
        assertEquals(0, status.getBlacklistedTrackerNames().size());
        assertEquals(0, status.getBlackListedTrackersInfo().size());
        assertEquals(JobTrackerStatus.RUNNING, status.getJobTrackerStatus());
        assertEquals(1, status.getMapTasks());
        assertEquals(20, status.getMaxMapTasks());
        assertEquals(4, status.getMaxReduceTasks());
        assertEquals(0, status.getNumExcludedNodes());
        assertEquals(1, status.getReduceTasks());
        assertEquals(2, status.getTaskTrackers());
        assertEquals(0, status.getTTExpiryInterval());
        assertEquals(0, status.getGraylistedTrackers());
        // test read and write
        ByteArrayOutputStream dataOut = new ByteArrayOutputStream();
        status.write(new DataOutputStream(dataOut));
        ClusterStatus status2 = new ClusterStatus();
        status2.readFields(new DataInputStream(new ByteArrayInputStream(dataOut.toByteArray())));
        assertEquals(status.getActiveTrackerNames(), status2.getActiveTrackerNames());
        assertEquals(status.getBlackListedTrackersInfo(), status2.getBlackListedTrackersInfo());
        assertEquals(status.getMapTasks(), status2.getMapTasks());
        // test taskStatusfilter
        JobClient.setTaskOutputFilter(job, TaskStatusFilter.ALL);
        assertEquals(TaskStatusFilter.ALL, JobClient.getTaskOutputFilter(job));
        // test default map
        assertEquals(20, client.getDefaultMaps());
        assertEquals(4, client.getDefaultReduces());
        assertEquals("jobSubmitDir", client.getSystemDir().getName());
        // test queue information
        JobQueueInfo[] rootQueueInfo = client.getRootQueues();
        assertEquals(1, rootQueueInfo.length);
        assertEquals("default", rootQueueInfo[0].getQueueName());
        JobQueueInfo[] qinfo = client.getQueues();
        assertEquals(1, qinfo.length);
        assertEquals("default", qinfo[0].getQueueName());
        assertEquals(0, client.getChildQueues("default").length);
        assertEquals(1, client.getJobsFromQueue("default").length);
        assertTrue(client.getJobsFromQueue("default")[0].getJobFile().endsWith("/job.xml"));
        JobQueueInfo qi = client.getQueueInfo("default");
        assertEquals("default", qi.getQueueName());
        assertEquals("running", qi.getQueueState());
        QueueAclsInfo[] aai = client.getQueueAclsForCurrentUser();
        assertEquals(2, aai.length);
        assertEquals("root", aai[0].getQueueName());
        assertEquals("default", aai[1].getQueueName());
        // test JobClient
        // The following asserts read JobStatus twice and ensure the returned
        // JobStatus objects correspond to the same Job.
        assertEquals("Expected matching JobIDs", jobId, client.getJob(jobId).getJobStatus().getJobID());
        assertEquals("Expected matching startTimes", rj.getJobStatus().getStartTime(), client.getJob(jobId).getJobStatus().getStartTime());
    } finally {
        if (fileSys != null) {
            fileSys.delete(testDir, true);
        }
        if (mr != null) {
            mr.stop();
        }
    }
}
Also used : FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) DataOutputStream(java.io.DataOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataInputStream(java.io.DataInputStream) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) ByteArrayInputStream(java.io.ByteArrayInputStream) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) NetworkedJob(org.apache.hadoop.mapred.JobClient.NetworkedJob) Test(org.junit.Test)

Example 39 with YarnRuntimeException

use of org.apache.hadoop.yarn.exceptions.YarnRuntimeException in project hadoop by apache.

the class WebServer method serviceStart.

/**
 * Starts the NodeManager web application.
 *
 * <p>Before the web app is built, the configuration is adjusted: the
 * cross-origin filter is switched on when
 * {@code YarnConfiguration.NM_WEBAPP_ENABLE_CORS_FILTER} is set, and
 * {@code AuthenticationFilterInitializer} is appended to
 * {@code hadoop.http.filter.initializers} when it is not already listed.
 *
 * @throws YarnRuntimeException if building or starting the web app fails
 * @throws Exception if the superclass start fails
 */
@Override
protected void serviceStart() throws Exception {
    Configuration config = getConfig();
    String webAppAddress = WebAppUtils.getWebAppBindURL(
        config,
        YarnConfiguration.NM_BIND_HOST,
        WebAppUtils.getNMWebAppURLWithoutScheme(config));

    if (config.getBoolean(
        YarnConfiguration.NM_WEBAPP_ENABLE_CORS_FILTER,
        YarnConfiguration.DEFAULT_NM_WEBAPP_ENABLE_CORS_FILTER)) {
        getConfig().setBoolean(
            HttpCrossOriginFilterInitializer.PREFIX + HttpCrossOriginFilterInitializer.ENABLED_SUFFIX,
            true);
    }

    // Always load pseudo authentication filter to parse "user.name" in an URL
    // to identify a HTTP request's user.
    String initializersKey = "hadoop.http.filter.initializers";
    String authInitializerName = AuthenticationFilterInitializer.class.getName();
    Class<?>[] configuredInitializers = config.getClasses(initializersKey);
    List<String> initializerNames = new ArrayList<String>();
    boolean authInitializerPresent = false;
    if (configuredInitializers != null) {
        // Collect names until the authentication initializer is seen; once it
        // is found the collected names are no longer needed.
        for (int i = 0; i < configuredInitializers.length && !authInitializerPresent; i++) {
            String candidate = configuredInitializers[i].getName();
            if (candidate.equals(authInitializerName)) {
                authInitializerPresent = true;
            } else {
                initializerNames.add(candidate);
            }
        }
    }
    if (!authInitializerPresent) {
        initializerNames.add(authInitializerName);
        config.set(initializersKey, StringUtils.join(",", initializerNames));
    }

    LOG.info("Instantiating NMWebApp at " + webAppAddress);
    try {
        this.webApp = WebApps.$for("node", Context.class, this.nmContext, "ws")
            .at(webAppAddress)
            .with(config)
            .withHttpSpnegoPrincipalKey(YarnConfiguration.NM_WEBAPP_SPNEGO_USER_NAME_KEY)
            .withHttpSpnegoKeytabKey(YarnConfiguration.NM_WEBAPP_SPNEGO_KEYTAB_FILE_KEY)
            .withCSRFProtection(YarnConfiguration.NM_CSRF_PREFIX)
            .withXFSProtection(YarnConfiguration.NM_XFS_PREFIX)
            .start(this.nmWebApp);
        // Record the port of the first connector of the started server.
        this.port = this.webApp.httpServer().getConnectorAddress(0).getPort();
    } catch (Exception startFailure) {
        String failureMessage = "NMWebapps failed to start.";
        LOG.error(failureMessage, startFailure);
        throw new YarnRuntimeException(failureMessage, startFailure);
    }
    super.serviceStart();
}
Also used : YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) AuthenticationFilterInitializer(org.apache.hadoop.security.AuthenticationFilterInitializer) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException)

Example 40 with YarnRuntimeException

use of org.apache.hadoop.yarn.exceptions.YarnRuntimeException in project hadoop by apache.

the class TestLogAggregationService method testLogAggregationInitAppFailsWithoutKillingNM.

/**
 * Makes {@code initAppAggregator} throw for one application and checks that
 * the service reports {@code APPLICATION_LOG_HANDLING_FAILED}, closes no file
 * systems, and still handles later events for containers and applications it
 * has no record of.
 */
@Test
@SuppressWarnings("unchecked")
public void testLogAggregationInitAppFailsWithoutKillingNM() throws Exception {
    this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
    this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, this.remoteRootLogDir.getAbsolutePath());

    LogAggregationService spiedService =
        spy(new LogAggregationService(dispatcher, this.context, this.delSrvc, super.dirsHandler));
    spiedService.init(this.conf);
    spiedService.start();

    long nowMillis = System.currentTimeMillis();
    int randomSuffix = (int) (Math.random() * 1000);
    ApplicationId failingAppId = BuilderUtils.newApplicationId(nowMillis, randomSuffix);

    // Stub the spy so that initializing aggregation for this application blows up.
    doThrow(new YarnRuntimeException("KABOOM!"))
        .when(spiedService)
        .initAppAggregator(eq(failingAppId), eq(user), any(Credentials.class), anyMap(), any(LogAggregationContext.class), anyLong());

    LogAggregationContext amOrFailedPolicyContext = Records.newRecord(LogAggregationContext.class);
    amOrFailedPolicyContext.setLogAggregationPolicyClassName(AMOrFailedContainerLogAggregationPolicy.class.getName());

    LogHandlerAppStartedEvent appStarted =
        new LogHandlerAppStartedEvent(failingAppId, this.user, null, this.acls, amOrFailedPolicyContext);
    spiedService.handle(appStarted);
    dispatcher.await();

    ApplicationEvent[] expectedFailure = {
        new ApplicationEvent(failingAppId, ApplicationEventType.APPLICATION_LOG_HANDLING_FAILED)
    };
    checkEvents(appEventHandler, expectedFailure, false, "getType", "getApplicationID", "getDiagnostic");

    // no filesystems instantiated yet
    verify(spiedService, never()).closeFileSystems(any(UserGroupInformation.class));

    // verify trying to collect logs for containers/apps we don't know about
    // doesn't blow up and tear down the NM
    LogHandlerContainerFinishedEvent unknownContainerFinished =
        new LogHandlerContainerFinishedEvent(BuilderUtils.newContainerId(4, 1, 1, 1), 0);
    spiedService.handle(unknownContainerFinished);
    dispatcher.await();

    LogHandlerAppFinishedEvent unknownAppFinished =
        new LogHandlerAppFinishedEvent(BuilderUtils.newApplicationId(1, 5));
    spiedService.handle(unknownAppFinished);
    dispatcher.await();
}
Also used : YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) LogHandlerAppStartedEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppStartedEvent) ApplicationEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent) LogHandlerAppFinishedEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppFinishedEvent) LogHandlerContainerFinishedEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerContainerFinishedEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Credentials(org.apache.hadoop.security.Credentials) LogAggregationContext(org.apache.hadoop.yarn.api.records.LogAggregationContext) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) BaseContainerManagerTest(org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest) Test(org.junit.Test)

Aggregations

YarnRuntimeException (org.apache.hadoop.yarn.exceptions.YarnRuntimeException)147 IOException (java.io.IOException)56 Configuration (org.apache.hadoop.conf.Configuration)38 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)28 Test (org.junit.Test)28 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)17 InetSocketAddress (java.net.InetSocketAddress)12 Path (org.apache.hadoop.fs.Path)12 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)9 InvocationTargetException (java.lang.reflect.InvocationTargetException)8 Server (org.apache.hadoop.ipc.Server)8 FileSystem (org.apache.hadoop.fs.FileSystem)7 FsPermission (org.apache.hadoop.fs.permission.FsPermission)7 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)7 FileNotFoundException (java.io.FileNotFoundException)6 ArrayList (java.util.ArrayList)6 HashMap (java.util.HashMap)6 JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)6 Job (org.apache.hadoop.mapreduce.v2.app.job.Job)6 ConnectException (java.net.ConnectException)5