Use of org.apache.hadoop.yarn.exceptions.YarnRuntimeException in project hadoop by apache.
The class ClientServiceDelegate, method invoke.
private synchronized Object invoke(String method, Class argClass, Object args) throws IOException {
  Method methodOb = null;
  try {
    methodOb = MRClientProtocol.class.getMethod(method, argClass);
  } catch (SecurityException e) {
    throw new YarnRuntimeException(e);
  } catch (NoSuchMethodException e) {
    throw new YarnRuntimeException("Method name mismatch", e);
  }
  maxClientRetry = this.conf.getInt(MRJobConfig.MR_CLIENT_MAX_RETRIES,
      MRJobConfig.DEFAULT_MR_CLIENT_MAX_RETRIES);
  IOException lastException = null;
  while (maxClientRetry > 0) {
    MRClientProtocol MRClientProxy = null;
    try {
      MRClientProxy = getProxy();
      return methodOb.invoke(MRClientProxy, args);
    } catch (InvocationTargetException e) {
      // Will not throw out YarnException anymore
      LOG.debug("Failed to contact AM/History for job " + jobId
          + " retrying..", e.getTargetException());
      // Force reconnection by setting the proxy to null.
      realProxy = null;
      if (e.getCause() instanceof AuthorizationException) {
        throw new IOException(e.getTargetException());
      }
      // If the last call went to the AM, do not count this failure against
      // maxClientRetry: the job may simply be waiting
      // for its AM to be restarted.
      if (!usingAMProxy.get()) {
        maxClientRetry--;
      }
      usingAMProxy.set(false);
      lastException = new IOException(e.getTargetException());
      try {
        Thread.sleep(100);
      } catch (InterruptedException ie) {
        LOG.warn("ClientServiceDelegate invoke call interrupted", ie);
        throw new YarnRuntimeException(ie);
      }
    } catch (Exception e) {
      LOG.debug("Failed to contact AM/History for job " + jobId
          + " Will retry..", e);
      // Force reconnection by setting the proxy to null.
      realProxy = null;
      // RM shutdown
      maxClientRetry--;
      lastException = new IOException(e.getMessage());
      try {
        Thread.sleep(100);
      } catch (InterruptedException ie) {
        LOG.warn("ClientServiceDelegate invoke call interrupted", ie);
        throw new YarnRuntimeException(ie);
      }
    }
  }
  throw lastException;
}
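The method distinguishes two failure modes: reflective lookup errors and interrupts are treated as unrecoverable and become unchecked YarnRuntimeExceptions, while transport failures stay checked IOExceptions that drive the retry loop. Below is a minimal standalone sketch of the lookup-and-wrap half of that pattern; ReflectiveCaller and its call method are illustrative names we introduce here, not Hadoop API.

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;

import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;

// Hypothetical sketch: resolve a method reflectively and convert the
// checked reflection errors into an unchecked YarnRuntimeException,
// mirroring the pattern in ClientServiceDelegate.invoke.
public class ReflectiveCaller {

  public static Object call(Object target, String method,
      Class<?> argClass, Object arg) {
    Method m;
    try {
      m = target.getClass().getMethod(method, argClass);
    } catch (NoSuchMethodException e) {
      // A programming error: the caller named a method that does not exist.
      throw new YarnRuntimeException("Method name mismatch", e);
    }
    try {
      return m.invoke(target, arg);
    } catch (IllegalAccessException | InvocationTargetException e) {
      throw new YarnRuntimeException(e);
    }
  }
}

Treating a method-name mismatch as unchecked is the right call here: it can only happen through a coding error in the delegate itself, so no caller could meaningfully recover from it.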
Use of org.apache.hadoop.yarn.exceptions.YarnRuntimeException in project hadoop by apache.
The class ClientServiceDelegate, method getProxy.
private MRClientProtocol getProxy() throws IOException {
  if (realProxy != null) {
    return realProxy;
  }
  // Possibly allow nulls through the PB tunnel, otherwise deal with an
  // exception and redirect to the history server.
  ApplicationReport application = null;
  try {
    application = rm.getApplicationReport(appId);
  } catch (ApplicationNotFoundException e) {
    application = null;
  } catch (YarnException e2) {
    throw new IOException(e2);
  }
  if (application != null) {
    trackingUrl = application.getTrackingUrl();
  }
  InetSocketAddress serviceAddr = null;
  while (application == null
      || YarnApplicationState.RUNNING == application.getYarnApplicationState()) {
    if (application == null) {
      LOG.info("Could not get Job info from RM for job " + jobId
          + ". Redirecting to job history server.");
      return checkAndGetHSProxy(null, JobState.NEW);
    }
    try {
      if (application.getHost() == null || "".equals(application.getHost())) {
        LOG.debug("AM not assigned to Job. Waiting to get the AM ...");
        Thread.sleep(2000);
        LOG.debug("Application state is " + application.getYarnApplicationState());
        application = rm.getApplicationReport(appId);
        continue;
      } else if (UNAVAILABLE.equals(application.getHost())) {
        if (!amAclDisabledStatusLogged) {
          LOG.info("Job " + jobId + " is running, but the host is unknown."
              + " Verify user has VIEW_JOB access.");
          amAclDisabledStatusLogged = true;
        }
        return getNotRunningJob(application, JobState.RUNNING);
      }
      if (!conf.getBoolean(MRJobConfig.JOB_AM_ACCESS_DISABLED, false)) {
        UserGroupInformation newUgi = UserGroupInformation.createRemoteUser(
            UserGroupInformation.getCurrentUser().getUserName());
        serviceAddr = NetUtils.createSocketAddrForHost(
            application.getHost(), application.getRpcPort());
        if (UserGroupInformation.isSecurityEnabled()) {
          org.apache.hadoop.yarn.api.records.Token clientToAMToken =
              application.getClientToAMToken();
          Token<ClientToAMTokenIdentifier> token =
              ConverterUtils.convertFromYarn(clientToAMToken, serviceAddr);
          newUgi.addToken(token);
        }
        LOG.debug("Connecting to " + serviceAddr);
        final InetSocketAddress finalServiceAddr = serviceAddr;
        realProxy = newUgi.doAs(new PrivilegedExceptionAction<MRClientProtocol>() {
          @Override
          public MRClientProtocol run() throws IOException {
            return instantiateAMProxy(finalServiceAddr);
          }
        });
      } else {
        if (!amAclDisabledStatusLogged) {
          LOG.info("Network ACL closed to AM for job " + jobId
              + ". Not going to try to reach the AM.");
          amAclDisabledStatusLogged = true;
        }
        return getNotRunningJob(null, JobState.RUNNING);
      }
      return realProxy;
    } catch (IOException e) {
      // Possibly the AM has crashed, and there may be some time before it is
      // restarted; keep retrying by getting the address from the RM.
      LOG.info("Could not connect to " + serviceAddr
          + ". Waiting for getting the latest AM address...");
      try {
        Thread.sleep(2000);
      } catch (InterruptedException e1) {
        LOG.warn("getProxy() call interrupted", e1);
        throw new YarnRuntimeException(e1);
      }
      try {
        application = rm.getApplicationReport(appId);
      } catch (YarnException e1) {
        throw new IOException(e1);
      }
      if (application == null) {
        LOG.info("Could not get Job info from RM for job " + jobId
            + ". Redirecting to job history server.");
        return checkAndGetHSProxy(null, JobState.RUNNING);
      }
    } catch (InterruptedException e) {
      LOG.warn("getProxy() call interrupted", e);
      throw new YarnRuntimeException(e);
    } catch (YarnException e) {
      throw new IOException(e);
    }
  }
  /*
   * We just want to return if it's allocating, so that we don't block on it.
   * This is to be able to return job status on an allocating Application.
   */
  String user = application.getUser();
  if (user == null) {
    throw new IOException("User is not set in the application report");
  }
  if (application.getYarnApplicationState() == YarnApplicationState.NEW
      || application.getYarnApplicationState() == YarnApplicationState.NEW_SAVING
      || application.getYarnApplicationState() == YarnApplicationState.SUBMITTED
      || application.getYarnApplicationState() == YarnApplicationState.ACCEPTED) {
    realProxy = null;
    return getNotRunningJob(application, JobState.NEW);
  }
  if (application.getYarnApplicationState() == YarnApplicationState.FAILED) {
    realProxy = null;
    return getNotRunningJob(application, JobState.FAILED);
  }
  if (application.getYarnApplicationState() == YarnApplicationState.KILLED) {
    realProxy = null;
    return getNotRunningJob(application, JobState.KILLED);
  }
  // Succeeded.
  if (application.getYarnApplicationState() == YarnApplicationState.FINISHED) {
    LOG.info("Application state is completed. FinalApplicationStatus="
        + application.getFinalApplicationStatus().toString()
        + ". Redirecting to job history server");
    realProxy = checkAndGetHSProxy(application, JobState.SUCCEEDED);
  }
  return realProxy;
}
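Both invoke and getProxy share a back-off idiom: sleep briefly before retrying, and if the sleep is interrupted, give up by wrapping the InterruptedException in a YarnRuntimeException. Here is a hedged sketch of that idiom pulled out as a helper; RetryBackoff and sleepBeforeRetry are our names, not Hadoop's.

import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;

// Hypothetical helper illustrating the sleep-then-retry idiom used in
// ClientServiceDelegate: an interrupt while backing off is treated as
// fatal and surfaces as an unchecked YarnRuntimeException.
final class RetryBackoff {
  private RetryBackoff() {}

  static void sleepBeforeRetry(long millis) {
    try {
      Thread.sleep(millis);
    } catch (InterruptedException ie) {
      // Restore the interrupt flag for callers higher up the stack.
      Thread.currentThread().interrupt();
      throw new YarnRuntimeException(ie);
    }
  }
}

Unlike the Hadoop code above, this sketch restores the thread's interrupt flag before rethrowing, which is the usual recommendation when converting InterruptedException into an unchecked exception.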
Use of org.apache.hadoop.yarn.exceptions.YarnRuntimeException in project hadoop by apache.
The class TestNetworkedJob, method testNetworkedJob.
/**
 * Test JobConf.
 * @throws Exception
 */
@SuppressWarnings("deprecation")
@Test(timeout = 500000)
public void testNetworkedJob() throws Exception {
  // mock creation
  MiniMRClientCluster mr = null;
  FileSystem fileSys = null;
  try {
    mr = createMiniClusterWithCapacityScheduler();
    JobConf job = new JobConf(mr.getConfig());
    fileSys = FileSystem.get(job);
    fileSys.delete(testDir, true);
    FSDataOutputStream out = fileSys.create(inFile, true);
    out.writeBytes("This is a test file");
    out.close();
    FileInputFormat.setInputPaths(job, inFile);
    FileOutputFormat.setOutputPath(job, outDir);
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(IdentityReducer.class);
    job.setNumReduceTasks(0);
    JobClient client = new JobClient(mr.getConfig());
    RunningJob rj = client.submitJob(job);
    JobID jobId = rj.getID();
    NetworkedJob runningJob = (NetworkedJob) client.getJob(jobId);
    runningJob.setJobPriority(JobPriority.HIGH.name());
    // test getters
    assertTrue(runningJob.getConfiguration().toString().endsWith("0001/job.xml"));
    assertEquals(jobId, runningJob.getID());
    assertEquals(jobId.toString(), runningJob.getJobID());
    assertEquals("N/A", runningJob.getJobName());
    assertTrue(runningJob.getJobFile().endsWith(
        ".staging/" + runningJob.getJobID() + "/job.xml"));
    assertTrue(runningJob.getTrackingURL().length() > 0);
    assertTrue(runningJob.mapProgress() == 0.0f);
    assertTrue(runningJob.reduceProgress() == 0.0f);
    assertTrue(runningJob.cleanupProgress() == 0.0f);
    assertTrue(runningJob.setupProgress() == 0.0f);
    TaskCompletionEvent[] tce = runningJob.getTaskCompletionEvents(0);
    assertEquals(0, tce.length);
    assertEquals("", runningJob.getHistoryUrl());
    assertFalse(runningJob.isRetired());
    assertEquals("", runningJob.getFailureInfo());
    assertEquals("N/A", runningJob.getJobStatus().getJobName());
    assertEquals(0, client.getMapTaskReports(jobId).length);
    try {
      client.getSetupTaskReports(jobId);
    } catch (YarnRuntimeException e) {
      assertEquals("Unrecognized task type: JOB_SETUP", e.getMessage());
    }
    try {
      client.getCleanupTaskReports(jobId);
    } catch (YarnRuntimeException e) {
      assertEquals("Unrecognized task type: JOB_CLEANUP", e.getMessage());
    }
    assertEquals(0, client.getReduceTaskReports(jobId).length);
    // test ClusterStatus
    ClusterStatus status = client.getClusterStatus(true);
    assertEquals(2, status.getActiveTrackerNames().size());
    // These methods are not implemented and always return an empty
    // collection or zero.
    assertEquals(0, status.getBlacklistedTrackers());
    assertEquals(0, status.getBlacklistedTrackerNames().size());
    assertEquals(0, status.getBlackListedTrackersInfo().size());
    assertEquals(JobTrackerStatus.RUNNING, status.getJobTrackerStatus());
    assertEquals(1, status.getMapTasks());
    assertEquals(20, status.getMaxMapTasks());
    assertEquals(4, status.getMaxReduceTasks());
    assertEquals(0, status.getNumExcludedNodes());
    assertEquals(1, status.getReduceTasks());
    assertEquals(2, status.getTaskTrackers());
    assertEquals(0, status.getTTExpiryInterval());
    assertEquals(JobTrackerStatus.RUNNING, status.getJobTrackerStatus());
    assertEquals(0, status.getGraylistedTrackers());
    // test read and write
    ByteArrayOutputStream dataOut = new ByteArrayOutputStream();
    status.write(new DataOutputStream(dataOut));
    ClusterStatus status2 = new ClusterStatus();
    status2.readFields(new DataInputStream(
        new ByteArrayInputStream(dataOut.toByteArray())));
    assertEquals(status.getActiveTrackerNames(), status2.getActiveTrackerNames());
    assertEquals(status.getBlackListedTrackersInfo(),
        status2.getBlackListedTrackersInfo());
    assertEquals(status.getMapTasks(), status2.getMapTasks());
    // test taskStatusFilter
    JobClient.setTaskOutputFilter(job, TaskStatusFilter.ALL);
    assertEquals(TaskStatusFilter.ALL, JobClient.getTaskOutputFilter(job));
    // runningJob.setJobPriority(JobPriority.HIGH.name());
    // test default map
    assertEquals(20, client.getDefaultMaps());
    assertEquals(4, client.getDefaultReduces());
    assertEquals("jobSubmitDir", client.getSystemDir().getName());
    // test queue information
    JobQueueInfo[] rootQueueInfo = client.getRootQueues();
    assertEquals(1, rootQueueInfo.length);
    assertEquals("default", rootQueueInfo[0].getQueueName());
    JobQueueInfo[] qinfo = client.getQueues();
    assertEquals(1, qinfo.length);
    assertEquals("default", qinfo[0].getQueueName());
    assertEquals(0, client.getChildQueues("default").length);
    assertEquals(1, client.getJobsFromQueue("default").length);
    assertTrue(client.getJobsFromQueue("default")[0].getJobFile()
        .endsWith("/job.xml"));
    JobQueueInfo qi = client.getQueueInfo("default");
    assertEquals("default", qi.getQueueName());
    assertEquals("running", qi.getQueueState());
    QueueAclsInfo[] aai = client.getQueueAclsForCurrentUser();
    assertEquals(2, aai.length);
    assertEquals("root", aai[0].getQueueName());
    assertEquals("default", aai[1].getQueueName());
    // test JobClient
    // The following asserts read JobStatus twice and ensure the returned
    // JobStatus objects correspond to the same Job.
    assertEquals("Expected matching JobIDs", jobId,
        client.getJob(jobId).getJobStatus().getJobID());
    assertEquals("Expected matching startTimes",
        rj.getJobStatus().getStartTime(),
        client.getJob(jobId).getJobStatus().getStartTime());
  } finally {
    if (fileSys != null) {
      fileSys.delete(testDir, true);
    }
    if (mr != null) {
      mr.stop();
    }
  }
}
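Note that the try/catch blocks around getSetupTaskReports and getCleanupTaskReports pass silently if no exception is thrown at all. A hedged sketch of a stricter variant follows; TaskTypeAsserts is an illustrative helper we introduce here, not part of the test.

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;

// Hypothetical helper: unlike the inline try/catch above, this fails the
// test when the expected YarnRuntimeException is never thrown.
final class TaskTypeAsserts {
  private TaskTypeAsserts() {}

  static void assertUnrecognizedSetupTaskType(JobClient client, JobID jobId)
      throws Exception {
    try {
      client.getSetupTaskReports(jobId);
      fail("Expected YarnRuntimeException for task type JOB_SETUP");
    } catch (YarnRuntimeException e) {
      assertEquals("Unrecognized task type: JOB_SETUP", e.getMessage());
    }
  }
}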
Use of org.apache.hadoop.yarn.exceptions.YarnRuntimeException in project hadoop by apache.
The class WebServer, method serviceStart.
@Override
protected void serviceStart() throws Exception {
  Configuration conf = getConfig();
  String bindAddress = WebAppUtils.getWebAppBindURL(conf,
      YarnConfiguration.NM_BIND_HOST,
      WebAppUtils.getNMWebAppURLWithoutScheme(conf));
  boolean enableCors = conf.getBoolean(
      YarnConfiguration.NM_WEBAPP_ENABLE_CORS_FILTER,
      YarnConfiguration.DEFAULT_NM_WEBAPP_ENABLE_CORS_FILTER);
  if (enableCors) {
    getConfig().setBoolean(HttpCrossOriginFilterInitializer.PREFIX
        + HttpCrossOriginFilterInitializer.ENABLED_SUFFIX, true);
  }
  // Always load the pseudo authentication filter to parse "user.name" in a
  // URL and identify the user of an HTTP request.
  boolean hasHadoopAuthFilterInitializer = false;
  String filterInitializerConfKey = "hadoop.http.filter.initializers";
  Class<?>[] initializersClasses = conf.getClasses(filterInitializerConfKey);
  List<String> targets = new ArrayList<String>();
  if (initializersClasses != null) {
    for (Class<?> initializer : initializersClasses) {
      if (initializer.getName().equals(
          AuthenticationFilterInitializer.class.getName())) {
        hasHadoopAuthFilterInitializer = true;
        break;
      }
      targets.add(initializer.getName());
    }
  }
  if (!hasHadoopAuthFilterInitializer) {
    targets.add(AuthenticationFilterInitializer.class.getName());
    conf.set(filterInitializerConfKey, StringUtils.join(",", targets));
  }
  LOG.info("Instantiating NMWebApp at " + bindAddress);
  try {
    this.webApp = WebApps.$for("node", Context.class, this.nmContext, "ws")
        .at(bindAddress)
        .with(conf)
        .withHttpSpnegoPrincipalKey(YarnConfiguration.NM_WEBAPP_SPNEGO_USER_NAME_KEY)
        .withHttpSpnegoKeytabKey(YarnConfiguration.NM_WEBAPP_SPNEGO_KEYTAB_FILE_KEY)
        .withCSRFProtection(YarnConfiguration.NM_CSRF_PREFIX)
        .withXFSProtection(YarnConfiguration.NM_XFS_PREFIX)
        .start(this.nmWebApp);
    this.port = this.webApp.httpServer().getConnectorAddress(0).getPort();
  } catch (Exception e) {
    String msg = "NMWebapps failed to start.";
    LOG.error(msg, e);
    throw new YarnRuntimeException(msg, e);
  }
  super.serviceStart();
}
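This is the standard YARN service idiom: a startup failure inside serviceStart is logged and rethrown as a YarnRuntimeException so the service framework fails fast instead of running half-initialized. A minimal sketch of the same idiom in an AbstractService subclass follows; MyWebService and startServer are illustrative placeholders, not Hadoop classes.

import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;

// Hypothetical service showing the wrap-and-rethrow idiom from
// WebServer.serviceStart: startup failures become unchecked
// YarnRuntimeExceptions so the composite service aborts startup.
public class MyWebService extends AbstractService {

  public MyWebService() {
    super(MyWebService.class.getName());
  }

  @Override
  protected void serviceStart() throws Exception {
    try {
      startServer(); // illustrative placeholder for the real startup work
    } catch (Exception e) {
      // A real service would also log the failure, as WebServer does.
      throw new YarnRuntimeException(getName() + " failed to start.", e);
    }
    super.serviceStart();
  }

  private void startServer() throws Exception {
    // A real implementation would bind ports, register servlets, etc.
  }
}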
Use of org.apache.hadoop.yarn.exceptions.YarnRuntimeException in project hadoop by apache.
The class TestLogAggregationService, method testLogAggregationInitAppFailsWithoutKillingNM.
@Test
@SuppressWarnings("unchecked")
public void testLogAggregationInitAppFailsWithoutKillingNM() throws Exception {
  this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
  this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
      this.remoteRootLogDir.getAbsolutePath());
  LogAggregationService logAggregationService = spy(
      new LogAggregationService(dispatcher, this.context, this.delSrvc,
          super.dirsHandler));
  logAggregationService.init(this.conf);
  logAggregationService.start();
  ApplicationId appId = BuilderUtils.newApplicationId(
      System.currentTimeMillis(), (int) (Math.random() * 1000));
  doThrow(new YarnRuntimeException("KABOOM!")).when(logAggregationService)
      .initAppAggregator(eq(appId), eq(user), any(Credentials.class),
          anyMap(), any(LogAggregationContext.class), anyLong());
  LogAggregationContext contextWithAMAndFailed =
      Records.newRecord(LogAggregationContext.class);
  contextWithAMAndFailed.setLogAggregationPolicyClassName(
      AMOrFailedContainerLogAggregationPolicy.class.getName());
  logAggregationService.handle(new LogHandlerAppStartedEvent(appId, this.user,
      null, this.acls, contextWithAMAndFailed));
  dispatcher.await();
  ApplicationEvent[] expectedEvents = new ApplicationEvent[] {
      new ApplicationEvent(appId,
          ApplicationEventType.APPLICATION_LOG_HANDLING_FAILED) };
  checkEvents(appEventHandler, expectedEvents, false,
      "getType", "getApplicationID", "getDiagnostic");
  // No file systems have been instantiated yet.
  verify(logAggregationService, never()).closeFileSystems(
      any(UserGroupInformation.class));
  // Verify that trying to collect logs for containers/apps we don't know
  // about doesn't blow up and tear down the NM.
  logAggregationService.handle(new LogHandlerContainerFinishedEvent(
      BuilderUtils.newContainerId(4, 1, 1, 1), 0));
  dispatcher.await();
  logAggregationService.handle(new LogHandlerAppFinishedEvent(
      BuilderUtils.newApplicationId(1, 5)));
  dispatcher.await();
}
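The failure here is injected with Mockito's doThrow on a spy, so the real LogAggregationService runs normally except that initAppAggregator throws. A minimal hedged sketch of that stubbing pattern in isolation follows; Widget and its init method are illustrative names, not part of Hadoop or its tests.

import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.spy;

import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;

// Hypothetical sketch of the doThrow-on-a-spy pattern used above:
// the spied object keeps its real behavior except for the stubbed call.
public class SpyFailureSketch {

  static class Widget {
    void init(String name) {
      // real initialization work would go here
    }
  }

  public static void main(String[] args) {
    Widget widget = spy(new Widget());
    doThrow(new YarnRuntimeException("KABOOM!")).when(widget).init("app1");
    try {
      widget.init("app1"); // throws the injected exception
    } catch (YarnRuntimeException expected) {
      System.out.println("caught: " + expected.getMessage());
    }
  }
}

Using doThrow(...).when(spy).method(...) rather than when(spy.method(...)) matters for spies: the latter would execute the real method while setting up the stub.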