Use of org.apache.hadoop.yarn.exceptions.NMNotYetReadyException in project hadoop by apache.
From the class TestApplicationMasterLauncher, method testRetriesOnFailures:
@Test
public void testRetriesOnFailures() throws Exception {
  final ContainerManagementProtocol mockProxy =
      mock(ContainerManagementProtocol.class);
  final StartContainersResponse mockResponse =
      mock(StartContainersResponse.class);
  when(mockProxy.startContainers(any(StartContainersRequest.class)))
      .thenThrow(new NMNotYetReadyException("foo"))
      .thenReturn(mockResponse);
  Configuration conf = new Configuration();
  conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
  conf.setInt(YarnConfiguration.CLIENT_NM_CONNECT_RETRY_INTERVAL_MS, 1);
  final DrainDispatcher dispatcher = new DrainDispatcher();
  MockRM rm = new MockRMWithCustomAMLauncher(conf, null) {

    @Override
    protected ApplicationMasterLauncher createAMLauncher() {
      return new ApplicationMasterLauncher(getRMContext()) {

        @Override
        protected Runnable createRunnableLauncher(RMAppAttempt application,
            AMLauncherEventType event) {
          return new AMLauncher(context, application, event, getConfig()) {

            @Override
            protected YarnRPC getYarnRPC() {
              YarnRPC mockRpc = mock(YarnRPC.class);
              when(mockRpc.getProxy(any(Class.class),
                  any(InetSocketAddress.class), any(Configuration.class)))
                  .thenReturn(mockProxy);
              return mockRpc;
            }
          };
        }
      };
    }

    @Override
    protected Dispatcher createDispatcher() {
      return dispatcher;
    }
  };
  rm.start();
  MockNM nm1 = rm.registerNode("127.0.0.1:1234", 5120);
  RMApp app = rm.submitApp(2000);
  // kick the scheduling
  nm1.nodeHeartbeat(true);
  dispatcher.await();
  MockRM.waitForState(app.getCurrentAppAttempt(),
      RMAppAttemptState.LAUNCHED, 500);
}
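The assertion only passes because the AM launcher keeps retrying after the first startContainers call fails with NMNotYetReadyException; the retry interval is the CLIENT_NM_CONNECT_RETRY_INTERVAL_MS value the test sets to 1 ms. The sketch below shows the shape of that retry loop, assuming a ContainerManagementProtocol proxy and a prepared StartContainersRequest. It is illustrative only (the real launcher delegates retries to its NM proxy layer), and maxRetries/retryIntervalMs are hypothetical parameters.

// Illustrative sketch only, not the actual launcher code: retry startContainers
// while the NodeManager reports that it is not yet ready. The retry bound and
// interval are hypothetical parameters.
static StartContainersResponse startWithRetry(ContainerManagementProtocol proxy,
    StartContainersRequest request, int maxRetries, long retryIntervalMs)
    throws YarnException, IOException, InterruptedException {
  for (int attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      return proxy.startContainers(request);
    } catch (NMNotYetReadyException e) {
      // The NM has not finished connecting to the RM yet; back off and retry.
      Thread.sleep(retryIntervalMs);
    }
  }
  throw new NMNotYetReadyException(
      "NodeManager still not ready after " + maxRetries + " attempts");
}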
Use of org.apache.hadoop.yarn.exceptions.NMNotYetReadyException in project hadoop by apache.
From the class ContainerManagerImpl, method startContainers:
/**
 * Start a list of containers on this NodeManager.
 */
@Override
public StartContainersResponse startContainers(StartContainersRequest requests)
    throws YarnException, IOException {
  if (blockNewContainerRequests.get()) {
    throw new NMNotYetReadyException(
        "Rejecting new containers as NodeManager has not"
            + " yet connected with ResourceManager");
  }
  UserGroupInformation remoteUgi = getRemoteUgi();
  NMTokenIdentifier nmTokenIdentifier = selectNMTokenIdentifier(remoteUgi);
  authorizeUser(remoteUgi, nmTokenIdentifier);
  List<ContainerId> succeededContainers = new ArrayList<ContainerId>();
  Map<ContainerId, SerializedException> failedContainers =
      new HashMap<ContainerId, SerializedException>();
  // Synchronize on the NM context to avoid racing with NM-RM resync while a
  // container is being started, in particular before the container has
  // been added to the containers map in NMContext.
  synchronized (this.context) {
    for (StartContainerRequest request : requests.getStartContainerRequests()) {
      ContainerId containerId = null;
      try {
        if (request.getContainerToken() == null
            || request.getContainerToken().getIdentifier() == null) {
          throw new IOException(INVALID_CONTAINERTOKEN_MSG);
        }
        ContainerTokenIdentifier containerTokenIdentifier =
            BuilderUtils.newContainerTokenIdentifier(request.getContainerToken());
        verifyAndGetContainerTokenIdentifier(request.getContainerToken(),
            containerTokenIdentifier);
        containerId = containerTokenIdentifier.getContainerID();
        // Route the start request through the AMRMProxy if the container is of
        // type AM and if the AMRMProxy service is enabled
        if (amrmProxyEnabled && containerTokenIdentifier.getContainerType()
            .equals(ContainerType.APPLICATION_MASTER)) {
          this.getAMRMProxyService().processApplicationStartRequest(request);
        }
        performContainerPreStartChecks(nmTokenIdentifier, request,
            containerTokenIdentifier);
        startContainerInternal(containerTokenIdentifier, request);
        succeededContainers.add(containerId);
      } catch (YarnException e) {
        failedContainers.put(containerId, SerializedException.newInstance(e));
      } catch (InvalidToken ie) {
        failedContainers.put(containerId, SerializedException.newInstance(ie));
        throw ie;
      } catch (IOException e) {
        throw RPCUtil.getRemoteException(e);
      }
    }
    return StartContainersResponse.newInstance(getAuxServiceMetaData(),
        succeededContainers, failedContainers);
  }
}
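Callers normally do not handle this exception by hand: the client-side proxy helper (NMProxy in the versions this snippet comes from) installs a retry policy that treats NMNotYetReadyException as retriable, bounded by the CLIENT_NM_CONNECT_* settings. Below is a hedged sketch of the client side, assuming a prepared list of StartContainerRequests (startRequests) and a known NodeManager address; none of this is Hadoop source.

// Hedged sketch of a client obtaining an NM proxy and starting containers.
// nmAddress and startRequests are assumed to be prepared by the caller.
public StartContainersResponse startOnNode(Configuration conf,
    InetSocketAddress nmAddress, List<StartContainerRequest> startRequests)
    throws YarnException, IOException {
  YarnRPC rpc = YarnRPC.create(conf);
  UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
  ContainerManagementProtocol proxy = NMProxy.createNMProxy(
      conf, ContainerManagementProtocol.class, ugi, rpc, nmAddress);
  StartContainersResponse response =
      proxy.startContainers(StartContainersRequest.newInstance(startRequests));
  // Per-container failures (e.g. an invalid token) come back in the response
  // map rather than as thrown exceptions.
  for (Map.Entry<ContainerId, SerializedException> failed :
      response.getFailedRequests().entrySet()) {
    System.err.println("Container " + failed.getKey() + " failed to start");
  }
  return response;
}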
Use of org.apache.hadoop.yarn.exceptions.NMNotYetReadyException in project hadoop by apache.
From the class ContainerManagerImpl, method increaseContainersResource:
/**
 * Increase resource of a list of containers on this NodeManager.
 */
@Override
public IncreaseContainersResourceResponse increaseContainersResource(
    IncreaseContainersResourceRequest requests) throws YarnException, IOException {
  if (blockNewContainerRequests.get()) {
    throw new NMNotYetReadyException(
        "Rejecting container resource increase as NodeManager has not"
            + " yet connected with ResourceManager");
  }
  UserGroupInformation remoteUgi = getRemoteUgi();
  NMTokenIdentifier nmTokenIdentifier = selectNMTokenIdentifier(remoteUgi);
  authorizeUser(remoteUgi, nmTokenIdentifier);
  List<ContainerId> successfullyIncreasedContainers = new ArrayList<ContainerId>();
  Map<ContainerId, SerializedException> failedContainers =
      new HashMap<ContainerId, SerializedException>();
  // Synchronize on the NM context to avoid racing with NM-RM resync while a
  // container's resource is being changed, before the change is reflected in
  // the containers map in NMContext.
  synchronized (this.context) {
    // Process container resource increase requests
    for (org.apache.hadoop.yarn.api.records.Token token :
        requests.getContainersToIncrease()) {
      ContainerId containerId = null;
      try {
        if (token.getIdentifier() == null) {
          throw new IOException(INVALID_CONTAINERTOKEN_MSG);
        }
        ContainerTokenIdentifier containerTokenIdentifier =
            BuilderUtils.newContainerTokenIdentifier(token);
        verifyAndGetContainerTokenIdentifier(token, containerTokenIdentifier);
        authorizeStartAndResourceIncreaseRequest(nmTokenIdentifier,
            containerTokenIdentifier, false);
        containerId = containerTokenIdentifier.getContainerID();
        // Reuse the startContainer logic to update NMToken,
        // as container resource increase request will have come with
        // an updated NMToken.
        updateNMTokenIdentifier(nmTokenIdentifier);
        Resource resource = containerTokenIdentifier.getResource();
        changeContainerResourceInternal(containerId,
            containerTokenIdentifier.getVersion(), resource, true);
        successfullyIncreasedContainers.add(containerId);
      } catch (YarnException | InvalidToken e) {
        failedContainers.put(containerId, SerializedException.newInstance(e));
      } catch (IOException e) {
        throw RPCUtil.getRemoteException(e);
      }
    }
  }
  return IncreaseContainersResourceResponse.newInstance(
      successfullyIncreasedContainers, failedContainers);
}
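From the caller's point of view, the request carries the updated container tokens issued by the ResourceManager, and failures are again reported per container in the response. A minimal sketch follows, assuming the proxy and token list are obtained elsewhere and that the response getters mirror the newInstance arguments used above (getSuccessfullyIncreasedContainers, getFailedRequests); this is not Hadoop source.

// Hedged sketch, not Hadoop source: send a resource-increase request and
// inspect the per-container results.
public void increaseResources(ContainerManagementProtocol proxy,
    List<org.apache.hadoop.yarn.api.records.Token> updatedContainerTokens)
    throws YarnException, IOException {
  IncreaseContainersResourceRequest request =
      IncreaseContainersResourceRequest.newInstance(updatedContainerTokens);
  IncreaseContainersResourceResponse response =
      proxy.increaseContainersResource(request);
  for (ContainerId id : response.getSuccessfullyIncreasedContainers()) {
    System.out.println("Resource increase applied to " + id);
  }
  for (ContainerId id : response.getFailedRequests().keySet()) {
    System.err.println("Resource increase failed for " + id);
  }
}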
Use of org.apache.hadoop.yarn.exceptions.NMNotYetReadyException in project hadoop by apache.
From the class ContainerManagerImpl, method stopContainerInternal:
@SuppressWarnings("unchecked")
protected void stopContainerInternal(ContainerId containerID)
    throws YarnException, IOException {
  String containerIDStr = containerID.toString();
  Container container = this.context.getContainers().get(containerID);
  LOG.info("Stopping container with container Id: " + containerIDStr);
  if (container == null) {
    if (!nodeStatusUpdater.isContainerRecentlyStopped(containerID)) {
      throw RPCUtil.getRemoteException("Container " + containerIDStr
          + " is not handled by this NodeManager");
    }
  } else {
    if (container.isRecovering()) {
      throw new NMNotYetReadyException("Container " + containerIDStr
          + " is recovering, try later");
    }
    context.getNMStateStore().storeContainerKilled(containerID);
    container.sendKillEvent(ContainerExitStatus.KILLED_BY_APPMASTER,
        "Container killed by the ApplicationMaster.");
    NMAuditLogger.logSuccess(container.getUser(), AuditConstants.STOP_CONTAINER,
        "ContainerManageImpl",
        containerID.getApplicationAttemptId().getApplicationId(), containerID);
  }
}
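Here the NMNotYetReadyException signals that the container is still being recovered after a NodeManager restart and that the stop request should simply be retried. In the stop path the per-container YarnException is typically serialized into the failed-requests map of StopContainersResponse rather than rethrown to the RPC client, so a caller-side sketch (not Hadoop source; the retry bound and sleep interval are hypothetical) could look like this:

// Hedged sketch, not Hadoop source: retry a stop request while the container
// is still recovering. Assumes the failure surfaces in the response's
// failed-requests map as a serialized NMNotYetReadyException.
public void stopWhenRecovered(ContainerManagementProtocol proxy,
    ContainerId containerId)
    throws YarnException, IOException, InterruptedException {
  StopContainersRequest request =
      StopContainersRequest.newInstance(Collections.singletonList(containerId));
  for (int attempt = 0; attempt < 5; attempt++) {
    StopContainersResponse response = proxy.stopContainers(request);
    SerializedException failure = response.getFailedRequests().get(containerId);
    if (failure == null) {
      return; // the stop request was accepted
    }
    Throwable cause = failure.deSerialize();
    if (cause instanceof NMNotYetReadyException) {
      Thread.sleep(200L); // container still recovering; back off and retry
    } else {
      throw new YarnException("Failed to stop container " + containerId, cause);
    }
  }
  throw new YarnException("Container " + containerId
      + " was still recovering after repeated stop attempts");
}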