Search in sources :

Example 11 with DrainDispatcher

use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.

the class TestAMRMClientOnRMRestart method testAMRMClientOnAMRMTokenRollOverOnRMRestart.

// Test verify for AM issued with rolled-over AMRMToken
// is still able to communicate with restarted RM.
@Test(timeout = 30000)
public void testAMRMClientOnAMRMTokenRollOverOnRMRestart() throws Exception {
    conf.setLong(YarnConfiguration.RM_AMRM_TOKEN_MASTER_KEY_ROLLING_INTERVAL_SECS, rolling_interval_sec);
    conf.setLong(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, am_expire_ms);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    // start first RM
    MyResourceManager2 rm1 = new MyResourceManager2(conf, memStore);
    rm1.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm1.getRMContext().getDispatcher();
    Long startTime = System.currentTimeMillis();
    // Submit the application
    RMApp app = rm1.submitApp(1024);
    dispatcher.await();
    MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // Node heartbeat
    nm1.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm1.sendAMLaunched(appAttemptId);
    dispatcher.await();
    AMRMTokenSecretManager amrmTokenSecretManagerForRM1 = rm1.getRMContext().getAMRMTokenSecretManager();
    org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> token = amrmTokenSecretManagerForRM1.createAndGetAMRMToken(appAttemptId);
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    ugi.addTokenIdentifier(token.decodeIdentifier());
    AMRMClient<ContainerRequest> amClient = new MyAMRMClientImpl(rm1);
    amClient.init(conf);
    amClient.start();
    amClient.registerApplicationMaster("h1", 10000, "");
    amClient.allocate(0.1f);
    // At mean time, the old AMRMToken should continue to work
    while (System.currentTimeMillis() - startTime < rolling_interval_sec * 1000) {
        amClient.allocate(0.1f);
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
        // DO NOTHING
        }
    }
    Assert.assertTrue(amrmTokenSecretManagerForRM1.getMasterKey().getMasterKey().getKeyId() != token.decodeIdentifier().getKeyId());
    amClient.allocate(0.1f);
    // active the nextMasterKey, and replace the currentMasterKey
    org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> newToken = amrmTokenSecretManagerForRM1.createAndGetAMRMToken(appAttemptId);
    int waitCount = 0;
    while (waitCount++ <= 50) {
        if (amrmTokenSecretManagerForRM1.getCurrnetMasterKeyData().getMasterKey().getKeyId() != token.decodeIdentifier().getKeyId()) {
            break;
        }
        try {
            amClient.allocate(0.1f);
        } catch (Exception ex) {
            break;
        }
        Thread.sleep(500);
    }
    Assert.assertTrue(amrmTokenSecretManagerForRM1.getNextMasterKeyData() == null);
    Assert.assertTrue(amrmTokenSecretManagerForRM1.getCurrnetMasterKeyData().getMasterKey().getKeyId() == newToken.decodeIdentifier().getKeyId());
    // start 2nd RM
    conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, "0.0.0.0:" + ServerSocketUtil.getPort(45020, 10));
    final MyResourceManager2 rm2 = new MyResourceManager2(conf, memStore);
    rm2.start();
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    ((MyAMRMClientImpl) amClient).updateRMProxy(rm2);
    dispatcher = (DrainDispatcher) rm2.getRMContext().getDispatcher();
    AMRMTokenSecretManager amrmTokenSecretManagerForRM2 = rm2.getRMContext().getAMRMTokenSecretManager();
    Assert.assertTrue(amrmTokenSecretManagerForRM2.getCurrnetMasterKeyData().getMasterKey().getKeyId() == newToken.decodeIdentifier().getKeyId());
    Assert.assertTrue(amrmTokenSecretManagerForRM2.getNextMasterKeyData() == null);
    try {
        UserGroupInformation testUser = UserGroupInformation.createRemoteUser("testUser");
        SecurityUtil.setTokenService(token, rm2.getApplicationMasterService().getBindAddress());
        testUser.addToken(token);
        testUser.doAs(new PrivilegedAction<ApplicationMasterProtocol>() {

            @Override
            public ApplicationMasterProtocol run() {
                return (ApplicationMasterProtocol) YarnRPC.create(conf).getProxy(ApplicationMasterProtocol.class, rm2.getApplicationMasterService().getBindAddress(), conf);
            }
        }).allocate(Records.newRecord(AllocateRequest.class));
        Assert.fail("The old Token should not work");
    } catch (Exception ex) {
        Assert.assertTrue(ex instanceof InvalidToken);
        Assert.assertTrue(ex.getMessage().contains("Invalid AMRMToken from " + token.decodeIdentifier().getApplicationAttemptId()));
    }
    // make sure the recovered AMRMToken works for new RM
    amClient.allocate(0.1f);
    amClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
    amClient.stop();
    rm1.stop();
    rm2.stop();
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) AllocateRequest(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) ApplicationMasterProtocol(org.apache.hadoop.yarn.api.ApplicationMasterProtocol) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) AMRMTokenSecretManager(org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager) IOException(java.io.IOException) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) AMRMTokenIdentifier(org.apache.hadoop.yarn.security.AMRMTokenIdentifier) PrivilegedAction(java.security.PrivilegedAction) InvalidToken(org.apache.hadoop.security.token.SecretManager.InvalidToken) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) UpdateContainerRequest(org.apache.hadoop.yarn.api.records.UpdateContainerRequest) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Test(org.junit.Test)

Example 12 with DrainDispatcher

use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.

the class TestAMRMClientOnRMRestart method testAMRMClientForUnregisterAMOnRMRestart.

// Test verify for
// 1. AM try to unregister without registering
// 2. AM register to RM, and try to unregister immediately after RM restart
@Test(timeout = 60000)
public void testAMRMClientForUnregisterAMOnRMRestart() throws Exception {
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    // Phase-1 Start 1st RM
    MyResourceManager rm1 = new MyResourceManager(conf, memStore);
    rm1.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm1.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm1.submitApp(1024);
    dispatcher.await();
    MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // Node heartbeat
    nm1.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm1.sendAMLaunched(appAttemptId);
    dispatcher.await();
    org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> token = rm1.getRMContext().getRMApps().get(appAttemptId.getApplicationId()).getRMAppAttempt(appAttemptId).getAMRMToken();
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    ugi.addTokenIdentifier(token.decodeIdentifier());
    AMRMClient<ContainerRequest> amClient = new MyAMRMClientImpl(rm1);
    amClient.init(conf);
    amClient.start();
    amClient.registerApplicationMaster("h1", 10000, "");
    amClient.allocate(0.1f);
    // Phase-2 start 2nd RM is up
    MyResourceManager rm2 = new MyResourceManager(conf, memStore);
    rm2.start();
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    ((MyAMRMClientImpl) amClient).updateRMProxy(rm2);
    dispatcher = (DrainDispatcher) rm2.getRMContext().getDispatcher();
    // NM should be rebooted on heartbeat, even first heartbeat for nm2
    NodeHeartbeatResponse hbResponse = nm1.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction());
    // new NM to represent NM re-register
    nm1 = new MockNM("h1:1234", 10240, rm2.getResourceTrackerService());
    ContainerId containerId = ContainerId.newContainerId(appAttemptId, 1);
    NMContainerStatus containerReport = NMContainerStatus.newInstance(containerId, 0, ContainerState.RUNNING, Resource.newInstance(1024, 1), "recover container", 0, Priority.newInstance(0), 0);
    nm1.registerNode(Arrays.asList(containerReport), null);
    nm1.nodeHeartbeat(true);
    dispatcher.await();
    amClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
    rm2.waitForState(appAttemptId, RMAppAttemptState.FINISHING);
    nm1.nodeHeartbeat(appAttemptId, 1, ContainerState.COMPLETE);
    rm2.waitForState(appAttemptId, RMAppAttemptState.FINISHED);
    rm2.waitForState(app.getApplicationId(), RMAppState.FINISHED);
    amClient.stop();
    rm1.stop();
    rm2.stop();
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeHeartbeatResponse(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) AMRMTokenIdentifier(org.apache.hadoop.yarn.security.AMRMTokenIdentifier) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) UpdateContainerRequest(org.apache.hadoop.yarn.api.records.UpdateContainerRequest) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Test(org.junit.Test)

Example 13 with DrainDispatcher

use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.

the class RMHATestBase method startRMs.

protected void startRMs() throws IOException {
    rm1 = new MockRM(confForRM1, null, false, false) {

        @Override
        protected Dispatcher createDispatcher() {
            return new DrainDispatcher();
        }
    };
    rm2 = new MockRM(confForRM2, null, false, false) {

        @Override
        protected Dispatcher createDispatcher() {
            return new DrainDispatcher();
        }
    };
    startRMs(rm1, confForRM1, rm2, confForRM2);
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher)

Example 14 with DrainDispatcher

use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.

the class ReservationACLsTestBase method registerNode.

private void registerNode(String host, int memory, int vCores) throws Exception {
    try {
        resourceManager.registerNode(host, memory, vCores);
        int attempts = 10;
        Collection<Plan> plans;
        do {
            DrainDispatcher dispatcher = (DrainDispatcher) resourceManager.getRMContext().getDispatcher();
            dispatcher.await();
            LOG.info("Waiting for node capacity to be added to plan");
            plans = resourceManager.getRMContext().getReservationSystem().getAllPlans().values();
            if (checkCapacity(plans)) {
                break;
            }
            Thread.sleep(100);
        } while (attempts-- > 0);
        if (attempts <= 0) {
            Assert.fail("Exhausted attempts in checking if node capacity was " + "added to the plan");
        }
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) Plan(org.apache.hadoop.yarn.server.resourcemanager.reservation.Plan) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException)

Example 15 with DrainDispatcher

use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.

the class TestResourceLocalizationService method testPublicResourceInitializesLocalDir.

@Test
@SuppressWarnings("unchecked")
public void testPublicResourceInitializesLocalDir() throws Exception {
    // Setup state to simulate restart NM with existing state meaning no
    // directory creation during initialization
    NMStateStoreService spyStateStore = spy(nmContext.getNMStateStore());
    when(spyStateStore.canRecover()).thenReturn(true);
    NMContext spyContext = spy(nmContext);
    when(spyContext.getNMStateStore()).thenReturn(spyStateStore);
    List<Path> localDirs = new ArrayList<Path>();
    String[] sDirs = new String[4];
    for (int i = 0; i < 4; ++i) {
        localDirs.add(lfs.makeQualified(new Path(basedir, i + "")));
        sDirs[i] = localDirs.get(i).toString();
    }
    conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS, sDirs);
    DrainDispatcher dispatcher = new DrainDispatcher();
    EventHandler<ApplicationEvent> applicationBus = mock(EventHandler.class);
    dispatcher.register(ApplicationEventType.class, applicationBus);
    EventHandler<ContainerEvent> containerBus = mock(EventHandler.class);
    dispatcher.register(ContainerEventType.class, containerBus);
    ContainerExecutor exec = mock(ContainerExecutor.class);
    DeletionService delService = mock(DeletionService.class);
    LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
    dirsHandler.init(conf);
    dispatcher.init(conf);
    dispatcher.start();
    try {
        ResourceLocalizationService rawService = new ResourceLocalizationService(dispatcher, exec, delService, dirsHandler, spyContext);
        ResourceLocalizationService spyService = spy(rawService);
        doReturn(mockServer).when(spyService).createServer();
        doReturn(lfs).when(spyService).getLocalFileContext(isA(Configuration.class));
        spyService.init(conf);
        final FsPermission defaultPerm = new FsPermission((short) 0755);
        // verify directory is not created at initialization
        for (Path p : localDirs) {
            p = new Path((new URI(p.toString())).getPath());
            Path publicCache = new Path(p, ContainerLocalizer.FILECACHE);
            verify(spylfs, never()).mkdir(eq(publicCache), eq(defaultPerm), eq(true));
        }
        spyService.start();
        final String user = "user0";
        // init application
        final Application app = mock(Application.class);
        final ApplicationId appId = BuilderUtils.newApplicationId(314159265358979L, 3);
        when(app.getUser()).thenReturn(user);
        when(app.getAppId()).thenReturn(appId);
        spyService.handle(new ApplicationLocalizationEvent(LocalizationEventType.INIT_APPLICATION_RESOURCES, app));
        dispatcher.await();
        // init container.
        final Container c = getMockContainer(appId, 42, user);
        // init resources
        Random r = new Random();
        long seed = r.nextLong();
        System.out.println("SEED: " + seed);
        r.setSeed(seed);
        // Queue up public resource localization
        final LocalResource pubResource1 = getPublicMockedResource(r);
        final LocalResourceRequest pubReq1 = new LocalResourceRequest(pubResource1);
        LocalResource pubResource2 = null;
        do {
            pubResource2 = getPublicMockedResource(r);
        } while (pubResource2 == null || pubResource2.equals(pubResource1));
        // above call to make sure we don't get identical resources.
        final LocalResourceRequest pubReq2 = new LocalResourceRequest(pubResource2);
        Set<LocalResourceRequest> pubRsrcs = new HashSet<LocalResourceRequest>();
        pubRsrcs.add(pubReq1);
        pubRsrcs.add(pubReq2);
        Map<LocalResourceVisibility, Collection<LocalResourceRequest>> req = new HashMap<LocalResourceVisibility, Collection<LocalResourceRequest>>();
        req.put(LocalResourceVisibility.PUBLIC, pubRsrcs);
        spyService.handle(new ContainerLocalizationRequestEvent(c, req));
        dispatcher.await();
        verify(spyService, times(1)).checkAndInitializeLocalDirs();
        // verify directory creation
        for (Path p : localDirs) {
            p = new Path((new URI(p.toString())).getPath());
            Path publicCache = new Path(p, ContainerLocalizer.FILECACHE);
            verify(spylfs).mkdir(eq(publicCache), eq(defaultPerm), eq(true));
        }
    } finally {
        dispatcher.stop();
    }
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) ContainerExecutor(org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor) DefaultContainerExecutor(org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor) ContainerLocalizationRequestEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ContainerLocalizationRequestEvent) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) URI(java.net.URI) LocalResourceVisibility(org.apache.hadoop.yarn.api.records.LocalResourceVisibility) Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) Random(java.util.Random) ApplicationLocalizationEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ApplicationLocalizationEvent) FsPermission(org.apache.hadoop.fs.permission.FsPermission) HashSet(java.util.HashSet) Path(org.apache.hadoop.fs.Path) ContainerEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent) ApplicationEvent(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent) DeletionService(org.apache.hadoop.yarn.server.nodemanager.DeletionService) LocalDirsHandlerService(org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService) NMStateStoreService(org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) NMContext(org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext) Collection(java.util.Collection) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Application(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application) Test(org.junit.Test)

Aggregations

DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher)84 Test (org.junit.Test)73 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)54 Configuration (org.apache.hadoop.conf.Configuration)50 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)41 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)35 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)32 Path (org.apache.hadoop.fs.Path)24 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)24 Job (org.apache.hadoop.mapreduce.v2.app.job.Job)23 JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)21 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)20 IOException (java.io.IOException)16 ArrayList (java.util.ArrayList)16 DeletionService (org.apache.hadoop.yarn.server.nodemanager.DeletionService)16 LocalizerEvent (org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerEvent)15 HashMap (java.util.HashMap)14 LocalDirsHandlerService (org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService)14 LocalResourceVisibility (org.apache.hadoop.yarn.api.records.LocalResourceVisibility)12 Container (org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container)12