Search in sources :

Example 21 with ClusterManagerConfig

use of org.apache.samza.config.ClusterManagerConfig in project samza by apache.

the class TestContainerProcessManager method testContainerRequestedRetriesNotExceedingWindowOnFailureWithUnknownCode.

/**
 * Drives one processor through repeated container failures (exit code 1) and checks how the
 * {@link ContainerProcessManager} re-requests resources and when it declares the job failed.
 *
 * <p>The scenario uses {@code maxRetries = 3}. Before each simulated failure the processor's
 * failure record is overwritten with a fresh {@code ProcessorFailure} stamped
 * {@code Instant.now()}, so every failure is recent relative to the previous one.
 *
 * @param withHostAffinity  whether the config enables host affinity; when {@code true} the test
 *                          expects the retry after the 3rd failure to land in the delayed-request
 *                          queue instead of the pending queue
 * @param failAfterRetries  value forwarded to the config helper; the test expects the job-failure
 *                          criteria and {@code shouldShutdown()} to equal this flag after the
 *                          4th failure
 * @throws Exception if any of the exercised components throws
 */
private void testContainerRequestedRetriesNotExceedingWindowOnFailureWithUnknownCode(boolean withHostAffinity, boolean failAfterRetries) throws Exception {
    int maxRetries = 3;
    String processorId = "0";
    ClusterManagerConfig clusterManagerConfig = new ClusterManagerConfig(getConfigWithHostAffinityAndRetries(withHostAffinity, maxRetries, failAfterRetries));
    SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(1));
    MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
    MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
    FaultDomainManager faultDomainManager = mock(FaultDomainManager.class);
    LocalityManager mockLocalityManager = mock(LocalityManager.class);
    // With host affinity, processor "0" is recorded as last seen on "host1"; otherwise no locality is known.
    if (withHostAffinity) {
        when(mockLocalityManager.readLocality()).thenReturn(new LocalityModel(ImmutableMap.of("0", new ProcessorLocality("0", "host1"))));
    } else {
        when(mockLocalityManager.readLocality()).thenReturn(new LocalityModel(new HashMap<>()));
    }
    ContainerManager containerManager = buildContainerManager(containerPlacementMetadataStore, state, clusterResourceManager, clusterManagerConfig.getHostAffinityEnabled(), false, mockLocalityManager, faultDomainManager);
    MockContainerAllocatorWithoutHostAffinity allocator = new MockContainerAllocatorWithoutHostAffinity(clusterResourceManager, clusterManagerConfig, state, containerManager);
    ContainerProcessManager cpm = buildContainerProcessManager(clusterManagerConfig, state, clusterResourceManager, Optional.of(allocator), mockLocalityManager, false, faultDomainManager);
    // start triggers a request
    cpm.start();
    assertFalse(cpm.shouldShutdown());
    assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
    assertEquals(0, allocator.getContainerRequestState().numDelayedRequests());
    SamzaResource container = new SamzaResource(1, 1024, "host1", "id0");
    cpm.onResourceAllocated(container);
    // Allow container to run and update state
    if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    cpm.onStreamProcessorLaunchSuccess(container);
    // Mock 2nd failure not exceeding retry window: seed the record with count 1, then fail again.
    cpm.getProcessorFailures().put(processorId, new ProcessorFailure(1, Instant.now(), Duration.ZERO));
    cpm.onResourceCompleted(new SamzaResourceStatus(container.getContainerId(), "diagnostics", 1));
    assertFalse(cpm.getJobFailureCriteriaMet());
    assertEquals(2, cpm.getProcessorFailures().get(processorId).getCount());
    assertFalse(cpm.shouldShutdown());
    assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
    assertEquals(0, allocator.getContainerRequestState().numDelayedRequests());
    cpm.onResourceAllocated(container);
    // Allow container to run and update state
    if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    cpm.onStreamProcessorLaunchSuccess(container);
    // Mock 3rd failure not exceeding retry window: seed the record with count 2, then fail again.
    cpm.getProcessorFailures().put(processorId, new ProcessorFailure(2, Instant.now(), Duration.ZERO));
    cpm.onResourceCompleted(new SamzaResourceStatus(container.getContainerId(), "diagnostics", 1));
    assertFalse(cpm.getJobFailureCriteriaMet());
    assertEquals(3, cpm.getProcessorFailures().get(processorId).getCount());
    assertFalse(cpm.shouldShutdown());
    // After the 3rd failure the retry is expected to be delayed with host affinity and immediate without it.
    if (withHostAffinity) {
        assertEquals(0, allocator.getContainerRequestState().numPendingRequests());
        assertEquals(1, allocator.getContainerRequestState().numDelayedRequests());
    } else {
        assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
        assertEquals(0, allocator.getContainerRequestState().numDelayedRequests());
    }
    cpm.onResourceAllocated(container);
    if (withHostAffinity) {
        if (allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
            // The retry is still sitting in the delayed queue, so no container should have started:
            // the await above is expected to time out rather than return true.
            fail("Expecting a delayed request so allocator callback should have timed out waiting for a response.");
        }
        // For the sake of testing the mocked 4th failure below, send delayed requests now.
        // The delayed request is replaced by a copy whose last constructor argument is one second in the
        // past (presumably its request timestamp -- confirm against SamzaResourceRequest) so it is sendable.
        SamzaResourceRequest request = allocator.getContainerRequestState().getDelayedRequestsQueue().poll();
        SamzaResourceRequest fastForwardRequest = new SamzaResourceRequest(request.getNumCores(), request.getMemoryMB(), request.getPreferredHost(), request.getProcessorId(), Instant.now().minusSeconds(1));
        allocator.getContainerRequestState().getDelayedRequestsQueue().add(fastForwardRequest);
        int numSent = allocator.getContainerRequestState().sendPendingDelayedResourceRequests();
        assertEquals(1, numSent);
        cpm.onResourceAllocated(container);
    }
    // In both modes a request is now pending (sent immediately, or fast-forwarded above),
    // so the container is expected to start within the timeout.
    if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
        fail("Timed out waiting for the containers to start");
    }
    cpm.onStreamProcessorLaunchSuccess(container);
    // Mock 4th failure not exceeding retry window: seed the record with count 3, then fail again.
    cpm.getProcessorFailures().put(processorId, new ProcessorFailure(3, Instant.now(), Duration.ZERO));
    cpm.onResourceCompleted(new SamzaResourceStatus(container.getContainerId(), "diagnostics", 1));
    // expecting failed container
    assertEquals(failAfterRetries, cpm.getJobFailureCriteriaMet());
    // count won't update on failure
    assertEquals(3, cpm.getProcessorFailures().get(processorId).getCount());
    // Shutdown is expected exactly when the job is configured to fail after exhausting retries.
    assertEquals(failAfterRetries, cpm.shouldShutdown());
    // No further container request is expected once retries are exhausted, in either mode.
    assertEquals(0, allocator.getContainerRequestState().numPendingRequests());
    assertEquals(0, allocator.getContainerRequestState().numDelayedRequests());
    cpm.stop();
}
Also used : HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) LocalityModel(org.apache.samza.job.model.LocalityModel) ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) LocalityManager(org.apache.samza.container.LocalityManager)

Example 22 with ClusterManagerConfig

use of org.apache.samza.config.ClusterManagerConfig in project samza by apache.

the class TestContainerProcessManager method testOnInitToForceRestartAMHighAvailability.

/**
 * Verifies that, with {@code JobConfig.YARN_AM_HIGH_AVAILABILITY_ENABLED} set to "true" and a
 * processor already registered in {@code state.runningProcessors}, starting the
 * {@link ContainerProcessManager} stops that running container and reports it with
 * {@code SamzaResourceStatus.PREEMPTED}.
 *
 * @throws Exception if any of the exercised components throws
 */
@Test
public void testOnInitToForceRestartAMHighAvailability() throws Exception {
    Map<String, String> configMap = new HashMap<>(configVals);
    configMap.put(JobConfig.YARN_AM_HIGH_AVAILABILITY_ENABLED, "true");
    Config conf = new MapConfig(configMap);
    SamzaResource samzaResource = new SamzaResource(1, 1024, "host", "0");
    SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(2));
    // Pretend processor "0" is already running before the manager starts.
    state.runningProcessors.put("0", samzaResource);
    MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
    // Spied so the stopStreamProcessor call can be verified below.
    ClusterResourceManager clusterResourceManager = spy(new MockClusterResourceManager(callback, state));
    ClusterManagerConfig clusterManagerConfig = spy(new ClusterManagerConfig(conf));
    ContainerManager containerManager = buildContainerManager(containerPlacementMetadataStore, state, clusterResourceManager, clusterManagerConfig.getHostAffinityEnabled(), false);
    ContainerProcessManager cpm = buildContainerProcessManager(clusterManagerConfig, state, clusterResourceManager, Optional.empty(), true);
    MockContainerAllocatorWithoutHostAffinity allocator = new MockContainerAllocatorWithoutHostAffinity(clusterResourceManager, conf, state, containerManager);
    // Swap the manager's allocator and allocator thread for test doubles
    // (getPrivateFieldFromCpm presumably returns a reflective Field -- confirm against the helper).
    getPrivateFieldFromCpm("containerAllocator", cpm).set(cpm, allocator);
    CountDownLatch latch = new CountDownLatch(1);
    getPrivateFieldFromCpm("allocatorThread", cpm).set(cpm, new Thread() {

        @Override
        public void run() {
            // isRunning is declared outside this block; the latch signals that the thread body ran.
            isRunning = true;
            latch.countDown();
        }
    });
    cpm.start();
    if (!latch.await(2, TimeUnit.SECONDS)) {
        Assert.fail("timed out waiting for the latch to expire");
    }
    verify(clusterResourceManager, times(1)).stopStreamProcessor(samzaResource);
    assertEquals("CPM should stop the running container", 1, callback.resourceStatuses.size());
    SamzaResourceStatus actualResourceStatus = callback.resourceStatuses.get(0);
    assertEquals("Container 0 should be stopped", "0", actualResourceStatus.getContainerId());
    assertEquals("Container 0 should have exited with preempted status", SamzaResourceStatus.PREEMPTED, actualResourceStatus.getExitCode());
    cpm.stop();
}
Also used : HashMap(java.util.HashMap) JobConfig(org.apache.samza.config.JobConfig) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) Matchers.anyString(org.mockito.Matchers.anyString) CountDownLatch(java.util.concurrent.CountDownLatch) Test(org.junit.Test)

Example 23 with ClusterManagerConfig

use of org.apache.samza.config.ClusterManagerConfig in project samza by apache.

the class TestContainerProcessManager method testInvalidNotificationsAreIgnored.

/**
 * Verifies that a completion notification for a container ID the manager does not know about
 * is ignored: it triggers no new container request, leaves the job healthy, does not request
 * shutdown, and is counted once in {@code state.redundantNotifications}.
 *
 * @throws Exception if any of the exercised components throws
 */
@Test
public void testInvalidNotificationsAreIgnored() throws Exception {
    Config conf = getConfig();
    SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(1));
    MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
    MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
    ClusterManagerConfig clusterManagerConfig = spy(new ClusterManagerConfig(conf));
    ContainerManager containerManager = buildContainerManager(containerPlacementMetadataStore, state, clusterResourceManager, clusterManagerConfig.getHostAffinityEnabled(), false);
    MockContainerAllocatorWithoutHostAffinity allocator = new MockContainerAllocatorWithoutHostAffinity(clusterResourceManager, conf, state, containerManager);
    // Spied so that the absence of onResourceCompletedWithUnknownStatus calls can be verified.
    ContainerProcessManager cpm = spy(buildContainerProcessManager(clusterManagerConfig, state, clusterResourceManager, Optional.of(allocator)));
    // Start the container process manager
    cpm.start();
    SamzaResource container = new SamzaResource(1, 1000, "host1", "id1");
    cpm.onResourceAllocated(container);
    // Allow container to run and update state
    if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    // Report a disk failure (SamzaResourceStatus.DISK_FAIL) for a container ID that was never allocated
    cpm.onResourceCompleted(new SamzaResourceStatus("invalidContainerID", "Disk failure", SamzaResourceStatus.DISK_FAIL));
    verify(cpm, never()).onResourceCompletedWithUnknownStatus(any(SamzaResourceStatus.class), anyString(), anyString(), anyInt());
    // The above failure should not trigger any container requests, since it is for an invalid container ID
    assertEquals(0, allocator.getContainerRequestState().numPendingRequests());
    assertFalse(cpm.shouldShutdown());
    assertTrue(state.jobHealthy.get());
    // Expected value goes first so a failure message reports expected/actual the right way round.
    assertEquals(1, state.redundantNotifications.get());
    cpm.stop();
}
Also used : ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) JobConfig(org.apache.samza.config.JobConfig) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) Test(org.junit.Test)

Aggregations

ClusterManagerConfig (org.apache.samza.config.ClusterManagerConfig)23 MapConfig (org.apache.samza.config.MapConfig)17 Test (org.junit.Test)15 JobConfig (org.apache.samza.config.JobConfig)12 Config (org.apache.samza.config.Config)11 HashMap (java.util.HashMap)10 Matchers.anyString (org.mockito.Matchers.anyString)9 MetricsRegistryMap (org.apache.samza.metrics.MetricsRegistryMap)8 LocalityManager (org.apache.samza.container.LocalityManager)6 LocalityModel (org.apache.samza.job.model.LocalityModel)6 ProcessorLocality (org.apache.samza.job.model.ProcessorLocality)6 CountDownLatch (java.util.concurrent.CountDownLatch)3 InvocationOnMock (org.mockito.invocation.InvocationOnMock)3 ImmutableList (com.google.common.collect.ImmutableList)2 List (java.util.List)2 SamzaException (org.apache.samza.SamzaException)2 ContainerPlacementMetadata (org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata)2 TaskConfig (org.apache.samza.config.TaskConfig)2 ContainerPlacementRequestMessage (org.apache.samza.container.placement.ContainerPlacementRequestMessage)2 ServletHolder (org.eclipse.jetty.servlet.ServletHolder)2