Search in sources :

Example 16 with LocalityManager

use of org.apache.samza.container.LocalityManager in project samza by apache.

the class TestContainerAllocatorWithoutHostAffinity method testExpiredRequestInfiniteLoop.

/**
 * See SAMZA-2601: we want to prevent an infinite loop in the case of an expired request call with host affinity
 * disabled. This test makes sure we don't have that infinite loop.
 *
 * <p>The allocator is run on its own thread with a 100 ms sleep interval and with the mock's
 * {@code isRequestExpired} override switched on. After the first expired-request check is observed, the test
 * waits another 500 ms and asserts that the number of checks stayed small (fewer than 100).
 *
 * @throws Exception if waiting for the allocator is interrupted or the test set-up fails
 */
@Test
public void testExpiredRequestInfiniteLoop() throws Exception {
    // Override to have a proper sleep interval for this test. A plain HashMap is used instead of double-brace
    // initialization so no anonymous subclass (holding a reference to this test instance) is created.
    Map<String, String> overrideValues = new HashMap<>();
    overrideValues.put("cluster-manager.allocator.sleep.ms", "100");
    Config override = new MapConfig(overrideValues);

    LocalityManager mockLocalityManager = mock(LocalityManager.class);
    when(mockLocalityManager.readLocality()).thenReturn(new LocalityModel(new HashMap<>()));
    ContainerManager containerManager = new ContainerManager(containerPlacementMetadataStore, state, manager, false, false, mockLocalityManager, faultDomainManager, config);
    containerAllocator = MockContainerAllocatorWithoutHostAffinity.createContainerAllocatorWithConfigOverride(manager, config, state, containerManager, override);
    MockContainerAllocatorWithoutHostAffinity mockAllocator = (MockContainerAllocatorWithoutHostAffinity) containerAllocator;
    mockAllocator.setOverrideIsRequestExpired();
    allocatorThread = new Thread(containerAllocator);

    // Four processors, none of them with a preferred host.
    Map<String, String> containersToHostMapping = new HashMap<>();
    containersToHostMapping.put("0", null);
    containersToHostMapping.put("1", null);
    containersToHostMapping.put("2", null);
    containersToHostMapping.put("3", null);

    allocatorThread.start();
    mockAllocator.requestResources(containersToHostMapping);
    // Wait until at least one expired request call is made, which should happen.
    // If the test passes, this should return immediately (within 100 ms). Only when the test fails will it exhaust the
    // timeout, which is worth the wait to find out the failure
    assertTrue(mockAllocator.awaitIsRequestExpiredCall(TimeUnit.SECONDS.toMillis(10)));
    // TODO: we can eliminate the thread sleep if the whole container allocator and test codes are refactored to use
    // a Clock which can be simulated and controlled.
    Thread.sleep(500);
    // Given that we wait for 500 ms above, and a sleep interval of 100 ms, we should roughly see 5 times the
    // isRequestExpired is called. We give some extra buffer here (<100). Because if we do run into infinite loop,
    // isRequestExpired would be called MILLIONS of times (4~5 million times after a dozen of runs on my machine).
    // The counter is read exactly once: the allocator thread is still running, so reading it separately for the
    // message and for the check could report a value different from the one that was actually asserted.
    long expiredRequestCallCount = mockAllocator.getExpiredRequestCallCount();
    assertTrue(String.format("Too many call count: %d. Seems to be in infinite loop", expiredRequestCallCount), expiredRequestCallCount < 100);
}
Also used : HashMap(java.util.HashMap) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) MapConfig(org.apache.samza.config.MapConfig) LocalityManager(org.apache.samza.container.LocalityManager) LocalityModel(org.apache.samza.job.model.LocalityModel) Test(org.junit.Test)

Example 17 with LocalityManager

use of org.apache.samza.container.LocalityManager in project samza by apache.

the class TestContainerProcessManager method buildContainerProcessManager.

/**
 * Convenience overload that supplies mocked collaborators: a mocked {@link FaultDomainManager} and a mocked
 * {@link LocalityManager} whose {@code readLocality()} is stubbed to return a {@link LocalityModel} built from an
 * empty map. All other arguments are forwarded unchanged to the overload that accepts those collaborators.
 */
private ContainerProcessManager buildContainerProcessManager(ClusterManagerConfig clusterManagerConfig, SamzaApplicationState state, ClusterResourceManager clusterResourceManager, Optional<ContainerAllocator> allocator, boolean restartContainer) {
    FaultDomainManager stubFaultDomainManager = mock(FaultDomainManager.class);
    LocalityManager stubLocalityManager = mock(LocalityManager.class);
    // No processor has a recorded locality in this default set-up.
    LocalityModel emptyLocality = new LocalityModel(new HashMap<>());
    when(stubLocalityManager.readLocality()).thenReturn(emptyLocality);
    return buildContainerProcessManager(clusterManagerConfig, state, clusterResourceManager, allocator, stubLocalityManager, restartContainer, stubFaultDomainManager);
}
Also used : HashMap(java.util.HashMap) LocalityManager(org.apache.samza.container.LocalityManager) LocalityModel(org.apache.samza.job.model.LocalityModel)

Example 18 with LocalityManager

use of org.apache.samza.container.LocalityManager in project samza by apache.

the class TestContainerProcessManager method testAllBufferedResourcesAreUtilized.

/**
 * Scenario test: an extra resource that was allocated on a host is kept and used for the re-run of a container
 * that failed on that host.
 *
 * <p>Set-up: the host-affinity config with a container count of 2, a retry count of 2 and a request timeout of
 * 10000 ms; the mocked locality maps processor "0" to host1 and processor "1" to host2. Two resources are then
 * allocated on host1 and, later, one on host2. The statements below are order-sensitive because the allocator
 * is awaited between steps.
 *
 * @throws Exception if the test is interrupted while awaiting container starts
 */
@Test
public void testAllBufferedResourcesAreUtilized() throws Exception {
    Map<String, String> config = new HashMap<>();
    config.putAll(getConfigWithHostAffinity());
    config.put("job.container.count", "2");
    config.put("cluster-manager.container.retry.count", "2");
    config.put("cluster-manager.container.request.timeout.ms", "10000");
    Config cfg = new MapConfig(config);
    // 1. Request two containers on hosts - host1 and host2
    SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(2));
    MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
    MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
    FaultDomainManager faultDomainManager = mock(FaultDomainManager.class);
    LocalityManager mockLocalityManager = mock(LocalityManager.class);
    // Last known locality: processor "0" ran on host1, processor "1" on host2.
    when(mockLocalityManager.readLocality()).thenReturn(new LocalityModel(ImmutableMap.of("0", new ProcessorLocality("0", "host1"), "1", new ProcessorLocality("1", "host2"))));
    ContainerManager containerManager = buildContainerManager(containerPlacementMetadataStore, state, clusterResourceManager, Boolean.parseBoolean(config.get(ClusterManagerConfig.HOST_AFFINITY_ENABLED)), false, mockLocalityManager, faultDomainManager);
    MockContainerAllocatorWithHostAffinity allocator = new MockContainerAllocatorWithHostAffinity(clusterResourceManager, cfg, state, containerManager);
    // Spied so that the call to onResourceCompletedWithUnknownStatus can be verified further down.
    ContainerProcessManager cpm = spy(buildContainerProcessManager(new ClusterManagerConfig(cfg), state, clusterResourceManager, Optional.of(allocator), mockLocalityManager, false, faultDomainManager));
    cpm.start();
    assertFalse(cpm.shouldShutdown());
    // 2. When the task manager starts, there should have been a pending request on host1 and host2
    assertEquals(2, allocator.getContainerRequestState().numPendingRequests());
    // 3. Allocate an extra resource on host1 and no resource on host2 yet.
    SamzaResource resource1 = new SamzaResource(1, 1000, "host1", "id1");
    SamzaResource resource2 = new SamzaResource(1, 1000, "host1", "id2");
    cpm.onResourceAllocated(resource1);
    cpm.onResourceAllocated(resource2);
    // 4. Wait for the container to start on host1 and immediately fail
    if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    cpm.onStreamProcessorLaunchSuccess(resource1);
    // Only the host2 request is still outstanding at this point.
    assertEquals("host2", allocator.getContainerRequestState().peekPendingRequest().getPreferredHost());
    assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
    // Fail the container that was launched on resource1 (exit status 1).
    cpm.onResourceCompleted(new SamzaResourceStatus(resource1.getContainerId(), "App Error", 1));
    verify(cpm).onResourceCompletedWithUnknownStatus(any(SamzaResourceStatus.class), anyString(), anyString(), anyInt());
    // The failure puts a second request back into the pending queue and marks the job unhealthy.
    assertEquals(2, allocator.getContainerRequestState().numPendingRequests());
    assertFalse(cpm.shouldShutdown());
    assertFalse(state.jobHealthy.get());
    // Presumably the two initial requests plus one re-request for the failed container - TODO confirm.
    assertEquals(3, clusterResourceManager.resourceRequests.size());
    // Nothing has been released, i.e. the extra host1 resource (resource2) is still held.
    assertEquals(0, clusterResourceManager.releasedResources.size());
    // 5. Do not allocate any further resource on host1, and verify that the re-run of the container on host1 uses the
    // previously allocated extra resource
    SamzaResource resource3 = new SamzaResource(1, 1000, "host2", "id3");
    cpm.onResourceAllocated(resource3);
    if (!allocator.awaitContainersStart(2, 2, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    cpm.onStreamProcessorLaunchSuccess(resource2);
    cpm.onStreamProcessorLaunchSuccess(resource3);
    // Both containers reported a successful launch, so the job is healthy again.
    assertTrue(state.jobHealthy.get());
    cpm.stop();
}
Also used : HashMap(java.util.HashMap) JobConfig(org.apache.samza.config.JobConfig) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) Matchers.anyString(org.mockito.Matchers.anyString) LocalityModel(org.apache.samza.job.model.LocalityModel) ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) MapConfig(org.apache.samza.config.MapConfig) LocalityManager(org.apache.samza.container.LocalityManager) Test(org.junit.Test)

Example 19 with LocalityManager

use of org.apache.samza.container.LocalityManager in project samza by apache.

the class TestContainerProcessManager method testContainerRequestedRetriesNotExceedingWindowOnFailureWithUnknownCode.

/**
 * Parameterized scenario helper: a single processor ("0") fails repeatedly with exit status 1 while the
 * recorded failures stay inside the retry window, with a maximum of 3 retries configured.
 *
 * <p>The 2nd and 3rd failures must not meet the job failure criteria. On the 4th failure the job failure
 * criteria (and {@code shouldShutdown()}) are expected to equal {@code failAfterRetries}, and no pending or
 * delayed request may remain.
 *
 * @param withHostAffinity whether the config is built with host affinity; when {@code true} the mocked locality
 *                         maps processor "0" to host1 and the retry after the 3rd failure is expected to be
 *                         queued as a delayed request rather than a pending one
 * @param failAfterRetries passed to the config builder; also the expected value of the job failure criteria
 *                         after the 4th failure
 * @throws Exception if the test is interrupted while awaiting container starts
 */
private void testContainerRequestedRetriesNotExceedingWindowOnFailureWithUnknownCode(boolean withHostAffinity, boolean failAfterRetries) throws Exception {
    int maxRetries = 3;
    String processorId = "0";
    ClusterManagerConfig clusterManagerConfig = new ClusterManagerConfig(getConfigWithHostAffinityAndRetries(withHostAffinity, maxRetries, failAfterRetries));
    SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(1));
    MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
    MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
    FaultDomainManager faultDomainManager = mock(FaultDomainManager.class);
    LocalityManager mockLocalityManager = mock(LocalityManager.class);
    // A locality entry for processor "0" exists only in the host-affinity variant.
    if (withHostAffinity) {
        when(mockLocalityManager.readLocality()).thenReturn(new LocalityModel(ImmutableMap.of("0", new ProcessorLocality("0", "host1"))));
    } else {
        when(mockLocalityManager.readLocality()).thenReturn(new LocalityModel(new HashMap<>()));
    }
    ContainerManager containerManager = buildContainerManager(containerPlacementMetadataStore, state, clusterResourceManager, clusterManagerConfig.getHostAffinityEnabled(), false, mockLocalityManager, faultDomainManager);
    // NOTE(review): the "WithoutHostAffinity" mock allocator is used for both variants - confirm this is intended.
    MockContainerAllocatorWithoutHostAffinity allocator = new MockContainerAllocatorWithoutHostAffinity(clusterResourceManager, clusterManagerConfig, state, containerManager);
    ContainerProcessManager cpm = buildContainerProcessManager(clusterManagerConfig, state, clusterResourceManager, Optional.of(allocator), mockLocalityManager, false, faultDomainManager);
    // start triggers a request
    cpm.start();
    assertFalse(cpm.shouldShutdown());
    assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
    assertEquals(0, allocator.getContainerRequestState().numDelayedRequests());
    // The same resource object is re-allocated for every retry below.
    SamzaResource container = new SamzaResource(1, 1024, "host1", "id0");
    cpm.onResourceAllocated(container);
    // Allow container to run and update state
    if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    cpm.onStreamProcessorLaunchSuccess(container);
    // Mock 2nd failure not exceeding retry window.
    // A failure record with count 1 and the current time is seeded first; the completion below raises it to 2.
    cpm.getProcessorFailures().put(processorId, new ProcessorFailure(1, Instant.now(), Duration.ZERO));
    cpm.onResourceCompleted(new SamzaResourceStatus(container.getContainerId(), "diagnostics", 1));
    assertEquals(false, cpm.getJobFailureCriteriaMet());
    assertEquals(2, cpm.getProcessorFailures().get(processorId).getCount());
    assertFalse(cpm.shouldShutdown());
    assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
    assertEquals(0, allocator.getContainerRequestState().numDelayedRequests());
    cpm.onResourceAllocated(container);
    // Allow container to run and update state
    if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    cpm.onStreamProcessorLaunchSuccess(container);
    // Mock 3rd failure not exceeding retry window.
    cpm.getProcessorFailures().put(processorId, new ProcessorFailure(2, Instant.now(), Duration.ZERO));
    cpm.onResourceCompleted(new SamzaResourceStatus(container.getContainerId(), "diagnostics", 1));
    assertEquals(false, cpm.getJobFailureCriteriaMet());
    assertEquals(3, cpm.getProcessorFailures().get(processorId).getCount());
    assertFalse(cpm.shouldShutdown());
    // After the 3rd failure the retry is queued as delayed with host affinity, and as pending without it.
    if (withHostAffinity) {
        assertEquals(0, allocator.getContainerRequestState().numPendingRequests());
        assertEquals(1, allocator.getContainerRequestState().numDelayedRequests());
    } else {
        assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
        assertEquals(0, allocator.getContainerRequestState().numDelayedRequests());
    }
    cpm.onResourceAllocated(container);
    if (withHostAffinity) {
        if (allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
            // With host affinity the retry sits in the delayed queue, so no container may start within the
            // timeout; a start here means the request was not delayed.
            fail("Expecting a delayed request so allocator callback should have timed out waiting for a response.");
        }
        // For the sake of testing the mocked 4th failure below, send delayed requests now.
        // The delayed request is replaced by a copy whose timestamp lies one second in the past.
        SamzaResourceRequest request = allocator.getContainerRequestState().getDelayedRequestsQueue().poll();
        SamzaResourceRequest fastForwardRequest = new SamzaResourceRequest(request.getNumCores(), request.getMemoryMB(), request.getPreferredHost(), request.getProcessorId(), Instant.now().minusSeconds(1));
        allocator.getContainerRequestState().getDelayedRequestsQueue().add(fastForwardRequest);
        int numSent = allocator.getContainerRequestState().sendPendingDelayedResourceRequests();
        assertEquals(1, numSent);
        cpm.onResourceAllocated(container);
    }
    if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
        // In both variants a request has been sent by now (directly without host affinity, via the fast-forward
        // above with it), so the container is expected to start within the timeout.
        fail("Timed out waiting for the containers to start");
    }
    cpm.onStreamProcessorLaunchSuccess(container);
    // Mock 4th failure not exceeding retry window.
    cpm.getProcessorFailures().put(processorId, new ProcessorFailure(3, Instant.now(), Duration.ZERO));
    cpm.onResourceCompleted(new SamzaResourceStatus(container.getContainerId(), "diagnostics", 1));
    // expecting failed container
    assertEquals(failAfterRetries, cpm.getJobFailureCriteriaMet());
    // count won't update on failure
    assertEquals(3, cpm.getProcessorFailures().get(processorId).getCount());
    if (failAfterRetries) {
        assertTrue(cpm.shouldShutdown());
    } else {
        assertFalse(cpm.shouldShutdown());
    }
    // No further retry is requested once the retries are used up.
    assertEquals(0, allocator.getContainerRequestState().numPendingRequests());
    assertEquals(0, allocator.getContainerRequestState().numDelayedRequests());
    cpm.stop();
}
Also used : HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) LocalityModel(org.apache.samza.job.model.LocalityModel) ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) LocalityManager(org.apache.samza.container.LocalityManager)

Example 20 with LocalityManager

use of org.apache.samza.container.LocalityManager in project samza by apache.

the class SamzaTaskProxy method readTasksFromCoordinatorStream.

/**
 * Reads the task-to-container assignment and the container localities from a job's coordinator stream and
 * turns every assignment into a {@link Task}.
 *
 * <p>The host of a task is the host recorded for its container; it is {@code null} when no locality is known
 * for that container. Every task carries the store names taken from the job's {@link StorageConfig}.
 *
 * @param consumer system consumer associated with a job's coordinator stream.
 * @return list of {@link Task} constructed from job model in coordinator stream.
 */
protected List<Task> readTasksFromCoordinatorStream(CoordinatorStreamSystemConsumer consumer) {
    CoordinatorStreamStore store = new CoordinatorStreamStore(consumer.getConfig(), new MetricsRegistryMap());

    // Container id -> last recorded locality of that container.
    Map<String, ProcessorLocality> localityByContainerId = new LocalityManager(store).readLocality().getProcessorLocalities();

    // Task name -> id of the container the task is assigned to.
    NamespaceAwareCoordinatorStreamStore taskContainerStore = new NamespaceAwareCoordinatorStreamStore(store, SetTaskContainerMapping.TYPE);
    NamespaceAwareCoordinatorStreamStore taskModeStore = new NamespaceAwareCoordinatorStreamStore(store, SetTaskModeMapping.TYPE);
    Map<String, String> containerIdByTaskName = new TaskAssignmentManager(taskContainerStore, taskModeStore).readTaskAssignment();

    List<String> storeNames = new StorageConfig(consumer.getConfig()).getStoreNames();

    List<Task> tasks = new ArrayList<>();
    for (Map.Entry<String, String> assignment : containerIdByTaskName.entrySet()) {
        String taskName = assignment.getKey();
        String containerId = assignment.getValue();
        ProcessorLocality locality = localityByContainerId.get(containerId);
        String hostName = locality == null ? null : locality.host();
        tasks.add(new Task(hostName, taskName, containerId, new ArrayList<>(), storeNames));
    }
    return tasks;
}
Also used : CoordinatorStreamStore(org.apache.samza.coordinator.metadatastore.CoordinatorStreamStore) SetTaskModeMapping(org.apache.samza.coordinator.stream.messages.SetTaskModeMapping) Task(org.apache.samza.rest.model.Task) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) TaskAssignmentManager(org.apache.samza.container.grouper.task.TaskAssignmentManager) JobInstance(org.apache.samza.rest.proxy.job.JobInstance) URI(java.net.URI) MapConfig(org.apache.samza.config.MapConfig) NamespaceAwareCoordinatorStreamStore(org.apache.samza.coordinator.metadatastore.NamespaceAwareCoordinatorStreamStore) StorageConfig(org.apache.samza.config.StorageConfig) Logger(org.slf4j.Logger) ImmutableMap(com.google.common.collect.ImmutableMap) InstallationFinder(org.apache.samza.rest.proxy.installation.InstallationFinder) ConfigFactory(org.apache.samza.config.ConfigFactory) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) SamzaException(org.apache.samza.SamzaException) LocalityManager(org.apache.samza.container.LocalityManager) CoordinatorStreamSystemConsumer(org.apache.samza.coordinator.stream.CoordinatorStreamSystemConsumer) List(java.util.List) ReflectionUtil(org.apache.samza.util.ReflectionUtil) ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) Optional(java.util.Optional) InstallationRecord(org.apache.samza.rest.proxy.installation.InstallationRecord) Preconditions(com.google.common.base.Preconditions) Config(org.apache.samza.config.Config) SetTaskContainerMapping(org.apache.samza.coordinator.stream.messages.SetTaskContainerMapping) CoordinatorStreamUtil(org.apache.samza.util.CoordinatorStreamUtil) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) Task(org.apache.samza.rest.model.Task) StorageConfig(org.apache.samza.config.StorageConfig) ArrayList(java.util.ArrayList) 
ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) NamespaceAwareCoordinatorStreamStore(org.apache.samza.coordinator.metadatastore.NamespaceAwareCoordinatorStreamStore) CoordinatorStreamStore(org.apache.samza.coordinator.metadatastore.CoordinatorStreamStore) NamespaceAwareCoordinatorStreamStore(org.apache.samza.coordinator.metadatastore.NamespaceAwareCoordinatorStreamStore) TaskAssignmentManager(org.apache.samza.container.grouper.task.TaskAssignmentManager) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) LocalityManager(org.apache.samza.container.LocalityManager)

Aggregations

LocalityManager (org.apache.samza.container.LocalityManager)20 LocalityModel (org.apache.samza.job.model.LocalityModel)13 HashMap (java.util.HashMap)10 ProcessorLocality (org.apache.samza.job.model.ProcessorLocality)10 MetricsRegistryMap (org.apache.samza.metrics.MetricsRegistryMap)8 MapConfig (org.apache.samza.config.MapConfig)7 ClusterManagerConfig (org.apache.samza.config.ClusterManagerConfig)6 Config (org.apache.samza.config.Config)5 JobConfig (org.apache.samza.config.JobConfig)5 CoordinatorStreamStore (org.apache.samza.coordinator.metadatastore.CoordinatorStreamStore)5 Before (org.junit.Before)5 NamespaceAwareCoordinatorStreamStore (org.apache.samza.coordinator.metadatastore.NamespaceAwareCoordinatorStreamStore)4 Test (org.junit.Test)4 Matchers.anyString (org.mockito.Matchers.anyString)4 Map (java.util.Map)3 ContainerPlacementMetadataStore (org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadataStore)3 TaskName (org.apache.samza.container.TaskName)3 TaskAssignmentManager (org.apache.samza.container.grouper.task.TaskAssignmentManager)3 CoordinatorStreamStoreTestUtil (org.apache.samza.coordinator.metadatastore.CoordinatorStreamStoreTestUtil)3 Field (java.lang.reflect.Field)2