use of org.apache.samza.container.LocalityManager in project samza by apache.
the class TestContainerAllocatorWithoutHostAffinity method testExpiredRequestInfiniteLoop.
/**
 * Regression test for SAMZA-2601: with host affinity disabled, handling an expired resource request must not
 * send the allocator thread into an infinite loop.
 *
 * <p>The allocator runs with a 100 ms sleep interval. Once the first request-expiry check has been observed,
 * the test lets the allocator run for a further 500 ms and asserts that the number of expiry checks stayed
 * below 100.
 *
 * @throws Exception if the test thread is interrupted while waiting or sleeping
 */
@Test
public void testExpiredRequestInfiniteLoop() throws Exception {
  // Override to have a proper sleep interval for this test.
  Map<String, String> overrideProps = new HashMap<>();
  overrideProps.put("cluster-manager.allocator.sleep.ms", "100");
  Config override = new MapConfig(overrideProps);

  LocalityManager mockLocalityManager = mock(LocalityManager.class);
  when(mockLocalityManager.readLocality()).thenReturn(new LocalityModel(new HashMap<>()));
  ContainerManager containerManager = new ContainerManager(containerPlacementMetadataStore, state, manager, false, false, mockLocalityManager, faultDomainManager, config);
  containerAllocator = MockContainerAllocatorWithoutHostAffinity.createContainerAllocatorWithConfigOverride(manager, config, state, containerManager, override);
  MockContainerAllocatorWithoutHostAffinity mockAllocator = (MockContainerAllocatorWithoutHostAffinity) containerAllocator;
  // NOTE(review): presumably switches the mock to its overridden request-expiry check - confirm against the mock.
  mockAllocator.setOverrideIsRequestExpired();
  allocatorThread = new Thread(containerAllocator);

  // Four processors ("0" to "3"), none with a preferred host.
  Map<String, String> containersToHostMapping = new HashMap<>();
  for (int processorId = 0; processorId < 4; processorId++) {
    containersToHostMapping.put(String.valueOf(processorId), null);
  }

  allocatorThread.start();
  mockAllocator.requestResources(containersToHostMapping);

  // Wait until at least one expired-request check has been made, which should happen.
  // If the test passes, this should return immediately (within 100 ms). Only when the test fails will it exhaust
  // the timeout, which is worth the wait to find out the failure.
  assertTrue(mockAllocator.awaitIsRequestExpiredCall(TimeUnit.SECONDS.toMillis(10)));

  // TODO: we can eliminate the thread sleep if the whole container allocator and test codes are refactored to use
  // a Clock which can be simulated and controlled.
  Thread.sleep(500);

  // Given that we wait for 500 ms above, and a sleep interval of 100 ms, isRequestExpired should be called roughly
  // 5 times. We give some extra buffer here (<100), because if we do run into the infinite loop, isRequestExpired
  // would be called MILLIONS of times.
  // Read the counter once: the allocator thread is still running, so reading it separately for the message and
  // for the condition could report a different value than the one that was actually checked.
  long expiredRequestCalls = mockAllocator.getExpiredRequestCallCount();
  assertTrue(String.format("Too many call count: %d. Seems to be in infinite loop", expiredRequestCalls), expiredRequestCalls < 100);
}
use of org.apache.samza.container.LocalityManager in project samza by apache.
the class TestContainerProcessManager method buildContainerProcessManager.
/**
 * Convenience overload that supplies mocked collaborators: a {@link FaultDomainManager} mock and a
 * {@link LocalityManager} mock whose {@code readLocality()} returns a model built from an empty map. All other
 * arguments are forwarded unchanged to the seven-argument overload.
 */
private ContainerProcessManager buildContainerProcessManager(ClusterManagerConfig clusterManagerConfig, SamzaApplicationState state, ClusterResourceManager clusterResourceManager, Optional<ContainerAllocator> allocator, boolean restartContainer) {
  FaultDomainManager stubFaultDomainManager = mock(FaultDomainManager.class);
  LocalityManager stubLocalityManager = mock(LocalityManager.class);
  // No processor has a recorded locality.
  LocalityModel emptyLocality = new LocalityModel(new HashMap<>());
  when(stubLocalityManager.readLocality()).thenReturn(emptyLocality);
  return buildContainerProcessManager(
      clusterManagerConfig,
      state,
      clusterResourceManager,
      allocator,
      stubLocalityManager,
      restartContainer,
      stubFaultDomainManager);
}
use of org.apache.samza.container.LocalityManager in project samza by apache.
the class TestContainerProcessManager method testAllBufferedResourcesAreUtilized.
/**
 * Checks that a resource allocated in excess of the outstanding request on a host is retained (not released) and
 * is used to re-run the container on that host after the first container there fails.
 */
@Test
public void testAllBufferedResourcesAreUtilized() throws Exception {
  Map<String, String> rawConfig = new HashMap<>(getConfigWithHostAffinity());
  rawConfig.put("job.container.count", "2");
  rawConfig.put("cluster-manager.container.retry.count", "2");
  rawConfig.put("cluster-manager.container.request.timeout.ms", "10000");
  Config jobConfig = new MapConfig(rawConfig);
  boolean hostAffinityEnabled = Boolean.parseBoolean(rawConfig.get(ClusterManagerConfig.HOST_AFFINITY_ENABLED));

  // Step 1: two containers are requested, with last-known hosts host1 (processor "0") and host2 (processor "1").
  SamzaApplicationState appState = new SamzaApplicationState(getJobModelManager(2));
  MockClusterResourceManager resourceManager = new MockClusterResourceManager(new MockClusterResourceManagerCallback(), appState);
  FaultDomainManager stubFaultDomainManager = mock(FaultDomainManager.class);
  LocalityManager stubLocalityManager = mock(LocalityManager.class);
  LocalityModel lastKnownHosts = new LocalityModel(ImmutableMap.of("0", new ProcessorLocality("0", "host1"), "1", new ProcessorLocality("1", "host2")));
  when(stubLocalityManager.readLocality()).thenReturn(lastKnownHosts);
  ContainerManager containerManager = buildContainerManager(containerPlacementMetadataStore, appState, resourceManager, hostAffinityEnabled, false, stubLocalityManager, stubFaultDomainManager);
  MockContainerAllocatorWithHostAffinity hostAwareAllocator = new MockContainerAllocatorWithHostAffinity(resourceManager, jobConfig, appState, containerManager);
  ContainerProcessManager cpm = spy(buildContainerProcessManager(new ClusterManagerConfig(jobConfig), appState, resourceManager, Optional.of(hostAwareAllocator), stubLocalityManager, false, stubFaultDomainManager));

  cpm.start();
  assertFalse(cpm.shouldShutdown());

  // Step 2: right after start there is one pending request per container.
  assertEquals(2, hostAwareAllocator.getContainerRequestState().numPendingRequests());

  // Step 3: host1 receives two resources (one more than requested); host2 receives nothing yet.
  SamzaResource firstHost1Resource = new SamzaResource(1, 1000, "host1", "id1");
  SamzaResource spareHost1Resource = new SamzaResource(1, 1000, "host1", "id2");
  cpm.onResourceAllocated(firstHost1Resource);
  cpm.onResourceAllocated(spareHost1Resource);

  // Step 4: the container on host1 starts and then fails straight away.
  assertTrue("timed out waiting for the containers to start", hostAwareAllocator.awaitContainersStart(1, 2, TimeUnit.SECONDS));
  cpm.onStreamProcessorLaunchSuccess(firstHost1Resource);
  assertEquals("host2", hostAwareAllocator.getContainerRequestState().peekPendingRequest().getPreferredHost());
  assertEquals(1, hostAwareAllocator.getContainerRequestState().numPendingRequests());

  cpm.onResourceCompleted(new SamzaResourceStatus(firstHost1Resource.getContainerId(), "App Error", 1));
  verify(cpm).onResourceCompletedWithUnknownStatus(any(SamzaResourceStatus.class), anyString(), anyString(), anyInt());
  assertEquals(2, hostAwareAllocator.getContainerRequestState().numPendingRequests());
  assertFalse(cpm.shouldShutdown());
  assertFalse(appState.jobHealthy.get());
  assertEquals(3, resourceManager.resourceRequests.size());
  assertEquals(0, resourceManager.releasedResources.size());

  // Step 5: nothing more is allocated on host1; only host2 gets a resource. The re-run on host1 must therefore
  // be served by the spare resource that was allocated earlier.
  SamzaResource host2Resource = new SamzaResource(1, 1000, "host2", "id3");
  cpm.onResourceAllocated(host2Resource);
  assertTrue("timed out waiting for the containers to start", hostAwareAllocator.awaitContainersStart(2, 2, TimeUnit.SECONDS));
  cpm.onStreamProcessorLaunchSuccess(spareHost1Resource);
  cpm.onStreamProcessorLaunchSuccess(host2Resource);
  assertTrue(appState.jobHealthy.get());

  cpm.stop();
}
use of org.apache.samza.container.LocalityManager in project samza by apache.
the class TestContainerProcessManager method testContainerRequestedRetriesNotExceedingWindowOnFailureWithUnknownCode.
/**
* Shared scenario for a single processor ("0") whose container is completed with status value 1 several times in
* a row, with each failure recorded at the current instant. The processor's failure record is pre-seeded before
* each completion so that the completions count as the 2nd, 3rd and 4th failure respectively.
*
* After the 2nd and 3rd failures the job must keep running and a new request must be issued (as a delayed request
* after the 3rd failure when host affinity is on). After the 4th failure the job-failure criteria and shutdown
* flag must equal {@code failAfterRetries}, and no pending or delayed request may remain.
*
* @param withHostAffinity passed to the config helper; also decides whether the mocked locality maps processor
*                         "0" to "host1" or is empty
* @param failAfterRetries passed to the config helper; the expected value of the job-failure criteria and of
*                         {@code shouldShutdown()} after the 4th failure
* @throws Exception if waiting for container start is interrupted
*/
private void testContainerRequestedRetriesNotExceedingWindowOnFailureWithUnknownCode(boolean withHostAffinity, boolean failAfterRetries) throws Exception {
int maxRetries = 3;
String processorId = "0";
ClusterManagerConfig clusterManagerConfig = new ClusterManagerConfig(getConfigWithHostAffinityAndRetries(withHostAffinity, maxRetries, failAfterRetries));
SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(1));
MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
FaultDomainManager faultDomainManager = mock(FaultDomainManager.class);
LocalityManager mockLocalityManager = mock(LocalityManager.class);
// With host affinity, processor "0" has a recorded locality on host1; otherwise the locality model is empty.
if (withHostAffinity) {
when(mockLocalityManager.readLocality()).thenReturn(new LocalityModel(ImmutableMap.of("0", new ProcessorLocality("0", "host1"))));
} else {
when(mockLocalityManager.readLocality()).thenReturn(new LocalityModel(new HashMap<>()));
}
ContainerManager containerManager = buildContainerManager(containerPlacementMetadataStore, state, clusterResourceManager, clusterManagerConfig.getHostAffinityEnabled(), false, mockLocalityManager, faultDomainManager);
MockContainerAllocatorWithoutHostAffinity allocator = new MockContainerAllocatorWithoutHostAffinity(clusterResourceManager, clusterManagerConfig, state, containerManager);
ContainerProcessManager cpm = buildContainerProcessManager(clusterManagerConfig, state, clusterResourceManager, Optional.of(allocator), mockLocalityManager, false, faultDomainManager);
// start triggers a request
cpm.start();
assertFalse(cpm.shouldShutdown());
assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
assertEquals(0, allocator.getContainerRequestState().numDelayedRequests());
SamzaResource container = new SamzaResource(1, 1024, "host1", "id0");
cpm.onResourceAllocated(container);
// Allow container to run and update state
if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
fail("timed out waiting for the containers to start");
}
cpm.onStreamProcessorLaunchSuccess(container);
// Mock the 2nd failure within the retry window: seed the record with one prior failure at the current instant,
// then complete the container. The recorded count is asserted to become 2.
cpm.getProcessorFailures().put(processorId, new ProcessorFailure(1, Instant.now(), Duration.ZERO));
cpm.onResourceCompleted(new SamzaResourceStatus(container.getContainerId(), "diagnostics", 1));
assertEquals(false, cpm.getJobFailureCriteriaMet());
assertEquals(2, cpm.getProcessorFailures().get(processorId).getCount());
assertFalse(cpm.shouldShutdown());
// The retry is expected as an immediate pending request, not a delayed one.
assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
assertEquals(0, allocator.getContainerRequestState().numDelayedRequests());
cpm.onResourceAllocated(container);
// Allow container to run and update state
if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
fail("timed out waiting for the containers to start");
}
cpm.onStreamProcessorLaunchSuccess(container);
// Mock the 3rd failure within the retry window: seed the record with two prior failures, then complete the
// container. The recorded count is asserted to become 3.
cpm.getProcessorFailures().put(processorId, new ProcessorFailure(2, Instant.now(), Duration.ZERO));
cpm.onResourceCompleted(new SamzaResourceStatus(container.getContainerId(), "diagnostics", 1));
assertEquals(false, cpm.getJobFailureCriteriaMet());
assertEquals(3, cpm.getProcessorFailures().get(processorId).getCount());
assertFalse(cpm.shouldShutdown());
// With host affinity this retry is expected in the delayed-request queue; without it, in the pending queue.
if (withHostAffinity) {
assertEquals(0, allocator.getContainerRequestState().numPendingRequests());
assertEquals(1, allocator.getContainerRequestState().numDelayedRequests());
} else {
assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
assertEquals(0, allocator.getContainerRequestState().numDelayedRequests());
}
cpm.onResourceAllocated(container);
if (withHostAffinity) {
if (allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
// Host affinity is enabled here, so the retry is still sitting in the delayed queue and no container
// should have started; the wait above is expected to time out.
fail("Expecting a delayed request so allocator callback should have timed out waiting for a response.");
}
// For the sake of testing the mocked 4th failure below, send delayed requests now: swap the queued request for
// a copy whose final constructor argument is one second in the past (presumably its request timestamp - confirm
// against SamzaResourceRequest), then send the delayed requests.
SamzaResourceRequest request = allocator.getContainerRequestState().getDelayedRequestsQueue().poll();
SamzaResourceRequest fastForwardRequest = new SamzaResourceRequest(request.getNumCores(), request.getMemoryMB(), request.getPreferredHost(), request.getProcessorId(), Instant.now().minusSeconds(1));
allocator.getContainerRequestState().getDelayedRequestsQueue().add(fastForwardRequest);
int numSent = allocator.getContainerRequestState().sendPendingDelayedResourceRequests();
assertEquals(1, numSent);
cpm.onResourceAllocated(container);
}
if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
// Without host affinity there is no delayed retry request, and with host affinity the delayed request was
// sent just above; in both cases a container is expected to start within the wait.
fail("Timed out waiting for the containers to start");
}
cpm.onStreamProcessorLaunchSuccess(container);
// Mock the 4th failure within the retry window: seed the record with three prior failures, then complete the
// container.
cpm.getProcessorFailures().put(processorId, new ProcessorFailure(3, Instant.now(), Duration.ZERO));
cpm.onResourceCompleted(new SamzaResourceStatus(container.getContainerId(), "diagnostics", 1));
// The job-failure criteria are expected to be met exactly when failAfterRetries is set.
assertEquals(failAfterRetries, cpm.getJobFailureCriteriaMet());
// The recorded failure count is expected to stay at the seeded value of 3 after this completion.
assertEquals(3, cpm.getProcessorFailures().get(processorId).getCount());
if (failAfterRetries) {
assertTrue(cpm.shouldShutdown());
} else {
assertFalse(cpm.shouldShutdown());
}
// No further container request, pending or delayed, is expected after the 4th failure.
assertEquals(0, allocator.getContainerRequestState().numPendingRequests());
assertEquals(0, allocator.getContainerRequestState().numDelayedRequests());
cpm.stop();
}
use of org.apache.samza.container.LocalityManager in project samza by apache.
the class SamzaTaskProxy method readTasksFromCoordinatorStream.
/**
 * Reads the task-to-container assignment from the job's coordinator stream and turns every assignment into a
 * {@link Task}. Each task carries the host recorded for its container (or {@code null} when no locality or host
 * is recorded for that container), an empty list, and the store names from the job's storage config.
 * @param consumer system consumer associated with a job's coordinator stream.
 * @return list of {@link Task} constructed from job model in coordinator stream.
 */
protected List<Task> readTasksFromCoordinatorStream(CoordinatorStreamSystemConsumer consumer) {
  CoordinatorStreamStore store = new CoordinatorStreamStore(consumer.getConfig(), new MetricsRegistryMap());

  // Container id -> locality, as currently recorded in the coordinator stream.
  Map<String, ProcessorLocality> localityByContainerId = new LocalityManager(store).readLocality().getProcessorLocalities();

  // Task name -> container id.
  NamespaceAwareCoordinatorStreamStore taskContainerStore = new NamespaceAwareCoordinatorStreamStore(store, SetTaskContainerMapping.TYPE);
  NamespaceAwareCoordinatorStreamStore taskModeStore = new NamespaceAwareCoordinatorStreamStore(store, SetTaskModeMapping.TYPE);
  Map<String, String> containerIdByTaskName = new TaskAssignmentManager(taskContainerStore, taskModeStore).readTaskAssignment();

  List<String> storeNames = new StorageConfig(consumer.getConfig()).getStoreNames();

  List<Task> tasks = new ArrayList<>();
  for (Map.Entry<String, String> assignment : containerIdByTaskName.entrySet()) {
    String containerId = assignment.getValue();
    ProcessorLocality locality = localityByContainerId.get(containerId);
    // The host stays null when the container has no locality entry.
    String hostName = locality == null ? null : locality.host();
    tasks.add(new Task(hostName, assignment.getKey(), containerId, new ArrayList<>(), storeNames));
  }
  return tasks;
}
Aggregations