Use of org.apache.samza.config.ClusterManagerConfig in the Apache Samza project.
Class TestContainerProcessManager, method testOnInit.
/**
 * Verifies that starting the ContainerProcessManager runs the allocator thread and leaves
 * exactly one needed processor and one requested container.
 */
@Test
public void testOnInit() throws Exception {
  Config jobConfig = getConfig();
  SamzaApplicationState appState = new SamzaApplicationState(getJobModelManager(1));
  MockClusterResourceManagerCallback resourceCallback = new MockClusterResourceManagerCallback();
  ClusterResourceManager resourceManager = new MockClusterResourceManager(resourceCallback, appState);
  ClusterManagerConfig managerConfig = spy(new ClusterManagerConfig(jobConfig));
  ContainerManager containerManager = buildContainerManager(
      containerPlacementMetadataStore, appState, resourceManager, managerConfig.getHostAffinityEnabled(), false);
  ContainerProcessManager processManager =
      buildContainerProcessManager(managerConfig, appState, resourceManager, Optional.empty());

  // Swap the manager's private allocator for a mock so the requested containers can be inspected.
  MockContainerAllocatorWithoutHostAffinity mockAllocator =
      new MockContainerAllocatorWithoutHostAffinity(resourceManager, jobConfig, appState, containerManager);
  getPrivateFieldFromCpm("containerAllocator", processManager).set(processManager, mockAllocator);

  // Swap the allocator thread for a stub that only records that it ran and releases the latch.
  CountDownLatch threadStarted = new CountDownLatch(1);
  Thread stubAllocatorThread = new Thread(() -> {
    isRunning = true;
    threadStarted.countDown();
  });
  getPrivateFieldFromCpm("allocatorThread", processManager).set(processManager, stubAllocatorThread);

  processManager.start();

  boolean startedInTime = threadStarted.await(2, TimeUnit.SECONDS);
  if (!startedInTime) {
    Assert.fail("timed out waiting for the latch to expire");
  }

  // The stub thread must have run.
  assertTrue(isRunning);

  // One processor is needed and one container has been requested.
  assertEquals(1, appState.neededProcessors.get());
  assertEquals(1, mockAllocator.requestedContainers);

  processManager.stop();
}
Use of org.apache.samza.config.ClusterManagerConfig in the Apache Samza project.
Class TestContainerProcessManager, method testContainerProcessManager.
/**
 * Verifies that the ContainerProcessManager creates a plain {@code ContainerAllocator} and that the
 * allocator reflects the configured container memory and CPU cores. The check runs twice: once on top
 * of {@code getConfig()} and once on top of {@code getConfigWithHostAffinity()}.
 */
@Test
public void testContainerProcessManager() throws Exception {
  // Scenario 1: base config plus explicit container sizing.
  Map<String, String> configMap = new HashMap<>(getConfig());
  configMap.put("cluster-manager.container.memory.mb", "500");
  configMap.put("cluster-manager.container.cpu.cores", "5");

  SamzaApplicationState baseState = new SamzaApplicationState(getJobModelManager(1));
  MockClusterResourceManagerCallback baseCallback = new MockClusterResourceManagerCallback();
  MockClusterResourceManager baseResourceManager = new MockClusterResourceManager(baseCallback, baseState);

  FaultDomainManager faultDomainManager = mock(FaultDomainManager.class);
  LocalityManager mockLocalityManager = mock(LocalityManager.class);
  when(mockLocalityManager.readLocality())
      .thenReturn(new LocalityModel(ImmutableMap.of("0", new ProcessorLocality("0", "host1"))));

  ContainerManager containerManager = buildContainerManager(
      containerPlacementMetadataStore, baseState, baseResourceManager, true, false,
      mockLocalityManager, faultDomainManager);

  ContainerProcessManager baseManager = buildContainerProcessManager(
      new ClusterManagerConfig(new MapConfig(configMap)), baseState, baseResourceManager, Optional.empty());

  ContainerAllocator baseAllocator =
      (ContainerAllocator) getPrivateFieldFromCpm("containerAllocator", baseManager).get(baseManager);
  assertEquals(ContainerAllocator.class, baseAllocator.getClass());
  // The allocator must honor the container sizing supplied through config.
  assertEquals(500, baseAllocator.containerMemoryMb);
  assertEquals(5, baseAllocator.containerNumCpuCores);

  // Scenario 2: same sizing on top of the host-affinity config, reusing the container manager above.
  configMap.clear();
  configMap.putAll(getConfigWithHostAffinity());
  configMap.put("cluster-manager.container.memory.mb", "500");
  configMap.put("cluster-manager.container.cpu.cores", "5");

  SamzaApplicationState affinityState = new SamzaApplicationState(getJobModelManager(1));
  MockClusterResourceManagerCallback affinityCallback = new MockClusterResourceManagerCallback();
  MockClusterResourceManager affinityResourceManager =
      new MockClusterResourceManager(affinityCallback, affinityState);

  ContainerProcessManager affinityManager = new ContainerProcessManager(
      new ClusterManagerConfig(new MapConfig(configMap)), affinityState, new MetricsRegistryMap(),
      affinityResourceManager, Optional.empty(), containerManager, mockLocalityManager, false);

  ContainerAllocator affinityAllocator =
      (ContainerAllocator) getPrivateFieldFromCpm("containerAllocator", affinityManager).get(affinityManager);
  assertEquals(ContainerAllocator.class, affinityAllocator.getClass());
  // The allocator must honor the container sizing supplied through config.
  assertEquals(500, affinityAllocator.containerMemoryMb);
  assertEquals(5, affinityAllocator.containerNumCpuCores);
}
Use of org.apache.samza.config.ClusterManagerConfig in the Apache Samza project.
Class TestContainerProcessManager, method testContainerRequestedRetriesExceedingWindowOnFailureWithUnknownCode.
/**
 * Repeatedly fails the container of processor "0" with exit status 1, each time after seeding a
 * previous failure record whose timestamp lies just outside the configured retry window.
 *
 * <p>After every failure the test expects the job-failure criteria to remain unmet and the
 * processor's recorded failure count to be 1.
 *
 * @param withHostAffinity forwarded to {@code getConfigWithHostAffinityAndRetries} when building the config
 * @param failAfterRetries forwarded to {@code getConfigWithHostAffinityAndRetries} when building the config
 * @throws Exception if any of the exercised calls throws
 */
private void testContainerRequestedRetriesExceedingWindowOnFailureWithUnknownCode(boolean withHostAffinity, boolean failAfterRetries) throws Exception {
  int maxRetries = 3;
  String processorId = "0";
  ClusterManagerConfig clusterManagerConfig =
      new ClusterManagerConfig(getConfigWithHostAffinityAndRetries(withHostAffinity, maxRetries, failAfterRetries));
  SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(1));
  MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
  MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
  ContainerManager containerManager = buildContainerManager(
      containerPlacementMetadataStore, state, clusterResourceManager, clusterManagerConfig.getHostAffinityEnabled(), false);
  MockContainerAllocatorWithoutHostAffinity allocator =
      new MockContainerAllocatorWithoutHostAffinity(clusterResourceManager, clusterManagerConfig, state, containerManager);
  ContainerProcessManager cpm =
      buildContainerProcessManager(clusterManagerConfig, state, clusterResourceManager, Optional.of(allocator));

  // start triggers a request
  cpm.start();
  assertFalse(cpm.shouldShutdown());
  assertEquals(1, allocator.getContainerRequestState().numPendingRequests());

  SamzaResource container = new SamzaResource(1, 1024, "host1", "id0");
  // Age used to back-date seeded failures so they fall outside the retry window.
  int longWindow = clusterManagerConfig.getContainerRetryWindowMs() + 10;

  // Each round seeds priorFailureCount earlier failures (1, 2, then 3) and triggers the next one.
  for (int priorFailureCount = 1; priorFailureCount <= maxRetries; priorFailureCount++) {
    cpm.onResourceAllocated(container);
    // Allow container to run and update state
    if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
      fail("timed out waiting for the containers to start");
    }
    cpm.onStreamProcessorLaunchSuccess(container);

    // Seed a previous failure that happened longer ago than the retry window.
    cpm.getProcessorFailures().put(processorId,
        new ProcessorFailure(priorFailureCount, Instant.now().minusMillis(longWindow), Duration.ZERO));
    assertEquals(priorFailureCount, cpm.getProcessorFailures().get(processorId).getCount());

    cpm.onResourceCompleted(new SamzaResourceStatus(container.getContainerId(), "diagnostics", 1));

    // The failure must not fail the job, and the recorded count is expected to be 1 afterwards.
    assertFalse(cpm.getJobFailureCriteriaMet());
    assertEquals(1, cpm.getProcessorFailures().get(processorId).getCount());
  }

  cpm.stop();
}
Use of org.apache.samza.config.ClusterManagerConfig in the Apache Samza project.
Class TestContainerProcessManager, method testRerequestOnAnyHostIfContainerStartFails.
/**
 * Verifies that when launching a stream processor on an allocated resource fails, the
 * ContainerProcessManager issues one more resource request and that this request targets
 * {@code ResourceRequestState.ANY_HOST}.
 */
@Test
public void testRerequestOnAnyHostIfContainerStartFails() throws Exception {
  SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(1));
  MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
  MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
  FaultDomainManager faultDomainManager = mock(FaultDomainManager.class);
  LocalityManager mockLocalityManager = mock(LocalityManager.class);
  when(mockLocalityManager.readLocality())
      .thenReturn(new LocalityModel(ImmutableMap.of("0", new ProcessorLocality("1", "host1"))));
  ContainerManager containerManager = buildContainerManager(
      containerPlacementMetadataStore, state, clusterResourceManager,
      Boolean.valueOf(config.get(ClusterManagerConfig.HOST_AFFINITY_ENABLED)), false,
      mockLocalityManager, faultDomainManager);
  MockContainerAllocatorWithoutHostAffinity allocator =
      new MockContainerAllocatorWithoutHostAffinity(clusterResourceManager, new MapConfig(config), state, containerManager);
  ContainerProcessManager manager = new ContainerProcessManager(
      new ClusterManagerConfig(config), state, new MetricsRegistryMap(), clusterResourceManager,
      Optional.of(allocator), containerManager, mockLocalityManager, false);

  manager.start();

  // Register a resource as pending for processor "1" so the launch failure can be matched to it.
  SamzaResource resource = new SamzaResource(1, 1024, "host1", "resource-1");
  state.pendingProcessors.put("1", resource);

  // JUnit's assertEquals takes (expected, actual); one request exists after start.
  Assert.assertEquals(1, clusterResourceManager.resourceRequests.size());

  manager.onStreamProcessorLaunchFailure(resource, new Exception("cannot launch container!"));

  // The launch failure must produce a second request, placed on any host.
  Assert.assertEquals(2, clusterResourceManager.resourceRequests.size());
  Assert.assertEquals(ResourceRequestState.ANY_HOST, clusterResourceManager.resourceRequests.get(1).getHost());

  manager.stop();
}
Use of org.apache.samza.config.ClusterManagerConfig in the Apache Samza project.
Class TestContainerProcessManager, method testNewContainerRequestedOnFailureWithUnknownCode.
/**
 * The ContainerProcessManager should ask for a replacement container when a container completes
 * with an unknown exit code. Without host affinity the replacement is requested on ANY_HOST.
 * A second such failure is expected to mark the job as FAILED and trigger shutdown.
 */
@Test
public void testNewContainerRequestedOnFailureWithUnknownCode() throws Exception {
  Config jobConfig = getConfig();
  SamzaApplicationState appState = new SamzaApplicationState(getJobModelManager(1));
  MockClusterResourceManagerCallback resourceCallback = new MockClusterResourceManagerCallback();
  MockClusterResourceManager resourceManager = new MockClusterResourceManager(resourceCallback, appState);
  ClusterManagerConfig managerConfig = spy(new ClusterManagerConfig(jobConfig));
  ContainerManager containerManager = buildContainerManager(
      containerPlacementMetadataStore, appState, resourceManager, managerConfig.getHostAffinityEnabled(), false);
  MockContainerAllocatorWithoutHostAffinity mockAllocator =
      new MockContainerAllocatorWithoutHostAffinity(resourceManager, jobConfig, appState, containerManager);
  ContainerProcessManager processManager =
      spy(buildContainerProcessManager(managerConfig, appState, resourceManager, Optional.of(mockAllocator)));

  // Starting the manager issues the initial container request.
  processManager.start();
  verify(managerConfig, never()).getContainerPreferredHostLastRetryDelayMs();
  verify(processManager, never()).onResourceCompletedWithUnknownStatus(any(), anyString(), anyString(), anyInt());
  assertFalse(processManager.shouldShutdown());
  assertEquals(1, mockAllocator.getContainerRequestState().numPendingRequests());

  SamzaResource container = new SamzaResource(1, 1024, "host1", "id0");
  processManager.onResourceAllocated(container);

  // Give the container time to start and update state.
  if (!mockAllocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
    fail("timed out waiting for the containers to start");
  }
  processManager.onStreamProcessorLaunchSuccess(container);

  // First failure: completion with exit status 1.
  SamzaResourceStatus failureStatus = new SamzaResourceStatus(container.getContainerId(), "diagnostics", 1);
  processManager.onResourceCompleted(failureStatus);

  // The failure is routed to the unknown-status handler and produces a new request on ANY_HOST.
  verify(processManager)
      .onResourceCompletedWithUnknownStatus(eq(failureStatus), eq(container.getContainerId()), eq("0"), eq(1));
  verify(managerConfig, never()).getContainerPreferredHostLastRetryDelayMs();
  assertEquals(1, mockAllocator.getContainerRequestState().numPendingRequests());
  assertEquals(ResourceRequestState.ANY_HOST,
      mockAllocator.getContainerRequestState().peekPendingRequest().getPreferredHost());
  assertFalse(processManager.shouldShutdown());
  assertFalse(appState.jobHealthy.get());
  assertEquals(2, resourceManager.resourceRequests.size());
  assertEquals(0, resourceManager.releasedResources.size());

  processManager.onResourceAllocated(container);

  // Give the container time to start and update state.
  if (!mockAllocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
    fail("timed out waiting for the containers to start");
  }
  processManager.onStreamProcessorLaunchSuccess(container);
  assertTrue(appState.jobHealthy.get());

  // Second failure with the same status.
  processManager.onResourceCompleted(failureStatus);

  // This one should shut the job down; presumably getConfig() allows only a single retry (confirm there).
  verify(processManager, times(2))
      .onResourceCompletedWithUnknownStatus(eq(failureStatus), eq(container.getContainerId()), eq("0"), eq(1));
  verify(managerConfig, never()).getContainerPreferredHostLastRetryDelayMs();
  assertEquals(0, mockAllocator.getContainerRequestState().numPendingRequests());
  assertEquals(2, resourceManager.resourceRequests.size());
  assertEquals(0, resourceManager.releasedResources.size());
  assertFalse(appState.jobHealthy.get());
  assertTrue(processManager.shouldShutdown());
  assertEquals(SamzaApplicationState.SamzaAppStatus.FAILED, appState.status);

  processManager.stop();
}
Aggregations