Use of org.apache.samza.config.ClusterManagerConfig in the Apache Samza project.
Class TestContainerProcessManager, method testContainerRequestedRetriesNotExceedingWindowOnFailureWithUnknownCode.
/**
 * Exercises the retry accounting for one processor whose container keeps completing with exit
 * status 1 while every failure stays inside the retry window.
 *
 * <p>The job is configured with {@code maxRetries = 3}. The failure history is seeded through
 * {@code cpm.getProcessorFailures()} before each simulated completion, so the 2nd and 3rd failures
 * must bump the count and trigger a new container request, while the 4th must leave the count at 3,
 * leave no outstanding request and set the job-failure flag to {@code failAfterRetries}.
 *
 * @param withHostAffinity whether the config enables host affinity; when {@code true} the mocked
 *     locality maps the processor to {@code host1} and the retry after the 3rd failure is expected
 *     to be queued as a delayed request rather than a pending one
 * @param failAfterRetries expected value of {@code cpm.getJobFailureCriteriaMet()} and
 *     {@code cpm.shouldShutdown()} once the retries are exhausted
 * @throws Exception if any collaborator throws while the scenario is being driven
 */
private void testContainerRequestedRetriesNotExceedingWindowOnFailureWithUnknownCode(boolean withHostAffinity, boolean failAfterRetries) throws Exception {
  int maxRetries = 3;
  String processorId = "0";
  ClusterManagerConfig clusterManagerConfig = new ClusterManagerConfig(getConfigWithHostAffinityAndRetries(withHostAffinity, maxRetries, failAfterRetries));
  SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(1));
  MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
  MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
  FaultDomainManager faultDomainManager = mock(FaultDomainManager.class);
  LocalityManager mockLocalityManager = mock(LocalityManager.class);
  if (withHostAffinity) {
    // Record a previous placement on host1 for the processor under test.
    when(mockLocalityManager.readLocality()).thenReturn(new LocalityModel(ImmutableMap.of(processorId, new ProcessorLocality(processorId, "host1"))));
  } else {
    when(mockLocalityManager.readLocality()).thenReturn(new LocalityModel(new HashMap<>()));
  }
  ContainerManager containerManager = buildContainerManager(containerPlacementMetadataStore, state, clusterResourceManager, clusterManagerConfig.getHostAffinityEnabled(), false, mockLocalityManager, faultDomainManager);
  MockContainerAllocatorWithoutHostAffinity allocator = new MockContainerAllocatorWithoutHostAffinity(clusterResourceManager, clusterManagerConfig, state, containerManager);
  ContainerProcessManager cpm = buildContainerProcessManager(clusterManagerConfig, state, clusterResourceManager, Optional.of(allocator), mockLocalityManager, false, faultDomainManager);

  // start triggers a request
  cpm.start();
  assertFalse(cpm.shouldShutdown());
  assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
  assertEquals(0, allocator.getContainerRequestState().numDelayedRequests());

  SamzaResource container = new SamzaResource(1, 1024, "host1", "id0");
  cpm.onResourceAllocated(container);
  // Allow container to run and update state
  if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
    fail("timed out waiting for the containers to start");
  }
  cpm.onStreamProcessorLaunchSuccess(container);

  // Mock 2nd failure not exceeding retry window.
  cpm.getProcessorFailures().put(processorId, new ProcessorFailure(1, Instant.now(), Duration.ZERO));
  cpm.onResourceCompleted(new SamzaResourceStatus(container.getContainerId(), "diagnostics", 1));
  assertFalse(cpm.getJobFailureCriteriaMet());
  assertEquals(2, cpm.getProcessorFailures().get(processorId).getCount());
  assertFalse(cpm.shouldShutdown());
  assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
  assertEquals(0, allocator.getContainerRequestState().numDelayedRequests());

  cpm.onResourceAllocated(container);
  // Allow container to run and update state
  if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
    fail("timed out waiting for the containers to start");
  }
  cpm.onStreamProcessorLaunchSuccess(container);

  // Mock 3rd failure not exceeding retry window.
  cpm.getProcessorFailures().put(processorId, new ProcessorFailure(2, Instant.now(), Duration.ZERO));
  cpm.onResourceCompleted(new SamzaResourceStatus(container.getContainerId(), "diagnostics", 1));
  assertFalse(cpm.getJobFailureCriteriaMet());
  assertEquals(3, cpm.getProcessorFailures().get(processorId).getCount());
  assertFalse(cpm.shouldShutdown());
  if (withHostAffinity) {
    // With host affinity the retry is expected in the delayed queue, not the pending one.
    assertEquals(0, allocator.getContainerRequestState().numPendingRequests());
    assertEquals(1, allocator.getContainerRequestState().numDelayedRequests());
  } else {
    assertEquals(1, allocator.getContainerRequestState().numPendingRequests());
    assertEquals(0, allocator.getContainerRequestState().numDelayedRequests());
  }

  cpm.onResourceAllocated(container);
  if (withHostAffinity) {
    if (allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
      // Host affinity is enabled, so the retry is still delayed and no container start is expected yet.
      fail("Expecting a delayed request so allocator callback should have timed out waiting for a response.");
    }
    // For the sake of testing the mocked 4th failure below, send delayed requests now.
    // The queued request is re-created with a request time one second in the past so it is due.
    SamzaResourceRequest request = allocator.getContainerRequestState().getDelayedRequestsQueue().poll();
    SamzaResourceRequest fastForwardRequest = new SamzaResourceRequest(request.getNumCores(), request.getMemoryMB(), request.getPreferredHost(), request.getProcessorId(), Instant.now().minusSeconds(1));
    allocator.getContainerRequestState().getDelayedRequestsQueue().add(fastForwardRequest);
    int numSent = allocator.getContainerRequestState().sendPendingDelayedResourceRequests();
    assertEquals(1, numSent);
    cpm.onResourceAllocated(container);
  }
  if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
    // A container start is expected in both modes here: straight away without host affinity,
    // and after the fast-forwarded delayed request with it.
    fail("Timed out waiting for the containers to start");
  }
  cpm.onStreamProcessorLaunchSuccess(container);

  // Mock 4th failure not exceeding retry window.
  cpm.getProcessorFailures().put(processorId, new ProcessorFailure(3, Instant.now(), Duration.ZERO));
  cpm.onResourceCompleted(new SamzaResourceStatus(container.getContainerId(), "diagnostics", 1));
  // expecting failed container
  assertEquals(failAfterRetries, cpm.getJobFailureCriteriaMet());
  // count won't update on failure
  assertEquals(3, cpm.getProcessorFailures().get(processorId).getCount());
  // Shutdown is expected exactly when the job is configured to fail after exhausting retries.
  assertEquals(failAfterRetries, cpm.shouldShutdown());
  assertEquals(0, allocator.getContainerRequestState().numPendingRequests());
  assertEquals(0, allocator.getContainerRequestState().numDelayedRequests());
  cpm.stop();
}
Use of org.apache.samza.config.ClusterManagerConfig in the Apache Samza project.
Class TestContainerProcessManager, method testOnInitToForceRestartAMHighAvailability.
/**
 * Verifies that, with YARN AM high availability enabled and a processor already recorded as
 * running in the application state, starting the {@link ContainerProcessManager} stops that
 * processor's container and the callback receives a single {@code PREEMPTED} completion for it.
 *
 * @throws Exception if reflective field access fails or the wait on the latch is interrupted
 */
@Test
public void testOnInitToForceRestartAMHighAvailability() throws Exception {
  Map<String, String> configMap = new HashMap<>(configVals);
  configMap.put(JobConfig.YARN_AM_HIGH_AVAILABILITY_ENABLED, "true");
  Config conf = new MapConfig(configMap);

  // Pre-populate the state with a processor that is already running.
  SamzaResource samzaResource = new SamzaResource(1, 1024, "host", "0");
  SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(2));
  state.runningProcessors.put("0", samzaResource);

  MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
  ClusterResourceManager clusterResourceManager = spy(new MockClusterResourceManager(callback, state));
  ClusterManagerConfig clusterManagerConfig = spy(new ClusterManagerConfig(conf));
  ContainerManager containerManager = buildContainerManager(containerPlacementMetadataStore, state, clusterResourceManager, clusterManagerConfig.getHostAffinityEnabled(), false);
  // NOTE(review): the trailing `true` presumably turns on the force-restart behaviour named in
  // the test; confirm against buildContainerProcessManager.
  ContainerProcessManager cpm = buildContainerProcessManager(clusterManagerConfig, state, clusterResourceManager, Optional.empty(), true);

  // Replace the manager's allocator and allocator thread with test doubles.
  MockContainerAllocatorWithoutHostAffinity allocator = new MockContainerAllocatorWithoutHostAffinity(clusterResourceManager, conf, state, containerManager);
  getPrivateFieldFromCpm("containerAllocator", cpm).set(cpm, allocator);
  CountDownLatch latch = new CountDownLatch(1);
  getPrivateFieldFromCpm("allocatorThread", cpm).set(cpm, new Thread() {
    @Override
    public void run() {
      isRunning = true;
      // Signal the test thread that the stub allocator thread has actually run.
      latch.countDown();
    }
  });

  cpm.start();
  if (!latch.await(2, TimeUnit.SECONDS)) {
    Assert.fail("timed out waiting for the latch to expire");
  }

  verify(clusterResourceManager, times(1)).stopStreamProcessor(samzaResource);
  assertEquals("CPM should stop the running container", 1, callback.resourceStatuses.size());
  SamzaResourceStatus actualResourceStatus = callback.resourceStatuses.get(0);
  assertEquals("Container 0 should be stopped", "0", actualResourceStatus.getContainerId());
  assertEquals("Container 0 should have exited with preempted status", SamzaResourceStatus.PREEMPTED, actualResourceStatus.getExitCode());
  cpm.stop();
}
Use of org.apache.samza.config.ClusterManagerConfig in the Apache Samza project.
Class TestContainerProcessManager, method testInvalidNotificationsAreIgnored.
/**
 * Verifies that a completion notification carrying a container ID the manager never allocated is
 * ignored: the unknown-status handler is not invoked, no new container request is made, the job
 * stays healthy and not shut down, and the notification is counted as redundant.
 *
 * @throws Exception if any collaborator throws while the scenario is being driven
 */
@Test
public void testInvalidNotificationsAreIgnored() throws Exception {
  Config conf = getConfig();
  SamzaApplicationState state = new SamzaApplicationState(getJobModelManager(1));
  MockClusterResourceManagerCallback callback = new MockClusterResourceManagerCallback();
  MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
  ClusterManagerConfig clusterManagerConfig = spy(new ClusterManagerConfig(conf));
  ContainerManager containerManager = buildContainerManager(containerPlacementMetadataStore, state, clusterResourceManager, clusterManagerConfig.getHostAffinityEnabled(), false);
  MockContainerAllocatorWithoutHostAffinity allocator = new MockContainerAllocatorWithoutHostAffinity(clusterResourceManager, conf, state, containerManager);
  // Spy on the manager so the test can verify which completion handler is (not) invoked.
  ContainerProcessManager cpm = spy(buildContainerProcessManager(clusterManagerConfig, state, clusterResourceManager, Optional.of(allocator)));

  // Start the container process manager
  cpm.start();
  SamzaResource container = new SamzaResource(1, 1000, "host1", "id1");
  cpm.onResourceAllocated(container);
  // Allow container to run and update state
  if (!allocator.awaitContainersStart(1, 2, TimeUnit.SECONDS)) {
    fail("timed out waiting for the containers to start");
  }

  // Report a disk-failure completion (SamzaResourceStatus.DISK_FAIL) for a container ID that was never allocated
  cpm.onResourceCompleted(new SamzaResourceStatus("invalidContainerID", "Disk failure", SamzaResourceStatus.DISK_FAIL));
  verify(cpm, never()).onResourceCompletedWithUnknownStatus(any(SamzaResourceStatus.class), anyString(), anyString(), anyInt());

  // The above failure should not trigger any container requests, since it is for an invalid container ID
  assertEquals(0, allocator.getContainerRequestState().numPendingRequests());
  assertFalse(cpm.shouldShutdown());
  assertTrue(state.jobHealthy.get());
  // JUnit's contract is assertEquals(expected, actual); keep that order so failure messages read correctly.
  assertEquals(1, state.redundantNotifications.get());
  cpm.stop();
}
Aggregations