use of org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata in project samza by apache.
the class TestContainerPlacementActions method testActiveContainerLaunchFailureOnControlActionShouldFallbackToSourceHost.
/**
 * Requests a move of processor "0" to "host-3" while every launch on "host-3" is reported as a
 * failure, then checks that processor "0" is running on "host-1" again and that the placement
 * action (and its stored response) is marked FAILED.
 */
@Test(timeout = 10000)
public void testActiveContainerLaunchFailureOnControlActionShouldFallbackToSourceHost() throws Exception {
  // Relay allocated resources from the mocked callback to the process manager on a separate thread
  doAnswer(invocation -> {
    new Thread(() -> {
      Object[] args = invocation.getArguments();
      cpm.onResourcesAvailable((List<SamzaResource>) args[0]);
    }, "AMRMClientAsync").start();
    return null;
  }).when(callback).onResourcesAvailable(anyList());

  // Mimic stream processor launch failure only on host-3; launches on any other host succeed
  doAnswer(invocation -> {
    new Thread(() -> {
      Object[] args = invocation.getArguments();
      SamzaResource launchedResource = (SamzaResource) args[0];
      if ("host-3".equals(launchedResource.getHost())) {
        cpm.onStreamProcessorLaunchFailure(launchedResource, new Throwable("Custom Exception for Host-3"));
      } else {
        cpm.onStreamProcessorLaunchSuccess(launchedResource);
      }
    }, "AMRMClientAsync").start();
    return null;
  }).when(callback).onStreamProcessorLaunchSuccess(any());

  // Relay container completion notifications to the process manager on a separate thread
  doAnswer(invocation -> {
    new Thread(() -> {
      Object[] args = invocation.getArguments();
      cpm.onResourcesCompleted((List<SamzaResourceStatus>) args[0]);
    }, "AMRMClientAsync").start();
    return null;
  }).when(callback).onResourcesCompleted(anyList());

  cpm.start();

  if (!allocatorWithHostAffinity.awaitContainersStart(2, 5, TimeUnit.SECONDS)) {
    fail("timed out waiting for the containers to start");
  }

  // Poll until both processors are registered as running; the @Test timeout bounds this loop
  while (state.runningProcessors.size() != 2) {
    Thread.sleep(100);
  }

  // App is in running state with two containers running
  assertEquals(2, state.runningProcessors.size());
  assertEquals("host-1", state.runningProcessors.get("0").getHost());
  assertEquals("host-2", state.runningProcessors.get("1").getHost());
  assertEquals(2, state.preferredHostRequests.get());
  assertEquals(0, state.anyHostRequests.get());

  // Take a container placement action to move a container with container id 0
  ContainerPlacementRequestMessage requestMessage =
      new ContainerPlacementRequestMessage(UUID.randomUUID(), "app-attempt-001", "0", "host-3", System.currentTimeMillis());
  ContainerPlacementMetadata metadata =
      containerManager.registerContainerPlacementActionForTest(requestMessage, allocatorWithHostAffinity);

  // Wait for the ControlAction to complete
  if (!allocatorWithHostAffinity.awaitContainersStart(1, 3, TimeUnit.SECONDS)) {
    fail("timed out waiting for the containers to start");
  }

  while (state.runningProcessors.size() != 2) {
    Thread.sleep(100);
  }

  assertEquals(4, state.preferredHostRequests.get());
  assertEquals(2, state.runningProcessors.size());
  // Container 0 should fallback to source host
  assertEquals("host-1", state.runningProcessors.get("0").getHost());
  assertEquals("host-2", state.runningProcessors.get("1").getHost());
  assertEquals(0, state.anyHostRequests.get());

  // Control Action should be failed in this case
  assertEquals(ContainerPlacementMessage.StatusCode.FAILED, metadata.getActionStatus());
  Optional<ContainerPlacementResponseMessage> responseMessage =
      containerPlacementMetadataStore.readContainerPlacementResponseMessage(requestMessage.getUuid());
  assertTrue(responseMessage.isPresent());
  assertEquals(ContainerPlacementMessage.StatusCode.FAILED, responseMessage.get().getStatusCode());
  assertResponseMessage(responseMessage.get(), requestMessage);

  // Request shall be deleted as soon as it is acted upon
  assertFalse(containerPlacementMetadataStore.readContainerPlacementRequestMessage(requestMessage.getUuid()).isPresent());
}
use of org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata in project samza by apache.
the class TestContainerPlacementActions method testContainerMoveActionExpiredRequestNotAffectRunningContainers.
/**
 * Issues a move request for processor "0" to "host-3" with a 10 ms request expiry while
 * allocations for "host-3" are never delivered, then checks that the action ends up FAILED and
 * that both processors are still running on their original hosts.
 */
@Test(timeout = 10000)
public void testContainerMoveActionExpiredRequestNotAffectRunningContainers() throws Exception {
  // Mimic the behavior of Expired request: only allocations on host-1 / host-2 are forwarded,
  // so a resource for any other host never reaches the process manager
  doAnswer(invocation -> {
    new Thread(() -> {
      Object[] args = invocation.getArguments();
      List<SamzaResource> resources = (List<SamzaResource>) args[0];
      String allocatedHost = resources.get(0).getHost();
      if (allocatedHost.equals("host-1") || allocatedHost.equals("host-2")) {
        cpm.onResourcesAvailable(resources);
      }
    }, "AMRMClientAsync").start();
    return null;
  }).when(callback).onResourcesAvailable(anyList());

  // Relay launch-success notifications to the process manager on a separate thread
  doAnswer(invocation -> {
    new Thread(() -> {
      Object[] args = invocation.getArguments();
      cpm.onStreamProcessorLaunchSuccess((SamzaResource) args[0]);
    }, "AMRMClientAsync").start();
    return null;
  }).when(callback).onStreamProcessorLaunchSuccess(any());

  cpm.start();

  if (!allocatorWithHostAffinity.awaitContainersStart(2, 3, TimeUnit.SECONDS)) {
    fail("timed out waiting for the containers to start");
  }

  // Poll until both processors are registered as running; the @Test timeout bounds this loop
  while (state.runningProcessors.size() != 2) {
    Thread.sleep(100);
  }

  // App is in running state with two containers running
  assertEquals(2, state.runningProcessors.size());
  assertEquals("host-1", state.runningProcessors.get("0").getHost());
  assertEquals("host-2", state.runningProcessors.get("1").getHost());
  assertEquals(2, state.preferredHostRequests.get());
  assertEquals(0, state.anyHostRequests.get());

  // Initiate container placement action to move a container with container id 0
  ContainerPlacementRequestMessage requestMessage =
      new ContainerPlacementRequestMessage(UUID.randomUUID(), "appAttempt-001", "0", "host-3", Duration.ofMillis(10), System.currentTimeMillis());
  ContainerPlacementMetadata metadata =
      containerManager.registerContainerPlacementActionForTest(requestMessage, allocatorWithHostAffinity);

  // Wait for the placement action to be complete & get written to the underlying metastore
  Optional<ContainerPlacementResponseMessage> responseMessage =
      containerPlacementMetadataStore.readContainerPlacementResponseMessage(requestMessage.getUuid());
  while (!(metadata.getActionStatus() == ContainerPlacementMessage.StatusCode.FAILED
      && responseMessage.isPresent()
      && responseMessage.get().getStatusCode() == ContainerPlacementMessage.StatusCode.FAILED)) {
    Thread.sleep(100);
    responseMessage = containerPlacementMetadataStore.readContainerPlacementResponseMessage(requestMessage.getUuid());
  }

  assertEquals(3, state.preferredHostRequests.get());
  assertEquals(2, state.runningProcessors.size());
  // Container should not be stopped
  assertEquals("host-1", state.runningProcessors.get("0").getHost());
  assertEquals("host-2", state.runningProcessors.get("1").getHost());
  assertEquals(0, state.anyHostRequests.get());
  assertTrue(responseMessage.isPresent());
  assertEquals(ContainerPlacementMessage.StatusCode.FAILED, responseMessage.get().getStatusCode());
  assertResponseMessage(responseMessage.get(), requestMessage);

  // Request shall be deleted as soon as it is acted upon
  assertFalse(containerPlacementMetadataStore.readContainerPlacementRequestMessage(requestMessage.getUuid()).isPresent());
}
use of org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata in project samza by apache.
the class TestContainerPlacementActions method testAlwaysMoveToAnyHostForHostAffinityDisabled.
/**
 * Wires up its own state, callback, container manager, allocator and process manager (using
 * {@code MockContainerAllocatorWithoutHostAffinity}), moves processor "0" and checks that the move
 * is served through an additional ANY_HOST request and succeeds. Afterwards it re-submits a request
 * with the same UUID and checks that the stored response becomes BAD_REQUEST.
 */
@Test(timeout = 10000)
public void testAlwaysMoveToAnyHostForHostAffinityDisabled() throws Exception {
  Map<String, String> conf = new HashMap<>(getConfigWithHostAffinityAndRetries(false, 1, true));
  SamzaApplicationState state =
      new SamzaApplicationState(JobModelManagerTestUtil.getJobModelManager(getConfig(), 2, this.server));
  ClusterResourceManager.Callback callback = mock(ClusterResourceManager.Callback.class);
  MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
  FaultDomainManager faultDomainManager = mock(FaultDomainManager.class);
  ContainerManager containerManager = new ContainerManager(containerPlacementMetadataStore, state,
      clusterResourceManager, false, false, localityManager, faultDomainManager, config);
  MockContainerAllocatorWithoutHostAffinity allocatorWithoutHostAffinity =
      new MockContainerAllocatorWithoutHostAffinity(clusterResourceManager, new MapConfig(conf), state, containerManager);
  ContainerProcessManager cpm = new ContainerProcessManager(
      new ClusterManagerConfig(new MapConfig(getConfig(), getConfigWithHostAffinityAndRetries(false, 1, true))),
      state, new MetricsRegistryMap(), clusterResourceManager, Optional.of(allocatorWithoutHostAffinity),
      containerManager, localityManager, false);

  // Mimic Cluster Manager returning any request: the first allocated resource is replaced by an
  // equivalent one on a randomly named host before it is handed to the process manager
  doAnswer(invocation -> {
    new Thread(() -> {
      Object[] args = invocation.getArguments();
      List<SamzaResource> resources = (List<SamzaResource>) args[0];
      SamzaResource preferredResource = resources.get(0);
      SamzaResource anyResource = new SamzaResource(preferredResource.getNumCores(), preferredResource.getMemoryMb(),
          "host-" + RandomStringUtils.randomAlphanumeric(5), preferredResource.getContainerId());
      cpm.onResourcesAvailable(ImmutableList.of(anyResource));
    }, "AMRMClientAsync").start();
    return null;
  }).when(callback).onResourcesAvailable(anyList());

  // Relay launch-success notifications to the process manager on a separate thread
  doAnswer(invocation -> {
    new Thread(() -> {
      Object[] args = invocation.getArguments();
      cpm.onStreamProcessorLaunchSuccess((SamzaResource) args[0]);
    }, "AMRMClientAsync").start();
    return null;
  }).when(callback).onStreamProcessorLaunchSuccess(any());

  // Relay container completion notifications to the process manager on a separate thread
  doAnswer(invocation -> {
    new Thread(() -> {
      Object[] args = invocation.getArguments();
      cpm.onResourcesCompleted((List<SamzaResourceStatus>) args[0]);
    }, "AMRMClientAsync").start();
    return null;
  }).when(callback).onResourcesCompleted(anyList());

  cpm.start();

  // This spawns async start request and waits for async requests to complete
  if (!allocatorWithoutHostAffinity.awaitContainersStart(2, 3, TimeUnit.SECONDS)) {
    fail("timed out waiting for the containers to start");
  }

  // Poll until both processors are registered as running; the @Test timeout bounds this loop
  while (state.runningProcessors.size() != 2) {
    Thread.sleep(100);
  }

  // App is in running state with two containers running
  assertEquals(2, state.runningProcessors.size());
  assertEquals(0, state.preferredHostRequests.get());
  assertEquals(2, state.anyHostRequests.get());

  String previousHostOfContainer1 = state.runningProcessors.get("0").getHost();
  String previousHostOfContainer2 = state.runningProcessors.get("1").getHost();

  // Initiate container placement action to move a container with container id 0
  ContainerPlacementRequestMessage requestMessage =
      new ContainerPlacementRequestMessage(UUID.randomUUID(), "app-attempt-001", "0", "host-3", System.currentTimeMillis());
  ContainerPlacementMetadata metadata =
      containerManager.registerContainerPlacementActionForTest(requestMessage, allocatorWithoutHostAffinity);

  // Wait for the ControlAction to complete and spawn an async request
  if (!allocatorWithoutHostAffinity.awaitContainersStart(1, 3, TimeUnit.SECONDS)) {
    fail("timed out waiting for the containers to start");
  }

  // Poll until both the in-memory metadata and the stored response report SUCCEEDED
  Optional<ContainerPlacementResponseMessage> responseMessage =
      containerPlacementMetadataStore.readContainerPlacementResponseMessage(requestMessage.getUuid());
  while (!(metadata.getActionStatus() == ContainerPlacementMessage.StatusCode.SUCCEEDED
      && responseMessage.isPresent()
      && responseMessage.get().getStatusCode() == ContainerPlacementMessage.StatusCode.SUCCEEDED)) {
    Thread.sleep(100);
    responseMessage = containerPlacementMetadataStore.readContainerPlacementResponseMessage(requestMessage.getUuid());
  }

  // We should have no preferred host request
  assertEquals(0, state.preferredHostRequests.get());
  // We should have one more ANY_HOST request
  assertEquals(3, state.anyHostRequests.get());
  assertEquals(2, state.runningProcessors.size());
  assertNotEquals(previousHostOfContainer1, state.runningProcessors.get("0").getHost());
  // Container 2 should not be affected
  assertEquals(previousHostOfContainer2, state.runningProcessors.get("1").getHost());
  // Action should succeed
  assertEquals(ContainerPlacementMessage.StatusCode.SUCCEEDED, metadata.getActionStatus());
  assertTrue(responseMessage.isPresent());
  assertEquals(ContainerPlacementMessage.StatusCode.SUCCEEDED, responseMessage.get().getStatusCode());
  assertResponseMessage(responseMessage.get(), requestMessage);

  // Inject a duplicate request (same UUID, this time for processor "1") and check it is not accepted
  ContainerPlacementRequestMessage duplicateRequestToBeIgnored =
      new ContainerPlacementRequestMessage(requestMessage.getUuid(), "app-attempt-001", "1", "host-3", System.currentTimeMillis());
  // Request with a dup uuid should not be accepted
  metadata = containerManager.registerContainerPlacementActionForTest(duplicateRequestToBeIgnored, allocatorWithoutHostAffinity);
  // NOTE(review): the second operand compares UUID references, not values, and the duplicate
  // reuses requestMessage's UUID object - confirm whether a null check alone is what is intended.
  assertTrue(metadata == null || metadata.getUuid() != duplicateRequestToBeIgnored.getUuid());
  responseMessage = containerPlacementMetadataStore.readContainerPlacementResponseMessage(requestMessage.getUuid());
  assertTrue(responseMessage.isPresent());
  assertEquals(ContainerPlacementMessage.StatusCode.BAD_REQUEST, responseMessage.get().getStatusCode());
  assertResponseMessage(responseMessage.get(), duplicateRequestToBeIgnored);

  // Request shall be deleted as soon as it is acted upon
  assertFalse(containerPlacementMetadataStore.readContainerPlacementRequestMessage(requestMessage.getUuid()).isPresent());
}
use of org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata in project samza by apache.
the class TestContainerPlacementActions method testContainerSuccessfulMoveActionWithoutStandby.
/**
 * Moves processor "0" from "host-1" to "host-3" with all callbacks relayed unchanged, then checks
 * that the processor is running on "host-3", processor "1" is untouched on "host-2", and both the
 * in-memory metadata and the stored response report SUCCEEDED.
 */
@Test(timeout = 10000)
public void testContainerSuccessfulMoveActionWithoutStandby() throws Exception {
  // Relay allocated resources from the mocked callback to the process manager on a separate thread
  doAnswer(invocation -> {
    new Thread(() -> {
      Object[] args = invocation.getArguments();
      cpm.onResourcesAvailable((List<SamzaResource>) args[0]);
    }, "AMRMClientAsync").start();
    return null;
  }).when(callback).onResourcesAvailable(anyList());

  // Relay launch-success notifications to the process manager on a separate thread
  doAnswer(invocation -> {
    new Thread(() -> {
      Object[] args = invocation.getArguments();
      cpm.onStreamProcessorLaunchSuccess((SamzaResource) args[0]);
    }, "AMRMClientAsync").start();
    return null;
  }).when(callback).onStreamProcessorLaunchSuccess(any());

  // Relay container completion notifications to the process manager on a separate thread
  doAnswer(invocation -> {
    new Thread(() -> {
      Object[] args = invocation.getArguments();
      cpm.onResourcesCompleted((List<SamzaResourceStatus>) args[0]);
    }, "AMRMClientAsync").start();
    return null;
  }).when(callback).onResourcesCompleted(anyList());

  cpm.start();

  if (!allocatorWithHostAffinity.awaitContainersStart(2, 5, TimeUnit.SECONDS)) {
    fail("timed out waiting for the containers to start");
  }

  // Poll until both processors are registered as running; the @Test timeout bounds this loop
  while (state.runningProcessors.size() != 2) {
    Thread.sleep(100);
  }

  // App is in running state with two containers running
  assertEquals(2, state.runningProcessors.size());
  assertEquals("host-1", state.runningProcessors.get("0").getHost());
  assertEquals("host-2", state.runningProcessors.get("1").getHost());
  assertEquals(2, state.preferredHostRequests.get());
  assertEquals(0, state.anyHostRequests.get());

  // Initiate container placement action to move a container with container id 0
  ContainerPlacementRequestMessage requestMessage =
      new ContainerPlacementRequestMessage(UUID.randomUUID(), "appAttempt-001", "0", "host-3", System.currentTimeMillis());
  ContainerPlacementMetadata metadata =
      containerManager.registerContainerPlacementActionForTest(requestMessage, allocatorWithHostAffinity);

  // Wait for the ControlAction to complete
  if (!allocatorWithHostAffinity.awaitContainersStart(1, 3, TimeUnit.SECONDS)) {
    fail("timed out waiting for the containers to start");
  }

  // Wait for the placement action to be complete & get written to the underlying metastore
  Optional<ContainerPlacementResponseMessage> responseMessage =
      containerPlacementMetadataStore.readContainerPlacementResponseMessage(requestMessage.getUuid());
  while (!(metadata.getActionStatus() == ContainerPlacementMessage.StatusCode.SUCCEEDED
      && responseMessage.isPresent()
      && responseMessage.get().getStatusCode() == ContainerPlacementMessage.StatusCode.SUCCEEDED)) {
    Thread.sleep(100);
    responseMessage = containerPlacementMetadataStore.readContainerPlacementResponseMessage(requestMessage.getUuid());
  }

  assertEquals(3, state.preferredHostRequests.get());
  assertEquals(2, state.runningProcessors.size());
  assertEquals("host-3", state.runningProcessors.get("0").getHost());
  assertEquals("host-2", state.runningProcessors.get("1").getHost());
  assertEquals(0, state.anyHostRequests.get());
  assertEquals(ContainerPlacementMessage.StatusCode.SUCCEEDED, metadata.getActionStatus());
  assertTrue(responseMessage.isPresent());
  assertEquals(ContainerPlacementMessage.StatusCode.SUCCEEDED, responseMessage.get().getStatusCode());
  assertResponseMessage(responseMessage.get(), requestMessage);
}
use of org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata in project samza by apache.
the class ContainerManager method handleContainerStopFail.
/**
 * Reacts to a failed container stop, for active containers and for standby containers when a
 * standby container manager is present.
 *
 * <p>Dispatch order: a processor with an active container placement action has that action's
 * container status set to {@code STOP_FAILED}; otherwise the notification is delegated to the
 * standby container manager if one is present; otherwise it is logged and ignored. A
 * {@code null} processor id always ends up in the log-and-ignore path.
 *
 * @param processorId logical id of the container eg 1,2,3
 * @param containerId last known id of the container deployed
 * @param containerHost host on which container is requested to be deployed
 * @param containerAllocator allocator for requesting resources
 * TODO: SAMZA-2512 Add integ test for handleContainerStopFail
 */
void handleContainerStopFail(String processorId, String containerId, String containerHost, ContainerAllocator containerAllocator) {
  final boolean processorKnown = processorId != null;

  if (processorKnown && hasActiveContainerPlacementAction(processorId)) {
    // Assuming resource acquired on destination host will be relinquished by the containerAllocator,
    // only the stop failure is recorded on the placement metadata here.
    ContainerPlacementMetadata placementMetadata = getPlacementActionMetadata(processorId).get();
    placementMetadata.setContainerStatus(ContainerPlacementMetadata.ContainerStatus.STOP_FAILED);
    return;
  }

  if (processorKnown && standbyContainerManager.isPresent()) {
    standbyContainerManager.get().handleContainerStopFail(processorId, containerId, containerAllocator);
    return;
  }

  LOG.warn("Did not find a running Processor ID for Container ID: {} on host: {}. " + "Ignoring invalid/redundant notification.",
      containerId, containerHost);
}
Aggregations