Search in sources :

Example 6 with ContainerPlacementMetadata

use of org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata in project samza by apache.

the class TestContainerPlacementActions method testActiveContainerLaunchFailureOnControlActionShouldFallbackToSourceHost.

/**
 * Requests a move of container "0" to "host-3" while every stream processor launch on
 * "host-3" is made to fail, then checks that container "0" is running on "host-1" again,
 * container "1" is untouched on "host-2", and the placement action is recorded as FAILED.
 *
 * The cluster-manager callbacks are stubbed to forward to {@code cpm} on freshly spawned
 * threads named "AMRMClientAsync". The polling loops have no bound of their own; they are
 * cut off by the 10 second {@code @Test} timeout.
 */
@Test(timeout = 10000)
public void testActiveContainerLaunchFailureOnControlActionShouldFallbackToSourceHost() throws Exception {
    // Forward every resource allocation to the process manager from a separate thread
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) {
            new Thread(() -> {
                Object[] args = invocation.getArguments();
                cpm.onResourcesAvailable((List<SamzaResource>) args[0]);
            }, "AMRMClientAsync").start();
            return null;
        }
    }).when(callback).onResourcesAvailable(anyList());
    // Mimic stream processor launch failure only on host-3
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) {
            new Thread(() -> {
                SamzaResource resource = (SamzaResource) invocation.getArguments()[0];
                if ("host-3".equals(resource.getHost())) {
                    cpm.onStreamProcessorLaunchFailure(resource, new Throwable("Custom Exception for Host-3"));
                } else {
                    cpm.onStreamProcessorLaunchSuccess(resource);
                }
            }, "AMRMClientAsync").start();
            return null;
        }
    }).when(callback).onStreamProcessorLaunchSuccess(any());
    // Forward container completion notifications to the process manager from a separate thread
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) {
            new Thread(() -> {
                Object[] args = invocation.getArguments();
                cpm.onResourcesCompleted((List<SamzaResourceStatus>) args[0]);
            }, "AMRMClientAsync").start();
            return null;
        }
    }).when(callback).onResourcesCompleted(anyList());
    cpm.start();
    if (!allocatorWithHostAffinity.awaitContainersStart(2, 5, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    // Poll until both processors are reported as running
    while (state.runningProcessors.size() != 2) {
        Thread.sleep(100);
    }
    // App is in running state with two containers running
    assertEquals(2, state.runningProcessors.size());
    assertEquals("host-1", state.runningProcessors.get("0").getHost());
    assertEquals("host-2", state.runningProcessors.get("1").getHost());
    assertEquals(2, state.preferredHostRequests.get());
    assertEquals(0, state.anyHostRequests.get());
    // Take a container placement action to move a container with container id 0
    ContainerPlacementRequestMessage requestMessage = new ContainerPlacementRequestMessage(UUID.randomUUID(), "app-attempt-001", "0", "host-3", System.currentTimeMillis());
    ContainerPlacementMetadata metadata = containerManager.registerContainerPlacementActionForTest(requestMessage, allocatorWithHostAffinity);
    // Wait for the ControlAction to complete
    if (!allocatorWithHostAffinity.awaitContainersStart(1, 3, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    while (state.runningProcessors.size() != 2) {
        Thread.sleep(100);
    }
    // Two initial requests plus two more issued while handling the move and its fallback
    assertEquals(4, state.preferredHostRequests.get());
    assertEquals(2, state.runningProcessors.size());
    // Container 0 should fallback to source host
    assertEquals("host-1", state.runningProcessors.get("0").getHost());
    assertEquals("host-2", state.runningProcessors.get("1").getHost());
    assertEquals(0, state.anyHostRequests.get());
    // Control Action should be failed in this case
    assertEquals(ContainerPlacementMessage.StatusCode.FAILED, metadata.getActionStatus());
    Optional<ContainerPlacementResponseMessage> responseMessage = containerPlacementMetadataStore.readContainerPlacementResponseMessage(requestMessage.getUuid());
    assertTrue(responseMessage.isPresent());
    assertEquals(ContainerPlacementMessage.StatusCode.FAILED, responseMessage.get().getStatusCode());
    assertResponseMessage(responseMessage.get(), requestMessage);
    // Request shall be deleted as soon as it is acted upon
    assertFalse(containerPlacementMetadataStore.readContainerPlacementRequestMessage(requestMessage.getUuid()).isPresent());
}
Also used : ContainerPlacementResponseMessage(org.apache.samza.container.placement.ContainerPlacementResponseMessage) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ContainerPlacementMetadata(org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata) ContainerPlacementRequestMessage(org.apache.samza.container.placement.ContainerPlacementRequestMessage) Test(org.junit.Test)

Example 7 with ContainerPlacementMetadata

use of org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata in project samza by apache.

the class TestContainerPlacementActions method testContainerMoveActionExpiredRequestNotAffectRunningContainers.

/**
 * Requests a move of container "0" to "host-3" while allocations for any host other than
 * "host-1"/"host-2" are silently dropped, then checks that the placement action ends up
 * FAILED and that both containers are still running on their original hosts.
 *
 * The request is built with {@code Duration.ofMillis(10)}; presumably this is the request
 * expiry the test name refers to (confirm against ContainerPlacementRequestMessage).
 * The polling loops have no bound of their own; they are cut off by the 10 second
 * {@code @Test} timeout.
 */
@Test(timeout = 10000)
public void testContainerMoveActionExpiredRequestNotAffectRunningContainers() throws Exception {
    // Mimic the behavior of Expired request: only allocations on host-1 / host-2 are delivered
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) {
            new Thread(() -> {
                List<SamzaResource> resources = (List<SamzaResource>) invocation.getArguments()[0];
                String firstHost = resources.get(0).getHost();
                if (firstHost.equals("host-1") || firstHost.equals("host-2")) {
                    cpm.onResourcesAvailable(resources);
                }
            }, "AMRMClientAsync").start();
            return null;
        }
    }).when(callback).onResourcesAvailable(anyList());
    // Every launch is reported back to the process manager as successful
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) {
            new Thread(() -> {
                Object[] args = invocation.getArguments();
                cpm.onStreamProcessorLaunchSuccess((SamzaResource) args[0]);
            }, "AMRMClientAsync").start();
            return null;
        }
    }).when(callback).onStreamProcessorLaunchSuccess(any());
    cpm.start();
    if (!allocatorWithHostAffinity.awaitContainersStart(2, 3, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    // Poll until both processors are reported as running
    while (state.runningProcessors.size() != 2) {
        Thread.sleep(100);
    }
    // App is in running state with two containers running
    assertEquals(2, state.runningProcessors.size());
    assertEquals("host-1", state.runningProcessors.get("0").getHost());
    assertEquals("host-2", state.runningProcessors.get("1").getHost());
    assertEquals(2, state.preferredHostRequests.get());
    assertEquals(0, state.anyHostRequests.get());
    // Initiate container placement action to move a container with container id 0
    ContainerPlacementRequestMessage requestMessage = new ContainerPlacementRequestMessage(UUID.randomUUID(), "appAttempt-001", "0", "host-3", Duration.ofMillis(10), System.currentTimeMillis());
    ContainerPlacementMetadata metadata = containerManager.registerContainerPlacementActionForTest(requestMessage, allocatorWithHostAffinity);
    Optional<ContainerPlacementResponseMessage> responseMessage = containerPlacementMetadataStore.readContainerPlacementResponseMessage(requestMessage.getUuid());
    // Wait for the placement action to be complete & get written to the underlying metastore
    while (!(metadata.getActionStatus() == ContainerPlacementMessage.StatusCode.FAILED
            && responseMessage.isPresent()
            && responseMessage.get().getStatusCode() == ContainerPlacementMessage.StatusCode.FAILED)) {
        Thread.sleep(100);
        responseMessage = containerPlacementMetadataStore.readContainerPlacementResponseMessage(requestMessage.getUuid());
    }
    // Exactly one additional preferred-host request was issued for the move
    assertEquals(3, state.preferredHostRequests.get());
    assertEquals(2, state.runningProcessors.size());
    // Container should not be stopped
    assertEquals("host-1", state.runningProcessors.get("0").getHost());
    assertEquals("host-2", state.runningProcessors.get("1").getHost());
    assertEquals(0, state.anyHostRequests.get());
    assertTrue(responseMessage.isPresent());
    assertEquals(ContainerPlacementMessage.StatusCode.FAILED, responseMessage.get().getStatusCode());
    assertResponseMessage(responseMessage.get(), requestMessage);
    // Request shall be deleted as soon as it is acted upon
    assertFalse(containerPlacementMetadataStore.readContainerPlacementRequestMessage(requestMessage.getUuid()).isPresent());
}
Also used : ContainerPlacementResponseMessage(org.apache.samza.container.placement.ContainerPlacementResponseMessage) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ContainerPlacementMetadata(org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata) ContainerPlacementRequestMessage(org.apache.samza.container.placement.ContainerPlacementRequestMessage) Test(org.junit.Test)

Example 8 with ContainerPlacementMetadata

use of org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata in project samza by apache.

the class TestContainerPlacementActions method testAlwaysMoveToAnyHostForHostAffinityDisabled.

/**
 * Builds its own state, cluster resource manager, container manager, allocator and process
 * manager from {@code getConfigWithHostAffinityAndRetries(false, 1, true)} (presumably host
 * affinity disabled, as the test name says), then:
 * <ol>
 *   <li>requests a move of container "0" to "host-3" and checks that only an additional
 *       ANY_HOST request is issued, container "0" changes host, container "1" does not, and
 *       the action is recorded as SUCCEEDED;</li>
 *   <li>submits a second request reusing the same UUID and checks that it is answered with
 *       BAD_REQUEST.</li>
 * </ol>
 *
 * The locals {@code state}, {@code callback}, {@code containerManager} and {@code cpm} are
 * used instead of the same-named members the sibling tests rely on. The polling loops have
 * no bound of their own; they are cut off by the 10 second {@code @Test} timeout.
 */
@Test(timeout = 10000)
public void testAlwaysMoveToAnyHostForHostAffinityDisabled() throws Exception {
    Map<String, String> conf = new HashMap<>(getConfigWithHostAffinityAndRetries(false, 1, true));
    SamzaApplicationState state = new SamzaApplicationState(JobModelManagerTestUtil.getJobModelManager(getConfig(), 2, this.server));
    ClusterResourceManager.Callback callback = mock(ClusterResourceManager.Callback.class);
    MockClusterResourceManager clusterResourceManager = new MockClusterResourceManager(callback, state);
    FaultDomainManager faultDomainManager = mock(FaultDomainManager.class);
    ContainerManager containerManager = new ContainerManager(containerPlacementMetadataStore, state, clusterResourceManager, false, false, localityManager, faultDomainManager, config);
    MockContainerAllocatorWithoutHostAffinity allocatorWithoutHostAffinity = new MockContainerAllocatorWithoutHostAffinity(clusterResourceManager, new MapConfig(conf), state, containerManager);
    ContainerProcessManager cpm = new ContainerProcessManager(new ClusterManagerConfig(new MapConfig(getConfig(), getConfigWithHostAffinityAndRetries(false, 1, true))), state, new MetricsRegistryMap(), clusterResourceManager, Optional.of(allocatorWithoutHostAffinity), containerManager, localityManager, false);
    // Mimic Cluster Manager returning any request: the offered resource is replaced by one
    // with the same cores, memory and container id but a randomly named host
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) {
            new Thread(() -> {
                Object[] args = invocation.getArguments();
                List<SamzaResource> resources = (List<SamzaResource>) args[0];
                SamzaResource preferredResource = resources.get(0);
                SamzaResource anyResource = new SamzaResource(preferredResource.getNumCores(), preferredResource.getMemoryMb(), "host-" + RandomStringUtils.randomAlphanumeric(5), preferredResource.getContainerId());
                cpm.onResourcesAvailable(ImmutableList.of(anyResource));
            }, "AMRMClientAsync").start();
            return null;
        }
    }).when(callback).onResourcesAvailable(anyList());
    // Every launch is reported back to the process manager as successful
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) {
            new Thread(() -> {
                Object[] args = invocation.getArguments();
                cpm.onStreamProcessorLaunchSuccess((SamzaResource) args[0]);
            }, "AMRMClientAsync").start();
            return null;
        }
    }).when(callback).onStreamProcessorLaunchSuccess(any());
    // Forward container completion notifications to the process manager from a separate thread
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) {
            new Thread(() -> {
                Object[] args = invocation.getArguments();
                cpm.onResourcesCompleted((List<SamzaResourceStatus>) args[0]);
            }, "AMRMClientAsync").start();
            return null;
        }
    }).when(callback).onResourcesCompleted(anyList());
    cpm.start();
    // This spawns async start request and waits for async requests to complete
    if (!allocatorWithoutHostAffinity.awaitContainersStart(2, 3, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    while (state.runningProcessors.size() != 2) {
        Thread.sleep(100);
    }
    // App is in running state with two containers running
    assertEquals(2, state.runningProcessors.size());
    assertEquals(0, state.preferredHostRequests.get());
    assertEquals(2, state.anyHostRequests.get());
    String previousHostOfContainer1 = state.runningProcessors.get("0").getHost();
    String previousHostOfContainer2 = state.runningProcessors.get("1").getHost();
    // Initiate container placement action to move a container with container id 0
    ContainerPlacementRequestMessage requestMessage = new ContainerPlacementRequestMessage(UUID.randomUUID(), "app-attempt-001", "0", "host-3", System.currentTimeMillis());
    ContainerPlacementMetadata metadata = containerManager.registerContainerPlacementActionForTest(requestMessage, allocatorWithoutHostAffinity);
    // Wait for the ControlAction to complete and spawn an async request
    if (!allocatorWithoutHostAffinity.awaitContainersStart(1, 3, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    Optional<ContainerPlacementResponseMessage> responseMessage = containerPlacementMetadataStore.readContainerPlacementResponseMessage(requestMessage.getUuid());
    // Poll until both the in-memory metadata and the stored response report SUCCEEDED
    while (!(metadata.getActionStatus() == ContainerPlacementMessage.StatusCode.SUCCEEDED
            && responseMessage.isPresent()
            && responseMessage.get().getStatusCode() == ContainerPlacementMessage.StatusCode.SUCCEEDED)) {
        Thread.sleep(100);
        responseMessage = containerPlacementMetadataStore.readContainerPlacementResponseMessage(requestMessage.getUuid());
    }
    // We should have no preferred host request
    assertEquals(0, state.preferredHostRequests.get());
    // We should have one more ANY_HOST request
    assertEquals(3, state.anyHostRequests.get());
    assertEquals(2, state.runningProcessors.size());
    assertNotEquals(previousHostOfContainer1, state.runningProcessors.get("0").getHost());
    // Container 2 should not be affected
    assertEquals(previousHostOfContainer2, state.runningProcessors.get("1").getHost());
    // Action should succeed
    assertEquals(ContainerPlacementMessage.StatusCode.SUCCEEDED, metadata.getActionStatus());
    assertTrue(responseMessage.isPresent());
    assertEquals(ContainerPlacementMessage.StatusCode.SUCCEEDED, responseMessage.get().getStatusCode());
    assertResponseMessage(responseMessage.get(), requestMessage);
    // Inject a duplicate request (same UUID, different processor) and check it is not accepted
    ContainerPlacementRequestMessage duplicateRequestToBeIgnored = new ContainerPlacementRequestMessage(requestMessage.getUuid(), "app-attempt-001", "1", "host-3", System.currentTimeMillis());
    // Request with a dup uuid should not be accepted
    metadata = containerManager.registerContainerPlacementActionForTest(duplicateRequestToBeIgnored, allocatorWithoutHostAffinity);
    // No metadata carrying the duplicate's UUID may have been registered; UUIDs are compared
    // by value, since a reference comparison would pass for any equal-but-distinct instance
    assertTrue(metadata == null || !metadata.getUuid().equals(duplicateRequestToBeIgnored.getUuid()));
    responseMessage = containerPlacementMetadataStore.readContainerPlacementResponseMessage(requestMessage.getUuid());
    assertTrue(responseMessage.isPresent());
    assertEquals(ContainerPlacementMessage.StatusCode.BAD_REQUEST, responseMessage.get().getStatusCode());
    assertResponseMessage(responseMessage.get(), duplicateRequestToBeIgnored);
    // Request shall be deleted as soon as it is acted upon
    assertFalse(containerPlacementMetadataStore.readContainerPlacementRequestMessage(requestMessage.getUuid()).isPresent());
}
Also used : HashMap(java.util.HashMap) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) MapConfig(org.apache.samza.config.MapConfig) ContainerPlacementMetadata(org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) ContainerPlacementRequestMessage(org.apache.samza.container.placement.ContainerPlacementRequestMessage) ContainerPlacementResponseMessage(org.apache.samza.container.placement.ContainerPlacementResponseMessage) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Test(org.junit.Test)

Example 9 with ContainerPlacementMetadata

use of org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata in project samza by apache.

the class TestContainerPlacementActions method testContainerSuccessfulMoveActionWithoutStandby.

/**
 * Requests a move of container "0" from "host-1" to "host-3" with all cluster-manager
 * callbacks forwarded unchanged, then checks that container "0" is running on "host-3",
 * container "1" is still on "host-2", exactly one extra preferred-host request was issued,
 * and the placement action is recorded as SUCCEEDED.
 *
 * The polling loops have no bound of their own; they are cut off by the 10 second
 * {@code @Test} timeout.
 */
@Test(timeout = 10000)
public void testContainerSuccessfulMoveActionWithoutStandby() throws Exception {
    // Forward every resource allocation to the process manager from a separate thread
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) {
            new Thread(() -> {
                Object[] args = invocation.getArguments();
                cpm.onResourcesAvailable((List<SamzaResource>) args[0]);
            }, "AMRMClientAsync").start();
            return null;
        }
    }).when(callback).onResourcesAvailable(anyList());
    // Every launch is reported back to the process manager as successful
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) {
            new Thread(() -> {
                Object[] args = invocation.getArguments();
                cpm.onStreamProcessorLaunchSuccess((SamzaResource) args[0]);
            }, "AMRMClientAsync").start();
            return null;
        }
    }).when(callback).onStreamProcessorLaunchSuccess(any());
    // Forward container completion notifications to the process manager from a separate thread
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) {
            new Thread(() -> {
                Object[] args = invocation.getArguments();
                cpm.onResourcesCompleted((List<SamzaResourceStatus>) args[0]);
            }, "AMRMClientAsync").start();
            return null;
        }
    }).when(callback).onResourcesCompleted(anyList());
    cpm.start();
    if (!allocatorWithHostAffinity.awaitContainersStart(2, 5, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    // Poll until both processors are reported as running
    while (state.runningProcessors.size() != 2) {
        Thread.sleep(100);
    }
    // App is in running state with two containers running
    assertEquals(2, state.runningProcessors.size());
    assertEquals("host-1", state.runningProcessors.get("0").getHost());
    assertEquals("host-2", state.runningProcessors.get("1").getHost());
    assertEquals(2, state.preferredHostRequests.get());
    assertEquals(0, state.anyHostRequests.get());
    // Initiate container placement action to move a container with container id 0
    ContainerPlacementRequestMessage requestMessage = new ContainerPlacementRequestMessage(UUID.randomUUID(), "appAttempt-001", "0", "host-3", System.currentTimeMillis());
    ContainerPlacementMetadata metadata = containerManager.registerContainerPlacementActionForTest(requestMessage, allocatorWithHostAffinity);
    // Wait for the ControlAction to complete
    if (!allocatorWithHostAffinity.awaitContainersStart(1, 3, TimeUnit.SECONDS)) {
        fail("timed out waiting for the containers to start");
    }
    Optional<ContainerPlacementResponseMessage> responseMessage = containerPlacementMetadataStore.readContainerPlacementResponseMessage(requestMessage.getUuid());
    // Wait for the placement action to be complete & get written to the underlying metastore
    while (!(metadata.getActionStatus() == ContainerPlacementMessage.StatusCode.SUCCEEDED
            && responseMessage.isPresent()
            && responseMessage.get().getStatusCode() == ContainerPlacementMessage.StatusCode.SUCCEEDED)) {
        Thread.sleep(100);
        responseMessage = containerPlacementMetadataStore.readContainerPlacementResponseMessage(requestMessage.getUuid());
    }
    // Exactly one additional preferred-host request was issued for the move
    assertEquals(3, state.preferredHostRequests.get());
    assertEquals(2, state.runningProcessors.size());
    // Container 0 now runs on the requested destination; container 1 is untouched
    assertEquals("host-3", state.runningProcessors.get("0").getHost());
    assertEquals("host-2", state.runningProcessors.get("1").getHost());
    assertEquals(0, state.anyHostRequests.get());
    assertEquals(ContainerPlacementMessage.StatusCode.SUCCEEDED, metadata.getActionStatus());
    assertTrue(responseMessage.isPresent());
    assertEquals(ContainerPlacementMessage.StatusCode.SUCCEEDED, responseMessage.get().getStatusCode());
    assertResponseMessage(responseMessage.get(), requestMessage);
}
Also used : InvocationOnMock(org.mockito.invocation.InvocationOnMock) ContainerPlacementMetadata(org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata) ContainerPlacementResponseMessage(org.apache.samza.container.placement.ContainerPlacementResponseMessage) ContainerPlacementRequestMessage(org.apache.samza.container.placement.ContainerPlacementRequestMessage) Test(org.junit.Test)

Example 10 with ContainerPlacementMetadata

use of org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata in project samza by apache.

the class ContainerManager method handleContainerStopFail.

/**
 * Reacts to a failed container stop, for active containers and (when a standby container
 * manager is present) standby containers.
 *
 * <ul>
 *   <li>If the processor has an active container placement action, that action's container
 *       status is set to {@code STOP_FAILED}.</li>
 *   <li>Otherwise, if a standby container manager is present, the notification is delegated
 *       to it.</li>
 *   <li>Otherwise (including a {@code null} processor id) a warning is logged and the
 *       notification is ignored.</li>
 * </ul>
 *
 * @param processorId logical id of the container eg 1,2,3; may be {@code null}
 * @param containerId last known id of the container deployed
 * @param containerHost host on which container is requested to be deployed
 * @param containerAllocator allocator for requesting resources
 * TODO: SAMZA-2512 Add integ test for handleContainerStopFail
 */
void handleContainerStopFail(String processorId, String containerId, String containerHost, ContainerAllocator containerAllocator) {
    final boolean processorKnown = processorId != null;

    if (processorKnown && hasActiveContainerPlacementAction(processorId)) {
        // Only the container status is recorded here; any resource acquired on the destination
        // host is assumed to be relinquished by the containerAllocator.
        getPlacementActionMetadata(processorId).get()
            .setContainerStatus(ContainerPlacementMetadata.ContainerStatus.STOP_FAILED);
        return;
    }

    if (processorKnown && standbyContainerManager.isPresent()) {
        standbyContainerManager.get().handleContainerStopFail(processorId, containerId, containerAllocator);
        return;
    }

    LOG.warn("Did not find a running Processor ID for Container ID: {} on host: {}. " + "Ignoring invalid/redundant notification.", containerId, containerHost);
}
Also used : ContainerPlacementMetadata(org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata)

Aggregations

ContainerPlacementMetadata (org.apache.samza.clustermanager.container.placement.ContainerPlacementMetadata)13 ContainerPlacementRequestMessage (org.apache.samza.container.placement.ContainerPlacementRequestMessage)6 ContainerPlacementResponseMessage (org.apache.samza.container.placement.ContainerPlacementResponseMessage)5 Test (org.junit.Test)5 InvocationOnMock (org.mockito.invocation.InvocationOnMock)5 ImmutableList (com.google.common.collect.ImmutableList)2 List (java.util.List)2 ClusterManagerConfig (org.apache.samza.config.ClusterManagerConfig)2 MapConfig (org.apache.samza.config.MapConfig)2 MetricsRegistryMap (org.apache.samza.metrics.MetricsRegistryMap)2 HashMap (java.util.HashMap)1