Search in sources :

Example 11 with TaskExecutorProcessSpec

use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.

the class ActiveResourceManagerTest method testWorkerTerminatedNoLongerRequired.

/**
 * Tests that a worker which terminated and is no longer required does not cause a second
 * worker request to be sent to the driver.
 */
@Test
public void testWorkerTerminatedNoLongerRequired() throws Exception {
    new Context() {

        {
            final ResourceID workerId = ResourceID.generate();
            final AtomicInteger driverRequestCounter = new AtomicInteger(0);
            // One future per request the driver may receive: the first is completed by the
            // expected request, the second is checked at the end to still be incomplete.
            final CompletableFuture<TaskExecutorProcessSpec> firstDriverRequest = new CompletableFuture<>();
            final CompletableFuture<TaskExecutorProcessSpec> secondDriverRequest = new CompletableFuture<>();
            final List<CompletableFuture<TaskExecutorProcessSpec>> driverRequests = new ArrayList<>();
            driverRequests.add(firstDriverRequest);
            driverRequests.add(secondDriverRequest);
            driverBuilder.setRequestResourceFunction(requestedSpec -> {
                final int requestIndex = driverRequestCounter.getAndIncrement();
                // Fail on any request beyond the two prepared futures.
                assertThat(requestIndex, lessThan(2));
                driverRequests.get(requestIndex).complete(requestedSpec);
                return CompletableFuture.completedFuture(workerId);
            });
            runTest(() -> {
                // Step 1: start a new worker and check that the driver was asked for it.
                final CompletableFuture<Boolean> startResult = runInMainThread(() -> getResourceManager().startNewWorker(WORKER_RESOURCE_SPEC));
                final TaskExecutorProcessSpec specSeenByDriver = firstDriverRequest.get(TIMEOUT_SEC, TimeUnit.SECONDS);
                assertThat(startResult.get(TIMEOUT_SEC, TimeUnit.SECONDS), is(true));
                final TaskExecutorProcessSpec expectedSpec = TaskExecutorProcessUtils.processSpecFromWorkerResourceSpec(flinkConfig, WORKER_RESOURCE_SPEC);
                assertThat(specSeenByDriver, is(expectedSpec));

                // Step 2: register the worker and check that registration succeeds.
                final CompletableFuture<RegistrationResponse> registration = registerTaskExecutor(workerId);
                assertThat(registration.get(TIMEOUT_SEC, TimeUnit.SECONDS), instanceOf(RegistrationResponse.Success.class));

                // Step 3: terminate the worker and check that no new worker is requested.
                runInMainThread(() -> {
                    getResourceManager().onWorkerTerminated(workerId, "terminate for testing");
                    // A value is returned so that get() below can be used to wait until this
                    // main-thread call finishes before the assertion runs.
                    return null;
                }).get(TIMEOUT_SEC, TimeUnit.SECONDS);
                assertFalse(secondDriverRequest.isDone());
            });
        }
    };
}
Also used : TaskExecutorProcessSpec(org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec) ArrayList(java.util.ArrayList) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) Test(org.junit.Test)

Example 12 with TaskExecutorProcessSpec

use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.

the class ActiveResourceManagerTest method testWorkerRegistrationTimeout.

/**
 * Tests that a requested worker is released through the driver when it is not registered
 * within the configured task manager registration timeout.
 */
@Test
public void testWorkerRegistrationTimeout() throws Exception {
    new Context() {

        {
            final ResourceID workerId = ResourceID.generate();
            final CompletableFuture<ResourceID> releasedWorker = new CompletableFuture<>();
            final Duration registrationTimeout = Duration.ofMillis(TESTING_START_WORKER_TIMEOUT_MS);
            flinkConfig.set(ResourceManagerOptions.TASK_MANAGER_REGISTRATION_TIMEOUT, registrationTimeout);
            // The driver hands out the fixed worker id and records which resource gets released.
            driverBuilder
                    .setRequestResourceFunction(ignoredSpec -> CompletableFuture.completedFuture(workerId))
                    .setReleaseResourceConsumer(released -> releasedWorker.complete(released));
            runTest(() -> {
                // Request a new worker; this test never registers it.
                runInMainThread(() -> getResourceManager().startNewWorker(WORKER_RESOURCE_SPEC));
                // The release callback must be invoked with the id of the requested worker.
                final ResourceID releasedId = releasedWorker.get(TIMEOUT_SEC, TimeUnit.SECONDS);
                assertThat(releasedId, is(workerId));
            });
        }
    };
}
Also used : TaskExecutorRegistration(org.apache.flink.runtime.resourcemanager.TaskExecutorRegistration) TaskExecutorProcessSpec(org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) WorkerResourceSpec(org.apache.flink.runtime.resourcemanager.WorkerResourceSpec) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) ResourceManagerOptions(org.apache.flink.configuration.ResourceManagerOptions) TaskExecutorMemoryConfiguration(org.apache.flink.runtime.taskexecutor.TaskExecutorMemoryConfiguration) Callable(java.util.concurrent.Callable) CompletableFuture(java.util.concurrent.CompletableFuture) RunnableWithException(org.apache.flink.util.function.RunnableWithException) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) ArrayList(java.util.ArrayList) Assert.assertThat(org.junit.Assert.assertThat) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Duration(java.time.Duration) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) NoOpResourceManagerPartitionTracker(org.apache.flink.runtime.io.network.partition.NoOpResourceManagerPartitionTracker) TestLogger(org.apache.flink.util.TestLogger) Matchers.lessThan(org.hamcrest.Matchers.lessThan) SlotManager(org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager) Assert.fail(org.junit.Assert.fail) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ClassRule(org.junit.ClassRule) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) HardwareDescription(org.apache.flink.runtime.instance.HardwareDescription) Configuration(org.apache.flink.configuration.Configuration) Test(org.junit.Test) UUID(java.util.UUID) 
ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) TimeUnit(java.util.concurrent.TimeUnit) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) List(java.util.List) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) TaskExecutorProcessUtils(org.apache.flink.runtime.clusterframework.TaskExecutorProcessUtils) ForkJoinPool(java.util.concurrent.ForkJoinPool) Assert.assertFalse(org.junit.Assert.assertFalse) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) TestingRpcServiceResource(org.apache.flink.runtime.rpc.TestingRpcServiceResource) TestingSlotManagerBuilder(org.apache.flink.runtime.resourcemanager.slotmanager.TestingSlotManagerBuilder) Matchers.is(org.hamcrest.Matchers.is) Assume.assumeTrue(org.junit.Assume.assumeTrue) Collections(java.util.Collections) Time(org.apache.flink.api.common.time.Time) MockResourceManagerRuntimeServices(org.apache.flink.runtime.resourcemanager.utils.MockResourceManagerRuntimeServices) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Test(org.junit.Test)

Example 13 with TaskExecutorProcessSpec

use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.

the class TaskExecutorProcessSpecTest method testNotEquals.

/**
 * Checks that a spec built from non-zero values and one external resource is not equal to a
 * spec built from zero values and no external resources.
 */
@Test
public void testNotEquals() {
    final TaskExecutorProcessSpec populatedSpec =
            new TaskExecutorProcessSpec(
                    new CPUResource(1.0),
                    MemorySize.parse("1m"),
                    MemorySize.parse("2m"),
                    MemorySize.parse("3m"),
                    MemorySize.parse("4m"),
                    MemorySize.parse("5m"),
                    MemorySize.parse("6m"),
                    MemorySize.parse("7m"),
                    MemorySize.parse("8m"),
                    Collections.singleton(new ExternalResource(EXTERNAL_RESOURCE_NAME, 1)));
    final TaskExecutorProcessSpec zeroSpec =
            new TaskExecutorProcessSpec(
                    new CPUResource(0.0),
                    MemorySize.ZERO,
                    MemorySize.ZERO,
                    MemorySize.ZERO,
                    MemorySize.ZERO,
                    MemorySize.ZERO,
                    MemorySize.ZERO,
                    MemorySize.ZERO,
                    MemorySize.ZERO,
                    Collections.emptyList());
    assertThat(populatedSpec, not(zeroSpec));
}
Also used : TaskExecutorProcessSpec(org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec) CPUResource(org.apache.flink.api.common.resources.CPUResource) ExternalResource(org.apache.flink.api.common.resources.ExternalResource) Test(org.junit.Test)

Example 14 with TaskExecutorProcessSpec

use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.

the class YarnResourceManagerDriverTest method testStartWorkerVariousSpec.

/**
 * Requests resources for two different process specs, then simulates allocation and completion
 * of a container for the first spec. Checks that only the first spec's worker is started and
 * that only the first spec's container is requested again.
 */
@Test
public void testStartWorkerVariousSpec() throws Exception {
    final TaskExecutorProcessSpec firstSpec =
            new TaskExecutorProcessSpec(
                    new CPUResource(1),
                    MemorySize.ZERO,
                    MemorySize.ZERO,
                    MemorySize.ofMebiBytes(50),
                    MemorySize.ofMebiBytes(50),
                    MemorySize.ofMebiBytes(50),
                    MemorySize.ofMebiBytes(50),
                    MemorySize.ZERO,
                    MemorySize.ZERO,
                    Collections.emptyList());
    final TaskExecutorProcessSpec secondSpec =
            new TaskExecutorProcessSpec(
                    new CPUResource(2),
                    MemorySize.ZERO,
                    MemorySize.ZERO,
                    MemorySize.ofMebiBytes(500),
                    MemorySize.ofMebiBytes(500),
                    MemorySize.ofMebiBytes(500),
                    MemorySize.ofMebiBytes(500),
                    MemorySize.ZERO,
                    MemorySize.ZERO,
                    Collections.emptyList());
    new Context() {

        {
            // (50L << 20) == 50 * 1024 * 1024, matching the 50 MiB sizes of the first spec.
            final String firstStartCommand = TaskManagerOptions.TASK_HEAP_MEMORY.key() + "=" + (50L << 20);
            // NOTE(review): this uses 100 << 20 although the second spec is built with 500 MiB
            // sizes; how the helper below matches this string is not visible here -- confirm
            // the value is intentional.
            final String secondStartCommand = TaskManagerOptions.TASK_HEAP_MEMORY.key() + "=" + (100L << 20);
            final CompletableFuture<Void> firstStartFuture = new CompletableFuture<>();
            final CompletableFuture<Void> secondStartFuture = new CompletableFuture<>();
            prepareForTestStartTaskExecutorProcessVariousSpec(firstStartCommand, secondStartCommand, firstStartFuture, secondStartFuture, firstSpec);
            testingYarnAMRMClientAsyncBuilder.setGetMatchingRequestsFunction(lookup -> {
                final Priority wantedPriority = lookup.f0;
                final List<AMRMClient.ContainerRequest> matches = new ArrayList<>();
                for (CompletableFuture<AMRMClient.ContainerRequest> pendingRequest : addContainerRequestFutures) {
                    final AMRMClient.ContainerRequest candidate = pendingRequest.getNow(null);
                    // Skip futures that are not completed yet and requests of another priority.
                    if (candidate == null || !wantedPriority.equals(candidate.getPriority())) {
                        continue;
                    }
                    assertThat(lookup.f2, is(candidate.getCapability()));
                    matches.add(candidate);
                }
                return Collections.singletonList(matches);
            });
            runTest(() -> {
                final Resource firstContainerResource = ((YarnResourceManagerDriver) getDriver()).getContainerResource(firstSpec).get();
                final Resource secondContainerResource = ((YarnResourceManagerDriver) getDriver()).getContainerResource(secondSpec).get();
                // The two specs must map to different container resources, otherwise the
                // per-spec checks below could not tell them apart.
                assertNotEquals(firstContainerResource, secondContainerResource);

                runInMainThread(() -> getDriver().requestResource(firstSpec));
                runInMainThread(() -> getDriver().requestResource(secondSpec));

                // Both container requests must have been added.
                verifyFutureCompleted(addContainerRequestFutures.get(0));
                verifyFutureCompleted(addContainerRequestFutures.get(1));

                // Simulate allocation of a container with the first spec's resource.
                final Container allocatedContainer = createTestingContainerWithResource(firstContainerResource);
                resourceManagerClientCallbackHandler.onContainersAllocated(Collections.singletonList(allocatedContainer));

                // Only the worker for the first spec may have been started.
                verifyFutureCompleted(firstStartFuture);
                assertFalse(secondStartFuture.isDone());

                // Simulate completion of that container while the worker is still pending.
                final ContainerStatus completedStatus = createTestingContainerCompletedStatus(allocatedContainer.getId());
                resourceManagerClientCallbackHandler.onContainersCompleted(Collections.singletonList(completedStatus));

                // Exactly one further request is expected, and it must be for the first resource.
                verifyFutureCompleted(addContainerRequestFutures.get(2));
                assertThat(addContainerRequestFutures.get(2).get().getCapability(), is(firstContainerResource));
                assertFalse(addContainerRequestFutures.get(3).isDone());
            });
        }
    };
}
Also used : ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) AMRMClient(org.apache.hadoop.yarn.client.api.AMRMClient) TaskExecutorProcessSpec(org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec) Priority(org.apache.hadoop.yarn.api.records.Priority) ArrayList(java.util.ArrayList) Resource(org.apache.hadoop.yarn.api.records.Resource) CPUResource(org.apache.flink.api.common.resources.CPUResource) Matchers.containsString(org.hamcrest.Matchers.containsString) CompletableFuture(java.util.concurrent.CompletableFuture) Container(org.apache.hadoop.yarn.api.records.Container) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) CPUResource(org.apache.flink.api.common.resources.CPUResource) Test(org.junit.Test)

Example 15 with TaskExecutorProcessSpec

use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.

the class KubernetesTaskManagerParametersTest method onSetup.

/**
 * Runs the parent setup, then builds the {@code kubernetesTaskManagerParameters} fixture from
 * {@code flinkConfig}.
 */
@Override
protected void onSetup() throws Exception {
    super.onSetup();

    // Derive the process spec from the configuration, then wrap it for containered deployment.
    final TaskExecutorProcessSpec processSpec = TaskExecutorProcessUtils.processSpecFromConfig(flinkConfig);
    final ContaineredTaskManagerParameters containerParameters =
            ContaineredTaskManagerParameters.create(flinkConfig, processSpec);

    this.kubernetesTaskManagerParameters =
            new KubernetesTaskManagerParameters(
                    flinkConfig,
                    POD_NAME,
                    DYNAMIC_PROPERTIES,
                    JVM_MEM_OPTS_ENV,
                    containerParameters,
                    Collections.emptyMap());
}
Also used : TaskExecutorProcessSpec(org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec) ContaineredTaskManagerParameters(org.apache.flink.runtime.clusterframework.ContaineredTaskManagerParameters)

Aggregations

TaskExecutorProcessSpec (org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec): 21, Test (org.junit.Test): 14, CompletableFuture (java.util.concurrent.CompletableFuture): 13, ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID): 12, ArrayList (java.util.ArrayList): 10, Configuration (org.apache.flink.configuration.Configuration): 10, RegistrationResponse (org.apache.flink.runtime.registration.RegistrationResponse): 8, AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 7, List (java.util.List): 5, Duration (java.time.Duration): 4, HashMap (java.util.HashMap): 4, UUID (java.util.UUID): 4, Callable (java.util.concurrent.Callable): 4, TimeUnit (java.util.concurrent.TimeUnit): 4, Time (org.apache.flink.api.common.time.Time): 4, ContaineredTaskManagerParameters (org.apache.flink.runtime.clusterframework.ContaineredTaskManagerParameters): 4, TaskExecutorProcessUtils (org.apache.flink.runtime.clusterframework.TaskExecutorProcessUtils): 4, ClusterInformation (org.apache.flink.runtime.entrypoint.ClusterInformation): 4, WorkerResourceSpec (org.apache.flink.runtime.resourcemanager.WorkerResourceSpec): 4, SlotManager (org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager): 4