Search in sources :

Example 6 with TaskExecutorProcessSpec

use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.

the class TaskExecutorProcessSpecContainerResourcePriorityAdapter method getTaskExecutorProcessSpecAndResource.

/**
 * Looks up the process spec registered for the given priority and pairs it with the resource
 * recorded for that spec.
 *
 * @param priority the priority to resolve
 * @return the matching spec and resource, or {@link Optional#empty()} when no spec is mapped
 *     to {@code priority}
 */
Optional<TaskExecutorProcessSpecAndResource> getTaskExecutorProcessSpecAndResource(Priority priority) {
    final TaskExecutorProcessSpec spec = priorityToTaskExecutorProcessSpec.get(priority);
    if (spec != null) {
        final PriorityAndResource registered = taskExecutorProcessSpecToPriorityAndResource.get(spec);
        // Both lookup tables are expected to agree: a spec known by priority must also have
        // an entry in the reverse mapping, carrying that very priority.
        Preconditions.checkState(registered != null);
        Preconditions.checkState(priority.equals(registered.getPriority()));
        return Optional.of(new TaskExecutorProcessSpecAndResource(spec, registered.getResource()));
    }
    return Optional.empty();
}
Also used : TaskExecutorProcessSpec(org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec)

Example 7 with TaskExecutorProcessSpec

use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.

the class UtilsTest method testCreateTaskExecutorCredentials.

/**
 * Builds a task executor launch context while {@code HADOOP_TOKEN_FILE_LOCATION} points to a
 * credential file holding both an AM/RM token and an HDFS delegation token, then checks that
 * the context's tokens contain the HDFS delegation token but not the AM/RM token.
 */
@Test
public void testCreateTaskExecutorCredentials() throws Exception {
    final File rootDir = temporaryFolder.getRoot();
    final File homeDir = new File(rootDir, "home");
    final boolean homeCreated = homeDir.mkdir();
    assertTrue(homeCreated);

    final Configuration flinkConf = new Configuration();
    final YarnConfiguration yarnConf = new YarnConfiguration();

    // Environment handed to the driver configuration.
    final Map<String, String> mutableEnv = new HashMap<>();
    mutableEnv.put(YarnConfigKeys.ENV_APP_ID, "foo");
    mutableEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, homeDir.getAbsolutePath());
    mutableEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES, "");
    mutableEnv.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, "");
    mutableEnv.put(YarnConfigKeys.ENV_HADOOP_USER_NAME, "foo");
    final YarnLocalResourceDescriptor flinkJarDescriptor =
            new YarnLocalResourceDescriptor(
                    "flink.jar",
                    new Path(rootDir.toURI()),
                    0,
                    System.currentTimeMillis(),
                    LocalResourceVisibility.APPLICATION,
                    LocalResourceType.FILE);
    mutableEnv.put(YarnConfigKeys.FLINK_DIST_JAR, flinkJarDescriptor.toString());
    mutableEnv.put(YarnConfigKeys.FLINK_YARN_FILES, "");
    mutableEnv.put(ApplicationConstants.Environment.PWD.key(), homeDir.getAbsolutePath());
    final Map<String, String> env = Collections.unmodifiableMap(mutableEnv);
    final YarnResourceManagerDriverConfiguration yarnResourceManagerDriverConfiguration =
            new YarnResourceManagerDriverConfiguration(env, "localhost", null);

    // Write a credential file that contains one token of each kind.
    final File credentialFile = temporaryFolder.newFile("container_tokens");
    final Text amRmTokenKind = AMRMTokenIdentifier.KIND_NAME;
    final Text hdfsDelegationTokenKind = new Text("HDFS_DELEGATION_TOKEN");
    final Text amRmTokenService = new Text("rm-ip:8030");
    final Text hdfsDelegationTokenService = new Text("ha-hdfs:hadoop-namespace");
    final Credentials amCredentials = new Credentials();
    amCredentials.addToken(
            amRmTokenService,
            new Token<>(new byte[4], new byte[4], amRmTokenKind, amRmTokenService));
    amCredentials.addToken(
            hdfsDelegationTokenService,
            new Token<>(new byte[4], new byte[4], hdfsDelegationTokenKind, hdfsDelegationTokenService));
    amCredentials.writeTokenStorageFile(
            new org.apache.hadoop.fs.Path(credentialFile.getAbsolutePath()), yarnConf);

    final TaskExecutorProcessSpec spec =
            TaskExecutorProcessUtils.newProcessSpecBuilder(flinkConf)
                    .withTotalProcessMemory(MemorySize.parse("1g"))
                    .build();
    final ContaineredTaskManagerParameters tmParams =
            new ContaineredTaskManagerParameters(spec, new HashMap<>(1));
    final Configuration taskManagerConf = new Configuration();
    final String workingDirectory = rootDir.getAbsolutePath();
    final Class<?> taskManagerMainClass = YarnTaskExecutorRunner.class;

    // Point the process environment at the credential file only while the context is built,
    // and put the previous environment back afterwards no matter what happens.
    final Map<String, String> originalEnv = System.getenv();
    ContainerLaunchContext ctx;
    try {
        final Map<String, String> patchedEnv = new HashMap<>(originalEnv);
        patchedEnv.put("HADOOP_TOKEN_FILE_LOCATION", credentialFile.getAbsolutePath());
        CommonTestUtils.setEnv(patchedEnv);
        ctx =
                Utils.createTaskExecutorContext(
                        flinkConf,
                        yarnConf,
                        yarnResourceManagerDriverConfiguration,
                        tmParams,
                        "",
                        workingDirectory,
                        taskManagerMainClass,
                        LOG);
    } finally {
        CommonTestUtils.setEnv(originalEnv);
    }

    // Read back the tokens that ended up in the launch context.
    final Credentials credentials = new Credentials();
    try (DataInputStream tokenStream =
            new DataInputStream(new ByteArrayInputStream(ctx.getTokens().array()))) {
        credentials.readTokenStorageStream(tokenStream);
    }

    boolean amRmTokenSeen = false;
    boolean hdfsDelegationTokenSeen = false;
    final Collection<Token<? extends TokenIdentifier>> tokens = credentials.getAllTokens();
    for (Token<? extends TokenIdentifier> token : tokens) {
        if (token.getKind().equals(amRmTokenKind)) {
            amRmTokenSeen = true;
            continue;
        }
        if (token.getKind().equals(hdfsDelegationTokenKind)) {
            hdfsDelegationTokenSeen = true;
        }
    }
    assertTrue(hdfsDelegationTokenSeen);
    assertFalse(amRmTokenSeen);
}
Also used : AMRMTokenIdentifier(org.apache.hadoop.yarn.security.AMRMTokenIdentifier) TokenIdentifier(org.apache.hadoop.security.token.TokenIdentifier) YarnResourceManagerDriverConfiguration(org.apache.flink.yarn.configuration.YarnResourceManagerDriverConfiguration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) YarnResourceManagerDriverConfiguration(org.apache.flink.yarn.configuration.YarnResourceManagerDriverConfiguration) Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) ContaineredTaskManagerParameters(org.apache.flink.runtime.clusterframework.ContaineredTaskManagerParameters) Token(org.apache.hadoop.security.token.Token) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Path(org.apache.hadoop.fs.Path) Path(org.apache.hadoop.fs.Path) TaskExecutorProcessSpec(org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec) Text(org.apache.hadoop.io.Text) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) DataInputStream(java.io.DataInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) File(java.io.File) Credentials(org.apache.hadoop.security.Credentials) Test(org.junit.Test)

Example 8 with TaskExecutorProcessSpec

use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.

the class ActiveResourceManagerTest method testWorkerRegistrationTimeoutNotCountingAllocationTime.

/**
 * Verifies that the time the driver needs to allocate a resource is not counted against the
 * worker registration timeout: the driver's resource future is completed only after twice the
 * configured timeout has elapsed, and the worker must still register successfully without its
 * resource being released.
 */
@Test
public void testWorkerRegistrationTimeoutNotCountingAllocationTime() throws Exception {
    new Context() {

        {
            final ResourceID tmResourceId = ResourceID.generate();
            final CompletableFuture<ResourceID> requestResourceFuture = new CompletableFuture<>();
            final CompletableFuture<ResourceID> releaseResourceFuture = new CompletableFuture<>();
            flinkConfig.set(ResourceManagerOptions.TASK_MANAGER_REGISTRATION_TIMEOUT, Duration.ofMillis(TESTING_START_WORKER_TIMEOUT_MS));
            driverBuilder.setRequestResourceFunction(taskExecutorProcessSpec -> requestResourceFuture).setReleaseResourceConsumer(releaseResourceFuture::complete);
            runTest(() -> {
                // request new worker
                runInMainThread(() -> getResourceManager().startNewWorker(WORKER_RESOURCE_SPEC));
                // resource allocation takes longer than worker registration timeout
                try {
                    Thread.sleep(TESTING_START_WORKER_TIMEOUT_MS * 2);
                } catch (InterruptedException e) {
                    // Restore the interrupt status before failing; otherwise the
                    // interruption is lost once fail() throws.
                    Thread.currentThread().interrupt();
                    fail("Interrupted while waiting for the worker registration timeout to elapse.");
                }
                final long start = System.nanoTime();
                runInMainThread(() -> requestResourceFuture.complete(tmResourceId));
                // worker registered, verify not released due to timeout
                RegistrationResponse registrationResponse = registerTaskExecutor(tmResourceId).join();
                // elapsed time between completing the allocation and the registration, in ms
                final long registrationTime = (System.nanoTime() - start) / 1_000_000;
                assumeTrue("The registration must not take longer than the start worker timeout. If it does, then this indicates a very slow machine.", registrationTime < TESTING_START_WORKER_TIMEOUT_MS);
                assertThat(registrationResponse, instanceOf(RegistrationResponse.Success.class));
                assertFalse(releaseResourceFuture.isDone());
            });
        }
    };
}
Also used : TaskExecutorRegistration(org.apache.flink.runtime.resourcemanager.TaskExecutorRegistration) TaskExecutorProcessSpec(org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) WorkerResourceSpec(org.apache.flink.runtime.resourcemanager.WorkerResourceSpec) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) ResourceManagerOptions(org.apache.flink.configuration.ResourceManagerOptions) TaskExecutorMemoryConfiguration(org.apache.flink.runtime.taskexecutor.TaskExecutorMemoryConfiguration) Callable(java.util.concurrent.Callable) CompletableFuture(java.util.concurrent.CompletableFuture) RunnableWithException(org.apache.flink.util.function.RunnableWithException) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) ArrayList(java.util.ArrayList) Assert.assertThat(org.junit.Assert.assertThat) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Duration(java.time.Duration) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) NoOpResourceManagerPartitionTracker(org.apache.flink.runtime.io.network.partition.NoOpResourceManagerPartitionTracker) TestLogger(org.apache.flink.util.TestLogger) Matchers.lessThan(org.hamcrest.Matchers.lessThan) SlotManager(org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager) Assert.fail(org.junit.Assert.fail) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ClassRule(org.junit.ClassRule) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) HardwareDescription(org.apache.flink.runtime.instance.HardwareDescription) Configuration(org.apache.flink.configuration.Configuration) Test(org.junit.Test) UUID(java.util.UUID) 
ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) TimeUnit(java.util.concurrent.TimeUnit) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) List(java.util.List) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) TaskExecutorProcessUtils(org.apache.flink.runtime.clusterframework.TaskExecutorProcessUtils) ForkJoinPool(java.util.concurrent.ForkJoinPool) Assert.assertFalse(org.junit.Assert.assertFalse) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) TestingRpcServiceResource(org.apache.flink.runtime.rpc.TestingRpcServiceResource) TestingSlotManagerBuilder(org.apache.flink.runtime.resourcemanager.slotmanager.TestingSlotManagerBuilder) Matchers.is(org.hamcrest.Matchers.is) Assume.assumeTrue(org.junit.Assume.assumeTrue) Collections(java.util.Collections) Time(org.apache.flink.api.common.time.Time) MockResourceManagerRuntimeServices(org.apache.flink.runtime.resourcemanager.utils.MockResourceManagerRuntimeServices) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) Test(org.junit.Test)

Example 9 with TaskExecutorProcessSpec

use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.

the class ActiveResourceManagerTest method testCloseTaskManagerConnectionOnWorkerTerminated.

/**
 * Starts a worker, registers its task executor, then reports the worker as terminated and
 * waits for the task executor gateway to be told to disconnect from the resource manager.
 */
@Test
public void testCloseTaskManagerConnectionOnWorkerTerminated() throws Exception {
    new Context() {

        {
            final ResourceID tmResourceId = ResourceID.generate();
            final CompletableFuture<TaskExecutorProcessSpec> requestWorkerFromDriverFuture = new CompletableFuture<>();
            final CompletableFuture<Void> disconnectResourceManagerFuture = new CompletableFuture<>();

            // Gateway that signals the future as soon as it is asked to disconnect.
            final TestingTaskExecutorGateway taskExecutorGateway =
                    new TestingTaskExecutorGatewayBuilder()
                            .setDisconnectResourceManagerConsumer(
                                    ignored -> disconnectResourceManagerFuture.complete(null))
                            .createTestingTaskExecutorGateway();

            driverBuilder.setRequestResourceFunction(
                    requestedSpec -> {
                        requestWorkerFromDriverFuture.complete(requestedSpec);
                        return CompletableFuture.completedFuture(tmResourceId);
                    });

            runTest(
                    () -> {
                        // request a new worker, terminate it after registered
                        final CompletableFuture<Boolean> workerStarted =
                                runInMainThread(() -> getResourceManager().startNewWorker(WORKER_RESOURCE_SPEC));
                        workerStarted
                                .thenCompose(ignored -> registerTaskExecutor(tmResourceId, taskExecutorGateway))
                                .thenRun(
                                        () ->
                                                runInMainThread(
                                                        () ->
                                                                getResourceManager()
                                                                        .onWorkerTerminated(tmResourceId, "terminate for testing")));
                        // verify task manager connection is closed
                        disconnectResourceManagerFuture.get(TIMEOUT_SEC, TimeUnit.SECONDS);
                    });
        }
    };
}
Also used : TaskExecutorRegistration(org.apache.flink.runtime.resourcemanager.TaskExecutorRegistration) TaskExecutorProcessSpec(org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) WorkerResourceSpec(org.apache.flink.runtime.resourcemanager.WorkerResourceSpec) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) ResourceManagerOptions(org.apache.flink.configuration.ResourceManagerOptions) TaskExecutorMemoryConfiguration(org.apache.flink.runtime.taskexecutor.TaskExecutorMemoryConfiguration) Callable(java.util.concurrent.Callable) CompletableFuture(java.util.concurrent.CompletableFuture) RunnableWithException(org.apache.flink.util.function.RunnableWithException) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) ArrayList(java.util.ArrayList) Assert.assertThat(org.junit.Assert.assertThat) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Duration(java.time.Duration) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) NoOpResourceManagerPartitionTracker(org.apache.flink.runtime.io.network.partition.NoOpResourceManagerPartitionTracker) TestLogger(org.apache.flink.util.TestLogger) Matchers.lessThan(org.hamcrest.Matchers.lessThan) SlotManager(org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager) Assert.fail(org.junit.Assert.fail) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ClassRule(org.junit.ClassRule) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) HardwareDescription(org.apache.flink.runtime.instance.HardwareDescription) Configuration(org.apache.flink.configuration.Configuration) Test(org.junit.Test) UUID(java.util.UUID) 
ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) TimeUnit(java.util.concurrent.TimeUnit) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) List(java.util.List) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) TaskExecutorProcessUtils(org.apache.flink.runtime.clusterframework.TaskExecutorProcessUtils) ForkJoinPool(java.util.concurrent.ForkJoinPool) Assert.assertFalse(org.junit.Assert.assertFalse) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) TestingRpcServiceResource(org.apache.flink.runtime.rpc.TestingRpcServiceResource) TestingSlotManagerBuilder(org.apache.flink.runtime.resourcemanager.slotmanager.TestingSlotManagerBuilder) Matchers.is(org.hamcrest.Matchers.is) Assume.assumeTrue(org.junit.Assume.assumeTrue) Collections(java.util.Collections) Time(org.apache.flink.api.common.time.Time) MockResourceManagerRuntimeServices(org.apache.flink.runtime.resourcemanager.utils.MockResourceManagerRuntimeServices) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) CompletableFuture(java.util.concurrent.CompletableFuture) TaskExecutorProcessSpec(org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) Test(org.junit.Test)

Example 10 with TaskExecutorProcessSpec

use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.

the class ActiveResourceManagerTest method testWorkerTerminatedBeforeRegister.

/**
 * Checks that when a requested worker terminates before it has registered, another worker with
 * the same process spec is requested from the driver, and that this second worker can register.
 */
@Test
public void testWorkerTerminatedBeforeRegister() throws Exception {
    new Context() {

        {
            final AtomicInteger driverRequests = new AtomicInteger(0);
            final List<ResourceID> workerIds = new ArrayList<>();
            final List<CompletableFuture<TaskExecutorProcessSpec>> driverRequestFutures = new ArrayList<>();
            // Two workers are expected: the one that dies early and its replacement.
            for (int i = 0; i < 2; i++) {
                workerIds.add(ResourceID.generate());
                driverRequestFutures.add(new CompletableFuture<>());
            }

            driverBuilder.setRequestResourceFunction(
                    requestedSpec -> {
                        final int requestIndex = driverRequests.getAndIncrement();
                        assertThat(requestIndex, lessThan(2));
                        driverRequestFutures.get(requestIndex).complete(requestedSpec);
                        return CompletableFuture.completedFuture(workerIds.get(requestIndex));
                    });
            slotManagerBuilder.setGetRequiredResourcesSupplier(
                    () -> Collections.singletonMap(WORKER_RESOURCE_SPEC, 1));

            runTest(
                    () -> {
                        // received worker request, verify requesting from driver
                        final CompletableFuture<Boolean> workerStarted =
                                runInMainThread(() -> getResourceManager().startNewWorker(WORKER_RESOURCE_SPEC));
                        final TaskExecutorProcessSpec firstSpec =
                                driverRequestFutures.get(0).get(TIMEOUT_SEC, TimeUnit.SECONDS);
                        assertThat(workerStarted.get(TIMEOUT_SEC, TimeUnit.SECONDS), is(true));
                        assertThat(
                                firstSpec,
                                is(TaskExecutorProcessUtils.processSpecFromWorkerResourceSpec(flinkConfig, WORKER_RESOURCE_SPEC)));

                        // first worker failed before register, verify requesting another
                        // worker from driver
                        runInMainThread(
                                () -> getResourceManager().onWorkerTerminated(workerIds.get(0), "terminate for testing"));
                        final TaskExecutorProcessSpec secondSpec =
                                driverRequestFutures.get(1).get(TIMEOUT_SEC, TimeUnit.SECONDS);
                        assertThat(secondSpec, is(firstSpec));

                        // second worker registered, verify registration succeed
                        final CompletableFuture<RegistrationResponse> registration =
                                registerTaskExecutor(workerIds.get(1));
                        assertThat(
                                registration.get(TIMEOUT_SEC, TimeUnit.SECONDS),
                                instanceOf(RegistrationResponse.Success.class));
                    });
        }
    };
}
Also used : TaskExecutorProcessSpec(org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec) ArrayList(java.util.ArrayList) CompletableFuture(java.util.concurrent.CompletableFuture) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) Test(org.junit.Test)

Aggregations

TaskExecutorProcessSpec (org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec)21 Test (org.junit.Test)14 CompletableFuture (java.util.concurrent.CompletableFuture)13 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)12 ArrayList (java.util.ArrayList)10 Configuration (org.apache.flink.configuration.Configuration)10 RegistrationResponse (org.apache.flink.runtime.registration.RegistrationResponse)8 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)7 List (java.util.List)5 Duration (java.time.Duration)4 HashMap (java.util.HashMap)4 UUID (java.util.UUID)4 Callable (java.util.concurrent.Callable)4 TimeUnit (java.util.concurrent.TimeUnit)4 Time (org.apache.flink.api.common.time.Time)4 ContaineredTaskManagerParameters (org.apache.flink.runtime.clusterframework.ContaineredTaskManagerParameters)4 TaskExecutorProcessUtils (org.apache.flink.runtime.clusterframework.TaskExecutorProcessUtils)4 ClusterInformation (org.apache.flink.runtime.entrypoint.ClusterInformation)4 WorkerResourceSpec (org.apache.flink.runtime.resourcemanager.WorkerResourceSpec)4 SlotManager (org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager)4