Search in sources :

Example 21 with TaskExecutorProcessSpec

Use of org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec in project flink by apache.

In the class YarnConfigurationITCase, method testFlinkContainerMemory.

/**
 * Tests that the Flink components are started with the correct memory settings.
 *
 * <p>The test deploys a job cluster for {@code WindowJoin.jar}, waits until at least two
 * YARN containers are reported, and checks the memory allocated to each of them. It then
 * polls the REST endpoint until a TaskManager has registered and reported its slots, and
 * verifies the slot count, JVM heap size and managed memory size that TaskManager reports.
 *
 * <p>Cleanup is nested so that every step runs even if an earlier one (or the test body)
 * fails: the REST client is shut down, the cluster client is closed, the deployed cluster
 * is killed and finally the cluster descriptor is closed.
 */
@Test(timeout = 60000)
public void testFlinkContainerMemory() throws Exception {
    runTest(() -> {
        final YarnClient yarnClient = getYarnClient();
        final Configuration configuration = new Configuration(flinkConfiguration);
        final int slotsPerTaskManager = 3;
        configuration.set(TaskManagerOptions.NUM_TASK_SLOTS, slotsPerTaskManager);
        final int masterMemory = 768;
        configuration.set(JobManagerOptions.TOTAL_PROCESS_MEMORY, MemorySize.ofMebiBytes(masterMemory));
        // The expected TaskManager memory figures are derived from the same configuration
        // that is used to deploy the cluster.
        final TaskExecutorProcessSpec tmResourceSpec = TaskExecutorProcessUtils.processSpecFromConfig(configuration);
        final int taskManagerMemory = tmResourceSpec.getTotalProcessMemorySize().getMebiBytes();
        final YarnConfiguration yarnConfiguration = getYarnConfiguration();
        final YarnClusterDescriptor clusterDescriptor = YarnTestUtils.createClusterDescriptorWithLogging(CliFrontend.getConfigurationDirectoryFromEnv(), configuration, yarnConfiguration, yarnClient, true);
        clusterDescriptor.setLocalJarPath(new Path(flinkUberjar.getAbsolutePath()));
        clusterDescriptor.addShipFiles(Arrays.asList(flinkLibFolder.listFiles()));
        final File windowJoinJarFile = getTestJarPath("WindowJoin.jar");
        final PackagedProgram packagedProgram = PackagedProgram.newBuilder().setJarFile(windowJoinJarFile).build();
        final JobGraph jobGraph = PackagedProgramUtils.createJobGraph(packagedProgram, configuration, 1, false);
        try {
            final ClusterSpecification clusterSpecification = new ClusterSpecification.ClusterSpecificationBuilder().setMasterMemoryMB(masterMemory).setTaskManagerMemoryMB(taskManagerMemory).setSlotsPerTaskManager(slotsPerTaskManager).createClusterSpecification();
            final ClusterClient<ApplicationId> clusterClient = clusterDescriptor.deployJobCluster(clusterSpecification, jobGraph, true).getClusterClient();
            final ApplicationId clusterId = clusterClient.getClusterId();
            try {
                // Created inside the try so that a failing constructor still closes the
                // cluster client and kills the deployed cluster.
                final RestClient restClient = new RestClient(configuration, TestingUtils.defaultExecutor());
                try {
                    final ApplicationReport applicationReport = yarnClient.getApplicationReport(clusterId);
                    final ApplicationAttemptId currentApplicationAttemptId = applicationReport.getCurrentApplicationAttemptId();
                    // wait until we have second container allocated
                    List<ContainerReport> containers = yarnClient.getContainers(currentApplicationAttemptId);
                    while (containers.size() < 2) {
                        // this is nasty but Yarn does not offer a better way to wait
                        Thread.sleep(50L);
                        containers = yarnClient.getContainers(currentApplicationAttemptId);
                    }
                    for (ContainerReport container : containers) {
                        if (container.getContainerId().getId() == 1) {
                            // this should be the application master
                            assertThat(container.getAllocatedResource().getMemory(), is(masterMemory));
                        } else {
                            assertThat(container.getAllocatedResource().getMemory(), is(taskManagerMemory));
                        }
                    }
                    final URI webURI = new URI(clusterClient.getWebInterfaceURL());
                    CompletableFuture<TaskManagersInfo> taskManagersInfoCompletableFuture;
                    Collection<TaskManagerInfo> taskManagerInfos;
                    // Poll the REST endpoint; the loop is bounded only by the test timeout.
                    while (true) {
                        taskManagersInfoCompletableFuture = restClient.sendRequest(webURI.getHost(), webURI.getPort(), TaskManagersHeaders.getInstance(), EmptyMessageParameters.getInstance(), EmptyRequestBody.getInstance());
                        final TaskManagersInfo taskManagersInfo = taskManagersInfoCompletableFuture.get();
                        taskManagerInfos = taskManagersInfo.getTaskManagerInfos();
                        // wait until the task manager has registered and reported its slots
                        if (hasTaskManagerConnectedAndReportedSlots(taskManagerInfos)) {
                            break;
                        } else {
                            Thread.sleep(100L);
                        }
                    }
                    // there should be at least one TaskManagerInfo
                    final TaskManagerInfo taskManagerInfo = taskManagerInfos.iterator().next();
                    assertThat(taskManagerInfo.getNumberSlots(), is(slotsPerTaskManager));
                    final long expectedHeapSizeBytes = tmResourceSpec.getJvmHeapMemorySize().getBytes();
                    // The heap size reported by the TaskManager is compared with the heap
                    // size we expect to pass to the JVM as the Xmx parameter. Those values
                    // might differ significantly due to system page size or JVM
                    // implementation, therefore a 15% threshold is used here.
                    assertThat((double) taskManagerInfo.getHardwareDescription().getSizeOfJvmHeap() / (double) expectedHeapSizeBytes, is(closeTo(1.0, 0.15)));
                    final int expectedManagedMemoryMB = tmResourceSpec.getManagedMemorySize().getMebiBytes();
                    // ">> 20" converts the reported byte count to whole mebibytes.
                    assertThat((int) (taskManagerInfo.getHardwareDescription().getSizeOfManagedMemory() >> 20), is(expectedManagedMemoryMB));
                } finally {
                    restClient.shutdown(TIMEOUT);
                }
            } finally {
                try {
                    clusterClient.close();
                } finally {
                    // Kill the cluster on failure paths too, so a failed assertion does
                    // not leave the deployed application running.
                    clusterDescriptor.killCluster(clusterId);
                }
            }
        } finally {
            clusterDescriptor.close();
        }
    });
}
Also used : YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.flink.configuration.Configuration) URI(java.net.URI) PackagedProgram(org.apache.flink.client.program.PackagedProgram) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ContainerReport(org.apache.hadoop.yarn.api.records.ContainerReport) TaskManagersInfo(org.apache.flink.runtime.rest.messages.taskmanager.TaskManagersInfo) TestUtils.getTestJarPath(org.apache.flink.yarn.util.TestUtils.getTestJarPath) Path(org.apache.hadoop.fs.Path) TaskExecutorProcessSpec(org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec) ClusterSpecification(org.apache.flink.client.deployment.ClusterSpecification) RestClient(org.apache.flink.runtime.rest.RestClient) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TaskManagerInfo(org.apache.flink.runtime.rest.messages.taskmanager.TaskManagerInfo) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) File(java.io.File) Test(org.junit.Test)

Aggregations

TaskExecutorProcessSpec (org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec)21 Test (org.junit.Test)14 CompletableFuture (java.util.concurrent.CompletableFuture)13 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)12 ArrayList (java.util.ArrayList)10 Configuration (org.apache.flink.configuration.Configuration)10 RegistrationResponse (org.apache.flink.runtime.registration.RegistrationResponse)8 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)7 List (java.util.List)5 Duration (java.time.Duration)4 HashMap (java.util.HashMap)4 UUID (java.util.UUID)4 Callable (java.util.concurrent.Callable)4 TimeUnit (java.util.concurrent.TimeUnit)4 Time (org.apache.flink.api.common.time.Time)4 ContaineredTaskManagerParameters (org.apache.flink.runtime.clusterframework.ContaineredTaskManagerParameters)4 TaskExecutorProcessUtils (org.apache.flink.runtime.clusterframework.TaskExecutorProcessUtils)4 ClusterInformation (org.apache.flink.runtime.entrypoint.ClusterInformation)4 WorkerResourceSpec (org.apache.flink.runtime.resourcemanager.WorkerResourceSpec)4 SlotManager (org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager)4