Example 1 with TaskManagersInfo

Use of org.apache.flink.runtime.rest.messages.taskmanager.TaskManagersInfo in project flink by apache.

From the class MetricsAvailabilityITCase, method getTaskManagerIds:

private static Collection<ResourceID> getTaskManagerIds(final RestClient restClient) throws Exception {
    final TaskManagersHeaders headers = TaskManagersHeaders.getInstance();
    final TaskManagersInfo response =
            fetchMetric(
                    () ->
                            restClient.sendRequest(
                                    HOST,
                                    PORT,
                                    headers,
                                    EmptyMessageParameters.getInstance(),
                                    EmptyRequestBody.getInstance()),
                    taskManagersInfo -> !taskManagersInfo.getTaskManagerInfos().isEmpty());
    return response.getTaskManagerInfos().stream()
            .map(TaskManagerInfo::getResourceId)
            .collect(Collectors.toList());
}
Also used: TaskManagersHeaders(org.apache.flink.runtime.rest.messages.taskmanager.TaskManagersHeaders) TaskManagersInfo(org.apache.flink.runtime.rest.messages.taskmanager.TaskManagersInfo)
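
Example 1 calls a fetchMetric helper from MetricsAvailabilityITCase whose body is not shown on this page. Below is a minimal sketch of the same poll-until-predicate pattern using only standard Java types; the class name RestPolling, the method name fetchUntil, the retry budget, and the sleep interval are illustrative assumptions, not the actual Flink implementation.

import java.util.concurrent.Callable;
import java.util.concurrent.CompletableFuture;
import java.util.function.Predicate;

final class RestPolling {

    private RestPolling() {}

    // Sketch only: repeatedly issues the request until the predicate accepts the
    // response, or gives up after a fixed number of attempts.
    static <T> T fetchUntil(Callable<CompletableFuture<T>> request, Predicate<T> condition)
            throws Exception {
        Exception lastFailure = null;
        for (int attempt = 0; attempt < 30; attempt++) {
            try {
                final T response = request.call().get();
                if (condition.test(response)) {
                    return response;
                }
            } catch (Exception e) {
                // the REST endpoint may not be reachable yet
                lastFailure = e;
            }
            Thread.sleep(100L);
        }
        throw new IllegalStateException("Condition was not fulfilled in time.", lastFailure);
    }
}

With such a helper, the call site in Example 1 reads as fetchUntil(() -> restClient.sendRequest(...), info -> !info.getTaskManagerInfos().isEmpty()).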

Example 2 with TaskManagersInfo

Use of org.apache.flink.runtime.rest.messages.taskmanager.TaskManagersInfo in project flink by apache.

From the class LocalStandaloneFlinkResource, method startCluster:

@Override
public ClusterController startCluster(int numTaskManagers) throws IOException {
    distribution.setTaskExecutorHosts(Collections.nCopies(numTaskManagers, "localhost"));
    distribution.startFlinkCluster();
    try (final RestClient restClient = new RestClient(new Configuration(), Executors.directExecutor())) {
        for (int retryAttempt = 0; retryAttempt < 30; retryAttempt++) {
            final CompletableFuture<TaskManagersInfo> localhost =
                    restClient.sendRequest(
                            "localhost",
                            8081,
                            TaskManagersHeaders.getInstance(),
                            EmptyMessageParameters.getInstance(),
                            EmptyRequestBody.getInstance());
            try {
                final TaskManagersInfo taskManagersInfo = localhost.get(1, TimeUnit.SECONDS);
                final int numRunningTaskManagers = taskManagersInfo.getTaskManagerInfos().size();
                if (numRunningTaskManagers == numTaskManagers) {
                    return new StandaloneClusterController(distribution);
                } else {
                    LOG.info("Waiting for task managers to come up. {}/{} are currently running.", numRunningTaskManagers, numTaskManagers);
                }
            } catch (InterruptedException e) {
                LOG.info("Waiting for dispatcher REST endpoint to come up...");
                Thread.currentThread().interrupt();
            } catch (TimeoutException | ExecutionException e) {
                // ExecutionExceptions may occur if leader election is still going on
                LOG.info("Waiting for dispatcher REST endpoint to come up...");
            }
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
    } catch (ConfigurationException e) {
        throw new RuntimeException("Could not create RestClient.", e);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    throw new RuntimeException("Cluster did not start in expected time-frame.");
}
Also used: Configuration(org.apache.flink.configuration.Configuration) RestClient(org.apache.flink.runtime.rest.RestClient) ConfigurationException(org.apache.flink.util.ConfigurationException) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) TaskManagersInfo(org.apache.flink.runtime.rest.messages.taskmanager.TaskManagersInfo)
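
Distilled from the polling loop above, here is a minimal sketch of the underlying REST call. The class and method names are illustrative; the sketch assumes the RestClient constructor used in the example (a Configuration plus an executor) and passes a plain direct executor in place of the Executors.directExecutor() utility.

import java.util.concurrent.TimeUnit;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.rest.RestClient;
import org.apache.flink.runtime.rest.messages.EmptyMessageParameters;
import org.apache.flink.runtime.rest.messages.EmptyRequestBody;
import org.apache.flink.runtime.rest.messages.taskmanager.TaskManagersHeaders;
import org.apache.flink.runtime.rest.messages.taskmanager.TaskManagersInfo;

final class TaskManagerCount {

    private TaskManagerCount() {}

    // Asks the dispatcher's REST endpoint how many task managers are currently
    // registered. Host, port, and the 5-second timeout are illustrative values.
    static int runningTaskManagers(String host, int port) throws Exception {
        // Runnable::run is a direct executor, standing in for the
        // Executors.directExecutor() call used in the example above.
        try (RestClient restClient = new RestClient(new Configuration(), Runnable::run)) {
            final TaskManagersInfo info =
                    restClient
                            .sendRequest(
                                    host,
                                    port,
                                    TaskManagersHeaders.getInstance(),
                                    EmptyMessageParameters.getInstance(),
                                    EmptyRequestBody.getInstance())
                            .get(5, TimeUnit.SECONDS);
            return info.getTaskManagerInfos().size();
        }
    }
}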

Example 3 with TaskManagersInfo

Use of org.apache.flink.runtime.rest.messages.taskmanager.TaskManagersInfo in project flink by apache.

From the class YarnConfigurationITCase, method testFlinkContainerMemory:

/**
 * Tests that the Flink components are started with the correct memory settings.
 */
@Test(timeout = 60000)
public void testFlinkContainerMemory() throws Exception {
    runTest(() -> {
        final YarnClient yarnClient = getYarnClient();
        final Configuration configuration = new Configuration(flinkConfiguration);
        final int slotsPerTaskManager = 3;
        configuration.set(TaskManagerOptions.NUM_TASK_SLOTS, slotsPerTaskManager);
        final int masterMemory = 768;
        configuration.set(JobManagerOptions.TOTAL_PROCESS_MEMORY, MemorySize.ofMebiBytes(masterMemory));
        final TaskExecutorProcessSpec tmResourceSpec = TaskExecutorProcessUtils.processSpecFromConfig(configuration);
        final int taskManagerMemory = tmResourceSpec.getTotalProcessMemorySize().getMebiBytes();
        final YarnConfiguration yarnConfiguration = getYarnConfiguration();
        final YarnClusterDescriptor clusterDescriptor =
                YarnTestUtils.createClusterDescriptorWithLogging(
                        CliFrontend.getConfigurationDirectoryFromEnv(),
                        configuration,
                        yarnConfiguration,
                        yarnClient,
                        true);
        clusterDescriptor.setLocalJarPath(new Path(flinkUberjar.getAbsolutePath()));
        clusterDescriptor.addShipFiles(Arrays.asList(flinkLibFolder.listFiles()));
        final File streamingWordCountFile = getTestJarPath("WindowJoin.jar");
        final PackagedProgram packagedProgram = PackagedProgram.newBuilder().setJarFile(streamingWordCountFile).build();
        final JobGraph jobGraph = PackagedProgramUtils.createJobGraph(packagedProgram, configuration, 1, false);
        try {
            final ClusterSpecification clusterSpecification =
                    new ClusterSpecification.ClusterSpecificationBuilder()
                            .setMasterMemoryMB(masterMemory)
                            .setTaskManagerMemoryMB(taskManagerMemory)
                            .setSlotsPerTaskManager(slotsPerTaskManager)
                            .createClusterSpecification();
            final ClusterClient<ApplicationId> clusterClient =
                    clusterDescriptor
                            .deployJobCluster(clusterSpecification, jobGraph, true)
                            .getClusterClient();
            final ApplicationId clusterId = clusterClient.getClusterId();
            final RestClient restClient = new RestClient(configuration, TestingUtils.defaultExecutor());
            try {
                final ApplicationReport applicationReport = yarnClient.getApplicationReport(clusterId);
                final ApplicationAttemptId currentApplicationAttemptId = applicationReport.getCurrentApplicationAttemptId();
                // wait until we have second container allocated
                List<ContainerReport> containers = yarnClient.getContainers(currentApplicationAttemptId);
                while (containers.size() < 2) {
                    // this is nasty but Yarn does not offer a better way to wait
                    Thread.sleep(50L);
                    containers = yarnClient.getContainers(currentApplicationAttemptId);
                }
                for (ContainerReport container : containers) {
                    if (container.getContainerId().getId() == 1) {
                        // this should be the application master
                        assertThat(container.getAllocatedResource().getMemory(), is(masterMemory));
                    } else {
                        assertThat(container.getAllocatedResource().getMemory(), is(taskManagerMemory));
                    }
                }
                final URI webURI = new URI(clusterClient.getWebInterfaceURL());
                CompletableFuture<TaskManagersInfo> taskManagersInfoCompletableFuture;
                Collection<TaskManagerInfo> taskManagerInfos;
                while (true) {
                    taskManagersInfoCompletableFuture =
                            restClient.sendRequest(
                                    webURI.getHost(),
                                    webURI.getPort(),
                                    TaskManagersHeaders.getInstance(),
                                    EmptyMessageParameters.getInstance(),
                                    EmptyRequestBody.getInstance());
                    final TaskManagersInfo taskManagersInfo = taskManagersInfoCompletableFuture.get();
                    taskManagerInfos = taskManagersInfo.getTaskManagerInfos();
                    // wait until the task manager has registered and reported its slots
                    if (hasTaskManagerConnectedAndReportedSlots(taskManagerInfos)) {
                        break;
                    } else {
                        Thread.sleep(100L);
                    }
                }
                // there should be at least one TaskManagerInfo
                final TaskManagerInfo taskManagerInfo = taskManagerInfos.iterator().next();
                assertThat(taskManagerInfo.getNumberSlots(), is(slotsPerTaskManager));
                final long expectedHeapSizeBytes = tmResourceSpec.getJvmHeapMemorySize().getBytes();
                // Compare the physical memory assigned to the container with the heap
                // memory that is passed to the JVM as the -Xmx parameter. These values
                // can differ significantly due to the system page size or the JVM
                // implementation, so a 15% threshold is used here.
                assertThat(
                        (double) taskManagerInfo.getHardwareDescription().getSizeOfJvmHeap()
                                / (double) expectedHeapSizeBytes,
                        is(closeTo(1.0, 0.15)));
                final int expectedManagedMemoryMB = tmResourceSpec.getManagedMemorySize().getMebiBytes();
                assertThat((int) (taskManagerInfo.getHardwareDescription().getSizeOfManagedMemory() >> 20), is(expectedManagedMemoryMB));
            } finally {
                restClient.shutdown(TIMEOUT);
                clusterClient.close();
            }
            clusterDescriptor.killCluster(clusterId);
        } finally {
            clusterDescriptor.close();
        }
    });
}
Also used: YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.flink.configuration.Configuration) URI(java.net.URI) PackagedProgram(org.apache.flink.client.program.PackagedProgram) ContainerReport(org.apache.hadoop.yarn.api.records.ContainerReport) TaskManagersInfo(org.apache.flink.runtime.rest.messages.taskmanager.TaskManagersInfo) TestUtils.getTestJarPath(org.apache.flink.yarn.util.TestUtils.getTestJarPath) Path(org.apache.hadoop.fs.Path) TaskExecutorProcessSpec(org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec) ClusterSpecification(org.apache.flink.client.deployment.ClusterSpecification) RestClient(org.apache.flink.runtime.rest.RestClient) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) YarnClient(org.apache.hadoop.yarn.client.api.YarnClient) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TaskManagerInfo(org.apache.flink.runtime.rest.messages.taskmanager.TaskManagerInfo) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) File(java.io.File) Test(org.junit.Test)
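
The loop in Example 3 waits on a hasTaskManagerConnectedAndReportedSlots predicate that is defined elsewhere in YarnConfigurationITCase and not shown on this page. A plausible sketch of such a check, built only on TaskManagerInfo#getNumberSlots (the same accessor used by the assertion above); the exact condition in the real test may differ.

// Sketch only: a task manager counts as connected once it shows up in the REST
// response and reports a non-zero number of slots.
private static boolean hasTaskManagerConnectedAndReportedSlots(
        Collection<TaskManagerInfo> taskManagerInfos) {
    if (taskManagerInfos.isEmpty()) {
        return false;
    }
    return taskManagerInfos.iterator().next().getNumberSlots() > 0;
}
Also used: Collection(java.util.Collection) TaskManagerInfo(org.apache.flink.runtime.rest.messages.taskmanager.TaskManagerInfo)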

Aggregations

TaskManagersInfo (org.apache.flink.runtime.rest.messages.taskmanager.TaskManagersInfo) 3
Configuration (org.apache.flink.configuration.Configuration) 2
RestClient (org.apache.flink.runtime.rest.RestClient) 2
File (java.io.File) 1
IOException (java.io.IOException) 1
URI (java.net.URI) 1
ExecutionException (java.util.concurrent.ExecutionException) 1
TimeoutException (java.util.concurrent.TimeoutException) 1
ClusterSpecification (org.apache.flink.client.deployment.ClusterSpecification) 1
PackagedProgram (org.apache.flink.client.program.PackagedProgram) 1
TaskExecutorProcessSpec (org.apache.flink.runtime.clusterframework.TaskExecutorProcessSpec) 1
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph) 1
TaskManagerInfo (org.apache.flink.runtime.rest.messages.taskmanager.TaskManagerInfo) 1
TaskManagersHeaders (org.apache.flink.runtime.rest.messages.taskmanager.TaskManagersHeaders) 1
ConfigurationException (org.apache.flink.util.ConfigurationException) 1
TestUtils.getTestJarPath (org.apache.flink.yarn.util.TestUtils.getTestJarPath) 1
Path (org.apache.hadoop.fs.Path) 1
ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId) 1
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId) 1
ApplicationReport (org.apache.hadoop.yarn.api.records.ApplicationReport) 1