Search in sources:

Example 6 with ClusterSpecification

use of org.apache.flink.client.deployment.ClusterSpecification in project flink by apache.

In class YarnClusterDescriptor, method deployInternal.

/**
 * This method will block until the ApplicationMaster/JobManager have been deployed on YARN.
 *
 * <p>The deployment proceeds in the following order: Kerberos/delegation-token sanity checks,
 * deployment readiness and queue checks, creation of a new YARN application, validation of the
 * requested resources against the cluster, and finally the start of the application master.
 * Every failure that is detected after the YARN application has been created goes through
 * {@code failSessionDuringDeployment} before the exception is propagated.
 *
 * @param clusterSpecification Initial cluster specification for the Flink cluster to be
 *     deployed
 * @param applicationName name of the Yarn application to start
 * @param yarnClusterEntrypoint Class name of the Yarn cluster entry point.
 * @param jobGraph A job graph which is deployed with the Flink cluster, {@code null} if none
 * @param detached True if the cluster should be started in detached mode
 * @return a provider that creates a {@link RestClusterClient} for the deployed application
 * @throws Exception if the security setup is invalid, the configuration is inconsistent, the
 *     cluster resources cannot be retrieved or validated, or the application master cannot be
 *     started
 */
private ClusterClientProvider<ApplicationId> deployInternal(
        ClusterSpecification clusterSpecification,
        String applicationName,
        String yarnClusterEntrypoint,
        @Nullable JobGraph jobGraph,
        boolean detached)
        throws Exception {
    final UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
    if (HadoopUtils.isKerberosSecurityEnabled(currentUser)) {
        boolean useTicketCache = flinkConfiguration.getBoolean(SecurityOptions.KERBEROS_LOGIN_USETICKETCACHE);
        if (!HadoopUtils.areKerberosCredentialsValid(currentUser, useTicketCache)) {
            throw new RuntimeException("Hadoop security with Kerberos is enabled but the login user " + "does not have Kerberos credentials or delegation tokens!");
        }
        final boolean fetchToken = flinkConfiguration.getBoolean(SecurityOptions.KERBEROS_FETCH_DELEGATION_TOKEN);
        final boolean yarnAccessFSEnabled = !CollectionUtil.isNullOrEmpty(flinkConfiguration.get(YarnConfigOptions.YARN_ACCESS));
        // Additional file systems can only be accessed if delegation tokens are fetched.
        if (!fetchToken && yarnAccessFSEnabled) {
            throw new IllegalConfigurationException(String.format("When %s is disabled, %s must be disabled as well.", SecurityOptions.KERBEROS_FETCH_DELEGATION_TOKEN.key(), YarnConfigOptions.YARN_ACCESS.key()));
        }
    }
    isReadyForDeployment(clusterSpecification);

    // ------------------ Check if the specified queue exists --------------------
    checkYarnQueues(yarnClient);

    // ------------------ Check if the YARN ClusterClient has the requested resources ----------
    // Create application via yarnClient
    final YarnClientApplication yarnApplication = yarnClient.createApplication();
    final GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse();
    final Resource maxRes = appResponse.getMaximumResourceCapability();

    final ClusterResourceDescription freeClusterMem;
    try {
        freeClusterMem = getCurrentFreeClusterResources(yarnClient);
    } catch (YarnException | IOException e) {
        failSessionDuringDeployment(yarnClient, yarnApplication);
        throw new YarnDeploymentException("Could not retrieve information about free cluster resources.", e);
    }

    final int yarnMinAllocationMB = yarnConfiguration.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);
    if (yarnMinAllocationMB <= 0) {
        // The YARN application already exists at this point, so fail the session in the same
        // way as the other error paths that follow createApplication().
        failSessionDuringDeployment(yarnClient, yarnApplication);
        throw new YarnDeploymentException("The minimum allocation memory " + "(" + yarnMinAllocationMB + " MB) configured via '" + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB + "' should be greater than 0.");
    }

    final ClusterSpecification validClusterSpecification;
    try {
        validClusterSpecification = validateClusterResources(clusterSpecification, yarnMinAllocationMB, maxRes, freeClusterMem);
    } catch (YarnDeploymentException yde) {
        failSessionDuringDeployment(yarnClient, yarnApplication);
        throw yde;
    }
    LOG.info("Cluster specification: {}", validClusterSpecification);

    // Record the execution mode in the configuration handed to the application master.
    final ClusterEntrypoint.ExecutionMode executionMode = detached ? ClusterEntrypoint.ExecutionMode.DETACHED : ClusterEntrypoint.ExecutionMode.NORMAL;
    flinkConfiguration.setString(ClusterEntrypoint.INTERNAL_CLUSTER_EXECUTION_MODE, executionMode.toString());

    final ApplicationReport report = startAppMaster(flinkConfiguration, applicationName, yarnClusterEntrypoint, jobGraph, yarnClient, yarnApplication, validClusterSpecification);

    // print the application id for user to cancel themselves.
    if (detached) {
        final ApplicationId yarnApplicationId = report.getApplicationId();
        logDetachedClusterInformation(yarnApplicationId, LOG);
    }
    setClusterEntrypointInfoToConfig(report);

    // The client is created lazily, each time the returned provider is asked for one.
    return () -> {
        try {
            return new RestClusterClient<>(flinkConfiguration, report.getApplicationId());
        } catch (Exception e) {
            throw new RuntimeException("Error while creating RestClusterClient.", e);
        }
    };
}
Also used : GetNewApplicationResponse(org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse) YarnClientApplication(org.apache.hadoop.yarn.client.api.YarnClientApplication) ClusterSpecification(org.apache.flink.client.deployment.ClusterSpecification) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) Resource(org.apache.hadoop.yarn.api.records.Resource) IOException(java.io.IOException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) YarnJobClusterEntrypoint(org.apache.flink.yarn.entrypoint.YarnJobClusterEntrypoint) YarnSessionClusterEntrypoint(org.apache.flink.yarn.entrypoint.YarnSessionClusterEntrypoint) YarnApplicationClusterEntryPoint(org.apache.flink.yarn.entrypoint.YarnApplicationClusterEntryPoint) ClusterEntrypoint(org.apache.flink.runtime.entrypoint.ClusterEntrypoint) InvocationTargetException(java.lang.reflect.InvocationTargetException) FlinkException(org.apache.flink.util.FlinkException) IOException(java.io.IOException) ClusterDeploymentException(org.apache.flink.client.deployment.ClusterDeploymentException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) ClusterRetrieveException(org.apache.flink.client.deployment.ClusterRetrieveException) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) YarnJobClusterEntrypoint(org.apache.flink.yarn.entrypoint.YarnJobClusterEntrypoint) YarnSessionClusterEntrypoint(org.apache.flink.yarn.entrypoint.YarnSessionClusterEntrypoint) ClusterEntrypoint(org.apache.flink.runtime.entrypoint.ClusterEntrypoint) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)

Example 7 with ClusterSpecification

use of org.apache.flink.client.deployment.ClusterSpecification in project flink by apache.

In class FlinkYarnSessionCli, method run.

/**
 * Runs the YARN session command line client.
 *
 * <p>Depending on the parsed options this method prints the usage, prints the cluster
 * description, attaches to an existing YARN application, or deploys a new session cluster.
 * For a newly deployed cluster the web interface URL is printed and the YARN properties file
 * is written; if that fails, the cluster is killed again. In attached mode the interactive
 * CLI is run and the cluster is shut down afterwards; otherwise only the detached cluster
 * information is logged. The cluster descriptor is closed in all cases once it was created.
 *
 * @param args the command line arguments
 * @return always {@code 0} when the method completes normally
 * @throws CliArgsException declared for command line argument errors
 * @throws FlinkException if the connection information of a newly deployed cluster cannot be
 *     written, or if a called deployment/retrieval operation fails
 */
public int run(String[] args) throws CliArgsException, FlinkException {
    //
    // Command Line Options
    //
    final CommandLine cmd = parseCommandLineOptions(args, true);
    if (cmd.hasOption(help.getOpt())) {
        printUsage();
        return 0;
    }
    // Command line settings are layered on top of a copy of the base configuration.
    final Configuration effectiveConfiguration = new Configuration(configuration);
    final Configuration commandLineConfiguration = toConfiguration(cmd);
    effectiveConfiguration.addAll(commandLineConfiguration);
    LOG.debug("Effective configuration: {}", effectiveConfiguration);
    final ClusterClientFactory<ApplicationId> yarnClusterClientFactory = clusterClientServiceLoader.getClusterClientFactory(effectiveConfiguration);
    effectiveConfiguration.set(DeploymentOptions.TARGET, YarnDeploymentTarget.SESSION.getName());
    final YarnClusterDescriptor yarnClusterDescriptor = (YarnClusterDescriptor) yarnClusterClientFactory.createClusterDescriptor(effectiveConfiguration);
    try {
        // Query cluster for metrics
        if (cmd.hasOption(query.getOpt())) {
            final String description = yarnClusterDescriptor.getClusterDescription();
            System.out.println(description);
            return 0;
        } else {
            final ClusterClientProvider<ApplicationId> clusterClientProvider;
            final ApplicationId yarnApplicationId;
            if (cmd.hasOption(applicationId.getOpt())) {
                // Attach to an already running YARN application.
                yarnApplicationId = ConverterUtils.toApplicationId(cmd.getOptionValue(applicationId.getOpt()));
                clusterClientProvider = yarnClusterDescriptor.retrieve(yarnApplicationId);
            } else {
                final ClusterSpecification clusterSpecification = yarnClusterClientFactory.getClusterSpecification(effectiveConfiguration);
                clusterClientProvider = yarnClusterDescriptor.deploySessionCluster(clusterSpecification);
                final ClusterClient<ApplicationId> clusterClient = clusterClientProvider.getClusterClient();
                // ------------------ ClusterClient deployed, handle connection details
                yarnApplicationId = clusterClient.getClusterId();
                try {
                    System.out.println("JobManager Web Interface: " + clusterClient.getWebInterfaceURL());
                    writeYarnPropertiesFile(yarnApplicationId, dynamicPropertiesEncoded);
                } catch (Exception e) {
                    try {
                        clusterClient.close();
                    } catch (Exception ex) {
                        LOG.info("Could not properly shutdown cluster client.", ex);
                    }
                    try {
                        yarnClusterDescriptor.killCluster(yarnApplicationId);
                    } catch (FlinkException fe) {
                        LOG.info("Could not properly terminate the Flink cluster.", fe);
                    }
                    throw new FlinkException("Could not write the Yarn connection information.", e);
                }
                // This client is only needed for the connection details above and goes out of
                // scope here, so release it on the success path as well. Later steps obtain
                // their own client from the provider.
                try {
                    clusterClient.close();
                } catch (Exception ex) {
                    LOG.info("Could not properly shutdown cluster client.", ex);
                }
            }
            if (!effectiveConfiguration.getBoolean(DeploymentOptions.ATTACHED)) {
                YarnClusterDescriptor.logDetachedClusterInformation(yarnApplicationId, LOG);
            } else {
                ScheduledExecutorService scheduledExecutorService = Executors.newSingleThreadScheduledExecutor();
                final YarnApplicationStatusMonitor yarnApplicationStatusMonitor = new YarnApplicationStatusMonitor(yarnClusterDescriptor.getYarnClient(), yarnApplicationId, new ScheduledExecutorServiceAdapter(scheduledExecutorService));
                // The hook performs the same shutdown as the finally block below, for the case
                // that the JVM exits while the interactive CLI is still running.
                Thread shutdownHook = ShutdownHookUtil.addShutdownHook(() -> shutdownCluster(clusterClientProvider.getClusterClient(), scheduledExecutorService, yarnApplicationStatusMonitor), getClass().getSimpleName(), LOG);
                try {
                    runInteractiveCli(yarnApplicationStatusMonitor, acceptInteractiveInput);
                } finally {
                    shutdownCluster(clusterClientProvider.getClusterClient(), scheduledExecutorService, yarnApplicationStatusMonitor);
                    if (shutdownHook != null) {
                        // we do not need the hook anymore as we have just tried to shutdown the
                        // cluster.
                        ShutdownHookUtil.removeShutdownHook(shutdownHook, getClass().getSimpleName(), LOG);
                    }
                    tryRetrieveAndLogApplicationReport(yarnClusterDescriptor.getYarnClient(), yarnApplicationId);
                }
            }
        }
    } finally {
        try {
            yarnClusterDescriptor.close();
        } catch (Exception e) {
            LOG.info("Could not properly close the yarn cluster descriptor.", e);
        }
    }
    return 0;
}
Also used : ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) SecurityConfiguration(org.apache.flink.runtime.security.SecurityConfiguration) Configuration(org.apache.flink.configuration.Configuration) GlobalConfiguration(org.apache.flink.configuration.GlobalConfiguration) ClusterSpecification(org.apache.flink.client.deployment.ClusterSpecification) FlinkException(org.apache.flink.util.FlinkException) CliArgsException(org.apache.flink.client.cli.CliArgsException) ConfigurationException(org.apache.flink.util.ConfigurationException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) UndeclaredThrowableException(java.lang.reflect.UndeclaredThrowableException) FlinkException(org.apache.flink.util.FlinkException) CommandLine(org.apache.commons.cli.CommandLine) ScheduledExecutorServiceAdapter(org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) YarnClusterDescriptor(org.apache.flink.yarn.YarnClusterDescriptor)

Example 8 with ClusterSpecification

use of org.apache.flink.client.deployment.ClusterSpecification in project flink by apache.

In class FlinkYarnSessionCliTest, method testJobManagerMemoryPropertyWithConfigDefaultValue.

/**
 * Verifies that, when no options are given on the command line, the job manager and task
 * manager memory of the resulting cluster specification both equal the value the
 * configuration was created with.
 */
@Test
public void testJobManagerMemoryPropertyWithConfigDefaultValue() throws Exception {
    final int expectedMemoryMB = 2048;
    final Configuration config = createConfigurationWithJmAndTmTotalMemory(expectedMemoryMB);
    final FlinkYarnSessionCli cli = createFlinkYarnSessionCli(config);

    // No command line arguments: only the values already present in the configuration apply.
    final String[] noArgs = new String[0];
    final CommandLine parsedOptions = cli.parseCommandLineOptions(noArgs, false);
    config.addAll(cli.toConfiguration(parsedOptions));

    final ClusterClientFactory<ApplicationId> factory = getClusterClientFactory(config);
    final ClusterSpecification spec = factory.getClusterSpecification(config);

    assertThat(spec.getMasterMemoryMB(), is(expectedMemoryMB));
    assertThat(spec.getTaskManagerMemoryMB(), is(expectedMemoryMB));
}
Also used : CustomCommandLine(org.apache.flink.client.cli.CustomCommandLine) CommandLine(org.apache.commons.cli.CommandLine) Configuration(org.apache.flink.configuration.Configuration) ClusterSpecification(org.apache.flink.client.deployment.ClusterSpecification) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) FlinkYarnSessionCli(org.apache.flink.yarn.cli.FlinkYarnSessionCli) Test(org.junit.Test)

Example 9 with ClusterSpecification

use of org.apache.flink.client.deployment.ClusterSpecification in project flink by apache.

In class FlinkYarnSessionCliTest, method testHeapMemoryPropertyWithOldConfigKey.

/**
 * Verifies that memory sizes set through the old heap memory config keys for the job manager
 * and the task manager show up unchanged in the resulting cluster specification.
 */
@Test
public void testHeapMemoryPropertyWithOldConfigKey() throws Exception {
    final int jobManagerMemoryMB = 2048;
    final int taskManagerMemoryMB = 4096;

    final Configuration config = new Configuration();
    config.setInteger(JobManagerOptions.JOB_MANAGER_HEAP_MEMORY_MB, jobManagerMemoryMB);
    config.setInteger(TaskManagerOptions.TASK_MANAGER_HEAP_MEMORY_MB, taskManagerMemoryMB);

    final FlinkYarnSessionCli cli = createFlinkYarnSessionCli(config);
    // No command line arguments are passed, so only the config keys above are in effect.
    final CommandLine parsedOptions = cli.parseCommandLineOptions(new String[0], false);
    config.addAll(cli.toConfiguration(parsedOptions));

    final ClusterClientFactory<ApplicationId> factory = getClusterClientFactory(config);
    final ClusterSpecification spec = factory.getClusterSpecification(config);

    assertThat(spec.getMasterMemoryMB(), is(jobManagerMemoryMB));
    assertThat(spec.getTaskManagerMemoryMB(), is(taskManagerMemoryMB));
}
Also used : CustomCommandLine(org.apache.flink.client.cli.CustomCommandLine) CommandLine(org.apache.commons.cli.CommandLine) Configuration(org.apache.flink.configuration.Configuration) ClusterSpecification(org.apache.flink.client.deployment.ClusterSpecification) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) FlinkYarnSessionCli(org.apache.flink.yarn.cli.FlinkYarnSessionCli) Test(org.junit.Test)

Example 10 with ClusterSpecification

use of org.apache.flink.client.deployment.ClusterSpecification in project flink by apache.

In class FlinkYarnSessionCliTest, method testCorrectSettingOfMaxSlots.

/**
 * Verifies that the value passed with the {@code -ys} command line option is reported as the
 * number of slots per task manager in the resulting cluster specification.
 */
@Test
public void testCorrectSettingOfMaxSlots() throws Exception {
    final String[] cliArgs = {"-ys", "3"};
    final Configuration config = createConfigurationWithJmAndTmTotalMemory(2048);
    final FlinkYarnSessionCli cli = createFlinkYarnSessionCli(config);

    final CommandLine parsedOptions = cli.parseCommandLineOptions(cliArgs, true);
    config.addAll(cli.toConfiguration(parsedOptions));

    final ClusterClientFactory<ApplicationId> factory = getClusterClientFactory(config);
    final ClusterSpecification spec = factory.getClusterSpecification(config);

    // "-ys 3" was passed above, so exactly three slots per task manager are expected.
    assertEquals(3, spec.getSlotsPerTaskManager());
}
Also used : CustomCommandLine(org.apache.flink.client.cli.CustomCommandLine) CommandLine(org.apache.commons.cli.CommandLine) Configuration(org.apache.flink.configuration.Configuration) ClusterSpecification(org.apache.flink.client.deployment.ClusterSpecification) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) FlinkYarnSessionCli(org.apache.flink.yarn.cli.FlinkYarnSessionCli) Test(org.junit.Test)

Aggregations

ClusterSpecification (org.apache.flink.client.deployment.ClusterSpecification)28 Configuration (org.apache.flink.configuration.Configuration)20 Test (org.junit.Test)17 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)15 CommandLine (org.apache.commons.cli.CommandLine)9 CustomCommandLine (org.apache.flink.client.cli.CustomCommandLine)8 FlinkYarnSessionCli (org.apache.flink.yarn.cli.FlinkYarnSessionCli)8 File (java.io.File)5 ClusterDeploymentException (org.apache.flink.client.deployment.ClusterDeploymentException)4 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)4 FlinkException (org.apache.flink.util.FlinkException)4 Path (org.apache.hadoop.fs.Path)4 IOException (java.io.IOException)3 ClusterRetrieveException (org.apache.flink.client.deployment.ClusterRetrieveException)3 ApplicationConfiguration (org.apache.flink.client.deployment.application.ApplicationConfiguration)3 IllegalConfigurationException (org.apache.flink.configuration.IllegalConfigurationException)3 KubernetesJobManagerParameters (org.apache.flink.kubernetes.kubeclient.parameters.KubernetesJobManagerParameters)3 ClusterEntrypoint (org.apache.flink.runtime.entrypoint.ClusterEntrypoint)3 ApplicationReport (org.apache.hadoop.yarn.api.records.ApplicationReport)3 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)3