Search in sources :

Example 1 with TaskManagerRunner

use of org.apache.flink.runtime.taskexecutor.TaskManagerRunner in project flink by apache.

the class YarnTaskExecutorRunner method runTaskExecutor.

// ------------------------------------------------------------------------
//  Core work method
// ------------------------------------------------------------------------
/**
 * Runs the YARN task executor from start-up until termination; must run as a privileged action.
 *
 * <p>The method creates the common services, builds and starts the {@link TaskManagerRunner},
 * and then waits on the runner's termination future. {@code shutdown()} is invoked on both
 * the regular path and the failure path.
 *
 * @param config the configuration to run with; its task manager hostname entry is overwritten
 *               when the node id environment variable is set
 * @return {@code 0} when the runner terminated and shutdown completed without an error,
 *         {@code INIT_ERROR_EXIT_CODE} when any {@code Throwable} was raised along the way
 */
protected int runTaskExecutor(Configuration config) {
    try {
        // ---- (1) create common services

        // On YARN the container id doubles as the resource id of the task executor.
        final String yarnContainerId = ENV.get(YarnFlinkResourceManager.ENV_FLINK_CONTAINER_ID);
        Preconditions.checkArgument(yarnContainerId != null, "ContainerId variable %s not set", YarnFlinkResourceManager.ENV_FLINK_CONTAINER_ID);

        // If a hostname was passed along by the job manager, it takes precedence.
        final String nodeHostname = ENV.get(YarnResourceManager.ENV_FLINK_NODE_ID);
        if (nodeHostname != null) {
            config.setString(ConfigConstants.TASK_MANAGER_HOSTNAME_KEY, nodeHostname);
        }

        final ResourceID executorResourceId = new ResourceID(yarnContainerId);
        LOG.info("YARN assigned resource id {} for the task executor.", executorResourceId.toString());

        haServices = HighAvailabilityServicesUtils.createAvailableOrEmbeddedServices(config);
        final HeartbeatServices heartbeats = HeartbeatServices.fromConfiguration(config);
        metricRegistry = new MetricRegistry(MetricRegistryConfiguration.fromConfiguration(config));

        // ---- (2) init task manager runner
        taskExecutorRpcService = TaskManagerRunner.createRpcService(config, haServices);
        taskManagerRunner = new TaskManagerRunner(config, executorResourceId, taskExecutorRpcService, haServices, heartbeats, metricRegistry);

        // ---- (3) start the task manager runner
        taskManagerRunner.start();
        LOG.debug("YARN task executor started");

        // Everything is started; wait until the runner is done or the process is killed.
        taskManagerRunner.getTerminationFuture().get();
        LOG.info("YARN task manager runner finished");

        shutdown();
        return 0;
    } catch (Throwable failure) {
        // Whatever went wrong must end up in the log before the services are torn down.
        LOG.error("YARN task executor initialization failed", failure);
        shutdown();
        return INIT_ERROR_EXIT_CODE;
    }
}
Also used : HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskManagerRunner(org.apache.flink.runtime.taskexecutor.TaskManagerRunner) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry)

Example 2 with TaskManagerRunner

use of org.apache.flink.runtime.taskexecutor.TaskManagerRunner in project flink by apache.

the class MiniCluster method shutdownInternally.

/**
 * Shuts down every component of the mini cluster that is currently set, in this order:
 * job dispatcher, resource manager runners, task manager runners, RPC services,
 * high-availability services, and the metric registry.
 *
 * <p>A failure in one component does not stop the shutdown of the remaining ones. The first
 * failure is remembered and later failures are attached to it via {@code firstOrSuppressed};
 * the collected failure is rethrown once all components have been processed. Each component
 * reference is cleared after its shutdown attempt, whether or not the attempt succeeded.
 *
 * <p>Must be called while holding {@code lock}.
 *
 * @throws Exception if at least one component failed to shut down
 */
@GuardedBy("lock")
private void shutdownInternally() throws Exception {
    // this should always be called under the lock
    assert Thread.holdsLock(lock);
    // collect the first exception, but continue and add all successive
    // exceptions as suppressed
    Throwable exception = null;
    // cancel all jobs and shut down the job dispatcher
    if (jobDispatcher != null) {
        try {
            jobDispatcher.shutdown();
        } catch (Exception e) {
            // nothing can have failed before this point, so this is always the first failure
            exception = e;
        }
        jobDispatcher = null;
    }
    if (resourceManagerRunners != null) {
        for (ResourceManagerRunner rm : resourceManagerRunners) {
            if (rm != null) {
                try {
                    rm.shutDown();
                } catch (Throwable t) {
                    exception = firstOrSuppressed(t, exception);
                }
            }
        }
        resourceManagerRunners = null;
    }
    if (taskManagerRunners != null) {
        for (TaskManagerRunner tm : taskManagerRunners) {
            if (tm != null) {
                try {
                    tm.shutDown(null);
                } catch (Throwable t) {
                    exception = firstOrSuppressed(t, exception);
                }
            }
        }
        taskManagerRunners = null;
    }
    // shut down the RpcServices
    exception = shutDownRpc(commonRpcService, exception);
    exception = shutDownRpcs(jobManagerRpcServices, exception);
    exception = shutDownRpcs(taskManagerRpcServices, exception);
    exception = shutDownRpcs(resourceManagerRpcServices, exception);
    commonRpcService = null;
    jobManagerRpcServices = null;
    taskManagerRpcServices = null;
    resourceManagerRpcServices = null;
    // shut down high-availability services
    if (haServices != null) {
        try {
            haServices.closeAndCleanupAllData();
        } catch (Exception e) {
            exception = firstOrSuppressed(e, exception);
        }
        haServices = null;
    }
    // metrics shutdown
    if (metricRegistry != null) {
        try {
            metricRegistry.shutdown();
        } catch (Throwable t) {
            // Without this guard a failure here would escape immediately, dropping every
            // failure collected above and leaving the registry reference set.
            exception = firstOrSuppressed(t, exception);
        }
        metricRegistry = null;
    }
    // if anything went wrong, throw the first error with all the additional suppressed exceptions
    if (exception != null) {
        ExceptionUtils.rethrowException(exception, "Error while shutting down mini cluster");
    }
}
Also used : ResourceManagerRunner(org.apache.flink.runtime.resourcemanager.ResourceManagerRunner) TaskManagerRunner(org.apache.flink.runtime.taskexecutor.TaskManagerRunner) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) GuardedBy(javax.annotation.concurrent.GuardedBy)

Example 3 with TaskManagerRunner

use of org.apache.flink.runtime.taskexecutor.TaskManagerRunner in project flink by apache.

the class MiniCluster method startTaskManagers.

/**
 * Creates and starts the requested number of task manager runners.
 *
 * <p>Each runner receives a freshly generated random UUID as its resource id and the RPC
 * service stored at the same index in {@code taskManagerRpcServices}. The runners are
 * constructed and started one after another, in index order.
 *
 * @param configuration the configuration handed to every runner
 * @param haServices the high-availability services handed to every runner
 * @param metricRegistry the metric registry handed to every runner
 * @param numTaskManagers the number of runners to create
 * @param taskManagerRpcServices the RPC services, read at indices
 *                               {@code 0 .. numTaskManagers - 1}
 * @return the started runners, in creation order
 * @throws Exception if creating or starting a runner fails
 */
protected TaskManagerRunner[] startTaskManagers(Configuration configuration, HighAvailabilityServices haServices, MetricRegistry metricRegistry, int numTaskManagers, RpcService[] taskManagerRpcServices) throws Exception {
    // The local-communication flag is set only when exactly one task manager is started.
    final boolean singleTaskManager = (numTaskManagers == 1);
    final TaskManagerRunner[] startedRunners = new TaskManagerRunner[numTaskManagers];

    for (int index = 0; index < numTaskManagers; index++) {
        final ResourceID randomResourceId = new ResourceID(UUID.randomUUID().toString());
        final TaskManagerRunner runner = new TaskManagerRunner(
                configuration,
                randomResourceId,
                taskManagerRpcServices[index],
                haServices,
                heartbeatServices,
                metricRegistry,
                singleTaskManager);

        startedRunners[index] = runner;
        runner.start();
    }

    return startedRunners;
}
Also used : ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskManagerRunner(org.apache.flink.runtime.taskexecutor.TaskManagerRunner)

Aggregations

TaskManagerRunner (org.apache.flink.runtime.taskexecutor.TaskManagerRunner)3 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)2 GuardedBy (javax.annotation.concurrent.GuardedBy)1 JobExecutionException (org.apache.flink.runtime.client.JobExecutionException)1 HeartbeatServices (org.apache.flink.runtime.heartbeat.HeartbeatServices)1 MetricRegistry (org.apache.flink.runtime.metrics.MetricRegistry)1 ResourceManagerRunner (org.apache.flink.runtime.resourcemanager.ResourceManagerRunner)1