Search in sources :

Example 11 with ExecutorThreadFactory

Use of org.apache.flink.util.concurrent.ExecutorThreadFactory in the Apache Flink project.

In the class TaskManagerRunner, the method startTaskManager.

/**
 * Builds a {@link TaskExecutor} from the given configuration and services.
 *
 * <p>The method derives the resource spec, the services configuration and the metric groups
 * from {@code configuration}, creates a fixed-size IO thread pool, instantiates the
 * {@link TaskManagerServices}, registers the memory metric group and finally constructs the
 * {@link TaskExecutor}.
 *
 * <p>If any step after the creation of the IO executor fails, the executor is shut down before
 * the failure is propagated, so that the pool is not leaked when start-up is aborted.
 *
 * @param configuration configuration to read the resource spec and service settings from; must
 *     not be null
 * @param resourceID resource ID of the task manager; must not be null
 * @param rpcService RPC service whose address is used as the external address; must not be null
 * @param highAvailabilityServices high availability services handed to the task executor; must
 *     not be null
 * @param heartbeatServices heartbeat services handed to the task executor
 * @param metricRegistry registry used to create the metric groups and to look up the metric
 *     query service address
 * @param taskExecutorBlobService blob service; its permanent blob service is passed to the task
 *     manager services
 * @param localCommunicationOnly forwarded to the task manager services configuration
 * @param externalResourceInfoProvider forwarded to the task executor
 * @param workingDirectory working directory; its tmp directory is used for the task manager
 *     configuration
 * @param fatalErrorHandler handler forwarded to the task manager services and the task executor
 * @return the newly created task executor
 * @throws Exception if any of the start-up steps fails
 */
public static TaskExecutor startTaskManager(Configuration configuration, ResourceID resourceID, RpcService rpcService, HighAvailabilityServices highAvailabilityServices, HeartbeatServices heartbeatServices, MetricRegistry metricRegistry, TaskExecutorBlobService taskExecutorBlobService, boolean localCommunicationOnly, ExternalResourceInfoProvider externalResourceInfoProvider, WorkingDirectory workingDirectory, FatalErrorHandler fatalErrorHandler) throws Exception {
    checkNotNull(configuration);
    checkNotNull(resourceID);
    checkNotNull(rpcService);
    checkNotNull(highAvailabilityServices);
    LOG.info("Starting TaskManager with ResourceID: {}", resourceID.getStringWithMetadata());
    String externalAddress = rpcService.getAddress();
    final TaskExecutorResourceSpec taskExecutorResourceSpec = TaskExecutorResourceUtils.resourceSpecFromConfiguration(configuration);
    TaskManagerServicesConfiguration taskManagerServicesConfiguration = TaskManagerServicesConfiguration.fromConfiguration(configuration, resourceID, externalAddress, localCommunicationOnly, taskExecutorResourceSpec, workingDirectory);
    Tuple2<TaskManagerMetricGroup, MetricGroup> taskManagerMetricGroup = MetricUtils.instantiateTaskManagerMetricGroup(metricRegistry, externalAddress, resourceID, taskManagerServicesConfiguration.getSystemResourceMetricsProbingInterval());
    final ExecutorService ioExecutor = Executors.newFixedThreadPool(taskManagerServicesConfiguration.getNumIoThreads(), new ExecutorThreadFactory("flink-taskexecutor-io"));
    try {
        TaskManagerServices taskManagerServices = TaskManagerServices.fromConfiguration(taskManagerServicesConfiguration, taskExecutorBlobService.getPermanentBlobService(), taskManagerMetricGroup.f1, ioExecutor, fatalErrorHandler, workingDirectory);
        MetricUtils.instantiateFlinkMemoryMetricGroup(taskManagerMetricGroup.f1, taskManagerServices.getTaskSlotTable(), taskManagerServices::getManagedMemorySize);
        TaskManagerConfiguration taskManagerConfiguration = TaskManagerConfiguration.fromConfiguration(configuration, taskExecutorResourceSpec, externalAddress, workingDirectory.getTmpDirectory());
        String metricQueryServiceAddress = metricRegistry.getMetricQueryServiceGatewayRpcAddress();
        return new TaskExecutor(rpcService, taskManagerConfiguration, highAvailabilityServices, taskManagerServices, externalResourceInfoProvider, heartbeatServices, taskManagerMetricGroup.f0, metricQueryServiceAddress, taskExecutorBlobService, fatalErrorHandler, new TaskExecutorPartitionTrackerImpl(taskManagerServices.getShuffleEnvironment()));
    } catch (Throwable t) {
        // Nobody else holds a usable reference to the pool once start-up is aborted, so stop it
        // here; otherwise any worker threads it has already started would never be reclaimed.
        ioExecutor.shutdownNow();
        throw t;
    }
}
Also used : TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) MetricGroup(org.apache.flink.metrics.MetricGroup) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) ExecutorThreadFactory(org.apache.flink.util.concurrent.ExecutorThreadFactory) TaskExecutorPartitionTrackerImpl(org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTrackerImpl) ExecutorService(java.util.concurrent.ExecutorService)

Example 12 with ExecutorThreadFactory

Use of org.apache.flink.util.concurrent.ExecutorThreadFactory in the Apache Flink project.

In the class MiniCluster, the method start.

/**
 * Starts the mini cluster, based on the configured properties.
 *
 * <p>While holding {@code lock}, the method first checks that the cluster is not already
 * running and then brings up the components in this order: working directory, IO format
 * classes, RPC system, metric registry, RPC service(s), metric query service, process metric
 * group, IO executor, high availability services, blob server, heartbeat services, blob cache
 * service, task managers, dispatcher/resource manager components and finally the leader
 * retrievers for resource manager, dispatcher and REST endpoint.
 *
 * <p>If any of these steps throws an {@link Exception}, {@code close()} is called to clean up
 * and the original exception is rethrown; an exception raised by {@code close()} is attached
 * to it as a suppressed exception. Only after a fully successful start-up is a new termination
 * future created and the {@code running} flag set.
 *
 * @throws Exception This method passes on any exception that occurs during the startup of the
 *     mini cluster.
 */
public void start() throws Exception {
    synchronized (lock) {
        // starting an already running cluster is rejected
        checkState(!running, "MiniCluster is already running");
        LOG.info("Starting Flink Mini Cluster");
        LOG.debug("Using configuration {}", miniClusterConfiguration);
        final Configuration configuration = miniClusterConfiguration.getConfiguration();
        // decides between one RPC service for all components and dedicated ones per component
        final boolean useSingleRpcService = miniClusterConfiguration.getRpcServiceSharing() == RpcServiceSharing.SHARED;
        try {
            // the working directory file is derived from the configuration and a
            // "minicluster_"-prefixed, freshly generated ResourceID
            workingDirectory = WorkingDirectory.create(ClusterEntrypointUtils.generateWorkingDirectoryFile(configuration, Optional.empty(), "minicluster_" + ResourceID.generate()));
            initializeIOFormatClasses(configuration);
            rpcSystem = rpcSystemSupplier.get();
            LOG.info("Starting Metrics Registry");
            // the registry is created with the maximum message size reported by the RPC system
            metricRegistry = createMetricRegistry(configuration, rpcSystem.deref().getMaximumMessageSizeInBytes(configuration));
            // bring up all the RPC services
            LOG.info("Starting RPC Service(s)");
            final RpcServiceFactory dispatcherResourceManagerComponentRpcServiceFactory;
            final RpcService metricQueryServiceRpcService;
            if (useSingleRpcService) {
                // we always need the 'commonRpcService' for auxiliary calls
                commonRpcService = createLocalRpcService(configuration, rpcSystem.deref());
                // task managers and dispatcher/resource manager share the same factory, which
                // wraps the common RPC service
                final CommonRpcServiceFactory commonRpcServiceFactory = new CommonRpcServiceFactory(commonRpcService);
                taskManagerRpcServiceFactory = commonRpcServiceFactory;
                dispatcherResourceManagerComponentRpcServiceFactory = commonRpcServiceFactory;
                // the metric query service still gets an RPC service of its own
                metricQueryServiceRpcService = MetricUtils.startLocalMetricsRpcService(configuration, rpcSystem.deref());
            } else {
                // start a new service per component, possibly with custom bind addresses
                final String jobManagerExternalAddress = miniClusterConfiguration.getJobManagerExternalAddress();
                final String taskManagerExternalAddress = miniClusterConfiguration.getTaskManagerExternalAddress();
                final String jobManagerExternalPortRange = miniClusterConfiguration.getJobManagerExternalPortRange();
                final String taskManagerExternalPortRange = miniClusterConfiguration.getTaskManagerExternalPortRange();
                final String jobManagerBindAddress = miniClusterConfiguration.getJobManagerBindAddress();
                final String taskManagerBindAddress = miniClusterConfiguration.getTaskManagerBindAddress();
                dispatcherResourceManagerComponentRpcServiceFactory = new DedicatedRpcServiceFactory(configuration, jobManagerExternalAddress, jobManagerExternalPortRange, jobManagerBindAddress, rpcSystem.deref());
                taskManagerRpcServiceFactory = new DedicatedRpcServiceFactory(configuration, taskManagerExternalAddress, taskManagerExternalPortRange, taskManagerBindAddress, rpcSystem.deref());
                // we always need the 'commonRpcService' for auxiliary calls
                // bind to the JobManager address with port 0
                commonRpcService = createRemoteRpcService(configuration, jobManagerBindAddress, 0, rpcSystem.deref());
                // NOTE(review): the meaning of the null third argument is not visible here;
                // confirm against MetricUtils.startRemoteMetricsRpcService
                metricQueryServiceRpcService = MetricUtils.startRemoteMetricsRpcService(configuration, commonRpcService.getAddress(), null, rpcSystem.deref());
            }
            // NOTE(review): the second argument is passed as null; its meaning is not visible
            // here, confirm against the metric registry's startQueryService
            metricRegistry.startQueryService(metricQueryServiceRpcService, null);
            processMetricGroup = MetricUtils.instantiateProcessMetricGroup(metricRegistry, RpcUtils.getHostname(commonRpcService), ConfigurationUtils.getSystemResourceMetricsProbingInterval(configuration));
            // fixed-size pool sized from the configuration; presumably "mini-cluster-io" is the
            // thread name prefix used by the factory - confirm against ExecutorThreadFactory
            ioExecutor = Executors.newFixedThreadPool(ClusterEntrypointUtils.getPoolSize(configuration), new ExecutorThreadFactory("mini-cluster-io"));
            haServices = createHighAvailabilityServices(configuration, ioExecutor);
            // the blob server stores into the working directory's blob storage directory and
            // is given a blob store created by the HA services
            blobServer = BlobUtils.createBlobServer(configuration, Reference.borrowed(workingDirectory.getBlobStorageDirectory()), haServices.createBlobStore());
            blobServer.start();
            heartbeatServices = HeartbeatServices.fromConfiguration(configuration);
            // the blob cache is pointed at the just-started blob server via the local host
            // address and the server's port
            blobCacheService = BlobUtils.createBlobCacheService(configuration, Reference.borrowed(workingDirectory.getBlobStorageDirectory()), haServices.createBlobStore(), new InetSocketAddress(InetAddress.getLocalHost(), blobServer.getPort()));
            startTaskManagers();
            MetricQueryServiceRetriever metricQueryServiceRetriever = new RpcMetricQueryServiceRetriever(metricRegistry.getMetricQueryServiceRpcService());
            setupDispatcherResourceManagerComponents(configuration, dispatcherResourceManagerComponentRpcServiceFactory, metricQueryServiceRetriever);
            // obtain the leader retrieval services from the HA services
            resourceManagerLeaderRetriever = haServices.getResourceManagerLeaderRetriever();
            dispatcherLeaderRetriever = haServices.getDispatcherLeaderRetriever();
            clusterRestEndpointLeaderRetrievalService = haServices.getClusterRestEndpointLeaderRetriever();
            // both gateway retrievers use the same backoff settings; presumably (21, 5 ms, 20 ms)
            // are retry count, initial delay and maximum delay - confirm against
            // ExponentialBackoffRetryStrategy
            dispatcherGatewayRetriever = new RpcGatewayRetriever<>(commonRpcService, DispatcherGateway.class, DispatcherId::fromUuid, new ExponentialBackoffRetryStrategy(21, Duration.ofMillis(5L), Duration.ofMillis(20L)));
            resourceManagerGatewayRetriever = new RpcGatewayRetriever<>(commonRpcService, ResourceManagerGateway.class, ResourceManagerId::fromUuid, new ExponentialBackoffRetryStrategy(21, Duration.ofMillis(5L), Duration.ofMillis(20L)));
            webMonitorLeaderRetriever = new LeaderRetriever();
            // start each leader retrieval service with its corresponding retriever
            resourceManagerLeaderRetriever.start(resourceManagerGatewayRetriever);
            dispatcherLeaderRetriever.start(dispatcherGatewayRetriever);
            clusterRestEndpointLeaderRetrievalService.start(webMonitorLeaderRetriever);
        } catch (Exception e) {
            // cleanup everything
            try {
                close();
            } catch (Exception ee) {
                // keep the start-up failure as the primary exception and attach the cleanup
                // failure to it
                e.addSuppressed(ee);
            }
            throw e;
        }
        // create a new termination future
        terminationFuture = new CompletableFuture<>();
        // now officially mark this as running
        running = true;
        LOG.info("Flink Mini Cluster started successfully");
    }
}
Also used : RpcMetricQueryServiceRetriever(org.apache.flink.runtime.webmonitor.retriever.impl.RpcMetricQueryServiceRetriever) MetricQueryServiceRetriever(org.apache.flink.runtime.webmonitor.retriever.MetricQueryServiceRetriever) ExponentialBackoffRetryStrategy(org.apache.flink.util.concurrent.ExponentialBackoffRetryStrategy) MetricRegistryConfiguration(org.apache.flink.runtime.metrics.MetricRegistryConfiguration) Configuration(org.apache.flink.configuration.Configuration) InetSocketAddress(java.net.InetSocketAddress) RpcMetricQueryServiceRetriever(org.apache.flink.runtime.webmonitor.retriever.impl.RpcMetricQueryServiceRetriever) DispatcherGateway(org.apache.flink.runtime.dispatcher.DispatcherGateway) FlinkException(org.apache.flink.util.FlinkException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) CompletionException(java.util.concurrent.CompletionException) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) ExecutorThreadFactory(org.apache.flink.util.concurrent.ExecutorThreadFactory) RpcService(org.apache.flink.runtime.rpc.RpcService) LeaderRetriever(org.apache.flink.runtime.webmonitor.retriever.LeaderRetriever)

Aggregations

ExecutorThreadFactory (org.apache.flink.util.concurrent.ExecutorThreadFactory)12 ExecutorService (java.util.concurrent.ExecutorService)7 IOException (java.io.IOException)5 ExecutionException (java.util.concurrent.ExecutionException)3 RpcService (org.apache.flink.runtime.rpc.RpcService)3 InetSocketAddress (java.net.InetSocketAddress)2 CompletableFuture (java.util.concurrent.CompletableFuture)2 VisibleForTesting (org.apache.flink.annotation.VisibleForTesting)2 Configuration (org.apache.flink.configuration.Configuration)2 IllegalConfigurationException (org.apache.flink.configuration.IllegalConfigurationException)2 MetricGroup (org.apache.flink.metrics.MetricGroup)2 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)2 ActorRef (akka.actor.ActorRef)1 File (java.io.File)1 UndeclaredThrowableException (java.lang.reflect.UndeclaredThrowableException)1 BindException (java.net.BindException)1 InetAddress (java.net.InetAddress)1 URL (java.net.URL)1 SQLException (java.sql.SQLException)1 Duration (java.time.Duration)1