Search in sources :

Example 1 with DispatcherId

use of org.apache.flink.runtime.dispatcher.DispatcherId in project flink by apache.

the class DefaultDispatcherResourceManagerComponentFactory method create.

@Override
public DispatcherResourceManagerComponent create(Configuration configuration, ResourceID resourceId, Executor ioExecutor, RpcService rpcService, HighAvailabilityServices highAvailabilityServices, BlobServer blobServer, HeartbeatServices heartbeatServices, MetricRegistry metricRegistry, ExecutionGraphInfoStore executionGraphInfoStore, MetricQueryServiceRetriever metricQueryServiceRetriever, FatalErrorHandler fatalErrorHandler) throws Exception {
    LeaderRetrievalService dispatcherLeaderRetrievalService = null;
    LeaderRetrievalService resourceManagerRetrievalService = null;
    WebMonitorEndpoint<?> webMonitorEndpoint = null;
    ResourceManagerService resourceManagerService = null;
    DispatcherRunner dispatcherRunner = null;
    try {
        dispatcherLeaderRetrievalService = highAvailabilityServices.getDispatcherLeaderRetriever();
        resourceManagerRetrievalService = highAvailabilityServices.getResourceManagerLeaderRetriever();
        final LeaderGatewayRetriever<DispatcherGateway> dispatcherGatewayRetriever = new RpcGatewayRetriever<>(rpcService, DispatcherGateway.class, DispatcherId::fromUuid, new ExponentialBackoffRetryStrategy(12, Duration.ofMillis(10), Duration.ofMillis(50)));
        final LeaderGatewayRetriever<ResourceManagerGateway> resourceManagerGatewayRetriever = new RpcGatewayRetriever<>(rpcService, ResourceManagerGateway.class, ResourceManagerId::fromUuid, new ExponentialBackoffRetryStrategy(12, Duration.ofMillis(10), Duration.ofMillis(50)));
        final ScheduledExecutorService executor = WebMonitorEndpoint.createExecutorService(configuration.getInteger(RestOptions.SERVER_NUM_THREADS), configuration.getInteger(RestOptions.SERVER_THREAD_PRIORITY), "DispatcherRestEndpoint");
        final long updateInterval = configuration.getLong(MetricOptions.METRIC_FETCHER_UPDATE_INTERVAL);
        final MetricFetcher metricFetcher = updateInterval == 0 ? VoidMetricFetcher.INSTANCE : MetricFetcherImpl.fromConfiguration(configuration, metricQueryServiceRetriever, dispatcherGatewayRetriever, executor);
        webMonitorEndpoint = restEndpointFactory.createRestEndpoint(configuration, dispatcherGatewayRetriever, resourceManagerGatewayRetriever, blobServer, executor, metricFetcher, highAvailabilityServices.getClusterRestEndpointLeaderElectionService(), fatalErrorHandler);
        log.debug("Starting Dispatcher REST endpoint.");
        webMonitorEndpoint.start();
        final String hostname = RpcUtils.getHostname(rpcService);
        resourceManagerService = ResourceManagerServiceImpl.create(resourceManagerFactory, configuration, resourceId, rpcService, highAvailabilityServices, heartbeatServices, fatalErrorHandler, new ClusterInformation(hostname, blobServer.getPort()), webMonitorEndpoint.getRestBaseUrl(), metricRegistry, hostname, ioExecutor);
        final HistoryServerArchivist historyServerArchivist = HistoryServerArchivist.createHistoryServerArchivist(configuration, webMonitorEndpoint, ioExecutor);
        final DispatcherOperationCaches dispatcherOperationCaches = new DispatcherOperationCaches(configuration.get(RestOptions.ASYNC_OPERATION_STORE_DURATION));
        final PartialDispatcherServices partialDispatcherServices = new PartialDispatcherServices(configuration, highAvailabilityServices, resourceManagerGatewayRetriever, blobServer, heartbeatServices, () -> JobManagerMetricGroup.createJobManagerMetricGroup(metricRegistry, hostname), executionGraphInfoStore, fatalErrorHandler, historyServerArchivist, metricRegistry.getMetricQueryServiceGatewayRpcAddress(), ioExecutor, dispatcherOperationCaches);
        log.debug("Starting Dispatcher.");
        dispatcherRunner = dispatcherRunnerFactory.createDispatcherRunner(highAvailabilityServices.getDispatcherLeaderElectionService(), fatalErrorHandler, new HaServicesJobPersistenceComponentFactory(highAvailabilityServices), ioExecutor, rpcService, partialDispatcherServices);
        log.debug("Starting ResourceManagerService.");
        resourceManagerService.start();
        resourceManagerRetrievalService.start(resourceManagerGatewayRetriever);
        dispatcherLeaderRetrievalService.start(dispatcherGatewayRetriever);
        return new DispatcherResourceManagerComponent(dispatcherRunner, resourceManagerService, dispatcherLeaderRetrievalService, resourceManagerRetrievalService, webMonitorEndpoint, fatalErrorHandler, dispatcherOperationCaches);
    } catch (Exception exception) {
        // clean up all started components
        if (dispatcherLeaderRetrievalService != null) {
            try {
                dispatcherLeaderRetrievalService.stop();
            } catch (Exception e) {
                exception = ExceptionUtils.firstOrSuppressed(e, exception);
            }
        }
        if (resourceManagerRetrievalService != null) {
            try {
                resourceManagerRetrievalService.stop();
            } catch (Exception e) {
                exception = ExceptionUtils.firstOrSuppressed(e, exception);
            }
        }
        final Collection<CompletableFuture<Void>> terminationFutures = new ArrayList<>(3);
        if (webMonitorEndpoint != null) {
            terminationFutures.add(webMonitorEndpoint.closeAsync());
        }
        if (resourceManagerService != null) {
            terminationFutures.add(resourceManagerService.closeAsync());
        }
        if (dispatcherRunner != null) {
            terminationFutures.add(dispatcherRunner.closeAsync());
        }
        final FutureUtils.ConjunctFuture<Void> terminationFuture = FutureUtils.completeAll(terminationFutures);
        try {
            terminationFuture.get();
        } catch (Exception e) {
            exception = ExceptionUtils.firstOrSuppressed(e, exception);
        }
        throw new FlinkException("Could not create the DispatcherResourceManagerComponent.", exception);
    }
}
Also used : ExponentialBackoffRetryStrategy(org.apache.flink.util.concurrent.ExponentialBackoffRetryStrategy) DispatcherGateway(org.apache.flink.runtime.dispatcher.DispatcherGateway) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) RpcGatewayRetriever(org.apache.flink.runtime.webmonitor.retriever.impl.RpcGatewayRetriever) DispatcherRunner(org.apache.flink.runtime.dispatcher.runner.DispatcherRunner) DispatcherOperationCaches(org.apache.flink.runtime.dispatcher.DispatcherOperationCaches) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) HistoryServerArchivist(org.apache.flink.runtime.dispatcher.HistoryServerArchivist) HaServicesJobPersistenceComponentFactory(org.apache.flink.runtime.jobmanager.HaServicesJobPersistenceComponentFactory) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) PartialDispatcherServices(org.apache.flink.runtime.dispatcher.PartialDispatcherServices) ResourceManagerService(org.apache.flink.runtime.resourcemanager.ResourceManagerService) DispatcherId(org.apache.flink.runtime.dispatcher.DispatcherId) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) VoidMetricFetcher(org.apache.flink.runtime.rest.handler.legacy.metrics.VoidMetricFetcher) MetricFetcher(org.apache.flink.runtime.rest.handler.legacy.metrics.MetricFetcher) FlinkException(org.apache.flink.util.FlinkException) FlinkException(org.apache.flink.util.FlinkException) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) Collection(java.util.Collection)

Example 2 with DispatcherId

use of org.apache.flink.runtime.dispatcher.DispatcherId in project flink by apache.

the class ApplicationDispatcherGatewayServiceFactory method create.

@Override
public AbstractDispatcherLeaderProcess.DispatcherGatewayService create(DispatcherId fencingToken, Collection<JobGraph> recoveredJobs, Collection<JobResult> recoveredDirtyJobResults, JobGraphWriter jobGraphWriter, JobResultStore jobResultStore) {
    final List<JobID> recoveredJobIds = getRecoveredJobIds(recoveredJobs);
    final Dispatcher dispatcher;
    try {
        dispatcher = dispatcherFactory.createDispatcher(rpcService, fencingToken, recoveredJobs, recoveredDirtyJobResults, (dispatcherGateway, scheduledExecutor, errorHandler) -> new ApplicationDispatcherBootstrap(application, recoveredJobIds, configuration, dispatcherGateway, scheduledExecutor, errorHandler), PartialDispatcherServicesWithJobPersistenceComponents.from(partialDispatcherServices, jobGraphWriter, jobResultStore));
    } catch (Exception e) {
        throw new FlinkRuntimeException("Could not create the Dispatcher rpc endpoint.", e);
    }
    dispatcher.start();
    return DefaultDispatcherGatewayService.from(dispatcher);
}
Also used : DispatcherId(org.apache.flink.runtime.dispatcher.DispatcherId) Dispatcher(org.apache.flink.runtime.dispatcher.Dispatcher) PartialDispatcherServices(org.apache.flink.runtime.dispatcher.PartialDispatcherServices) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) PartialDispatcherServicesWithJobPersistenceComponents(org.apache.flink.runtime.dispatcher.PartialDispatcherServicesWithJobPersistenceComponents) Collection(java.util.Collection) Configuration(org.apache.flink.configuration.Configuration) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) AbstractDispatcherLeaderProcess(org.apache.flink.runtime.dispatcher.runner.AbstractDispatcherLeaderProcess) Collectors(java.util.stream.Collectors) JobResult(org.apache.flink.runtime.jobmaster.JobResult) List(java.util.List) JobID(org.apache.flink.api.common.JobID) RpcService(org.apache.flink.runtime.rpc.RpcService) Internal(org.apache.flink.annotation.Internal) PackagedProgram(org.apache.flink.client.program.PackagedProgram) DispatcherFactory(org.apache.flink.runtime.dispatcher.DispatcherFactory) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) JobGraphWriter(org.apache.flink.runtime.jobmanager.JobGraphWriter) DefaultDispatcherGatewayService(org.apache.flink.runtime.dispatcher.runner.DefaultDispatcherGatewayService) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) Dispatcher(org.apache.flink.runtime.dispatcher.Dispatcher) JobID(org.apache.flink.api.common.JobID) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException)

Example 3 with DispatcherId

use of org.apache.flink.runtime.dispatcher.DispatcherId in project flink by apache.

the class DefaultDispatcherGatewayServiceFactory method create.

@Override
public AbstractDispatcherLeaderProcess.DispatcherGatewayService create(DispatcherId fencingToken, Collection<JobGraph> recoveredJobs, Collection<JobResult> recoveredDirtyJobResults, JobGraphWriter jobGraphWriter, JobResultStore jobResultStore) {
    final Dispatcher dispatcher;
    try {
        dispatcher = dispatcherFactory.createDispatcher(rpcService, fencingToken, recoveredJobs, recoveredDirtyJobResults, (dispatcherGateway, scheduledExecutor, errorHandler) -> new NoOpDispatcherBootstrap(), PartialDispatcherServicesWithJobPersistenceComponents.from(partialDispatcherServices, jobGraphWriter, jobResultStore));
    } catch (Exception e) {
        throw new FlinkRuntimeException("Could not create the Dispatcher rpc endpoint.", e);
    }
    dispatcher.start();
    return DefaultDispatcherGatewayService.from(dispatcher);
}
Also used : DispatcherId(org.apache.flink.runtime.dispatcher.DispatcherId) Dispatcher(org.apache.flink.runtime.dispatcher.Dispatcher) PartialDispatcherServices(org.apache.flink.runtime.dispatcher.PartialDispatcherServices) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) PartialDispatcherServicesWithJobPersistenceComponents(org.apache.flink.runtime.dispatcher.PartialDispatcherServicesWithJobPersistenceComponents) Collection(java.util.Collection) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobResult(org.apache.flink.runtime.jobmaster.JobResult) RpcService(org.apache.flink.runtime.rpc.RpcService) NoOpDispatcherBootstrap(org.apache.flink.runtime.dispatcher.NoOpDispatcherBootstrap) DispatcherFactory(org.apache.flink.runtime.dispatcher.DispatcherFactory) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) JobGraphWriter(org.apache.flink.runtime.jobmanager.JobGraphWriter) NoOpDispatcherBootstrap(org.apache.flink.runtime.dispatcher.NoOpDispatcherBootstrap) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) Dispatcher(org.apache.flink.runtime.dispatcher.Dispatcher) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException)

Aggregations

Collection (java.util.Collection)3 DispatcherId (org.apache.flink.runtime.dispatcher.DispatcherId)3 PartialDispatcherServices (org.apache.flink.runtime.dispatcher.PartialDispatcherServices)3 Dispatcher (org.apache.flink.runtime.dispatcher.Dispatcher)2 DispatcherFactory (org.apache.flink.runtime.dispatcher.DispatcherFactory)2 PartialDispatcherServicesWithJobPersistenceComponents (org.apache.flink.runtime.dispatcher.PartialDispatcherServicesWithJobPersistenceComponents)2 JobResultStore (org.apache.flink.runtime.highavailability.JobResultStore)2 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)2 JobGraphWriter (org.apache.flink.runtime.jobmanager.JobGraphWriter)2 JobResult (org.apache.flink.runtime.jobmaster.JobResult)2 RpcService (org.apache.flink.runtime.rpc.RpcService)2 FlinkRuntimeException (org.apache.flink.util.FlinkRuntimeException)2 List (java.util.List)1 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)1 Collectors (java.util.stream.Collectors)1 Internal (org.apache.flink.annotation.Internal)1 JobID (org.apache.flink.api.common.JobID)1 PackagedProgram (org.apache.flink.client.program.PackagedProgram)1 Configuration (org.apache.flink.configuration.Configuration)1 DispatcherGateway (org.apache.flink.runtime.dispatcher.DispatcherGateway)1