Search in sources :

Example 1 with MetricQueryServiceRetriever

Use of org.apache.flink.runtime.webmonitor.retriever.MetricQueryServiceRetriever in project flink by apache.

In class MiniCluster, method setupDispatcherResourceManagerComponents.

/**
 * Creates the dispatcher/resource-manager components, registers them in
 * {@code dispatcherResourceManagerComponents}, and hooks up their shut-down handling.
 *
 * <p>For every registered component, completion of its shut-down future triggers
 * {@code stopApplication} on that component with the reported status. Once the shut-down futures
 * of all components have completed, {@link #closeAsync()} is invoked.
 *
 * @param configuration configuration handed to the component factory method
 * @param dispatcherResourceManagerComponentRpcServiceFactory RPC service factory handed to the
 *     component factory method
 * @param metricQueryServiceRetriever metric query service retriever handed to the component
 *     factory method
 * @throws Exception if creating the components fails
 */
@GuardedBy("lock")
private void setupDispatcherResourceManagerComponents(Configuration configuration, RpcServiceFactory dispatcherResourceManagerComponentRpcServiceFactory, MetricQueryServiceRetriever metricQueryServiceRetriever) throws Exception {
    dispatcherResourceManagerComponents.addAll(
            createDispatcherResourceManagerComponents(
                    configuration,
                    dispatcherResourceManagerComponentRpcServiceFactory,
                    haServices,
                    blobServer,
                    heartbeatServices,
                    metricRegistry,
                    metricQueryServiceRetriever,
                    new ShutDownFatalErrorHandler()));

    final Collection<CompletableFuture<ApplicationStatus>> pendingShutDowns =
            new ArrayList<>(dispatcherResourceManagerComponents.size());

    for (DispatcherResourceManagerComponent component : dispatcherResourceManagerComponents) {
        final CompletableFuture<ApplicationStatus> componentShutDown = component.getShutDownFuture();

        // When the component reports its final status, stop the application with that status;
        // a failure anywhere in this chain is routed through FutureUtils.assertNoException.
        FutureUtils.assertNoException(
                componentShutDown.thenCompose(status -> component.stopApplication(status, null)));

        pendingShutDowns.add(componentShutDown);
    }

    // Close the mini cluster after every component's shut-down future has completed,
    // regardless of whether they completed normally or exceptionally.
    FutureUtils.completeAll(pendingShutDowns)
            .whenComplete((unusedResult, unusedFailure) -> closeAsync());
}
Also used : InetAddress(java.net.InetAddress) ClusterOverview(org.apache.flink.runtime.messages.webmonitor.ClusterOverview) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) FunctionUtils(org.apache.flink.util.function.FunctionUtils) Duration(java.time.Duration) JobStatusMessage(org.apache.flink.runtime.client.JobStatusMessage) HighAvailabilityServicesUtils(org.apache.flink.runtime.highavailability.HighAvailabilityServicesUtils) GuardedBy(javax.annotation.concurrent.GuardedBy) ClientUtils(org.apache.flink.runtime.client.ClientUtils) Executors(java.util.concurrent.Executors) AccessExecutionGraph(org.apache.flink.runtime.executiongraph.AccessExecutionGraph) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) ClusterEntrypointUtils(org.apache.flink.runtime.entrypoint.ClusterEntrypointUtils) Time(org.apache.flink.api.common.time.Time) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) FlinkException(org.apache.flink.util.FlinkException) BlobServer(org.apache.flink.runtime.blob.BlobServer) CoordinationResponse(org.apache.flink.runtime.operators.coordination.CoordinationResponse) MetricRegistryImpl(org.apache.flink.runtime.metrics.MetricRegistryImpl) SavepointConfigOptions(org.apache.flink.runtime.jobgraph.SavepointConfigOptions) JobStatus(org.apache.flink.api.common.JobStatus) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) RpcService(org.apache.flink.runtime.rpc.RpcService) ResourceOverview(org.apache.flink.runtime.resourcemanager.ResourceOverview) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) FileOutputFormat(org.apache.flink.api.common.io.FileOutputFormat) Executor(java.util.concurrent.Executor) ApplicationStatus(org.apache.flink.runtime.clusterframework.ApplicationStatus) TaskExecutor(org.apache.flink.runtime.taskexecutor.TaskExecutor) IOException(java.io.IOException) 
VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) ExecutionException(java.util.concurrent.ExecutionException) JobID(org.apache.flink.api.common.JobID) ConfigurationUtils(org.apache.flink.configuration.ConfigurationUtils) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) ProcessMetricGroup(org.apache.flink.runtime.metrics.groups.ProcessMetricGroup) ClusterOptions(org.apache.flink.configuration.ClusterOptions) EmbeddedHaServicesWithLeadershipControl(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedHaServicesWithLeadershipControl) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) LoggerFactory(org.slf4j.LoggerFactory) ExceptionUtils(org.apache.flink.util.ExceptionUtils) ReporterSetup(org.apache.flink.runtime.metrics.ReporterSetup) InstantiationUtil(org.apache.flink.util.InstantiationUtil) URI(java.net.URI) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) TaskManagerRunner(org.apache.flink.runtime.taskexecutor.TaskManagerRunner) TriggerSavepointMode(org.apache.flink.runtime.dispatcher.TriggerSavepointMode) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) LeaderRetriever(org.apache.flink.runtime.webmonitor.retriever.LeaderRetriever) ExecutorThreadFactory(org.apache.flink.util.concurrent.ExecutorThreadFactory) Collection(java.util.Collection) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) CompletionException(java.util.concurrent.CompletionException) MetricUtils(org.apache.flink.runtime.metrics.util.MetricUtils) UUID(java.util.UUID) InetSocketAddress(java.net.InetSocketAddress) Collectors(java.util.stream.Collectors) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ExecutorUtils(org.apache.flink.util.ExecutorUtils) JobExecutionResult(org.apache.flink.api.common.JobExecutionResult) List(java.util.List) SerializedValue(org.apache.flink.util.SerializedValue) 
CoordinationRequest(org.apache.flink.runtime.operators.coordination.CoordinationRequest) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) Optional(java.util.Optional) DispatcherId(org.apache.flink.runtime.dispatcher.DispatcherId) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) HaLeadershipControl(org.apache.flink.runtime.highavailability.nonha.embedded.HaLeadershipControl) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) RpcMetricQueryServiceRetriever(org.apache.flink.runtime.webmonitor.retriever.impl.RpcMetricQueryServiceRetriever) CompletableFuture(java.util.concurrent.CompletableFuture) DispatcherGateway(org.apache.flink.runtime.dispatcher.DispatcherGateway) Function(java.util.function.Function) MetricRegistryConfiguration(org.apache.flink.runtime.metrics.MetricRegistryConfiguration) RestoreMode(org.apache.flink.runtime.jobgraph.RestoreMode) JobResult(org.apache.flink.runtime.jobmaster.JobResult) DefaultDispatcherResourceManagerComponentFactory(org.apache.flink.runtime.entrypoint.component.DefaultDispatcherResourceManagerComponentFactory) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) ExternalResourceInfoProvider(org.apache.flink.runtime.externalresource.ExternalResourceInfoProvider) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) FatalErrorHandler(org.apache.flink.runtime.rpc.FatalErrorHandler) RpcGatewayRetriever(org.apache.flink.runtime.webmonitor.retriever.impl.RpcGatewayRetriever) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Nonnull(javax.annotation.Nonnull) JobSubmissionResult(org.apache.flink.api.common.JobSubmissionResult) ExecutorService(java.util.concurrent.ExecutorService) MetricQueryServiceRetriever(org.apache.flink.runtime.webmonitor.retriever.MetricQueryServiceRetriever) 
DispatcherResourceManagerComponentFactory(org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponentFactory) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) Preconditions.checkState(org.apache.flink.util.Preconditions.checkState) Logger(org.slf4j.Logger) AutoCloseableAsync(org.apache.flink.util.AutoCloseableAsync) ExponentialBackoffRetryStrategy(org.apache.flink.util.concurrent.ExponentialBackoffRetryStrategy) Configuration(org.apache.flink.configuration.Configuration) Reference(org.apache.flink.util.Reference) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) TimeUnit(java.util.concurrent.TimeUnit) RpcSystem(org.apache.flink.runtime.rpc.RpcSystem) WorkingDirectory(org.apache.flink.runtime.entrypoint.WorkingDirectory) DispatcherResourceManagerComponent(org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponent) MemoryExecutionGraphInfoStore(org.apache.flink.runtime.dispatcher.MemoryExecutionGraphInfoStore) Internal(org.apache.flink.annotation.Internal) BlobCacheService(org.apache.flink.runtime.blob.BlobCacheService) BlobClient(org.apache.flink.runtime.blob.BlobClient) BlobUtils(org.apache.flink.runtime.blob.BlobUtils) StandaloneResourceManagerFactory(org.apache.flink.runtime.resourcemanager.StandaloneResourceManagerFactory) Collections(java.util.Collections) HighAvailabilityOptions(org.apache.flink.configuration.HighAvailabilityOptions) CompletableFuture(java.util.concurrent.CompletableFuture) DispatcherResourceManagerComponent(org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponent) ApplicationStatus(org.apache.flink.runtime.clusterframework.ApplicationStatus) ArrayList(java.util.ArrayList) GuardedBy(javax.annotation.concurrent.GuardedBy)

Example 2 with MetricQueryServiceRetriever

Use of org.apache.flink.runtime.webmonitor.retriever.MetricQueryServiceRetriever in project flink by apache.

In class MiniCluster, method start.

/**
 * Starts the mini cluster, based on the configured properties.
 *
 * <p>The whole start-up runs while holding {@code lock}. In order, it creates the working
 * directory, the metric registry, the RPC service(s), the process metric group, the I/O
 * executor, the high-availability services, the blob server and blob cache, the task managers,
 * the dispatcher/resource-manager components, and finally the leader retrievers. Only after all
 * of that succeeded is a fresh termination future created and the cluster marked as running.
 *
 * <p>If any step throws, {@code close()} is called to clean up; an exception thrown by
 * {@code close()} is attached to the original exception as suppressed, and the original
 * exception is rethrown.
 *
 * @throws Exception This method passes on any exception that occurs during the startup of the
 *     mini cluster. The {@code checkState} precondition also fails if the mini cluster is
 *     already running.
 */
public void start() throws Exception {
    synchronized (lock) {
        // refuse to start twice
        checkState(!running, "MiniCluster is already running");
        LOG.info("Starting Flink Mini Cluster");
        LOG.debug("Using configuration {}", miniClusterConfiguration);
        final Configuration configuration = miniClusterConfiguration.getConfiguration();
        // SHARED: one common RPC service backs all components; otherwise each component
        // gets a dedicated RPC service
        final boolean useSingleRpcService = miniClusterConfiguration.getRpcServiceSharing() == RpcServiceSharing.SHARED;
        try {
            // working directory named "minicluster_" followed by a freshly generated ResourceID
            workingDirectory = WorkingDirectory.create(ClusterEntrypointUtils.generateWorkingDirectoryFile(configuration, Optional.empty(), "minicluster_" + ResourceID.generate()));
            initializeIOFormatClasses(configuration);
            rpcSystem = rpcSystemSupplier.get();
            LOG.info("Starting Metrics Registry");
            // the registry is created with the RPC system's maximum message size
            metricRegistry = createMetricRegistry(configuration, rpcSystem.deref().getMaximumMessageSizeInBytes(configuration));
            // bring up all the RPC services
            LOG.info("Starting RPC Service(s)");
            final RpcServiceFactory dispatcherResourceManagerComponentRpcServiceFactory;
            final RpcService metricQueryServiceRpcService;
            if (useSingleRpcService) {
                // we always need the 'commonRpcService' for auxiliary calls
                commonRpcService = createLocalRpcService(configuration, rpcSystem.deref());
                // the same factory (wrapping the common service) serves both the task managers
                // and the dispatcher/resource-manager components
                final CommonRpcServiceFactory commonRpcServiceFactory = new CommonRpcServiceFactory(commonRpcService);
                taskManagerRpcServiceFactory = commonRpcServiceFactory;
                dispatcherResourceManagerComponentRpcServiceFactory = commonRpcServiceFactory;
                // the metric query service still gets its own (local) RPC service
                metricQueryServiceRpcService = MetricUtils.startLocalMetricsRpcService(configuration, rpcSystem.deref());
            } else {
                // start a new service per component, possibly with custom bind addresses
                final String jobManagerExternalAddress = miniClusterConfiguration.getJobManagerExternalAddress();
                final String taskManagerExternalAddress = miniClusterConfiguration.getTaskManagerExternalAddress();
                final String jobManagerExternalPortRange = miniClusterConfiguration.getJobManagerExternalPortRange();
                final String taskManagerExternalPortRange = miniClusterConfiguration.getTaskManagerExternalPortRange();
                final String jobManagerBindAddress = miniClusterConfiguration.getJobManagerBindAddress();
                final String taskManagerBindAddress = miniClusterConfiguration.getTaskManagerBindAddress();
                // separate factories: JobManager settings for the dispatcher/resource-manager
                // components, TaskManager settings for the task managers
                dispatcherResourceManagerComponentRpcServiceFactory = new DedicatedRpcServiceFactory(configuration, jobManagerExternalAddress, jobManagerExternalPortRange, jobManagerBindAddress, rpcSystem.deref());
                taskManagerRpcServiceFactory = new DedicatedRpcServiceFactory(configuration, taskManagerExternalAddress, taskManagerExternalPortRange, taskManagerBindAddress, rpcSystem.deref());
                // we always need the 'commonRpcService' for auxiliary calls
                // bind to the JobManager address with port 0
                commonRpcService = createRemoteRpcService(configuration, jobManagerBindAddress, 0, rpcSystem.deref());
                // the remote metrics RPC service uses the common service's address;
                // NOTE(review): the null third argument looks like an unset bind address - confirm
                metricQueryServiceRpcService = MetricUtils.startRemoteMetricsRpcService(configuration, commonRpcService.getAddress(), null, rpcSystem.deref());
            }
            // NOTE(review): the meaning of the null second argument is not visible here - confirm
            // against MetricRegistry#startQueryService
            metricRegistry.startQueryService(metricQueryServiceRpcService, null);
            // process-level metrics, tagged with the common RPC service's hostname
            processMetricGroup = MetricUtils.instantiateProcessMetricGroup(metricRegistry, RpcUtils.getHostname(commonRpcService), ConfigurationUtils.getSystemResourceMetricsProbingInterval(configuration));
            // fixed-size I/O pool; threads are created by a factory named "mini-cluster-io"
            ioExecutor = Executors.newFixedThreadPool(ClusterEntrypointUtils.getPoolSize(configuration), new ExecutorThreadFactory("mini-cluster-io"));
            haServices = createHighAvailabilityServices(configuration, ioExecutor);
            // blob server and blob cache both borrow the working directory's blob storage
            // directory and each receive their own blob store from the HA services
            blobServer = BlobUtils.createBlobServer(configuration, Reference.borrowed(workingDirectory.getBlobStorageDirectory()), haServices.createBlobStore());
            blobServer.start();
            heartbeatServices = HeartbeatServices.fromConfiguration(configuration);
            // the blob cache is pointed at the local host on the port of the started blob server
            blobCacheService = BlobUtils.createBlobCacheService(configuration, Reference.borrowed(workingDirectory.getBlobStorageDirectory()), haServices.createBlobStore(), new InetSocketAddress(InetAddress.getLocalHost(), blobServer.getPort()));
            // task managers are started before the dispatcher/resource-manager components
            startTaskManagers();
            MetricQueryServiceRetriever metricQueryServiceRetriever = new RpcMetricQueryServiceRetriever(metricRegistry.getMetricQueryServiceRpcService());
            setupDispatcherResourceManagerComponents(configuration, dispatcherResourceManagerComponentRpcServiceFactory, metricQueryServiceRetriever);
            // obtain the leader retrieval services from the HA services
            resourceManagerLeaderRetriever = haServices.getResourceManagerLeaderRetriever();
            dispatcherLeaderRetriever = haServices.getDispatcherLeaderRetriever();
            clusterRestEndpointLeaderRetrievalService = haServices.getClusterRestEndpointLeaderRetriever();
            // gateway retrievers resolve the current leader's gateway via the common RPC service;
            // presumably (21, 5 ms, 20 ms) = (max retries, initial delay, max delay) - TODO confirm
            dispatcherGatewayRetriever = new RpcGatewayRetriever<>(commonRpcService, DispatcherGateway.class, DispatcherId::fromUuid, new ExponentialBackoffRetryStrategy(21, Duration.ofMillis(5L), Duration.ofMillis(20L)));
            resourceManagerGatewayRetriever = new RpcGatewayRetriever<>(commonRpcService, ResourceManagerGateway.class, ResourceManagerId::fromUuid, new ExponentialBackoffRetryStrategy(21, Duration.ofMillis(5L), Duration.ofMillis(20L)));
            webMonitorLeaderRetriever = new LeaderRetriever();
            // start leader retrieval, each service feeding its matching retriever
            resourceManagerLeaderRetriever.start(resourceManagerGatewayRetriever);
            dispatcherLeaderRetriever.start(dispatcherGatewayRetriever);
            clusterRestEndpointLeaderRetrievalService.start(webMonitorLeaderRetriever);
        } catch (Exception e) {
            // cleanup everything
            try {
                close();
            } catch (Exception ee) {
                // keep the start-up failure as the primary exception; record the cleanup
                // failure alongside it
                e.addSuppressed(ee);
            }
            throw e;
        }
        // create a new termination future
        terminationFuture = new CompletableFuture<>();
        // now officially mark this as running
        running = true;
        LOG.info("Flink Mini Cluster started successfully");
    }
}
Also used : RpcMetricQueryServiceRetriever(org.apache.flink.runtime.webmonitor.retriever.impl.RpcMetricQueryServiceRetriever) MetricQueryServiceRetriever(org.apache.flink.runtime.webmonitor.retriever.MetricQueryServiceRetriever) ExponentialBackoffRetryStrategy(org.apache.flink.util.concurrent.ExponentialBackoffRetryStrategy) MetricRegistryConfiguration(org.apache.flink.runtime.metrics.MetricRegistryConfiguration) Configuration(org.apache.flink.configuration.Configuration) InetSocketAddress(java.net.InetSocketAddress) RpcMetricQueryServiceRetriever(org.apache.flink.runtime.webmonitor.retriever.impl.RpcMetricQueryServiceRetriever) DispatcherGateway(org.apache.flink.runtime.dispatcher.DispatcherGateway) FlinkException(org.apache.flink.util.FlinkException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) CompletionException(java.util.concurrent.CompletionException) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) ExecutorThreadFactory(org.apache.flink.util.concurrent.ExecutorThreadFactory) RpcService(org.apache.flink.runtime.rpc.RpcService) LeaderRetriever(org.apache.flink.runtime.webmonitor.retriever.LeaderRetriever)

Aggregations

IOException (java.io.IOException)2 InetSocketAddress (java.net.InetSocketAddress)2 CompletionException (java.util.concurrent.CompletionException)2 ExecutionException (java.util.concurrent.ExecutionException)2 Configuration (org.apache.flink.configuration.Configuration)2 IllegalConfigurationException (org.apache.flink.configuration.IllegalConfigurationException)2 JobExecutionException (org.apache.flink.runtime.client.JobExecutionException)2 DispatcherGateway (org.apache.flink.runtime.dispatcher.DispatcherGateway)2 MetricRegistryConfiguration (org.apache.flink.runtime.metrics.MetricRegistryConfiguration)2 ResourceManagerGateway (org.apache.flink.runtime.resourcemanager.ResourceManagerGateway)2 RpcService (org.apache.flink.runtime.rpc.RpcService)2 LeaderRetriever (org.apache.flink.runtime.webmonitor.retriever.LeaderRetriever)2 MetricQueryServiceRetriever (org.apache.flink.runtime.webmonitor.retriever.MetricQueryServiceRetriever)2 RpcMetricQueryServiceRetriever (org.apache.flink.runtime.webmonitor.retriever.impl.RpcMetricQueryServiceRetriever)2 FlinkException (org.apache.flink.util.FlinkException)2 ExecutorThreadFactory (org.apache.flink.util.concurrent.ExecutorThreadFactory)2 ExponentialBackoffRetryStrategy (org.apache.flink.util.concurrent.ExponentialBackoffRetryStrategy)2 InetAddress (java.net.InetAddress)1 URI (java.net.URI)1 Duration (java.time.Duration)1