Search in sources :

Example 1 with DispatcherGateway

use of org.apache.flink.runtime.dispatcher.DispatcherGateway in project flink by apache.

the class WebSubmissionExtensionTest method applicationsRunInSeparateThreads.

/**
 * Uploads a jar once and runs it repeatedly through the {@link JarRunHandler}, then checks that
 * the capturing application runner recorded one distinct thread per run.
 */
@Test
void applicationsRunInSeparateThreads(@TempDir Path tempDir) throws Exception {
    final Path uploadDirectory = Files.createDirectories(tempDir.resolve("uploadDir"));

    // The upload handler treats the uploaded jar as a temporary file and moves it, so the test
    // works on a copy instead of the original artifact.
    final Path sourceJar = Paths.get(System.getProperty("targetDir")).resolve(JAR_NAME);
    final Path jarCopy = Files.copy(sourceJar, tempDir.resolve("app.jar"));

    final DispatcherGateway gateway = TestingDispatcherGateway.newBuilder().build();
    final ThreadCapturingApplicationRunner capturingRunner = new ThreadCapturingApplicationRunner();
    final WebSubmissionExtension extension =
            new WebSubmissionExtension(
                    new Configuration(),
                    () -> CompletableFuture.completedFuture(gateway),
                    Collections.emptyMap(),
                    new CompletableFuture<>(),
                    uploadDirectory,
                    Executors.directExecutor(),
                    Time.of(5, TimeUnit.SECONDS),
                    () -> capturingRunner);

    final String uploadedJarId = uploadJar(extension, jarCopy, gateway);
    final JarRunHandler runHandler = extension.getJarRunHandler();

    final JarRunMessageParameters messageParameters = new JarRunMessageParameters();
    messageParameters.jarIdPathParameter.resolve(uploadedJarId);
    final HandlerRequest<JarRunRequestBody> request =
            HandlerRequest.create(new JarRunRequestBody(), messageParameters);

    // Run the applications one after another; each run is awaited before the next one starts.
    final int expectedThreadCount = 20;
    for (int run = 0; run < expectedThreadCount; run++) {
        runHandler.handleRequest(request, gateway).get();
    }

    assertThat(capturingRunner.getThreads().size()).isEqualTo(expectedThreadCount);
}
Also used : Path(java.nio.file.Path) JarRunHandler(org.apache.flink.runtime.webmonitor.handlers.JarRunHandler) Configuration(org.apache.flink.configuration.Configuration) JarRunRequestBody(org.apache.flink.runtime.webmonitor.handlers.JarRunRequestBody) JarRunMessageParameters(org.apache.flink.runtime.webmonitor.handlers.JarRunMessageParameters) DispatcherGateway(org.apache.flink.runtime.dispatcher.DispatcherGateway) Test(org.junit.jupiter.api.Test)

Example 2 with DispatcherGateway

use of org.apache.flink.runtime.dispatcher.DispatcherGateway in project flink by apache.

the class DefaultDispatcherResourceManagerComponentFactory method create.

/**
 * Creates the services that make up a {@link DispatcherResourceManagerComponent} and starts them.
 *
 * <p>The method obtains the dispatcher and resource manager leader retrieval services, builds
 * gateway retrievers for both, creates and starts the REST endpoint, creates the resource manager
 * service and the dispatcher runner, starts the resource manager service and finally starts both
 * leader retrieval services before returning the assembled component.
 *
 * <p>If any of these steps throws an {@link Exception}, every service that was already obtained
 * or created is stopped or closed (asynchronous shutdowns are awaited) and a
 * {@link FlinkException} wrapping the failure is thrown.
 *
 * @param configuration read for the REST server thread settings, the metric fetcher update
 *     interval and the async operation store duration, and passed on to the created services
 * @param resourceId passed to the resource manager service
 * @param ioExecutor passed to the resource manager service, the history server archivist, the
 *     partial dispatcher services and the dispatcher runner
 * @param rpcService used by the gateway retrievers, for the hostname lookup, and passed to the
 *     resource manager service and the dispatcher runner
 * @param highAvailabilityServices source of the leader retrieval and leader election services
 * @param blobServer passed to the REST endpoint and the dispatcher services; its port is put
 *     into the {@code ClusterInformation}
 * @param heartbeatServices passed to the resource manager service and the dispatcher services
 * @param metricRegistry passed to the resource manager service and used for the job manager
 *     metric group and the metric query service address
 * @param executionGraphInfoStore passed to the dispatcher services
 * @param metricQueryServiceRetriever used when a {@code MetricFetcherImpl} is built
 * @param fatalErrorHandler passed to the created services and to the returned component
 * @return the component bundling the dispatcher runner, resource manager service, both leader
 *     retrieval services, the REST endpoint and the operation caches
 * @throws Exception a {@link FlinkException} if any service could not be created or started
 */
@Override
public DispatcherResourceManagerComponent create(Configuration configuration, ResourceID resourceId, Executor ioExecutor, RpcService rpcService, HighAvailabilityServices highAvailabilityServices, BlobServer blobServer, HeartbeatServices heartbeatServices, MetricRegistry metricRegistry, ExecutionGraphInfoStore executionGraphInfoStore, MetricQueryServiceRetriever metricQueryServiceRetriever, FatalErrorHandler fatalErrorHandler) throws Exception {
    // Declared outside the try block (and initialised to null) so that the catch block can tell
    // which services were already created when a failure occurred.
    LeaderRetrievalService dispatcherLeaderRetrievalService = null;
    LeaderRetrievalService resourceManagerRetrievalService = null;
    WebMonitorEndpoint<?> webMonitorEndpoint = null;
    ResourceManagerService resourceManagerService = null;
    DispatcherRunner dispatcherRunner = null;
    try {
        dispatcherLeaderRetrievalService = highAvailabilityServices.getDispatcherLeaderRetriever();
        resourceManagerRetrievalService = highAvailabilityServices.getResourceManagerLeaderRetriever();
        // Both gateway retrievers use the same retry strategy parameters.
        final LeaderGatewayRetriever<DispatcherGateway> dispatcherGatewayRetriever = new RpcGatewayRetriever<>(rpcService, DispatcherGateway.class, DispatcherId::fromUuid, new ExponentialBackoffRetryStrategy(12, Duration.ofMillis(10), Duration.ofMillis(50)));
        final LeaderGatewayRetriever<ResourceManagerGateway> resourceManagerGatewayRetriever = new RpcGatewayRetriever<>(rpcService, ResourceManagerGateway.class, ResourceManagerId::fromUuid, new ExponentialBackoffRetryStrategy(12, Duration.ofMillis(10), Duration.ofMillis(50)));
        // This executor is handed to both the metric fetcher and the REST endpoint below.
        final ScheduledExecutorService executor = WebMonitorEndpoint.createExecutorService(configuration.getInteger(RestOptions.SERVER_NUM_THREADS), configuration.getInteger(RestOptions.SERVER_THREAD_PRIORITY), "DispatcherRestEndpoint");
        final long updateInterval = configuration.getLong(MetricOptions.METRIC_FETCHER_UPDATE_INTERVAL);
        // An update interval of 0 selects VoidMetricFetcher.INSTANCE instead of building a
        // MetricFetcherImpl from the configuration.
        final MetricFetcher metricFetcher = updateInterval == 0 ? VoidMetricFetcher.INSTANCE : MetricFetcherImpl.fromConfiguration(configuration, metricQueryServiceRetriever, dispatcherGatewayRetriever, executor);
        webMonitorEndpoint = restEndpointFactory.createRestEndpoint(configuration, dispatcherGatewayRetriever, resourceManagerGatewayRetriever, blobServer, executor, metricFetcher, highAvailabilityServices.getClusterRestEndpointLeaderElectionService(), fatalErrorHandler);
        log.debug("Starting Dispatcher REST endpoint.");
        webMonitorEndpoint.start();
        final String hostname = RpcUtils.getHostname(rpcService);
        // The REST endpoint is started before this point; its base URL is passed to the
        // resource manager service.
        resourceManagerService = ResourceManagerServiceImpl.create(resourceManagerFactory, configuration, resourceId, rpcService, highAvailabilityServices, heartbeatServices, fatalErrorHandler, new ClusterInformation(hostname, blobServer.getPort()), webMonitorEndpoint.getRestBaseUrl(), metricRegistry, hostname, ioExecutor);
        final HistoryServerArchivist historyServerArchivist = HistoryServerArchivist.createHistoryServerArchivist(configuration, webMonitorEndpoint, ioExecutor);
        final DispatcherOperationCaches dispatcherOperationCaches = new DispatcherOperationCaches(configuration.get(RestOptions.ASYNC_OPERATION_STORE_DURATION));
        final PartialDispatcherServices partialDispatcherServices = new PartialDispatcherServices(configuration, highAvailabilityServices, resourceManagerGatewayRetriever, blobServer, heartbeatServices, () -> JobManagerMetricGroup.createJobManagerMetricGroup(metricRegistry, hostname), executionGraphInfoStore, fatalErrorHandler, historyServerArchivist, metricRegistry.getMetricQueryServiceGatewayRpcAddress(), ioExecutor, dispatcherOperationCaches);
        log.debug("Starting Dispatcher.");
        dispatcherRunner = dispatcherRunnerFactory.createDispatcherRunner(highAvailabilityServices.getDispatcherLeaderElectionService(), fatalErrorHandler, new HaServicesJobPersistenceComponentFactory(highAvailabilityServices), ioExecutor, rpcService, partialDispatcherServices);
        log.debug("Starting ResourceManagerService.");
        resourceManagerService.start();
        // The leader retrieval services are started last, each with its gateway retriever.
        resourceManagerRetrievalService.start(resourceManagerGatewayRetriever);
        dispatcherLeaderRetrievalService.start(dispatcherGatewayRetriever);
        return new DispatcherResourceManagerComponent(dispatcherRunner, resourceManagerService, dispatcherLeaderRetrievalService, resourceManagerRetrievalService, webMonitorEndpoint, fatalErrorHandler, dispatcherOperationCaches);
    } catch (Exception exception) {
        // clean up all started components
        // Failures during cleanup are merged into `exception` through
        // ExceptionUtils.firstOrSuppressed so that they are not lost.
        if (dispatcherLeaderRetrievalService != null) {
            try {
                dispatcherLeaderRetrievalService.stop();
            } catch (Exception e) {
                exception = ExceptionUtils.firstOrSuppressed(e, exception);
            }
        }
        if (resourceManagerRetrievalService != null) {
            try {
                resourceManagerRetrievalService.stop();
            } catch (Exception e) {
                exception = ExceptionUtils.firstOrSuppressed(e, exception);
            }
        }
        // At most three components (REST endpoint, resource manager service, dispatcher runner)
        // are closed asynchronously.
        final Collection<CompletableFuture<Void>> terminationFutures = new ArrayList<>(3);
        if (webMonitorEndpoint != null) {
            terminationFutures.add(webMonitorEndpoint.closeAsync());
        }
        if (resourceManagerService != null) {
            terminationFutures.add(resourceManagerService.closeAsync());
        }
        if (dispatcherRunner != null) {
            terminationFutures.add(dispatcherRunner.closeAsync());
        }
        final FutureUtils.ConjunctFuture<Void> terminationFuture = FutureUtils.completeAll(terminationFutures);
        try {
            // Block until all asynchronous shutdowns have finished before reporting the failure.
            terminationFuture.get();
        } catch (Exception e) {
            exception = ExceptionUtils.firstOrSuppressed(e, exception);
        }
        throw new FlinkException("Could not create the DispatcherResourceManagerComponent.", exception);
    }
}
Also used : ExponentialBackoffRetryStrategy(org.apache.flink.util.concurrent.ExponentialBackoffRetryStrategy) DispatcherGateway(org.apache.flink.runtime.dispatcher.DispatcherGateway) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) RpcGatewayRetriever(org.apache.flink.runtime.webmonitor.retriever.impl.RpcGatewayRetriever) DispatcherRunner(org.apache.flink.runtime.dispatcher.runner.DispatcherRunner) DispatcherOperationCaches(org.apache.flink.runtime.dispatcher.DispatcherOperationCaches) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) HistoryServerArchivist(org.apache.flink.runtime.dispatcher.HistoryServerArchivist) HaServicesJobPersistenceComponentFactory(org.apache.flink.runtime.jobmanager.HaServicesJobPersistenceComponentFactory) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) PartialDispatcherServices(org.apache.flink.runtime.dispatcher.PartialDispatcherServices) ResourceManagerService(org.apache.flink.runtime.resourcemanager.ResourceManagerService) DispatcherId(org.apache.flink.runtime.dispatcher.DispatcherId) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) VoidMetricFetcher(org.apache.flink.runtime.rest.handler.legacy.metrics.VoidMetricFetcher) MetricFetcher(org.apache.flink.runtime.rest.handler.legacy.metrics.MetricFetcher) FlinkException(org.apache.flink.util.FlinkException) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) Collection(java.util.Collection)

Example 3 with DispatcherGateway

use of org.apache.flink.runtime.dispatcher.DispatcherGateway in project flink by apache.

the class MiniCluster method submitJob.

/**
 * Submits a copy of the given job graph to the dispatcher once the job's files have been
 * uploaded.
 *
 * @param jobGraph the job to submit; it is cloned first and the clone is what gets submitted
 * @return a future that completes with a {@link JobSubmissionResult} carrying the job ID once
 *     the dispatcher has acknowledged the submission
 */
public CompletableFuture<JobSubmissionResult> submitJob(JobGraph jobGraph) {
    // With local RPC the JobGraph instance is handed to the Dispatcher as-is, so later
    // mutations by the caller could change the Dispatcher's behaviour. Working on a clone
    // guards against that.
    final JobGraph jobGraphCopy = cloneJobGraph(jobGraph);
    checkRestoreModeForRandomizedChangelogStateBackend(jobGraphCopy);

    final CompletableFuture<DispatcherGateway> gatewayFuture = getDispatcherGatewayFuture();
    final CompletableFuture<Void> filesUploaded =
            uploadAndSetJobFiles(createBlobServerAddress(gatewayFuture), jobGraphCopy);

    // Submitting inside thenCombine yields a future of a future; it is flattened below.
    final CompletableFuture<CompletableFuture<Acknowledge>> nestedSubmission =
            filesUploaded.thenCombine(
                    gatewayFuture,
                    (unused, gateway) -> gateway.submitJob(jobGraphCopy, rpcTimeout));

    return nestedSubmission
            .thenCompose(Function.identity())
            .thenApply(ack -> new JobSubmissionResult(jobGraphCopy.getJobID()));
}
Also used : JobSubmissionResult(org.apache.flink.api.common.JobSubmissionResult) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) InetSocketAddress(java.net.InetSocketAddress) DispatcherGateway(org.apache.flink.runtime.dispatcher.DispatcherGateway)

Example 4 with DispatcherGateway

use of org.apache.flink.runtime.dispatcher.DispatcherGateway in project flink by apache.

the class DefaultDispatcherRunnerITCase method leaderChange_afterJobSubmission_recoversSubmittedJob.

/**
 * Submits a job under a first leader session, revokes leadership, grants it again under a new
 * session id, and checks that the newly retrieved gateway lists the submitted job.
 */
@Test
public void leaderChange_afterJobSubmission_recoversSubmittedJob() throws Exception {
    // The runner only has to stay open for the duration of the scenario; try-with-resources
    // closes it afterwards.
    try (final DispatcherRunner runner = createDispatcherRunner()) {
        // First leader session: submit the job and wait for the submission to complete.
        final DispatcherGateway initialGateway = electLeaderAndRetrieveGateway(UUID.randomUUID());
        initialGateway.submitJob(jobGraph, TIMEOUT).get();

        // Revoke leadership, then grant it again under a fresh session id.
        dispatcherLeaderElectionService.notLeader();
        final DispatcherGateway nextGateway = electLeaderAndRetrieveGateway(UUID.randomUUID());

        final Collection<JobID> listedJobIds = nextGateway.listJobs(TIMEOUT).get();
        assertThat(listedJobIds, contains(jobGraph.getJobID()));
    }
}
Also used : UUID(java.util.UUID) DispatcherGateway(org.apache.flink.runtime.dispatcher.DispatcherGateway) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 5 with DispatcherGateway

use of org.apache.flink.runtime.dispatcher.DispatcherGateway in project flink by apache.

the class DefaultDispatcherRunnerITCase method electLeaderAndRetrieveGateway.

/**
 * Grants leadership for the given session id, waits for the leadership confirmation and
 * connects to the confirmed leader's address through the testing RPC service.
 *
 * @param firstLeaderSessionId the session id passed to the leader election service
 * @return the gateway obtained by connecting to the confirmed leader address
 */
private DispatcherGateway electLeaderAndRetrieveGateway(UUID firstLeaderSessionId) throws InterruptedException, java.util.concurrent.ExecutionException {
    dispatcherLeaderElectionService.isLeader(firstLeaderSessionId);

    // Blocks until the election service reports the confirmed leader connection info.
    final LeaderConnectionInfo confirmedLeader =
            dispatcherLeaderElectionService.getConfirmationFuture().get();
    final String leaderAddress = confirmedLeader.getAddress();
    final DispatcherId dispatcherId = DispatcherId.fromUuid(confirmedLeader.getLeaderSessionId());

    return rpcServiceResource
            .getTestingRpcService()
            .connect(leaderAddress, dispatcherId, DispatcherGateway.class)
            .get();
}
Also used : LeaderConnectionInfo(org.apache.flink.runtime.util.LeaderConnectionInfo) DispatcherGateway(org.apache.flink.runtime.dispatcher.DispatcherGateway)

Aggregations

DispatcherGateway (org.apache.flink.runtime.dispatcher.DispatcherGateway) 23 Configuration (org.apache.flink.configuration.Configuration) 15 CompletableFuture (java.util.concurrent.CompletableFuture) 14 Acknowledge (org.apache.flink.runtime.messages.Acknowledge) 11 ExceptionUtils (org.apache.flink.util.ExceptionUtils) 11 Collections (java.util.Collections) 10 TestingDispatcherGateway (org.apache.flink.runtime.webmonitor.TestingDispatcherGateway) 10 FutureUtils (org.apache.flink.util.concurrent.FutureUtils) 10 Test (org.junit.Test) 10 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph) 9 JobID (org.apache.flink.api.common.JobID) 7 IOException (java.io.IOException) 6 Path (java.nio.file.Path) 6 Duration (java.time.Duration) 6 Optional (java.util.Optional) 6 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService) 6 Before (org.junit.Before) 6 ObjectOutputStream (java.io.ObjectOutputStream) 5 Files (java.nio.file.Files) 5 ArrayList (java.util.ArrayList) 5