Search in sources :

Example 1 with DispatcherResourceManagerComponent

use of org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponent in project flink by apache.

the class TestingMiniCluster method createDispatcherResourceManagerComponents.

/**
 * Creates {@code numberDispatcherResourceManagerComponents} dispatcher/resource-manager
 * components using the factory returned by
 * {@code createDispatcherResourceManagerComponentFactory()}.
 *
 * <p>Every component is created with a freshly generated {@link ResourceID}, an RPC service
 * obtained from {@code rpcServiceFactory} and a new {@link MemoryExecutionGraphInfoStore}; the
 * remaining services passed to this method are handed to every component unchanged.
 *
 * @return the created components, in creation order
 * @throws Exception if the factory or the RPC service creation fails
 */
@Override
protected Collection<? extends DispatcherResourceManagerComponent> createDispatcherResourceManagerComponents(Configuration configuration, RpcServiceFactory rpcServiceFactory, HighAvailabilityServices haServices, BlobServer blobServer, HeartbeatServices heartbeatServices, MetricRegistry metricRegistry, MetricQueryServiceRetriever metricQueryServiceRetriever, FatalErrorHandler fatalErrorHandler) throws Exception {
    final DispatcherResourceManagerComponentFactory componentFactory = createDispatcherResourceManagerComponentFactory();
    final List<DispatcherResourceManagerComponent> components = new ArrayList<>(numberDispatcherResourceManagerComponents);
    for (int created = 0; created < numberDispatcherResourceManagerComponents; created++) {
        // One distinct resource id per component.
        final ResourceID componentResourceId = ResourceID.generate();
        final DispatcherResourceManagerComponent component =
                componentFactory.create(
                        configuration,
                        componentResourceId,
                        getIOExecutor(),
                        rpcServiceFactory.createRpcService(),
                        haServices,
                        blobServer,
                        heartbeatServices,
                        metricRegistry,
                        new MemoryExecutionGraphInfoStore(),
                        metricQueryServiceRetriever,
                        fatalErrorHandler);
        components.add(component);
    }
    return components;
}
Also used : MemoryExecutionGraphInfoStore(org.apache.flink.runtime.dispatcher.MemoryExecutionGraphInfoStore) DispatcherResourceManagerComponent(org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponent) ArrayList(java.util.ArrayList) DispatcherResourceManagerComponentFactory(org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponentFactory)

Example 2 with DispatcherResourceManagerComponent

use of org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponent in project flink by apache.

the class MiniCluster method shutDownResourceManagerComponents.

// ------------------------------------------------------------------------
// Internal methods
// ------------------------------------------------------------------------
/**
 * Asynchronously closes all dispatcher/resource-manager components and, after all of them have
 * completed, stops and clears the three leader retrieval services.
 *
 * <p>Every retrieval service is stopped even if an earlier one fails; failures are combined via
 * {@code ExceptionUtils.firstOrSuppressed} and thrown at the end of the follow-up action.
 *
 * @return the future returned by {@code FutureUtils.runAfterwards} for the follow-up action
 */
@GuardedBy("lock")
private CompletableFuture<Void> shutDownResourceManagerComponents() {
    final Collection<CompletableFuture<Void>> componentTerminations = new ArrayList<>(dispatcherResourceManagerComponents.size());
    for (DispatcherResourceManagerComponent component : dispatcherResourceManagerComponents) {
        componentTerminations.add(component.closeAsync());
    }
    final FutureUtils.ConjunctFuture<Void> allComponentsTerminated = FutureUtils.completeAll(componentTerminations);
    return FutureUtils.runAfterwards(allComponentsTerminated, () -> {
        // Collects the failures of the individual stop() calls.
        Exception stopFailure = null;
        synchronized (lock) {
            // Resource manager leader retriever.
            if (resourceManagerLeaderRetriever != null) {
                try {
                    resourceManagerLeaderRetriever.stop();
                } catch (Exception stopError) {
                    stopFailure = ExceptionUtils.firstOrSuppressed(stopError, stopFailure);
                }
                resourceManagerLeaderRetriever = null;
            }
            // Dispatcher leader retriever.
            if (dispatcherLeaderRetriever != null) {
                try {
                    dispatcherLeaderRetriever.stop();
                } catch (Exception stopError) {
                    stopFailure = ExceptionUtils.firstOrSuppressed(stopError, stopFailure);
                }
                dispatcherLeaderRetriever = null;
            }
            // REST endpoint leader retrieval service.
            if (clusterRestEndpointLeaderRetrievalService != null) {
                try {
                    clusterRestEndpointLeaderRetrievalService.stop();
                } catch (Exception stopError) {
                    stopFailure = ExceptionUtils.firstOrSuppressed(stopError, stopFailure);
                }
                clusterRestEndpointLeaderRetrievalService = null;
            }
        }
        // Report the combined failure only after every service had its chance to stop.
        if (stopFailure != null) {
            throw stopFailure;
        }
    });
}
Also used : CompletableFuture(java.util.concurrent.CompletableFuture) DispatcherResourceManagerComponent(org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponent) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) ArrayList(java.util.ArrayList) FlinkException(org.apache.flink.util.FlinkException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) CompletionException(java.util.concurrent.CompletionException) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) GuardedBy(javax.annotation.concurrent.GuardedBy)

Example 3 with DispatcherResourceManagerComponent

use of org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponent in project flink by apache.

the class MiniCluster method setupDispatcherResourceManagerComponents.

/**
 * Creates the dispatcher/resource-manager components, registers them with this cluster and
 * wires up their shut-down handling.
 *
 * <p>For every registered component, completion of its shut-down future triggers
 * {@code stopApplication} with the reported status. Once the shut-down futures of all
 * components have completed, {@code closeAsync()} is invoked.
 *
 * @throws Exception if creating the components fails
 */
@GuardedBy("lock")
private void setupDispatcherResourceManagerComponents(Configuration configuration, RpcServiceFactory dispatcherResourceManagerComponentRpcServiceFactory, MetricQueryServiceRetriever metricQueryServiceRetriever) throws Exception {
    final Collection<? extends DispatcherResourceManagerComponent> createdComponents =
            createDispatcherResourceManagerComponents(
                    configuration,
                    dispatcherResourceManagerComponentRpcServiceFactory,
                    haServices,
                    blobServer,
                    heartbeatServices,
                    metricRegistry,
                    metricQueryServiceRetriever,
                    new ShutDownFatalErrorHandler());
    dispatcherResourceManagerComponents.addAll(createdComponents);

    final Collection<CompletableFuture<ApplicationStatus>> componentShutDowns = new ArrayList<>(dispatcherResourceManagerComponents.size());
    for (DispatcherResourceManagerComponent component : dispatcherResourceManagerComponents) {
        final CompletableFuture<ApplicationStatus> componentShutDown = component.getShutDownFuture();
        // Stop the application with the reported status; the chained future is handed to
        // FutureUtils.assertNoException.
        FutureUtils.assertNoException(componentShutDown.thenCompose(status -> component.stopApplication(status, null)));
        componentShutDowns.add(componentShutDown);
    }
    // Close this cluster once every component has shut down, regardless of the outcome.
    FutureUtils.completeAll(componentShutDowns).whenComplete((unused, failure) -> closeAsync());
}
Also used : InetAddress(java.net.InetAddress) ClusterOverview(org.apache.flink.runtime.messages.webmonitor.ClusterOverview) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) FunctionUtils(org.apache.flink.util.function.FunctionUtils) Duration(java.time.Duration) JobStatusMessage(org.apache.flink.runtime.client.JobStatusMessage) HighAvailabilityServicesUtils(org.apache.flink.runtime.highavailability.HighAvailabilityServicesUtils) GuardedBy(javax.annotation.concurrent.GuardedBy) ClientUtils(org.apache.flink.runtime.client.ClientUtils) Executors(java.util.concurrent.Executors) AccessExecutionGraph(org.apache.flink.runtime.executiongraph.AccessExecutionGraph) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) ClusterEntrypointUtils(org.apache.flink.runtime.entrypoint.ClusterEntrypointUtils) Time(org.apache.flink.api.common.time.Time) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) FlinkException(org.apache.flink.util.FlinkException) BlobServer(org.apache.flink.runtime.blob.BlobServer) CoordinationResponse(org.apache.flink.runtime.operators.coordination.CoordinationResponse) MetricRegistryImpl(org.apache.flink.runtime.metrics.MetricRegistryImpl) SavepointConfigOptions(org.apache.flink.runtime.jobgraph.SavepointConfigOptions) JobStatus(org.apache.flink.api.common.JobStatus) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) RpcService(org.apache.flink.runtime.rpc.RpcService) ResourceOverview(org.apache.flink.runtime.resourcemanager.ResourceOverview) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) FileOutputFormat(org.apache.flink.api.common.io.FileOutputFormat) Executor(java.util.concurrent.Executor) ApplicationStatus(org.apache.flink.runtime.clusterframework.ApplicationStatus) TaskExecutor(org.apache.flink.runtime.taskexecutor.TaskExecutor) IOException(java.io.IOException) 
VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) ExecutionException(java.util.concurrent.ExecutionException) JobID(org.apache.flink.api.common.JobID) ConfigurationUtils(org.apache.flink.configuration.ConfigurationUtils) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) ProcessMetricGroup(org.apache.flink.runtime.metrics.groups.ProcessMetricGroup) ClusterOptions(org.apache.flink.configuration.ClusterOptions) EmbeddedHaServicesWithLeadershipControl(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedHaServicesWithLeadershipControl) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) LoggerFactory(org.slf4j.LoggerFactory) ExceptionUtils(org.apache.flink.util.ExceptionUtils) ReporterSetup(org.apache.flink.runtime.metrics.ReporterSetup) InstantiationUtil(org.apache.flink.util.InstantiationUtil) URI(java.net.URI) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) TaskManagerRunner(org.apache.flink.runtime.taskexecutor.TaskManagerRunner) TriggerSavepointMode(org.apache.flink.runtime.dispatcher.TriggerSavepointMode) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) LeaderRetriever(org.apache.flink.runtime.webmonitor.retriever.LeaderRetriever) ExecutorThreadFactory(org.apache.flink.util.concurrent.ExecutorThreadFactory) Collection(java.util.Collection) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) CompletionException(java.util.concurrent.CompletionException) MetricUtils(org.apache.flink.runtime.metrics.util.MetricUtils) UUID(java.util.UUID) InetSocketAddress(java.net.InetSocketAddress) Collectors(java.util.stream.Collectors) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ExecutorUtils(org.apache.flink.util.ExecutorUtils) JobExecutionResult(org.apache.flink.api.common.JobExecutionResult) List(java.util.List) SerializedValue(org.apache.flink.util.SerializedValue) 
CoordinationRequest(org.apache.flink.runtime.operators.coordination.CoordinationRequest) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) Optional(java.util.Optional) DispatcherId(org.apache.flink.runtime.dispatcher.DispatcherId) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) HaLeadershipControl(org.apache.flink.runtime.highavailability.nonha.embedded.HaLeadershipControl) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) RpcMetricQueryServiceRetriever(org.apache.flink.runtime.webmonitor.retriever.impl.RpcMetricQueryServiceRetriever) CompletableFuture(java.util.concurrent.CompletableFuture) DispatcherGateway(org.apache.flink.runtime.dispatcher.DispatcherGateway) Function(java.util.function.Function) MetricRegistryConfiguration(org.apache.flink.runtime.metrics.MetricRegistryConfiguration) RestoreMode(org.apache.flink.runtime.jobgraph.RestoreMode) JobResult(org.apache.flink.runtime.jobmaster.JobResult) DefaultDispatcherResourceManagerComponentFactory(org.apache.flink.runtime.entrypoint.component.DefaultDispatcherResourceManagerComponentFactory) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) ExternalResourceInfoProvider(org.apache.flink.runtime.externalresource.ExternalResourceInfoProvider) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) FatalErrorHandler(org.apache.flink.runtime.rpc.FatalErrorHandler) RpcGatewayRetriever(org.apache.flink.runtime.webmonitor.retriever.impl.RpcGatewayRetriever) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Nonnull(javax.annotation.Nonnull) JobSubmissionResult(org.apache.flink.api.common.JobSubmissionResult) ExecutorService(java.util.concurrent.ExecutorService) MetricQueryServiceRetriever(org.apache.flink.runtime.webmonitor.retriever.MetricQueryServiceRetriever) 
DispatcherResourceManagerComponentFactory(org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponentFactory) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) Preconditions.checkState(org.apache.flink.util.Preconditions.checkState) Logger(org.slf4j.Logger) AutoCloseableAsync(org.apache.flink.util.AutoCloseableAsync) ExponentialBackoffRetryStrategy(org.apache.flink.util.concurrent.ExponentialBackoffRetryStrategy) Configuration(org.apache.flink.configuration.Configuration) Reference(org.apache.flink.util.Reference) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) TimeUnit(java.util.concurrent.TimeUnit) RpcSystem(org.apache.flink.runtime.rpc.RpcSystem) WorkingDirectory(org.apache.flink.runtime.entrypoint.WorkingDirectory) DispatcherResourceManagerComponent(org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponent) MemoryExecutionGraphInfoStore(org.apache.flink.runtime.dispatcher.MemoryExecutionGraphInfoStore) Internal(org.apache.flink.annotation.Internal) BlobCacheService(org.apache.flink.runtime.blob.BlobCacheService) BlobClient(org.apache.flink.runtime.blob.BlobClient) BlobUtils(org.apache.flink.runtime.blob.BlobUtils) StandaloneResourceManagerFactory(org.apache.flink.runtime.resourcemanager.StandaloneResourceManagerFactory) Collections(java.util.Collections) HighAvailabilityOptions(org.apache.flink.configuration.HighAvailabilityOptions) CompletableFuture(java.util.concurrent.CompletableFuture) DispatcherResourceManagerComponent(org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponent) ApplicationStatus(org.apache.flink.runtime.clusterframework.ApplicationStatus) ArrayList(java.util.ArrayList) GuardedBy(javax.annotation.concurrent.GuardedBy)

Example 4 with DispatcherResourceManagerComponent

use of org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponent in project flink by apache.

the class ProcessFailureCancelingITCase method testCancelingOnProcessFailure.

/**
 * Verifies that a running client program fails when the TaskManager process executing its job
 * is killed.
 *
 * <p>The test starts a dispatcher/resource-manager component in this JVM, launches a
 * TaskManager in a separate process, submits a job whose map function blocks indefinitely,
 * destroys the TaskManager process once a task has been deployed and then expects the client
 * thread to terminate with a {@code ProgramInvocationException}.
 *
 * <p>Cleanup is arranged so that the RPC service is terminated and the HA services are closed
 * even if the fatal error handler rethrows an error or an earlier cleanup step fails.
 *
 * @throws Throwable if the test fails or cleanup reports an error
 */
@Test
public void testCancelingOnProcessFailure() throws Throwable {
    Assume.assumeTrue("---- Skipping Process Failure test : Could not find java executable ----", getJavaCommandPath() != null);
    TestProcess taskManagerProcess = null;
    final TestingFatalErrorHandler fatalErrorHandler = new TestingFatalErrorHandler();
    Configuration config = new Configuration();
    config.setString(JobManagerOptions.ADDRESS, "localhost");
    config.set(AkkaOptions.ASK_TIMEOUT_DURATION, Duration.ofSeconds(100));
    config.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");
    config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, zooKeeperResource.getConnectString());
    config.setString(HighAvailabilityOptions.HA_STORAGE_PATH, temporaryFolder.newFolder().getAbsolutePath());
    config.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, 2);
    config.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, MemorySize.parse("4m"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MIN, MemorySize.parse("3200k"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MAX, MemorySize.parse("3200k"));
    config.set(TaskManagerOptions.TASK_HEAP_MEMORY, MemorySize.parse("128m"));
    config.set(TaskManagerOptions.CPU_CORES, 1.0);
    config.setInteger(RestOptions.PORT, 0);
    // Start the RPC service first so that the port it actually bound to can be written into the
    // configuration that is later passed to the TaskManager process.
    final RpcService rpcService = RpcSystem.load().remoteServiceBuilder(config, "localhost", "0").createAndStart();
    final int jobManagerPort = rpcService.getPort();
    config.setInteger(JobManagerOptions.PORT, jobManagerPort);
    final DispatcherResourceManagerComponentFactory resourceManagerComponentFactory = DefaultDispatcherResourceManagerComponentFactory.createSessionComponentFactory(StandaloneResourceManagerFactory.getInstance());
    DispatcherResourceManagerComponent dispatcherResourceManagerComponent = null;
    final ScheduledExecutorService ioExecutor = TestingUtils.defaultExecutor();
    final HighAvailabilityServices haServices = HighAvailabilityServicesUtils.createHighAvailabilityServices(config, ioExecutor, AddressResolution.NO_ADDRESS_RESOLUTION, RpcSystem.load(), NoOpFatalErrorHandler.INSTANCE);
    // Carries a failure of the client program thread back to the test thread.
    final AtomicReference<Throwable> programException = new AtomicReference<>();
    try {
        // NOTE(review): the HeartbeatServices arguments are presumably interval, timeout and a
        // failure threshold - confirm against the constructor.
        dispatcherResourceManagerComponent = resourceManagerComponentFactory.create(config, ResourceID.generate(), ioExecutor, rpcService, haServices, blobServerResource.getBlobServer(), new HeartbeatServices(100L, 10000L, 2), NoOpMetricRegistry.INSTANCE, new MemoryExecutionGraphInfoStore(), VoidMetricQueryServiceRetriever.INSTANCE, fatalErrorHandler);
        TestProcessBuilder taskManagerProcessBuilder = new TestProcessBuilder(TaskExecutorProcessEntryPoint.class.getName());
        taskManagerProcessBuilder.addConfigAsMainClassArgs(config);
        taskManagerProcess = taskManagerProcessBuilder.start();
        // start the test program, which infinitely blocks
        Runnable programRunner = new Runnable() {

            @Override
            public void run() {
                try {
                    ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment("localhost", 1337, config);
                    env.setParallelism(2);
                    env.setRestartStrategy(RestartStrategies.noRestart());
                    env.generateSequence(0, Long.MAX_VALUE).map(new MapFunction<Long, Long>() {

                        @Override
                        public Long map(Long value) throws Exception {
                            synchronized (this) {
                                // Print the marker that signals deployment, then block; nothing
                                // in this test notifies the monitor.
                                System.out.println(TASK_DEPLOYED_MARKER);
                                wait();
                            }
                            return 0L;
                        }
                    }).output(new DiscardingOutputFormat<>());
                    env.execute();
                } catch (Throwable t) {
                    // Record the failure so the test thread can inspect it after join().
                    programException.set(t);
                }
            }
        };
        Thread programThread = new Thread(programRunner);
        programThread.start();
        waitUntilAtLeastOneTaskHasBeenDeployed(taskManagerProcess);
        // kill the TaskManager after the job started to run
        taskManagerProcess.destroy();
        taskManagerProcess = null;
        // the job should fail within a few seconds due to heartbeat timeouts
        // since the CI environment is often slow, we conservatively give it up to 2 minutes
        programThread.join(TIMEOUT.toMillis());
        assertFalse("The program did not cancel in time", programThread.isAlive());
        Throwable error = programException.get();
        assertNotNull("The program did not fail properly", error);
        assertTrue(error instanceof ProgramInvocationException);
    // all seems well :-)
    } catch (Exception | Error e) {
        // Dump the TaskManager output to help diagnose the failure (only possible while the
        // process reference is still held).
        if (taskManagerProcess != null) {
            printOutput("TaskManager OUT", taskManagerProcess.getProcessOutput().toString());
            printOutput("TaskManager ERR", taskManagerProcess.getErrorOutput().toString());
        }
        throw ExceptionUtils.firstOrSuppressed(e, programException.get());
    } finally {
        // The cleanup steps run in the same order as before, but are nested in try/finally so
        // that a failure in an earlier step (in particular a rethrown fatal error) no longer
        // skips terminating the RPC service and closing the HA services.
        try {
            if (taskManagerProcess != null) {
                taskManagerProcess.destroy();
            }
            if (dispatcherResourceManagerComponent != null) {
                dispatcherResourceManagerComponent.stopApplication(ApplicationStatus.SUCCEEDED, null);
            }
            fatalErrorHandler.rethrowError();
        } finally {
            try {
                RpcUtils.terminateRpcService(rpcService, Time.seconds(100L));
            } finally {
                haServices.closeAndCleanupAllData();
            }
        }
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Configuration(org.apache.flink.configuration.Configuration) MapFunction(org.apache.flink.api.common.functions.MapFunction) TestProcess(org.apache.flink.test.util.TestProcessBuilder.TestProcess) MemoryExecutionGraphInfoStore(org.apache.flink.runtime.dispatcher.MemoryExecutionGraphInfoStore) DispatcherResourceManagerComponent(org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponent) TaskExecutorProcessEntryPoint(org.apache.flink.test.recovery.utils.TaskExecutorProcessEntryPoint) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) AtomicReference(java.util.concurrent.atomic.AtomicReference) TaskExecutorProcessEntryPoint(org.apache.flink.test.recovery.utils.TaskExecutorProcessEntryPoint) TestProcessBuilder(org.apache.flink.test.util.TestProcessBuilder) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) TimeoutException(java.util.concurrent.TimeoutException) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) RpcService(org.apache.flink.runtime.rpc.RpcService) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) DefaultDispatcherResourceManagerComponentFactory(org.apache.flink.runtime.entrypoint.component.DefaultDispatcherResourceManagerComponentFactory) DispatcherResourceManagerComponentFactory(org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponentFactory) Test(org.junit.Test)

Aggregations

DispatcherResourceManagerComponent (org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponent)4 ArrayList (java.util.ArrayList)3 MemoryExecutionGraphInfoStore (org.apache.flink.runtime.dispatcher.MemoryExecutionGraphInfoStore)3 DispatcherResourceManagerComponentFactory (org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponentFactory)3 IOException (java.io.IOException)2 CompletableFuture (java.util.concurrent.CompletableFuture)2 CompletionException (java.util.concurrent.CompletionException)2 ExecutionException (java.util.concurrent.ExecutionException)2 GuardedBy (javax.annotation.concurrent.GuardedBy)2 Configuration (org.apache.flink.configuration.Configuration)2 IllegalConfigurationException (org.apache.flink.configuration.IllegalConfigurationException)2 JobExecutionException (org.apache.flink.runtime.client.JobExecutionException)2 DefaultDispatcherResourceManagerComponentFactory (org.apache.flink.runtime.entrypoint.component.DefaultDispatcherResourceManagerComponentFactory)2 HeartbeatServices (org.apache.flink.runtime.heartbeat.HeartbeatServices)2 HighAvailabilityServices (org.apache.flink.runtime.highavailability.HighAvailabilityServices)2 RpcService (org.apache.flink.runtime.rpc.RpcService)2 FlinkException (org.apache.flink.util.FlinkException)2 FutureUtils (org.apache.flink.util.concurrent.FutureUtils)2 InetAddress (java.net.InetAddress)1 InetSocketAddress (java.net.InetSocketAddress)1