Search in sources :

Example 21 with RpcService

use of org.apache.flink.runtime.rpc.RpcService in project flink by apache.

the class MetricRegistryImplTest method testQueryActorShutdown.

/**
 * Verifies that the query service started by a {@link MetricRegistryImpl} terminates once the
 * registry itself has been shut down.
 */
@Test
public void testQueryActorShutdown() throws Exception {
    final long terminationTimeoutMillis = Duration.ofSeconds(10L).toMillis();
    final MetricRegistryImpl metricRegistry = new MetricRegistryImpl(MetricRegistryTestUtils.defaultMetricRegistryConfiguration());
    final RpcService testingRpcService = new TestingRpcService();
    // start the query service on the testing RPC service and keep a handle to it
    metricRegistry.startQueryService(testingRpcService, null);
    final MetricQueryService startedQueryService = checkNotNull(metricRegistry.getQueryService());
    // wait for the registry shutdown to complete ...
    metricRegistry.shutdown().get();
    // ... after which the query service must terminate within the timeout
    startedQueryService.getTerminationFuture().get(terminationTimeoutMillis, TimeUnit.MILLISECONDS);
}
Also used : MetricQueryService(org.apache.flink.runtime.metrics.dump.MetricQueryService) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) RpcService(org.apache.flink.runtime.rpc.RpcService) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) Duration(java.time.Duration) MetricGroupTest(org.apache.flink.runtime.metrics.groups.MetricGroupTest) Test(org.junit.Test)

Example 22 with RpcService

use of org.apache.flink.runtime.rpc.RpcService in project flink by apache.

the class TaskExecutorBuilder method build.

/**
 * Assembles a {@link TaskExecutor} from the values held by this builder.
 *
 * <p>Any of the blob service, the task manager configuration and the task manager services that
 * was not set explicitly (i.e. is {@code null}) is replaced by a default derived from the
 * builder's configuration.
 *
 * @return the newly constructed task executor
 * @throws Exception if one of the default components cannot be created
 */
public TaskExecutor build() throws Exception {
    TaskExecutorResourceUtils.adjustForLocalExecution(configuration);

    // fall back to the no-op blob service when none was supplied
    final TaskExecutorBlobService blobServiceToUse = taskExecutorBlobService != null ? taskExecutorBlobService : NoOpTaskExecutorBlobService.INSTANCE;

    // derive the task manager configuration from the Flink configuration when none was supplied
    final TaskManagerConfiguration taskManagerConfigurationToUse = taskManagerConfiguration != null ? taskManagerConfiguration : TaskManagerConfiguration.fromConfiguration(configuration, taskExecutorResourceSpec, rpcService.getAddress(), workingDirectory.getTmpDirectory());

    final TaskManagerServices taskManagerServicesToUse;
    if (taskManagerServices != null) {
        taskManagerServicesToUse = taskManagerServices;
    } else {
        final TaskManagerServicesConfiguration servicesConfiguration = TaskManagerServicesConfiguration.fromConfiguration(configuration, resourceId, rpcService.getAddress(), true, taskExecutorResourceSpec, workingDirectory);
        // the default services use a direct executor and a fatal error handler that ignores errors
        taskManagerServicesToUse = TaskManagerServices.fromConfiguration(servicesConfiguration, VoidPermanentBlobService.INSTANCE, UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(), Executors.newDirectExecutorService(), throwable -> {
        }, workingDirectory);
    }

    return new TaskExecutor(rpcService, taskManagerConfigurationToUse, haServices, taskManagerServicesToUse, externalResourceInfoProvider, heartbeatServices, taskManagerMetricGroup, metricQueryServiceAddress, blobServiceToUse, fatalErrorHandler, partitionTracker);
}
Also used : TaskExecutorBlobService(org.apache.flink.runtime.blob.TaskExecutorBlobService) VoidPermanentBlobService(org.apache.flink.runtime.blob.VoidPermanentBlobService) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) Configuration(org.apache.flink.configuration.Configuration) NoOpTaskExecutorBlobService(org.apache.flink.runtime.blob.NoOpTaskExecutorBlobService) TestingTaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TestingTaskExecutorPartitionTracker) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) MemorySize(org.apache.flink.configuration.MemorySize) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) Executors(org.apache.flink.util.concurrent.Executors) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) RpcService(org.apache.flink.runtime.rpc.RpcService) WorkingDirectory(org.apache.flink.runtime.entrypoint.WorkingDirectory) TaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTracker) ExternalResourceInfoProvider(org.apache.flink.runtime.externalresource.ExternalResourceInfoProvider) FatalErrorHandler(org.apache.flink.runtime.rpc.FatalErrorHandler) BlobCacheService(org.apache.flink.runtime.blob.BlobCacheService) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) NoOpFatalErrorHandler(org.apache.flink.runtime.rest.util.NoOpFatalErrorHandler) Collections(java.util.Collections) Nullable(javax.annotation.Nullable) CPUResource(org.apache.flink.api.common.resources.CPUResource) TaskExecutorBlobService(org.apache.flink.runtime.blob.TaskExecutorBlobService) NoOpTaskExecutorBlobService(org.apache.flink.runtime.blob.NoOpTaskExecutorBlobService)

Example 23 with RpcService

use of org.apache.flink.runtime.rpc.RpcService in project flink by apache.

the class MiniCluster method start.

/**
 * Starts the mini cluster, based on the configured properties.
 *
 * <p>The whole startup runs while holding {@code lock}. The method first checks (via {@code
 * checkState}) that the cluster is not already running; that check happens before the startup
 * {@code try} block, so a failed check does not trigger any cleanup. If any step of the actual
 * startup throws, {@link #close()} is invoked to clean up, an exception thrown by {@code
 * close()} is attached as a suppressed exception, and the original exception is rethrown.
 *
 * <p>Only after all components have been started is a new termination future created and the
 * {@code running} flag set.
 *
 * @throws Exception This method passes on any exception that occurs during the startup of the
 *     mini cluster.
 */
public void start() throws Exception {
    synchronized (lock) {
        checkState(!running, "MiniCluster is already running");
        LOG.info("Starting Flink Mini Cluster");
        LOG.debug("Using configuration {}", miniClusterConfiguration);
        final Configuration configuration = miniClusterConfiguration.getConfiguration();
        // SHARED means all components reuse the single 'commonRpcService'
        final boolean useSingleRpcService = miniClusterConfiguration.getRpcServiceSharing() == RpcServiceSharing.SHARED;
        try {
            // the working directory name carries a freshly generated resource id
            workingDirectory = WorkingDirectory.create(ClusterEntrypointUtils.generateWorkingDirectoryFile(configuration, Optional.empty(), "minicluster_" + ResourceID.generate()));
            initializeIOFormatClasses(configuration);
            rpcSystem = rpcSystemSupplier.get();
            LOG.info("Starting Metrics Registry");
            // the registry is created with the RPC system's maximum message size
            metricRegistry = createMetricRegistry(configuration, rpcSystem.deref().getMaximumMessageSizeInBytes(configuration));
            // bring up all the RPC services
            LOG.info("Starting RPC Service(s)");
            final RpcServiceFactory dispatcherResourceManagerComponentRpcServiceFactory;
            final RpcService metricQueryServiceRpcService;
            if (useSingleRpcService) {
                // we always need the 'commonRpcService' for auxiliary calls
                commonRpcService = createLocalRpcService(configuration, rpcSystem.deref());
                // task managers and dispatcher/resource manager components share one factory
                final CommonRpcServiceFactory commonRpcServiceFactory = new CommonRpcServiceFactory(commonRpcService);
                taskManagerRpcServiceFactory = commonRpcServiceFactory;
                dispatcherResourceManagerComponentRpcServiceFactory = commonRpcServiceFactory;
                // the metric query service still gets its own (local) RPC service
                metricQueryServiceRpcService = MetricUtils.startLocalMetricsRpcService(configuration, rpcSystem.deref());
            } else {
                // start a new service per component, possibly with custom bind addresses
                final String jobManagerExternalAddress = miniClusterConfiguration.getJobManagerExternalAddress();
                final String taskManagerExternalAddress = miniClusterConfiguration.getTaskManagerExternalAddress();
                final String jobManagerExternalPortRange = miniClusterConfiguration.getJobManagerExternalPortRange();
                final String taskManagerExternalPortRange = miniClusterConfiguration.getTaskManagerExternalPortRange();
                final String jobManagerBindAddress = miniClusterConfiguration.getJobManagerBindAddress();
                final String taskManagerBindAddress = miniClusterConfiguration.getTaskManagerBindAddress();
                dispatcherResourceManagerComponentRpcServiceFactory = new DedicatedRpcServiceFactory(configuration, jobManagerExternalAddress, jobManagerExternalPortRange, jobManagerBindAddress, rpcSystem.deref());
                taskManagerRpcServiceFactory = new DedicatedRpcServiceFactory(configuration, taskManagerExternalAddress, taskManagerExternalPortRange, taskManagerBindAddress, rpcSystem.deref());
                // we always need the 'commonRpcService' for auxiliary calls
                // bind to the JobManager address with port 0
                commonRpcService = createRemoteRpcService(configuration, jobManagerBindAddress, 0, rpcSystem.deref());
                // the remote metrics RPC service uses the address of the common RPC service
                metricQueryServiceRpcService = MetricUtils.startRemoteMetricsRpcService(configuration, commonRpcService.getAddress(), null, rpcSystem.deref());
            }
            metricRegistry.startQueryService(metricQueryServiceRpcService, null);
            processMetricGroup = MetricUtils.instantiateProcessMetricGroup(metricRegistry, RpcUtils.getHostname(commonRpcService), ConfigurationUtils.getSystemResourceMetricsProbingInterval(configuration));
            // the I/O executor is created before the HA services because they are built on top of it
            ioExecutor = Executors.newFixedThreadPool(ClusterEntrypointUtils.getPoolSize(configuration), new ExecutorThreadFactory("mini-cluster-io"));
            haServices = createHighAvailabilityServices(configuration, ioExecutor);
            // blob server and blob cache service both use the working directory's blob storage
            // directory; each one requests its own blob store from the HA services
            blobServer = BlobUtils.createBlobServer(configuration, Reference.borrowed(workingDirectory.getBlobStorageDirectory()), haServices.createBlobStore());
            blobServer.start();
            heartbeatServices = HeartbeatServices.fromConfiguration(configuration);
            // the blob cache connects to the blob server started above on the local host
            blobCacheService = BlobUtils.createBlobCacheService(configuration, Reference.borrowed(workingDirectory.getBlobStorageDirectory()), haServices.createBlobStore(), new InetSocketAddress(InetAddress.getLocalHost(), blobServer.getPort()));
            // task managers are started before the dispatcher/resource manager components
            startTaskManagers();
            MetricQueryServiceRetriever metricQueryServiceRetriever = new RpcMetricQueryServiceRetriever(metricRegistry.getMetricQueryServiceRpcService());
            setupDispatcherResourceManagerComponents(configuration, dispatcherResourceManagerComponentRpcServiceFactory, metricQueryServiceRetriever);
            // obtain the leader retrieval services for the individual components
            resourceManagerLeaderRetriever = haServices.getResourceManagerLeaderRetriever();
            dispatcherLeaderRetriever = haServices.getDispatcherLeaderRetriever();
            clusterRestEndpointLeaderRetrievalService = haServices.getClusterRestEndpointLeaderRetriever();
            // both gateway retrievers go through the common RPC service and use identically
            // parameterized exponential backoff retry strategies
            dispatcherGatewayRetriever = new RpcGatewayRetriever<>(commonRpcService, DispatcherGateway.class, DispatcherId::fromUuid, new ExponentialBackoffRetryStrategy(21, Duration.ofMillis(5L), Duration.ofMillis(20L)));
            resourceManagerGatewayRetriever = new RpcGatewayRetriever<>(commonRpcService, ResourceManagerGateway.class, ResourceManagerId::fromUuid, new ExponentialBackoffRetryStrategy(21, Duration.ofMillis(5L), Duration.ofMillis(20L)));
            webMonitorLeaderRetriever = new LeaderRetriever();
            // start leader retrieval, wiring each retrieval service to its listener
            resourceManagerLeaderRetriever.start(resourceManagerGatewayRetriever);
            dispatcherLeaderRetriever.start(dispatcherGatewayRetriever);
            clusterRestEndpointLeaderRetrievalService.start(webMonitorLeaderRetriever);
        } catch (Exception e) {
            // cleanup everything
            try {
                close();
            } catch (Exception ee) {
                // keep the startup failure as the primary exception
                e.addSuppressed(ee);
            }
            throw e;
        }
        // create a new termination future
        terminationFuture = new CompletableFuture<>();
        // now officially mark this as running
        running = true;
        LOG.info("Flink Mini Cluster started successfully");
    }
}
Also used : RpcMetricQueryServiceRetriever(org.apache.flink.runtime.webmonitor.retriever.impl.RpcMetricQueryServiceRetriever) MetricQueryServiceRetriever(org.apache.flink.runtime.webmonitor.retriever.MetricQueryServiceRetriever) ExponentialBackoffRetryStrategy(org.apache.flink.util.concurrent.ExponentialBackoffRetryStrategy) MetricRegistryConfiguration(org.apache.flink.runtime.metrics.MetricRegistryConfiguration) Configuration(org.apache.flink.configuration.Configuration) InetSocketAddress(java.net.InetSocketAddress) RpcMetricQueryServiceRetriever(org.apache.flink.runtime.webmonitor.retriever.impl.RpcMetricQueryServiceRetriever) DispatcherGateway(org.apache.flink.runtime.dispatcher.DispatcherGateway) FlinkException(org.apache.flink.util.FlinkException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) CompletionException(java.util.concurrent.CompletionException) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) ExecutorThreadFactory(org.apache.flink.util.concurrent.ExecutorThreadFactory) RpcService(org.apache.flink.runtime.rpc.RpcService) LeaderRetriever(org.apache.flink.runtime.webmonitor.retriever.LeaderRetriever)

Example 24 with RpcService

use of org.apache.flink.runtime.rpc.RpcService in project flink by apache.

the class MetricUtilsTest method testStartMetricActorSystemRespectsThreadPriority.

/**
 * Tests that the RPC service created by {@link MetricUtils#startRemoteMetricsRpcService} runs
 * its tasks on threads with the priority configured through {@link
 * MetricOptions#QUERY_SERVICE_THREAD_PRIORITY}.
 */
@Test
public void testStartMetricActorSystemRespectsThreadPriority() throws Exception {
    final int expectedThreadPriority = 3;
    final Configuration configuration = new Configuration();
    configuration.setInteger(MetricOptions.QUERY_SERVICE_THREAD_PRIORITY, expectedThreadPriority);
    final RpcService metricsRpcService = MetricUtils.startRemoteMetricsRpcService(configuration, "localhost", null, RpcSystem.load());
    try {
        // query the priority from a thread of the RPC service itself
        final int observedThreadPriority = metricsRpcService.execute(() -> Thread.currentThread().getPriority()).get();
        assertThat(observedThreadPriority, is(expectedThreadPriority));
    } finally {
        // always stop the service, even if the assertion failed
        metricsRpcService.stopService().get();
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) RpcService(org.apache.flink.runtime.rpc.RpcService) Test(org.junit.Test)

Example 25 with RpcService

use of org.apache.flink.runtime.rpc.RpcService in project flink by apache.

the class ProcessFailureCancelingITCase method testCancelingOnProcessFailure.

/**
 * Tests that a running program fails with a {@link ProgramInvocationException} once the
 * TaskManager process executing it is killed.
 *
 * <p>The test starts a dispatcher/resource manager component in this JVM and a TaskManager in
 * a separate process, submits a job whose tasks block forever from a background thread,
 * destroys the TaskManager process after at least one task was deployed, and then expects the
 * submitting thread to finish with a failure within {@code TIMEOUT}.
 *
 * <p>All resources created by the test (TaskManager process, dispatcher/resource manager
 * component, RPC service, high-availability services) are released in nested {@code finally}
 * blocks so that a failure while releasing one of them does not leak the others.
 */
@Test
public void testCancelingOnProcessFailure() throws Throwable {
    Assume.assumeTrue("---- Skipping Process Failure test : Could not find java executable ----", getJavaCommandPath() != null);
    TestProcess taskManagerProcess = null;
    final TestingFatalErrorHandler fatalErrorHandler = new TestingFatalErrorHandler();
    Configuration config = new Configuration();
    config.setString(JobManagerOptions.ADDRESS, "localhost");
    config.set(AkkaOptions.ASK_TIMEOUT_DURATION, Duration.ofSeconds(100));
    config.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");
    config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, zooKeeperResource.getConnectString());
    config.setString(HighAvailabilityOptions.HA_STORAGE_PATH, temporaryFolder.newFolder().getAbsolutePath());
    config.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, 2);
    config.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, MemorySize.parse("4m"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MIN, MemorySize.parse("3200k"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MAX, MemorySize.parse("3200k"));
    config.set(TaskManagerOptions.TASK_HEAP_MEMORY, MemorySize.parse("128m"));
    config.set(TaskManagerOptions.CPU_CORES, 1.0);
    config.setInteger(RestOptions.PORT, 0);
    // start the RPC service first so that the port it was actually bound to can be published
    // as the JobManager port in the configuration
    final RpcService rpcService = RpcSystem.load().remoteServiceBuilder(config, "localhost", "0").createAndStart();
    final int jobManagerPort = rpcService.getPort();
    config.setInteger(JobManagerOptions.PORT, jobManagerPort);
    final DispatcherResourceManagerComponentFactory resourceManagerComponentFactory = DefaultDispatcherResourceManagerComponentFactory.createSessionComponentFactory(StandaloneResourceManagerFactory.getInstance());
    DispatcherResourceManagerComponent dispatcherResourceManagerComponent = null;
    final ScheduledExecutorService ioExecutor = TestingUtils.defaultExecutor();
    final HighAvailabilityServices haServices = HighAvailabilityServicesUtils.createHighAvailabilityServices(config, ioExecutor, AddressResolution.NO_ADDRESS_RESOLUTION, RpcSystem.load(), NoOpFatalErrorHandler.INSTANCE);
    // carries a failure of the program thread over to the test thread
    final AtomicReference<Throwable> programException = new AtomicReference<>();
    try {
        dispatcherResourceManagerComponent = resourceManagerComponentFactory.create(config, ResourceID.generate(), ioExecutor, rpcService, haServices, blobServerResource.getBlobServer(), new HeartbeatServices(100L, 10000L, 2), NoOpMetricRegistry.INSTANCE, new MemoryExecutionGraphInfoStore(), VoidMetricQueryServiceRetriever.INSTANCE, fatalErrorHandler);
        // the TaskManager runs in its own process so that it can be killed independently
        TestProcessBuilder taskManagerProcessBuilder = new TestProcessBuilder(TaskExecutorProcessEntryPoint.class.getName());
        taskManagerProcessBuilder.addConfigAsMainClassArgs(config);
        taskManagerProcess = taskManagerProcessBuilder.start();
        // start the test program, which infinitely blocks
        Runnable programRunner = new Runnable() {

            @Override
            public void run() {
                try {
                    ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment("localhost", 1337, config);
                    env.setParallelism(2);
                    env.setRestartStrategy(RestartStrategies.noRestart());
                    env.generateSequence(0, Long.MAX_VALUE).map(new MapFunction<Long, Long>() {

                        @Override
                        public Long map(Long value) throws Exception {
                            synchronized (this) {
                                // signal the deployment via stdout, then block forever
                                System.out.println(TASK_DEPLOYED_MARKER);
                                wait();
                            }
                            return 0L;
                        }
                    }).output(new DiscardingOutputFormat<>());
                    env.execute();
                } catch (Throwable t) {
                    programException.set(t);
                }
            }
        };
        Thread programThread = new Thread(programRunner);
        programThread.start();
        waitUntilAtLeastOneTaskHasBeenDeployed(taskManagerProcess);
        // kill the TaskManager after the job started to run
        taskManagerProcess.destroy();
        // the process is gone; make sure neither the catch nor the finally block touches it again
        taskManagerProcess = null;
        // the job should fail within a few seconds due to heartbeat timeouts
        // since the CI environment is often slow, we conservatively wait for up to TIMEOUT
        programThread.join(TIMEOUT.toMillis());
        assertFalse("The program did not cancel in time", programThread.isAlive());
        Throwable error = programException.get();
        assertNotNull("The program did not fail properly", error);
        assertTrue(error instanceof ProgramInvocationException);
        // all seems well :-)
    } catch (Exception | Error e) {
        // dump the TaskManager output to ease debugging if the process is still around
        if (taskManagerProcess != null) {
            printOutput("TaskManager OUT", taskManagerProcess.getProcessOutput().toString());
            printOutput("TaskManager ERR", taskManagerProcess.getErrorOutput().toString());
        }
        throw ExceptionUtils.firstOrSuppressed(e, programException.get());
    } finally {
        // The cleanup steps run in the same order as before, but are nested in try/finally
        // blocks: a failure while stopping the components or a rethrown fatal error must not
        // prevent the RPC service from being terminated or the HA data from being cleaned up.
        try {
            if (taskManagerProcess != null) {
                taskManagerProcess.destroy();
            }
            if (dispatcherResourceManagerComponent != null) {
                dispatcherResourceManagerComponent.stopApplication(ApplicationStatus.SUCCEEDED, null);
            }
            fatalErrorHandler.rethrowError();
        } finally {
            try {
                RpcUtils.terminateRpcService(rpcService, Time.seconds(100L));
            } finally {
                haServices.closeAndCleanupAllData();
            }
        }
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Configuration(org.apache.flink.configuration.Configuration) MapFunction(org.apache.flink.api.common.functions.MapFunction) TestProcess(org.apache.flink.test.util.TestProcessBuilder.TestProcess) MemoryExecutionGraphInfoStore(org.apache.flink.runtime.dispatcher.MemoryExecutionGraphInfoStore) DispatcherResourceManagerComponent(org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponent) TaskExecutorProcessEntryPoint(org.apache.flink.test.recovery.utils.TaskExecutorProcessEntryPoint) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) AtomicReference(java.util.concurrent.atomic.AtomicReference) TaskExecutorProcessEntryPoint(org.apache.flink.test.recovery.utils.TaskExecutorProcessEntryPoint) TestProcessBuilder(org.apache.flink.test.util.TestProcessBuilder) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) TimeoutException(java.util.concurrent.TimeoutException) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) RpcService(org.apache.flink.runtime.rpc.RpcService) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) DefaultDispatcherResourceManagerComponentFactory(org.apache.flink.runtime.entrypoint.component.DefaultDispatcherResourceManagerComponentFactory) DispatcherResourceManagerComponentFactory(org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponentFactory) Test(org.junit.Test)

Aggregations

RpcService (org.apache.flink.runtime.rpc.RpcService)25 Test (org.junit.Test)15 Configuration (org.apache.flink.configuration.Configuration)13 HighAvailabilityServices (org.apache.flink.runtime.highavailability.HighAvailabilityServices)9 ExecutionException (java.util.concurrent.ExecutionException)8 TestingRpcService (org.apache.flink.runtime.rpc.TestingRpcService)7 UUID (java.util.UUID)6 CompletableFuture (java.util.concurrent.CompletableFuture)6 Before (org.junit.Before)6 FlinkException (org.apache.flink.util.FlinkException)5 TestLogger (org.apache.flink.util.TestLogger)5 After (org.junit.After)5 Assert.assertThat (org.junit.Assert.assertThat)5 Assert.fail (org.junit.Assert.fail)5 LoggerFactory (org.slf4j.LoggerFactory)5 IOException (java.io.IOException)4 TimeUnit (java.util.concurrent.TimeUnit)4 HeartbeatServices (org.apache.flink.runtime.heartbeat.HeartbeatServices)4 Mockito.anyString (org.mockito.Mockito.anyString)4 InetAddress (java.net.InetAddress)3