use of org.apache.flink.runtime.rpc.RpcService in project flink by apache.
the class MetricRegistryImplTest method testQueryActorShutdown.
/**
 * Verifies that shutting down the metric registry also terminates the query service that was
 * started on it.
 */
@Test
public void testQueryActorShutdown() throws Exception {
    final RpcService testingRpcService = new TestingRpcService();
    final MetricRegistryImpl metricRegistry =
            new MetricRegistryImpl(MetricRegistryTestUtils.defaultMetricRegistryConfiguration());
    metricRegistry.startQueryService(testingRpcService, null);

    // Keep a handle on the query service so its termination future can be awaited after shutdown.
    final MetricQueryService startedQueryService = checkNotNull(metricRegistry.getQueryService());

    metricRegistry.shutdown().get();

    // Bounded wait (10 seconds) for the query service to report its termination.
    final long terminationTimeoutMillis = Duration.ofSeconds(10L).toMillis();
    startedQueryService.getTerminationFuture().get(terminationTimeoutMillis, TimeUnit.MILLISECONDS);
}
use of org.apache.flink.runtime.rpc.RpcService in project flink by apache.
the class TaskExecutorBuilder method build.
/**
 * Assembles a {@link TaskExecutor} from the values held by this builder.
 *
 * <p>The blob service, the task manager configuration and the task manager services are used as
 * set on the builder; each one that is still {@code null} is replaced by a fallback created here.
 *
 * @return the newly constructed task executor
 * @throws Exception passes on any exception raised while creating the fallback components
 */
public TaskExecutor build() throws Exception {
    TaskExecutorResourceUtils.adjustForLocalExecution(configuration);

    // Fall back to the no-op blob service when none was supplied.
    final TaskExecutorBlobService effectiveBlobService =
            taskExecutorBlobService != null
                    ? taskExecutorBlobService
                    : NoOpTaskExecutorBlobService.INSTANCE;

    // Derive the task manager configuration from the builder's configuration unless one was given.
    final TaskManagerConfiguration effectiveTaskManagerConfiguration =
            taskManagerConfiguration != null
                    ? taskManagerConfiguration
                    : TaskManagerConfiguration.fromConfiguration(
                            configuration,
                            taskExecutorResourceSpec,
                            rpcService.getAddress(),
                            workingDirectory.getTmpDirectory());

    final TaskManagerServices effectiveTaskManagerServices;
    if (taskManagerServices != null) {
        effectiveTaskManagerServices = taskManagerServices;
    } else {
        final TaskManagerServicesConfiguration servicesConfiguration =
                TaskManagerServicesConfiguration.fromConfiguration(
                        configuration,
                        resourceId,
                        rpcService.getAddress(),
                        true,
                        taskExecutorResourceSpec,
                        workingDirectory);
        effectiveTaskManagerServices =
                TaskManagerServices.fromConfiguration(
                        servicesConfiguration,
                        VoidPermanentBlobService.INSTANCE,
                        UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
                        Executors.newDirectExecutorService(),
                        throwable -> {
                            // intentionally empty: the callback passed here ignores its argument
                        },
                        workingDirectory);
    }

    return new TaskExecutor(
            rpcService,
            effectiveTaskManagerConfiguration,
            haServices,
            effectiveTaskManagerServices,
            externalResourceInfoProvider,
            heartbeatServices,
            taskManagerMetricGroup,
            metricQueryServiceAddress,
            effectiveBlobService,
            fatalErrorHandler,
            partitionTracker);
}
use of org.apache.flink.runtime.rpc.RpcService in project flink by apache.
the class MiniCluster method start.
/**
 * Starts the mini cluster, based on the configured properties.
 *
 * <p>The whole start-up runs while holding {@code lock} and is guarded by a check on the
 * {@code running} flag. If any step throws an {@link Exception}, {@code close()} is invoked to
 * clean up and the original exception is rethrown; an exception thrown by {@code close()} is
 * attached to it as a suppressed exception. The {@code running} flag and a fresh termination
 * future are only set after every step has succeeded.
 *
 * @throws Exception This method passes on any exception that occurs during the startup of the
 * mini cluster.
 */
public void start() throws Exception {
synchronized (lock) {
// guard against starting a cluster that is already marked as running
checkState(!running, "MiniCluster is already running");
LOG.info("Starting Flink Mini Cluster");
LOG.debug("Using configuration {}", miniClusterConfiguration);
final Configuration configuration = miniClusterConfiguration.getConfiguration();
// SHARED selects the first branch below, where one common RPC service backs both factories
final boolean useSingleRpcService = miniClusterConfiguration.getRpcServiceSharing() == RpcServiceSharing.SHARED;
try {
// working directory named "minicluster_" followed by a freshly generated ResourceID
workingDirectory = WorkingDirectory.create(ClusterEntrypointUtils.generateWorkingDirectoryFile(configuration, Optional.empty(), "minicluster_" + ResourceID.generate()));
initializeIOFormatClasses(configuration);
rpcSystem = rpcSystemSupplier.get();
LOG.info("Starting Metrics Registry");
metricRegistry = createMetricRegistry(configuration, rpcSystem.deref().getMaximumMessageSizeInBytes(configuration));
// bring up all the RPC services
LOG.info("Starting RPC Service(s)");
final RpcServiceFactory dispatcherResourceManagerComponentRpcServiceFactory;
final RpcService metricQueryServiceRpcService;
if (useSingleRpcService) {
// we always need the 'commonRpcService' for auxiliary calls
commonRpcService = createLocalRpcService(configuration, rpcSystem.deref());
// task managers and the dispatcher/resource manager components use the same factory
final CommonRpcServiceFactory commonRpcServiceFactory = new CommonRpcServiceFactory(commonRpcService);
taskManagerRpcServiceFactory = commonRpcServiceFactory;
dispatcherResourceManagerComponentRpcServiceFactory = commonRpcServiceFactory;
// the metric query service still gets its own RPC service, separate from commonRpcService
metricQueryServiceRpcService = MetricUtils.startLocalMetricsRpcService(configuration, rpcSystem.deref());
} else {
// start a new service per component, possibly with custom bind addresses
final String jobManagerExternalAddress = miniClusterConfiguration.getJobManagerExternalAddress();
final String taskManagerExternalAddress = miniClusterConfiguration.getTaskManagerExternalAddress();
final String jobManagerExternalPortRange = miniClusterConfiguration.getJobManagerExternalPortRange();
final String taskManagerExternalPortRange = miniClusterConfiguration.getTaskManagerExternalPortRange();
final String jobManagerBindAddress = miniClusterConfiguration.getJobManagerBindAddress();
final String taskManagerBindAddress = miniClusterConfiguration.getTaskManagerBindAddress();
dispatcherResourceManagerComponentRpcServiceFactory = new DedicatedRpcServiceFactory(configuration, jobManagerExternalAddress, jobManagerExternalPortRange, jobManagerBindAddress, rpcSystem.deref());
taskManagerRpcServiceFactory = new DedicatedRpcServiceFactory(configuration, taskManagerExternalAddress, taskManagerExternalPortRange, taskManagerBindAddress, rpcSystem.deref());
// we always need the 'commonRpcService' for auxiliary calls
// bind to the JobManager address with port 0
commonRpcService = createRemoteRpcService(configuration, jobManagerBindAddress, 0, rpcSystem.deref());
// the metrics RPC service is started with the common service's address
// NOTE(review): the null argument is presumably an unset bind address - confirm
metricQueryServiceRpcService = MetricUtils.startRemoteMetricsRpcService(configuration, commonRpcService.getAddress(), null, rpcSystem.deref());
}
// NOTE(review): the meaning of the null second argument is not visible here - confirm against startQueryService
metricRegistry.startQueryService(metricQueryServiceRpcService, null);
processMetricGroup = MetricUtils.instantiateProcessMetricGroup(metricRegistry, RpcUtils.getHostname(commonRpcService), ConfigurationUtils.getSystemResourceMetricsProbingInterval(configuration));
// fixed-size pool for I/O work; it is handed to the HA services created next
ioExecutor = Executors.newFixedThreadPool(ClusterEntrypointUtils.getPoolSize(configuration), new ExecutorThreadFactory("mini-cluster-io"));
haServices = createHighAvailabilityServices(configuration, ioExecutor);
// blob server and blob cache both borrow the working directory's blob storage directory,
// and each is given its own blob store created by the HA services
blobServer = BlobUtils.createBlobServer(configuration, Reference.borrowed(workingDirectory.getBlobStorageDirectory()), haServices.createBlobStore());
blobServer.start();
heartbeatServices = HeartbeatServices.fromConfiguration(configuration);
// the blob cache is pointed at the local host on the port the blob server reports after start()
blobCacheService = BlobUtils.createBlobCacheService(configuration, Reference.borrowed(workingDirectory.getBlobStorageDirectory()), haServices.createBlobStore(), new InetSocketAddress(InetAddress.getLocalHost(), blobServer.getPort()));
startTaskManagers();
// the retriever is built on the RPC service held by the metric registry
MetricQueryServiceRetriever metricQueryServiceRetriever = new RpcMetricQueryServiceRetriever(metricRegistry.getMetricQueryServiceRpcService());
setupDispatcherResourceManagerComponents(configuration, dispatcherResourceManagerComponentRpcServiceFactory, metricQueryServiceRetriever);
resourceManagerLeaderRetriever = haServices.getResourceManagerLeaderRetriever();
dispatcherLeaderRetriever = haServices.getDispatcherLeaderRetriever();
clusterRestEndpointLeaderRetrievalService = haServices.getClusterRestEndpointLeaderRetriever();
// both gateway retrievers go through commonRpcService and use identical retry settings (21, 5 ms, 20 ms)
// NOTE(review): the parameter meanings of ExponentialBackoffRetryStrategy are not visible here - confirm
dispatcherGatewayRetriever = new RpcGatewayRetriever<>(commonRpcService, DispatcherGateway.class, DispatcherId::fromUuid, new ExponentialBackoffRetryStrategy(21, Duration.ofMillis(5L), Duration.ofMillis(20L)));
resourceManagerGatewayRetriever = new RpcGatewayRetriever<>(commonRpcService, ResourceManagerGateway.class, ResourceManagerId::fromUuid, new ExponentialBackoffRetryStrategy(21, Duration.ofMillis(5L), Duration.ofMillis(20L)));
webMonitorLeaderRetriever = new LeaderRetriever();
// start the three leader retrieval services, each with the retriever created for it above
resourceManagerLeaderRetriever.start(resourceManagerGatewayRetriever);
dispatcherLeaderRetriever.start(dispatcherGatewayRetriever);
clusterRestEndpointLeaderRetrievalService.start(webMonitorLeaderRetriever);
} catch (Exception e) {
// cleanup everything
try {
close();
} catch (Exception ee) {
// keep the start-up failure as the primary exception
e.addSuppressed(ee);
}
throw e;
}
// only reached when start-up succeeded, because the catch block above always rethrows
// create a new termination future
terminationFuture = new CompletableFuture<>();
// now officially mark this as running
running = true;
LOG.info("Flink Mini Cluster started successfully");
}
}
use of org.apache.flink.runtime.rpc.RpcService in project flink by apache.
the class MetricUtilsTest method testStartMetricActorSystemRespectsThreadPriority.
/**
 * Tests that the RPC service started by {@link MetricUtils#startRemoteMetricsRpcService} runs its
 * work on threads with the priority configured via
 * {@link MetricOptions#QUERY_SERVICE_THREAD_PRIORITY}.
 */
@Test
public void testStartMetricActorSystemRespectsThreadPriority() throws Exception {
    final int configuredPriority = 3;
    final Configuration metricsConfiguration = new Configuration();
    metricsConfiguration.setInteger(MetricOptions.QUERY_SERVICE_THREAD_PRIORITY, configuredPriority);

    final RpcService metricsRpcService =
            MetricUtils.startRemoteMetricsRpcService(
                    metricsConfiguration, "localhost", null, RpcSystem.load());
    try {
        // Run a task on the service and read the priority of the thread that executes it.
        final int observedPriority =
                metricsRpcService.execute(() -> Thread.currentThread().getPriority()).get();
        assertThat(observedPriority, is(configuredPriority));
    } finally {
        // Always stop the service, even when the assertion above fails.
        metricsRpcService.stopService().get();
    }
}
use of org.apache.flink.runtime.rpc.RpcService in project flink by apache.
the class ProcessFailureCancelingITCase method testCancelingOnProcessFailure.
/**
 * Tests that a submitted program fails, rather than blocking forever, when the TaskManager
 * process running its tasks is killed.
 *
 * <p>The test creates a dispatcher/resource manager component in this JVM, launches a
 * TaskManager in a separate process, submits a job whose map function blocks indefinitely, kills
 * the TaskManager once a task has been deployed and then expects the submitting thread to end
 * with a {@link ProgramInvocationException}.
 *
 * <p>Cleanup is layered with nested {@code try/finally} blocks so that the RPC service and the
 * high-availability services are released even if stopping the component fails or the fatal
 * error handler rethrows a recorded error.
 *
 * @throws Throwable any failure of the test itself, or an error rethrown by the fatal error
 *     handler
 */
@Test
public void testCancelingOnProcessFailure() throws Throwable {
    Assume.assumeTrue("---- Skipping Process Failure test : Could not find java executable ----", getJavaCommandPath() != null);

    TestProcess taskManagerProcess = null;
    final TestingFatalErrorHandler fatalErrorHandler = new TestingFatalErrorHandler();

    Configuration config = new Configuration();
    config.setString(JobManagerOptions.ADDRESS, "localhost");
    config.set(AkkaOptions.ASK_TIMEOUT_DURATION, Duration.ofSeconds(100));
    config.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");
    config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, zooKeeperResource.getConnectString());
    config.setString(HighAvailabilityOptions.HA_STORAGE_PATH, temporaryFolder.newFolder().getAbsolutePath());
    config.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, 2);
    config.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, MemorySize.parse("4m"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MIN, MemorySize.parse("3200k"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MAX, MemorySize.parse("3200k"));
    config.set(TaskManagerOptions.TASK_HEAP_MEMORY, MemorySize.parse("128m"));
    config.set(TaskManagerOptions.CPU_CORES, 1.0);
    config.setInteger(RestOptions.PORT, 0);

    // Start the RPC service first and publish the port it reports as the JobManager port, so
    // that the externally started TaskManager process receives it through the configuration.
    final RpcService rpcService = RpcSystem.load().remoteServiceBuilder(config, "localhost", "0").createAndStart();
    final int jobManagerPort = rpcService.getPort();
    config.setInteger(JobManagerOptions.PORT, jobManagerPort);

    final DispatcherResourceManagerComponentFactory resourceManagerComponentFactory = DefaultDispatcherResourceManagerComponentFactory.createSessionComponentFactory(StandaloneResourceManagerFactory.getInstance());
    DispatcherResourceManagerComponent dispatcherResourceManagerComponent = null;

    final ScheduledExecutorService ioExecutor = TestingUtils.defaultExecutor();
    final HighAvailabilityServices haServices = HighAvailabilityServicesUtils.createHighAvailabilityServices(config, ioExecutor, AddressResolution.NO_ADDRESS_RESOLUTION, RpcSystem.load(), NoOpFatalErrorHandler.INSTANCE);

    // Holds whatever the submitting thread failed with; inspected after the thread has ended.
    final AtomicReference<Throwable> programException = new AtomicReference<>();

    try {
        dispatcherResourceManagerComponent = resourceManagerComponentFactory.create(config, ResourceID.generate(), ioExecutor, rpcService, haServices, blobServerResource.getBlobServer(), new HeartbeatServices(100L, 10000L, 2), NoOpMetricRegistry.INSTANCE, new MemoryExecutionGraphInfoStore(), VoidMetricQueryServiceRetriever.INSTANCE, fatalErrorHandler);

        TestProcessBuilder taskManagerProcessBuilder = new TestProcessBuilder(TaskExecutorProcessEntryPoint.class.getName());
        taskManagerProcessBuilder.addConfigAsMainClassArgs(config);
        taskManagerProcess = taskManagerProcessBuilder.start();

        // start the test program, which infinitely blocks
        Runnable programRunner = new Runnable() {
            @Override
            public void run() {
                try {
                    ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment("localhost", 1337, config);
                    env.setParallelism(2);
                    env.setRestartStrategy(RestartStrategies.noRestart());
                    env.generateSequence(0, Long.MAX_VALUE).map(new MapFunction<Long, Long>() {
                        @Override
                        public Long map(Long value) throws Exception {
                            synchronized (this) {
                                // print the marker that signals a task is running, then block
                                System.out.println(TASK_DEPLOYED_MARKER);
                                wait();
                            }
                            return 0L;
                        }
                    }).output(new DiscardingOutputFormat<>());
                    env.execute();
                } catch (Throwable t) {
                    programException.set(t);
                }
            }
        };

        Thread programThread = new Thread(programRunner);
        programThread.start();

        waitUntilAtLeastOneTaskHasBeenDeployed(taskManagerProcess);

        // kill the TaskManager after the job started to run
        taskManagerProcess.destroy();
        // cleared so that the catch/finally blocks below do not touch the destroyed process again
        taskManagerProcess = null;

        // the job should fail within a few seconds due to heartbeat timeouts
        // since the CI environment is often slow, the wait is conservatively bounded by TIMEOUT
        programThread.join(TIMEOUT.toMillis());

        assertFalse("The program did not cancel in time", programThread.isAlive());

        Throwable error = programException.get();
        assertNotNull("The program did not fail properly", error);
        assertTrue(error instanceof ProgramInvocationException);
        // all seems well :-)
    } catch (Exception | Error e) {
        // dump the TaskManager output to help diagnose the failure (only if it is still around)
        if (taskManagerProcess != null) {
            printOutput("TaskManager OUT", taskManagerProcess.getProcessOutput().toString());
            printOutput("TaskManager ERR", taskManagerProcess.getErrorOutput().toString());
        }
        throw ExceptionUtils.firstOrSuppressed(e, programException.get());
    } finally {
        try {
            if (taskManagerProcess != null) {
                taskManagerProcess.destroy();
            }
            if (dispatcherResourceManagerComponent != null) {
                dispatcherResourceManagerComponent.stopApplication(ApplicationStatus.SUCCEEDED, null);
            }
            fatalErrorHandler.rethrowError();
        } finally {
            // release the RPC service and the HA services even if the steps above threw
            try {
                RpcUtils.terminateRpcService(rpcService, Time.seconds(100L));
            } finally {
                haServices.closeAndCleanupAllData();
            }
        }
    }
}
Aggregations