Search in sources :

Example 1 with HeartbeatServices

use of org.apache.flink.runtime.heartbeat.HeartbeatServices in project flink by apache.

the class JobManagerRunnerMockTest method setUp.

@Before
public void setUp() throws Exception {
    RpcService mockRpc = mock(RpcService.class);
    when(mockRpc.getAddress()).thenReturn("localhost");
    jobManager = mock(JobMaster.class);
    jobManagerGateway = mock(JobMasterGateway.class);
    when(jobManager.getSelf()).thenReturn(jobManagerGateway);
    when(jobManager.getRpcService()).thenReturn(mockRpc);
    PowerMockito.whenNew(JobMaster.class).withAnyArguments().thenReturn(jobManager);
    jobCompletion = new TestingOnCompletionActions();
    leaderElectionService = mock(LeaderElectionService.class);
    when(leaderElectionService.hasLeadership()).thenReturn(true);
    SubmittedJobGraphStore submittedJobGraphStore = mock(SubmittedJobGraphStore.class);
    blobStore = mock(BlobStore.class);
    HighAvailabilityServices haServices = mock(HighAvailabilityServices.class);
    when(haServices.getJobManagerLeaderElectionService(any(JobID.class))).thenReturn(leaderElectionService);
    when(haServices.getSubmittedJobGraphStore()).thenReturn(submittedJobGraphStore);
    when(haServices.createBlobStore()).thenReturn(blobStore);
    when(haServices.getRunningJobsRegistry()).thenReturn(runningJobsRegistry);
    HeartbeatServices heartbeatServices = mock(HeartbeatServices.class);
    runner = PowerMockito.spy(new JobManagerRunner(ResourceID.generate(), new JobGraph("test", new JobVertex("vertex")), mock(Configuration.class), mockRpc, haServices, heartbeatServices, JobManagerServices.fromConfiguration(new Configuration(), haServices), new MetricRegistry(MetricRegistryConfiguration.defaultMetricRegistryConfiguration()), jobCompletion, jobCompletion));
}
Also used : HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) SubmittedJobGraphStore(org.apache.flink.runtime.jobmanager.SubmittedJobGraphStore) MetricRegistryConfiguration(org.apache.flink.runtime.metrics.MetricRegistryConfiguration) Configuration(org.apache.flink.configuration.Configuration) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) RpcService(org.apache.flink.runtime.rpc.RpcService) LeaderElectionService(org.apache.flink.runtime.leaderelection.LeaderElectionService) BlobStore(org.apache.flink.runtime.blob.BlobStore) JobID(org.apache.flink.api.common.JobID) Before(org.junit.Before)

Example 2 with HeartbeatServices

use of org.apache.flink.runtime.heartbeat.HeartbeatServices in project flink by apache.

the class YarnTaskExecutorRunner method runTaskExecutor.

// ------------------------------------------------------------------------
//  Core work method
// ------------------------------------------------------------------------
/**
	 * The main work method, must run as a privileged action.
	 *
	 * @return The return code for the Java process.
	 */
protected int runTaskExecutor(Configuration config) {
    try {
        // ---- (1) create common services
        // first get the ResouceId, resource id is the container id for yarn.
        final String containerId = ENV.get(YarnFlinkResourceManager.ENV_FLINK_CONTAINER_ID);
        Preconditions.checkArgument(containerId != null, "ContainerId variable %s not set", YarnFlinkResourceManager.ENV_FLINK_CONTAINER_ID);
        // use the hostname passed by job manager
        final String taskExecutorHostname = ENV.get(YarnResourceManager.ENV_FLINK_NODE_ID);
        if (taskExecutorHostname != null) {
            config.setString(ConfigConstants.TASK_MANAGER_HOSTNAME_KEY, taskExecutorHostname);
        }
        ResourceID resourceID = new ResourceID(containerId);
        LOG.info("YARN assigned resource id {} for the task executor.", resourceID.toString());
        haServices = HighAvailabilityServicesUtils.createAvailableOrEmbeddedServices(config);
        HeartbeatServices heartbeatServices = HeartbeatServices.fromConfiguration(config);
        metricRegistry = new MetricRegistry(MetricRegistryConfiguration.fromConfiguration(config));
        // ---- (2) init task manager runner -------
        taskExecutorRpcService = TaskManagerRunner.createRpcService(config, haServices);
        taskManagerRunner = new TaskManagerRunner(config, resourceID, taskExecutorRpcService, haServices, heartbeatServices, metricRegistry);
        // ---- (3) start the task manager runner
        taskManagerRunner.start();
        LOG.debug("YARN task executor started");
        taskManagerRunner.getTerminationFuture().get();
        // everything started, we can wait until all is done or the process is killed
        LOG.info("YARN task manager runner finished");
        shutdown();
    } catch (Throwable t) {
        // make sure that everything whatever ends up in the log
        LOG.error("YARN task executor initialization failed", t);
        shutdown();
        return INIT_ERROR_EXIT_CODE;
    }
    return 0;
}
Also used : HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskManagerRunner(org.apache.flink.runtime.taskexecutor.TaskManagerRunner) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry)

Example 3 with HeartbeatServices

use of org.apache.flink.runtime.heartbeat.HeartbeatServices in project flink by apache.

the class TaskManagerRunner method startTaskManagerRunnerServices.

private void startTaskManagerRunnerServices() throws Exception {
    synchronized (lock) {
        rpcSystem = RpcSystem.load(configuration);
        this.executor = Executors.newScheduledThreadPool(Hardware.getNumberCPUCores(), new ExecutorThreadFactory("taskmanager-future"));
        highAvailabilityServices = HighAvailabilityServicesUtils.createHighAvailabilityServices(configuration, executor, AddressResolution.NO_ADDRESS_RESOLUTION, rpcSystem, this);
        JMXService.startInstance(configuration.getString(JMXServerOptions.JMX_SERVER_PORT));
        rpcService = createRpcService(configuration, highAvailabilityServices, rpcSystem);
        this.resourceId = getTaskManagerResourceID(configuration, rpcService.getAddress(), rpcService.getPort());
        this.workingDirectory = ClusterEntrypointUtils.createTaskManagerWorkingDirectory(configuration, resourceId);
        LOG.info("Using working directory: {}", workingDirectory);
        HeartbeatServices heartbeatServices = HeartbeatServices.fromConfiguration(configuration);
        metricRegistry = new MetricRegistryImpl(MetricRegistryConfiguration.fromConfiguration(configuration, rpcSystem.getMaximumMessageSizeInBytes(configuration)), ReporterSetup.fromConfiguration(configuration, pluginManager));
        final RpcService metricQueryServiceRpcService = MetricUtils.startRemoteMetricsRpcService(configuration, rpcService.getAddress(), configuration.getString(TaskManagerOptions.BIND_HOST), rpcSystem);
        metricRegistry.startQueryService(metricQueryServiceRpcService, resourceId.unwrap());
        blobCacheService = BlobUtils.createBlobCacheService(configuration, Reference.borrowed(workingDirectory.unwrap().getBlobStorageDirectory()), highAvailabilityServices.createBlobStore(), null);
        final ExternalResourceInfoProvider externalResourceInfoProvider = ExternalResourceUtils.createStaticExternalResourceInfoProviderFromConfig(configuration, pluginManager);
        taskExecutorService = taskExecutorServiceFactory.createTaskExecutor(this.configuration, this.resourceId.unwrap(), rpcService, highAvailabilityServices, heartbeatServices, metricRegistry, blobCacheService, false, externalResourceInfoProvider, workingDirectory.unwrap(), this);
        handleUnexpectedTaskExecutorServiceTermination();
        MemoryLogger.startIfConfigured(LOG, configuration, terminationFuture.thenAccept(ignored -> {
        }));
    }
}
Also used : ExecutorThreadFactory(org.apache.flink.util.concurrent.ExecutorThreadFactory) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) TaskExecutorBlobService(org.apache.flink.runtime.blob.TaskExecutorBlobService) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Hardware(org.apache.flink.runtime.util.Hardware) RpcSystemUtils(org.apache.flink.runtime.rpc.RpcSystemUtils) LoggerFactory(org.slf4j.LoggerFactory) ExternalResourceUtils(org.apache.flink.runtime.externalresource.ExternalResourceUtils) LeaderRetrievalException(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) LeaderRetrievalUtils(org.apache.flink.runtime.util.LeaderRetrievalUtils) ReporterSetup(org.apache.flink.runtime.metrics.ReporterSetup) InetAddress(java.net.InetAddress) MemoryLogger(org.apache.flink.runtime.taskmanager.MemoryLogger) StateChangelogStorageLoader(org.apache.flink.runtime.state.changelog.StateChangelogStorageLoader) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) FunctionUtils(org.apache.flink.util.function.FunctionUtils) Duration(java.time.Duration) SecurityUtils(org.apache.flink.runtime.security.SecurityUtils) TaskManagerOptionsInternal(org.apache.flink.configuration.TaskManagerOptionsInternal) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) AkkaOptions(org.apache.flink.configuration.AkkaOptions) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) ExecutorThreadFactory(org.apache.flink.util.concurrent.ExecutorThreadFactory) AbstractID(org.apache.flink.util.AbstractID) Collection(java.util.Collection) JvmShutdownSafeguard(org.apache.flink.runtime.util.JvmShutdownSafeguard) HighAvailabilityServicesUtils(org.apache.flink.runtime.highavailability.HighAvailabilityServicesUtils) MetricUtils(org.apache.flink.runtime.metrics.util.MetricUtils) GuardedBy(javax.annotation.concurrent.GuardedBy) SecurityConfiguration(org.apache.flink.runtime.security.SecurityConfiguration) EnvironmentInformation(org.apache.flink.runtime.util.EnvironmentInformation) StringUtils(org.apache.flink.util.StringUtils) Executors(java.util.concurrent.Executors) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) ExecutorUtils(org.apache.flink.util.ExecutorUtils) JMXService(org.apache.flink.management.jmx.JMXService) MetricGroup(org.apache.flink.metrics.MetricGroup) FileSystem(org.apache.flink.core.fs.FileSystem) PluginManager(org.apache.flink.core.plugin.PluginManager) ConfigurationParserUtils(org.apache.flink.runtime.util.ConfigurationParserUtils) ClusterEntrypointUtils(org.apache.flink.runtime.entrypoint.ClusterEntrypointUtils) Time(org.apache.flink.api.common.time.Time) TaskExecutorPartitionTrackerImpl(org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTrackerImpl) FlinkException(org.apache.flink.util.FlinkException) MetricRegistryImpl(org.apache.flink.runtime.metrics.MetricRegistryImpl) CompletableFuture(java.util.concurrent.CompletableFuture) ShutdownHookUtil(org.apache.flink.util.ShutdownHookUtil) ArrayList(java.util.ArrayList) MetricRegistryConfiguration(org.apache.flink.runtime.metrics.MetricRegistryConfiguration) TaskManagerOptions(org.apache.flink.configuration.TaskManagerOptions) RpcService(org.apache.flink.runtime.rpc.RpcService) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) ExternalResourceInfoProvider(org.apache.flink.runtime.externalresource.ExternalResourceInfoProvider) FatalErrorHandler(org.apache.flink.runtime.rpc.FatalErrorHandler) FlinkSecurityManager(org.apache.flink.core.security.FlinkSecurityManager) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ExecutorService(java.util.concurrent.ExecutorService) TaskManagerExceptionUtils(org.apache.flink.util.TaskManagerExceptionUtils) Logger(org.slf4j.Logger) AutoCloseableAsync(org.apache.flink.util.AutoCloseableAsync) Configuration(org.apache.flink.configuration.Configuration) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) IOException(java.io.IOException) Reference(org.apache.flink.util.Reference) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) SignalHandler(org.apache.flink.runtime.util.SignalHandler) UndeclaredThrowableException(java.lang.reflect.UndeclaredThrowableException) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) DeterminismEnvelope(org.apache.flink.runtime.entrypoint.DeterminismEnvelope) RpcSystem(org.apache.flink.runtime.rpc.RpcSystem) PluginUtils(org.apache.flink.core.plugin.PluginUtils) WorkingDirectory(org.apache.flink.runtime.entrypoint.WorkingDirectory) JMXServerOptions(org.apache.flink.configuration.JMXServerOptions) BlobCacheService(org.apache.flink.runtime.blob.BlobCacheService) AddressResolution(org.apache.flink.runtime.rpc.AddressResolution) BlobUtils(org.apache.flink.runtime.blob.BlobUtils) FlinkParseException(org.apache.flink.runtime.entrypoint.FlinkParseException) RpcService(org.apache.flink.runtime.rpc.RpcService) MetricRegistryImpl(org.apache.flink.runtime.metrics.MetricRegistryImpl) ExternalResourceInfoProvider(org.apache.flink.runtime.externalresource.ExternalResourceInfoProvider)

Example 4 with HeartbeatServices

use of org.apache.flink.runtime.heartbeat.HeartbeatServices in project flink by apache.

the class JobMasterTest method runHeartbeatTest.

private void runHeartbeatTest(TestingTaskExecutorGatewayBuilder testingTaskExecutorGatewayBuilder, HeartbeatServices heartbeatServices) throws Exception {
    final CompletableFuture<JobID> disconnectedJobManagerFuture = new CompletableFuture<>();
    final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
    final TestingTaskExecutorGateway taskExecutorGateway = testingTaskExecutorGatewayBuilder.setDisconnectJobManagerConsumer((jobId, throwable) -> disconnectedJobManagerFuture.complete(jobId)).createTestingTaskExecutorGateway();
    rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);
    final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).withResourceId(jmResourceId).withConfiguration(configuration).withHighAvailabilityServices(haServices).withHeartbeatServices(heartbeatServices).createJobMaster();
    jobMaster.start();
    try {
        final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
        // register task manager will trigger monitor heartbeat target, schedule heartbeat
        // request at interval time
        CompletableFuture<RegistrationResponse> registrationResponse = jobMasterGateway.registerTaskManager(jobGraph.getJobID(), TaskManagerRegistrationInformation.create(taskExecutorGateway.getAddress(), unresolvedTaskManagerLocation, TestingUtils.zeroUUID()), testingTimeout);
        // wait for the completion of the registration
        registrationResponse.get();
        final JobID disconnectedJobManager = disconnectedJobManagerFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);
        assertThat(disconnectedJobManager, equalTo(jobGraph.getJobID()));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Also used : TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) DefaultSchedulerFactory(org.apache.flink.runtime.scheduler.DefaultSchedulerFactory) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) Arrays(java.util.Arrays) Tuple3(org.apache.flink.api.java.tuple.Tuple3) SlotPoolService(org.apache.flink.runtime.jobmaster.slotpool.SlotPoolService) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) RestartStrategyOptions(org.apache.flink.configuration.RestartStrategyOptions) PerJobCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.PerJobCheckpointRecoveryFactory) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) PhysicalSlot(org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlot) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) Duration(java.time.Duration) Map(java.util.Map) Matchers.nullValue(org.hamcrest.Matchers.nullValue) CompletedCheckpoint(org.apache.flink.runtime.checkpoint.CompletedCheckpoint) ClassRule(org.junit.ClassRule) SimpleSlotContext(org.apache.flink.runtime.instance.SimpleSlotContext) SlotPoolServiceFactory(org.apache.flink.runtime.jobmaster.slotpool.SlotPoolServiceFactory) AfterClass(org.junit.AfterClass) BlockingQueue(java.util.concurrent.BlockingQueue) JobManagerOptions(org.apache.flink.configuration.JobManagerOptions) Category(org.junit.experimental.categories.Category) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) TimeUtils(org.apache.flink.util.TimeUtils) Matchers.is(org.hamcrest.Matchers.is) Time(org.apache.flink.api.common.time.Time) InputSplitSource(org.apache.flink.core.io.InputSplitSource) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) FlinkException(org.apache.flink.util.FlinkException) ComponentMainThreadExecutor(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor) AccessExecution(org.apache.flink.runtime.executiongraph.AccessExecution) JobStatus(org.apache.flink.api.common.JobStatus) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) DefaultInputSplitAssigner(org.apache.flink.api.common.io.DefaultInputSplitAssigner) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) BiConsumer(java.util.function.BiConsumer) Matchers.hasSize(org.hamcrest.Matchers.hasSize) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) Nullable(javax.annotation.Nullable) CheckpointProperties(org.apache.flink.runtime.checkpoint.CheckpointProperties) Before(org.junit.Before) InputSplitAssigner(org.apache.flink.core.io.InputSplitAssigner) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) InputSplit(org.apache.flink.core.io.InputSplit) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) CheckpointsCleaner(org.apache.flink.runtime.checkpoint.CheckpointsCleaner) Test(org.junit.Test) IOException(java.io.IOException) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) JobID(org.apache.flink.api.common.JobID) StandaloneCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.StandaloneCheckpointRecoveryFactory) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) ArrayDeque(java.util.ArrayDeque) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) CheckpointRetentionPolicy(org.apache.flink.runtime.checkpoint.CheckpointRetentionPolicy) Deadline(org.apache.flink.api.common.time.Deadline) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) BiFunction(java.util.function.BiFunction) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) TaskExecutorToJobManagerHeartbeatPayload(org.apache.flink.runtime.taskexecutor.TaskExecutorToJobManagerHeartbeatPayload) AggregateFunction(org.apache.flink.api.common.functions.AggregateFunction) InstantiationUtil(org.apache.flink.util.InstantiationUtil) After(org.junit.After) TestLogger(org.apache.flink.util.TestLogger) TestingSchedulerNGFactory(org.apache.flink.runtime.scheduler.TestingSchedulerNGFactory) Assert.fail(org.junit.Assert.fail) BlobServerOptions(org.apache.flink.configuration.BlobServerOptions) CompletedCheckpointStorageLocation(org.apache.flink.runtime.state.CompletedCheckpointStorageLocation) Collection(java.util.Collection) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) UUID(java.util.UUID) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) Collectors(java.util.stream.Collectors) SlotInfoWithUtilization(org.apache.flink.runtime.jobmaster.slotpool.SlotInfoWithUtilization) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) Objects(java.util.Objects) TestingUtils(org.apache.flink.testutils.TestingUtils) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) Matchers.equalTo(org.hamcrest.Matchers.equalTo) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Optional(java.util.Optional) Queue(java.util.Queue) Matchers.anyOf(org.hamcrest.Matchers.anyOf) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) IntStream(java.util.stream.IntStream) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) BeforeClass(org.junit.BeforeClass) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) Function(java.util.function.Function) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) FailoverStrategyFactoryLoader(org.apache.flink.runtime.executiongraph.failover.flip1.FailoverStrategyFactoryLoader) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TestingJobMasterPartitionTracker(org.apache.flink.runtime.io.network.partition.TestingJobMasterPartitionTracker) FailsWithAdaptiveScheduler(org.apache.flink.testutils.junit.FailsWithAdaptiveScheduler) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) TestingSlotPoolServiceBuilder(org.apache.flink.runtime.jobmaster.slotpool.TestingSlotPoolServiceBuilder) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Nonnull(javax.annotation.Nonnull) StandaloneCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) SlotPool(org.apache.flink.runtime.jobmaster.slotpool.SlotPool) Matchers.empty(org.hamcrest.Matchers.empty) JobGraphBuilder(org.apache.flink.runtime.jobgraph.JobGraphBuilder) TestingSchedulerNG(org.apache.flink.runtime.scheduler.TestingSchedulerNG) Configuration(org.apache.flink.configuration.Configuration) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) Matchers(org.hamcrest.Matchers) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) CheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory) TimeUnit(java.util.concurrent.TimeUnit) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) ClosureCleaner(org.apache.flink.api.java.ClosureCleaner) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) CompletableFuture(java.util.concurrent.CompletableFuture) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) JobID(org.apache.flink.api.common.JobID) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder)

Example 5 with HeartbeatServices

use of org.apache.flink.runtime.heartbeat.HeartbeatServices in project flink by apache.

the class TaskExecutorTest method testMaximumRegistrationDurationAfterConnectionLoss.

@Test
public void testMaximumRegistrationDurationAfterConnectionLoss() throws Exception {
    configuration.set(TaskManagerOptions.REGISTRATION_TIMEOUT, TimeUtils.parseDuration("100 ms"));
    final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(1);
    final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setTaskSlotTable(taskSlotTable).build();
    final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices, new HeartbeatServices(10L, 10L));
    taskExecutor.start();
    final CompletableFuture<ResourceID> registrationFuture = new CompletableFuture<>();
    final OneShotLatch secondRegistration = new OneShotLatch();
    try {
        final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
        testingResourceManagerGateway.setRegisterTaskExecutorFunction(taskExecutorRegistration -> {
            if (registrationFuture.complete(taskExecutorRegistration.getResourceId())) {
                return createRegistrationResponse(testingResourceManagerGateway);
            } else {
                secondRegistration.trigger();
                return CompletableFuture.completedFuture(new Failure(new FlinkException("Only the first registration should succeed.")));
            }
        });
        rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
        resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), UUID.randomUUID());
        final ResourceID registrationResourceId = registrationFuture.get();
        assertThat(registrationResourceId, equalTo(taskManagerServices.getUnresolvedTaskManagerLocation().getResourceID()));
        secondRegistration.await();
        final Throwable error = testingFatalErrorHandler.getErrorFuture().get();
        assertThat(error, is(notNullValue()));
        assertThat(ExceptionUtils.stripExecutionException(error), instanceOf(RegistrationTimeoutException.class));
        testingFatalErrorHandler.clearError();
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
Also used : HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) Task(org.apache.flink.runtime.taskmanager.Task) RegistrationTimeoutException(org.apache.flink.runtime.taskexecutor.exceptions.RegistrationTimeoutException) FlinkException(org.apache.flink.util.FlinkException) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) Failure(org.apache.flink.runtime.registration.RegistrationResponse.Failure) Test(org.junit.Test)

Aggregations

HeartbeatServices (org.apache.flink.runtime.heartbeat.HeartbeatServices)19 Configuration (org.apache.flink.configuration.Configuration)16 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)14 Test (org.junit.Test)12 UUID (java.util.UUID)11 CompletableFuture (java.util.concurrent.CompletableFuture)11 JobID (org.apache.flink.api.common.JobID)11 FlinkException (org.apache.flink.util.FlinkException)11 IOException (java.io.IOException)10 Collection (java.util.Collection)10 Collections (java.util.Collections)10 ExecutionException (java.util.concurrent.ExecutionException)10 TimeUnit (java.util.concurrent.TimeUnit)10 TimeoutException (java.util.concurrent.TimeoutException)10 Time (org.apache.flink.api.common.time.Time)10 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)10 RpcUtils (org.apache.flink.runtime.rpc.RpcUtils)10 ExceptionUtils (org.apache.flink.util.ExceptionUtils)10 FutureUtils (org.apache.flink.util.concurrent.FutureUtils)10 Before (org.junit.Before)10