Search in sources :

Example 26 with TestingJobMasterGateway

use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway in project flink by apache.

the class TaskExecutorTest method testReleaseOfJobResourcesIfJobMasterIsNotCorrect.

/**
 * Tests that the TaskExecutor releases all of its job resources if the JobMaster is not running
 * the specified job. See FLINK-21606.
 */
@Test
public void testReleaseOfJobResourcesIfJobMasterIsNotCorrect() throws Exception {
    final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setTaskSlotTable(TaskSlotUtils.createTaskSlotTable(1)).build();
    final TestingTaskExecutorPartitionTracker taskExecutorPartitionTracker = new TestingTaskExecutorPartitionTracker();
    final CompletableFuture<JobID> jobPartitionsReleaseFuture = new CompletableFuture<>();
    // simulate that we have some partitions tracked
    taskExecutorPartitionTracker.setIsTrackingPartitionsForFunction(ignored -> true);
    taskExecutorPartitionTracker.setStopTrackingAndReleaseAllPartitionsConsumer(jobPartitionsReleaseFuture::complete);
    final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices, HEARTBEAT_SERVICES, taskExecutorPartitionTracker);
    final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().setRegisterTaskManagerFunction((ignoredJobId, ignoredTaskManagerRegistrationInformation) -> CompletableFuture.completedFuture(new JMTMRegistrationRejection("foobar"))).build();
    rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
    final InstanceID registrationId = new InstanceID();
    final OneShotLatch taskExecutorIsRegistered = new OneShotLatch();
    final CompletableFuture<Tuple3<InstanceID, SlotID, AllocationID>> availableSlotFuture = new CompletableFuture<>();
    final TestingResourceManagerGateway resourceManagerGateway = createRmWithTmRegisterAndNotifySlotHooks(registrationId, taskExecutorIsRegistered, availableSlotFuture);
    rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);
    resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerGateway.getFencingToken().toUUID());
    try {
        taskExecutor.start();
        final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);
        taskExecutorIsRegistered.await();
        final AllocationID allocationId = new AllocationID();
        final SlotID slotId = new SlotID(taskExecutor.getResourceID(), 0);
        requestSlot(taskExecutorGateway, jobId, allocationId, slotId, ResourceProfile.UNKNOWN, jobMasterGateway.getAddress(), resourceManagerGateway.getFencingToken());
        // The JobManager should reject the registration which should release all job resources
        // on the TaskExecutor
        jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());
        // the slot should be freed
        assertThat(availableSlotFuture.get().f1, is(slotId));
        assertThat(availableSlotFuture.get().f2, is(allocationId));
        // all job partitions should be released
        assertThat(jobPartitionsReleaseFuture.get(), is(jobId));
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
Also used : Arrays(java.util.Arrays) Tuple3(org.apache.flink.api.java.tuple.Tuple3) TaskSlotUtils.createDefaultTimerService(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.createDefaultTimerService) MemorySize(org.apache.flink.configuration.MemorySize) NetUtils(org.apache.flink.util.NetUtils) InetAddress(java.net.InetAddress) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) TaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTracker) FunctionUtils(org.apache.flink.util.function.FunctionUtils) Matchers.nullValue(org.hamcrest.Matchers.nullValue) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Builder(org.apache.flink.runtime.taskexecutor.TaskSubmissionTestEnvironment.Builder) Matchers.notNullValue(org.hamcrest.Matchers.notNullValue) Failure(org.apache.flink.runtime.registration.RegistrationResponse.Failure) NoOpTaskExecutorBlobService(org.apache.flink.runtime.blob.NoOpTaskExecutorBlobService) TestingTaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TestingTaskExecutorPartitionTracker) NoOpMetricRegistry(org.apache.flink.runtime.metrics.NoOpMetricRegistry) BlockingQueue(java.util.concurrent.BlockingQueue) Matchers.startsWith(org.hamcrest.Matchers.startsWith) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) TaskExecutorStateChangelogStoragesManager(org.apache.flink.runtime.state.TaskExecutorStateChangelogStoragesManager) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) TimeUtils(org.apache.flink.util.TimeUtils) Matchers.contains(org.hamcrest.Matchers.contains) Matchers.is(org.hamcrest.Matchers.is) Matchers.containsString(org.hamcrest.Matchers.containsString) Time(org.apache.flink.api.common.time.Time) TaskExecutorRegistration(org.apache.flink.runtime.resourcemanager.TaskExecutorRegistration) TaskExecutorPartitionTrackerImpl(org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTrackerImpl) FlinkException(org.apache.flink.util.FlinkException) TaskSlotTableImpl(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTableImpl) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) TaskSubmissionException(org.apache.flink.runtime.taskexecutor.exceptions.TaskSubmissionException) Callable(java.util.concurrent.Callable) DEFAULT_RESOURCE_PROFILE(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.DEFAULT_RESOURCE_PROFILE) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) TestingTaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TestingTaskSlotTable) ArrayList(java.util.ArrayList) TaskManagerOptions(org.apache.flink.configuration.TaskManagerOptions) TestingClassLoaderLease(org.apache.flink.runtime.execution.librarycache.TestingClassLoaderLease) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) TestName(org.junit.rules.TestName) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) Matchers.hasSize(org.hamcrest.Matchers.hasSize) TriConsumer(org.apache.flink.util.function.TriConsumer) TestFileUtils(org.apache.flink.testutils.TestFileUtils) Before(org.junit.Before) RetryingRegistrationConfiguration(org.apache.flink.runtime.registration.RetryingRegistrationConfiguration) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) SlotNotFoundException(org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException) TriConsumerWithException(org.apache.flink.util.function.TriConsumerWithException) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) InstanceID(org.apache.flink.runtime.instance.InstanceID) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Executors(org.apache.flink.util.concurrent.Executors) ThreadSafeTaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.ThreadSafeTaskSlotTable) JobID(org.apache.flink.api.common.JobID) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) Task(org.apache.flink.runtime.taskmanager.Task) Assert.assertNull(org.junit.Assert.assertNull) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) ArrayDeque(java.util.ArrayDeque) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) Assert.assertEquals(org.junit.Assert.assertEquals) CPUResource(org.apache.flink.api.common.resources.CPUResource) MultiShotLatch(org.apache.flink.core.testutils.MultiShotLatch) Deadline(org.apache.flink.api.common.time.Deadline) ClusterPartitionReport(org.apache.flink.runtime.taskexecutor.partition.ClusterPartitionReport) IntStream.range(java.util.stream.IntStream.range) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) ShuffleEnvironment(org.apache.flink.runtime.shuffle.ShuffleEnvironment) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) BlockingNoOpInvokable(org.apache.flink.runtime.testtasks.BlockingNoOpInvokable) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) Lists(org.apache.flink.shaded.guava30.com.google.common.collect.Lists) Assert.assertThat(org.junit.Assert.assertThat) After(org.junit.After) TestLogger(org.apache.flink.util.TestLogger) Assert.fail(org.junit.Assert.fail) TransientBlobKey(org.apache.flink.runtime.blob.TransientBlobKey) RegistrationTimeoutException(org.apache.flink.runtime.taskexecutor.exceptions.RegistrationTimeoutException) Collection(java.util.Collection) KvStateRegistry(org.apache.flink.runtime.query.KvStateRegistry) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) CompletionException(java.util.concurrent.CompletionException) UUID(java.util.UUID) NettyShuffleEnvironment(org.apache.flink.runtime.io.network.NettyShuffleEnvironment) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) TaskManagerException(org.apache.flink.runtime.taskexecutor.exceptions.TaskManagerException) Collectors(java.util.stream.Collectors) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) ExecutorUtils(org.apache.flink.util.ExecutorUtils) TaskSlotUtils.createTotalResourceProfile(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.createTotalResourceProfile) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Queue(java.util.Queue) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) AllocatedSlotInfo(org.apache.flink.runtime.jobmaster.AllocatedSlotInfo) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) LeaderRetrievalListener(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalListener) TaskInvokable(org.apache.flink.runtime.jobgraph.tasks.TaskInvokable) TaskExecutorLocalStateStoresManager(org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) CompletableFuture(java.util.concurrent.CompletableFuture) TaskDeploymentDescriptorBuilder(org.apache.flink.runtime.deployment.TaskDeploymentDescriptorBuilder) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) NettyShuffleEnvironmentOptions(org.apache.flink.configuration.NettyShuffleEnvironmentOptions) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) ExternalResourceInfoProvider(org.apache.flink.runtime.externalresource.ExternalResourceInfoProvider) ConfigConstants(org.apache.flink.configuration.ConfigConstants) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Nonnull(javax.annotation.Nonnull) TaskSlotUtils(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils) JMTMRegistrationRejection(org.apache.flink.runtime.jobmaster.JMTMRegistrationRejection) Matchers.empty(org.hamcrest.Matchers.empty) NettyShuffleEnvironmentBuilder(org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) Reference(org.apache.flink.util.Reference) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) AllocatedSlotReport(org.apache.flink.runtime.jobmaster.AllocatedSlotReport) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) Rule(org.junit.Rule) UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) InstanceID(org.apache.flink.runtime.instance.InstanceID) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) JMTMRegistrationRejection(org.apache.flink.runtime.jobmaster.JMTMRegistrationRejection) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) TestingTaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TestingTaskExecutorPartitionTracker) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Tuple3(org.apache.flink.api.java.tuple.Tuple3) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 27 with TestingJobMasterGateway

use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway in project flink by apache.

the class TaskExecutorSubmissionTest method testCancellingDependentAndStateUpdateFails.

/**
 * This tests creates two tasks. The sender sends data but fails to send the state update back
 * to the job manager. the second one blocks to be canceled
 */
@Test
public void testCancellingDependentAndStateUpdateFails() throws Exception {
    ResourceID producerLocation = ResourceID.generate();
    NettyShuffleDescriptor sdd = createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);
    TaskDeploymentDescriptor tdd1 = createSender(sdd);
    TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
    ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
    ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();
    final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task1FailedFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2CanceledFuture = new CompletableFuture<>();
    final JobMasterId jobMasterId = JobMasterId.generate();
    TestingJobMasterGateway testingJobMasterGateway = new TestingJobMasterGatewayBuilder().setFencingTokenSupplier(() -> jobMasterId).setUpdateTaskExecutionStateFunction(taskExecutionState -> {
        if (taskExecutionState != null && taskExecutionState.getID().equals(eid1) && taskExecutionState.getExecutionState() == ExecutionState.RUNNING) {
            return FutureUtils.completedExceptionally(new ExecutionGraphException("The execution attempt " + eid2 + " was not found."));
        } else {
            return CompletableFuture.completedFuture(Acknowledge.get());
        }
    }).build();
    try (TaskSubmissionTestEnvironment env = new TaskSubmissionTestEnvironment.Builder(jobId).setResourceID(producerLocation).setSlotSize(2).addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture).addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture).addTaskManagerActionListener(eid1, ExecutionState.FAILED, task1FailedFuture).addTaskManagerActionListener(eid2, ExecutionState.CANCELED, task2CanceledFuture).setJobMasterId(jobMasterId).setJobMasterGateway(testingJobMasterGateway).useRealNonMockShuffleEnvironment().build()) {
        TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
        TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();
        taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
        task1RunningFuture.get();
        taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
        task2RunningFuture.get();
        task1FailedFuture.get();
        assertSame(taskSlotTable.getTask(eid1).getExecutionState(), ExecutionState.FAILED);
        tmGateway.cancelTask(eid2, timeout);
        task2CanceledFuture.get();
        assertSame(taskSlotTable.getTask(eid2).getExecutionState(), ExecutionState.CANCELED);
    }
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) NettyShuffleDescriptorBuilder(org.apache.flink.runtime.util.NettyShuffleDescriptorBuilder) ShuffleEnvironment(org.apache.flink.runtime.shuffle.ShuffleEnvironment) URL(java.net.URL) BlockingNoOpInvokable(org.apache.flink.runtime.testtasks.BlockingNoOpInvokable) PartitionDescriptorBuilder(org.apache.flink.runtime.shuffle.PartitionDescriptorBuilder) ExceptionUtils(org.apache.flink.util.ExceptionUtils) MemorySize(org.apache.flink.configuration.MemorySize) NetUtils(org.apache.flink.util.NetUtils) Assert.assertThat(org.junit.Assert.assertThat) Mockito.doThrow(org.mockito.Mockito.doThrow) NettyShuffleDescriptor(org.apache.flink.runtime.shuffle.NettyShuffleDescriptor) JobInformation(org.apache.flink.runtime.executiongraph.JobInformation) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) NettyShuffleDescriptorBuilder.createRemoteWithIdAndLocation(org.apache.flink.runtime.util.NettyShuffleDescriptorBuilder.createRemoteWithIdAndLocation) Collection(java.util.Collection) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) PartitionNotFoundException(org.apache.flink.runtime.io.network.partition.PartitionNotFoundException) Preconditions(org.apache.flink.util.Preconditions) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) List(java.util.List) SerializedValue(org.apache.flink.util.SerializedValue) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) ExecutionGraphException(org.apache.flink.runtime.executiongraph.ExecutionGraphException) Time(org.apache.flink.api.common.time.Time) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) Environment(org.apache.flink.runtime.execution.Environment) Mockito.mock(org.mockito.Mockito.mock) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) ShuffleDescriptor(org.apache.flink.runtime.shuffle.ShuffleDescriptor) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) Assert.assertSame(org.junit.Assert.assertSame) NettyShuffleEnvironmentOptions(org.apache.flink.configuration.NettyShuffleEnvironmentOptions) TaskManagerOptions(org.apache.flink.configuration.TaskManagerOptions) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) TestName(org.junit.rules.TestName) PermanentBlobKey(org.apache.flink.runtime.blob.PermanentBlobKey) TestingAbstractInvokables(org.apache.flink.runtime.jobmaster.TestingAbstractInvokables) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) PartitionInfo(org.apache.flink.runtime.executiongraph.PartitionInfo) Configuration(org.apache.flink.configuration.Configuration) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) Mockito(org.mockito.Mockito) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobID(org.apache.flink.api.common.JobID) Task(org.apache.flink.runtime.taskmanager.Task) Rule(org.junit.Rule) PartitionDescriptor(org.apache.flink.runtime.shuffle.PartitionDescriptor) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) Collections(java.util.Collections) NettyShuffleDescriptor(org.apache.flink.runtime.shuffle.NettyShuffleDescriptor) Task(org.apache.flink.runtime.taskmanager.Task) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionGraphException(org.apache.flink.runtime.executiongraph.ExecutionGraphException) NettyShuffleDescriptorBuilder(org.apache.flink.runtime.util.NettyShuffleDescriptorBuilder) PartitionDescriptorBuilder(org.apache.flink.runtime.shuffle.PartitionDescriptorBuilder) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)

Example 28 with TestingJobMasterGateway

use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway in project flink by apache.

the class DefaultJobLeaderServiceTest method doesNotReconnectAfterTargetLostLeadership.

/**
 * Tests that the JobLeaderService won't try to reconnect to JobMaster after it has lost the
 * leadership. See FLINK-16836.
 */
@Test
public void doesNotReconnectAfterTargetLostLeadership() throws Exception {
    final JobID jobId = new JobID();
    final SettableLeaderRetrievalService leaderRetrievalService = new SettableLeaderRetrievalService();
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServicesBuilder().setJobMasterLeaderRetrieverFunction(ignored -> leaderRetrievalService).build();
    final TestingJobMasterGateway jobMasterGateway = registerJobMaster();
    final OneShotLatch jobManagerGainedLeadership = new OneShotLatch();
    final TestingJobLeaderListener testingJobLeaderListener = new TestingJobLeaderListener(ignored -> jobManagerGainedLeadership.trigger());
    final JobLeaderService jobLeaderService = createAndStartJobLeaderService(haServices, testingJobLeaderListener);
    try {
        jobLeaderService.addJob(jobId, jobMasterGateway.getAddress());
        leaderRetrievalService.notifyListener(jobMasterGateway.getAddress(), UUID.randomUUID());
        jobManagerGainedLeadership.await();
        // revoke the leadership
        leaderRetrievalService.notifyListener(null, null);
        testingJobLeaderListener.waitUntilJobManagerLostLeadership();
        jobLeaderService.reconnect(jobId);
    } finally {
        jobLeaderService.stop();
    }
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) TimeoutException(java.util.concurrent.TimeoutException) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) Assert.assertThat(org.junit.Assert.assertThat) CheckedThread(org.apache.flink.core.testutils.CheckedThread) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Assert.fail(org.junit.Assert.fail) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) RetryingRegistrationConfiguration(org.apache.flink.runtime.registration.RetryingRegistrationConfiguration) JMTMRegistrationRejection(org.apache.flink.runtime.jobmaster.JMTMRegistrationRejection) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) Test(org.junit.Test) BlockingQueue(java.util.concurrent.BlockingQueue) UUID(java.util.UUID) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) JobID(org.apache.flink.api.common.JobID) Rule(org.junit.Rule) TestingRpcServiceResource(org.apache.flink.runtime.rpc.TestingRpcServiceResource) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) JobID(org.apache.flink.api.common.JobID) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) Test(org.junit.Test)

Example 29 with TestingJobMasterGateway

use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway in project flink by apache.

the class DefaultJobLeaderServiceTest method canReconnectToOldLeaderWithSameLeaderAddress.

/**
 * Tests that the JobLeaderService can reconnect to an old leader which seemed to have lost the
 * leadership in between. See FLINK-14316.
 */
@Test
public void canReconnectToOldLeaderWithSameLeaderAddress() throws Exception {
    final JobID jobId = new JobID();
    final SettableLeaderRetrievalService leaderRetrievalService = new SettableLeaderRetrievalService();
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServicesBuilder().setJobMasterLeaderRetrieverFunction(ignored -> leaderRetrievalService).build();
    final TestingJobMasterGateway jobMasterGateway = registerJobMaster();
    final BlockingQueue<JobID> leadershipQueue = new ArrayBlockingQueue<>(1);
    final TestingJobLeaderListener testingJobLeaderListener = new TestingJobLeaderListener(leadershipQueue::offer);
    final JobLeaderService jobLeaderService = createAndStartJobLeaderService(haServices, testingJobLeaderListener);
    try {
        jobLeaderService.addJob(jobId, jobMasterGateway.getAddress());
        final UUID leaderSessionId = UUID.randomUUID();
        leaderRetrievalService.notifyListener(jobMasterGateway.getAddress(), leaderSessionId);
        // wait for the first leadership
        assertThat(leadershipQueue.take(), is(jobId));
        // revoke the leadership
        leaderRetrievalService.notifyListener(null, null);
        testingJobLeaderListener.waitUntilJobManagerLostLeadership();
        leaderRetrievalService.notifyListener(jobMasterGateway.getAddress(), leaderSessionId);
        // check that we obtain the leadership a second time
        assertThat(leadershipQueue.take(), is(jobId));
    } finally {
        jobLeaderService.stop();
    }
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) TimeoutException(java.util.concurrent.TimeoutException) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) Assert.assertThat(org.junit.Assert.assertThat) CheckedThread(org.apache.flink.core.testutils.CheckedThread) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Assert.fail(org.junit.Assert.fail) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) RetryingRegistrationConfiguration(org.apache.flink.runtime.registration.RetryingRegistrationConfiguration) JMTMRegistrationRejection(org.apache.flink.runtime.jobmaster.JMTMRegistrationRejection) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) Test(org.junit.Test) BlockingQueue(java.util.concurrent.BlockingQueue) UUID(java.util.UUID) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) JobID(org.apache.flink.api.common.JobID) Rule(org.junit.Rule) TestingRpcServiceResource(org.apache.flink.runtime.rpc.TestingRpcServiceResource) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) UUID(java.util.UUID) JobID(org.apache.flink.api.common.JobID) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) Test(org.junit.Test)

Example 30 with TestingJobMasterGateway

use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway in project flink by apache.

the class DefaultJobLeaderServiceTest method rejectedJobManagerRegistrationCallsJobLeaderListener.

@Test
public void rejectedJobManagerRegistrationCallsJobLeaderListener() throws Exception {
    final SettableLeaderRetrievalService leaderRetrievalService = new SettableLeaderRetrievalService();
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServicesBuilder().setJobMasterLeaderRetrieverFunction(ignored -> leaderRetrievalService).build();
    final JobID jobId = new JobID();
    final CompletableFuture<JobID> rejectedRegistrationFuture = new CompletableFuture<>();
    final TestingJobLeaderListener testingJobLeaderListener = new TestingJobLeaderListener(ignored -> {
    }, rejectedRegistrationFuture::complete);
    final JobLeaderService jobLeaderService = createAndStartJobLeaderService(haServices, testingJobLeaderListener);
    final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().setRegisterTaskManagerFunction((jobID, taskManagerRegistrationInformation) -> CompletableFuture.completedFuture(new JMTMRegistrationRejection("foobar"))).build();
    rpcServiceResource.getTestingRpcService().registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
    try {
        jobLeaderService.addJob(jobId, "foobar");
        leaderRetrievalService.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());
        assertThat(rejectedRegistrationFuture.get(), is(jobId));
    } finally {
        jobLeaderService.stop();
    }
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) TimeoutException(java.util.concurrent.TimeoutException) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) Assert.assertThat(org.junit.Assert.assertThat) CheckedThread(org.apache.flink.core.testutils.CheckedThread) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Assert.fail(org.junit.Assert.fail) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) RetryingRegistrationConfiguration(org.apache.flink.runtime.registration.RetryingRegistrationConfiguration) JMTMRegistrationRejection(org.apache.flink.runtime.jobmaster.JMTMRegistrationRejection) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) Test(org.junit.Test) BlockingQueue(java.util.concurrent.BlockingQueue) UUID(java.util.UUID) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) JobID(org.apache.flink.api.common.JobID) Rule(org.junit.Rule) TestingRpcServiceResource(org.apache.flink.runtime.rpc.TestingRpcServiceResource) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) JMTMRegistrationRejection(org.apache.flink.runtime.jobmaster.JMTMRegistrationRejection) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

TestingJobMasterGateway (org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway)31 CompletableFuture (java.util.concurrent.CompletableFuture)30 TestingJobMasterGatewayBuilder (org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder)28 Test (org.junit.Test)28 JobID (org.apache.flink.api.common.JobID)26 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)23 SettableLeaderRetrievalService (org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService)23 TestLogger (org.apache.flink.util.TestLogger)23 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)22 TestingHighAvailabilityServices (org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices)21 Rule (org.junit.Rule)21 Assert.assertThat (org.junit.Assert.assertThat)20 UUID (java.util.UUID)19 ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue)19 Time (org.apache.flink.api.common.time.Time)19 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)19 Collection (java.util.Collection)18 Collections (java.util.Collections)18 BlockingQueue (java.util.concurrent.BlockingQueue)18 TimeUnit (java.util.concurrent.TimeUnit)18