Search in sources :

Example 71 with AllocationID

use of org.apache.flink.runtime.clusterframework.types.AllocationID in project flink by apache.

the class TaskExecutorTest method testFreeingInactiveSlotDoesNotFail.

/**
 * Tests that freeing an inactive slot is a legal operation that does not throw an exception.
 */
@Test
public void testFreeingInactiveSlotDoesNotFail() throws Exception {
    final OneShotLatch taskExecutorIsRegistered = new OneShotLatch();
    final CompletableFuture<Tuple3<InstanceID, SlotID, AllocationID>> availableSlotFuture = new CompletableFuture<>();
    final TestingResourceManagerGateway resourceManagerGateway = createRmWithTmRegisterAndNotifySlotHooks(new InstanceID(), taskExecutorIsRegistered, availableSlotFuture);
    rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);
    final MultiShotLatch offerSlotsLatch = new MultiShotLatch();
    final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().setOfferSlotsFunction((resourceID, slotOffers) -> {
        offerSlotsLatch.trigger();
        return new CompletableFuture<>();
    }).build();
    rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
    final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(1);
    final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();
    final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation).setTaskSlotTable(taskSlotTable).setTaskStateManager(localStateStoresManager).build();
    final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(taskManagerServices);
    final ThreadSafeTaskSlotTable<Task> threadSafeTaskSlotTable = new ThreadSafeTaskSlotTable<>(taskSlotTable, taskExecutor.getMainThreadExecutableForTesting());
    try {
        taskExecutor.start();
        taskExecutor.waitUntilStarted();
        final TaskExecutorGateway tmGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);
        taskExecutorIsRegistered.await();
        jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());
        final AllocationID allocationId = new AllocationID();
        requestSlot(tmGateway, jobId, allocationId, buildSlotID(0), ResourceProfile.UNKNOWN, jobMasterGateway.getAddress(), resourceManagerGateway.getFencingToken());
        offerSlotsLatch.await();
        tmGateway.freeSlot(allocationId, new RuntimeException("test exception"), timeout).get();
        assertThat(availableSlotFuture.get().f2, is(allocationId));
        assertThat(threadSafeTaskSlotTable.getAllocationIdsPerJob(jobId), empty());
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
Also used : Arrays(java.util.Arrays) Tuple3(org.apache.flink.api.java.tuple.Tuple3) TaskSlotUtils.createDefaultTimerService(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.createDefaultTimerService) MemorySize(org.apache.flink.configuration.MemorySize) NetUtils(org.apache.flink.util.NetUtils) InetAddress(java.net.InetAddress) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) TaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTracker) FunctionUtils(org.apache.flink.util.function.FunctionUtils) Matchers.nullValue(org.hamcrest.Matchers.nullValue) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Builder(org.apache.flink.runtime.taskexecutor.TaskSubmissionTestEnvironment.Builder) Matchers.notNullValue(org.hamcrest.Matchers.notNullValue) Failure(org.apache.flink.runtime.registration.RegistrationResponse.Failure) NoOpTaskExecutorBlobService(org.apache.flink.runtime.blob.NoOpTaskExecutorBlobService) TestingTaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TestingTaskExecutorPartitionTracker) NoOpMetricRegistry(org.apache.flink.runtime.metrics.NoOpMetricRegistry) BlockingQueue(java.util.concurrent.BlockingQueue) Matchers.startsWith(org.hamcrest.Matchers.startsWith) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) TaskExecutorStateChangelogStoragesManager(org.apache.flink.runtime.state.TaskExecutorStateChangelogStoragesManager) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) TimeUtils(org.apache.flink.util.TimeUtils) Matchers.contains(org.hamcrest.Matchers.contains) Matchers.is(org.hamcrest.Matchers.is) Matchers.containsString(org.hamcrest.Matchers.containsString) Time(org.apache.flink.api.common.time.Time) TaskExecutorRegistration(org.apache.flink.runtime.resourcemanager.TaskExecutorRegistration) TaskExecutorPartitionTrackerImpl(org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTrackerImpl) FlinkException(org.apache.flink.util.FlinkException) TaskSlotTableImpl(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTableImpl) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) TaskSubmissionException(org.apache.flink.runtime.taskexecutor.exceptions.TaskSubmissionException) Callable(java.util.concurrent.Callable) DEFAULT_RESOURCE_PROFILE(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.DEFAULT_RESOURCE_PROFILE) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) TestingTaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TestingTaskSlotTable) ArrayList(java.util.ArrayList) TaskManagerOptions(org.apache.flink.configuration.TaskManagerOptions) TestingClassLoaderLease(org.apache.flink.runtime.execution.librarycache.TestingClassLoaderLease) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) TestName(org.junit.rules.TestName) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) Matchers.hasSize(org.hamcrest.Matchers.hasSize) TriConsumer(org.apache.flink.util.function.TriConsumer) TestFileUtils(org.apache.flink.testutils.TestFileUtils) Before(org.junit.Before) RetryingRegistrationConfiguration(org.apache.flink.runtime.registration.RetryingRegistrationConfiguration) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) SlotNotFoundException(org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException) TriConsumerWithException(org.apache.flink.util.function.TriConsumerWithException) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) InstanceID(org.apache.flink.runtime.instance.InstanceID) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Executors(org.apache.flink.util.concurrent.Executors) ThreadSafeTaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.ThreadSafeTaskSlotTable) JobID(org.apache.flink.api.common.JobID) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) Task(org.apache.flink.runtime.taskmanager.Task) Assert.assertNull(org.junit.Assert.assertNull) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) ArrayDeque(java.util.ArrayDeque) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) Assert.assertEquals(org.junit.Assert.assertEquals) CPUResource(org.apache.flink.api.common.resources.CPUResource) MultiShotLatch(org.apache.flink.core.testutils.MultiShotLatch) Deadline(org.apache.flink.api.common.time.Deadline) ClusterPartitionReport(org.apache.flink.runtime.taskexecutor.partition.ClusterPartitionReport) IntStream.range(java.util.stream.IntStream.range) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) ShuffleEnvironment(org.apache.flink.runtime.shuffle.ShuffleEnvironment) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) BlockingNoOpInvokable(org.apache.flink.runtime.testtasks.BlockingNoOpInvokable) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) Lists(org.apache.flink.shaded.guava30.com.google.common.collect.Lists) Assert.assertThat(org.junit.Assert.assertThat) After(org.junit.After) TestLogger(org.apache.flink.util.TestLogger) Assert.fail(org.junit.Assert.fail) TransientBlobKey(org.apache.flink.runtime.blob.TransientBlobKey) RegistrationTimeoutException(org.apache.flink.runtime.taskexecutor.exceptions.RegistrationTimeoutException) Collection(java.util.Collection) KvStateRegistry(org.apache.flink.runtime.query.KvStateRegistry) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) CompletionException(java.util.concurrent.CompletionException) UUID(java.util.UUID) NettyShuffleEnvironment(org.apache.flink.runtime.io.network.NettyShuffleEnvironment) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) TaskManagerException(org.apache.flink.runtime.taskexecutor.exceptions.TaskManagerException) Collectors(java.util.stream.Collectors) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) ExecutorUtils(org.apache.flink.util.ExecutorUtils) TaskSlotUtils.createTotalResourceProfile(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.createTotalResourceProfile) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Queue(java.util.Queue) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) AllocatedSlotInfo(org.apache.flink.runtime.jobmaster.AllocatedSlotInfo) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) LeaderRetrievalListener(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalListener) TaskInvokable(org.apache.flink.runtime.jobgraph.tasks.TaskInvokable) TaskExecutorLocalStateStoresManager(org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) CompletableFuture(java.util.concurrent.CompletableFuture) TaskDeploymentDescriptorBuilder(org.apache.flink.runtime.deployment.TaskDeploymentDescriptorBuilder) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) NettyShuffleEnvironmentOptions(org.apache.flink.configuration.NettyShuffleEnvironmentOptions) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) ExternalResourceInfoProvider(org.apache.flink.runtime.externalresource.ExternalResourceInfoProvider) ConfigConstants(org.apache.flink.configuration.ConfigConstants) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Nonnull(javax.annotation.Nonnull) TaskSlotUtils(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils) JMTMRegistrationRejection(org.apache.flink.runtime.jobmaster.JMTMRegistrationRejection) Matchers.empty(org.hamcrest.Matchers.empty) NettyShuffleEnvironmentBuilder(org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) Reference(org.apache.flink.util.Reference) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) AllocatedSlotReport(org.apache.flink.runtime.jobmaster.AllocatedSlotReport) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) Rule(org.junit.Rule) UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) Task(org.apache.flink.runtime.taskmanager.Task) InstanceID(org.apache.flink.runtime.instance.InstanceID) MultiShotLatch(org.apache.flink.core.testutils.MultiShotLatch) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) TaskExecutorLocalStateStoresManager(org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager) ThreadSafeTaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.ThreadSafeTaskSlotTable) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) Tuple3(org.apache.flink.api.java.tuple.Tuple3) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) Test(org.junit.Test)

Example 72 with AllocationID

use of org.apache.flink.runtime.clusterframework.types.AllocationID in project flink by apache.

the class TaskExecutorTest method testHeartbeatReporting.

/**
 * Tests that the correct partition/slot report is sent as part of the heartbeat response.
 */
@Test
public void testHeartbeatReporting() throws Exception {
    final String rmAddress = "rm";
    final UUID rmLeaderId = UUID.randomUUID();
    // register the mock resource manager gateway
    final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
    final CompletableFuture<ResourceID> taskExecutorRegistrationFuture = new CompletableFuture<>();
    final ResourceID rmResourceId = rmGateway.getOwnResourceId();
    final CompletableFuture<RegistrationResponse> registrationResponse = CompletableFuture.completedFuture(new TaskExecutorRegistrationSuccess(new InstanceID(), rmResourceId, new ClusterInformation("localhost", 1234)));
    rmGateway.setRegisterTaskExecutorFunction(taskExecutorRegistration -> {
        taskExecutorRegistrationFuture.complete(taskExecutorRegistration.getResourceId());
        return registrationResponse;
    });
    final CompletableFuture<SlotReport> initialSlotReportFuture = new CompletableFuture<>();
    rmGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
        initialSlotReportFuture.complete(resourceIDInstanceIDSlotReportTuple3.f2);
        return CompletableFuture.completedFuture(Acknowledge.get());
    });
    final CompletableFuture<TaskExecutorHeartbeatPayload> heartbeatPayloadCompletableFuture = new CompletableFuture<>();
    rmGateway.setTaskExecutorHeartbeatFunction((resourceID, heartbeatPayload) -> {
        heartbeatPayloadCompletableFuture.complete(heartbeatPayload);
        return FutureUtils.completedVoidFuture();
    });
    rpc.registerGateway(rmAddress, rmGateway);
    final SlotID slotId = buildSlotID(0);
    final ResourceProfile resourceProfile = ResourceProfile.fromResources(1.0, 1);
    final SlotReport slotReport1 = new SlotReport(new SlotStatus(slotId, resourceProfile));
    final SlotReport slotReport2 = new SlotReport(new SlotStatus(slotId, resourceProfile, new JobID(), new AllocationID()));
    final Queue<SlotReport> reports = new ArrayDeque<>(Arrays.asList(slotReport1, slotReport2));
    final TaskSlotTable<Task> taskSlotTable = TestingTaskSlotTable.<Task>newBuilder().createSlotReportSupplier(reports::poll).closeAsyncReturns(CompletableFuture.completedFuture(null)).build();
    final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();
    final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation).setTaskSlotTable(taskSlotTable).setTaskStateManager(localStateStoresManager).build();
    final TaskExecutorPartitionTracker partitionTracker = createPartitionTrackerWithFixedPartitionReport(taskManagerServices.getShuffleEnvironment());
    final TaskExecutor taskManager = createTaskExecutor(taskManagerServices, HEARTBEAT_SERVICES, partitionTracker);
    try {
        taskManager.start();
        // define a leader and see that a registration happens
        resourceManagerLeaderRetriever.notifyListener(rmAddress, rmLeaderId);
        // register resource manager success will trigger monitoring heartbeat target between tm
        // and rm
        assertThat(taskExecutorRegistrationFuture.get(), equalTo(unresolvedTaskManagerLocation.getResourceID()));
        assertThat(initialSlotReportFuture.get(), equalTo(slotReport1));
        TaskExecutorGateway taskExecutorGateway = taskManager.getSelfGateway(TaskExecutorGateway.class);
        // trigger the heartbeat asynchronously
        taskExecutorGateway.heartbeatFromResourceManager(rmResourceId);
        // wait for heartbeat response
        SlotReport actualSlotReport = heartbeatPayloadCompletableFuture.get().getSlotReport();
        // the new slot report should be reported
        assertEquals(slotReport2, actualSlotReport);
        ClusterPartitionReport actualClusterPartitionReport = heartbeatPayloadCompletableFuture.get().getClusterPartitionReport();
        assertEquals(partitionTracker.createClusterPartitionReport(), actualClusterPartitionReport);
    } finally {
        RpcUtils.terminateRpcEndpoint(taskManager, timeout);
    }
}
Also used : Task(org.apache.flink.runtime.taskmanager.Task) InstanceID(org.apache.flink.runtime.instance.InstanceID) Matchers.containsString(org.hamcrest.Matchers.containsString) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) UUID(java.util.UUID) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) TaskSlotUtils.createTotalResourceProfile(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.createTotalResourceProfile) TaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTracker) TestingTaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TestingTaskExecutorPartitionTracker) AllocatedSlotReport(org.apache.flink.runtime.jobmaster.AllocatedSlotReport) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) TaskExecutorLocalStateStoresManager(org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) ArrayDeque(java.util.ArrayDeque) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) ClusterPartitionReport(org.apache.flink.runtime.taskexecutor.partition.ClusterPartitionReport) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 73 with AllocationID

use of org.apache.flink.runtime.clusterframework.types.AllocationID in project flink by apache.

the class TaskExecutorTest method testIgnoringSlotRequestsIfNotRegistered.

/**
 * Tests that we ignore slot requests if the TaskExecutor is not registered at a
 * ResourceManager.
 */
@Test
public void testIgnoringSlotRequestsIfNotRegistered() throws Exception {
    final TaskExecutor taskExecutor = createTaskExecutor(1);
    taskExecutor.start();
    try {
        final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
        final CompletableFuture<RegistrationResponse> registrationFuture = new CompletableFuture<>();
        final CompletableFuture<ResourceID> taskExecutorResourceIdFuture = new CompletableFuture<>();
        testingResourceManagerGateway.setRegisterTaskExecutorFunction(taskExecutorRegistration -> {
            taskExecutorResourceIdFuture.complete(taskExecutorRegistration.getResourceId());
            return registrationFuture;
        });
        rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
        resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());
        final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);
        final ResourceID resourceId = taskExecutorResourceIdFuture.get();
        final CompletableFuture<Acknowledge> slotRequestResponse = taskExecutorGateway.requestSlot(new SlotID(resourceId, 0), jobId, new AllocationID(), ResourceProfile.ZERO, "foobar", testingResourceManagerGateway.getFencingToken(), timeout);
        try {
            slotRequestResponse.get();
            fail("We should not be able to request slots before the TaskExecutor is registered at the ResourceManager.");
        } catch (ExecutionException ee) {
            assertThat(ExceptionUtils.stripExecutionException(ee), instanceOf(TaskManagerException.class));
        }
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
Also used : Acknowledge(org.apache.flink.runtime.messages.Acknowledge) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) ExecutionException(java.util.concurrent.ExecutionException) Test(org.junit.Test)

Example 74 with AllocationID

use of org.apache.flink.runtime.clusterframework.types.AllocationID in project flink by apache.

the class TaskExecutorTest method testDisconnectFromJobMasterWhenNewLeader.

/**
 * Tests that the TaskExecutor disconnects from the JobMaster if a new leader is detected.
 */
@Test
public void testDisconnectFromJobMasterWhenNewLeader() throws Exception {
    final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setTaskSlotTable(TaskSlotUtils.createTaskSlotTable(1)).build();
    final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);
    final CompletableFuture<Integer> offeredSlotsFuture = new CompletableFuture<>();
    final CompletableFuture<ResourceID> disconnectFuture = new CompletableFuture<>();
    final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().setOfferSlotsFunction((resourceID, slotOffers) -> {
        offeredSlotsFuture.complete(slotOffers.size());
        return CompletableFuture.completedFuture(slotOffers);
    }).setDisconnectTaskManagerFunction(resourceID -> {
        disconnectFuture.complete(resourceID);
        return CompletableFuture.completedFuture(Acknowledge.get());
    }).build();
    final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway();
    final CompletableFuture<Void> initialSlotReportFuture = new CompletableFuture<>();
    resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
        initialSlotReportFuture.complete(null);
        return CompletableFuture.completedFuture(Acknowledge.get());
    });
    rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);
    rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
    try {
        taskExecutor.start();
        TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);
        resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerGateway.getFencingToken().toUUID());
        initialSlotReportFuture.get();
        ResourceID resourceID = taskManagerServices.getUnresolvedTaskManagerLocation().getResourceID();
        requestSlot(taskExecutorGateway, jobId, new AllocationID(), new SlotID(resourceID, 0), ResourceProfile.ZERO, "foobar", resourceManagerGateway.getFencingToken());
        jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), UUID.randomUUID());
        assertThat(offeredSlotsFuture.get(), is(1));
        // notify loss of leadership
        jobManagerLeaderRetriever.notifyListener(null, null);
        assertThat(disconnectFuture.get(), is(resourceID));
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
Also used : Arrays(java.util.Arrays) Tuple3(org.apache.flink.api.java.tuple.Tuple3) TaskSlotUtils.createDefaultTimerService(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.createDefaultTimerService) MemorySize(org.apache.flink.configuration.MemorySize) NetUtils(org.apache.flink.util.NetUtils) InetAddress(java.net.InetAddress) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) TaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTracker) FunctionUtils(org.apache.flink.util.function.FunctionUtils) Matchers.nullValue(org.hamcrest.Matchers.nullValue) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Builder(org.apache.flink.runtime.taskexecutor.TaskSubmissionTestEnvironment.Builder) Matchers.notNullValue(org.hamcrest.Matchers.notNullValue) Failure(org.apache.flink.runtime.registration.RegistrationResponse.Failure) NoOpTaskExecutorBlobService(org.apache.flink.runtime.blob.NoOpTaskExecutorBlobService) TestingTaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TestingTaskExecutorPartitionTracker) NoOpMetricRegistry(org.apache.flink.runtime.metrics.NoOpMetricRegistry) BlockingQueue(java.util.concurrent.BlockingQueue) Matchers.startsWith(org.hamcrest.Matchers.startsWith) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) TaskExecutorStateChangelogStoragesManager(org.apache.flink.runtime.state.TaskExecutorStateChangelogStoragesManager) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) TimeUtils(org.apache.flink.util.TimeUtils) Matchers.contains(org.hamcrest.Matchers.contains) Matchers.is(org.hamcrest.Matchers.is) Matchers.containsString(org.hamcrest.Matchers.containsString) Time(org.apache.flink.api.common.time.Time) TaskExecutorRegistration(org.apache.flink.runtime.resourcemanager.TaskExecutorRegistration) TaskExecutorPartitionTrackerImpl(org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTrackerImpl) FlinkException(org.apache.flink.util.FlinkException) TaskSlotTableImpl(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTableImpl) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) TaskSubmissionException(org.apache.flink.runtime.taskexecutor.exceptions.TaskSubmissionException) Callable(java.util.concurrent.Callable) DEFAULT_RESOURCE_PROFILE(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.DEFAULT_RESOURCE_PROFILE) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) TestingTaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TestingTaskSlotTable) ArrayList(java.util.ArrayList) TaskManagerOptions(org.apache.flink.configuration.TaskManagerOptions) TestingClassLoaderLease(org.apache.flink.runtime.execution.librarycache.TestingClassLoaderLease) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) TestName(org.junit.rules.TestName) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) Matchers.hasSize(org.hamcrest.Matchers.hasSize) TriConsumer(org.apache.flink.util.function.TriConsumer) TestFileUtils(org.apache.flink.testutils.TestFileUtils) Before(org.junit.Before) RetryingRegistrationConfiguration(org.apache.flink.runtime.registration.RetryingRegistrationConfiguration) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) SlotNotFoundException(org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException) TriConsumerWithException(org.apache.flink.util.function.TriConsumerWithException) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) InstanceID(org.apache.flink.runtime.instance.InstanceID) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Executors(org.apache.flink.util.concurrent.Executors) ThreadSafeTaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.ThreadSafeTaskSlotTable) JobID(org.apache.flink.api.common.JobID) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) Task(org.apache.flink.runtime.taskmanager.Task) Assert.assertNull(org.junit.Assert.assertNull) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) ArrayDeque(java.util.ArrayDeque) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) Assert.assertEquals(org.junit.Assert.assertEquals) CPUResource(org.apache.flink.api.common.resources.CPUResource) MultiShotLatch(org.apache.flink.core.testutils.MultiShotLatch) Deadline(org.apache.flink.api.common.time.Deadline) ClusterPartitionReport(org.apache.flink.runtime.taskexecutor.partition.ClusterPartitionReport) IntStream.range(java.util.stream.IntStream.range) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) ShuffleEnvironment(org.apache.flink.runtime.shuffle.ShuffleEnvironment) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) BlockingNoOpInvokable(org.apache.flink.runtime.testtasks.BlockingNoOpInvokable) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) Lists(org.apache.flink.shaded.guava30.com.google.common.collect.Lists) Assert.assertThat(org.junit.Assert.assertThat) After(org.junit.After) TestLogger(org.apache.flink.util.TestLogger) Assert.fail(org.junit.Assert.fail) TransientBlobKey(org.apache.flink.runtime.blob.TransientBlobKey) RegistrationTimeoutException(org.apache.flink.runtime.taskexecutor.exceptions.RegistrationTimeoutException) Collection(java.util.Collection) KvStateRegistry(org.apache.flink.runtime.query.KvStateRegistry) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) CompletionException(java.util.concurrent.CompletionException) UUID(java.util.UUID) NettyShuffleEnvironment(org.apache.flink.runtime.io.network.NettyShuffleEnvironment) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) TaskManagerException(org.apache.flink.runtime.taskexecutor.exceptions.TaskManagerException) Collectors(java.util.stream.Collectors) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) ExecutorUtils(org.apache.flink.util.ExecutorUtils) TaskSlotUtils.createTotalResourceProfile(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.createTotalResourceProfile) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Queue(java.util.Queue) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) AllocatedSlotInfo(org.apache.flink.runtime.jobmaster.AllocatedSlotInfo) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) LeaderRetrievalListener(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalListener) TaskInvokable(org.apache.flink.runtime.jobgraph.tasks.TaskInvokable) TaskExecutorLocalStateStoresManager(org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) CompletableFuture(java.util.concurrent.CompletableFuture) TaskDeploymentDescriptorBuilder(org.apache.flink.runtime.deployment.TaskDeploymentDescriptorBuilder) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) NettyShuffleEnvironmentOptions(org.apache.flink.configuration.NettyShuffleEnvironmentOptions) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) ExternalResourceInfoProvider(org.apache.flink.runtime.externalresource.ExternalResourceInfoProvider) ConfigConstants(org.apache.flink.configuration.ConfigConstants) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Nonnull(javax.annotation.Nonnull) TaskSlotUtils(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils) JMTMRegistrationRejection(org.apache.flink.runtime.jobmaster.JMTMRegistrationRejection) Matchers.empty(org.hamcrest.Matchers.empty) NettyShuffleEnvironmentBuilder(org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) Reference(org.apache.flink.util.Reference) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) AllocatedSlotReport(org.apache.flink.runtime.jobmaster.AllocatedSlotReport) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) Rule(org.junit.Rule) UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) Test(org.junit.Test)

Example 75 with AllocationID

use of org.apache.flink.runtime.clusterframework.types.AllocationID in project flink by apache.

the class TaskExecutorTest method testSubmitTaskBeforeAcceptSlot.

/**
 * This tests task executor receive SubmitTask before OfferSlot response.
 */
@Test
public void testSubmitTaskBeforeAcceptSlot() throws Exception {
    final InstanceID registrationId = new InstanceID();
    final OneShotLatch taskExecutorIsRegistered = new OneShotLatch();
    final CompletableFuture<Tuple3<InstanceID, SlotID, AllocationID>> availableSlotFuture = new CompletableFuture<>();
    final TestingResourceManagerGateway resourceManagerGateway = createRmWithTmRegisterAndNotifySlotHooks(registrationId, taskExecutorIsRegistered, availableSlotFuture);
    final AllocationID allocationId1 = new AllocationID();
    final AllocationID allocationId2 = new AllocationID();
    final SlotOffer offer1 = new SlotOffer(allocationId1, 0, ResourceProfile.ANY);
    final OneShotLatch offerSlotsLatch = new OneShotLatch();
    final OneShotLatch taskInTerminalState = new OneShotLatch();
    final CompletableFuture<Collection<SlotOffer>> offerResultFuture = new CompletableFuture<>();
    final TestingJobMasterGateway jobMasterGateway = createJobMasterWithSlotOfferAndTaskTerminationHooks(offerSlotsLatch, taskInTerminalState, offerResultFuture);
    rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);
    rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
    final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(2);
    final TaskManagerServices taskManagerServices = createTaskManagerServicesWithTaskSlotTable(taskSlotTable);
    final TestingTaskExecutor taskManager = createTestingTaskExecutor(taskManagerServices);
    try {
        taskManager.start();
        taskManager.waitUntilStarted();
        final TaskExecutorGateway tmGateway = taskManager.getSelfGateway(TaskExecutorGateway.class);
        // wait until registered at the RM
        taskExecutorIsRegistered.await();
        // request 2 slots for the given allocation ids
        AllocationID[] allocationIds = new AllocationID[] { allocationId1, allocationId2 };
        for (int i = 0; i < allocationIds.length; i++) {
            requestSlot(tmGateway, jobId, allocationIds[i], buildSlotID(i), ResourceProfile.UNKNOWN, jobMasterGateway.getAddress(), resourceManagerGateway.getFencingToken());
        }
        // notify job leader to start slot offering
        jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());
        // wait until slots have been offered
        offerSlotsLatch.await();
        submit(allocationId1, jobMasterGateway, tmGateway, NoOpInvokable.class);
        // acknowledge the offered slots
        offerResultFuture.complete(Collections.singleton(offer1));
        // check that the rejected slot will be made available again
        final Tuple3<InstanceID, SlotID, AllocationID> instanceIDSlotIDAllocationIDTuple3 = availableSlotFuture.get();
        assertThat(instanceIDSlotIDAllocationIDTuple3.f2, equalTo(allocationId2));
        // wait for the task completion
        taskInTerminalState.await();
    } finally {
        RpcUtils.terminateRpcEndpoint(taskManager, timeout);
    }
}
Also used : Task(org.apache.flink.runtime.taskmanager.Task) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) InstanceID(org.apache.flink.runtime.instance.InstanceID) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) Tuple3(org.apache.flink.api.java.tuple.Tuple3) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) Collection(java.util.Collection) Test(org.junit.Test)

Aggregations

AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)194 Test (org.junit.Test)137 JobID (org.apache.flink.api.common.JobID)106 ResourceProfile (org.apache.flink.runtime.clusterframework.types.ResourceProfile)60 CompletableFuture (java.util.concurrent.CompletableFuture)56 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)56 SlotID (org.apache.flink.runtime.clusterframework.types.SlotID)53 ArrayList (java.util.ArrayList)39 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)36 Collection (java.util.Collection)35 Time (org.apache.flink.api.common.time.Time)35 Configuration (org.apache.flink.configuration.Configuration)34 Acknowledge (org.apache.flink.runtime.messages.Acknowledge)34 SlotOffer (org.apache.flink.runtime.taskexecutor.slot.SlotOffer)34 List (java.util.List)32 TestLogger (org.apache.flink.util.TestLogger)32 FlinkException (org.apache.flink.util.FlinkException)31 Matchers.is (org.hamcrest.Matchers.is)31 Assert.assertThat (org.junit.Assert.assertThat)31 Arrays (java.util.Arrays)30