Search in sources :

Example 21 with TestingResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.

the class TaskExecutorTest method testOfferSlotToJobMasterAfterTimeout.

/**
 * Verifies that the TaskExecutor keeps offering a slot to the JobMaster when earlier offers
 * fail with a timeout.
 *
 * <p>The testing JobMaster gateway answers the first two slot offers with a
 * {@link TimeoutException} and accepts the third one. The test passes once three offers were
 * observed, the accepted offer carries the requested allocation id and
 * {@code isSlotFree(1)} still holds for the slot table.
 */
@Test
public void testOfferSlotToJobMasterAfterTimeout() throws Exception {
    final TaskSlotTable<Task> slotTable = TaskSlotUtils.createTaskSlotTable(2);
    final TaskManagerServices services =
            new TaskManagerServicesBuilder().setTaskSlotTable(slotTable).build();
    final TaskExecutor taskExecutor = createTaskExecutor(services);
    final AllocationID allocationId = new AllocationID();

    // ResourceManager side: completes the future as soon as a slot report is sent.
    final CompletableFuture<ResourceID> slotReportSent = new CompletableFuture<>();
    final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
    rmGateway.setSendSlotReportFunction(
            ignoredReport -> {
                slotReportSent.complete(null);
                return CompletableFuture.completedFuture(Acknowledge.get());
            });
    rpc.registerGateway(rmGateway.getAddress(), rmGateway);
    resourceManagerLeaderRetriever.notifyListener(
            rmGateway.getAddress(), rmGateway.getFencingToken().toUUID());

    // JobMaster side: every offer counts the latch down; only the offer that brings the
    // latch to zero is accepted, all earlier ones fail with a TimeoutException.
    final CountDownLatch pendingOffers = new CountDownLatch(3);
    final CompletableFuture<AllocationID> acceptedAllocation = new CompletableFuture<>();
    final TestingJobMasterGateway jobMasterGateway =
            new TestingJobMasterGatewayBuilder()
                    .setOfferSlotsFunction(
                            (resourceID, slotOffers) -> {
                                assertThat(slotOffers.size(), is(1));
                                pendingOffers.countDown();
                                if (pendingOffers.getCount() > 0) {
                                    return FutureUtils.completedExceptionally(
                                            new TimeoutException());
                                }
                                acceptedAllocation.complete(
                                        slotOffers.iterator().next().getAllocationId());
                                return CompletableFuture.completedFuture(slotOffers);
                            })
                    .build();
    final String jobManagerAddress = jobMasterGateway.getAddress();
    rpc.registerGateway(jobManagerAddress, jobMasterGateway);
    jobManagerLeaderRetriever.notifyListener(
            jobManagerAddress, jobMasterGateway.getFencingToken().toUUID());

    try {
        taskExecutor.start();
        final TaskExecutorGateway selfGateway =
                taskExecutor.getSelfGateway(TaskExecutorGateway.class);

        // block until the first slot report reached the ResourceManager gateway
        slotReportSent.get();

        final SlotID requestedSlot = new SlotID(taskExecutor.getResourceID(), 0);
        requestSlot(
                selfGateway,
                jobId,
                allocationId,
                requestedSlot,
                ResourceProfile.ZERO,
                jobManagerAddress,
                rmGateway.getFencingToken());

        // three offers are required before the JobMaster gateway accepts the slot
        pendingOffers.await();
        assertThat(acceptedAllocation.get(), is(allocationId));
        assertTrue(slotTable.isSlotFree(1));
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
Also used : Arrays(java.util.Arrays) Tuple3(org.apache.flink.api.java.tuple.Tuple3) TaskSlotUtils.createDefaultTimerService(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.createDefaultTimerService) MemorySize(org.apache.flink.configuration.MemorySize) NetUtils(org.apache.flink.util.NetUtils) InetAddress(java.net.InetAddress) IOManagerAsync(org.apache.flink.runtime.io.disk.iomanager.IOManagerAsync) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) TaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTracker) FunctionUtils(org.apache.flink.util.function.FunctionUtils) Matchers.nullValue(org.hamcrest.Matchers.nullValue) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Builder(org.apache.flink.runtime.taskexecutor.TaskSubmissionTestEnvironment.Builder) Matchers.notNullValue(org.hamcrest.Matchers.notNullValue) Failure(org.apache.flink.runtime.registration.RegistrationResponse.Failure) NoOpTaskExecutorBlobService(org.apache.flink.runtime.blob.NoOpTaskExecutorBlobService) TestingTaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TestingTaskExecutorPartitionTracker) NoOpMetricRegistry(org.apache.flink.runtime.metrics.NoOpMetricRegistry) BlockingQueue(java.util.concurrent.BlockingQueue) Matchers.startsWith(org.hamcrest.Matchers.startsWith) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) TaskExecutorStateChangelogStoragesManager(org.apache.flink.runtime.state.TaskExecutorStateChangelogStoragesManager) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) 
ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) TimeUtils(org.apache.flink.util.TimeUtils) Matchers.contains(org.hamcrest.Matchers.contains) Matchers.is(org.hamcrest.Matchers.is) Matchers.containsString(org.hamcrest.Matchers.containsString) Time(org.apache.flink.api.common.time.Time) TaskExecutorRegistration(org.apache.flink.runtime.resourcemanager.TaskExecutorRegistration) TaskExecutorPartitionTrackerImpl(org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTrackerImpl) FlinkException(org.apache.flink.util.FlinkException) TaskSlotTableImpl(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTableImpl) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) TaskSubmissionException(org.apache.flink.runtime.taskexecutor.exceptions.TaskSubmissionException) Callable(java.util.concurrent.Callable) DEFAULT_RESOURCE_PROFILE(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.DEFAULT_RESOURCE_PROFILE) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) TestingTaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TestingTaskSlotTable) ArrayList(java.util.ArrayList) TaskManagerOptions(org.apache.flink.configuration.TaskManagerOptions) TestingClassLoaderLease(org.apache.flink.runtime.execution.librarycache.TestingClassLoaderLease) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) TestName(org.junit.rules.TestName) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) Matchers.hasSize(org.hamcrest.Matchers.hasSize) TriConsumer(org.apache.flink.util.function.TriConsumer) TestFileUtils(org.apache.flink.testutils.TestFileUtils) Before(org.junit.Before) RetryingRegistrationConfiguration(org.apache.flink.runtime.registration.RetryingRegistrationConfiguration) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) 
SlotNotFoundException(org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException) TriConsumerWithException(org.apache.flink.util.function.TriConsumerWithException) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) InstanceID(org.apache.flink.runtime.instance.InstanceID) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Executors(org.apache.flink.util.concurrent.Executors) ThreadSafeTaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.ThreadSafeTaskSlotTable) JobID(org.apache.flink.api.common.JobID) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) Task(org.apache.flink.runtime.taskmanager.Task) Assert.assertNull(org.junit.Assert.assertNull) UnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation) ArrayDeque(java.util.ArrayDeque) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) Assert.assertEquals(org.junit.Assert.assertEquals) CPUResource(org.apache.flink.api.common.resources.CPUResource) MultiShotLatch(org.apache.flink.core.testutils.MultiShotLatch) Deadline(org.apache.flink.api.common.time.Deadline) ClusterPartitionReport(org.apache.flink.runtime.taskexecutor.partition.ClusterPartitionReport) IntStream.range(java.util.stream.IntStream.range) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) ShuffleEnvironment(org.apache.flink.runtime.shuffle.ShuffleEnvironment) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) BlockingNoOpInvokable(org.apache.flink.runtime.testtasks.BlockingNoOpInvokable) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) Lists(org.apache.flink.shaded.guava30.com.google.common.collect.Lists) Assert.assertThat(org.junit.Assert.assertThat) 
After(org.junit.After) TestLogger(org.apache.flink.util.TestLogger) Assert.fail(org.junit.Assert.fail) TransientBlobKey(org.apache.flink.runtime.blob.TransientBlobKey) RegistrationTimeoutException(org.apache.flink.runtime.taskexecutor.exceptions.RegistrationTimeoutException) Collection(java.util.Collection) KvStateRegistry(org.apache.flink.runtime.query.KvStateRegistry) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) CompletionException(java.util.concurrent.CompletionException) UUID(java.util.UUID) NettyShuffleEnvironment(org.apache.flink.runtime.io.network.NettyShuffleEnvironment) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) TaskManagerException(org.apache.flink.runtime.taskexecutor.exceptions.TaskManagerException) Collectors(java.util.stream.Collectors) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) ExecutorUtils(org.apache.flink.util.ExecutorUtils) TaskSlotUtils.createTotalResourceProfile(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.createTotalResourceProfile) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Queue(java.util.Queue) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) AllocatedSlotInfo(org.apache.flink.runtime.jobmaster.AllocatedSlotInfo) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) LeaderRetrievalListener(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalListener) TaskInvokable(org.apache.flink.runtime.jobgraph.tasks.TaskInvokable) TaskExecutorLocalStateStoresManager(org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) CompletableFuture(java.util.concurrent.CompletableFuture) 
TaskDeploymentDescriptorBuilder(org.apache.flink.runtime.deployment.TaskDeploymentDescriptorBuilder) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) NettyShuffleEnvironmentOptions(org.apache.flink.configuration.NettyShuffleEnvironmentOptions) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) ExternalResourceInfoProvider(org.apache.flink.runtime.externalresource.ExternalResourceInfoProvider) ConfigConstants(org.apache.flink.configuration.ConfigConstants) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Nonnull(javax.annotation.Nonnull) TaskSlotUtils(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils) JMTMRegistrationRejection(org.apache.flink.runtime.jobmaster.JMTMRegistrationRejection) Matchers.empty(org.hamcrest.Matchers.empty) NettyShuffleEnvironmentBuilder(org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) Reference(org.apache.flink.util.Reference) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) AllocatedSlotReport(org.apache.flink.runtime.jobmaster.AllocatedSlotReport) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) Rule(org.junit.Rule) UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) 
Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) Task(org.apache.flink.runtime.taskmanager.Task) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) Matchers.containsString(org.hamcrest.Matchers.containsString) CountDownLatch(java.util.concurrent.CountDownLatch) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) TimeoutException(java.util.concurrent.TimeoutException) RegistrationTimeoutException(org.apache.flink.runtime.taskexecutor.exceptions.RegistrationTimeoutException) Test(org.junit.Test)

Example 22 with TestingResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.

the class JobMasterTest method testCloseUnestablishedResourceManagerConnection.

/**
 * Verifies that a ResourceManager connection which is not yet established can be replaced.
 *
 * <p>The JobMaster is first pointed at one ResourceManager gateway; once a registration
 * attempt has been seen there, a second gateway is announced and the test waits for a
 * registration attempt at that second gateway.
 */
@Test
public void testCloseUnestablishedResourceManagerConnection() throws Exception {
    final JobMaster jobMaster =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withConfiguration(configuration)
                    .withHighAvailabilityServices(haServices)
                    .createJobMaster();
    try {
        jobMaster.start();

        final TestingResourceManagerGateway firstRmGateway =
                createAndRegisterTestingResourceManagerGateway();
        final TestingResourceManagerGateway secondRmGateway =
                createAndRegisterTestingResourceManagerGateway();

        // each gateway triggers its own latch when a JobManager registration arrives
        final OneShotLatch registeredAtFirst = new OneShotLatch();
        firstRmGateway.setRegisterJobManagerFunction(
                (jobMasterId, resourceID, s, jobID) -> {
                    registeredAtFirst.trigger();
                    return CompletableFuture.completedFuture(
                            firstRmGateway.getJobMasterRegistrationSuccess());
                });

        final OneShotLatch registeredAtSecond = new OneShotLatch();
        secondRmGateway.setRegisterJobManagerFunction(
                (jobMasterId, resourceID, s, jobID) -> {
                    registeredAtSecond.trigger();
                    return CompletableFuture.completedFuture(
                            secondRmGateway.getJobMasterRegistrationSuccess());
                });

        notifyResourceManagerLeaderListeners(firstRmGateway);
        // the first registration attempt must have happened before switching leaders
        registeredAtFirst.await();

        // announcing the second RM should end the connection attempts towards the first one
        notifyResourceManagerLeaderListeners(secondRmGateway);
        // the JobMaster is expected to start registering at the second RM
        registeredAtSecond.await();
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Also used : TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) Test(org.junit.Test)

Example 23 with TestingResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.

the class TaskManagerDetailsHandlerTest method setup.

/**
 * Creates fresh test collaborators and the {@code TaskManagerDetailsHandler} under test
 * before each test method.
 */
@Before
public void setup() throws HandlerRequestException {
    resourceManagerGateway = new TestingResourceManagerGateway();
    metricFetcher = new TestingMetricFetcher();

    // The first retriever yields a null gateway; the second one hands out the testing
    // ResourceManager gateway created above.
    testInstance =
            new TaskManagerDetailsHandler(
                    () -> CompletableFuture.completedFuture(null),
                    TestingUtils.TIMEOUT,
                    Collections.emptyMap(),
                    TaskManagerDetailsHeaders.getInstance(),
                    () -> CompletableFuture.completedFuture(resourceManagerGateway),
                    metricFetcher);
}
Also used : TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) Before(org.junit.Before)

Example 24 with TestingResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.

the class TaskManagerLogListHandlerTest method setUp.

/**
 * Creates the testing ResourceManager gateway, the {@code TaskManagerLogListHandler} under
 * test and a request for {@code EXPECTED_TASK_MANAGER_ID} before each test method.
 */
@Before
public void setUp() throws HandlerRequestException {
    resourceManagerGateway = new TestingResourceManagerGateway();

    // The first retriever yields a null gateway; the second one hands out the testing
    // ResourceManager gateway created above.
    taskManagerLogListHandler =
            new TaskManagerLogListHandler(
                    () -> CompletableFuture.completedFuture(null),
                    TestingUtils.TIMEOUT,
                    Collections.emptyMap(),
                    TaskManagerLogsHeaders.getInstance(),
                    () -> CompletableFuture.completedFuture(resourceManagerGateway));

    handlerRequest = createRequest(EXPECTED_TASK_MANAGER_ID);
}
Also used : TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) Before(org.junit.Before)

Example 25 with TestingResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.

the class JobMasterTest method testResourceManagerConnectionAfterStart.

/**
 * Verifies that a JobMaster registers at the announced ResourceManager once it is started.
 *
 * <p>The ResourceManager gateway is registered and announced before the JobMaster starts;
 * the first registration it receives must carry the JobMaster's own {@code JobMasterId}.
 */
@Test
public void testResourceManagerConnectionAfterStart() throws Exception {
    final JobMaster jobMaster =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withJobMasterId(jobMasterId)
                    .withConfiguration(configuration)
                    .withHighAvailabilityServices(haServices)
                    .withHeartbeatServices(heartbeatServices)
                    .createJobMaster();
    try {
        final TestingResourceManagerGateway rmGateway =
                createAndRegisterTestingResourceManagerGateway();

        // records the id of every JobMaster that tries to register (capacity of one)
        final BlockingQueue<JobMasterId> registeredIds = new ArrayBlockingQueue<>(1);
        rmGateway.setRegisterJobManagerFunction(
                (registeringJobMasterId, resourceID, s, jobID) -> {
                    registeredIds.offer(registeringJobMasterId);
                    return CompletableFuture.completedFuture(
                            rmGateway.getJobMasterRegistrationSuccess());
                });

        // the RM is announced first, the JobMaster is started afterwards
        notifyResourceManagerLeaderListeners(rmGateway);
        jobMaster.start();

        // blocks until the first registration attempt arrives
        assertThat(registeredIds.take(), equalTo(jobMasterId));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Also used : ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) Test(org.junit.Test)

Aggregations

TestingResourceManagerGateway (org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway)46 Test (org.junit.Test)36 CompletableFuture (java.util.concurrent.CompletableFuture)31 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)26 Task (org.apache.flink.runtime.taskmanager.Task)21 InstanceID (org.apache.flink.runtime.instance.InstanceID)20 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)19 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)19 SlotID (org.apache.flink.runtime.clusterframework.types.SlotID)19 ClusterInformation (org.apache.flink.runtime.entrypoint.ClusterInformation)19 ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue)18 JobID (org.apache.flink.api.common.JobID)18 TestingJobMasterGateway (org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway)17 FlinkException (org.apache.flink.util.FlinkException)17 CountDownLatch (java.util.concurrent.CountDownLatch)16 TaskExecutorLocalStateStoresManager (org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager)16 Collection (java.util.Collection)15 ResourceProfile (org.apache.flink.runtime.clusterframework.types.ResourceProfile)15 RegistrationResponse (org.apache.flink.runtime.registration.RegistrationResponse)15 ExecutionException (java.util.concurrent.ExecutionException)14