Search in sources:

Example 1 with TestingResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.

the class JobMasterTest method testHeartbeatTimeoutWithResourceManager.

/**
 * Tests that the JobMaster disconnects from the ResourceManager after a heartbeat timeout and
 * then tries to register at the ResourceManager again.
 *
 * <p>All blocking waits are bounded by {@code testingTimeout} so that a missing registration or
 * disconnect fails the test instead of hanging it.
 */
@Test
public void testHeartbeatTimeoutWithResourceManager() throws Exception {
    final String resourceManagerAddress = "rm";
    final ResourceManagerId resourceManagerId = ResourceManagerId.generate();
    final ResourceID rmResourceId = new ResourceID(resourceManagerAddress);
    final TestingResourceManagerGateway resourceManagerGateway =
            new TestingResourceManagerGateway(
                    resourceManagerId, rmResourceId, resourceManagerAddress, "localhost");

    final CompletableFuture<Tuple3<JobMasterId, ResourceID, JobID>> jobManagerRegistrationFuture =
            new CompletableFuture<>();
    final CompletableFuture<JobID> disconnectedJobManagerFuture = new CompletableFuture<>();
    // two registrations are expected: the initial one and the reconnect after the disconnect
    final CountDownLatch registrationAttempts = new CountDownLatch(2);

    resourceManagerGateway.setRegisterJobManagerFunction(
            (jobMasterId, resourceID, s, jobID) -> {
                // only the first call completes the future; later calls just count down
                jobManagerRegistrationFuture.complete(Tuple3.of(jobMasterId, resourceID, jobID));
                registrationAttempts.countDown();
                return CompletableFuture.completedFuture(
                        resourceManagerGateway.getJobMasterRegistrationSuccess());
            });
    resourceManagerGateway.setDisconnectJobManagerConsumer(
            tuple -> disconnectedJobManagerFuture.complete(tuple.f0));
    rpcService.registerGateway(resourceManagerAddress, resourceManagerGateway);

    final JobMaster jobMaster =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withJobMasterId(jobMasterId)
                    .withResourceId(jmResourceId)
                    .withConfiguration(configuration)
                    .withHighAvailabilityServices(haServices)
                    .withHeartbeatServices(fastHeartbeatServices)
                    .createJobMaster();
    jobMaster.start();

    try {
        // define a leader and see that a registration happens
        rmLeaderRetrievalService.notifyListener(resourceManagerAddress, resourceManagerId.toUUID());

        // register job manager success will trigger monitor heartbeat target between jm and rm
        final Tuple3<JobMasterId, ResourceID, JobID> registrationInformation =
                jobManagerRegistrationFuture.get(
                        testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);
        assertThat(registrationInformation.f0, Matchers.equalTo(jobMasterId));
        assertThat(registrationInformation.f1, Matchers.equalTo(jmResourceId));
        assertThat(registrationInformation.f2, Matchers.equalTo(jobGraph.getJobID()));

        final JobID disconnectedJobManager =
                disconnectedJobManagerFuture.get(
                        testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);
        // heartbeat timeout should trigger disconnect JobManager from ResourceManager
        assertThat(disconnectedJobManager, Matchers.equalTo(jobGraph.getJobID()));

        // the JobMaster should try to reconnect to the RM; bound the wait like the futures above
        // so that a missing reconnect fails the test instead of blocking forever
        final boolean reconnected =
                registrationAttempts.await(
                        testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);
        assertThat(reconnected, Matchers.equalTo(true));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Also used : CountDownLatch(java.util.concurrent.CountDownLatch) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Tuple3(org.apache.flink.api.java.tuple.Tuple3) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 2 with TestingResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.

the class JobMasterTest method testReconnectionAfterDisconnect.

/**
 * Verifies that, after receiving a disconnect message from the ResourceManager, the JobMaster
 * registers again at the latest known ResourceManager.
 */
@Test
public void testReconnectionAfterDisconnect() throws Exception {
    final JobMaster jobMaster =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withJobMasterId(jobMasterId)
                    .withConfiguration(configuration)
                    .withHighAvailabilityServices(haServices)
                    .withHeartbeatServices(heartbeatServices)
                    .createJobMaster();
    jobMaster.start();
    final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

    try {
        final TestingResourceManagerGateway rmGateway =
                createAndRegisterTestingResourceManagerGateway();

        // every registration call records the id of the registering JobMaster
        final BlockingQueue<JobMasterId> observedRegistrations = new ArrayBlockingQueue<>(1);
        rmGateway.setRegisterJobManagerFunction(
                (registeringJobMasterId, resourceID, s, jobID) -> {
                    observedRegistrations.offer(registeringJobMasterId);
                    return CompletableFuture.completedFuture(
                            rmGateway.getJobMasterRegistrationSuccess());
                });

        final ResourceManagerId rmFencingToken = rmGateway.getFencingToken();
        notifyResourceManagerLeaderListeners(rmGateway);

        // block until the initial registration has been observed
        final JobMasterId initialRegistration = observedRegistrations.take();
        assertThat(initialRegistration, equalTo(jobMasterId));
        assertThat(observedRegistrations.isEmpty(), is(true));

        jobMasterGateway.disconnectResourceManager(
                rmFencingToken, new FlinkException("Test exception"));

        // the disconnect must be followed by another registration
        final JobMasterId registrationAfterDisconnect = observedRegistrations.take();
        assertThat(registrationAfterDisconnect, equalTo(jobMasterId));
    } finally {
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    }
}
Also used : ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) ResourceManagerId(org.apache.flink.runtime.resourcemanager.ResourceManagerId) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) JobMasterBuilder(org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder) FlinkException(org.apache.flink.util.FlinkException) Test(org.junit.Test)

Example 3 with TestingResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.

the class DeclarativeSlotPoolServiceTest method testConnectToResourceManagerDeclaresRequiredResources.

/**
 * Verifies that connecting the slot pool service to a ResourceManager declares the resource
 * requirements reported by the underlying declarative slot pool.
 */
@Test
public void testConnectToResourceManagerDeclaresRequiredResources() throws Exception {
    final Collection<ResourceRequirement> requiredResources =
            Arrays.asList(
                    ResourceRequirement.create(ResourceProfile.UNKNOWN, 2),
                    ResourceRequirement.create(ResourceProfile.ZERO, 4));

    try (DeclarativeSlotPoolService slotPoolService =
            createDeclarativeSlotPoolService(
                    new TestingDeclarativeSlotPoolFactory(
                            new TestingDeclarativeSlotPoolBuilder()
                                    .setGetResourceRequirementsSupplier(
                                            () -> requiredResources)))) {
        final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();

        // captures whatever requirements get declared at the ResourceManager
        final CompletableFuture<ResourceRequirements> declaredRequirementsFuture =
                new CompletableFuture<>();
        rmGateway.setDeclareRequiredResourcesFunction(
                (jobMasterId, declared) -> {
                    declaredRequirementsFuture.complete(declared);
                    return CompletableFuture.completedFuture(Acknowledge.get());
                });

        slotPoolService.connectToResourceManager(rmGateway);

        final ResourceRequirements declaredRequirements = declaredRequirementsFuture.join();
        assertThat(declaredRequirements.getResourceRequirements(), is(requiredResources));
        assertThat(declaredRequirements.getJobId(), is(jobId));
        assertThat(declaredRequirements.getTargetAddress(), is(address));
    }
}
Also used : CompletableFuture(java.util.concurrent.CompletableFuture) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) Test(org.junit.Test)

Example 4 with TestingResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.

the class SlotPoolBatchSlotRequestTest method testPendingBatchSlotRequestFailsIfAllocationFailsUnfulfillably.

/**
 * Verifies that a pending batch slot request completes exceptionally once the slot pool has been
 * notified via {@link SlotPoolService#notifyNotEnoughResourcesAvailable} that not enough
 * resources are available.
 */
@Test
public void testPendingBatchSlotRequestFailsIfAllocationFailsUnfulfillably() throws Exception {
    final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();

    try (final DeclarativeSlotPoolBridge slotPoolBridge =
            new DeclarativeSlotPoolBridgeBuilder()
                    .setResourceManagerGateway(rmGateway)
                    .buildAndStart(mainThreadExecutor)) {
        final CompletableFuture<PhysicalSlot> pendingBatchSlot =
                SlotPoolUtils.requestNewAllocatedBatchSlot(
                        slotPoolBridge, mainThreadExecutor, resourceProfile);

        // report that no resources were acquired for the outstanding request
        SlotPoolUtils.notifyNotEnoughResourcesAvailable(
                slotPoolBridge, mainThreadExecutor, Collections.emptyList());

        assertThat(
                pendingBatchSlot,
                FlinkMatchers.futureWillCompleteExceptionally(Duration.ofSeconds(10L)));
    }
}
Also used : TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) Test(org.junit.Test)

Example 5 with TestingResourceManagerGateway

use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.

the class TaskExecutorToResourceManagerConnectionTest method setUp.

/**
 * Creates fresh registration futures and a testing RPC service with a testing ResourceManager
 * gateway registered under {@code RESOURCE_MANAGER_ADDRESS}.
 */
@Before
public void setUp() {
    // fresh futures for observing the registration outcome
    registrationSuccessFuture = new CompletableFuture<>();
    registrationRejectionFuture = new CompletableFuture<>();

    // make the testing gateway resolvable under the ResourceManager address
    rpcService = new TestingRpcService();
    testingResourceManagerGateway = new TestingResourceManagerGateway();
    rpcService.registerGateway(RESOURCE_MANAGER_ADDRESS, testingResourceManagerGateway);
}
Also used : TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) Before(org.junit.Before)

Aggregations

- TestingResourceManagerGateway (org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway): 46
- Test (org.junit.Test): 36
- CompletableFuture (java.util.concurrent.CompletableFuture): 31
- ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID): 26
- Task (org.apache.flink.runtime.taskmanager.Task): 21
- InstanceID (org.apache.flink.runtime.instance.InstanceID): 20
- OneShotLatch (org.apache.flink.core.testutils.OneShotLatch): 19
- AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID): 19
- SlotID (org.apache.flink.runtime.clusterframework.types.SlotID): 19
- ClusterInformation (org.apache.flink.runtime.entrypoint.ClusterInformation): 19
- ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue): 18
- JobID (org.apache.flink.api.common.JobID): 18
- TestingJobMasterGateway (org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway): 17
- FlinkException (org.apache.flink.util.FlinkException): 17
- CountDownLatch (java.util.concurrent.CountDownLatch): 16
- TaskExecutorLocalStateStoresManager (org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager): 16
- Collection (java.util.Collection): 15
- ResourceProfile (org.apache.flink.runtime.clusterframework.types.ResourceProfile): 15
- RegistrationResponse (org.apache.flink.runtime.registration.RegistrationResponse): 15
- ExecutionException (java.util.concurrent.ExecutionException): 14