Search in sources :

Example 16 with RegistrationResponse

use of org.apache.flink.runtime.registration.RegistrationResponse in project flink by apache.

the class ResourceManagerJobMasterTest method testRegisterJobMasterFromInvalidAddress.

/**
 * Checks that a job master registration carrying an invalid address is answered with a
 * {@link RegistrationResponse.Failure}.
 */
@Test
public void testRegisterJobMasterFromInvalidAddress() throws Exception {
    // register a job master under an invalid address ...
    final JobMasterId jobMasterId = new JobMasterId(HighAvailabilityServices.DEFAULT_LEADER_ID);
    final String bogusAddress = "/jobMasterAddress2";
    final CompletableFuture<RegistrationResponse> registrationFuture =
            resourceManagerGateway.registerJobMaster(
                    jobMasterId, jobMasterResourceId, bogusAddress, jobId, TIMEOUT);

    // ... and expect the resource manager to decline the registration
    final RegistrationResponse registrationResponse = registrationFuture.get(5, TimeUnit.SECONDS);
    assertTrue(registrationResponse instanceof RegistrationResponse.Failure);
}
Also used : JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) Test(org.junit.Test)

Example 17 with RegistrationResponse

use of org.apache.flink.runtime.registration.RegistrationResponse in project flink by apache.

the class ResourceManagerTaskExecutorTest method testDelayedRegisterTaskExecutor.

/**
 * Test delayed registration of task executor where the delay is introduced during connection
 * from resource manager to the registering task executor.
 *
 * <p>The first registration uses a gateway connection that blocks on a latch, combined with a
 * 1 ms timeout, and is expected to fail with a {@link TimeoutException}. A second registration
 * without the artificial delay must succeed, and it must still be in place after the blocked
 * first connection has been released.
 */
@Test
public void testDelayedRegisterTaskExecutor() throws Exception {
    final Time fastTimeout = Time.milliseconds(1L);
    try {
        final OneShotLatch startConnection = new OneShotLatch();
        final OneShotLatch finishConnection = new OneShotLatch();
        // first registration is with blocking connection
        rpcService.setRpcGatewayFutureFunction(rpcGateway -> CompletableFuture.supplyAsync(() -> {
            startConnection.trigger();
            try {
                finishConnection.await();
            } catch (InterruptedException ignored) {
                // Still hand out the gateway, but do not swallow the interrupt: restore the
                // flag so that the executor thread can observe it.
                Thread.currentThread().interrupt();
            }
            return rpcGateway;
        }, TestingUtils.defaultExecutor()));
        TaskExecutorRegistration taskExecutorRegistration = new TaskExecutorRegistration(taskExecutorGateway.getAddress(), taskExecutorResourceID, dataPort, jmxPort, hardwareDescription, new TaskExecutorMemoryConfiguration(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), DEFAULT_SLOT_PROFILE, DEFAULT_SLOT_PROFILE);
        CompletableFuture<RegistrationResponse> firstFuture = rmGateway.registerTaskExecutor(taskExecutorRegistration, fastTimeout);
        try {
            firstFuture.get();
            // fail() raises an AssertionError, which the catch (Exception) below does not catch
            fail("Should have failed because connection to taskmanager is delayed beyond timeout");
        } catch (Exception e) {
            final Throwable cause = ExceptionUtils.stripExecutionException(e);
            assertThat(cause, instanceOf(TimeoutException.class));
            assertThat(cause.getMessage(), containsString("ResourceManagerGateway.registerTaskExecutor"));
        }
        // make sure the blocking connection attempt of the first registration has started
        startConnection.await();
        // second registration after the timeout is made without delay and is expected to succeed
        rpcService.resetRpcGatewayFutureFunction();
        CompletableFuture<RegistrationResponse> secondFuture = rmGateway.registerTaskExecutor(taskExecutorRegistration, TIMEOUT);
        RegistrationResponse response = secondFuture.get();
        assertTrue(response instanceof TaskExecutorRegistrationSuccess);
        // on success, send slot report for taskmanager registration
        final SlotReport slotReport = new SlotReport(new SlotStatus(new SlotID(taskExecutorResourceID, 0), ResourceProfile.ANY));
        rmGateway.sendSlotReport(taskExecutorResourceID, ((TaskExecutorRegistrationSuccess) response).getRegistrationId(), slotReport, TIMEOUT).get();
        // let the remaining part of the first registration proceed
        finishConnection.trigger();
        // NOTE(review): the short sleep presumably gives the released first registration a
        // chance to be processed before the checks below; this is timing dependent - confirm
        Thread.sleep(1L);
        // verify that the latest registration is still valid, i.e. that it has not been
        // unregistered by the delayed first one
        final TaskManagerInfoWithSlots taskManagerInfoWithSlots = rmGateway.requestTaskManagerDetailsInfo(taskExecutorResourceID, TIMEOUT).get();
        assertThat(taskManagerInfoWithSlots.getTaskManagerInfo().getResourceId(), equalTo(taskExecutorResourceID));
        assertThat(taskManagerInfoWithSlots.getTaskManagerInfo().getNumberSlots(), equalTo(1));
    } finally {
        // always restore the default gateway connection behaviour for subsequent tests
        rpcService.resetRpcGatewayFutureFunction();
    }
}
Also used : SlotStatus(org.apache.flink.runtime.taskexecutor.SlotStatus) SlotReport(org.apache.flink.runtime.taskexecutor.SlotReport) TaskExecutorMemoryConfiguration(org.apache.flink.runtime.taskexecutor.TaskExecutorMemoryConfiguration) TaskExecutorRegistrationSuccess(org.apache.flink.runtime.taskexecutor.TaskExecutorRegistrationSuccess) Time(org.apache.flink.api.common.time.Time) FlinkException(org.apache.flink.util.FlinkException) FencingTokenException(org.apache.flink.runtime.rpc.exceptions.FencingTokenException) TimeoutException(java.util.concurrent.TimeoutException) ExecutionException(java.util.concurrent.ExecutionException) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) Test(org.junit.Test)

Example 18 with RegistrationResponse

use of org.apache.flink.runtime.registration.RegistrationResponse in project flink by apache.

the class ResourceManagerTaskExecutorTest method testRegisterTaskExecutor.

/**
 * Registers the same task executor twice: both registrations must succeed, the second one must
 * carry a different registration id, and only a single task manager may be reported afterwards.
 */
@Test
public void testRegisterTaskExecutor() throws Exception {
    // initial registration is expected to be answered with a success response
    final RegistrationResponse firstResponse =
            registerTaskExecutor(rmGateway, taskExecutorGateway.getAddress())
                    .get(TIMEOUT.toMilliseconds(), TimeUnit.MILLISECONDS);
    assertTrue(firstResponse instanceof TaskExecutorRegistrationSuccess);

    final TaskManagerInfoWithSlots registeredTaskManager =
            rmGateway.requestTaskManagerDetailsInfo(taskExecutorResourceID, TIMEOUT).get();
    assertThat(
            registeredTaskManager.getTaskManagerInfo().getResourceId(),
            equalTo(taskExecutorResourceID));

    // a duplicate registration from the same task executor is expected to succeed as well,
    // but with a registration id that differs from the previous one
    final RegistrationResponse secondResponse =
            registerTaskExecutor(rmGateway, taskExecutorGateway.getAddress()).get();
    assertTrue(secondResponse instanceof TaskExecutorRegistrationSuccess);

    final TaskExecutorRegistrationSuccess firstSuccess =
            (TaskExecutorRegistrationSuccess) firstResponse;
    final TaskExecutorRegistrationSuccess secondSuccess =
            (TaskExecutorRegistrationSuccess) secondResponse;
    assertNotEquals(firstSuccess.getRegistrationId(), secondSuccess.getRegistrationId());

    // the duplicate must not show up as an additional task manager
    assertThat(rmGateway.requestResourceOverview(TIMEOUT).get().getNumberTaskManagers(), is(1));
}
Also used : TaskExecutorRegistrationSuccess(org.apache.flink.runtime.taskexecutor.TaskExecutorRegistrationSuccess) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) Test(org.junit.Test)

Example 19 with RegistrationResponse

use of org.apache.flink.runtime.registration.RegistrationResponse in project flink by apache.

the class ResourceManagerTaskExecutorTest method testRegisterTaskExecutorFromInvalidAddress.

/**
 * Checks that a task executor registration carrying an invalid address is answered with a
 * {@link RegistrationResponse.Failure}.
 */
@Test
public void testRegisterTaskExecutorFromInvalidAddress() throws Exception {
    // register a task executor under an invalid address ...
    final String bogusAddress = "/taskExecutor2";
    final RegistrationResponse registrationResponse =
            registerTaskExecutor(rmGateway, bogusAddress)
                    .get(TIMEOUT.toMilliseconds(), TimeUnit.MILLISECONDS);

    // ... and expect the resource manager to decline the registration
    assertTrue(registrationResponse instanceof RegistrationResponse.Failure);
}
Also used : Matchers.containsString(org.hamcrest.Matchers.containsString) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) Test(org.junit.Test)

Example 20 with RegistrationResponse

use of org.apache.flink.runtime.registration.RegistrationResponse in project flink by apache.

the class TaskExecutorTest method testHeartbeatReporting.

/**
 * Checks that the heartbeat response sent by the task executor carries the expected slot report
 * and cluster partition report.
 */
@Test
public void testHeartbeatReporting() throws Exception {
    final String resourceManagerAddress = "rm";
    final UUID resourceManagerLeaderId = UUID.randomUUID();

    // testing resource manager gateway plus the futures used to observe the calls it receives
    final TestingResourceManagerGateway testingRmGateway = new TestingResourceManagerGateway();
    final ResourceID resourceManagerResourceId = testingRmGateway.getOwnResourceId();
    final CompletableFuture<ResourceID> registeredTaskExecutorFuture = new CompletableFuture<>();
    final CompletableFuture<SlotReport> firstSlotReportFuture = new CompletableFuture<>();
    final CompletableFuture<TaskExecutorHeartbeatPayload> heartbeatPayloadFuture =
            new CompletableFuture<>();

    final TaskExecutorRegistrationSuccess registrationSuccess =
            new TaskExecutorRegistrationSuccess(
                    new InstanceID(),
                    resourceManagerResourceId,
                    new ClusterInformation("localhost", 1234));
    final CompletableFuture<RegistrationResponse> registrationResponseFuture =
            CompletableFuture.completedFuture(registrationSuccess);

    testingRmGateway.setRegisterTaskExecutorFunction(
            registration -> {
                registeredTaskExecutorFuture.complete(registration.getResourceId());
                return registrationResponseFuture;
            });
    testingRmGateway.setSendSlotReportFunction(
            slotReportTuple -> {
                firstSlotReportFuture.complete(slotReportTuple.f2);
                return CompletableFuture.completedFuture(Acknowledge.get());
            });
    testingRmGateway.setTaskExecutorHeartbeatFunction(
            (resourceId, payload) -> {
                heartbeatPayloadFuture.complete(payload);
                return FutureUtils.completedVoidFuture();
            });
    rpc.registerGateway(resourceManagerAddress, testingRmGateway);

    // the slot table hands out the free-slot report first and the allocated-slot report second
    final SlotID slotId = buildSlotID(0);
    final ResourceProfile slotProfile = ResourceProfile.fromResources(1.0, 1);
    final SlotReport freeSlotReport = new SlotReport(new SlotStatus(slotId, slotProfile));
    final SlotReport allocatedSlotReport =
            new SlotReport(new SlotStatus(slotId, slotProfile, new JobID(), new AllocationID()));
    final Queue<SlotReport> pendingReports =
            new ArrayDeque<>(Arrays.asList(freeSlotReport, allocatedSlotReport));
    final TaskSlotTable<Task> taskSlotTable =
            TestingTaskSlotTable.<Task>newBuilder()
                    .createSlotReportSupplier(pendingReports::poll)
                    .closeAsyncReturns(CompletableFuture.completedFuture(null))
                    .build();

    final TaskExecutorLocalStateStoresManager localStateStoresManager =
            createTaskExecutorLocalStateStoresManager();
    final TaskManagerServices taskManagerServices =
            new TaskManagerServicesBuilder()
                    .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation)
                    .setTaskSlotTable(taskSlotTable)
                    .setTaskStateManager(localStateStoresManager)
                    .build();
    final TaskExecutorPartitionTracker partitionTracker =
            createPartitionTrackerWithFixedPartitionReport(
                    taskManagerServices.getShuffleEnvironment());
    final TaskExecutor taskManager =
            createTaskExecutor(taskManagerServices, HEARTBEAT_SERVICES, partitionTracker);

    try {
        taskManager.start();

        // announce the resource manager leader, which makes the task executor register
        resourceManagerLeaderRetriever.notifyListener(
                resourceManagerAddress, resourceManagerLeaderId);

        // a successful registration at the resource manager triggers monitoring of the
        // heartbeat target between task manager and resource manager
        assertThat(
                registeredTaskExecutorFuture.get(),
                equalTo(unresolvedTaskManagerLocation.getResourceID()));
        assertThat(firstSlotReportFuture.get(), equalTo(freeSlotReport));

        // request a heartbeat asynchronously and wait for the payload to arrive
        final TaskExecutorGateway selfGateway =
                taskManager.getSelfGateway(TaskExecutorGateway.class);
        selfGateway.heartbeatFromResourceManager(resourceManagerResourceId);
        final TaskExecutorHeartbeatPayload receivedPayload = heartbeatPayloadFuture.get();

        // the heartbeat has to carry the second slot report ...
        final SlotReport reportedSlots = receivedPayload.getSlotReport();
        assertEquals(allocatedSlotReport, reportedSlots);

        // ... as well as the partition report produced by the partition tracker
        final ClusterPartitionReport reportedPartitions =
                receivedPayload.getClusterPartitionReport();
        assertEquals(partitionTracker.createClusterPartitionReport(), reportedPartitions);
    } finally {
        RpcUtils.terminateRpcEndpoint(taskManager, timeout);
    }
}
Also used : Task(org.apache.flink.runtime.taskmanager.Task) InstanceID(org.apache.flink.runtime.instance.InstanceID) Matchers.containsString(org.hamcrest.Matchers.containsString) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) UUID(java.util.UUID) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) TaskSlotUtils.createTotalResourceProfile(org.apache.flink.runtime.taskexecutor.slot.TaskSlotUtils.createTotalResourceProfile) TaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TaskExecutorPartitionTracker) TestingTaskExecutorPartitionTracker(org.apache.flink.runtime.io.network.partition.TestingTaskExecutorPartitionTracker) AllocatedSlotReport(org.apache.flink.runtime.jobmaster.AllocatedSlotReport) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) TaskExecutorLocalStateStoresManager(org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager) ClusterInformation(org.apache.flink.runtime.entrypoint.ClusterInformation) ArrayDeque(java.util.ArrayDeque) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) ClusterPartitionReport(org.apache.flink.runtime.taskexecutor.partition.ClusterPartitionReport) TestingResourceManagerGateway(org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

RegistrationResponse (org.apache.flink.runtime.registration.RegistrationResponse): 40, Test (org.junit.Test): 35, ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID): 23, CompletableFuture (java.util.concurrent.CompletableFuture): 18, UUID (java.util.UUID): 14, JobID (org.apache.flink.api.common.JobID): 14, ArrayList (java.util.ArrayList): 12, FlinkException (org.apache.flink.util.FlinkException): 11, Time (org.apache.flink.api.common.time.Time): 10, AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID): 10, ResourceProfile (org.apache.flink.runtime.clusterframework.types.ResourceProfile): 10, TestingResourceManagerGateway (org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway): 9, LocalUnresolvedTaskManagerLocation (org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation): 9, ExecutionException (java.util.concurrent.ExecutionException): 8, TestingHighAvailabilityServices (org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices): 8, UnresolvedTaskManagerLocation (org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation): 8, TestingFatalErrorHandler (org.apache.flink.runtime.util.TestingFatalErrorHandler): 8, ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue): 7, TimeoutException (java.util.concurrent.TimeoutException): 7, Configuration (org.apache.flink.configuration.Configuration): 7