use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.
the class TaskExecutorTest method testSlotReportDoesNotContainStaleInformation.
/**
 * Verifies that heartbeat {@link SlotReport}s sent to the ResourceManager never carry
 * outdated/stale slot state while a slot request is being processed by the TaskExecutor.
 *
 * <p>The check is probabilistic: without the fix for FLINK-12865 the test has to be run
 * several times before it fails.
 */
@Test
public void testSlotReportDoesNotContainStaleInformation() throws Exception {
    final OneShotLatch slotRequestReceived = new OneShotLatch();
    final OneShotLatch stopVerification = new OneShotLatch();
    final CompletableFuture<Void> verificationResult = new CompletableFuture<>();
    final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();

    // Every heartbeat payload is checked here; the first failed assertion fails the future.
    rmGateway.setTaskExecutorHeartbeatFunction((ignored, heartbeatPayload) -> {
        try {
            final ArrayList<SlotStatus> reportedSlots = Lists.newArrayList(heartbeatPayload.getSlotReport());
            assertThat(reportedSlots, hasSize(1));
            final SlotStatus reportedSlot = reportedSlots.get(0);
            log.info("Received SlotStatus: {}", reportedSlot);
            if (!slotRequestReceived.isTriggered()) {
                // no slot request seen yet -> the slot must still be unallocated
                assertThat(reportedSlot.getAllocationID(), is(nullValue()));
            } else {
                // slot request already seen -> the report must show the allocation
                assertThat(reportedSlot.getAllocationID(), is(notNullValue()));
            }
        } catch (AssertionError assertionError) {
            verificationResult.completeExceptionally(assertionError);
        }
        if (stopVerification.isTriggered()) {
            verificationResult.complete(null);
        }
        return FutureUtils.completedVoidFuture();
    });

    // Completed as soon as the TaskExecutor sends its initial slot report.
    final CompletableFuture<ResourceID> taskExecutorRegistrationFuture = new CompletableFuture<>();
    rmGateway.setSendSlotReportFunction(ignored -> {
        taskExecutorRegistrationFuture.complete(null);
        return CompletableFuture.completedFuture(Acknowledge.get());
    });

    rpc.registerGateway(rmGateway.getAddress(), rmGateway);
    resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), rmGateway.getFencingToken().toUUID());

    final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder()
            .setTaskSlotTable(new AllocateSlotNotifyingTaskSlotTable(slotRequestReceived))
            .build();
    final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);
    final ResourceID taskExecutorResourceId = taskManagerServices.getUnresolvedTaskManagerLocation().getResourceID();
    taskExecutor.start();
    final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

    final ScheduledExecutorService heartbeatExecutor = java.util.concurrent.Executors.newSingleThreadScheduledExecutor();
    try {
        taskExecutorRegistrationFuture.get();

        final OneShotLatch firstHeartbeatScheduled = new OneShotLatch();
        final ResourceID rmResourceId = rmGateway.getOwnResourceId();
        final long heartbeatIntervalMillis = 5L;

        // Keeps sending RM heartbeats to the TaskExecutor while the slot is requested.
        final Runnable sendHeartbeat = () -> {
            firstHeartbeatScheduled.trigger();
            taskExecutorGateway.heartbeatFromResourceManager(rmResourceId);
        };
        heartbeatExecutor.scheduleWithFixedDelay(sendHeartbeat, 0L, heartbeatIntervalMillis, TimeUnit.MILLISECONDS);
        firstHeartbeatScheduled.await();

        taskExecutorGateway
                .requestSlot(
                        new SlotID(taskExecutorResourceId, 0),
                        jobId,
                        new AllocationID(),
                        ResourceProfile.ZERO,
                        "foobar",
                        rmGateway.getFencingToken(),
                        timeout)
                .get();

        stopVerification.trigger();
        verificationResult.get();
    } finally {
        ExecutorUtils.gracefulShutdown(timeout.toMilliseconds(), TimeUnit.MILLISECONDS, heartbeatExecutor);
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.
the class TaskExecutorTest method runJobManagerHeartbeatTest.
/**
 * Shared driver for the JobManager heartbeat tests.
 *
 * <p>Starts a {@link TestingTaskExecutor}, connects it to a testing ResourceManager gateway,
 * requests a slot for {@code jobId}, announces the JobManager leader and waits until slots
 * have been offered to the JobManager. It then runs {@code heartbeatAction} and verifies that
 * the TaskExecutor disconnects from the JobManager and that the JobManager gateway observes a
 * second registration attempt within {@code timeout}.
 *
 * @param jmResourceId resource id returned in the JobManager's registration success
 * @param heartbeatServices heartbeat services handed to the TaskExecutor under test
 * @param jobMasterGatewayBuilderConsumer hook to customize the testing JobMaster gateway
 *     builder before the gateway is built
 * @param heartbeatAction action invoked with the TaskExecutor's resource id, its gateway and
 *     the allocation id of the requested slot
 */
private void runJobManagerHeartbeatTest(ResourceID jmResourceId, HeartbeatServices heartbeatServices, Consumer<TestingJobMasterGatewayBuilder> jobMasterGatewayBuilderConsumer, TriConsumer<ResourceID, TaskExecutorGateway, AllocationID> heartbeatAction) throws IOException, InterruptedException, ExecutionException, TimeoutException {
    final JobLeaderService jobLeaderService = new DefaultJobLeaderService(unresolvedTaskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration());
    final String jobMasterAddress = "jm";
    final UUID jmLeaderId = UUID.randomUUID();
    // Two attempts are expected: the initial registration and the reconnect after the disconnect.
    final CountDownLatch registrationAttempts = new CountDownLatch(2);
    final OneShotLatch slotOfferedLatch = new OneShotLatch();
    final CompletableFuture<ResourceID> disconnectTaskManagerFuture = new CompletableFuture<>();
    final TestingJobMasterGatewayBuilder testingJobMasterGatewayBuilder = new TestingJobMasterGatewayBuilder().setRegisterTaskManagerFunction((ignoredJobId, ignoredTaskManagerRegistrationInformation) -> {
        registrationAttempts.countDown();
        return CompletableFuture.completedFuture(new JMTMRegistrationSuccess(jmResourceId));
    }).setDisconnectTaskManagerFunction(resourceID -> {
        disconnectTaskManagerFuture.complete(resourceID);
        return CompletableFuture.completedFuture(Acknowledge.get());
    }).setOfferSlotsFunction((resourceID, slotOffers) -> {
        slotOfferedLatch.trigger();
        return CompletableFuture.completedFuture(slotOffers);
    });
    // Let the caller adjust the gateway behavior before it is built.
    jobMasterGatewayBuilderConsumer.accept(testingJobMasterGatewayBuilder);
    final TestingJobMasterGateway jobMasterGateway = testingJobMasterGatewayBuilder.build();
    final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();
    final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation).setTaskSlotTable(TaskSlotUtils.createTaskSlotTable(1)).setJobLeaderService(jobLeaderService).setTaskStateManager(localStateStoresManager).build();
    final TestingTaskExecutor taskManager = createTestingTaskExecutor(taskManagerServices, heartbeatServices);
    final OneShotLatch slotReportReceived = new OneShotLatch();
    final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
    testingResourceManagerGateway.setSendSlotReportFunction(ignored -> {
        slotReportReceived.trigger();
        return CompletableFuture.completedFuture(Acknowledge.get());
    });
    // First RM registration succeeds immediately; the second one is answered with a future
    // that this method never completes.
    final Queue<CompletableFuture<RegistrationResponse>> registrationResponses = new ArrayDeque<>();
    registrationResponses.add(CompletableFuture.completedFuture(new TaskExecutorRegistrationSuccess(new InstanceID(), testingResourceManagerGateway.getOwnResourceId(), new ClusterInformation("foobar", 1234))));
    registrationResponses.add(new CompletableFuture<>());
    testingResourceManagerGateway.setRegisterTaskExecutorFunction(taskExecutorRegistration -> registrationResponses.poll());
    rpc.registerGateway(jobMasterAddress, jobMasterGateway);
    rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
    try {
        taskManager.start();
        taskManager.waitUntilStarted();
        final TaskExecutorGateway taskExecutorGateway = taskManager.getSelfGateway(TaskExecutorGateway.class);
        resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());
        // Wait for the initial slot report before requesting a slot.
        slotReportReceived.await();
        final AllocationID allocationId = new AllocationID();
        requestSlot(taskExecutorGateway, jobId, allocationId, buildSlotID(0), ResourceProfile.UNKNOWN, jobMasterAddress, testingResourceManagerGateway.getFencingToken());
        // now inform the task manager about the new job leader
        jobManagerLeaderRetriever.notifyListener(jobMasterAddress, jmLeaderId);
        // register task manager success will trigger monitoring heartbeat target between tm and
        // jm
        slotOfferedLatch.await();
        heartbeatAction.accept(unresolvedTaskManagerLocation.getResourceID(), taskExecutorGateway, allocationId);
        // the timeout should trigger disconnecting from the JobManager
        final ResourceID resourceID = disconnectTaskManagerFuture.get();
        assertThat(resourceID, equalTo(unresolvedTaskManagerLocation.getResourceID()));
        // The bound is a millisecond count, so the unit must be MILLISECONDS; pairing it with
        // SECONDS would stretch the wait by a factor of 1000.
        assertTrue("The TaskExecutor should try to reconnect to the JM", registrationAttempts.await(timeout.toMilliseconds(), TimeUnit.MILLISECONDS));
    } finally {
        RpcUtils.terminateRpcEndpoint(taskManager, timeout);
    }
}
use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.
the class TaskExecutorTest method testRegisterWithDefaultSlotResourceProfile.
/**
 * Checks that the default slot resource profile sent with the TaskExecutor's registration
 * equals the profile generated from {@code TM_RESOURCE_SPEC} and the number of slots.
 */
@Test
public void testRegisterWithDefaultSlotResourceProfile() throws Exception {
    final int numberOfSlots = 2;
    final TaskExecutor taskExecutor = createTaskExecutor(numberOfSlots);
    taskExecutor.start();

    try {
        final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
        final ResourceID rmResourceId = rmGateway.getOwnResourceId();
        final CompletableFuture<ResourceProfile> reportedProfileFuture = new CompletableFuture<>();

        // Capture the profile carried by the registration and acknowledge with a success.
        rmGateway.setRegisterTaskExecutorFunction(registration -> {
            reportedProfileFuture.complete(registration.getDefaultSlotResourceProfile());
            return CompletableFuture.completedFuture(
                    new TaskExecutorRegistrationSuccess(
                            new InstanceID(),
                            rmResourceId,
                            new ClusterInformation("localhost", 1234)));
        });

        rpc.registerGateway(rmGateway.getAddress(), rmGateway);
        resourceManagerLeaderRetriever.notifyListener(
                rmGateway.getAddress(),
                rmGateway.getFencingToken().toUUID());

        final ResourceProfile reportedProfile = reportedProfileFuture.get();
        final ResourceProfile expectedProfile =
                TaskExecutorResourceUtils.generateDefaultSlotResourceProfile(TM_RESOURCE_SPEC, numberOfSlots);
        assertThat(reportedProfile, equalTo(expectedProfile));
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.
the class TaskExecutorTest method runResourceManagerHeartbeatTest.
/**
 * Shared driver for the ResourceManager heartbeat tests.
 *
 * <p>Registers a testing ResourceManager gateway under the address {@code "rm"}, starts a
 * TaskExecutor, announces the ResourceManager leader and waits for the first registration.
 * It then runs {@code heartbeatAction} and verifies that the TaskExecutor disconnects from the
 * ResourceManager and issues a second registration attempt, each within {@code timeout}.
 *
 * @param heartbeatServices heartbeat services handed to the TaskExecutor under test
 * @param setupResourceManagerGateway hook to customize the testing ResourceManager gateway
 *     after the registration function has been installed
 * @param heartbeatAction action invoked with the TaskExecutor gateway, the ResourceManager's
 *     resource id and the future that is completed when the TaskExecutor is disconnected
 * @throws Exception if the heartbeat action or any of the awaited futures fails
 */
private void runResourceManagerHeartbeatTest(HeartbeatServices heartbeatServices, Consumer<TestingResourceManagerGateway> setupResourceManagerGateway, TriConsumerWithException<TaskExecutorGateway, ResourceID, CompletableFuture<ResourceID>, Exception> heartbeatAction) throws Exception {
    final String rmAddress = "rm";
    final ResourceID rmResourceId = new ResourceID(rmAddress);
    final ResourceManagerId rmLeaderId = ResourceManagerId.generate();
    final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway(rmLeaderId, rmResourceId, rmAddress, rmAddress);
    final TaskExecutorRegistrationSuccess registrationResponse = new TaskExecutorRegistrationSuccess(new InstanceID(), rmResourceId, new ClusterInformation("localhost", 1234));
    // First registration succeeds immediately; the second one is answered with a future that
    // this method never completes.
    final Queue<CompletableFuture<RegistrationResponse>> registrationResponses = new ArrayDeque<>(2);
    registrationResponses.add(CompletableFuture.completedFuture(registrationResponse));
    registrationResponses.add(new CompletableFuture<>());
    final CompletableFuture<ResourceID> taskExecutorRegistrationFuture = new CompletableFuture<>();
    // Two attempts are expected: the initial registration and the reconnect after the disconnect.
    final CountDownLatch registrationAttempts = new CountDownLatch(2);
    rmGateway.setRegisterTaskExecutorFunction(registration -> {
        taskExecutorRegistrationFuture.complete(registration.getResourceId());
        registrationAttempts.countDown();
        return registrationResponses.poll();
    });
    // Let the caller adjust the gateway behavior.
    setupResourceManagerGateway.accept(rmGateway);
    final CompletableFuture<ResourceID> taskExecutorDisconnectFuture = new CompletableFuture<>();
    rmGateway.setDisconnectTaskExecutorConsumer(disconnectInfo -> taskExecutorDisconnectFuture.complete(disconnectInfo.f0));
    rpc.registerGateway(rmAddress, rmGateway);
    final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation).build();
    final TaskExecutor taskManager = createTaskExecutor(taskManagerServices, heartbeatServices);
    try {
        taskManager.start();
        final TaskExecutorGateway taskExecutorGateway = taskManager.getSelfGateway(TaskExecutorGateway.class);
        // define a leader and see that a registration happens
        resourceManagerLeaderRetriever.notifyListener(rmAddress, rmLeaderId.toUUID());
        // register resource manager success will trigger monitoring heartbeat target between tm
        // and rm
        assertThat(taskExecutorRegistrationFuture.get(), equalTo(unresolvedTaskManagerLocation.getResourceID()));
        heartbeatAction.accept(taskExecutorGateway, rmGateway.getOwnResourceId(), taskExecutorDisconnectFuture);
        // heartbeat timeout should trigger disconnect TaskManager from ResourceManager
        assertThat(taskExecutorDisconnectFuture.get(timeout.toMilliseconds(), TimeUnit.MILLISECONDS), equalTo(unresolvedTaskManagerLocation.getResourceID()));
        // The bound is a millisecond count, so the unit must be MILLISECONDS (as in the
        // disconnect check above); SECONDS would stretch the wait by a factor of 1000.
        assertTrue("The TaskExecutor should try to reconnect to the RM", registrationAttempts.await(timeout.toMilliseconds(), TimeUnit.MILLISECONDS));
    } finally {
        RpcUtils.terminateRpcEndpoint(taskManager, timeout);
    }
}
use of org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway in project flink by apache.
the class TaskExecutorTest method createAndRegisterResourceManager.
/**
 * Creates a testing ResourceManager gateway, registers it with the RPC service and announces
 * it as the ResourceManager leader.
 *
 * @param initialSlotReportFuture completed with the argument of the first slot report the
 *     gateway receives
 * @return the fencing token of the created gateway
 */
private ResourceManagerId createAndRegisterResourceManager(CompletableFuture<Tuple3<ResourceID, InstanceID, SlotReport>> initialSlotReportFuture) {
    final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();

    // Forward the received slot report to the caller's future and acknowledge it.
    rmGateway.setSendSlotReportFunction(slotReportInfo -> {
        initialSlotReportFuture.complete(slotReportInfo);
        return CompletableFuture.completedFuture(Acknowledge.get());
    });

    rpc.registerGateway(rmGateway.getAddress(), rmGateway);

    // tell the task manager about the rm leader
    resourceManagerLeaderRetriever.notifyListener(
            rmGateway.getAddress(),
            rmGateway.getFencingToken().toUUID());

    return rmGateway.getFencingToken();
}
Aggregations