use of org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation in project flink by apache.
the class JobMasterTest method runHeartbeatTest.
private void runHeartbeatTest(TestingTaskExecutorGatewayBuilder testingTaskExecutorGatewayBuilder, HeartbeatServices heartbeatServices) throws Exception {
final CompletableFuture<JobID> disconnectedJobManagerFuture = new CompletableFuture<>();
final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
final TestingTaskExecutorGateway taskExecutorGateway = testingTaskExecutorGatewayBuilder.setDisconnectJobManagerConsumer((jobId, throwable) -> disconnectedJobManagerFuture.complete(jobId)).createTestingTaskExecutorGateway();
rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway);
final JobMaster jobMaster = new JobMasterBuilder(jobGraph, rpcService).withResourceId(jmResourceId).withConfiguration(configuration).withHighAvailabilityServices(haServices).withHeartbeatServices(heartbeatServices).createJobMaster();
jobMaster.start();
try {
final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);
// register task manager will trigger monitor heartbeat target, schedule heartbeat
// request at interval time
CompletableFuture<RegistrationResponse> registrationResponse = jobMasterGateway.registerTaskManager(jobGraph.getJobID(), TaskManagerRegistrationInformation.create(taskExecutorGateway.getAddress(), unresolvedTaskManagerLocation, TestingUtils.zeroUUID()), testingTimeout);
// wait for the completion of the registration
registrationResponse.get();
final JobID disconnectedJobManager = disconnectedJobManagerFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);
assertThat(disconnectedJobManager, equalTo(jobGraph.getJobID()));
} finally {
RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
}
}
use of org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation in project flink by apache.
the class TaskExecutorTest method testFreeingInactiveSlotDoesNotFail.
/**
* Tests that freeing an inactive slot is a legal operation that does not throw an exception.
*/
@Test
public void testFreeingInactiveSlotDoesNotFail() throws Exception {
final OneShotLatch taskExecutorIsRegistered = new OneShotLatch();
final CompletableFuture<Tuple3<InstanceID, SlotID, AllocationID>> availableSlotFuture = new CompletableFuture<>();
final TestingResourceManagerGateway resourceManagerGateway = createRmWithTmRegisterAndNotifySlotHooks(new InstanceID(), taskExecutorIsRegistered, availableSlotFuture);
rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);
final MultiShotLatch offerSlotsLatch = new MultiShotLatch();
final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().setOfferSlotsFunction((resourceID, slotOffers) -> {
offerSlotsLatch.trigger();
return new CompletableFuture<>();
}).build();
rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(1);
final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();
final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation).setTaskSlotTable(taskSlotTable).setTaskStateManager(localStateStoresManager).build();
final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(taskManagerServices);
final ThreadSafeTaskSlotTable<Task> threadSafeTaskSlotTable = new ThreadSafeTaskSlotTable<>(taskSlotTable, taskExecutor.getMainThreadExecutableForTesting());
try {
taskExecutor.start();
taskExecutor.waitUntilStarted();
final TaskExecutorGateway tmGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);
taskExecutorIsRegistered.await();
jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());
final AllocationID allocationId = new AllocationID();
requestSlot(tmGateway, jobId, allocationId, buildSlotID(0), ResourceProfile.UNKNOWN, jobMasterGateway.getAddress(), resourceManagerGateway.getFencingToken());
offerSlotsLatch.await();
tmGateway.freeSlot(allocationId, new RuntimeException("test exception"), timeout).get();
assertThat(availableSlotFuture.get().f2, is(allocationId));
assertThat(threadSafeTaskSlotTable.getAllocationIdsPerJob(jobId), empty());
} finally {
RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
}
}
use of org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation in project flink by apache.
the class TaskExecutorTest method testInitialSlotReportFailure.
/**
* Tests that the {@link TaskExecutor} tries to reconnect if the initial slot report fails.
*/
@Test
public void testInitialSlotReportFailure() throws Exception {
final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(1);
final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation();
final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setTaskSlotTable(taskSlotTable).setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation).build();
final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);
taskExecutor.start();
try {
final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue = new ArrayBlockingQueue<>(2);
testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
try {
return responseQueue.take();
} catch (InterruptedException e) {
return FutureUtils.completedExceptionally(e);
}
});
final CompletableFuture<RegistrationResponse> registrationResponse = CompletableFuture.completedFuture(new TaskExecutorRegistrationSuccess(new InstanceID(), testingResourceManagerGateway.getOwnResourceId(), new ClusterInformation("foobar", 1234)));
final CountDownLatch numberRegistrations = new CountDownLatch(2);
testingResourceManagerGateway.setRegisterTaskExecutorFunction(taskExecutorRegistration -> {
numberRegistrations.countDown();
return registrationResponse;
});
responseQueue.offer(FutureUtils.completedExceptionally(new FlinkException("Test exception")));
responseQueue.offer(CompletableFuture.completedFuture(Acknowledge.get()));
rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID());
// wait for the second registration attempt
numberRegistrations.await();
} finally {
RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
}
}
use of org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation in project flink by apache.
the class TaskExecutorTest method createTaskExecutorTestingContext.
private TaskExecutorTestingContext createTaskExecutorTestingContext(final TaskSlotTable<Task> taskSlotTable) throws IOException {
final OneShotLatch offerSlotsLatch = new OneShotLatch();
final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().setOfferSlotsFunction((resourceID, slotOffers) -> {
offerSlotsLatch.trigger();
return CompletableFuture.completedFuture(slotOffers);
}).build();
rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
final JobLeaderService jobLeaderService = new DefaultJobLeaderService(unresolvedTaskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration());
TaskExecutorLocalStateStoresManager stateStoresManager = createTaskExecutorLocalStateStoresManager();
TaskExecutorStateChangelogStoragesManager changelogStoragesManager = new TaskExecutorStateChangelogStoragesManager();
TaskManagerMetricGroup metricGroup = TaskManagerMetricGroup.createTaskManagerMetricGroup(NoOpMetricRegistry.INSTANCE, "", ResourceID.generate());
final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(new TaskManagerServicesBuilder().setTaskSlotTable(taskSlotTable).setJobLeaderService(jobLeaderService).setTaskStateManager(stateStoresManager).setTaskChangelogStoragesManager(changelogStoragesManager).build(), HEARTBEAT_SERVICES, metricGroup);
jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());
return new TaskExecutorTestingContext(jobMasterGateway, taskSlotTable, taskExecutor, changelogStoragesManager, metricGroup, offerSlotsLatch);
}
use of org.apache.flink.runtime.taskmanager.UnresolvedTaskManagerLocation in project flink by apache.
the class TaskExecutorTest method testJobLeaderDetection.
/**
* Tests that a TaskManager detects a job leader for which it has reserved slots. Upon detecting
* the job leader, it will offer all reserved slots to the JobManager.
*/
@Test
public void testJobLeaderDetection() throws Exception {
final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(1);
final JobLeaderService jobLeaderService = new DefaultJobLeaderService(unresolvedTaskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration());
final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway();
CompletableFuture<Void> initialSlotReportFuture = new CompletableFuture<>();
resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> {
initialSlotReportFuture.complete(null);
return CompletableFuture.completedFuture(Acknowledge.get());
});
final CompletableFuture<Collection<SlotOffer>> offeredSlotsFuture = new CompletableFuture<>();
final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().setOfferSlotsFunction((resourceID, slotOffers) -> {
offeredSlotsFuture.complete(new ArrayList<>(slotOffers));
return CompletableFuture.completedFuture(slotOffers);
}).build();
rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway);
rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
final AllocationID allocationId = new AllocationID();
final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager();
final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder().setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation).setTaskSlotTable(taskSlotTable).setJobLeaderService(jobLeaderService).setTaskStateManager(localStateStoresManager).build();
TaskExecutor taskManager = createTaskExecutor(taskManagerServices);
try {
taskManager.start();
final TaskExecutorGateway tmGateway = taskManager.getSelfGateway(TaskExecutorGateway.class);
// tell the task manager about the rm leader
resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerGateway.getFencingToken().toUUID());
initialSlotReportFuture.get();
requestSlot(tmGateway, jobId, allocationId, buildSlotID(0), ResourceProfile.ZERO, jobMasterGateway.getAddress(), resourceManagerGateway.getFencingToken());
// now inform the task manager about the new job leader
jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());
final Collection<SlotOffer> offeredSlots = offeredSlotsFuture.get();
final Collection<AllocationID> allocationIds = offeredSlots.stream().map(SlotOffer::getAllocationId).collect(Collectors.toList());
assertThat(allocationIds, containsInAnyOrder(allocationId));
} finally {
RpcUtils.terminateRpcEndpoint(taskManager, timeout);
}
}
Aggregations