use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway in project flink by apache.
the class TaskExecutorTest method testReleaseInactiveSlots.
/**
 * Requests a slot for a job whose JobMaster never answers the TaskManager registration (the
 * register function returns a future that is never completed), then asks the TaskExecutor to
 * free the inactive slots of that job and expects the requested slot/allocation to show up in
 * the available-slot future wired into the ResourceManager test gateway.
 */
@Test
public void testReleaseInactiveSlots() throws Exception {
    final TaskManagerServices services =
            new TaskManagerServicesBuilder()
                    .setTaskSlotTable(TaskSlotUtils.createTaskSlotTable(1))
                    .build();
    final TaskExecutor taskExecutor = createTaskExecutor(services, HEARTBEAT_SERVICES);

    // JobMaster whose registration response never arrives
    final TestingJobMasterGateway jmGateway =
            new TestingJobMasterGatewayBuilder()
                    .setRegisterTaskManagerFunction(
                            (ignoredJobId, ignoredTaskManagerRegistrationInformation) ->
                                    new CompletableFuture<>())
                    .build();
    final String jmAddress = jmGateway.getAddress();
    rpc.registerGateway(jmAddress, jmGateway);

    // ResourceManager hooks: a latch for the TM registration and a future for the slot
    // notification (presumably completed by the RM gateway's notify-slot-available hook)
    final InstanceID registrationId = new InstanceID();
    final OneShotLatch registeredAtRm = new OneShotLatch();
    final CompletableFuture<Tuple3<InstanceID, SlotID, AllocationID>> freedSlotFuture =
            new CompletableFuture<>();
    final TestingResourceManagerGateway rmGateway =
            createRmWithTmRegisterAndNotifySlotHooks(
                    registrationId, registeredAtRm, freedSlotFuture);
    final String rmAddress = rmGateway.getAddress();
    rpc.registerGateway(rmAddress, rmGateway);
    resourceManagerLeaderRetriever.notifyListener(
            rmAddress, rmGateway.getFencingToken().toUUID());

    try {
        taskExecutor.start();
        final TaskExecutorGateway tmGateway =
                taskExecutor.getSelfGateway(TaskExecutorGateway.class);
        registeredAtRm.await();

        final AllocationID requestedAllocation = new AllocationID();
        final SlotID requestedSlot = new SlotID(taskExecutor.getResourceID(), 0);
        requestSlot(
                tmGateway,
                jobId,
                requestedAllocation,
                requestedSlot,
                ResourceProfile.UNKNOWN,
                jmAddress,
                rmGateway.getFencingToken());

        tmGateway.freeInactiveSlots(jobId, timeout);

        // the slot should be freed
        final Tuple3<InstanceID, SlotID, AllocationID> freedSlot = freedSlotFuture.get();
        assertThat(freedSlot.f1, is(requestedSlot));
        assertThat(freedSlot.f2, is(requestedAllocation));
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway in project flink by apache.
the class TaskExecutorTest method testSlotOfferResponseWithPendingSlotOffer.
/**
 * Tests the behavior of the task executor when a slot offer response is received while a newer
 * slot offer is in progress.
 *
 * <p>Two slots are requested one after the other; the JobMaster test gateway answers each
 * offer with a future that stays pending until this test completes it, so both offers are in
 * flight when the responses are delivered.
 *
 * @param responseOrder selects which of the two scenarios (accept-then-reject or
 *     reject-then-accept) is played through
 */
private void testSlotOfferResponseWithPendingSlotOffer(final ResponseOrder responseOrder) throws Exception {
    final OneShotLatch registeredAtRm = new OneShotLatch();
    final TestingResourceManagerGateway rmGateway =
            createRmWithTmRegisterAndNotifySlotHooks(
                    new InstanceID(), registeredAtRm, new CompletableFuture<>());

    // responses handed out to the offers in arrival order
    final CompletableFuture<Collection<SlotOffer>> firstResponse = new CompletableFuture<>();
    final CompletableFuture<Collection<SlotOffer>> secondResponse = new CompletableFuture<>();
    final Queue<CompletableFuture<Collection<SlotOffer>>> pendingResponses = new ArrayDeque<>();
    pendingResponses.add(firstResponse);
    pendingResponses.add(secondResponse);

    final MultiShotLatch offerArrived = new MultiShotLatch();
    final TestingJobMasterGateway jmGateway =
            new TestingJobMasterGatewayBuilder()
                    .setOfferSlotsFunction(
                            (resourceID, slotOffers) -> {
                                offerArrived.trigger();
                                return pendingResponses.remove();
                            })
                    .build();
    final String jmAddress = jmGateway.getAddress();

    rpc.registerGateway(rmGateway.getAddress(), rmGateway);
    rpc.registerGateway(jmAddress, jmGateway);

    final TaskSlotTable<Task> slotTable = TaskSlotUtils.createTaskSlotTable(2);
    final TaskManagerServices services = createTaskManagerServicesWithTaskSlotTable(slotTable);
    final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(services);
    final ThreadSafeTaskSlotTable<Task> safeSlotTable =
            new ThreadSafeTaskSlotTable<>(
                    slotTable, taskExecutor.getMainThreadExecutableForTesting());

    final SlotOffer slotOffer1 = new SlotOffer(new AllocationID(), 0, ResourceProfile.ANY);
    final SlotOffer slotOffer2 = new SlotOffer(new AllocationID(), 1, ResourceProfile.ANY);

    try {
        taskExecutor.start();
        taskExecutor.waitUntilStarted();
        final TaskExecutorGateway tmGateway =
                taskExecutor.getSelfGateway(TaskExecutorGateway.class);

        // wait until the task executor has registered at the RM
        registeredAtRm.await();

        // notify job leader to start slot offering
        jobManagerLeaderRetriever.notifyListener(
                jmAddress, jmGateway.getFencingToken().toUUID());

        // request the first slot and wait until its offer has arrived
        requestSlot(
                tmGateway,
                jobId,
                slotOffer1.getAllocationId(),
                buildSlotID(slotOffer1.getSlotIndex()),
                ResourceProfile.UNKNOWN,
                jmAddress,
                rmGateway.getFencingToken());
        offerArrived.await();

        // request the second slot, triggering another offer containing both slots,
        // and wait until that offer has arrived as well
        requestSlot(
                tmGateway,
                jobId,
                slotOffer2.getAllocationId(),
                buildSlotID(slotOffer2.getSlotIndex()),
                ResourceProfile.UNKNOWN,
                jmAddress,
                rmGateway.getFencingToken());
        offerArrived.await();

        switch (responseOrder) {
            case ACCEPT_THEN_REJECT:
                // accept the first offer, but reject both slots for the second offer
                firstResponse.complete(Collections.singletonList(slotOffer1));
                assertThat(
                        safeSlotTable.getActiveTaskSlotAllocationIdsPerJob(jobId), empty());
                secondResponse.complete(Collections.emptyList());
                assertThat(safeSlotTable.getAllocationIdsPerJob(jobId), empty());
                break;
            case REJECT_THEN_ACCEPT:
                // fail the first offer, but accept both slots for the second offer
                // in the past the rejection of the first offer freed the slot; when the slot
                // is accepted from the second offer the activation of said slot then failed
                firstResponse.complete(Collections.emptyList());
                secondResponse.complete(Arrays.asList(slotOffer1, slotOffer2));
                assertThat(
                        safeSlotTable.getAllocationIdsPerJob(jobId),
                        containsInAnyOrder(
                                slotOffer1.getAllocationId(), slotOffer2.getAllocationId()));
                break;
        }
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway in project flink by apache.
the class TaskExecutorTest method testSlotOfferCounterIsSeparatedByJob.
/**
 * Requests one slot for each of two jobs, each served by its own JobMaster test gateway, lets
 * both slot offers become pending, then accepts them in order and checks that every job ends
 * up with exactly its own allocation among its active slots.
 */
@Test
public void testSlotOfferCounterIsSeparatedByJob() throws Exception {
    final OneShotLatch registeredAtRm = new OneShotLatch();
    final TestingResourceManagerGateway rmGateway =
            createRmWithTmRegisterAndNotifySlotHooks(
                    new InstanceID(), registeredAtRm, new CompletableFuture<>());

    // both gateways draw their responses from the same queue, in offer-arrival order
    final CompletableFuture<Collection<SlotOffer>> firstResponse = new CompletableFuture<>();
    final CompletableFuture<Collection<SlotOffer>> secondResponse = new CompletableFuture<>();
    final Queue<CompletableFuture<Collection<SlotOffer>>> pendingResponses = new ArrayDeque<>();
    pendingResponses.add(firstResponse);
    pendingResponses.add(secondResponse);

    final MultiShotLatch offerArrived = new MultiShotLatch();
    final TestingJobMasterGateway jobMasterGateway1 =
            new TestingJobMasterGatewayBuilder()
                    .setAddress("jm1")
                    .setOfferSlotsFunction(
                            (resourceID, slotOffers) -> {
                                offerArrived.trigger();
                                return pendingResponses.remove();
                            })
                    .build();
    final TestingJobMasterGateway jobMasterGateway2 =
            new TestingJobMasterGatewayBuilder()
                    .setAddress("jm2")
                    .setOfferSlotsFunction(
                            (resourceID, slotOffers) -> {
                                offerArrived.trigger();
                                return pendingResponses.remove();
                            })
                    .build();
    final String jmAddress1 = jobMasterGateway1.getAddress();
    final String jmAddress2 = jobMasterGateway2.getAddress();

    rpc.registerGateway(rmGateway.getAddress(), rmGateway);
    rpc.registerGateway(jmAddress1, jobMasterGateway1);
    rpc.registerGateway(jmAddress2, jobMasterGateway2);

    final TaskSlotTable<Task> slotTable = TaskSlotUtils.createTaskSlotTable(2);
    final TaskManagerServices services = createTaskManagerServicesWithTaskSlotTable(slotTable);
    final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(services);
    final ThreadSafeTaskSlotTable<Task> safeSlotTable =
            new ThreadSafeTaskSlotTable<>(
                    slotTable, taskExecutor.getMainThreadExecutableForTesting());

    final SlotOffer slotOffer1 = new SlotOffer(new AllocationID(), 0, ResourceProfile.ANY);
    final SlotOffer slotOffer2 = new SlotOffer(new AllocationID(), 1, ResourceProfile.ANY);

    try {
        taskExecutor.start();
        taskExecutor.waitUntilStarted();
        final TaskExecutorGateway tmGateway =
                taskExecutor.getSelfGateway(TaskExecutorGateway.class);

        // wait until the task executor has registered at the RM
        registeredAtRm.await();

        // announce both job leaders so that slot offering can start
        jobManagerLeaderRetriever.notifyListener(
                jmAddress1, jobMasterGateway1.getFencingToken().toUUID());
        jobManagerLeaderRetriever2.notifyListener(
                jmAddress2, jobMasterGateway2.getFencingToken().toUUID());

        // request a slot for the first job and wait until its offer has arrived
        requestSlot(
                tmGateway,
                jobId,
                slotOffer1.getAllocationId(),
                buildSlotID(slotOffer1.getSlotIndex()),
                ResourceProfile.UNKNOWN,
                jmAddress1,
                rmGateway.getFencingToken());
        offerArrived.await();

        // request a slot for the second job and wait until its offer has arrived
        requestSlot(
                tmGateway,
                jobId2,
                slotOffer2.getAllocationId(),
                buildSlotID(slotOffer2.getSlotIndex()),
                ResourceProfile.UNKNOWN,
                jmAddress2,
                rmGateway.getFencingToken());
        offerArrived.await();

        // accept each offer; the earlier response must still be honoured for its job
        firstResponse.complete(Collections.singletonList(slotOffer1));
        secondResponse.complete(Collections.singletonList(slotOffer2));

        assertThat(
                safeSlotTable.getActiveTaskSlotAllocationIdsPerJob(jobId),
                contains(slotOffer1.getAllocationId()));
        assertThat(
                safeSlotTable.getActiveTaskSlotAllocationIdsPerJob(jobId2),
                contains(slotOffer2.getAllocationId()));
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway in project flink by apache.
the class TaskExecutorTest method testSyncSlotsWithJobMasterByHeartbeat.
/**
 * Tests that the TaskExecutor syncs its slots view with the JobMaster's view via the
 * AllocatedSlotReport reported by the heartbeat (See FLINK-11059).
 *
 * <p>The TaskExecutor holds two allocations ({@code allocationIdInBoth} and {@code
 * allocationIdOnlyInTM}) while the heartbeat report lists {@code allocationIdInBoth} and {@code
 * allocationIdOnlyInJM}. The test expects exactly one fail-slot call towards the JobMaster (for
 * the allocation only the JobMaster knows) and exactly one slot-available notification towards
 * the ResourceManager (for the allocation only the TaskExecutor knows).
 */
@Test
public void testSyncSlotsWithJobMasterByHeartbeat() throws Exception {
    // counted down by the slot table; awaited below until both slots are active
    final CountDownLatch activeSlots = new CountDownLatch(2);
    final TaskSlotTable<Task> taskSlotTable =
            new ActivateSlotNotifyingTaskSlotTable(2, activeSlots);
    final TaskManagerServices taskManagerServices =
            new TaskManagerServicesBuilder().setTaskSlotTable(taskSlotTable).build();
    final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices);

    final TestingResourceManagerGateway testingResourceManagerGateway =
            new TestingResourceManagerGateway();
    // allocations the RM is told are available again
    final BlockingQueue<AllocationID> allocationsNotifiedFree = new ArrayBlockingQueue<>(2);
    OneShotLatch initialSlotReporting = new OneShotLatch();
    testingResourceManagerGateway.setSendSlotReportFunction(
            resourceIDInstanceIDSlotReportTuple3 -> {
                initialSlotReporting.trigger();
                return CompletableFuture.completedFuture(Acknowledge.get());
            });
    testingResourceManagerGateway.setNotifySlotAvailableConsumer(
            instanceIDSlotIDAllocationIDTuple3 ->
                    allocationsNotifiedFree.offer(instanceIDSlotIDAllocationIDTuple3.f2));
    rpc.registerGateway(
            testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
    resourceManagerLeaderRetriever.notifyListener(
            testingResourceManagerGateway.getAddress(),
            testingResourceManagerGateway.getFencingToken().toUUID());

    // allocations the JM is told have failed
    final BlockingQueue<AllocationID> failedSlotFutures = new ArrayBlockingQueue<>(2);
    final ResourceID jobManagerResourceId = ResourceID.generate();
    final TestingJobMasterGateway jobMasterGateway =
            new TestingJobMasterGatewayBuilder()
                    .setFailSlotConsumer(
                            (resourceID, allocationID, throwable) ->
                                    failedSlotFutures.offer(allocationID))
                    // accept every offered slot
                    .setOfferSlotsFunction(
                            (resourceID, slotOffers) ->
                                    CompletableFuture.completedFuture(
                                            new ArrayList<>(slotOffers)))
                    .setRegisterTaskManagerFunction(
                            (ignoredJobId, ignoredTaskManagerRegistrationInformation) ->
                                    CompletableFuture.completedFuture(
                                            new JMTMRegistrationSuccess(jobManagerResourceId)))
                    .build();
    final String jobManagerAddress = jobMasterGateway.getAddress();
    rpc.registerGateway(jobManagerAddress, jobMasterGateway);
    jobManagerLeaderRetriever.notifyListener(
            jobManagerAddress, jobMasterGateway.getFencingToken().toUUID());

    try {
        // started inside the try block so that the endpoint is terminated even if start-up
        // fails, matching the other tests of this class
        taskExecutor.start();
        final TaskExecutorGateway taskExecutorGateway =
                taskExecutor.getSelfGateway(TaskExecutorGateway.class);
        initialSlotReporting.await();

        final AllocationID allocationIdInBoth = new AllocationID();
        final AllocationID allocationIdOnlyInJM = new AllocationID();
        final AllocationID allocationIdOnlyInTM = new AllocationID();

        // TaskExecutor side: slot 0 -> allocationIdInBoth, slot 1 -> allocationIdOnlyInTM
        taskExecutorGateway.requestSlot(
                new SlotID(taskExecutor.getResourceID(), 0),
                jobId,
                allocationIdInBoth,
                ResourceProfile.ZERO,
                "foobar",
                testingResourceManagerGateway.getFencingToken(),
                timeout);
        taskExecutorGateway.requestSlot(
                new SlotID(taskExecutor.getResourceID(), 1),
                jobId,
                allocationIdOnlyInTM,
                ResourceProfile.ZERO,
                "foobar",
                testingResourceManagerGateway.getFencingToken(),
                timeout);
        activeSlots.await();

        // JobMaster side: slot 0 -> allocationIdInBoth, slot 1 -> allocationIdOnlyInJM
        List<AllocatedSlotInfo> allocatedSlotInfos =
                Arrays.asList(
                        new AllocatedSlotInfo(0, allocationIdInBoth),
                        new AllocatedSlotInfo(1, allocationIdOnlyInJM));
        AllocatedSlotReport allocatedSlotReport =
                new AllocatedSlotReport(jobId, allocatedSlotInfos);
        taskExecutorGateway.heartbeatFromJobManager(jobManagerResourceId, allocatedSlotReport);

        assertThat(failedSlotFutures.take(), is(allocationIdOnlyInJM));
        assertThat(allocationsNotifiedFree.take(), is(allocationIdOnlyInTM));
        // no further failures or free notifications are expected
        assertThat(failedSlotFutures.poll(5L, TimeUnit.MILLISECONDS), nullValue());
        assertThat(allocationsNotifiedFree.poll(5L, TimeUnit.MILLISECONDS), nullValue());
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway in project flink by apache.
the class TaskExecutorTest method runJobManagerHeartbeatTest.
/**
 * Shared driver for the JobManager heartbeat tests: connects a TaskExecutor to a ResourceManager
 * and a JobMaster test gateway, waits until a slot has been offered to the JobMaster, runs the
 * given heartbeat action and then expects the TaskExecutor to disconnect from the JobMaster and
 * to attempt a second registration.
 *
 * @param jmResourceId resource id the JobMaster test gateway reports in its registration
 *     response
 * @param heartbeatServices heartbeat services the TaskExecutor under test is created with
 * @param jobMasterGatewayBuilderConsumer hook to customize the JobMaster gateway builder before
 *     the gateway is built
 * @param heartbeatAction action invoked with the TaskExecutor's resource id, its gateway and
 *     the allocation id of the requested slot once the slot has been offered
 */
private void runJobManagerHeartbeatTest(ResourceID jmResourceId, HeartbeatServices heartbeatServices, Consumer<TestingJobMasterGatewayBuilder> jobMasterGatewayBuilderConsumer, TriConsumer<ResourceID, TaskExecutorGateway, AllocationID> heartbeatAction) throws IOException, InterruptedException, ExecutionException, TimeoutException {
    final JobLeaderService jobLeaderService =
            new DefaultJobLeaderService(
                    unresolvedTaskManagerLocation,
                    RetryingRegistrationConfiguration.defaultConfiguration());

    final String jobMasterAddress = "jm";
    final UUID jmLeaderId = UUID.randomUUID();

    // two attempts: the initial registration plus the reconnect after the disconnect
    final CountDownLatch registrationAttempts = new CountDownLatch(2);
    final OneShotLatch slotOfferedLatch = new OneShotLatch();
    final CompletableFuture<ResourceID> disconnectTaskManagerFuture = new CompletableFuture<>();

    final TestingJobMasterGatewayBuilder testingJobMasterGatewayBuilder =
            new TestingJobMasterGatewayBuilder()
                    .setRegisterTaskManagerFunction(
                            (ignoredJobId, ignoredTaskManagerRegistrationInformation) -> {
                                registrationAttempts.countDown();
                                return CompletableFuture.completedFuture(
                                        new JMTMRegistrationSuccess(jmResourceId));
                            })
                    .setDisconnectTaskManagerFunction(
                            resourceID -> {
                                disconnectTaskManagerFuture.complete(resourceID);
                                return CompletableFuture.completedFuture(Acknowledge.get());
                            })
                    .setOfferSlotsFunction(
                            (resourceID, slotOffers) -> {
                                slotOfferedLatch.trigger();
                                return CompletableFuture.completedFuture(slotOffers);
                            });
    // let the caller adjust the gateway before it is built
    jobMasterGatewayBuilderConsumer.accept(testingJobMasterGatewayBuilder);
    final TestingJobMasterGateway jobMasterGateway = testingJobMasterGatewayBuilder.build();

    final TaskExecutorLocalStateStoresManager localStateStoresManager =
            createTaskExecutorLocalStateStoresManager();
    final TaskManagerServices taskManagerServices =
            new TaskManagerServicesBuilder()
                    .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation)
                    .setTaskSlotTable(TaskSlotUtils.createTaskSlotTable(1))
                    .setJobLeaderService(jobLeaderService)
                    .setTaskStateManager(localStateStoresManager)
                    .build();
    final TestingTaskExecutor taskManager =
            createTestingTaskExecutor(taskManagerServices, heartbeatServices);

    final OneShotLatch slotReportReceived = new OneShotLatch();
    final TestingResourceManagerGateway testingResourceManagerGateway =
            new TestingResourceManagerGateway();
    testingResourceManagerGateway.setSendSlotReportFunction(
            ignored -> {
                slotReportReceived.trigger();
                return CompletableFuture.completedFuture(Acknowledge.get());
            });

    // first RM registration succeeds right away, the second one is never answered
    final Queue<CompletableFuture<RegistrationResponse>> registrationResponses =
            new ArrayDeque<>();
    registrationResponses.add(
            CompletableFuture.completedFuture(
                    new TaskExecutorRegistrationSuccess(
                            new InstanceID(),
                            testingResourceManagerGateway.getOwnResourceId(),
                            new ClusterInformation("foobar", 1234))));
    registrationResponses.add(new CompletableFuture<>());
    testingResourceManagerGateway.setRegisterTaskExecutorFunction(
            taskExecutorRegistration -> registrationResponses.poll());

    rpc.registerGateway(jobMasterAddress, jobMasterGateway);
    rpc.registerGateway(
            testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);

    try {
        taskManager.start();
        taskManager.waitUntilStarted();
        final TaskExecutorGateway taskExecutorGateway =
                taskManager.getSelfGateway(TaskExecutorGateway.class);

        resourceManagerLeaderRetriever.notifyListener(
                testingResourceManagerGateway.getAddress(),
                testingResourceManagerGateway.getFencingToken().toUUID());
        slotReportReceived.await();

        final AllocationID allocationId = new AllocationID();
        requestSlot(
                taskExecutorGateway,
                jobId,
                allocationId,
                buildSlotID(0),
                ResourceProfile.UNKNOWN,
                jobMasterAddress,
                testingResourceManagerGateway.getFencingToken());

        // now inform the task manager about the new job leader
        jobManagerLeaderRetriever.notifyListener(jobMasterAddress, jmLeaderId);

        // register task manager success will trigger monitoring heartbeat target between tm
        // and jm
        slotOfferedLatch.await();

        heartbeatAction.accept(
                unresolvedTaskManagerLocation.getResourceID(), taskExecutorGateway, allocationId);

        // the timeout should trigger disconnecting from the JobManager
        final ResourceID resourceID = disconnectTaskManagerFuture.get();
        assertThat(resourceID, equalTo(unresolvedTaskManagerLocation.getResourceID()));

        // the wait bound is expressed in milliseconds, so the unit must be MILLISECONDS
        assertTrue(
                "The TaskExecutor should try to reconnect to the JM",
                registrationAttempts.await(timeout.toMilliseconds(), TimeUnit.MILLISECONDS));
    } finally {
        RpcUtils.terminateRpcEndpoint(taskManager, timeout);
    }
}
Aggregations