use of org.apache.flink.runtime.io.network.partition.TestingTaskExecutorPartitionTracker in project flink by apache.
the class TaskExecutorTest method testReleaseOfJobResourcesIfJobMasterIsNotCorrect.
/**
 * Checks that the TaskExecutor gives up every resource it holds for a job once the JobMaster
 * rejects its registration because it is not running that job. See FLINK-21606.
 *
 * <p>The test requests a slot for the job, lets a JobMaster gateway answer the registration with
 * a {@code JMTMRegistrationRejection}, and then expects both the slot notification towards the
 * ResourceManager and the "release all partitions" callback of the partition tracker to fire.
 */
@Test
public void testReleaseOfJobResourcesIfJobMasterIsNotCorrect() throws Exception {
    final TaskManagerServices services =
            new TaskManagerServicesBuilder()
                    .setTaskSlotTable(TaskSlotUtils.createTaskSlotTable(1))
                    .build();

    // Partition tracker that claims to track partitions for any job and reports the job id
    // for which a full release was requested.
    final TestingTaskExecutorPartitionTracker partitionTracker =
            new TestingTaskExecutorPartitionTracker();
    final CompletableFuture<JobID> releasedPartitionsJobFuture = new CompletableFuture<>();
    partitionTracker.setIsTrackingPartitionsForFunction(ignored -> true);
    partitionTracker.setStopTrackingAndReleaseAllPartitionsConsumer(
            releasedPartitionsJobFuture::complete);

    final TaskExecutor taskExecutor =
            createTaskExecutor(services, HEARTBEAT_SERVICES, partitionTracker);

    // JobMaster gateway that answers every TaskManager registration with a rejection.
    final TestingJobMasterGateway rejectingJobMaster =
            new TestingJobMasterGatewayBuilder()
                    .setRegisterTaskManagerFunction(
                            (unusedJobId, unusedRegistrationInformation) ->
                                    CompletableFuture.completedFuture(
                                            new JMTMRegistrationRejection("foobar")))
                    .build();
    rpc.registerGateway(rejectingJobMaster.getAddress(), rejectingJobMaster);

    final InstanceID registrationId = new InstanceID();
    final OneShotLatch registeredAtResourceManager = new OneShotLatch();
    final CompletableFuture<Tuple3<InstanceID, SlotID, AllocationID>> slotAvailableFuture =
            new CompletableFuture<>();
    final TestingResourceManagerGateway resourceManager =
            createRmWithTmRegisterAndNotifySlotHooks(
                    registrationId, registeredAtResourceManager, slotAvailableFuture);
    rpc.registerGateway(resourceManager.getAddress(), resourceManager);
    resourceManagerLeaderRetriever.notifyListener(
            resourceManager.getAddress(), resourceManager.getFencingToken().toUUID());

    try {
        taskExecutor.start();
        final TaskExecutorGateway selfGateway =
                taskExecutor.getSelfGateway(TaskExecutorGateway.class);
        registeredAtResourceManager.await();

        final AllocationID allocationId = new AllocationID();
        final SlotID slotId = new SlotID(taskExecutor.getResourceID(), 0);
        requestSlot(
                selfGateway,
                jobId,
                allocationId,
                slotId,
                ResourceProfile.UNKNOWN,
                rejectingJobMaster.getAddress(),
                resourceManager.getFencingToken());

        // Announcing the JobMaster as leader triggers the registration attempt; its rejection
        // should make the TaskExecutor release all resources held for the job.
        jobManagerLeaderRetriever.notifyListener(
                rejectingJobMaster.getAddress(),
                rejectingJobMaster.getFencingToken().toUUID());

        // the slot should be freed
        final Tuple3<InstanceID, SlotID, AllocationID> freedSlot = slotAvailableFuture.get();
        assertThat(freedSlot.f1, is(slotId));
        assertThat(freedSlot.f2, is(allocationId));

        // all job partitions should be released
        assertThat(releasedPartitionsJobFuture.get(), is(jobId));
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
use of org.apache.flink.runtime.io.network.partition.TestingTaskExecutorPartitionTracker in project flink by apache.
the class TaskExecutorPartitionLifecycleTest method testPartitionRelease.
/**
 * Runs the shared partition-release scenario with a fresh testing partition tracker.
 *
 * <p>The tracker completes a future with the result partition id whenever tracking is started,
 * is then handed to {@code partitionTrackerSetup} for test-specific configuration, and finally
 * passed on to {@code internalTestPartitionRelease} together with a newly built Netty shuffle
 * environment.
 *
 * @param partitionTrackerSetup test-specific configuration applied to the tracker
 * @param testAction action forwarded unchanged to {@code internalTestPartitionRelease}
 * @throws Exception if the delegated scenario fails
 */
private void testPartitionRelease(PartitionTrackerSetup partitionTrackerSetup, TestAction testAction) throws Exception {
    final CompletableFuture<ResultPartitionID> trackedPartitionFuture = new CompletableFuture<>();

    final TestingTaskExecutorPartitionTracker tracker = new TestingTaskExecutorPartitionTracker();
    tracker.setStartTrackingPartitionsConsumer(
            (ignoredJobId, trackedPartition) -> {
                // report which partition the TaskExecutor started to track
                trackedPartitionFuture.complete(trackedPartition.getResultPartitionId());
            });
    partitionTrackerSetup.accept(tracker);

    internalTestPartitionRelease(
            tracker,
            new NettyShuffleEnvironmentBuilder().build(),
            trackedPartitionFuture,
            testAction);
}
use of org.apache.flink.runtime.io.network.partition.TestingTaskExecutorPartitionTracker in project flink by apache.
the class TaskExecutorPartitionLifecycleTest method testJobMasterConnectionTerminationAfterExternalReleaseOrPromotion.
/**
 * Exercises the JobMaster disconnect that is expected after partitions are released or promoted
 * through the TaskExecutor gateway.
 *
 * <p>While the testing tracker reports that it still tracks partitions for the job, a
 * release/promote call must not complete the disconnect future. Once the tracker reports that
 * nothing is tracked anymore, invoking {@code releaseOrPromoteCall} is expected to complete it.
 *
 * @param releaseOrPromoteCall gateway call (release or promote) issued for the job and result
 *     partition after the tracker has stopped reporting tracked partitions
 * @throws Exception if setting up or driving the TaskExecutor fails
 */
private void testJobMasterConnectionTerminationAfterExternalReleaseOrPromotion(TriConsumer<TaskExecutorGateway, JobID, ResultPartitionID> releaseOrPromoteCall) throws Exception {
    // completed as soon as the TaskExecutor asks the JobMaster to disconnect it
    final CompletableFuture<Void> disconnected = new CompletableFuture<>();
    final JobMasterGateway jobMaster =
            new TestingJobMasterGatewayBuilder()
                    .setDisconnectTaskManagerFunction(
                            ignoredResourceId -> {
                                disconnected.complete(null);
                                return CompletableFuture.completedFuture(Acknowledge.get());
                            })
                    .build();

    final DefaultJobTable jobTable = DefaultJobTable.create();
    final TaskManagerServices services =
            new TaskManagerServicesBuilder()
                    .setJobTable(jobTable)
                    .setShuffleEnvironment(new NettyShuffleEnvironmentBuilder().build())
                    .setTaskSlotTable(createTaskSlotTable())
                    .build();

    // The tracker answers "is tracking" from a flag the test flips, and exposes the first
    // release request through a future.
    final TestingTaskExecutorPartitionTracker tracker = new TestingTaskExecutorPartitionTracker();
    final AtomicBoolean stillTracking = new AtomicBoolean(false);
    tracker.setIsTrackingPartitionsForFunction(ignoredJobId -> stillTracking.get());
    final CompletableFuture<Collection<ResultPartitionID>> firstReleaseCall =
            new CompletableFuture<>();
    tracker.setStopTrackingAndReleasePartitionsConsumer(firstReleaseCall::complete);

    final ResultPartitionDeploymentDescriptor partitionDescriptor =
            PartitionTestUtils.createPartitionDeploymentDescriptor(ResultPartitionType.BLOCKING);
    final ResultPartitionID partitionId =
            partitionDescriptor.getShuffleDescriptor().getResultPartitionID();

    final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(services, tracker);
    try {
        taskExecutor.start();
        taskExecutor.waitUntilStarted();

        TaskSubmissionTestEnvironment.registerJobMasterConnection(
                jobTable,
                jobId,
                rpc,
                jobMaster,
                new NoOpTaskManagerActions(),
                timeout,
                taskExecutor.getMainThreadExecutableForTesting());

        final TaskExecutorGateway gateway =
                taskExecutor.getSelfGateway(TaskExecutorGateway.class);

        stillTracking.set(true);
        assertThat(firstReleaseCall.isDone(), is(false));

        gateway.releaseOrPromotePartitions(
                jobId, Collections.singleton(new ResultPartitionID()), Collections.emptySet());

        // At this point we only know that the TE has entered releasePartitions; we cannot be
        // certain whether it has already checked whether it should disconnect or not.
        firstReleaseCall.get();

        // connection should be kept alive since the table still contains partitions
        assertThat(disconnected.isDone(), is(false));

        stillTracking.set(false);

        // The TM should check whether partitions are still stored, and afterwards terminate
        // the connection.
        releaseOrPromoteCall.accept(gateway, jobId, partitionId);
        disconnected.get();
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
Aggregations