use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway in project flink by apache.
the class ResourceManagerTest method testDisconnectJobManagerClearsRequirements.
/**
 * Registers a job master, declares a resource requirement for its job and then disconnects the
 * job manager with status {@code FINISHED}. The test passes once the slot manager's
 * clear-requirements callback has been invoked with the id of that job.
 */
@Test
public void testDisconnectJobManagerClearsRequirements() throws Exception {
    final JobID jobId = JobID.generate();
    // completed by the slot manager callback with the id of the job whose requirements are cleared
    final CompletableFuture<JobID> clearedJobFuture = new CompletableFuture<>();

    final TestingJobMasterGateway jobMasterGateway =
            new TestingJobMasterGatewayBuilder()
                    .setAddress(UUID.randomUUID().toString())
                    .build();
    final String jobMasterAddress = jobMasterGateway.getAddress();
    rpcService.registerGateway(jobMasterAddress, jobMasterGateway);

    // every leader id lookup resolves immediately to the testing gateway's fencing token
    final JobLeaderIdService jobLeaderIdService =
            TestingJobLeaderIdService.newBuilder()
                    .setGetLeaderIdFunction(
                            requestedJobId ->
                                    CompletableFuture.completedFuture(
                                            jobMasterGateway.getFencingToken()))
                    .build();
    final SlotManager slotManager =
            new TestingSlotManagerBuilder()
                    .setClearRequirementsConsumer(clearedJobFuture::complete)
                    .createSlotManager();

    resourceManager =
            new ResourceManagerBuilder()
                    .withJobLeaderIdService(jobLeaderIdService)
                    .withSlotManager(slotManager)
                    .buildAndStart();
    final ResourceManagerGateway resourceManagerGateway =
            resourceManager.getSelfGateway(ResourceManagerGateway.class);

    // block until the registration has gone through before declaring requirements
    resourceManagerGateway
            .registerJobMaster(
                    jobMasterGateway.getFencingToken(),
                    ResourceID.generate(),
                    jobMasterAddress,
                    jobId,
                    TIMEOUT)
            .get();

    final ResourceRequirements requirements =
            ResourceRequirements.create(
                    jobId,
                    jobMasterAddress,
                    Collections.singleton(
                            ResourceRequirement.create(ResourceProfile.UNKNOWN, 1)));
    resourceManagerGateway
            .declareRequiredResources(
                    jobMasterGateway.getFencingToken(), requirements, TIMEOUT)
            .get();

    resourceManagerGateway.disconnectJobManager(
            jobId, JobStatus.FINISHED, new FlinkException("Test exception"));

    // bounded wait so a missing callback fails the test instead of hanging it
    assertThat(clearedJobFuture.get(5, TimeUnit.SECONDS), is(jobId));
}
use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway in project flink by apache.
the class ResourceManagerTest method testJobMasterBecomesUnreachableTriggersDisconnect.
/**
 * Runs the shared "heartbeat target becomes unreachable" scenario against a job master whose
 * heartbeat endpoint always fails with a {@link RecipientUnreachableException}, and checks that
 * the job master is asked to disconnect from this resource manager.
 */
@Test
public void testJobMasterBecomesUnreachableTriggersDisconnect() throws Exception {
    final JobID jobId = new JobID();
    final ResourceID jobMasterResourceId = ResourceID.generate();
    // completed with the resource manager id passed to the job master's disconnect callback
    final CompletableFuture<ResourceManagerId> disconnectedFrom = new CompletableFuture<>();

    // NOTE(review): the message text mentions a "task executor" although this gateway stands in
    // for a job master - looks copy-pasted; confirm before changing the literal.
    final TestingJobMasterGateway jobMasterGateway =
            new TestingJobMasterGatewayBuilder()
                    .setAddress(UUID.randomUUID().toString())
                    .setResourceManagerHeartbeatFunction(
                            resourceId ->
                                    FutureUtils.completedExceptionally(
                                            new RecipientUnreachableException(
                                                    "sender",
                                                    "recipient",
                                                    "task executor is unreachable")))
                    .setDisconnectResourceManagerConsumer(disconnectedFrom::complete)
                    .build();
    rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);

    final LeaderRetrievalService jobMasterLeaderRetrievalService =
            new SettableLeaderRetrievalService(
                    jobMasterGateway.getAddress(),
                    jobMasterGateway.getFencingToken().toUUID());
    // only the job registered below may be looked up
    highAvailabilityServices.setJobMasterLeaderRetrieverFunction(
            requestedJobId -> {
                assertThat(requestedJobId, is(equalTo(jobId)));
                return jobMasterLeaderRetrievalService;
            });

    runHeartbeatTargetBecomesUnreachableTest(
            (ignore) -> {},
            resourceManagerGateway -> {
                final CompletableFuture<RegistrationResponse> registration =
                        resourceManagerGateway.registerJobMaster(
                                jobMasterGateway.getFencingToken(),
                                jobMasterResourceId,
                                jobMasterGateway.getAddress(),
                                jobId,
                                TIMEOUT);
                assertThat(
                        registration.get(), instanceOf(RegistrationResponse.Success.class));
            },
            resourceManagerResourceId -> {
                assertThat(disconnectedFrom.get(), is(equalTo(resourceManagerId)));
            });
}
use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway in project flink by apache.
the class ResourceManagerTest method testDisconnectJobManager.
/**
 * Registers a job master, disconnects it with the given status and checks the effect on the
 * {@link JobLeaderIdService}: the job must be removed when the status is globally terminal and
 * must not be removed otherwise.
 *
 * @param jobStatus status reported when disconnecting the job manager
 * @throws Exception if setting up the resource manager or waiting on a latch fails
 */
private void testDisconnectJobManager(JobStatus jobStatus) throws Exception {
    final JobID jobId = JobID.generate();
    final OneShotLatch jobAddedLatch = new OneShotLatch();
    final OneShotLatch jobRemovedLatch = new OneShotLatch();

    final TestingJobMasterGateway jobMasterGateway =
            new TestingJobMasterGatewayBuilder()
                    .setAddress(UUID.randomUUID().toString())
                    .build();
    rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);

    // the latches record when the leader id service sees the job being added / removed
    final JobLeaderIdService jobLeaderIdService =
            TestingJobLeaderIdService.newBuilder()
                    .setAddJobConsumer(ignored -> jobAddedLatch.trigger())
                    .setRemoveJobConsumer(ignored -> jobRemovedLatch.trigger())
                    .build();
    resourceManager =
            new ResourceManagerBuilder()
                    .withJobLeaderIdService(jobLeaderIdService)
                    .buildAndStart();

    highAvailabilityServices.setJobMasterLeaderRetrieverFunction(
            requestedJobId ->
                    new SettableLeaderRetrievalService(
                            jobMasterGateway.getAddress(),
                            jobMasterGateway.getFencingToken().toUUID()));

    final ResourceManagerGateway resourceManagerGateway =
            resourceManager.getSelfGateway(ResourceManagerGateway.class);

    // the registration future is intentionally not awaited; the latch below is the sync point
    resourceManagerGateway.registerJobMaster(
            jobMasterGateway.getFencingToken(),
            ResourceID.generate(),
            jobMasterGateway.getAddress(),
            jobId,
            TIMEOUT);
    jobAddedLatch.await();

    resourceManagerGateway.disconnectJobManager(
            jobId, jobStatus, new FlinkException("Test exception"));

    if (jobStatus.isGloballyTerminalState()) {
        jobRemovedLatch.await();
        return;
    }

    // job should not get removed: the short wait has to run into its timeout
    try {
        jobRemovedLatch.await(10L, TimeUnit.MILLISECONDS);
        fail("We should not have removed the job.");
    } catch (TimeoutException expected) {
        // timing out is the success path here
    }
}
use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway in project flink by apache.
the class ResourceManagerTest method testHeartbeatTimeoutWithJobMaster.
/**
 * Runs the shared heartbeat-timeout scenario with a registered job master and checks that the
 * job master is asked to disconnect from this resource manager. A heartbeat request may or may
 * not have reached the job master beforehand; if one did, it must originate from the resource
 * manager.
 */
@Test
public void testHeartbeatTimeoutWithJobMaster() throws Exception {
    final JobID jobId = new JobID();
    final ResourceID jobMasterResourceId = ResourceID.generate();
    // origin of the first heartbeat request the job master receives, if any
    final CompletableFuture<ResourceID> heartbeatOrigin = new CompletableFuture<>();
    // resource manager id passed to the job master's disconnect callback
    final CompletableFuture<ResourceManagerId> disconnectedFrom = new CompletableFuture<>();

    final TestingJobMasterGateway jobMasterGateway =
            new TestingJobMasterGatewayBuilder()
                    .setResourceManagerHeartbeatFunction(
                            resourceId -> {
                                heartbeatOrigin.complete(resourceId);
                                return FutureUtils.completedVoidFuture();
                            })
                    .setDisconnectResourceManagerConsumer(disconnectedFrom::complete)
                    .build();
    rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);

    final LeaderRetrievalService jobMasterLeaderRetrievalService =
            new SettableLeaderRetrievalService(
                    jobMasterGateway.getAddress(),
                    jobMasterGateway.getFencingToken().toUUID());
    // only the job registered below may be looked up
    highAvailabilityServices.setJobMasterLeaderRetrieverFunction(
            requestedJobId -> {
                assertThat(requestedJobId, is(equalTo(jobId)));
                return jobMasterLeaderRetrievalService;
            });

    runHeartbeatTimeoutTest(
            (ignore) -> {},
            resourceManagerGateway -> {
                final CompletableFuture<RegistrationResponse> registration =
                        resourceManagerGateway.registerJobMaster(
                                jobMasterGateway.getFencingToken(),
                                jobMasterResourceId,
                                jobMasterGateway.getAddress(),
                                jobId,
                                TIMEOUT);
                assertThat(
                        registration.get(), instanceOf(RegistrationResponse.Success.class));
            },
            resourceManagerResourceId -> {
                // might have been completed or not depending on whether the timeout was
                // triggered first, hence null is an accepted value
                final ResourceID observedOrigin = heartbeatOrigin.getNow(null);
                assertThat(
                        observedOrigin,
                        anyOf(is(resourceManagerResourceId), is(nullValue())));
                assertThat(disconnectedFrom.get(), is(equalTo(resourceManagerId)));
            });
}
use of org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway in project flink by apache.
the class TaskExecutorSubmissionTest method testFailingNotifyPartitionDataAvailable.
/**
 * Verifies that a task fails when its notifyPartitionDataAvailable call to the job master
 * fails.
 *
 * <p>IMPORTANT: The invokable's cancel method has to be called, because only then is the
 * "got canceled" future completed. This is achieved by not eagerly deploying consumer tasks and
 * by requiring the invokable to fill one memory segment. The completed memory segment triggers
 * the scheduling of the downstream operator since it is in pipeline mode. Once the segment is
 * filled, the invokable blocks and the test waits for the task failure caused by the failed
 * notifyPartitionDataAvailable call.
 */
@Test
public void testFailingNotifyPartitionDataAvailable() throws Exception {
    // use the smallest possible memory segment size, because one memory buffer has to be
    // filled to trigger notifyPartitionDataAvailable to the downstream operators
    final Configuration configuration = new Configuration();
    configuration.set(TaskManagerOptions.MEMORY_SEGMENT_SIZE, MemorySize.parse("4096"));

    NettyShuffleDescriptor sdd =
            createRemoteWithIdAndLocation(
                    new IntermediateResultPartitionID(), ResourceID.generate());
    TaskDeploymentDescriptor tdd =
            createSender(sdd, TestingAbstractInvokables.TestInvokableRecordCancel.class);
    ExecutionAttemptID eid = tdd.getExecutionAttemptId();

    final CompletableFuture<Void> taskRunningFuture = new CompletableFuture<>();
    final Exception exception = new Exception("Failed notifyPartitionDataAvailable");
    final JobMasterId jobMasterId = JobMasterId.generate();

    // job master stub whose notifyPartitionDataAvailable always completes exceptionally
    TestingJobMasterGateway testingJobMasterGateway =
            new TestingJobMasterGatewayBuilder()
                    .setFencingTokenSupplier(() -> jobMasterId)
                    .setNotifyPartitionDataAvailableFunction(
                            resultPartitionID -> FutureUtils.completedExceptionally(exception))
                    .build();

    try (TaskSubmissionTestEnvironment env =
            new TaskSubmissionTestEnvironment.Builder(jobId)
                    .setSlotSize(1)
                    .setConfiguration(configuration)
                    .addTaskManagerActionListener(
                            eid, ExecutionState.RUNNING, taskRunningFuture)
                    .setJobMasterId(jobMasterId)
                    .setJobMasterGateway(testingJobMasterGateway)
                    .useRealNonMockShuffleEnvironment()
                    .build()) {
        TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
        TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();

        TestingAbstractInvokables.TestInvokableRecordCancel.resetGotCanceledFuture();

        taskSlotTable.allocateSlot(0, jobId, tdd.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd, jobMasterId, timeout).get();
        // wait until the listener reports the task as RUNNING
        taskRunningFuture.get();

        CompletableFuture<Boolean> cancelFuture =
                TestingAbstractInvokables.TestInvokableRecordCancel.gotCanceled();
        assertTrue(cancelFuture.get());

        // the injected exception's message must show up in the task's failure cause chain
        final Throwable failureCause = taskSlotTable.getTask(eid).getFailureCause();
        final String expectedMessage = exception.getMessage();
        assertTrue(
                ExceptionUtils.findThrowableWithMessage(failureCause, expectedMessage)
                        .isPresent());
    }
}
Aggregations