use of org.apache.flink.runtime.jobmaster.JobMasterId in project flink by apache.
the class ResourceManager method closeJobManagerConnection.
/**
* This method should be called by the framework once it detects that a currently registered job
* manager has failed.
*
* @param jobId identifying the job whose leader shall be disconnected.
* @param resourceRequirementHandling indicating how existing resource requirements for the
* corresponding job should be handled
* @param cause The exception which cause the JobManager failed.
*/
protected void closeJobManagerConnection(JobID jobId, ResourceRequirementHandling resourceRequirementHandling, Exception cause) {
JobManagerRegistration jobManagerRegistration = jobManagerRegistrations.remove(jobId);
if (jobManagerRegistration != null) {
final ResourceID jobManagerResourceId = jobManagerRegistration.getJobManagerResourceID();
final JobMasterGateway jobMasterGateway = jobManagerRegistration.getJobManagerGateway();
final JobMasterId jobMasterId = jobManagerRegistration.getJobMasterId();
log.info("Disconnect job manager {}@{} for job {} from the resource manager.", jobMasterId, jobMasterGateway.getAddress(), jobId);
jobManagerHeartbeatManager.unmonitorTarget(jobManagerResourceId);
jmResourceIdRegistrations.remove(jobManagerResourceId);
if (resourceRequirementHandling == ResourceRequirementHandling.CLEAR) {
slotManager.clearResourceRequirements(jobId);
}
// tell the job manager about the disconnect
jobMasterGateway.disconnectResourceManager(getFencingToken(), cause);
} else {
log.debug("There was no registered job manager for job {}.", jobId);
}
}
use of org.apache.flink.runtime.jobmaster.JobMasterId in project flink by apache.
the class ResourceManager method registerJobMaster.
// ------------------------------------------------------------------------
// RPC methods
// ------------------------------------------------------------------------
@Override
public CompletableFuture<RegistrationResponse> registerJobMaster(final JobMasterId jobMasterId, final ResourceID jobManagerResourceId, final String jobManagerAddress, final JobID jobId, final Time timeout) {
checkNotNull(jobMasterId);
checkNotNull(jobManagerResourceId);
checkNotNull(jobManagerAddress);
checkNotNull(jobId);
if (!jobLeaderIdService.containsJob(jobId)) {
try {
jobLeaderIdService.addJob(jobId);
} catch (Exception e) {
ResourceManagerException exception = new ResourceManagerException("Could not add the job " + jobId + " to the job id leader service.", e);
onFatalError(exception);
log.error("Could not add job {} to job leader id service.", jobId, e);
return FutureUtils.completedExceptionally(exception);
}
}
log.info("Registering job manager {}@{} for job {}.", jobMasterId, jobManagerAddress, jobId);
CompletableFuture<JobMasterId> jobMasterIdFuture;
try {
jobMasterIdFuture = jobLeaderIdService.getLeaderId(jobId);
} catch (Exception e) {
// we cannot check the job leader id so let's fail
// TODO: Maybe it's also ok to skip this check in case that we cannot check the leader
// id
ResourceManagerException exception = new ResourceManagerException("Cannot obtain the " + "job leader id future to verify the correct job leader.", e);
onFatalError(exception);
log.debug("Could not obtain the job leader id future to verify the correct job leader.");
return FutureUtils.completedExceptionally(exception);
}
CompletableFuture<JobMasterGateway> jobMasterGatewayFuture = getRpcService().connect(jobManagerAddress, jobMasterId, JobMasterGateway.class);
CompletableFuture<RegistrationResponse> registrationResponseFuture = jobMasterGatewayFuture.thenCombineAsync(jobMasterIdFuture, (JobMasterGateway jobMasterGateway, JobMasterId leadingJobMasterId) -> {
if (Objects.equals(leadingJobMasterId, jobMasterId)) {
return registerJobMasterInternal(jobMasterGateway, jobId, jobManagerAddress, jobManagerResourceId);
} else {
final String declineMessage = String.format("The leading JobMaster id %s did not match the received JobMaster id %s. " + "This indicates that a JobMaster leader change has happened.", leadingJobMasterId, jobMasterId);
log.debug(declineMessage);
return new RegistrationResponse.Failure(new FlinkException(declineMessage));
}
}, getMainThreadExecutor());
// handle exceptions which might have occurred in one of the futures inputs of combine
return registrationResponseFuture.handleAsync((RegistrationResponse registrationResponse, Throwable throwable) -> {
if (throwable != null) {
if (log.isDebugEnabled()) {
log.debug("Registration of job manager {}@{} failed.", jobMasterId, jobManagerAddress, throwable);
} else {
log.info("Registration of job manager {}@{} failed.", jobMasterId, jobManagerAddress);
}
return new RegistrationResponse.Failure(throwable);
} else {
return registrationResponse;
}
}, ioExecutor);
}
use of org.apache.flink.runtime.jobmaster.JobMasterId in project flink by apache.
the class TaskExecutor method internalOfferSlotsToJobManager.
private void internalOfferSlotsToJobManager(JobTable.Connection jobManagerConnection) {
final JobID jobId = jobManagerConnection.getJobId();
if (taskSlotTable.hasAllocatedSlots(jobId)) {
log.info("Offer reserved slots to the leader of job {}.", jobId);
final JobMasterGateway jobMasterGateway = jobManagerConnection.getJobManagerGateway();
final Iterator<TaskSlot<Task>> reservedSlotsIterator = taskSlotTable.getAllocatedSlots(jobId);
final JobMasterId jobMasterId = jobManagerConnection.getJobMasterId();
final Collection<SlotOffer> reservedSlots = new HashSet<>(2);
while (reservedSlotsIterator.hasNext()) {
SlotOffer offer = reservedSlotsIterator.next().generateSlotOffer();
reservedSlots.add(offer);
}
final UUID slotOfferId = UUID.randomUUID();
currentSlotOfferPerJob.put(jobId, slotOfferId);
CompletableFuture<Collection<SlotOffer>> acceptedSlotsFuture = jobMasterGateway.offerSlots(getResourceID(), reservedSlots, taskManagerConfiguration.getRpcTimeout());
acceptedSlotsFuture.whenCompleteAsync(handleAcceptedSlotOffers(jobId, jobMasterGateway, jobMasterId, reservedSlots, slotOfferId), getMainThreadExecutor());
} else {
log.debug("There are no unassigned slots for the job {}.", jobId);
}
}
use of org.apache.flink.runtime.jobmaster.JobMasterId in project flink by apache.
the class DeclarativeSlotPoolBridgeTest method testIfJobIsRestartingAllOfferedSlotsWillBeRegistered.
@Test
public void testIfJobIsRestartingAllOfferedSlotsWillBeRegistered() throws Exception {
final CompletableFuture<Void> registerSlotsCalledFuture = new CompletableFuture<>();
final TestingDeclarativeSlotPoolFactory declarativeSlotPoolFactory = new TestingDeclarativeSlotPoolFactory(TestingDeclarativeSlotPool.builder().setRegisterSlotsFunction((slotOffers, taskManagerLocation, taskManagerGateway, aLong) -> registerSlotsCalledFuture.complete(null)));
try (DeclarativeSlotPoolBridge declarativeSlotPoolBridge = createDeclarativeSlotPoolBridge(declarativeSlotPoolFactory, requestSlotMatchingStrategy)) {
declarativeSlotPoolBridge.start(jobMasterId, "localhost", mainThreadExecutor);
declarativeSlotPoolBridge.setIsJobRestarting(true);
final LocalTaskManagerLocation localTaskManagerLocation = new LocalTaskManagerLocation();
declarativeSlotPoolBridge.registerTaskManager(localTaskManagerLocation.getResourceID());
declarativeSlotPoolBridge.offerSlots(localTaskManagerLocation, new SimpleAckingTaskManagerGateway(), Collections.singleton(new SlotOffer(new AllocationID(), 0, ResourceProfile.ANY)));
// make sure that the register slots method is called
registerSlotsCalledFuture.join();
}
}
use of org.apache.flink.runtime.jobmaster.JobMasterId in project flink by apache.
the class DeclarativeSlotPoolBridgeTest method testNoConcurrentModificationWhenSuspendingAndReleasingSlot.
@Test
public void testNoConcurrentModificationWhenSuspendingAndReleasingSlot() throws Exception {
try (DeclarativeSlotPoolBridge declarativeSlotPoolBridge = createDeclarativeSlotPoolBridge(new DefaultDeclarativeSlotPoolFactory(), requestSlotMatchingStrategy)) {
declarativeSlotPoolBridge.start(jobMasterId, "localhost", mainThreadExecutor);
final List<SlotRequestId> slotRequestIds = Arrays.asList(new SlotRequestId(), new SlotRequestId());
final List<CompletableFuture<PhysicalSlot>> slotFutures = slotRequestIds.stream().map(slotRequestId -> {
final CompletableFuture<PhysicalSlot> slotFuture = declarativeSlotPoolBridge.requestNewAllocatedSlot(slotRequestId, ResourceProfile.UNKNOWN, rpcTimeout);
slotFuture.whenComplete((physicalSlot, throwable) -> {
if (throwable != null) {
declarativeSlotPoolBridge.releaseSlot(slotRequestId, throwable);
}
});
return slotFuture;
}).collect(Collectors.toList());
declarativeSlotPoolBridge.close();
try {
FutureUtils.waitForAll(slotFutures).get();
fail("The slot futures should be completed exceptionally.");
} catch (ExecutionException expected) {
// expected
}
}
}
Aggregations