use of org.apache.flink.runtime.rpc.RpcMethod in project flink by apache.
the class TaskExecutor method requestSlot.
// ----------------------------------------------------------------------
// Slot allocation RPCs
// ----------------------------------------------------------------------
/**
 * Handles a slot request that the ResourceManager issues on behalf of a job.
 *
 * <p>The request is only accepted if this TaskManager is connected to a resource manager
 * whose target leader id equals {@code rmLeaderId}. A free slot is allocated for the given
 * job and allocation id; a slot that is not free is accepted only if it is already
 * allocated for exactly this job and allocation id. Afterwards the slots are either
 * offered to the already known job manager, or the job is added to the job leader service.
 *
 * @param slotId identifying the requested slot
 * @param jobId identifying the job for which the request is issued
 * @param allocationId id for the request
 * @param targetAddress of the job manager requesting the slot
 * @param rmLeaderId current leader id of the ResourceManager
 * @throws SlotAllocationException if there is no resource manager connection, the leader
 *     id does not match, the slot cannot be allocated, the slot is occupied by another
 *     allocation, or the job cannot be added to the job leader service
 * @return answer to the slot request
 */
@RpcMethod
public TMSlotRequestReply requestSlot(final SlotID slotId, final JobID jobId, final AllocationID allocationId, final String targetAddress, final UUID rmLeaderId) throws SlotAllocationException {
    log.info("Receive slot request {} for job {} from resource manager with leader id {}.", allocationId, jobId, rmLeaderId);

    // Guard: a resource manager connection must exist.
    if (resourceManagerConnection == null) {
        final String notConnected = "TaskManager is not connected to a resource manager.";
        log.debug(notConnected);
        throw new SlotAllocationException(notConnected);
    }

    // Guard: the request must come from the resource manager leader we are connected to.
    if (!resourceManagerConnection.getTargetLeaderId().equals(rmLeaderId)) {
        final String leaderMismatch = "The leader id " + rmLeaderId + " does not match with the leader id of the connected resource manager " + resourceManagerConnection.getTargetLeaderId() + '.';
        log.debug(leaderMismatch);
        throw new SlotAllocationException(leaderMismatch);
    }

    final int slotNumber = slotId.getSlotNumber();

    if (!taskSlotTable.isSlotFree(slotNumber)) {
        // An occupied slot is only acceptable if it already belongs to this very request.
        if (!taskSlotTable.isAllocated(slotNumber, jobId, allocationId)) {
            final String occupied = "The slot " + slotId + " has already been allocated for a different job.";
            log.info(occupied);
            throw new SlotAllocationException(occupied);
        }
    } else if (taskSlotTable.allocateSlot(slotNumber, jobId, allocationId, taskManagerConfiguration.getTimeout())) {
        log.info("Allocated slot for {}.", allocationId);
    } else {
        log.info("Could not allocate slot for {}.", allocationId);
        throw new SlotAllocationException("Could not allocate slot.");
    }

    if (!jobManagerTable.contains(jobId)) {
        try {
            jobLeaderService.addJob(jobId, targetAddress);
        } catch (Exception addJobFailure) {
            // Roll back: release the slot again before reporting the failure.
            try {
                taskSlotTable.freeSlot(allocationId);
            } catch (SlotNotFoundException missingSlot) {
                // The slot was handled just above, so not finding it here is unexpected
                // and is reported as a fatal error.
                onFatalError(missingSlot);
            }

            // Sanity check: the slot has to be free after the rollback.
            if (!taskSlotTable.isSlotFree(slotNumber)) {
                onFatalError(new Exception("Could not free slot " + slotId));
            }

            throw new SlotAllocationException("Could not add job to job leader service.", addJobFailure);
        }
    } else {
        // The job manager is already known, so the slots can be offered right away.
        offerSlotsToJobManager(jobId);
    }

    return new TMSlotRequestRegistered(resourceManagerConnection.getRegistrationId(), getResourceID(), allocationId);
}
use of org.apache.flink.runtime.rpc.RpcMethod in project flink by apache.
the class TaskExecutor method stopTask.
/**
 * Stops the task that belongs to the given execution attempt.
 *
 * @param executionAttemptID identifying the task to stop
 * @return acknowledge if {@code stopExecution()} was invoked on the task without an error
 * @throws TaskException if no task is registered for the execution attempt, or if
 *     stopping the task throws
 */
@RpcMethod
public Acknowledge stopTask(ExecutionAttemptID executionAttemptID) throws TaskException {
    final Task taskToStop = taskSlotTable.getTask(executionAttemptID);

    if (taskToStop == null) {
        final String notFound = "Cannot find task to stop for execution " + executionAttemptID + '.';
        log.debug(notFound);
        throw new TaskException(notFound);
    }

    try {
        taskToStop.stopExecution();
        return Acknowledge.get();
    } catch (Throwable failure) {
        // Anything thrown while stopping is surfaced to the caller as a TaskException.
        throw new TaskException("Cannot stop task for execution " + executionAttemptID + '.', failure);
    }
}
use of org.apache.flink.runtime.rpc.RpcMethod in project flink by apache.
the class TaskExecutor method triggerCheckpoint.
// ----------------------------------------------------------------------
// Checkpointing RPCs
// ----------------------------------------------------------------------
/**
 * Triggers a checkpoint barrier on the task that belongs to the given execution attempt.
 *
 * @param executionAttemptID identifying the task to checkpoint
 * @param checkpointId id of the checkpoint
 * @param checkpointTimestamp timestamp of the checkpoint
 * @param checkpointOptions options passed on to the task together with the barrier
 * @return acknowledge once the barrier has been handed to the task
 * @throws CheckpointException if no task is registered for the execution attempt
 */
@RpcMethod
public Acknowledge triggerCheckpoint(ExecutionAttemptID executionAttemptID, long checkpointId, long checkpointTimestamp, CheckpointOptions checkpointOptions) throws CheckpointException {
    log.debug("Trigger checkpoint {}@{} for {}.", checkpointId, checkpointTimestamp, executionAttemptID);

    final Task targetTask = taskSlotTable.getTask(executionAttemptID);

    if (targetTask == null) {
        final String unknownTask = "TaskManager received a checkpoint request for unknown task " + executionAttemptID + '.';
        log.debug(unknownTask);
        throw new CheckpointException(unknownTask);
    }

    targetTask.triggerCheckpointBarrier(checkpointId, checkpointTimestamp, checkpointOptions);
    return Acknowledge.get();
}
use of org.apache.flink.runtime.rpc.RpcMethod in project flink by apache.
the class TaskExecutor method cancelTask.
/**
 * Cancels the task that belongs to the given execution attempt.
 *
 * @param executionAttemptID identifying the task to cancel
 * @return acknowledge if {@code cancelExecution()} was invoked on the task without an error
 * @throws TaskException if no task is registered for the execution attempt, or if
 *     cancelling the task throws
 */
@RpcMethod
public Acknowledge cancelTask(ExecutionAttemptID executionAttemptID) throws TaskException {
    final Task task = taskSlotTable.getTask(executionAttemptID);

    if (task == null) {
        // The message names the cancel operation so that it cannot be confused with the
        // equivalent failure reported by stopTask.
        final String message = "Cannot find task to cancel for execution " + executionAttemptID + '.';
        log.debug(message);
        throw new TaskException(message);
    }

    try {
        task.cancelExecution();
        return Acknowledge.get();
    } catch (Throwable t) {
        // Anything thrown while cancelling is surfaced to the caller as a TaskException.
        throw new TaskException("Cannot cancel task for execution " + executionAttemptID + '.', t);
    }
}
use of org.apache.flink.runtime.rpc.RpcMethod in project flink by apache.
the class ResourceManager method notifySlotAvailable.
/**
 * Notification from a TaskExecutor that a slot has become available.
 *
 * <p>The notification is forwarded to the slot manager only if the given leader id equals
 * the current leader session id, a registration exists for the resource id of the slot,
 * and the instance id of that registration equals {@code instanceID}. Otherwise the
 * message is discarded and a debug message is logged.
 *
 * @param resourceManagerLeaderId TaskExecutor's resource manager leader id
 * @param instanceID TaskExecutor's instance id
 * @param slotId The slot id of the available slot
 */
@RpcMethod
public void notifySlotAvailable(final UUID resourceManagerLeaderId, final InstanceID instanceID, final SlotID slotId) {
    if (!resourceManagerLeaderId.equals(leaderSessionId)) {
        log.debug("Discarding notify slot available message for slot {}, because the " + "leader id {} did not match the expected leader id {}.", slotId, resourceManagerLeaderId, leaderSessionId);
        return;
    }

    final ResourceID resourceId = slotId.getResourceID();
    final WorkerRegistration<WorkerType> registration = taskExecutors.get(resourceId);

    if (registration == null) {
        // The two halves of this message were previously concatenated without a
        // separating space, producing "availablemessage" in the log output.
        log.debug("Could not find registration for resource id {}. Discarding the slot available message {}.", resourceId, slotId);
        return;
    }

    final InstanceID registrationId = registration.getInstanceID();

    if (registrationId.equals(instanceID)) {
        slotManager.notifySlotAvailable(resourceId, slotId);
    } else {
        log.debug("Invalid registration id for slot available message. This indicates an" + " outdated request.");
    }
}
Aggregations