use of org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway in project flink by apache.
the class Execution method sendReleaseIntermediateResultPartitionsRpcCall.
/**
 * Releases the pipelined result partitions produced by this execution attempt.
 *
 * <p>The shuffle descriptor of every pipelined partition is handed to {@code
 * ShuffleMaster#releasePartitionExternally}; the collected partition ids are then sent to the
 * TaskManager gateway of the assigned slot. No RPC is sent when no slot is assigned or when no
 * pipelined partitions exist.
 */
private void sendReleaseIntermediateResultPartitionsRpcCall() {
    LOG.info("Discarding the results produced by task execution {}.", attemptId);

    final LogicalSlot assignedSlot = assignedResource;
    if (assignedSlot == null) {
        // without a slot there is no TaskManager to talk to
        return;
    }

    final TaskManagerGateway gateway = assignedSlot.getTaskManagerGateway();
    final ShuffleMaster<?> shuffleMaster =
            getVertex().getExecutionGraphAccessor().getShuffleMaster();

    final Set<ResultPartitionID> pipelinedPartitionIds =
            producedPartitions.values().stream()
                    .filter(descriptor -> descriptor.getPartitionType().isPipelined())
                    .map(ResultPartitionDeploymentDescriptor::getShuffleDescriptor)
                    // peek carries the release side effect; collect(...) below is a
                    // full-traversal terminal operation, so it runs for every element
                    .peek(shuffleMaster::releasePartitionExternally)
                    .map(ShuffleDescriptor::getResultPartitionID)
                    .collect(Collectors.toSet());

    if (pipelinedPartitionIds.isEmpty()) {
        return;
    }

    // TODO For some tests this could be a problem when querying too early if all
    // resources were released
    gateway.releasePartitions(getVertex().getJobId(), pipelinedPartitionIds);
}
use of org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway in project flink by apache.
the class Execution method notifyCheckpointAborted.
/**
 * Notifies the task of this execution about an aborted checkpoint.
 *
 * <p>The notification is forwarded to the TaskManager gateway of the currently assigned slot.
 * If no slot is assigned, only a debug message is logged.
 *
 * @param abortCheckpointId id of the aborted checkpoint
 * @param latestCompletedCheckpointId id of the latest completed checkpoint
 * @param timestamp timestamp of the aborted checkpoint
 */
public void notifyCheckpointAborted(long abortCheckpointId, long latestCompletedCheckpointId, long timestamp) {
    final LogicalSlot assignedSlot = assignedResource;

    if (assignedSlot == null) {
        LOG.debug("The execution has no slot assigned. This indicates that the execution is " + "no longer running.");
        return;
    }

    assignedSlot
            .getTaskManagerGateway()
            .notifyCheckpointAborted(
                    attemptId,
                    getVertex().getJobId(),
                    abortCheckpointId,
                    latestCompletedCheckpointId,
                    timestamp);
}
use of org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway in project flink by apache.
the class Execution method sendCancelRpcCall.
/**
 * Sends a cancel-task call for this execution attempt to the TaskManager of the assigned slot.
 *
 * <p>The call is issued through {@code FutureUtils.retry}, using {@code numberRetries} as the
 * retry count and the JobMaster main thread executor. If the resulting future completes with a
 * failure, the failure is wrapped and passed to {@code fail(...)}. Nothing happens when no slot
 * is assigned.
 *
 * @param numberRetries retry count handed to {@code FutureUtils.retry}
 */
private void sendCancelRpcCall(int numberRetries) {
    final LogicalSlot assignedSlot = assignedResource;
    if (assignedSlot == null) {
        return;
    }

    final TaskManagerGateway gateway = assignedSlot.getTaskManagerGateway();
    final ComponentMainThreadExecutor mainThreadExecutor =
            getVertex().getExecutionGraphAccessor().getJobMasterMainThreadExecutor();

    final CompletableFuture<Acknowledge> cancelAck =
            FutureUtils.retry(
                    () -> gateway.cancelTask(attemptId, rpcTimeout),
                    numberRetries,
                    mainThreadExecutor);

    cancelAck.whenComplete(
            (ack, failure) -> {
                if (failure == null) {
                    return;
                }
                fail(new Exception("Task could not be canceled.", failure));
            });
}
use of org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway in project flink by apache.
the class Execution method deploy.
/**
 * Deploys the execution to the previously assigned resource.
 *
 * <p>The method must run in the JobMaster main thread. It verifies that a live slot is
 * assigned, moves the execution from SCHEDULED to DEPLOYING, checks that this execution is the
 * slot's payload, builds the task deployment descriptor and submits it to the TaskManager
 * asynchronously. The submission result is handled back on the JobMaster main thread executor.
 *
 * @throws JobException if the execution cannot be deployed to the assigned resource
 * @throws IllegalStateException if the execution is not in SCHEDULED state, the state
 *     transition to DEPLOYING fails, or this execution is not the payload of the assigned slot
 */
public void deploy() throws JobException {
    assertRunningInJobMasterMainThread();

    final LogicalSlot slot = assignedResource;
    checkNotNull(slot, "In order to deploy the execution we first have to assign a resource via tryAssignResource.");

    // The more general check is the rpcTimeout of the deployment call
    if (!slot.isAlive()) {
        throw new JobException("Target slot (TaskManager) for deployment is no longer alive.");
    }

    // make sure exactly one deployment call happens from the correct state
    final ExecutionState previous = this.state;
    if (previous != SCHEDULED) {
        // vertex may have been cancelled, or it was already scheduled
        throw new IllegalStateException("The vertex must be in SCHEDULED state to be deployed. Found state " + previous);
    }
    if (!transitionState(previous, DEPLOYING)) {
        // this should actually not happen and indicates a race somewhere else
        throw new IllegalStateException("Cannot deploy task: Concurrent deployment call race.");
    }

    if (slot.getPayload() != this) {
        throw new IllegalStateException(String.format("The execution %s has not been assigned to the assigned slot.", this));
    }

    try {
        // race double check, did we fail/cancel and do we need to release the slot?
        if (this.state != DEPLOYING) {
            slot.releaseSlot(new FlinkException("Actual state of execution " + this + " (" + state + ") does not match expected state DEPLOYING."));
            return;
        }

        LOG.info(
                "Deploying {} (attempt #{}) with attempt id {} and vertex id {} to {} with allocation id {}",
                vertex.getTaskNameWithSubtaskIndex(),
                attemptNumber,
                vertex.getCurrentExecutionAttempt().getAttemptId(),
                vertex.getID(),
                getAssignedResourceLocation(),
                slot.getAllocationId());

        final TaskDeploymentDescriptor deployment =
                TaskDeploymentDescriptorFactory.fromExecutionVertex(vertex, attemptNumber)
                        .createDeploymentDescriptor(
                                slot.getAllocationId(), taskRestore, producedPartitions.values());

        // null taskRestore to let it be GC'ed
        taskRestore = null;

        final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway();
        final ComponentMainThreadExecutor jobMasterMainThreadExecutor =
                vertex.getExecutionGraphAccessor().getJobMasterMainThreadExecutor();

        getVertex().notifyPendingDeployment(this);

        // The submission runs on the future executor so that serializing a large TDD does
        // not block the main thread; the result is handled back on the main thread executor.
        CompletableFuture.supplyAsync(
                        () -> taskManagerGateway.submitTask(deployment, rpcTimeout), executor)
                .thenCompose(Function.identity())
                .whenCompleteAsync(
                        (ack, failure) -> {
                            if (failure == null) {
                                vertex.notifyCompletedDeployment(this);
                                return;
                            }

                            final Throwable actualFailure = ExceptionUtils.stripCompletionException(failure);
                            if (!(actualFailure instanceof TimeoutException)) {
                                markFailed(actualFailure);
                                return;
                            }

                            // timeouts get a message naming the task and the TaskManager
                            String taskname = vertex.getTaskNameWithSubtaskIndex() + " (" + attemptId + ')';
                            markFailed(new Exception("Cannot deploy task " + taskname + " - TaskManager (" + getAssignedResourceLocation() + ") not responding after a rpcTimeout of " + rpcTimeout, actualFailure));
                        },
                        jobMasterMainThreadExecutor);
    } catch (Throwable t) {
        markFailed(t);
    }
}
use of org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway in project flink by apache.
the class Execution method notifyCheckpointOnComplete.
/**
 * Notifies the task of this execution about a completed checkpoint and, if available, the last
 * subsumed checkpoint id.
 *
 * <p>The notification is forwarded to the TaskManager gateway of the currently assigned slot.
 * If no slot is assigned, only a debug message is logged.
 *
 * @param completedCheckpointId id of the completed checkpoint
 * @param completedTimestamp timestamp of the completed checkpoint
 * @param lastSubsumedCheckpointId id of the last subsumed checkpoint; a value of {@link
 *     org.apache.flink.runtime.checkpoint.CheckpointStoreUtil#INVALID_CHECKPOINT_ID} means no
 *     checkpoint has been subsumed.
 */
public void notifyCheckpointOnComplete(long completedCheckpointId, long completedTimestamp, long lastSubsumedCheckpointId) {
    final LogicalSlot assignedSlot = assignedResource;

    if (assignedSlot == null) {
        LOG.debug("The execution has no slot assigned. This indicates that the execution is " + "no longer running.");
        return;
    }

    assignedSlot
            .getTaskManagerGateway()
            .notifyCheckpointOnComplete(
                    attemptId,
                    getVertex().getJobId(),
                    completedCheckpointId,
                    completedTimestamp,
                    lastSubsumedCheckpointId);
}
Aggregations