Search in sources :

Example 16 with RpcMethod

use of org.apache.flink.runtime.rpc.RpcMethod in project flink by apache.

The method updatePartitions of the class TaskExecutor.

// ----------------------------------------------------------------------
// Partition lifecycle RPCs
// ----------------------------------------------------------------------
/**
 * Pushes updated partition information into the input gates of a task.
 *
 * <p>For each entry of {@code partitionInfos}, the task's input gate is looked up by the
 * entry's intermediate data set ID and the channel update is handed to the RPC service for
 * execution. If no task is registered for the execution attempt, the update is discarded and
 * the call is still acknowledged.
 *
 * @param executionAttemptID execution attempt whose input gates should be updated
 * @param partitionInfos partition infos providing the input channel deployment descriptors
 * @return an acknowledgement, both when updates were handed off and when the task is unknown
 * @throws PartitionException if the task has no input gate for one of the given IDs; updates
 *     already handed off for earlier entries are not revoked
 */
@RpcMethod
public Acknowledge updatePartitions(final ExecutionAttemptID executionAttemptID, Iterable<PartitionInfo> partitionInfos) throws PartitionException {
    final Task targetTask = taskSlotTable.getTask(executionAttemptID);

    // Nothing to update if the task is not (or no longer) registered.
    if (targetTask == null) {
        log.debug("Discard update for input partitions of task {}. Task is no longer running.", executionAttemptID);
        return Acknowledge.get();
    }

    for (final PartitionInfo info : partitionInfos) {
        final IntermediateDataSetID dataSetId = info.getIntermediateDataSetID();
        final SingleInputGate inputGate = targetTask.getInputGateById(dataSetId);

        if (inputGate == null) {
            throw new PartitionException("No reader with ID " + dataSetId + " for task " + executionAttemptID + " was found.");
        }

        // Hand the update to the RPC service because it might be blocking.
        getRpcService().execute(new Runnable() {

            @Override
            public void run() {
                try {
                    inputGate.updateInputChannel(info.getInputChannelDeploymentDescriptor());
                } catch (IOException | InterruptedException updateFailure) {
                    log.error("Could not update input data location for task {}. Trying to fail task.", targetTask.getTaskInfo().getTaskName(), updateFailure);
                    try {
                        targetTask.failExternally(updateFailure);
                    } catch (RuntimeException failTaskError) {
                        // TODO: Check whether we need this or make the exception in failExternally checked
                        log.error("Failed canceling task with execution ID {} after task update failure.", executionAttemptID, failTaskError);
                    }
                }
            }
        });
    }

    return Acknowledge.get();
}
Also used : Task(org.apache.flink.runtime.taskmanager.Task) PartitionException(org.apache.flink.runtime.taskexecutor.exceptions.PartitionException) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) PartitionInfo(org.apache.flink.runtime.executiongraph.PartitionInfo) SingleInputGate(org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate) RpcMethod(org.apache.flink.runtime.rpc.RpcMethod)

Example 17 with RpcMethod

use of org.apache.flink.runtime.rpc.RpcMethod in project flink by apache.

The method submitTask of the class TaskExecutor.

// ======================================================================
//  RPC methods
// ======================================================================
// ----------------------------------------------------------------------
// Task lifecycle RPCs
// ----------------------------------------------------------------------
/**
 * Accepts a task deployment, registers the resulting task in the slot table and starts it.
 *
 * <p>The method deserializes the job and task information from the descriptor, checks that a
 * JobManager connection exists for the job, that the given leader id matches the connection's
 * leader id, and that an active slot exists for the job and allocation. It then builds the
 * {@link Task}, adds it to the task slot table and starts its thread.
 *
 * @param tdd deployment descriptor of the task to run
 * @param jobManagerLeaderId leader id the submitting JobManager claims to have
 * @return an acknowledgement once the task thread has been started
 * @throws TaskSubmissionException if the job or task information cannot be deserialized, no
 *     JobManager connection is known for the job, the leader id does not match, no active slot
 *     exists for the allocation, the slot table rejects the task, or a task with the same
 *     execution id is already present
 */
@RpcMethod
public Acknowledge submitTask(TaskDeploymentDescriptor tdd, UUID jobManagerLeaderId) throws TaskSubmissionException {
    // Step 1: turn the pre-serialized payloads back into objects.
    final ClassLoader classLoader = getClass().getClassLoader();
    final JobInformation jobInformation;
    final TaskInformation taskInformation;
    try {
        jobInformation = tdd.getSerializedJobInformation().deserializeValue(classLoader);
        taskInformation = tdd.getSerializedTaskInformation().deserializeValue(classLoader);
    } catch (IOException | ClassNotFoundException e) {
        throw new TaskSubmissionException("Could not deserialize the job or task information.", e);
    }

    // Step 2: validate the submission against the known JobManager connection and slots.
    final JobID jobId = jobInformation.getJobId();
    final JobManagerConnection jobManagerConnection = jobManagerTable.get(jobId);

    if (jobManagerConnection == null) {
        final String noJobManagerMsg = "Could not submit task because there is no JobManager associated for the job " + jobId + '.';
        log.debug(noJobManagerMsg);
        throw new TaskSubmissionException(noJobManagerMsg);
    }

    if (!jobManagerConnection.getLeaderId().equals(jobManagerLeaderId)) {
        final String leaderMismatchMsg = "Rejecting the task submission because the job manager leader id "
            + jobManagerLeaderId
            + " does not match the expected job manager leader id "
            + jobManagerConnection.getLeaderId()
            + '.';
        log.debug(leaderMismatchMsg);
        throw new TaskSubmissionException(leaderMismatchMsg);
    }

    if (!taskSlotTable.existsActiveSlot(jobId, tdd.getAllocationId())) {
        final String noSlotMsg = "No task slot allocated for job ID " + jobId + " and allocation ID " + tdd.getAllocationId() + '.';
        log.debug(noSlotMsg);
        throw new TaskSubmissionException(noSlotMsg);
    }

    // Step 3: gather the collaborators the task needs.
    TaskMetricGroup taskMetricGroup = taskManagerMetricGroup.addTaskForJob(
        jobInformation.getJobId(),
        jobInformation.getJobName(),
        taskInformation.getJobVertexId(),
        tdd.getExecutionAttemptId(),
        taskInformation.getTaskName(),
        tdd.getSubtaskIndex(),
        tdd.getAttemptNumber());

    InputSplitProvider inputSplitProvider = new RpcInputSplitProvider(
        jobManagerConnection.getLeaderId(),
        jobManagerConnection.getJobManagerGateway(),
        jobInformation.getJobId(),
        taskInformation.getJobVertexId(),
        tdd.getExecutionAttemptId(),
        taskManagerConfiguration.getTimeout());

    TaskManagerActions taskManagerActions = jobManagerConnection.getTaskManagerActions();
    CheckpointResponder checkpointResponder = jobManagerConnection.getCheckpointResponder();
    LibraryCacheManager libraryCache = jobManagerConnection.getLibraryCacheManager();
    ResultPartitionConsumableNotifier resultPartitionConsumableNotifier = jobManagerConnection.getResultPartitionConsumableNotifier();
    PartitionProducerStateChecker partitionStateChecker = jobManagerConnection.getPartitionStateChecker();

    // Step 4: build the task and register it with the slot table.
    Task task = new Task(
        jobInformation,
        taskInformation,
        tdd.getExecutionAttemptId(),
        tdd.getAllocationId(),
        tdd.getSubtaskIndex(),
        tdd.getAttemptNumber(),
        tdd.getProducedPartitions(),
        tdd.getInputGates(),
        tdd.getTargetSlotNumber(),
        tdd.getTaskStateHandles(),
        memoryManager,
        ioManager,
        networkEnvironment,
        broadcastVariableManager,
        taskManagerActions,
        inputSplitProvider,
        checkpointResponder,
        libraryCache,
        fileCache,
        taskManagerConfiguration,
        taskMetricGroup,
        resultPartitionConsumableNotifier,
        partitionStateChecker,
        getRpcService().getExecutor());

    log.info("Received task {}.", task.getTaskInfo().getTaskNameWithSubtasks());

    final boolean taskAdded;
    try {
        taskAdded = taskSlotTable.addTask(task);
    } catch (SlotNotFoundException | SlotNotActiveException e) {
        throw new TaskSubmissionException("Could not submit task.", e);
    }

    // The slot table reports false when it did not add the task.
    if (!taskAdded) {
        final String duplicateTaskMsg = "TaskManager already contains a task for id " + task.getExecutionId() + '.';
        log.debug(duplicateTaskMsg);
        throw new TaskSubmissionException(duplicateTaskMsg);
    }

    task.startTaskThread();
    return Acknowledge.get();
}
Also used : SlotNotFoundException(org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException) JobInformation(org.apache.flink.runtime.executiongraph.JobInformation) Task(org.apache.flink.runtime.taskmanager.Task) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) TaskMetricGroup(org.apache.flink.runtime.metrics.groups.TaskMetricGroup) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) RpcCheckpointResponder(org.apache.flink.runtime.taskexecutor.rpc.RpcCheckpointResponder) SlotNotActiveException(org.apache.flink.runtime.taskexecutor.slot.SlotNotActiveException) RpcInputSplitProvider(org.apache.flink.runtime.taskexecutor.rpc.RpcInputSplitProvider) IOException(java.io.IOException) BlobLibraryCacheManager(org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager) LibraryCacheManager(org.apache.flink.runtime.execution.librarycache.LibraryCacheManager) TaskManagerActions(org.apache.flink.runtime.taskmanager.TaskManagerActions) TaskSubmissionException(org.apache.flink.runtime.taskexecutor.exceptions.TaskSubmissionException) PartitionProducerStateChecker(org.apache.flink.runtime.io.network.netty.PartitionProducerStateChecker) InputSplitProvider(org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider) RpcInputSplitProvider(org.apache.flink.runtime.taskexecutor.rpc.RpcInputSplitProvider) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) RpcResultPartitionConsumableNotifier(org.apache.flink.runtime.taskexecutor.rpc.RpcResultPartitionConsumableNotifier) JobID(org.apache.flink.api.common.JobID) RpcMethod(org.apache.flink.runtime.rpc.RpcMethod)

Example 18 with RpcMethod

use of org.apache.flink.runtime.rpc.RpcMethod in project flink by apache.

The method confirmCheckpoint of the class TaskExecutor.

/**
 * Notifies a task that a checkpoint has been completed.
 *
 * @param executionAttemptID execution attempt of the task to notify
 * @param checkpointId id of the completed checkpoint, forwarded to the task
 * @param checkpointTimestamp timestamp of the checkpoint; only used for logging here
 * @return an acknowledgement once the task has been notified
 * @throws CheckpointException if no task is registered for the given execution attempt
 */
@RpcMethod
public Acknowledge confirmCheckpoint(ExecutionAttemptID executionAttemptID, long checkpointId, long checkpointTimestamp) throws CheckpointException {
    log.debug("Confirm checkpoint {}@{} for {}.", checkpointId, checkpointTimestamp, executionAttemptID);

    final Task targetTask = taskSlotTable.getTask(executionAttemptID);

    // Reject confirmations for tasks this TaskManager does not know.
    if (targetTask == null) {
        final String unknownTaskMsg = "TaskManager received a checkpoint confirmation for unknown task " + executionAttemptID + '.';
        log.debug(unknownTaskMsg);
        throw new CheckpointException(unknownTaskMsg);
    }

    targetTask.notifyCheckpointComplete(checkpointId);
    return Acknowledge.get();
}
Also used : Task(org.apache.flink.runtime.taskmanager.Task) CheckpointException(org.apache.flink.runtime.taskexecutor.exceptions.CheckpointException) RpcMethod(org.apache.flink.runtime.rpc.RpcMethod)

Aggregations

RpcMethod (org.apache.flink.runtime.rpc.RpcMethod)18 Task (org.apache.flink.runtime.taskmanager.Task)6 IOException (java.io.IOException)5 TimeoutException (java.util.concurrent.TimeoutException)4 LeaderIdMismatchException (org.apache.flink.runtime.highavailability.LeaderIdMismatchException)4 PartitionProducerDisposedException (org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException)4 JobID (org.apache.flink.api.common.JobID)3 CheckpointException (org.apache.flink.runtime.checkpoint.CheckpointException)3 JobExecutionException (org.apache.flink.runtime.client.JobExecutionException)3 AllocatedSlot (org.apache.flink.runtime.jobmanager.slots.AllocatedSlot)3 CheckpointException (org.apache.flink.runtime.taskexecutor.exceptions.CheckpointException)3 TaskException (org.apache.flink.runtime.taskexecutor.exceptions.TaskException)3 CheckpointCoordinator (org.apache.flink.runtime.checkpoint.CheckpointCoordinator)2 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)2 Execution (org.apache.flink.runtime.executiongraph.Execution)2 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)2 DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)2 JobManagerRegistration (org.apache.flink.runtime.resourcemanager.registration.JobManagerRegistration)2 PartitionException (org.apache.flink.runtime.taskexecutor.exceptions.PartitionException)2 TaskSubmissionException (org.apache.flink.runtime.taskexecutor.exceptions.TaskSubmissionException)2