Search in sources :

Example 21 with Task

use of org.apache.flink.runtime.taskmanager.Task in project flink by apache.

the class TaskExecutor method submitTask.

// ======================================================================
//  RPC methods
// ======================================================================
// ----------------------------------------------------------------------
// Task lifecycle RPCs
// ----------------------------------------------------------------------
@RpcMethod
public Acknowledge submitTask(TaskDeploymentDescriptor tdd, UUID jobManagerLeaderId) throws TaskSubmissionException {
    // first, deserialize the pre-serialized information
    final JobInformation jobInformation;
    final TaskInformation taskInformation;
    try {
        jobInformation = tdd.getSerializedJobInformation().deserializeValue(getClass().getClassLoader());
        taskInformation = tdd.getSerializedTaskInformation().deserializeValue(getClass().getClassLoader());
    } catch (IOException | ClassNotFoundException e) {
        throw new TaskSubmissionException("Could not deserialize the job or task information.", e);
    }
    final JobID jobId = jobInformation.getJobId();
    final JobManagerConnection jobManagerConnection = jobManagerTable.get(jobId);
    if (jobManagerConnection == null) {
        final String message = "Could not submit task because there is no JobManager " + "associated for the job " + jobId + '.';
        log.debug(message);
        throw new TaskSubmissionException(message);
    }
    if (!jobManagerConnection.getLeaderId().equals(jobManagerLeaderId)) {
        final String message = "Rejecting the task submission because the job manager leader id " + jobManagerLeaderId + " does not match the expected job manager leader id " + jobManagerConnection.getLeaderId() + '.';
        log.debug(message);
        throw new TaskSubmissionException(message);
    }
    if (!taskSlotTable.existsActiveSlot(jobId, tdd.getAllocationId())) {
        final String message = "No task slot allocated for job ID " + jobId + " and allocation ID " + tdd.getAllocationId() + '.';
        log.debug(message);
        throw new TaskSubmissionException(message);
    }
    TaskMetricGroup taskMetricGroup = taskManagerMetricGroup.addTaskForJob(jobInformation.getJobId(), jobInformation.getJobName(), taskInformation.getJobVertexId(), tdd.getExecutionAttemptId(), taskInformation.getTaskName(), tdd.getSubtaskIndex(), tdd.getAttemptNumber());
    InputSplitProvider inputSplitProvider = new RpcInputSplitProvider(jobManagerConnection.getLeaderId(), jobManagerConnection.getJobManagerGateway(), jobInformation.getJobId(), taskInformation.getJobVertexId(), tdd.getExecutionAttemptId(), taskManagerConfiguration.getTimeout());
    TaskManagerActions taskManagerActions = jobManagerConnection.getTaskManagerActions();
    CheckpointResponder checkpointResponder = jobManagerConnection.getCheckpointResponder();
    LibraryCacheManager libraryCache = jobManagerConnection.getLibraryCacheManager();
    ResultPartitionConsumableNotifier resultPartitionConsumableNotifier = jobManagerConnection.getResultPartitionConsumableNotifier();
    PartitionProducerStateChecker partitionStateChecker = jobManagerConnection.getPartitionStateChecker();
    Task task = new Task(jobInformation, taskInformation, tdd.getExecutionAttemptId(), tdd.getAllocationId(), tdd.getSubtaskIndex(), tdd.getAttemptNumber(), tdd.getProducedPartitions(), tdd.getInputGates(), tdd.getTargetSlotNumber(), tdd.getTaskStateHandles(), memoryManager, ioManager, networkEnvironment, broadcastVariableManager, taskManagerActions, inputSplitProvider, checkpointResponder, libraryCache, fileCache, taskManagerConfiguration, taskMetricGroup, resultPartitionConsumableNotifier, partitionStateChecker, getRpcService().getExecutor());
    log.info("Received task {}.", task.getTaskInfo().getTaskNameWithSubtasks());
    boolean taskAdded;
    try {
        taskAdded = taskSlotTable.addTask(task);
    } catch (SlotNotFoundException | SlotNotActiveException e) {
        throw new TaskSubmissionException("Could not submit task.", e);
    }
    if (taskAdded) {
        task.startTaskThread();
        return Acknowledge.get();
    } else {
        final String message = "TaskManager already contains a task for id " + task.getExecutionId() + '.';
        log.debug(message);
        throw new TaskSubmissionException(message);
    }
}
Also used : SlotNotFoundException(org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException) JobInformation(org.apache.flink.runtime.executiongraph.JobInformation) Task(org.apache.flink.runtime.taskmanager.Task) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) TaskMetricGroup(org.apache.flink.runtime.metrics.groups.TaskMetricGroup) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) RpcCheckpointResponder(org.apache.flink.runtime.taskexecutor.rpc.RpcCheckpointResponder) SlotNotActiveException(org.apache.flink.runtime.taskexecutor.slot.SlotNotActiveException) RpcInputSplitProvider(org.apache.flink.runtime.taskexecutor.rpc.RpcInputSplitProvider) IOException(java.io.IOException) BlobLibraryCacheManager(org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager) LibraryCacheManager(org.apache.flink.runtime.execution.librarycache.LibraryCacheManager) TaskManagerActions(org.apache.flink.runtime.taskmanager.TaskManagerActions) TaskSubmissionException(org.apache.flink.runtime.taskexecutor.exceptions.TaskSubmissionException) PartitionProducerStateChecker(org.apache.flink.runtime.io.network.netty.PartitionProducerStateChecker) InputSplitProvider(org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider) RpcInputSplitProvider(org.apache.flink.runtime.taskexecutor.rpc.RpcInputSplitProvider) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) RpcResultPartitionConsumableNotifier(org.apache.flink.runtime.taskexecutor.rpc.RpcResultPartitionConsumableNotifier) JobID(org.apache.flink.api.common.JobID) RpcMethod(org.apache.flink.runtime.rpc.RpcMethod)

Example 22 with Task

use of org.apache.flink.runtime.taskmanager.Task in project flink by apache.

the class TaskExecutor method confirmCheckpoint.

@RpcMethod
public Acknowledge confirmCheckpoint(ExecutionAttemptID executionAttemptID, long checkpointId, long checkpointTimestamp) throws CheckpointException {
    log.debug("Confirm checkpoint {}@{} for {}.", checkpointId, checkpointTimestamp, executionAttemptID);
    final Task task = taskSlotTable.getTask(executionAttemptID);
    if (task != null) {
        task.notifyCheckpointComplete(checkpointId);
        return Acknowledge.get();
    } else {
        final String message = "TaskManager received a checkpoint confirmation for unknown task " + executionAttemptID + '.';
        log.debug(message);
        throw new CheckpointException(message);
    }
}
Also used : Task(org.apache.flink.runtime.taskmanager.Task) CheckpointException(org.apache.flink.runtime.taskexecutor.exceptions.CheckpointException) RpcMethod(org.apache.flink.runtime.rpc.RpcMethod)

Example 23 with Task

use of org.apache.flink.runtime.taskmanager.Task in project flink by apache.

the class BlockingCheckpointsTest method createTask.

// ------------------------------------------------------------------------
//  Utilities
// ------------------------------------------------------------------------
private static Task createTask(Configuration taskConfig) throws IOException {
    JobInformation jobInformation = new JobInformation(new JobID(), "test job name", new SerializedValue<>(new ExecutionConfig()), new Configuration(), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList());
    TaskInformation taskInformation = new TaskInformation(new JobVertexID(), "test task name", 1, 11, TestStreamTask.class.getName(), taskConfig);
    TaskKvStateRegistry mockKvRegistry = mock(TaskKvStateRegistry.class);
    NetworkEnvironment network = mock(NetworkEnvironment.class);
    when(network.createKvStateTaskRegistry(any(JobID.class), any(JobVertexID.class))).thenReturn(mockKvRegistry);
    return new Task(jobInformation, taskInformation, new ExecutionAttemptID(), new AllocationID(), 0, 0, Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), 0, null, mock(MemoryManager.class), mock(IOManager.class), network, mock(BroadcastVariableManager.class), mock(TaskManagerActions.class), mock(InputSplitProvider.class), mock(CheckpointResponder.class), new FallbackLibraryCacheManager(), new FileCache(new String[] { EnvironmentInformation.getTemporaryFileDirectory() }), new TestingTaskManagerRuntimeInfo(), new UnregisteredTaskMetricsGroup(), mock(ResultPartitionConsumableNotifier.class), mock(PartitionProducerStateChecker.class), Executors.directExecutor());
}
Also used : Task(org.apache.flink.runtime.taskmanager.Task) Configuration(org.apache.flink.configuration.Configuration) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TaskKvStateRegistry(org.apache.flink.runtime.query.TaskKvStateRegistry) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TaskManagerActions(org.apache.flink.runtime.taskmanager.TaskManagerActions) TestingTaskManagerRuntimeInfo(org.apache.flink.runtime.util.TestingTaskManagerRuntimeInfo) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) PartitionProducerStateChecker(org.apache.flink.runtime.io.network.netty.PartitionProducerStateChecker) InputSplitProvider(org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) UnregisteredTaskMetricsGroup(org.apache.flink.runtime.operators.testutils.UnregisteredTaskMetricsGroup) JobInformation(org.apache.flink.runtime.executiongraph.JobInformation) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) FallbackLibraryCacheManager(org.apache.flink.runtime.execution.librarycache.FallbackLibraryCacheManager) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) FileCache(org.apache.flink.runtime.filecache.FileCache) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) JobID(org.apache.flink.api.common.JobID)

Example 24 with Task

use of org.apache.flink.runtime.taskmanager.Task in project flink by apache.

the class InterruptSensitiveRestoreTest method createTask.

// ------------------------------------------------------------------------
//  Utilities
// ------------------------------------------------------------------------
private static Task createTask(Configuration taskConfig, StreamStateHandle state, int mode) throws IOException {
    NetworkEnvironment networkEnvironment = mock(NetworkEnvironment.class);
    when(networkEnvironment.createKvStateTaskRegistry(any(JobID.class), any(JobVertexID.class))).thenReturn(mock(TaskKvStateRegistry.class));
    ChainedStateHandle<StreamStateHandle> operatorState = null;
    List<KeyGroupsStateHandle> keyGroupStateFromBackend = Collections.emptyList();
    List<KeyGroupsStateHandle> keyGroupStateFromStream = Collections.emptyList();
    List<Collection<OperatorStateHandle>> operatorStateBackend = Collections.emptyList();
    List<Collection<OperatorStateHandle>> operatorStateStream = Collections.emptyList();
    Map<String, OperatorStateHandle.StateMetaInfo> operatorStateMetadata = new HashMap<>(1);
    OperatorStateHandle.StateMetaInfo metaInfo = new OperatorStateHandle.StateMetaInfo(new long[] { 0 }, OperatorStateHandle.Mode.SPLIT_DISTRIBUTE);
    operatorStateMetadata.put(DefaultOperatorStateBackend.DEFAULT_OPERATOR_STATE_NAME, metaInfo);
    KeyGroupRangeOffsets keyGroupRangeOffsets = new KeyGroupRangeOffsets(new KeyGroupRange(0, 0));
    Collection<OperatorStateHandle> operatorStateHandles = Collections.singletonList(new OperatorStateHandle(operatorStateMetadata, state));
    List<KeyGroupsStateHandle> keyGroupsStateHandles = Collections.singletonList(new KeyGroupsStateHandle(keyGroupRangeOffsets, state));
    switch(mode) {
        case OPERATOR_MANAGED:
            operatorStateBackend = Collections.singletonList(operatorStateHandles);
            break;
        case OPERATOR_RAW:
            operatorStateStream = Collections.singletonList(operatorStateHandles);
            break;
        case KEYED_MANAGED:
            keyGroupStateFromBackend = keyGroupsStateHandles;
            break;
        case KEYED_RAW:
            keyGroupStateFromStream = keyGroupsStateHandles;
            break;
        case LEGACY:
            operatorState = new ChainedStateHandle<>(Collections.singletonList(state));
            break;
        default:
            throw new IllegalArgumentException();
    }
    TaskStateHandles taskStateHandles = new TaskStateHandles(operatorState, operatorStateBackend, operatorStateStream, keyGroupStateFromBackend, keyGroupStateFromStream);
    JobInformation jobInformation = new JobInformation(new JobID(), "test job name", new SerializedValue<>(new ExecutionConfig()), new Configuration(), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList());
    TaskInformation taskInformation = new TaskInformation(new JobVertexID(), "test task name", 1, 1, SourceStreamTask.class.getName(), taskConfig);
    return new Task(jobInformation, taskInformation, new ExecutionAttemptID(), new AllocationID(), 0, 0, Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), 0, taskStateHandles, mock(MemoryManager.class), mock(IOManager.class), networkEnvironment, mock(BroadcastVariableManager.class), mock(TaskManagerActions.class), mock(InputSplitProvider.class), mock(CheckpointResponder.class), new FallbackLibraryCacheManager(), new FileCache(new String[] { EnvironmentInformation.getTemporaryFileDirectory() }), new TestingTaskManagerRuntimeInfo(), new UnregisteredTaskMetricsGroup(), mock(ResultPartitionConsumableNotifier.class), mock(PartitionProducerStateChecker.class), mock(Executor.class));
}
Also used : Task(org.apache.flink.runtime.taskmanager.Task) Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) KeyGroupRangeOffsets(org.apache.flink.runtime.state.KeyGroupRangeOffsets) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) TaskKvStateRegistry(org.apache.flink.runtime.query.TaskKvStateRegistry) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) TaskManagerActions(org.apache.flink.runtime.taskmanager.TaskManagerActions) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) TestingTaskManagerRuntimeInfo(org.apache.flink.runtime.util.TestingTaskManagerRuntimeInfo) Executor(java.util.concurrent.Executor) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) PartitionProducerStateChecker(org.apache.flink.runtime.io.network.netty.PartitionProducerStateChecker) InputSplitProvider(org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) UnregisteredTaskMetricsGroup(org.apache.flink.runtime.operators.testutils.UnregisteredTaskMetricsGroup) JobInformation(org.apache.flink.runtime.executiongraph.JobInformation) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) FallbackLibraryCacheManager(org.apache.flink.runtime.execution.librarycache.FallbackLibraryCacheManager) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) FileCache(org.apache.flink.runtime.filecache.FileCache) TaskStateHandles(org.apache.flink.runtime.state.TaskStateHandles) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) Collection(java.util.Collection) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) JobID(org.apache.flink.api.common.JobID)

Example 25 with Task

use of org.apache.flink.runtime.taskmanager.Task in project flink by apache.

the class InterruptSensitiveRestoreTest method testRestoreWithInterrupt.

private void testRestoreWithInterrupt(int mode) throws Exception {
    IN_RESTORE_LATCH.reset();
    Configuration taskConfig = new Configuration();
    StreamConfig cfg = new StreamConfig(taskConfig);
    cfg.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    switch(mode) {
        case OPERATOR_MANAGED:
        case OPERATOR_RAW:
        case KEYED_MANAGED:
        case KEYED_RAW:
            cfg.setStateKeySerializer(IntSerializer.INSTANCE);
            cfg.setStreamOperator(new StreamSource<>(new TestSource()));
            break;
        case LEGACY:
            cfg.setStreamOperator(new StreamSource<>(new TestSourceLegacy()));
            break;
        default:
            throw new IllegalArgumentException();
    }
    StreamStateHandle lockingHandle = new InterruptLockingStateHandle();
    Task task = createTask(taskConfig, lockingHandle, mode);
    // start the task and wait until it is in "restore"
    task.startTaskThread();
    IN_RESTORE_LATCH.await();
    // trigger cancellation and signal to continue
    task.cancelExecution();
    task.getExecutingThread().join(30000);
    if (task.getExecutionState() == ExecutionState.CANCELING) {
        fail("Task is stuck and not canceling");
    }
    assertEquals(ExecutionState.CANCELED, task.getExecutionState());
    assertNull(task.getFailureCause());
}
Also used : Task(org.apache.flink.runtime.taskmanager.Task) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) Configuration(org.apache.flink.configuration.Configuration) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig)

Aggregations

Task (org.apache.flink.runtime.taskmanager.Task)25 Configuration (org.apache.flink.configuration.Configuration)13 Test (org.junit.Test)10 StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig)9 JobID (org.apache.flink.api.common.JobID)6 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)6 RpcMethod (org.apache.flink.runtime.rpc.RpcMethod)6 JobInformation (org.apache.flink.runtime.executiongraph.JobInformation)5 TaskInformation (org.apache.flink.runtime.executiongraph.TaskInformation)5 PartitionProducerStateChecker (org.apache.flink.runtime.io.network.netty.PartitionProducerStateChecker)5 ResultPartitionConsumableNotifier (org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier)5 PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest)5 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)4 BroadcastVariableManager (org.apache.flink.runtime.broadcast.BroadcastVariableManager)4 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)4 FileCache (org.apache.flink.runtime.filecache.FileCache)4 IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager)4 NetworkEnvironment (org.apache.flink.runtime.io.network.NetworkEnvironment)4 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)4 InputSplitProvider (org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider)4