
Example 6 with TaskInformation

Use of org.apache.flink.runtime.executiongraph.TaskInformation in project flink by apache.

From class TaskDeploymentDescriptorTest, method testSerialization.

@Test
public void testSerialization() {
    try {
        final JobID jobID = new JobID();
        final JobVertexID vertexID = new JobVertexID();
        final ExecutionAttemptID execId = new ExecutionAttemptID();
        final AllocationID allocationId = new AllocationID();
        final String jobName = "job name";
        final String taskName = "task name";
        final int numberOfKeyGroups = 1;
        final int indexInSubtaskGroup = 0;
        final int currentNumberOfSubtasks = 1;
        final int attemptNumber = 0;
        final Configuration jobConfiguration = new Configuration();
        final Configuration taskConfiguration = new Configuration();
        final Class<? extends AbstractInvokable> invokableClass = BatchTask.class;
        final List<ResultPartitionDeploymentDescriptor> producedResults = new ArrayList<ResultPartitionDeploymentDescriptor>(0);
        final List<InputGateDeploymentDescriptor> inputGates = new ArrayList<InputGateDeploymentDescriptor>(0);
        final List<BlobKey> requiredJars = new ArrayList<BlobKey>(0);
        final List<URL> requiredClasspaths = new ArrayList<URL>(0);
        final SerializedValue<ExecutionConfig> executionConfig = new SerializedValue<>(new ExecutionConfig());
        final SerializedValue<JobInformation> serializedJobInformation = new SerializedValue<>(new JobInformation(jobID, jobName, executionConfig, jobConfiguration, requiredJars, requiredClasspaths));
        final SerializedValue<TaskInformation> serializedJobVertexInformation = new SerializedValue<>(new TaskInformation(vertexID, taskName, currentNumberOfSubtasks, numberOfKeyGroups, invokableClass.getName(), taskConfiguration));
        final int targetSlotNumber = 47;
        final TaskStateHandles taskStateHandles = new TaskStateHandles();
        final TaskDeploymentDescriptor orig = new TaskDeploymentDescriptor(serializedJobInformation, serializedJobVertexInformation, execId, allocationId, indexInSubtaskGroup, attemptNumber, targetSlotNumber, taskStateHandles, producedResults, inputGates);
        final TaskDeploymentDescriptor copy = CommonTestUtils.createCopySerializable(orig);
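        // The deserialized copy must be a distinct object graph (no shared object
        // identity with the original), yet every field must still compare equal.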
        assertFalse(orig.getSerializedJobInformation() == copy.getSerializedJobInformation());
        assertFalse(orig.getSerializedTaskInformation() == copy.getSerializedTaskInformation());
        assertFalse(orig.getExecutionAttemptId() == copy.getExecutionAttemptId());
        assertFalse(orig.getTaskStateHandles() == copy.getTaskStateHandles());
        assertFalse(orig.getProducedPartitions() == copy.getProducedPartitions());
        assertFalse(orig.getInputGates() == copy.getInputGates());
        assertEquals(orig.getSerializedJobInformation(), copy.getSerializedJobInformation());
        assertEquals(orig.getSerializedTaskInformation(), copy.getSerializedTaskInformation());
        assertEquals(orig.getExecutionAttemptId(), copy.getExecutionAttemptId());
        assertEquals(orig.getAllocationId(), copy.getAllocationId());
        assertEquals(orig.getSubtaskIndex(), copy.getSubtaskIndex());
        assertEquals(orig.getAttemptNumber(), copy.getAttemptNumber());
        assertEquals(orig.getTargetSlotNumber(), copy.getTargetSlotNumber());
        assertEquals(orig.getTaskStateHandles(), copy.getTaskStateHandles());
        assertEquals(orig.getProducedPartitions(), copy.getProducedPartitions());
        assertEquals(orig.getInputGates(), copy.getInputGates());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: Configuration (org.apache.flink.configuration.Configuration), JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID), ArrayList (java.util.ArrayList), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), URL (java.net.URL), BlobKey (org.apache.flink.runtime.blob.BlobKey), JobInformation (org.apache.flink.runtime.executiongraph.JobInformation), ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID), TaskInformation (org.apache.flink.runtime.executiongraph.TaskInformation), BatchTask (org.apache.flink.runtime.operators.BatchTask), AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID), SerializedValue (org.apache.flink.util.SerializedValue), TaskStateHandles (org.apache.flink.runtime.state.TaskStateHandles), JobID (org.apache.flink.api.common.JobID), Test (org.junit.Test)
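The identity-vs-equality assertions above hinge on CommonTestUtils.createCopySerializable producing a deep copy. A minimal sketch of such a helper, assuming it simply round-trips the object through plain Java serialization (the real Flink utility may differ in details):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;

final class SerializationCopy {

    // Serialize to a byte array and read it back: the result is a fresh object
    // graph, so == comparisons fail while equals() comparisons still hold.
    @SuppressWarnings("unchecked")
    static <T extends Serializable> T createCopySerializable(T original) throws Exception {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (ObjectOutputStream oos = new ObjectOutputStream(bos)) {
            oos.writeObject(original);
        }
        try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bos.toByteArray()))) {
            return (T) ois.readObject();
        }
    }
}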

Example 7 with TaskInformation

Use of org.apache.flink.runtime.executiongraph.TaskInformation in project flink by apache.

From class SavepointITCase, method testTriggerSavepointAndResumeWithFileBasedCheckpoints.

/**
	 * Triggers a savepoint for a job that uses the FsStateBackend. We expect
	 * that all checkpoint files are written to a new savepoint directory.
	 *
	 * <ol>
	 * <li>Submit job, wait for some progress</li>
	 * <li>Trigger savepoint and verify that savepoint has been created</li>
	 * <li>Shut down the cluster, re-submit the job from the savepoint,
	 * verify that the initial state has been reset, and
	 * all tasks are running again</li>
	 * <li>Cancel job, dispose the savepoint, and verify that everything
	 * has been cleaned up</li>
	 * </ol>
	 */
@Test
public void testTriggerSavepointAndResumeWithFileBasedCheckpoints() throws Exception {
    // Config
    final int numTaskManagers = 2;
    final int numSlotsPerTaskManager = 2;
    final int parallelism = numTaskManagers * numSlotsPerTaskManager;
    final Deadline deadline = new FiniteDuration(5, TimeUnit.MINUTES).fromNow();
    final File testRoot = folder.newFolder();
    TestingCluster flink = null;
    try {
        // Create a test actor system
        ActorSystem testActorSystem = AkkaUtils.createDefaultActorSystem();
        // Flink configuration
        final Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTaskManagers);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlotsPerTaskManager);
        final File checkpointDir = new File(testRoot, "checkpoints");
        final File savepointRootDir = new File(testRoot, "savepoints");
        if (!checkpointDir.mkdir() || !savepointRootDir.mkdirs()) {
            fail("Test setup failed: failed to create temporary directories.");
        }
        // Use file based checkpoints
        config.setString(CoreOptions.STATE_BACKEND, "filesystem");
        config.setString(FsStateBackendFactory.CHECKPOINT_DIRECTORY_URI_CONF_KEY, checkpointDir.toURI().toString());
        config.setString(FsStateBackendFactory.MEMORY_THRESHOLD_CONF_KEY, "0");
        config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, savepointRootDir.toURI().toString());
        // Start Flink
        flink = new TestingCluster(config);
        flink.start(true);
        // Submit the job
        final JobGraph jobGraph = createJobGraph(parallelism, 0, 1000);
        final JobID jobId = jobGraph.getJobID();
        // Reset the static test job helpers
        StatefulCounter.resetForTest(parallelism);
        // Retrieve the job manager
        ActorGateway jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
        LOG.info("Submitting job " + jobGraph.getJobID() + " in detached mode.");
        flink.submitJobDetached(jobGraph);
        LOG.info("Waiting for some progress.");
        // wait for the JobManager to be ready
        Future<Object> allRunning = jobManager.ask(new WaitForAllVerticesToBeRunning(jobId), deadline.timeLeft());
        Await.ready(allRunning, deadline.timeLeft());
        // wait for the Tasks to be ready
        StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        LOG.info("Triggering a savepoint.");
        Future<Object> savepointPathFuture = jobManager.ask(new TriggerSavepoint(jobId, Option.<String>empty()), deadline.timeLeft());
        final String savepointPath = ((TriggerSavepointSuccess) Await.result(savepointPathFuture, deadline.timeLeft())).savepointPath();
        LOG.info("Retrieved savepoint path: " + savepointPath + ".");
        // Retrieve the savepoint from the testing job manager
        LOG.info("Requesting the savepoint.");
        Future<Object> savepointFuture = jobManager.ask(new RequestSavepoint(savepointPath), deadline.timeLeft());
        SavepointV1 savepoint = (SavepointV1) ((ResponseSavepoint) Await.result(savepointFuture, deadline.timeLeft())).savepoint();
        LOG.info("Retrieved savepoint: " + savepointPath + ".");
        // Shut down the Flink cluster (thereby canceling the job)
        LOG.info("Shutting down Flink cluster.");
        flink.shutdown();
        flink.awaitTermination();
        // - Verification START -------------------------------------------
        // Only one savepoint should exist
        File[] files = savepointRootDir.listFiles();
        if (files != null) {
            assertEquals("Savepoint not created in expected directory", 1, files.length);
            assertTrue("Savepoint did not create self-contained directory", files[0].isDirectory());
            File savepointDir = files[0];
            File[] savepointFiles = savepointDir.listFiles();
            assertNotNull(savepointFiles);
            // Expect one metadata file and one checkpoint file per stateful
            // parallel subtask
            String errMsg = "Did not write expected number of savepoint/checkpoint files to directory: " + Arrays.toString(savepointFiles);
            assertEquals(errMsg, 1 + parallelism, savepointFiles.length);
        } else {
            fail("Savepoint not created in expected directory");
        }
        // We currently have the following directory layout: checkpointDir/jobId/chk-ID
        File jobCheckpoints = new File(checkpointDir, jobId.toString());
        if (jobCheckpoints.exists()) {
            files = jobCheckpoints.listFiles();
            assertNotNull("Checkpoint directory empty", files);
            assertEquals("Checkpoints directory not clean: " + Arrays.toString(files), 0, files.length);
        }
        // - Verification END ---------------------------------------------
        // Restart the cluster
        LOG.info("Restarting Flink cluster.");
        flink.start();
        // Retrieve the job manager
        LOG.info("Retrieving JobManager.");
        jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
        LOG.info("JobManager: " + jobManager + ".");
        // Reset static test helpers
        StatefulCounter.resetForTest(parallelism);
        // Gather all task deployment descriptors
        final Throwable[] error = new Throwable[1];
        final TestingCluster finalFlink = flink;
        final Multimap<JobVertexID, TaskDeploymentDescriptor> tdds = HashMultimap.create();
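        // JavaTestKit lets the test participate as an actor: it registers itself as a
        // submit-task listener on every TaskManager, then collects one
        // TaskDeploymentDescriptor per deployed subtask from the reply messages.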
        new JavaTestKit(testActorSystem) {

            {
                new Within(deadline.timeLeft()) {

                    @Override
                    protected void run() {
                        try {
                            // Register to all submit task messages for job
                            for (ActorRef taskManager : finalFlink.getTaskManagersAsJava()) {
                                taskManager.tell(new TestingTaskManagerMessages.RegisterSubmitTaskListener(jobId), getTestActor());
                            }
                            // Set the savepoint path
                            jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
                            LOG.info("Resubmitting job " + jobGraph.getJobID() + " with " + "savepoint path " + savepointPath + " in detached mode.");
                            // Submit the job
                            finalFlink.submitJobDetached(jobGraph);
                            int numTasks = 0;
                            for (JobVertex jobVertex : jobGraph.getVertices()) {
                                numTasks += jobVertex.getParallelism();
                            }
                            // Gather the task deployment descriptors
                            LOG.info("Gathering " + numTasks + " submitted " + "TaskDeploymentDescriptor instances.");
                            for (int i = 0; i < numTasks; i++) {
                                ResponseSubmitTaskListener resp = (ResponseSubmitTaskListener) expectMsgAnyClassOf(getRemainingTime(), ResponseSubmitTaskListener.class);
                                TaskDeploymentDescriptor tdd = resp.tdd();
                                LOG.info("Received: " + tdd.toString() + ".");
                                TaskInformation taskInformation = tdd.getSerializedTaskInformation().deserializeValue(getClass().getClassLoader());
                                tdds.put(taskInformation.getJobVertexId(), tdd);
                            }
                        } catch (Throwable t) {
                            error[0] = t;
                        }
                    }
                };
            }
        };
        // - Verification START -------------------------------------------
        String errMsg = "Error during gathering of TaskDeploymentDescriptors";
        assertNull(errMsg, error[0]);
        // Verify that every task state in the savepoint has a matching task
        // deployment descriptor.
        for (TaskState taskState : savepoint.getTaskStates()) {
            Collection<TaskDeploymentDescriptor> taskTdds = tdds.get(taskState.getJobVertexID());
            errMsg = "Missing task for savepoint state for operator " + taskState.getJobVertexID() + ".";
            assertTrue(errMsg, taskTdds.size() > 0);
            assertEquals(taskState.getNumberCollectedStates(), taskTdds.size());
            for (TaskDeploymentDescriptor tdd : taskTdds) {
                SubtaskState subtaskState = taskState.getState(tdd.getSubtaskIndex());
                assertNotNull(subtaskState);
                errMsg = "Initial operator state mismatch.";
                assertEquals(errMsg, subtaskState.getLegacyOperatorState(), tdd.getTaskStateHandles().getLegacyOperatorState());
            }
        }
        // Await state is restored
        StatefulCounter.getRestoreLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        // Await some progress after restore
        StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        // - Verification END ---------------------------------------------
        LOG.info("Cancelling job " + jobId + ".");
        jobManager.tell(new CancelJob(jobId));
        LOG.info("Disposing savepoint " + savepointPath + ".");
        Future<Object> disposeFuture = jobManager.ask(new DisposeSavepoint(savepointPath), deadline.timeLeft());
        errMsg = "Failed to dispose savepoint " + savepointPath + ".";
        Object resp = Await.result(disposeFuture, deadline.timeLeft());
        assertTrue(errMsg, resp.getClass() == getDisposeSavepointSuccess().getClass());
        // - Verification START -------------------------------------------
        // The checkpoint files
        List<File> checkpointFiles = new ArrayList<>();
        for (TaskState stateForTaskGroup : savepoint.getTaskStates()) {
            for (SubtaskState subtaskState : stateForTaskGroup.getStates()) {
                ChainedStateHandle<StreamStateHandle> streamTaskState = subtaskState.getLegacyOperatorState();
                for (int i = 0; i < streamTaskState.getLength(); i++) {
                    if (streamTaskState.get(i) != null) {
                        FileStateHandle fileStateHandle = (FileStateHandle) streamTaskState.get(i);
                        checkpointFiles.add(new File(fileStateHandle.getFilePath().toUri()));
                    }
                }
            }
        }
        // The checkpoint files of the savepoint should have been discarded
        for (File f : checkpointFiles) {
            errMsg = "Checkpoint file " + f + " not cleaned up properly.";
            assertFalse(errMsg, f.exists());
        }
        if (checkpointFiles.size() > 0) {
            File parent = checkpointFiles.get(0).getParentFile();
            errMsg = "Checkpoint parent directory " + parent + " not cleaned up properly.";
            assertFalse(errMsg, parent.exists());
        }
        // All savepoints should have been cleaned up
        errMsg = "Savepoints directory not cleaned up properly: " + Arrays.toString(savepointRootDir.listFiles()) + ".";
        assertEquals(errMsg, 0, savepointRootDir.listFiles().length);
    // - Verification END ---------------------------------------------
    } finally {
        if (flink != null) {
            flink.shutdown();
        }
    }
}
Also used: ActorSystem (akka.actor.ActorSystem), RequestSavepoint (org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestSavepoint), Configuration (org.apache.flink.configuration.Configuration), ActorRef (akka.actor.ActorRef), JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID), ArrayList (java.util.ArrayList), ResponseSubmitTaskListener (org.apache.flink.runtime.testingUtils.TestingTaskManagerMessages.ResponseSubmitTaskListener), TestingCluster (org.apache.flink.runtime.testingUtils.TestingCluster), StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle), SavepointV1 (org.apache.flink.runtime.checkpoint.savepoint.SavepointV1), ActorGateway (org.apache.flink.runtime.instance.ActorGateway), TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor), CancelJob (org.apache.flink.runtime.messages.JobManagerMessages.CancelJob), TestingTaskManagerMessages (org.apache.flink.runtime.testingUtils.TestingTaskManagerMessages), TaskInformation (org.apache.flink.runtime.executiongraph.TaskInformation), WaitForAllVerticesToBeRunning (org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning), Deadline (scala.concurrent.duration.Deadline), FiniteDuration (scala.concurrent.duration.FiniteDuration), FileStateHandle (org.apache.flink.runtime.state.filesystem.FileStateHandle), TriggerSavepoint (org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint), ResponseSavepoint (org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.ResponseSavepoint), DisposeSavepoint (org.apache.flink.runtime.messages.JobManagerMessages.DisposeSavepoint), TriggerSavepointSuccess (org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepointSuccess), JobGraph (org.apache.flink.runtime.jobgraph.JobGraph), JobVertex (org.apache.flink.runtime.jobgraph.JobVertex), SubtaskState (org.apache.flink.runtime.checkpoint.SubtaskState), File (java.io.File), TaskState (org.apache.flink.runtime.checkpoint.TaskState), JobID (org.apache.flink.api.common.JobID), JavaTestKit (akka.testkit.JavaTestKit), Test (org.junit.Test)
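Nearly every blocking call in the test above is bounded by deadline.timeLeft(), so the whole scenario fails fast once its five-minute budget is exhausted instead of hanging the build. A minimal sketch of the same pattern using plain JDK types in place of Scala's Deadline (the names here are illustrative, not from the Flink test):

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

final class DeadlineBoundedAwait {

    // Wait on a latch, but never longer than what remains of an absolute deadline.
    static void awaitWithin(CountDownLatch latch, long deadlineNanos) throws InterruptedException {
        long remainingMillis = TimeUnit.NANOSECONDS.toMillis(deadlineNanos - System.nanoTime());
        if (remainingMillis <= 0 || !latch.await(remainingMillis, TimeUnit.MILLISECONDS)) {
            throw new AssertionError("Deadline exceeded while waiting");
        }
    }

    public static void main(String[] args) throws InterruptedException {
        // One shared budget for the whole scenario, analogous to the test's
        // new FiniteDuration(5, TimeUnit.MINUTES).fromNow().
        long deadlineNanos = System.nanoTime() + TimeUnit.MINUTES.toNanos(5);
        CountDownLatch progressLatch = new CountDownLatch(1);
        progressLatch.countDown(); // simulate progress being reported
        awaitWithin(progressLatch, deadlineNanos);
    }
}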

Example 8 with TaskInformation

Use of org.apache.flink.runtime.executiongraph.TaskInformation in project flink by apache.

From class TaskExecutorTest, method testTaskSubmission.

/**
	 * Tests that we can submit a task to the TaskManager given that we've allocated a slot there.
	 */
@Test(timeout = 1000L)
public void testTaskSubmission() throws Exception {
    final Configuration configuration = new Configuration();
    final TestingSerialRpcService rpc = new TestingSerialRpcService();
    final TaskManagerConfiguration taskManagerConfiguration = TaskManagerConfiguration.fromConfiguration(configuration);
    final JobID jobId = new JobID();
    final AllocationID allocationId = new AllocationID();
    final UUID jobManagerLeaderId = UUID.randomUUID();
    final JobVertexID jobVertexId = new JobVertexID();
    JobInformation jobInformation = new JobInformation(jobId, name.getMethodName(), new SerializedValue<>(new ExecutionConfig()), new Configuration(), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList());
    TaskInformation taskInformation = new TaskInformation(jobVertexId, "test task", 1, 1, TestInvokable.class.getName(), new Configuration());
    SerializedValue<JobInformation> serializedJobInformation = new SerializedValue<>(jobInformation);
    SerializedValue<TaskInformation> serializedJobVertexInformation = new SerializedValue<>(taskInformation);
    final TaskDeploymentDescriptor tdd = new TaskDeploymentDescriptor(serializedJobInformation, serializedJobVertexInformation, new ExecutionAttemptID(), allocationId, 0, 0, 0, null, Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList());
    final LibraryCacheManager libraryCacheManager = mock(LibraryCacheManager.class);
    when(libraryCacheManager.getClassLoader(eq(jobId))).thenReturn(getClass().getClassLoader());
    final JobManagerConnection jobManagerConnection = new JobManagerConnection(jobId, ResourceID.generate(), mock(JobMasterGateway.class), jobManagerLeaderId, mock(TaskManagerActions.class), mock(CheckpointResponder.class), libraryCacheManager, mock(ResultPartitionConsumableNotifier.class), mock(PartitionProducerStateChecker.class));
    final JobManagerTable jobManagerTable = new JobManagerTable();
    jobManagerTable.put(jobId, jobManagerConnection);
    final TaskSlotTable taskSlotTable = mock(TaskSlotTable.class);
    when(taskSlotTable.existsActiveSlot(eq(jobId), eq(allocationId))).thenReturn(true);
    when(taskSlotTable.addTask(any(Task.class))).thenReturn(true);
    final NetworkEnvironment networkEnvironment = mock(NetworkEnvironment.class);
    when(networkEnvironment.createKvStateTaskRegistry(eq(jobId), eq(jobVertexId))).thenReturn(mock(TaskKvStateRegistry.class));
    final TaskManagerMetricGroup taskManagerMetricGroup = mock(TaskManagerMetricGroup.class);
    when(taskManagerMetricGroup.addTaskForJob(any(JobID.class), anyString(), any(JobVertexID.class), any(ExecutionAttemptID.class), anyString(), anyInt(), anyInt())).thenReturn(mock(TaskMetricGroup.class));
    final HighAvailabilityServices haServices = mock(HighAvailabilityServices.class);
    when(haServices.getResourceManagerLeaderRetriever()).thenReturn(mock(LeaderRetrievalService.class));
    try {
        final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
        TaskExecutor taskManager = new TaskExecutor(taskManagerConfiguration, mock(TaskManagerLocation.class), rpc, mock(MemoryManager.class), mock(IOManager.class), networkEnvironment, haServices, mock(HeartbeatServices.class, RETURNS_MOCKS), mock(MetricRegistry.class), taskManagerMetricGroup, mock(BroadcastVariableManager.class), mock(FileCache.class), taskSlotTable, jobManagerTable, mock(JobLeaderService.class), testingFatalErrorHandler);
        taskManager.start();
        taskManager.submitTask(tdd, jobManagerLeaderId);
        Future<Boolean> completionFuture = TestInvokable.completableFuture;
        completionFuture.get();
        // check if a concurrent error occurred
        testingFatalErrorHandler.rethrowError();
    } finally {
        rpc.stopService();
    }
}
Also used: Task (org.apache.flink.runtime.taskmanager.Task), Configuration (org.apache.flink.configuration.Configuration), JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID), TaskKvStateRegistry (org.apache.flink.runtime.query.TaskKvStateRegistry), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), JobMasterGateway (org.apache.flink.runtime.jobmaster.JobMasterGateway), TaskManagerActions (org.apache.flink.runtime.taskmanager.TaskManagerActions), BroadcastVariableManager (org.apache.flink.runtime.broadcast.BroadcastVariableManager), TestingSerialRpcService (org.apache.flink.runtime.rpc.TestingSerialRpcService), TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor), PartitionProducerStateChecker (org.apache.flink.runtime.io.network.netty.PartitionProducerStateChecker), UUID (java.util.UUID), ResultPartitionConsumableNotifier (org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier), TestingFatalErrorHandler (org.apache.flink.runtime.util.TestingFatalErrorHandler), HeartbeatServices (org.apache.flink.runtime.heartbeat.HeartbeatServices), JobInformation (org.apache.flink.runtime.executiongraph.JobInformation), TaskInformation (org.apache.flink.runtime.executiongraph.TaskInformation), ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID), IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager), CheckpointResponder (org.apache.flink.runtime.taskmanager.CheckpointResponder), TaskMetricGroup (org.apache.flink.runtime.metrics.groups.TaskMetricGroup), TaskManagerLocation (org.apache.flink.runtime.taskmanager.TaskManagerLocation), AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID), TaskManagerMetricGroup (org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup), MetricRegistry (org.apache.flink.runtime.metrics.MetricRegistry), LibraryCacheManager (org.apache.flink.runtime.execution.librarycache.LibraryCacheManager), SerializedValue (org.apache.flink.util.SerializedValue), MemoryManager (org.apache.flink.runtime.memory.MemoryManager), FileCache (org.apache.flink.runtime.filecache.FileCache), TaskSlotTable (org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable), HighAvailabilityServices (org.apache.flink.runtime.highavailability.HighAvailabilityServices), TestingHighAvailabilityServices (org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices), LeaderRetrievalService (org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService), TestingLeaderRetrievalService (org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService), NetworkEnvironment (org.apache.flink.runtime.io.network.NetworkEnvironment), JobID (org.apache.flink.api.common.JobID), Test (org.junit.Test)
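The test blocks on TestInvokable.completableFuture to learn that the submitted task actually executed, but the invokable itself is not reproduced on this page. A hypothetical sketch of such a nested class, assuming the no-argument AbstractInvokable of this Flink version and a plain java.util.concurrent.CompletableFuture (the actual test may use Flink's own future type):

// Hypothetical reconstruction; the real TestInvokable in TaskExecutorTest is not shown here.
public static class TestInvokable extends AbstractInvokable {

    // Completed from the task thread, so the submitting test can block until the task ran.
    static final CompletableFuture<Boolean> completableFuture = new CompletableFuture<>();

    @Override
    public void invoke() throws Exception {
        completableFuture.complete(true);
    }
}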

Example 9 with TaskInformation

Use of org.apache.flink.runtime.executiongraph.TaskInformation in project flink by apache.

From class TaskTest, method createTask.

private Task createTask(Class<? extends AbstractInvokable> invokable, LibraryCacheManager libCache, NetworkEnvironment networkEnvironment, ResultPartitionConsumableNotifier consumableNotifier, PartitionProducerStateChecker partitionProducerStateChecker, Executor executor, Configuration taskManagerConfig, ExecutionConfig execConfig) throws IOException {
    JobID jobId = new JobID();
    JobVertexID jobVertexId = new JobVertexID();
    ExecutionAttemptID executionAttemptId = new ExecutionAttemptID();
    InputSplitProvider inputSplitProvider = new TaskInputSplitProvider(jobManagerGateway, jobId, jobVertexId, executionAttemptId, new FiniteDuration(60, TimeUnit.SECONDS));
    CheckpointResponder checkpointResponder = new ActorGatewayCheckpointResponder(jobManagerGateway);
    SerializedValue<ExecutionConfig> serializedExecutionConfig = new SerializedValue<>(execConfig);
    JobInformation jobInformation = new JobInformation(jobId, "Test Job", serializedExecutionConfig, new Configuration(), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList());
    TaskInformation taskInformation = new TaskInformation(jobVertexId, "Test Task", 1, 1, invokable.getName(), new Configuration());
    return new Task(jobInformation, taskInformation, executionAttemptId, new AllocationID(), 0, 0, Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), 0, null, mock(MemoryManager.class), mock(IOManager.class), networkEnvironment, mock(BroadcastVariableManager.class), taskManagerConnection, inputSplitProvider, checkpointResponder, libCache, mock(FileCache.class), new TestingTaskManagerRuntimeInfo(taskManagerConfig), mock(TaskMetricGroup.class), consumableNotifier, partitionProducerStateChecker, executor);
}
Also used: JobInformation (org.apache.flink.runtime.executiongraph.JobInformation), ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID), TaskInformation (org.apache.flink.runtime.executiongraph.TaskInformation), Configuration (org.apache.flink.configuration.Configuration), IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager), TaskMetricGroup (org.apache.flink.runtime.metrics.groups.TaskMetricGroup), JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID), AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID), FiniteDuration (scala.concurrent.duration.FiniteDuration), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), SerializedValue (org.apache.flink.util.SerializedValue), MemoryManager (org.apache.flink.runtime.memory.MemoryManager), FileCache (org.apache.flink.runtime.filecache.FileCache), TestingTaskManagerRuntimeInfo (org.apache.flink.runtime.util.TestingTaskManagerRuntimeInfo), BroadcastVariableManager (org.apache.flink.runtime.broadcast.BroadcastVariableManager), InputSplitProvider (org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider), JobID (org.apache.flink.api.common.JobID)
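A hypothetical call of this factory, with all collaborators mocked in the same Mockito style as the surrounding test (the concrete argument values are illustrative, not taken from TaskTest):

// Illustrative wiring only; any invokable class and default configurations would do.
Task task = createTask(
        BatchTask.class,
        mock(LibraryCacheManager.class),
        mock(NetworkEnvironment.class),
        mock(ResultPartitionConsumableNotifier.class),
        mock(PartitionProducerStateChecker.class),
        new Executor() {
            @Override
            public void execute(Runnable command) {
                command.run(); // execute inline on the calling thread
            }
        },
        new Configuration(),
        new ExecutionConfig());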

Example 10 with TaskInformation

Use of org.apache.flink.runtime.executiongraph.TaskInformation in project flink by apache.

From class TaskExecutor, method submitTask.

// ======================================================================
//  RPC methods
// ======================================================================
// ----------------------------------------------------------------------
// Task lifecycle RPCs
// ----------------------------------------------------------------------
@RpcMethod
public Acknowledge submitTask(TaskDeploymentDescriptor tdd, UUID jobManagerLeaderId) throws TaskSubmissionException {
    // first, deserialize the pre-serialized information
    final JobInformation jobInformation;
    final TaskInformation taskInformation;
    try {
        jobInformation = tdd.getSerializedJobInformation().deserializeValue(getClass().getClassLoader());
        taskInformation = tdd.getSerializedTaskInformation().deserializeValue(getClass().getClassLoader());
    } catch (IOException | ClassNotFoundException e) {
        throw new TaskSubmissionException("Could not deserialize the job or task information.", e);
    }
    final JobID jobId = jobInformation.getJobId();
    final JobManagerConnection jobManagerConnection = jobManagerTable.get(jobId);
    if (jobManagerConnection == null) {
        final String message = "Could not submit task because there is no JobManager " + "associated for the job " + jobId + '.';
        log.debug(message);
        throw new TaskSubmissionException(message);
    }
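    // Reject submissions whose leader id does not match the connected job
    // manager's, guarding against messages from a stale leader.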
    if (!jobManagerConnection.getLeaderId().equals(jobManagerLeaderId)) {
        final String message = "Rejecting the task submission because the job manager leader id " + jobManagerLeaderId + " does not match the expected job manager leader id " + jobManagerConnection.getLeaderId() + '.';
        log.debug(message);
        throw new TaskSubmissionException(message);
    }
    if (!taskSlotTable.existsActiveSlot(jobId, tdd.getAllocationId())) {
        final String message = "No task slot allocated for job ID " + jobId + " and allocation ID " + tdd.getAllocationId() + '.';
        log.debug(message);
        throw new TaskSubmissionException(message);
    }
    TaskMetricGroup taskMetricGroup = taskManagerMetricGroup.addTaskForJob(jobInformation.getJobId(), jobInformation.getJobName(), taskInformation.getJobVertexId(), tdd.getExecutionAttemptId(), taskInformation.getTaskName(), tdd.getSubtaskIndex(), tdd.getAttemptNumber());
    InputSplitProvider inputSplitProvider = new RpcInputSplitProvider(jobManagerConnection.getLeaderId(), jobManagerConnection.getJobManagerGateway(), jobInformation.getJobId(), taskInformation.getJobVertexId(), tdd.getExecutionAttemptId(), taskManagerConfiguration.getTimeout());
    TaskManagerActions taskManagerActions = jobManagerConnection.getTaskManagerActions();
    CheckpointResponder checkpointResponder = jobManagerConnection.getCheckpointResponder();
    LibraryCacheManager libraryCache = jobManagerConnection.getLibraryCacheManager();
    ResultPartitionConsumableNotifier resultPartitionConsumableNotifier = jobManagerConnection.getResultPartitionConsumableNotifier();
    PartitionProducerStateChecker partitionStateChecker = jobManagerConnection.getPartitionStateChecker();
    Task task = new Task(jobInformation, taskInformation, tdd.getExecutionAttemptId(), tdd.getAllocationId(), tdd.getSubtaskIndex(), tdd.getAttemptNumber(), tdd.getProducedPartitions(), tdd.getInputGates(), tdd.getTargetSlotNumber(), tdd.getTaskStateHandles(), memoryManager, ioManager, networkEnvironment, broadcastVariableManager, taskManagerActions, inputSplitProvider, checkpointResponder, libraryCache, fileCache, taskManagerConfiguration, taskMetricGroup, resultPartitionConsumableNotifier, partitionStateChecker, getRpcService().getExecutor());
    log.info("Received task {}.", task.getTaskInfo().getTaskNameWithSubtasks());
    boolean taskAdded;
    try {
        taskAdded = taskSlotTable.addTask(task);
    } catch (SlotNotFoundException | SlotNotActiveException e) {
        throw new TaskSubmissionException("Could not submit task.", e);
    }
    if (taskAdded) {
        task.startTaskThread();
        return Acknowledge.get();
    } else {
        final String message = "TaskManager already contains a task for id " + task.getExecutionId() + '.';
        log.debug(message);
        throw new TaskSubmissionException(message);
    }
}
Also used: SlotNotFoundException (org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException), JobInformation (org.apache.flink.runtime.executiongraph.JobInformation), Task (org.apache.flink.runtime.taskmanager.Task), TaskInformation (org.apache.flink.runtime.executiongraph.TaskInformation), TaskMetricGroup (org.apache.flink.runtime.metrics.groups.TaskMetricGroup), CheckpointResponder (org.apache.flink.runtime.taskmanager.CheckpointResponder), RpcCheckpointResponder (org.apache.flink.runtime.taskexecutor.rpc.RpcCheckpointResponder), SlotNotActiveException (org.apache.flink.runtime.taskexecutor.slot.SlotNotActiveException), RpcInputSplitProvider (org.apache.flink.runtime.taskexecutor.rpc.RpcInputSplitProvider), IOException (java.io.IOException), BlobLibraryCacheManager (org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager), LibraryCacheManager (org.apache.flink.runtime.execution.librarycache.LibraryCacheManager), TaskManagerActions (org.apache.flink.runtime.taskmanager.TaskManagerActions), TaskSubmissionException (org.apache.flink.runtime.taskexecutor.exceptions.TaskSubmissionException), PartitionProducerStateChecker (org.apache.flink.runtime.io.network.netty.PartitionProducerStateChecker), InputSplitProvider (org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider), ResultPartitionConsumableNotifier (org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier), RpcResultPartitionConsumableNotifier (org.apache.flink.runtime.taskexecutor.rpc.RpcResultPartitionConsumableNotifier), JobID (org.apache.flink.api.common.JobID), RpcMethod (org.apache.flink.runtime.rpc.RpcMethod)
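The deserialization at the top of submitTask is the receiving half of the SerializedValue pattern used when the TaskDeploymentDescriptor was built (see Examples 6 and 8). A minimal sketch of that round trip, assuming only the SerializedValue API already shown above and a surrounding method that declares throws Exception:

// Sender side (e.g. the JobManager): freeze the task metadata into opaque bytes.
SerializedValue<TaskInformation> serialized = new SerializedValue<>(taskInformation);

// Receiver side (the TaskExecutor): rebuild the object with a locally chosen
// class loader. An IOException or ClassNotFoundException here is what
// submitTask wraps into a TaskSubmissionException above.
TaskInformation restored = serialized.deserializeValue(getClass().getClassLoader());

Shipping the payload pre-serialized lets the receiver decide which class loader reconstructs it, as the code above does with getClass().getClassLoader().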

Aggregations

TaskInformation (org.apache.flink.runtime.executiongraph.TaskInformation): 12 usages
JobID (org.apache.flink.api.common.JobID): 10 usages
Configuration (org.apache.flink.configuration.Configuration): 10 usages
JobInformation (org.apache.flink.runtime.executiongraph.JobInformation): 10 usages
JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID): 10 usages
AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID): 9 usages
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 8 usages
ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID): 8 usages
BroadcastVariableManager (org.apache.flink.runtime.broadcast.BroadcastVariableManager): 7 usages
FileCache (org.apache.flink.runtime.filecache.FileCache): 7 usages
IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager): 7 usages
PartitionProducerStateChecker (org.apache.flink.runtime.io.network.netty.PartitionProducerStateChecker): 7 usages
ResultPartitionConsumableNotifier (org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier): 7 usages
MemoryManager (org.apache.flink.runtime.memory.MemoryManager): 7 usages
NetworkEnvironment (org.apache.flink.runtime.io.network.NetworkEnvironment): 6 usages
InputSplitProvider (org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider): 6 usages
CheckpointResponder (org.apache.flink.runtime.taskmanager.CheckpointResponder): 6 usages
TaskManagerActions (org.apache.flink.runtime.taskmanager.TaskManagerActions): 6 usages
LibraryCacheManager (org.apache.flink.runtime.execution.librarycache.LibraryCacheManager): 5 usages
TaskKvStateRegistry (org.apache.flink.runtime.query.TaskKvStateRegistry): 5 usages