Example 1 with TaskDeploymentDescriptor

use of org.apache.flink.runtime.deployment.TaskDeploymentDescriptor in project flink by apache.

the class Execution method deployToSlot.

public void deployToSlot(final SimpleSlot slot) throws JobException {
    checkNotNull(slot);
    // The more general check is the timeout of the deployment call
    if (!slot.isAlive()) {
        throw new JobException("Target slot (TaskManager) for deployment is no longer alive.");
    }
    // make sure exactly one deployment call happens from the correct state
    // note: the transition from CREATED to DEPLOYING is for testing purposes only
    ExecutionState previous = this.state;
    if (previous == SCHEDULED || previous == CREATED) {
        if (!transitionState(previous, DEPLOYING)) {
            // this should actually not happen and indicates a race somewhere else
            throw new IllegalStateException("Cannot deploy task: Concurrent deployment call race.");
        }
    } else {
        // vertex may have been cancelled, or it was already scheduled
        throw new IllegalStateException("The vertex must be in CREATED or SCHEDULED state to be deployed. Found state " + previous);
    }
    try {
        // good, we are allowed to deploy
        if (!slot.setExecutedVertex(this)) {
            throw new JobException("Could not assign the ExecutionVertex to the slot " + slot);
        }
        this.assignedResource = slot;
        // race double check, did we fail/cancel and do we need to release the slot?
        if (this.state != DEPLOYING) {
            slot.releaseSlot();
            return;
        }
        if (LOG.isInfoEnabled()) {
            LOG.info(String.format("Deploying %s (attempt #%d) to %s", vertex.getSimpleName(), attemptNumber, getAssignedResourceLocation().getHostname()));
        }
        final TaskDeploymentDescriptor deployment = vertex.createDeploymentDescriptor(attemptId, slot, taskState, attemptNumber);
        // register this execution at the execution graph, to receive callbacks
        vertex.getExecutionGraph().registerExecution(this);
        final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway();
        final Future<Acknowledge> submitResultFuture = taskManagerGateway.submitTask(deployment, timeout);
        submitResultFuture.exceptionallyAsync(new ApplyFunction<Throwable, Void>() {

            @Override
            public Void apply(Throwable failure) {
                if (failure instanceof TimeoutException) {
                    String taskname = vertex.getTaskNameWithSubtaskIndex() + " (" + attemptId + ')';
                    markFailed(new Exception("Cannot deploy task " + taskname + " - TaskManager (" + getAssignedResourceLocation() + ") not responding after a timeout of " + timeout, failure));
                } else {
                    markFailed(failure);
                }
                return null;
            }
        }, executor);
    } catch (Throwable t) {
        markFailed(t);
        ExceptionUtils.rethrow(t);
    }
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) TimeoutException(java.util.concurrent.TimeoutException) JobException(org.apache.flink.runtime.JobException) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor)
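
The failure callback above uses Flink's internal Future and ApplyFunction types. For readers more familiar with the JDK types, the same "mark failed on timeout, otherwise forward the failure" pattern can be sketched with java.util.concurrent.CompletableFuture; this is a minimal standalone illustration, not Flink code, and submitTask and markFailed are placeholder names:

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeoutException;

public class SubmitCallbackSketch {

    // placeholder for the RPC call that submits the task to the TaskManager
    static CompletableFuture<Void> submitTask() {
        return CompletableFuture.completedFuture(null);
    }

    // placeholder for Execution#markFailed
    static void markFailed(Throwable t) {
        System.err.println("Task failed: " + t);
    }

    public static void main(String[] args) {
        submitTask().exceptionally(failure -> {
            if (failure instanceof TimeoutException) {
                // the deployment was not acknowledged in time
                markFailed(new Exception("Cannot deploy task: TaskManager not responding", failure));
            } else {
                // any other failure is forwarded as-is
                markFailed(failure);
            }
            return null;
        });
    }
}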

Example 2 with TaskDeploymentDescriptor

use of org.apache.flink.runtime.deployment.TaskDeploymentDescriptor in project flink by apache.

the class TaskExecutorTest method testSubmitTaskBeforeAcceptSlot.

/**
	 * Tests that the task executor can receive a SubmitTask message before the OfferSlot response has arrived.
	 */
@Test
public void testSubmitTaskBeforeAcceptSlot() throws Exception {
    final JobID jobId = new JobID();
    final TestingSerialRpcService rpc = new TestingSerialRpcService();
    final Configuration configuration = new Configuration();
    final TaskManagerConfiguration taskManagerConfiguration = TaskManagerConfiguration.fromConfiguration(configuration);
    final ResourceID resourceId = new ResourceID("foobar");
    final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(resourceId, InetAddress.getLoopbackAddress(), 1234);
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    final TimerService<AllocationID> timerService = mock(TimerService.class);
    final TaskSlotTable taskSlotTable = new TaskSlotTable(Arrays.asList(mock(ResourceProfile.class), mock(ResourceProfile.class)), timerService);
    final JobManagerTable jobManagerTable = new JobManagerTable();
    final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation);
    final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
    final String resourceManagerAddress = "rm";
    final UUID resourceManagerLeaderId = UUID.randomUUID();
    final String jobManagerAddress = "jm";
    final UUID jobManagerLeaderId = UUID.randomUUID();
    final LeaderRetrievalService resourceManagerLeaderRetrievalService = new TestingLeaderRetrievalService(resourceManagerAddress, resourceManagerLeaderId);
    final LeaderRetrievalService jobManagerLeaderRetrievalService = new TestingLeaderRetrievalService(jobManagerAddress, jobManagerLeaderId);
    haServices.setResourceManagerLeaderRetriever(resourceManagerLeaderRetrievalService);
    haServices.setJobMasterLeaderRetriever(jobId, jobManagerLeaderRetrievalService);
    final ResourceManagerGateway resourceManagerGateway = mock(ResourceManagerGateway.class);
    final InstanceID registrationId = new InstanceID();
    when(resourceManagerGateway.registerTaskExecutor(eq(resourceManagerLeaderId), any(String.class), eq(resourceId), any(SlotReport.class), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new TaskExecutorRegistrationSuccess(registrationId, 1000L)));
    final ResourceID jmResourceId = new ResourceID(jobManagerAddress);
    final int blobPort = 42;
    final AllocationID allocationId1 = new AllocationID();
    final AllocationID allocationId2 = new AllocationID();
    final SlotOffer offer1 = new SlotOffer(allocationId1, 0, ResourceProfile.UNKNOWN);
    final JobMasterGateway jobMasterGateway = mock(JobMasterGateway.class);
    when(jobMasterGateway.registerTaskManager(any(String.class), eq(taskManagerLocation), eq(jobManagerLeaderId), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new JMTMRegistrationSuccess(jmResourceId, blobPort)));
    when(jobMasterGateway.getHostname()).thenReturn(jobManagerAddress);
    rpc.registerGateway(resourceManagerAddress, resourceManagerGateway);
    rpc.registerGateway(jobManagerAddress, jobMasterGateway);
    final LibraryCacheManager libraryCacheManager = mock(LibraryCacheManager.class);
    when(libraryCacheManager.getClassLoader(eq(jobId))).thenReturn(getClass().getClassLoader());
    final JobManagerConnection jobManagerConnection = new JobManagerConnection(jobId, jmResourceId, jobMasterGateway, jobManagerLeaderId, mock(TaskManagerActions.class), mock(CheckpointResponder.class), libraryCacheManager, mock(ResultPartitionConsumableNotifier.class), mock(PartitionProducerStateChecker.class));
    jobManagerTable.put(jobId, jobManagerConnection);
    try {
        final TaskExecutor taskManager = new TaskExecutor(taskManagerConfiguration, taskManagerLocation, rpc, mock(MemoryManager.class), mock(IOManager.class), mock(NetworkEnvironment.class), haServices, mock(HeartbeatServices.class, RETURNS_MOCKS), mock(MetricRegistry.class), mock(TaskManagerMetricGroup.class), mock(BroadcastVariableManager.class), mock(FileCache.class), taskSlotTable, jobManagerTable, jobLeaderService, testingFatalErrorHandler);
        taskManager.start();
        taskSlotTable.allocateSlot(0, jobId, allocationId1, Time.milliseconds(10000L));
        taskSlotTable.allocateSlot(1, jobId, allocationId2, Time.milliseconds(10000L));
        final JobVertexID jobVertexId = new JobVertexID();
        JobInformation jobInformation = new JobInformation(jobId, name.getMethodName(), new SerializedValue<>(new ExecutionConfig()), new Configuration(), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList());
        TaskInformation taskInformation = new TaskInformation(jobVertexId, "test task", 1, 1, TestInvokable.class.getName(), new Configuration());
        SerializedValue<JobInformation> serializedJobInformation = new SerializedValue<>(jobInformation);
        SerializedValue<TaskInformation> serializedJobVertexInformation = new SerializedValue<>(taskInformation);
        final TaskDeploymentDescriptor tdd = new TaskDeploymentDescriptor(serializedJobInformation, serializedJobVertexInformation, new ExecutionAttemptID(), allocationId1, 0, 0, 0, null, Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList());
        CompletableFuture<Iterable<SlotOffer>> offerResultFuture = new FlinkCompletableFuture<>();
        // submit task first and then return acceptance response
        when(jobMasterGateway.offerSlots(any(ResourceID.class), any(Iterable.class), eq(jobManagerLeaderId), any(Time.class))).thenReturn(offerResultFuture);
        // we have to add the job after the TaskExecutor, because otherwise the service has not
        // been properly started. This will also offer the slots to the job master
        jobLeaderService.addJob(jobId, jobManagerAddress);
        verify(jobMasterGateway).offerSlots(any(ResourceID.class), any(Iterable.class), eq(jobManagerLeaderId), any(Time.class));
        // submit the task without having acknowledged the offered slots
        taskManager.submitTask(tdd, jobManagerLeaderId);
        // acknowledge the offered slots
        offerResultFuture.complete(Collections.singleton(offer1));
        verify(resourceManagerGateway).notifySlotAvailable(eq(resourceManagerLeaderId), eq(registrationId), eq(new SlotID(resourceId, 1)));
        assertTrue(taskSlotTable.existsActiveSlot(jobId, allocationId1));
        assertFalse(taskSlotTable.existsActiveSlot(jobId, allocationId2));
        assertTrue(taskSlotTable.isSlotFree(1));
        // check if a concurrent error occurred
        testingFatalErrorHandler.rethrowError();
    } finally {
        rpc.stopService();
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) InstanceID(org.apache.flink.runtime.instance.InstanceID) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) TaskManagerActions(org.apache.flink.runtime.taskmanager.TaskManagerActions) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) TestingSerialRpcService(org.apache.flink.runtime.rpc.TestingSerialRpcService) PartitionProducerStateChecker(org.apache.flink.runtime.io.network.netty.PartitionProducerStateChecker) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) UUID(java.util.UUID) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) FileCache(org.apache.flink.runtime.filecache.FileCache) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) JobID(org.apache.flink.api.common.JobID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) Time(org.apache.flink.api.common.time.Time) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) JobInformation(org.apache.flink.runtime.executiongraph.JobInformation) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) LibraryCacheManager(org.apache.flink.runtime.execution.librarycache.LibraryCacheManager) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) SerializedValue(org.apache.flink.util.SerializedValue) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) Test(org.junit.Test)
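
The ordering that this test exercises is achieved by handing the mocked job master an uncompleted future for the slot-offer response, submitting the task, and only then completing the future. A minimal standalone sketch of that deferred-completion pattern with a plain CompletableFuture (the strings are illustrative; this is not the Flink slot-offer API):

import java.util.concurrent.CompletableFuture;

public class DeferredOfferSketch {

    public static void main(String[] args) {
        // response future that the test completes manually later
        CompletableFuture<String> offerResponse = new CompletableFuture<>();

        // register what should happen once the offer is accepted
        offerResponse.thenAccept(accepted -> System.out.println("slot offer accepted: " + accepted));

        // work that happens before the acceptance arrives, i.e. "submit the task"
        System.out.println("task submitted before the offer response");

        // now acknowledge the offered slot; the callback above runs at this point
        offerResponse.complete("offer-1");
    }
}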

Example 3 with TaskDeploymentDescriptor

use of org.apache.flink.runtime.deployment.TaskDeploymentDescriptor in project flink by apache.

the class TaskManagerTest method testTriggerStackTraceSampleMessage.

// ------------------------------------------------------------------------
// Stack trace sample
// ------------------------------------------------------------------------
/**
	 * Tests sampling of task stack traces.
	 */
@Test
@SuppressWarnings("unchecked")
public void testTriggerStackTraceSampleMessage() throws Exception {
    new JavaTestKit(system) {

        {
            ActorGateway taskManagerActorGateway = null;
            // We need this to be a JM that answers to update messages for
            // robustness on Travis (if jobs need to be resubmitted in (4)).
            ActorRef jm = system.actorOf(Props.create(new SimpleLookupJobManagerCreator(null)));
            ActorGateway jobManagerActorGateway = new AkkaActorGateway(jm, null);
            final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), leaderSessionID);
            try {
                final ActorGateway jobManager = jobManagerActorGateway;
                final ActorGateway taskManager = TestingUtils.createTaskManager(system, jobManager, new Configuration(), true, false);
                final JobID jobId = new JobID();
                // Single blocking task
                final TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(jobId, "Job", new JobVertexID(), new ExecutionAttemptID(), new SerializedValue<>(new ExecutionConfig()), "Task", 1, 0, 1, 0, new Configuration(), new Configuration(), BlockingNoOpInvokable.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList(), 0);
                // Submit the task
                new Within(d) {

                    @Override
                    protected void run() {
                        try {
                            // Make sure to register
                            Future<?> connectFuture = taskManager.ask(new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jobManager.actor()), remaining());
                            Await.ready(connectFuture, remaining());
                            Future<Object> taskRunningFuture = taskManager.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(tdd.getExecutionAttemptId()), timeout);
                            taskManager.tell(new SubmitTask(tdd));
                            Await.ready(taskRunningFuture, d);
                        } catch (Exception e) {
                            e.printStackTrace();
                            fail(e.getMessage());
                        }
                    }
                };
                //
                // 1) Trigger sample for non-existing task
                //
                new Within(d) {

                    @Override
                    protected void run() {
                        try {
                            ExecutionAttemptID taskId = new ExecutionAttemptID();
                            taskManager.tell(new TriggerStackTraceSample(112223, taskId, 100, timeD, 0), testActorGateway);
                            // Receive the expected message (heartbeat races possible)
                            Object[] msg = receiveN(1);
                            while (!(msg[0] instanceof Status.Failure)) {
                                msg = receiveN(1);
                            }
                            Status.Failure response = (Status.Failure) msg[0];
                            assertEquals(IllegalStateException.class, response.cause().getClass());
                        } catch (Exception e) {
                            e.printStackTrace();
                            fail(e.getMessage());
                        }
                    }
                };
                //
                // 2) Trigger sample for the blocking task
                //
                new Within(d) {

                    @Override
                    protected void run() {
                        boolean success = false;
                        Throwable lastError = null;
                        for (int i = 0; i < 100 && !success; i++) {
                            try {
                                int numSamples = 5;
                                taskManager.tell(new TriggerStackTraceSample(19230, tdd.getExecutionAttemptId(), numSamples, Time.milliseconds(100L), 0), testActorGateway);
                                // Receive the expected message (heartbeat races possible)
                                Object[] msg = receiveN(1);
                                while (!(msg[0] instanceof StackTraceSampleResponse)) {
                                    msg = receiveN(1);
                                }
                                StackTraceSampleResponse response = (StackTraceSampleResponse) msg[0];
                                // ---- Verify response ----
                                assertEquals(19230, response.getSampleId());
                                assertEquals(tdd.getExecutionAttemptId(), response.getExecutionAttemptID());
                                List<StackTraceElement[]> traces = response.getSamples();
                                assertEquals("Number of samples", numSamples, traces.size());
                                for (StackTraceElement[] trace : traces) {
                                    // Look for BlockingNoOpInvokable#invoke
                                    for (StackTraceElement elem : trace) {
                                        if (elem.getClassName().equals(BlockingNoOpInvokable.class.getName())) {
                                            assertEquals("invoke", elem.getMethodName());
                                            success = true;
                                            break;
                                        }
                                    }
                                    assertTrue("Unexpected stack trace: " + Arrays.toString(trace), success);
                                }
                            } catch (Throwable t) {
                                lastError = t;
                                LOG.warn("Failed to find invokable.", t);
                            }
                            try {
                                Thread.sleep(100);
                            } catch (InterruptedException e) {
                                LOG.error("Interrupted while sleeping before retry.", e);
                                break;
                            }
                        }
                        if (!success) {
                            if (lastError == null) {
                                fail("Failed to find invokable");
                            } else {
                                fail(lastError.getMessage());
                            }
                        }
                    }
                };
                //
                // 3) Trigger sample for the blocking task with max depth
                //
                new Within(d) {

                    @Override
                    protected void run() {
                        try {
                            int numSamples = 5;
                            int maxDepth = 2;
                            taskManager.tell(new TriggerStackTraceSample(1337, tdd.getExecutionAttemptId(), numSamples, Time.milliseconds(100L), maxDepth), testActorGateway);
                            // Receive the expected message (heartbeat races possible)
                            Object[] msg = receiveN(1);
                            while (!(msg[0] instanceof StackTraceSampleResponse)) {
                                msg = receiveN(1);
                            }
                            StackTraceSampleResponse response = (StackTraceSampleResponse) msg[0];
                            // ---- Verify response ----
                            assertEquals(1337, response.getSampleId());
                            assertEquals(tdd.getExecutionAttemptId(), response.getExecutionAttemptID());
                            List<StackTraceElement[]> traces = response.getSamples();
                            assertEquals("Number of samples", numSamples, traces.size());
                            for (StackTraceElement[] trace : traces) {
                                assertEquals("Max depth", maxDepth, trace.length);
                            }
                        } catch (Exception e) {
                            e.printStackTrace();
                            fail(e.getMessage());
                        }
                    }
                };
                //
                // 4) Trigger sample for the blocking task, but cancel it during sampling
                //
                new Within(d) {

                    @Override
                    protected void run() {
                        try {
                            int maxAttempts = 10;
                            int sleepTime = 100;
                            for (int i = 0; i < maxAttempts; i++, sleepTime *= 2) {
                                // Trigger many samples in order to cancel the task
                                // during a sample
                                taskManager.tell(new TriggerStackTraceSample(44, tdd.getExecutionAttemptId(), Integer.MAX_VALUE, Time.milliseconds(10L), 0), testActorGateway);
                                Thread.sleep(sleepTime);
                                Future<?> removeFuture = taskManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobId), remaining());
                                // Cancel the task
                                taskManager.tell(new CancelTask(tdd.getExecutionAttemptId()));
                                // Receive the expected message (heartbeat races possible)
                                while (true) {
                                    Object[] msg = receiveN(1);
                                    if (msg[0] instanceof StackTraceSampleResponse) {
                                        StackTraceSampleResponse response = (StackTraceSampleResponse) msg[0];
                                        assertEquals(tdd.getExecutionAttemptId(), response.getExecutionAttemptID());
                                        assertEquals(44, response.getSampleId());
                                        // Done
                                        return;
                                    } else if (msg[0] instanceof Failure) {
                                        // Wait for removal before resubmitting
                                        Await.ready(removeFuture, remaining());
                                        Future<?> taskRunningFuture = taskManager.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(tdd.getExecutionAttemptId()), timeout);
                                        // Resubmit
                                        taskManager.tell(new SubmitTask(tdd));
                                        Await.ready(taskRunningFuture, remaining());
                                        // Retry the sample message
                                        break;
                                    } else {
                                        // Different message
                                        continue;
                                    }
                                }
                            }
                        } catch (Exception e) {
                            e.printStackTrace();
                            fail(e.getMessage());
                        }
                    }
                };
            } finally {
                TestingUtils.stopActor(taskManagerActorGateway);
                TestingUtils.stopActor(jobManagerActorGateway);
            }
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) TriggerStackTraceSample(org.apache.flink.runtime.messages.StackTraceSampleMessages.TriggerStackTraceSample) TaskManagerServicesConfiguration(org.apache.flink.runtime.taskexecutor.TaskManagerServicesConfiguration) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) SubmitTask(org.apache.flink.runtime.messages.TaskMessages.SubmitTask) StackTraceSampleResponse(org.apache.flink.runtime.messages.StackTraceSampleResponse) CancelTask(org.apache.flink.runtime.messages.TaskMessages.CancelTask) TestingTaskManagerMessages(org.apache.flink.runtime.testingUtils.TestingTaskManagerMessages) Failure(scala.util.Failure) Status(akka.actor.Status) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) PartitionNotFoundException(org.apache.flink.runtime.io.network.partition.PartitionNotFoundException) IOException(java.io.IOException) BlockingNoOpInvokable(org.apache.flink.runtime.testtasks.BlockingNoOpInvokable) CompletableFuture(org.apache.flink.runtime.concurrent.CompletableFuture) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture) Future(scala.concurrent.Future) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
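
Part (3) of the test checks that each sampled trace is truncated to maxDepth frames. The underlying operation is simply taking a thread's stack trace and cutting it off at the requested depth; here is a minimal sketch of that truncation, independent of Flink's sampling coordinator:

import java.util.Arrays;

public class StackTraceSampleSketch {

    /** Returns the thread's current stack trace, truncated to maxDepth frames if maxDepth > 0. */
    static StackTraceElement[] sample(Thread thread, int maxDepth) {
        StackTraceElement[] trace = thread.getStackTrace();
        if (maxDepth > 0 && trace.length > maxDepth) {
            // keep only the topmost maxDepth frames, which is what the test asserts on
            trace = Arrays.copyOf(trace, maxDepth);
        }
        return trace;
    }

    public static void main(String[] args) {
        StackTraceElement[] trace = sample(Thread.currentThread(), 2);
        System.out.println(Arrays.toString(trace)); // at most 2 frames
    }
}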

Example 4 with TaskDeploymentDescriptor

use of org.apache.flink.runtime.deployment.TaskDeploymentDescriptor in project flink by apache.

the class TaskManagerTest method testCancellingDependentAndStateUpdateFails.

@Test
public void testCancellingDependentAndStateUpdateFails() {
    // this test creates two tasks: the sender sends data but fails to send the
    // state update back to the job manager, and the receiver blocks until it is cancelled
    new JavaTestKit(system) {

        {
            ActorGateway jobManager = null;
            ActorGateway taskManager = null;
            final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), leaderSessionID);
            try {
                final JobID jid = new JobID();
                JobVertexID vid1 = new JobVertexID();
                JobVertexID vid2 = new JobVertexID();
                final ExecutionAttemptID eid1 = new ExecutionAttemptID();
                final ExecutionAttemptID eid2 = new ExecutionAttemptID();
                ActorRef jm = system.actorOf(Props.create(new SimpleLookupFailingUpdateJobManagerCreator(leaderSessionID, eid2)));
                jobManager = new AkkaActorGateway(jm, leaderSessionID);
                taskManager = TestingUtils.createTaskManager(system, jobManager, new Configuration(), true, true);
                final ActorGateway tm = taskManager;
                IntermediateResultPartitionID partitionId = new IntermediateResultPartitionID();
                List<ResultPartitionDeploymentDescriptor> irpdd = new ArrayList<ResultPartitionDeploymentDescriptor>();
                irpdd.add(new ResultPartitionDeploymentDescriptor(new IntermediateDataSetID(), partitionId, ResultPartitionType.PIPELINED, 1, 1, true));
                InputGateDeploymentDescriptor ircdd = new InputGateDeploymentDescriptor(new IntermediateDataSetID(), ResultPartitionType.PIPELINED, 0, new InputChannelDeploymentDescriptor[] { new InputChannelDeploymentDescriptor(new ResultPartitionID(partitionId, eid1), ResultPartitionLocation.createLocal()) });
                final TaskDeploymentDescriptor tdd1 = createTaskDeploymentDescriptor(jid, "TestJob", vid1, eid1, new SerializedValue<>(new ExecutionConfig()), "Sender", 1, 0, 1, 0, new Configuration(), new Configuration(), Tasks.Sender.class.getName(), irpdd, Collections.<InputGateDeploymentDescriptor>emptyList(), new ArrayList<BlobKey>(), Collections.<URL>emptyList(), 0);
                final TaskDeploymentDescriptor tdd2 = createTaskDeploymentDescriptor(jid, "TestJob", vid2, eid2, new SerializedValue<>(new ExecutionConfig()), "Receiver", 7, 2, 7, 0, new Configuration(), new Configuration(), Tasks.BlockingReceiver.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.singletonList(ircdd), new ArrayList<BlobKey>(), Collections.<URL>emptyList(), 0);
                new Within(d) {

                    @Override
                    protected void run() {
                        try {
                            Future<Object> t1Running = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(eid1), timeout);
                            Future<Object> t2Running = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(eid2), timeout);
                            tm.tell(new SubmitTask(tdd2), testActorGateway);
                            tm.tell(new SubmitTask(tdd1), testActorGateway);
                            expectMsgEquals(Acknowledge.get());
                            expectMsgEquals(Acknowledge.get());
                            Await.ready(t1Running, d);
                            Await.ready(t2Running, d);
                            tm.tell(TestingTaskManagerMessages.getRequestRunningTasksMessage(), testActorGateway);
                            Map<ExecutionAttemptID, Task> tasks = expectMsgClass(TestingTaskManagerMessages.ResponseRunningTasks.class).asJava();
                            Task t1 = tasks.get(eid1);
                            Task t2 = tasks.get(eid2);
                            tm.tell(new CancelTask(eid2), testActorGateway);
                            expectMsgEquals(Acknowledge.get());
                            if (t2 != null) {
                                Future<Object> response = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskRemoved(eid2), timeout);
                                Await.ready(response, d);
                            }
                            if (t1 != null) {
                                if (t1.getExecutionState() == ExecutionState.RUNNING) {
                                    tm.tell(new CancelTask(eid1), testActorGateway);
                                    expectMsgEquals(Acknowledge.get());
                                }
                                Future<Object> response = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskRemoved(eid1), timeout);
                                Await.ready(response, d);
                            }
                            tm.tell(TestingTaskManagerMessages.getRequestRunningTasksMessage(), testActorGateway);
                            tasks = expectMsgClass(TestingTaskManagerMessages.ResponseRunningTasks.class).asJava();
                            assertEquals(0, tasks.size());
                        } catch (Exception e) {
                            e.printStackTrace();
                            fail(e.getMessage());
                        }
                    }
                };
            } catch (Exception e) {
                e.printStackTrace();
                fail(e.getMessage());
            } finally {
                // shut down the actors
                TestingUtils.stopActor(taskManager);
                TestingUtils.stopActor(jobManager);
            }
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) StopTask(org.apache.flink.runtime.messages.TaskMessages.StopTask) CancelTask(org.apache.flink.runtime.messages.TaskMessages.CancelTask) SubmitTask(org.apache.flink.runtime.messages.TaskMessages.SubmitTask) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) TaskManagerServicesConfiguration(org.apache.flink.runtime.taskexecutor.TaskManagerServicesConfiguration) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) BlobKey(org.apache.flink.runtime.blob.BlobKey) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) TestingTaskManagerMessages(org.apache.flink.runtime.testingUtils.TestingTaskManagerMessages) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) PartitionNotFoundException(org.apache.flink.runtime.io.network.partition.PartitionNotFoundException) IOException(java.io.IOException) InputChannelDeploymentDescriptor(org.apache.flink.runtime.deployment.InputChannelDeploymentDescriptor) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
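
The synchronization pattern at the heart of this test is: cancel a task, wait for a "task removed" notification, and only then assert that the running-task map is empty. A standalone sketch of that pattern, with a plain CompletableFuture standing in for the NotifyWhenTaskRemoved future (illustrative only, not the Flink messaging API):

import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;

public class CancelAndVerifySketch {

    public static void main(String[] args) throws Exception {
        Map<String, String> runningTasks = new ConcurrentHashMap<>();
        runningTasks.put("receiver", "RUNNING");

        // future that the "task manager" completes once the task has been removed
        CompletableFuture<Void> removed = new CompletableFuture<>();

        // cancellation is asynchronous; simulate the removal happening on another thread
        new Thread(() -> {
            runningTasks.remove("receiver");
            removed.complete(null);
        }).start();

        // block until the removal is confirmed before checking the task table
        removed.get(5, TimeUnit.SECONDS);
        System.out.println("running tasks: " + runningTasks.size()); // prints 0
    }
}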

Example 5 with TaskDeploymentDescriptor

use of org.apache.flink.runtime.deployment.TaskDeploymentDescriptor in project flink by apache.

the class TaskManagerTest method testSubmitAndExecuteTask.

@Test
public void testSubmitAndExecuteTask() throws IOException {
    new JavaTestKit(system) {

        {
            ActorGateway taskManager = null;
            final ActorGateway jobManager = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
            try {
                taskManager = TestingUtils.createTaskManager(system, jobManager, new Configuration(), true, false);
                final ActorGateway tm = taskManager;
                // handle the registration
                new Within(d) {

                    @Override
                    protected void run() {
                        expectMsgClass(RegistrationMessages.RegisterTaskManager.class);
                        final InstanceID iid = new InstanceID();
                        assertEquals(tm.actor(), getLastSender());
                        tm.tell(new RegistrationMessages.AcknowledgeRegistration(iid, 12345), jobManager);
                    }
                };
                final JobID jid = new JobID();
                final JobVertexID vid = new JobVertexID();
                final ExecutionAttemptID eid = new ExecutionAttemptID();
                final SerializedValue<ExecutionConfig> executionConfig = new SerializedValue<>(new ExecutionConfig());
                final TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(jid, "TestJob", vid, eid, executionConfig, "TestTask", 7, 2, 7, 0, new Configuration(), new Configuration(), TestInvokableCorrect.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), new ArrayList<BlobKey>(), Collections.<URL>emptyList(), 0);
                new Within(d) {

                    @Override
                    protected void run() {
                        tm.tell(new SubmitTask(tdd), jobManager);
                        // TaskManager should acknowledge the submission
                        // heartbeats may be interleaved
                        long deadline = System.currentTimeMillis() + 10000;
                        do {
                            Object message = receiveOne(d);
                            if (message.equals(Acknowledge.get())) {
                                break;
                            }
                        } while (System.currentTimeMillis() < deadline);
                        // task should have switched to running
                        Object toRunning = new TaskMessages.UpdateTaskExecutionState(new TaskExecutionState(jid, eid, ExecutionState.RUNNING));
                        // task should have switched to finished
                        Object toFinished = new TaskMessages.UpdateTaskExecutionState(new TaskExecutionState(jid, eid, ExecutionState.FINISHED));
                        deadline = System.currentTimeMillis() + 10000;
                        do {
                            Object message = receiveOne(d);
                            if (message.equals(toRunning)) {
                                break;
                            } else if (!(message instanceof TaskManagerMessages.Heartbeat)) {
                                fail("Unexpected message: " + message);
                            }
                        } while (System.currentTimeMillis() < deadline);
                        deadline = System.currentTimeMillis() + 10000;
                        do {
                            Object message = receiveOne(d);
                            if (message.equals(toFinished)) {
                                break;
                            } else if (!(message instanceof TaskManagerMessages.Heartbeat)) {
                                fail("Unexpected message: " + message);
                            }
                        } while (System.currentTimeMillis() < deadline);
                    }
                };
            } finally {
                // shut down the actors
                TestingUtils.stopActor(taskManager);
                TestingUtils.stopActor(jobManager);
            }
        }
    };
}
Also used : RegistrationMessages(org.apache.flink.runtime.messages.RegistrationMessages) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TaskManagerServicesConfiguration(org.apache.flink.runtime.taskexecutor.TaskManagerServicesConfiguration) Configuration(org.apache.flink.configuration.Configuration) InstanceID(org.apache.flink.runtime.instance.InstanceID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SerializedValue(org.apache.flink.util.SerializedValue) BlobKey(org.apache.flink.runtime.blob.BlobKey) TestingTaskManagerMessages(org.apache.flink.runtime.testingUtils.TestingTaskManagerMessages) TaskManagerMessages(org.apache.flink.runtime.messages.TaskManagerMessages) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) SubmitTask(org.apache.flink.runtime.messages.TaskMessages.SubmitTask) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
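
The three do/while loops in this test share one pattern: poll incoming messages until the expected one arrives or a deadline passes, skipping unrelated heartbeats in between. A generic sketch of that loop, with the actor-test receive call replaced by an illustrative Supplier (plain Java, not the JavaTestKit API):

import java.util.function.Predicate;
import java.util.function.Supplier;

public class AwaitMessageSketch {

    /** Polls the supplier until a message matches the predicate or the deadline (in millis) passes. */
    static Object awaitMatching(Supplier<Object> receive, Predicate<Object> expected, long timeoutMillis) {
        long deadline = System.currentTimeMillis() + timeoutMillis;
        while (System.currentTimeMillis() < deadline) {
            Object message = receive.get();
            if (expected.test(message)) {
                return message;
            }
            // anything else, e.g. a heartbeat, is ignored and polling continues
        }
        throw new AssertionError("Expected message did not arrive within " + timeoutMillis + " ms");
    }

    public static void main(String[] args) {
        Object result = awaitMatching(() -> "FINISHED", "FINISHED"::equals, 1000);
        System.out.println(result);
    }
}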

Aggregations

TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor): 21 usages
JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID): 18 usages
Test (org.junit.Test): 18 usages
JobID (org.apache.flink.api.common.JobID): 17 usages
Configuration (org.apache.flink.configuration.Configuration): 17 usages
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 16 usages
ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID): 15 usages
ActorGateway (org.apache.flink.runtime.instance.ActorGateway): 14 usages
ActorRef (akka.actor.ActorRef): 13 usages
AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway): 13 usages
SubmitTask (org.apache.flink.runtime.messages.TaskMessages.SubmitTask): 13 usages
TaskManagerServicesConfiguration (org.apache.flink.runtime.taskexecutor.TaskManagerServicesConfiguration): 13 usages
JavaTestKit (akka.testkit.JavaTestKit): 11 usages
IOException (java.io.IOException): 10 usages
PartitionNotFoundException (org.apache.flink.runtime.io.network.partition.PartitionNotFoundException): 9 usages
IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID): 8 usages
TestingTaskManagerMessages (org.apache.flink.runtime.testingUtils.TestingTaskManagerMessages): 8 usages
BlobKey (org.apache.flink.runtime.blob.BlobKey): 7 usages
InputChannelDeploymentDescriptor (org.apache.flink.runtime.deployment.InputChannelDeploymentDescriptor): 6 usages
InputGateDeploymentDescriptor (org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor): 6 usages