Search in sources :

Example 81 with ActorGateway

use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.

the class TaskCancelAsyncProducerConsumerITCase method testCancelAsyncProducerAndConsumer.

/**
 * Tests that a task waiting on an async producer/consumer that is stuck
 * in a blocking buffer request can be properly cancelled.
 *
 * <p>This is currently required for the Flink Kafka sources, which spawn
 * a separate Thread consuming from Kafka and producing the intermediate
 * streams in the spawned Thread instead of the main task Thread.
 *
 * <p>The test starts a cluster with one TaskManager, one slot and 8 network
 * buffers, submits a producer/consumer job, polls until the producer thread is
 * inside a blocking buffer request and the consumer thread is WAITING, cancels
 * the job and finally checks the exceptions recorded for both async threads.
 *
 * @throws Exception if cluster start-up, job submission or any of the awaited
 *         futures fails or times out
 */
@Test
public void testCancelAsyncProducerAndConsumer() throws Exception {
    // Single overall time budget shared by every blocking call below.
    Deadline deadline = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();
    TestingCluster flink = null;
    try {
        // Cluster
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SEGMENT_SIZE_KEY, 4096);
        config.setInteger(ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY, 8);
        flink = new TestingCluster(config, true);
        flink.start();
        // Job with async producer and consumer
        JobVertex producer = new JobVertex("AsyncProducer");
        producer.setParallelism(1);
        producer.setInvokableClass(AsyncProducer.class);
        JobVertex consumer = new JobVertex("AsyncConsumer");
        consumer.setParallelism(1);
        consumer.setInvokableClass(AsyncConsumer.class);
        consumer.connectNewDataSetAsInput(producer, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
        // Both vertices share one slot sharing group (the cluster only has a single slot).
        SlotSharingGroup slot = new SlotSharingGroup(producer.getID(), consumer.getID());
        producer.setSlotSharingGroup(slot);
        consumer.setSlotSharingGroup(slot);
        JobGraph jobGraph = new JobGraph(producer, consumer);
        // Submit job and wait until running
        ActorGateway jobManager = flink.getLeaderGateway(deadline.timeLeft());
        flink.submitJobDetached(jobGraph);
        Object msg = new WaitForAllVerticesToBeRunning(jobGraph.getJobID());
        Future<?> runningFuture = jobManager.ask(msg, deadline.timeLeft());
        Await.ready(runningFuture, deadline.timeLeft());
        // Register for the CANCELED notification up front, before the job is cancelled
        msg = new NotifyWhenJobStatus(jobGraph.getJobID(), JobStatus.CANCELED);
        Future<?> cancelledFuture = jobManager.ask(msg, deadline.timeLeft());
        // Poll (at most 50 times, 500 ms apart) until the producer thread is blocked
        boolean producerBlocked = false;
        for (int i = 0; i < 50; i++) {
            Thread thread = ASYNC_PRODUCER_THREAD;
            if (thread != null && thread.isAlive()) {
                StackTraceElement[] stackTrace = thread.getStackTrace();
                producerBlocked = isInBlockingBufferRequest(stackTrace);
            }
            if (producerBlocked) {
                break;
            } else {
                // Retry
                Thread.sleep(500);
            }
        }
        // Verify that async producer is in blocking request. The diagnostic shows the
        // producer's own stack trace and tolerates a thread that was never published.
        Thread producerThread = ASYNC_PRODUCER_THREAD;
        String producerTrace = producerThread == null
                ? "<producer thread not set>"
                : Arrays.toString(producerThread.getStackTrace());
        assertTrue("Producer thread is not blocked: " + producerTrace, producerBlocked);
        // Poll (at most 50 times, 500 ms apart) until the consumer thread is WAITING
        boolean consumerWaiting = false;
        for (int i = 0; i < 50; i++) {
            Thread thread = ASYNC_CONSUMER_THREAD;
            if (thread != null && thread.isAlive()) {
                consumerWaiting = thread.getState() == Thread.State.WAITING;
            }
            if (consumerWaiting) {
                break;
            } else {
                // Retry
                Thread.sleep(500);
            }
        }
        // Verify that async consumer is waiting
        assertTrue("Consumer thread is not blocked.", consumerWaiting);
        // Cancel the job, then wait for both the cancel response and the CANCELED status
        msg = new CancelJob(jobGraph.getJobID());
        Future<?> cancelFuture = jobManager.ask(msg, deadline.timeLeft());
        Await.ready(cancelFuture, deadline.timeLeft());
        Await.ready(cancelledFuture, deadline.timeLeft());
        // Verify the expected Exceptions
        assertNotNull(ASYNC_PRODUCER_EXCEPTION);
        assertEquals(IllegalStateException.class, ASYNC_PRODUCER_EXCEPTION.getClass());
        assertNotNull(ASYNC_CONSUMER_EXCEPTION);
        assertEquals(IllegalStateException.class, ASYNC_CONSUMER_EXCEPTION.getClass());
    } finally {
        // Always tear the cluster down, even when an assertion above failed.
        if (flink != null) {
            flink.shutdown();
        }
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) WaitForAllVerticesToBeRunning(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) CancelJob(org.apache.flink.runtime.messages.JobManagerMessages.CancelJob) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup) NotifyWhenJobStatus(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.NotifyWhenJobStatus) Test(org.junit.Test)

Example 82 with ActorGateway

use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.

the class TaskInputSplitProviderTest method testRequestNextInputSplitWithInvalidExecutionID.

@Test
public void testRequestNextInputSplitWithInvalidExecutionID() throws InputSplitProviderException {
    // Fresh identifiers for the job, the vertex and the execution attempt.
    final JobID job = new JobID();
    final JobVertexID vertex = new JobVertexID();
    final ExecutionAttemptID attempt = new ExecutionAttemptID();
    final FiniteDuration askTimeout = new FiniteDuration(10, TimeUnit.SECONDS);
    // Gateway stub handed to the provider under test.
    final ActorGateway splitGateway = new NullInputSplitGateway();
    final TaskInputSplitProvider splitProvider =
            new TaskInputSplitProvider(splitGateway, job, vertex, attempt, askTimeout);
    final ClassLoader loader = getClass().getClassLoader();
    // The provider is expected to hand back no split for this request.
    final InputSplit split = splitProvider.getNextInputSplit(loader);
    assertTrue(split == null);
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) BaseTestingActorGateway(org.apache.flink.runtime.instance.BaseTestingActorGateway) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) FiniteDuration(scala.concurrent.duration.FiniteDuration) InputSplit(org.apache.flink.core.io.InputSplit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 83 with ActorGateway

use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.

the class TaskManagerRegistrationTest method testSimpleRegistration.

/**
 * Starts one JobManager and two TaskManagers and checks that both TaskManagers
 * confirm their registration and that the JobManager then reports two
 * registered TaskManagers.
 */
@Test
public void testSimpleRegistration() {
    new JavaTestKit(actorSystem) {

        {
            ActorGateway jm = null;
            ActorGateway firstTm = null;
            ActorGateway secondTm = null;
            try {
                // bring up the JobManager together with a resource manager
                jm = createJobManager(actorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), config);
                startResourceManager(config, jm.actor());
                // both TaskManagers try to register on their own once started
                firstTm = createTaskManager(actorSystem, jm, config, true, false);
                secondTm = createTaskManager(actorSystem, jm, config, true, false);
                // ask each TaskManager to notify us once it is registered
                Future<Object> firstRegistered = firstTm.ask(TaskManagerMessages.getNotifyWhenRegisteredAtJobManagerMessage(), timeout);
                Future<Object> secondRegistered = secondTm.ask(TaskManagerMessages.getNotifyWhenRegisteredAtJobManagerMessage(), timeout);
                Object firstReply = Await.result(firstRegistered, timeout);
                Object secondReply = Await.result(secondRegistered, timeout);
                // scala case objects cannot be referenced directly from Java, so the
                // replies are compared by class
                Class<?> expectedReplyClass = TaskManagerMessages.getRegisteredAtJobManagerMessage().getClass();
                assertTrue(expectedReplyClass.isInstance(firstReply));
                assertTrue(expectedReplyClass.isInstance(secondReply));
                // the JobManager must now know about exactly two TaskManagers
                Future<Object> registeredCountFuture = jm.ask(JobManagerMessages.getRequestNumberRegisteredTaskManager(), timeout);
                Integer registeredCount = (Integer) Await.result(registeredCountFuture, timeout);
                assertEquals(2, registeredCount.intValue());
            } catch (Exception e) {
                e.printStackTrace();
                fail(e.getMessage());
            } finally {
                stopActor(firstTm);
                stopActor(secondTm);
                stopActor(jm);
            }
        }
    };
}
Also used : ActorGateway(org.apache.flink.runtime.instance.ActorGateway) JavaTestKit(akka.testkit.JavaTestKit) InvalidActorNameException(akka.actor.InvalidActorNameException) Test(org.junit.Test)

Example 84 with ActorGateway

use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.

the class TaskManagerRegistrationTest method testTaskManagerResumesConnectAfterJobManagerFailure.

/**
 * Validate that the TaskManager attempts to re-connect after it lost the connection
 * to the JobManager.
 *
 * <p>The test kit's test actor plays the JobManager: a forwarding actor with a
 * fixed name relays messages to it. After the first registration has been
 * acknowledged, the forwarding actor is stopped, a second one is started under
 * the same name, and the test expects another {@code RegisterTaskManager}
 * message to arrive.
 */
@Test
public void testTaskManagerResumesConnectAfterJobManagerFailure() {
    new JavaTestKit(actorSystem) {

        {
            ActorGateway fakeJobManager1Gateway = null;
            ActorGateway fakeJobManager2Gateway = null;
            ActorGateway taskManagerGateway = null;
            final String JOB_MANAGER_NAME = "ForwardingJobManager";
            try {
                fakeJobManager1Gateway = TestingUtils.createForwardingActor(actorSystem, getTestActor(), Option.apply(JOB_MANAGER_NAME));
                // final alias so the anonymous Within blocks below can capture it
                final ActorGateway fakeJM1Gateway = fakeJobManager1Gateway;
                // we make the test actor (the test kit) the JobManager to intercept
                // the messages
                taskManagerGateway = createTaskManager(actorSystem, fakeJobManager1Gateway, config, true, false);
                final ActorGateway tm = taskManagerGateway;
                // validate initial registration
                new Within(timeout) {

                    @Override
                    protected void run() {
                        // the TaskManager should try to register
                        expectMsgClass(RegisterTaskManager.class);
                        // we accept the registration
                        tm.tell(new AcknowledgeRegistration(new InstanceID(), 45234), fakeJM1Gateway);
                    }
                };
                // kill the first forwarding JobManager
                watch(fakeJobManager1Gateway.actor());
                stopActor(fakeJobManager1Gateway.actor());
                new Within(timeout) {

                    @Override
                    protected void run() {
                        Object message = null;
                        // other messages may still be queued up in the testing actor's
                        // mailbox; skip everything until the Terminated message shows up
                        // (instanceof is false for null, so no separate null check is needed)
                        while (!(message instanceof Terminated)) {
                            message = receiveOne(timeout);
                        }
                        Terminated terminatedMessage = (Terminated) message;
                        assertEquals(fakeJM1Gateway.actor(), terminatedMessage.actor());
                    }
                };
                // already stopped above; clear it so the finally block does not stop it again
                fakeJobManager1Gateway = null;
                // now start the second fake JobManager and expect that
                // the TaskManager registers again
                // the second fake JM needs to have the same actor URL
                // since we cannot reliably wait until the actor is unregistered (name is
                // available again) we loop with multiple tries for 20 seconds
                long deadline = 20000000000L + System.nanoTime();
                do {
                    try {
                        fakeJobManager2Gateway = TestingUtils.createForwardingActor(actorSystem, getTestActor(), Option.apply(JOB_MANAGER_NAME));
                    } catch (InvalidActorNameException e) {
                        // wait and retry
                        Thread.sleep(100);
                    }
                    // nanoTime values must be compared via their difference, not directly
                } while (fakeJobManager2Gateway == null && System.nanoTime() - deadline < 0);
                // fail with a clear message instead of continuing with a null gateway
                if (fakeJobManager2Gateway == null) {
                    fail("Could not start the second forwarding JobManager named '" + JOB_MANAGER_NAME + "' within 20 seconds.");
                }
                final ActorGateway fakeJM2GatewayClosure = fakeJobManager2Gateway;
                // expect the next registration
                new Within(timeout) {

                    @Override
                    protected void run() {
                        expectMsgClass(RegisterTaskManager.class);
                        // we accept the registration
                        tm.tell(new AcknowledgeRegistration(new InstanceID(), 45234), fakeJM2GatewayClosure);
                    }
                };
            } catch (Throwable e) {
                e.printStackTrace();
                fail(e.getMessage());
            } finally {
                stopActor(taskManagerGateway);
                stopActor(fakeJobManager1Gateway);
                stopActor(fakeJobManager2Gateway);
            }
        }
    };
}
Also used : InvalidActorNameException(akka.actor.InvalidActorNameException) AcknowledgeRegistration(org.apache.flink.runtime.messages.RegistrationMessages.AcknowledgeRegistration) InstanceID(org.apache.flink.runtime.instance.InstanceID) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) Terminated(akka.actor.Terminated) JavaTestKit(akka.testkit.JavaTestKit) Test(org.junit.Test)

Example 85 with ActorGateway

use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.

the class TaskManagerRegistrationTest method testCheckForValidRegistrationSessionIDs.

/**
 * Sends the TaskManager one AcknowledgeRegistration wrapped with a random leader
 * session ID and one wrapped with the expected (null) leader session ID, waits
 * for the registration confirmation, and then checks the leader session ID the
 * TaskManager reports.
 */
@Test
public void testCheckForValidRegistrationSessionIDs() {
    new JavaTestKit(actorSystem) {

        {
            ActorGateway tmGateway = null;
            try {
                // the test kit's test actor stands in for the JobManager so that
                // it sees every message the TaskManager sends
                tmGateway = createTaskManager(actorSystem, getTestActor(), config, true, false);
                final ActorRef taskManagerRef = tmGateway.actor();
                final UUID expectedSessionId = null;
                final UUID wrongSessionId = UUID.randomUUID();
                new Within(timeout) {

                    @Override
                    protected void run() {
                        taskManagerRef.tell(TaskManagerMessages.getNotifyWhenRegisteredAtJobManagerMessage(), getTestActor());
                        // first message expected from the TaskManager: its registration attempt
                        LeaderSessionMessage registration = expectMsgClass(LeaderSessionMessage.class);
                        assertTrue(registration.leaderSessionID() == expectedSessionId);
                        assertTrue(registration.message() instanceof RegisterTaskManager);
                        final ActorRef registrant = getLastSender();
                        // acknowledgement carrying the wrong session ID: should be discarded
                        LeaderSessionMessage rejectedAck = new LeaderSessionMessage(wrongSessionId, new AcknowledgeRegistration(new InstanceID(), 1));
                        registrant.tell(rejectedAck, getTestActor());
                        // acknowledgement carrying the expected session ID
                        LeaderSessionMessage acceptedAck = new LeaderSessionMessage(expectedSessionId, new AcknowledgeRegistration(new InstanceID(), 1));
                        registrant.tell(acceptedAck, getTestActor());
                        // drain messages until the registration confirmation arrives
                        Object confirmClass = TaskManagerMessages.getRegisteredAtJobManagerMessage().getClass();
                        Object received;
                        do {
                            received = receiveOne(TestingUtils.TESTING_DURATION());
                        } while (received == null || !received.getClass().equals(confirmClass));
                        registrant.tell(JobManagerMessages.getRequestLeaderSessionID(), getTestActor());
                        expectMsgEquals(new JobManagerMessages.ResponseLeaderSessionID(expectedSessionId));
                    }
                };
            } catch (Throwable e) {
                e.printStackTrace();
                fail(e.getMessage());
            } finally {
                stopActor(tmGateway);
            }
        }
    };
}
Also used : LeaderSessionMessage(org.apache.flink.runtime.messages.JobManagerMessages.LeaderSessionMessage) RegisterTaskManager(org.apache.flink.runtime.messages.RegistrationMessages.RegisterTaskManager) InstanceID(org.apache.flink.runtime.instance.InstanceID) ActorRef(akka.actor.ActorRef) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) AcknowledgeRegistration(org.apache.flink.runtime.messages.RegistrationMessages.AcknowledgeRegistration) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) UUID(java.util.UUID) JavaTestKit(akka.testkit.JavaTestKit) Test(org.junit.Test)

Aggregations

ActorGateway (org.apache.flink.runtime.instance.ActorGateway)102 Test (org.junit.Test)81 Configuration (org.apache.flink.configuration.Configuration)44 AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway)41 FiniteDuration (scala.concurrent.duration.FiniteDuration)37 JobID (org.apache.flink.api.common.JobID)36 JavaTestKit (akka.testkit.JavaTestKit)34 ActorRef (akka.actor.ActorRef)30 IOException (java.io.IOException)26 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)25 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)22 JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages)22 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)20 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)17 TaskManagerServicesConfiguration (org.apache.flink.runtime.taskexecutor.TaskManagerServicesConfiguration)16 SubmitJob (org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob)15 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)14 TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor)14 TriggerSavepoint (org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint)13 SubmitTask (org.apache.flink.runtime.messages.TaskMessages.SubmitTask)13