Search in sources :

Example 66 with ActorGateway

use of org.apache.flink.runtime.instance.ActorGateway in project zeppelin by apache.

the class FlinkInterpreter method cancelJobLocalMode.

/**
 * Asks the leader gateway of the local Flink cluster to cancel the given job.
 *
 * <p>The request is sent with the timeout derived from the local cluster's configuration.
 * The value returned by {@code ask} is not inspected or awaited here.
 *
 * @param jobID identifier of the job to cancel
 */
private void cancelJobLocalMode(JobID jobID) {
    final FiniteDuration askTimeout = AkkaUtils.getTimeout(localFlinkCluster.configuration());
    final ActorGateway leaderGateway = localFlinkCluster.getLeaderGateway(askTimeout);
    final JobManagerMessages.CancelJob cancelRequest = new JobManagerMessages.CancelJob(jobID);
    leaderGateway.ask(cancelRequest, askTimeout);
}
Also used : ActorGateway(org.apache.flink.runtime.instance.ActorGateway) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration)

Example 67 with ActorGateway

use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.

the class CoordinatorShutdownTest method testCoordinatorShutsDownOnFailure.

/**
 * Verifies that, once the test job has finished, its checkpoint coordinator is either absent
 * or shut down.
 *
 * <p>The test starts a local mini cluster configured with one task manager and one task slot,
 * submits a single-vertex job running {@code FailingBlockingInvokable} with snapshot settings
 * enabled, unblocks the invokable, waits for the execution graph to finish and then inspects
 * the graph's checkpoint coordinator. The cluster is always shut down in the {@code finally}
 * block.
 */
@Test
public void testCoordinatorShutsDownOnFailure() {
    LocalFlinkMiniCluster cluster = null;
    try {
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
        cluster = new LocalFlinkMiniCluster(config, true);
        cluster.start();
        // build a test graph with snapshotting enabled
        JobVertex vertex = new JobVertex("Test Vertex");
        vertex.setInvokableClass(FailingBlockingInvokable.class);
        List<JobVertexID> vertexIdList = Collections.singletonList(vertex.getID());
        JobGraph testGraph = new JobGraph("test job", vertex);
        testGraph.setSnapshotSettings(new JobSnapshottingSettings(vertexIdList, vertexIdList, vertexIdList, 5000, 60000, 0L, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), null, true));
        ActorGateway jmGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
        FiniteDuration timeout = new FiniteDuration(60, TimeUnit.SECONDS);
        JobManagerMessages.SubmitJob submitMessage = new JobManagerMessages.SubmitJob(testGraph, ListeningBehaviour.EXECUTION_RESULT);
        // submit is successful, but then the job blocks due to the invokable
        Future<Object> submitFuture = jmGateway.ask(submitMessage, timeout);
        Await.result(submitFuture, timeout);
        // get the execution graph and store the ExecutionGraph reference
        Future<Object> jobRequestFuture = jmGateway.ask(new JobManagerMessages.RequestJob(testGraph.getJobID()), timeout);
        ExecutionGraph graph = (ExecutionGraph) ((JobManagerMessages.JobFound) Await.result(jobRequestFuture, timeout)).executionGraph();
        assertNotNull(graph);
        FailingBlockingInvokable.unblock();
        graph.waitUntilFinished();
        // verify that the coordinator was shut down
        CheckpointCoordinator coord = graph.getCheckpointCoordinator();
        assertTrue(coord == null || coord.isShutdown());
    } catch (Exception e) {
        // Fail the test while keeping the original exception as the cause, so the test report
        // carries its type and stack trace (and a useful message even when getMessage() is null).
        throw new AssertionError("Unexpected exception: " + e, e);
    } finally {
        if (cluster != null) {
            cluster.shutdown();
            cluster.awaitTermination();
        }
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) JobSnapshottingSettings(org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) LocalFlinkMiniCluster(org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 68 with ActorGateway

use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.

the class ClusterShutdownITCase method testClusterShutdownWithResourceManager.

/**
 * Exercises a faked cluster shutdown while a ResourceManager is connected and expects a
 * shutdown notification from the TaskManager, the JobManager and the ResourceManager, plus
 * the final {@code StopClusterSuccessful} reply.
 */
@Test
public void testClusterShutdownWithResourceManager() {
    new JavaTestKit(system) {

        {
            new Within(duration("30 seconds")) {

                @Override
                protected void run() {
                    // Gateway that forwards everything to this test kit's test actor
                    final ActorGateway probe = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());

                    // JobManager that does not shut down the actor system; subscribe to its shutdown
                    final ActorGateway jmGateway = TestingUtils.createJobManager(system, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), config, "jobmanager2");
                    jmGateway.tell(TestingMessages.getNotifyOfComponentShutdown(), probe);

                    // TaskManager registered at the JobManager; subscribe to its shutdown
                    final ActorGateway tmGateway = TestingUtils.createTaskManager(system, jmGateway, config, true, true);
                    tmGateway.tell(TestingMessages.getNotifyOfComponentShutdown(), probe);

                    // ResourceManager pointed at the JobManager; subscribe to its shutdown
                    final ActorGateway rmGateway = TestingUtils.createResourceManager(system, jmGateway.actor(), config);
                    rmGateway.tell(TestingMessages.getNotifyOfComponentShutdown(), probe);

                    // Ask to be told once the ResourceManager is connected, then wait for that acknowledgement
                    rmGateway.tell(new TestingResourceManager.NotifyWhenResourceManagerConnected(), probe);
                    expectMsgEquals(Acknowledge.get());

                    // Stop the cluster while the ResourceManager is connected
                    final StopCluster stopRequest = new StopCluster(ApplicationStatus.SUCCEEDED, "Shutting down.");
                    jmGateway.tell(stopRequest, probe);

                    final TestingMessages.ComponentShutdown tmStopped = new TestingMessages.ComponentShutdown(tmGateway.actor());
                    final TestingMessages.ComponentShutdown jmStopped = new TestingMessages.ComponentShutdown(jmGateway.actor());
                    final TestingMessages.ComponentShutdown rmStopped = new TestingMessages.ComponentShutdown(rmGateway.actor());
                    expectMsgAllOf(tmStopped, jmStopped, rmStopped, StopClusterSuccessful.getInstance());
                }
            };
        }
    };
}
Also used : TestingMessages(org.apache.flink.runtime.testingUtils.TestingMessages) TestingResourceManager(org.apache.flink.runtime.testutils.TestingResourceManager) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) StopCluster(org.apache.flink.runtime.clusterframework.messages.StopCluster) JavaTestKit(akka.testkit.JavaTestKit) Test(org.junit.Test)

Example 69 with ActorGateway

use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.

the class ResourceManagerITCase method testResourceManagerReconciliation.

/**
 * Checks that a resource manager started after a task manager has already registered at the
 * job manager connects and ends up knowing about that task manager's resource.
 */
@Test
public void testResourceManagerReconciliation() {
    new JavaTestKit(system) {

        {
            new Within(duration("10 seconds")) {

                @Override
                protected void run() {
                    final ActorGateway jmGateway = TestingUtils.createJobManager(system, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), config, "ReconciliationTest");
                    final ActorGateway probe = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());

                    // Register a task manager while no resource manager is running yet
                    final ResourceID taskManagerId = ResourceID.generate();
                    final TaskManagerLocation tmLocation = mock(TaskManagerLocation.class);
                    when(tmLocation.getResourceID()).thenReturn(taskManagerId);
                    final HardwareDescription hardware = HardwareDescription.extractFromSystem(1_000_000);
                    final RegistrationMessages.RegisterTaskManager registration = new RegistrationMessages.RegisterTaskManager(taskManagerId, tmLocation, hardware, 1);
                    jmGateway.tell(registration, probe);
                    expectMsgClass(RegistrationMessages.AcknowledgeRegistration.class);

                    // Only now bring up the resource manager
                    final ActorGateway rmGateway = TestingUtils.createResourceManager(system, jmGateway.actor(), config);

                    // Ask to be notified once the resource manager is connected, and wait for it
                    rmGateway.tell(new TestingResourceManager.NotifyWhenResourceManagerConnected(), probe);
                    expectMsgEquals(Acknowledge.get());

                    // The resource manager must report exactly the one task manager resource
                    rmGateway.tell(new TestingResourceManager.GetRegisteredResources(), probe);
                    final TestingResourceManager.GetRegisteredResourcesReply registered = expectMsgClass(TestingResourceManager.GetRegisteredResourcesReply.class);
                    assertEquals(1, registered.resources.size());
                    assertTrue(registered.resources.contains(taskManagerId));
                }
            };
        }
    };
}
Also used : RegistrationMessages(org.apache.flink.runtime.messages.RegistrationMessages) HardwareDescription(org.apache.flink.runtime.instance.HardwareDescription) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) TestingResourceManager(org.apache.flink.runtime.testutils.TestingResourceManager) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) JavaTestKit(akka.testkit.JavaTestKit) Test(org.junit.Test)

Example 70 with ActorGateway

use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.

the class LeaderChangeJobRecoveryTest method testNotRestartedWhenLosingLeadership.

/**
 * Checks that a job is not restarted, or at least eventually reaches a terminal state, after
 * the JobManager has its leadership revoked.
 *
 * @throws Exception if any cluster interaction or wait in the test fails
 */
@Test
public void testNotRestartedWhenLosingLeadership() throws Exception {
    // Make JobManager 0 the leader and announce it to the retrieval listeners
    final UUID sessionId = UUID.randomUUID();
    cluster.grantLeadership(0, sessionId);
    cluster.notifyRetrievalListeners(0, sessionId);
    cluster.waitForTaskManagersToBeRegistered(timeout);

    cluster.submitJobDetached(job);
    final ActorGateway leader = cluster.getLeaderGateway(timeout);

    // Wait until all vertices of the job are running or finished
    final Future<Object> verticesUp = leader.ask(new TestingJobManagerMessages.WaitForAllVerticesToBeRunningOrFinished(job.getJobID()), timeout);
    Await.ready(verticesUp, timeout);

    // Fetch the job's execution graph from the leader
    final Future<Object> graphRequest = leader.ask(new TestingJobManagerMessages.RequestExecutionGraph(job.getJobID()), timeout);
    final TestingJobManagerMessages.ResponseExecutionGraph graphResponse = (TestingJobManagerMessages.ResponseExecutionGraph) Await.result(graphRequest, timeout);
    assertTrue(graphResponse instanceof TestingJobManagerMessages.ExecutionGraphFound);
    final TestingJobManagerMessages.ExecutionGraphFound found = (TestingJobManagerMessages.ExecutionGraphFound) graphResponse;
    final ExecutionGraph graph = (ExecutionGraph) found.executionGraph();

    // Listen for a terminal job status, then take the leadership away
    final TerminalJobStatusListener terminalListener = new TerminalJobStatusListener();
    graph.registerJobStatusListener(terminalListener);
    cluster.revokeLeadership();
    terminalListener.waitForTerminalState(30000);
}
Also used : TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) TerminalJobStatusListener(org.apache.flink.runtime.executiongraph.TerminalJobStatusListener) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) UUID(java.util.UUID) Test(org.junit.Test)

Aggregations

ActorGateway (org.apache.flink.runtime.instance.ActorGateway): 102; Test (org.junit.Test): 81; Configuration (org.apache.flink.configuration.Configuration): 44; AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway): 41; FiniteDuration (scala.concurrent.duration.FiniteDuration): 37; JobID (org.apache.flink.api.common.JobID): 36; JavaTestKit (akka.testkit.JavaTestKit): 34; ActorRef (akka.actor.ActorRef): 30; IOException (java.io.IOException): 26; JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 25; JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID): 22; JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages): 22; JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 20; ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID): 17; TaskManagerServicesConfiguration (org.apache.flink.runtime.taskexecutor.TaskManagerServicesConfiguration): 16; SubmitJob (org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob): 15; ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 14; TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor): 14; TriggerSavepoint (org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint): 13; SubmitTask (org.apache.flink.runtime.messages.TaskMessages.SubmitTask): 13