Search in sources :

Example 11 with InstanceID

use of org.apache.flink.runtime.instance.InstanceID in project flink by apache.

the class TaskManagerRegistrationTest method testTaskManagerResumesConnectAfterJobManagerFailure.

/**
 * Validate that the TaskManager attempts to re-connect after it lost the connection
 * to the JobManager.
 *
 * <p>The test runs in three phases:
 * <ol>
 *   <li>A forwarding actor named {@code ForwardingJobManager} relays messages to the test
 *       kit; the TaskManager's initial {@code RegisterTaskManager} is acknowledged.</li>
 *   <li>The forwarding actor is stopped and the test waits for its {@code Terminated}
 *       notification.</li>
 *   <li>A second forwarding actor is created under the same name and the test expects a
 *       fresh {@code RegisterTaskManager} from the TaskManager.</li>
 * </ol>
 */
@Test
public void testTaskManagerResumesConnectAfterJobManagerFailure() {
    new JavaTestKit(actorSystem) {

        {
            ActorGateway fakeJobManager1Gateway = null;
            ActorGateway fakeJobManager2Gateway = null;
            ActorGateway taskManagerGateway = null;
            final String JOB_MANAGER_NAME = "ForwardingJobManager";
            try {
                fakeJobManager1Gateway = TestingUtils.createForwardingActor(actorSystem, getTestActor(), Option.apply(JOB_MANAGER_NAME));
                // final alias so the anonymous Within blocks below can capture it
                final ActorGateway fakeJM1Gateway = fakeJobManager1Gateway;
                // we make the test actor (the test kit) the JobManager to intercept
                // the messages
                taskManagerGateway = createTaskManager(actorSystem, fakeJobManager1Gateway, config, true, false);
                final ActorGateway tm = taskManagerGateway;
                // validate initial registration
                new Within(timeout) {

                    @Override
                    protected void run() {
                        // the TaskManager should try to register
                        expectMsgClass(RegisterTaskManager.class);
                        // we accept the registration
                        tm.tell(new AcknowledgeRegistration(new InstanceID(), 45234), fakeJM1Gateway);
                    }
                };
                // kill the first forwarding JobManager
                watch(fakeJobManager1Gateway.actor());
                stopActor(fakeJobManager1Gateway.actor());
                new Within(timeout) {

                    @Override
                    protected void run() {
                        Object message = null;
                        // skip over any other messages that are queued up in the testing
                        // actor's mailbox until the Terminated notification arrives
                        // (instanceof is false for null, so no separate null check is needed)
                        while (!(message instanceof Terminated)) {
                            message = receiveOne(timeout);
                        }
                        Terminated terminatedMessage = (Terminated) message;
                        assertEquals(fakeJM1Gateway.actor(), terminatedMessage.actor());
                    }
                };
                // the first fake JobManager is gone; clear the reference so the finally
                // block does not try to stop it a second time
                fakeJobManager1Gateway = null;
                // now start the second fake JobManager and expect that
                // the TaskManager registers again
                // the second fake JM needs to have the same actor URL
                // since we cannot reliably wait until the actor is unregistered (name is
                // available again) we loop with multiple tries for 20 seconds
                long deadline = 20000000000L + System.nanoTime();
                do {
                    try {
                        fakeJobManager2Gateway = TestingUtils.createForwardingActor(actorSystem, getTestActor(), Option.apply(JOB_MANAGER_NAME));
                    } catch (InvalidActorNameException e) {
                        // wait and retry
                        Thread.sleep(100);
                    }
                } while (fakeJobManager2Gateway == null && System.nanoTime() < deadline);
                // fail fast with a clear reason if the actor name never became available,
                // instead of continuing with a null gateway and timing out later
                if (fakeJobManager2Gateway == null) {
                    fail("Could not create the second forwarding JobManager actor '" + JOB_MANAGER_NAME + "' within 20 seconds.");
                }
                final ActorGateway fakeJM2GatewayClosure = fakeJobManager2Gateway;
                // expect the next registration
                new Within(timeout) {

                    @Override
                    protected void run() {
                        expectMsgClass(RegisterTaskManager.class);
                        // we accept the registration
                        tm.tell(new AcknowledgeRegistration(new InstanceID(), 45234), fakeJM2GatewayClosure);
                    }
                };
            } catch (Throwable e) {
                e.printStackTrace();
                fail(e.getMessage());
            } finally {
                stopActor(taskManagerGateway);
                stopActor(fakeJobManager1Gateway);
                stopActor(fakeJobManager2Gateway);
            }
        }
    };
}
Also used : InvalidActorNameException(akka.actor.InvalidActorNameException) AcknowledgeRegistration(org.apache.flink.runtime.messages.RegistrationMessages.AcknowledgeRegistration) InstanceID(org.apache.flink.runtime.instance.InstanceID) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) Terminated(akka.actor.Terminated) JavaTestKit(akka.testkit.JavaTestKit) Test(org.junit.Test)

Example 12 with InstanceID

use of org.apache.flink.runtime.instance.InstanceID in project flink by apache.

the class TaskManagerRegistrationTest method testCheckForValidRegistrationSessionIDs.

/**
 * Checks that the TaskManager only accepts an {@code AcknowledgeRegistration} that is
 * wrapped in a {@code LeaderSessionMessage} carrying the expected leader session ID.
 *
 * <p>The test kit's own actor plays the JobManager. After the registration request is
 * received, an acknowledgement with a random session ID is sent first, followed by one
 * with the expected (here: {@code null}) session ID. The test then waits for the
 * "registered at JobManager" notification and finally queries the TaskManager for its
 * leader session ID.
 */
@Test
public void testCheckForValidRegistrationSessionIDs() {
    new JavaTestKit(actorSystem) {

        {
            ActorGateway taskManagerGateway = null;
            try {
                // the test actor (the test kit) acts as the JobManager so that every
                // message sent by the TaskManager can be intercepted
                taskManagerGateway = createTaskManager(actorSystem, getTestActor(), config, true, false);
                final ActorRef taskManagerRef = taskManagerGateway.actor();
                final UUID bogusSessionId = UUID.randomUUID();
                final UUID expectedSessionId = null;
                new Within(timeout) {

                    @Override
                    protected void run() {
                        // ask to be notified once the registration has completed
                        taskManagerRef.tell(TaskManagerMessages.getNotifyWhenRegisteredAtJobManagerMessage(), getTestActor());

                        // the TaskManager should try to register
                        LeaderSessionMessage registration = expectMsgClass(LeaderSessionMessage.class);
                        // reference comparison is sufficient: the expected ID is null
                        assertTrue(registration.leaderSessionID() == expectedSessionId);
                        assertTrue(registration.message() instanceof RegisterTaskManager);
                        final ActorRef registrationSender = getLastSender();

                        // this acknowledgement should be discarded because the
                        // registration session ID is wrong
                        LeaderSessionMessage rejectedAck = new LeaderSessionMessage(bogusSessionId, new AcknowledgeRegistration(new InstanceID(), 1));
                        registrationSender.tell(rejectedAck, getTestActor());

                        // valid acknowledgement
                        LeaderSessionMessage acceptedAck = new LeaderSessionMessage(expectedSessionId, new AcknowledgeRegistration(new InstanceID(), 1));
                        registrationSender.tell(acceptedAck, getTestActor());

                        // drain the mailbox until the registration confirmation shows up
                        Object registeredClass = TaskManagerMessages.getRegisteredAtJobManagerMessage().getClass();
                        Object received;
                        do {
                            received = receiveOne(TestingUtils.TESTING_DURATION());
                        } while (received == null || !(received.getClass().equals(registeredClass)));

                        // the TaskManager must now report the expected session ID
                        registrationSender.tell(JobManagerMessages.getRequestLeaderSessionID(), getTestActor());
                        expectMsgEquals(new JobManagerMessages.ResponseLeaderSessionID(expectedSessionId));
                    }
                };
            } catch (Throwable e) {
                e.printStackTrace();
                fail(e.getMessage());
            } finally {
                stopActor(taskManagerGateway);
            }
        }
    };
}
Also used : LeaderSessionMessage(org.apache.flink.runtime.messages.JobManagerMessages.LeaderSessionMessage) RegisterTaskManager(org.apache.flink.runtime.messages.RegistrationMessages.RegisterTaskManager) InstanceID(org.apache.flink.runtime.instance.InstanceID) ActorRef(akka.actor.ActorRef) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) AcknowledgeRegistration(org.apache.flink.runtime.messages.RegistrationMessages.AcknowledgeRegistration) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) UUID(java.util.UUID) JavaTestKit(akka.testkit.JavaTestKit) Test(org.junit.Test)

Example 13 with InstanceID

use of org.apache.flink.runtime.instance.InstanceID in project flink by apache.

the class SchedulerTestUtils method getRandomInstance.

// --------------------------------------------------------------------------------------------
/**
 * Creates an {@link Instance} with a freshly generated {@link ResourceID} and
 * {@link InstanceID}, located at {@code 127.0.0.1} and backed by
 * {@code DummyActorGateway.INSTANCE}.
 *
 * <p>Each call takes the next value from the shared {@code port} counter as the data
 * port. The hardware description is built from fixed values (4, 4 GB, 3 GB, 2 GB).
 *
 * @param numSlots the number of slots of the created instance; must be positive
 * @return the newly created instance
 * @throws IllegalArgumentException if {@code numSlots} is zero or negative
 * @throws RuntimeException if the loopback address cannot be resolved; the original
 *         {@link UnknownHostException} is attached as the cause
 */
public static Instance getRandomInstance(int numSlots) {
    if (numSlots <= 0) {
        throw new IllegalArgumentException("numSlots must be positive, but was " + numSlots);
    }
    final ResourceID resourceID = ResourceID.generate();
    final InetAddress address;
    try {
        address = InetAddress.getByName("127.0.0.1");
    } catch (UnknownHostException e) {
        // keep the original exception as the cause so the failure can be diagnosed
        throw new RuntimeException("Test could not create IP address for localhost loopback.", e);
    }
    int dataPort = port.getAndIncrement();
    TaskManagerLocation ci = new TaskManagerLocation(resourceID, address, dataPort);
    final long GB = 1024L * 1024 * 1024;
    HardwareDescription resources = new HardwareDescription(4, 4 * GB, 3 * GB, 2 * GB);
    return new Instance(new ActorTaskManagerGateway(DummyActorGateway.INSTANCE), ci, new InstanceID(), resources, numSlots);
}
Also used : HardwareDescription(org.apache.flink.runtime.instance.HardwareDescription) UnknownHostException(java.net.UnknownHostException) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Instance(org.apache.flink.runtime.instance.Instance) InstanceID(org.apache.flink.runtime.instance.InstanceID) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) InetAddress(java.net.InetAddress) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway)

Aggregations

InstanceID (org.apache.flink.runtime.instance.InstanceID)13 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)8 Test (org.junit.Test)8 JobID (org.apache.flink.api.common.JobID)5 Configuration (org.apache.flink.configuration.Configuration)5 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)5 Instance (org.apache.flink.runtime.instance.Instance)5 TaskManagerLocation (org.apache.flink.runtime.taskmanager.TaskManagerLocation)5 UUID (java.util.UUID)4 Time (org.apache.flink.api.common.time.Time)4 JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages)4 JavaTestKit (akka.testkit.JavaTestKit)3 BlobKey (org.apache.flink.runtime.blob.BlobKey)3 FlinkCompletableFuture (org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture)3 ActorRef (akka.actor.ActorRef)2 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)2 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)2 BroadcastVariableManager (org.apache.flink.runtime.broadcast.BroadcastVariableManager)2