use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.
the class LeaderChangeStateCleanupTest method testReelectionOfSameJobManager.
/**
 * Verifies that a JobManager which is already the leader can be elected leader a second time.
 * Even though the leading JM does not change, the job that is running at that moment has to
 * be removed, and the TMs must not be registered again before they have been told about the
 * new leader session ID.
 */
@Test
public void testReelectionOfSameJobManager() throws Exception {
    final UUID firstSessionId = UUID.randomUUID();
    final UUID secondSessionId = UUID.randomUUID();
    // deliberately short: this wait is expected to run into its timeout further below
    final FiniteDuration registrationProbeTimeout = new FiniteDuration(10, TimeUnit.SECONDS);

    // elect JM(0), announce it to the listeners and wait for the TMs to show up
    cluster.grantLeadership(0, firstSessionId);
    cluster.notifyRetrievalListeners(0, firstSessionId);
    cluster.waitForTaskManagersToBeRegistered(timeout);

    // start the blocking job and wait until every vertex is running (or finished)
    cluster.submitJobDetached(job);
    final ActorGateway currentLeader = cluster.getLeaderGateway(timeout);
    final Future<Object> allVerticesUp =
            currentLeader.ask(new WaitForAllVerticesToBeRunningOrFinished(job.getJobID()), timeout);
    Await.ready(allVerticesUp, timeout);

    // register for the removal notification before the leadership changes
    final Future<Object> jobRemovedNotification =
            currentLeader.ask(new NotifyWhenJobRemoved(job.getJobID()), timeout);

    LOG.info("Make JM(0) again the leader. This should first revoke the leadership.");

    // granting leadership to JM(0) once more implies that its leadership is revoked first
    cluster.grantLeadership(0, secondSessionId);
    Await.ready(jobRemovedNotification, timeout);

    LOG.info("Job removed.");

    // The TMs have not been notified yet and therefore still use the old leader session ID,
    // so waiting for their registration has to time out.
    try {
        cluster.waitForTaskManagersToBeRegistered(registrationProbeTimeout);
        fail("TaskManager should not be able to register at JobManager.");
    } catch (TimeoutException expected) {
        // this is the desired outcome: no TM could register with the stale session ID
    }

    LOG.info("Notify TMs about the new (old) leader.");

    // now let the TMs know about the re-elected leader so that they can register again
    cluster.notifyRetrievalListeners(0, secondSessionId);
    cluster.waitForTaskManagersToBeRegistered(timeout);

    final ActorGateway reelectedLeader = cluster.getLeaderGateway(timeout);

    // switch the receiver to non-blocking; the resubmitted job should then run to completion
    Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(false);
    final TestingLeaderRetrievalService leaderRetrieval =
            new TestingLeaderRetrievalService(reelectedLeader.path(), reelectedLeader.leaderSessionID());
    cluster.submitJobAndWait(job, false, timeout, leaderRetrieval);
}
use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.
the class LeaderChangeStateCleanupTest method testStateCleanupAfterNewLeaderElectionAndListenerNotification.
/**
 * Verifies the cleanup that has to happen when leadership moves from one JobManager to
 * another: the running job is removed, and the TaskManagers register at the newly elected
 * leader so that it reports the full number of slots again.
 */
@Test
public void testStateCleanupAfterNewLeaderElectionAndListenerNotification() throws Exception {
    final UUID sessionOfFirstLeader = UUID.randomUUID();
    final UUID sessionOfSecondLeader = UUID.randomUUID();

    // JM(0) becomes the initial leader and all listeners are informed
    cluster.grantLeadership(0, sessionOfFirstLeader);
    cluster.notifyRetrievalListeners(0, sessionOfFirstLeader);
    cluster.waitForTaskManagersToBeRegistered(timeout);

    // the job blocks, so it is guaranteed to still be around when the leader changes
    cluster.submitJobDetached(job);
    final ActorGateway firstLeader = cluster.getLeaderGateway(timeout);
    final Future<Object> allVerticesUp =
            firstLeader.ask(new WaitForAllVerticesToBeRunningOrFinished(job.getJobID()), timeout);
    Await.ready(allVerticesUp, timeout);

    // ask for the removal notification before the leadership is handed over
    final Future<Object> jobRemovedNotification =
            firstLeader.ask(new NotifyWhenJobRemoved(job.getJobID()), timeout);

    // hand leadership over to JM(1) and propagate the change to all listeners
    cluster.grantLeadership(1, sessionOfSecondLeader);
    cluster.notifyRetrievalListeners(1, sessionOfSecondLeader);

    Await.ready(jobRemovedNotification, timeout);
    cluster.waitForTaskManagersToBeRegistered(timeout);

    final ActorGateway secondLeader = cluster.getLeaderGateway(timeout);

    // the new leader must see every slot, i.e. all TMs have registered with it
    final Future<Object> slotCountReply =
            secondLeader.ask(JobManagerMessages.getRequestTotalNumberOfSlots(), timeout);
    int registeredSlots = (Integer) Await.result(slotCountReply, timeout);
    assertEquals(parallelism, registeredSlots);

    // switch the receiver to non-blocking; the resubmitted job should then run to completion
    Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(false);
    final TestingLeaderRetrievalService leaderRetrieval =
            new TestingLeaderRetrievalService(secondLeader.path(), secondLeader.leaderSessionID());
    cluster.submitJobAndWait(job, false, timeout, leaderRetrieval);
}
use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.
the class TaskManagerTest method testUpdateTaskInputPartitionsFailure.
/**
 * Tests that the TaskManager answers an update-task-input-partition message with a failure
 * when the update cannot be applied. The task is deployed without any input gates and the
 * update refers to a freshly created intermediate data set ID, so awaiting the response is
 * expected to throw.
 */
@Test
public void testUpdateTaskInputPartitionsFailure() throws Exception {
    ActorGateway jmGateway = null;
    ActorGateway tmGateway = null;
    try {
        final ExecutionAttemptID attemptId = new ExecutionAttemptID();

        final ActorRef jobManagerActor =
                system.actorOf(Props.create(SimpleJobManager.class, leaderSessionID));
        jmGateway = new AkkaActorGateway(jobManagerActor, leaderSessionID);
        tmGateway = TestingUtils.createTaskManager(system, jmGateway, new Configuration(), true, true);

        // a blocking no-op task with neither produced partitions nor input gates
        final TaskDeploymentDescriptor deployment = createTaskDeploymentDescriptor(
                new JobID(),
                "test job",
                new JobVertexID(),
                attemptId,
                new SerializedValue<>(new ExecutionConfig()),
                "test task",
                1, 0, 1, 0,
                new Configuration(),
                new Configuration(),
                BlockingNoOpInvokable.class.getName(),
                Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
                Collections.<InputGateDeploymentDescriptor>emptyList(),
                Collections.<BlobKey>emptyList(),
                Collections.<URL>emptyList(),
                0);

        // the submission itself has to succeed; Await.result throws otherwise
        final Future<Object> submitAck = tmGateway.ask(new SubmitTask(deployment), timeout);
        Await.result(submitAck, timeout);

        final InputChannelDeploymentDescriptor localChannel =
                new InputChannelDeploymentDescriptor(new ResultPartitionID(), ResultPartitionLocation.createLocal());
        final TaskMessages.UpdateTaskSinglePartitionInfo partitionUpdate =
                new TaskMessages.UpdateTaskSinglePartitionInfo(attemptId, new IntermediateDataSetID(), localChannel);
        final Future<Object> updateReply = tmGateway.ask(partitionUpdate, timeout);

        try {
            Await.result(updateReply, timeout);
            fail("The update task input partitions message should have failed.");
        } catch (Exception expected) {
            // the failed response surfaces as an exception here, which is what we want
        }
    } finally {
        TestingUtils.stopActor(jmGateway);
        TestingUtils.stopActor(tmGateway);
    }
}
use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.
the class TaskManagerTest method testGateChannelEdgeMismatch.
/**
 * Submits a sender task and a receiver task that are deployed without any result partitions
 * or input gates, waits until the TaskManager reports both tasks as removed, and then checks
 * that the TaskManager no longer lists any running task.
 */
@Test
public void testGateChannelEdgeMismatch() {
    new JavaTestKit(system) {
        {
            ActorGateway jmGateway = null;
            ActorGateway tmGateway = null;
            // all replies of the TaskManager are routed to the test kit's own actor
            final ActorGateway probe = new AkkaActorGateway(getTestActor(), leaderSessionID);
            try {
                final ActorRef jobManagerActor =
                        system.actorOf(Props.create(SimpleJobManager.class, leaderSessionID));
                jmGateway = new AkkaActorGateway(jobManagerActor, leaderSessionID);
                tmGateway = TestingUtils.createTaskManager(system, jmGateway, new Configuration(), true, true);

                // effectively-final alias so that the anonymous Within block can use it
                final ActorGateway taskManagerRef = tmGateway;

                final JobID jobId = new JobID();
                final ExecutionAttemptID senderAttempt = new ExecutionAttemptID();
                final ExecutionAttemptID receiverAttempt = new ExecutionAttemptID();

                final TaskDeploymentDescriptor senderDeployment = createTaskDeploymentDescriptor(
                        jobId,
                        "TestJob",
                        new JobVertexID(),
                        senderAttempt,
                        new SerializedValue<>(new ExecutionConfig()),
                        "Sender",
                        1, 0, 1, 0,
                        new Configuration(),
                        new Configuration(),
                        Tasks.Sender.class.getName(),
                        Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
                        Collections.<InputGateDeploymentDescriptor>emptyList(),
                        new ArrayList<BlobKey>(),
                        Collections.<URL>emptyList(),
                        0);

                final TaskDeploymentDescriptor receiverDeployment = createTaskDeploymentDescriptor(
                        jobId,
                        "TestJob",
                        new JobVertexID(),
                        receiverAttempt,
                        new SerializedValue<>(new ExecutionConfig()),
                        "Receiver",
                        7, 2, 7, 0,
                        new Configuration(),
                        new Configuration(),
                        Tasks.Receiver.class.getName(),
                        Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
                        Collections.<InputGateDeploymentDescriptor>emptyList(),
                        new ArrayList<BlobKey>(),
                        Collections.<URL>emptyList(),
                        0);

                new Within(d) {
                    @Override
                    protected void run() {
                        try {
                            // both submissions have to be acknowledged
                            taskManagerRef.tell(new SubmitTask(senderDeployment), probe);
                            taskManagerRef.tell(new SubmitTask(receiverDeployment), probe);
                            expectMsgEquals(Acknowledge.get());
                            expectMsgEquals(Acknowledge.get());

                            // wait until the TaskManager has removed both tasks
                            taskManagerRef.tell(
                                    new TestingTaskManagerMessages.NotifyWhenTaskRemoved(senderAttempt), probe);
                            taskManagerRef.tell(
                                    new TestingTaskManagerMessages.NotifyWhenTaskRemoved(receiverAttempt), probe);
                            expectMsgEquals(true);
                            expectMsgEquals(true);

                            // afterwards nothing may be left in the set of running tasks
                            taskManagerRef.tell(TestingTaskManagerMessages.getRequestRunningTasksMessage(), probe);
                            Map<ExecutionAttemptID, Task> stillRunning =
                                    expectMsgClass(TestingTaskManagerMessages.ResponseRunningTasks.class).asJava();
                            assertEquals(0, stillRunning.size());
                        } catch (Exception e) {
                            e.printStackTrace();
                            fail(e.getMessage());
                        }
                    }
                };
            } catch (Exception e) {
                e.printStackTrace();
                fail(e.getMessage());
            } finally {
                // always tear the actors down, TaskManager first
                TestingUtils.stopActor(tmGateway);
                TestingUtils.stopActor(jmGateway);
            }
        }
    };
}
use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.
the class TaskManagerTest method testStackTraceSampleFailure.
/**
 * Tests that a failing trigger-stack-trace-sample request is reported back to the sender as
 * an exception. The sample is requested for a freshly created execution attempt ID that was
 * never submitted to the TaskManager, and awaiting the response is expected to throw an
 * {@link IllegalStateException}.
 */
@Test
public void testStackTraceSampleFailure() throws Exception {
    ActorGateway jmGateway = null;
    ActorGateway tmGateway = null;
    try {
        final ActorRef jobManagerActor =
                system.actorOf(Props.create(SimpleJobManager.class, leaderSessionID));
        jmGateway = new AkkaActorGateway(jobManagerActor, leaderSessionID);
        tmGateway = TestingUtils.createTaskManager(system, jmGateway, new Configuration(), true, true);

        // no task has been submitted, so this attempt ID is unknown to the TaskManager
        final TriggerStackTraceSample sampleRequest =
                new TriggerStackTraceSample(0, new ExecutionAttemptID(), 0, Time.milliseconds(1L), 0);
        final Future<Object> sampleReply = tmGateway.ask(sampleRequest, timeout);

        try {
            Await.result(sampleReply, timeout);
            fail("The trigger stack trace message should have failed.");
        } catch (IllegalStateException expected) {
            // the failure was propagated to the sender as intended
        }
    } finally {
        TestingUtils.stopActor(jmGateway);
        TestingUtils.stopActor(tmGateway);
    }
}
Aggregations