use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class JobClientActorTest method testConnectionTimeoutAfterJobRegistration.
/**
 * Verifies that a {@link JobClientActorConnectionTimeoutException} surfaces when the
 * JobManager is stopped after the client has already registered at it.
 *
 * @throws Exception the expected {@link JobClientActorConnectionTimeoutException}, or any
 *     other failure raised while awaiting the futures
 */
@Test(expected = JobClientActorConnectionTimeoutException.class)
public void testConnectionTimeoutAfterJobRegistration() throws Exception {
    final FiniteDuration clientTimeout = new FiniteDuration(5, TimeUnit.SECONDS);
    // the test itself waits twice as long as the client actor's own timeout
    final FiniteDuration awaitTimeout = clientTimeout.$times(2);
    final UUID sessionId = UUID.randomUUID();

    final ActorRef jobManager =
        system.actorOf(Props.create(JobAcceptingActor.class, sessionId));
    final TestingLeaderRetrievalService leaderRetrieval =
        new TestingLeaderRetrievalService(jobManager.path().toString(), sessionId);

    final ActorRef clientActor = system.actorOf(
        JobAttachmentClientActor.createActorProps(leaderRetrieval, clientTimeout, false));

    // ask the client to attach to the job; this future is only awaited at the very end
    final Future<Object> attachResult = Patterns.ask(
        clientActor,
        new AttachToJobAndWait(testJobGraph.getJobID()),
        new Timeout(awaitTimeout));

    // block until the JobManager has answered the RegisterTest request
    // NOTE(review): presumably answered once the client registered - confirm in JobAcceptingActor
    final Future<Object> registered =
        Patterns.ask(jobManager, new RegisterTest(), new Timeout(awaitTimeout));
    Await.result(registered, awaitTimeout);

    // stop the JobManager, then wait for the attach request to fail
    jobManager.tell(PoisonPill.getInstance(), ActorRef.noSender());
    Await.result(attachResult, awaitTimeout);
}
use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class JobClientActorTest method testGuaranteedAnswerIfJobClientDies.
/**
 * Verifies that awaiting the job result fails with a {@link JobExecutionException} instead
 * of blocking when the JobClientActor has been killed and can no longer answer
 * {@link akka.actor.Identify} messages.
 *
 * @throws Exception if submitting the job or awaiting the registration fails
 */
@Test
public void testGuaranteedAnswerIfJobClientDies() throws Exception {
    final FiniteDuration askTimeout = new FiniteDuration(2, TimeUnit.SECONDS);
    final UUID sessionId = UUID.randomUUID();

    final ActorRef jobManager =
        system.actorOf(Props.create(JobAcceptingActor.class, sessionId));
    final TestingLeaderRetrievalService leaderRetrieval =
        new TestingLeaderRetrievalService(jobManager.path().toString(), sessionId);

    final JobListeningContext listeningContext = JobClient.submitJob(
        system,
        clientConfig,
        leaderRetrieval,
        testJobGraph,
        askTimeout,
        false,
        getClass().getClassLoader());

    // wait for the JobManager to answer the RegisterTest request
    final Future<Object> registered =
        Patterns.ask(jobManager, new RegisterTest(), new Timeout(askTimeout));
    Await.result(registered, askTimeout);

    // kill the job client actor which has been registered at the JobManager
    listeningContext.getJobClientActor().tell(PoisonPill.getInstance(), ActorRef.noSender());

    boolean failedAsExpected = false;
    try {
        // should not block but return an error
        JobClient.awaitJobResult(listeningContext);
    } catch (JobExecutionException expected) {
        // this is what we want
        failedAsExpected = true;
    }

    if (!failedAsExpected) {
        Assert.fail();
    }
}
use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class JobClientActorTest method testConnectionTimeoutWithoutJobManagerForSubmission.
/**
 * Verifies that a {@link org.apache.flink.runtime.client.JobClientActorConnectionTimeoutException}
 * is raised when the JobSubmissionClientActor is asked to submit a job but has not connected
 * to a JobManager.
 *
 * @throws Exception the expected connection timeout exception, or any other failure raised
 *     while awaiting the submission result
 */
@Test(expected = JobClientActorConnectionTimeoutException.class)
public void testConnectionTimeoutWithoutJobManagerForSubmission() throws Exception {
    final FiniteDuration clientTimeout = new FiniteDuration(5, TimeUnit.SECONDS);
    // the test itself waits twice as long as the client actor's own timeout
    final FiniteDuration awaitTimeout = clientTimeout.$times(2);

    // retrieval service created without any leader address or session id
    final TestingLeaderRetrievalService leaderRetrieval = new TestingLeaderRetrievalService();

    final ActorRef clientActor = system.actorOf(
        JobSubmissionClientActor.createActorProps(
            leaderRetrieval, clientTimeout, false, clientConfig));

    final Future<Object> submissionResult = Patterns.ask(
        clientActor,
        new JobClientMessages.SubmitJobAndWait(testJobGraph),
        new Timeout(awaitTimeout));

    Await.result(submissionResult, awaitTimeout);
}
use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class CoordinatorShutdownTest method testCoordinatorShutsDownOnFailure.
/**
 * Verifies that, after a job running {@link FailingBlockingInvokable} has finished, its
 * checkpoint coordinator is either absent or shut down.
 *
 * <p>Any unexpected exception is rethrown as an {@link AssertionError} carrying the original
 * exception as its cause, so the test report contains the full stack trace.
 */
@Test
public void testCoordinatorShutsDownOnFailure() {
    LocalFlinkMiniCluster cluster = null;
    try {
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
        cluster = new LocalFlinkMiniCluster(config, true);
        cluster.start();

        // build a test graph with snapshotting enabled
        JobVertex vertex = new JobVertex("Test Vertex");
        vertex.setInvokableClass(FailingBlockingInvokable.class);
        List<JobVertexID> vertexIdList = Collections.singletonList(vertex.getID());
        JobGraph testGraph = new JobGraph("test job", vertex);
        // NOTE(review): positional arguments - confirm their meaning against the
        // JobSnapshottingSettings constructor
        testGraph.setSnapshotSettings(new JobSnapshottingSettings(
            vertexIdList, vertexIdList, vertexIdList,
            5000, 60000, 0L, Integer.MAX_VALUE,
            ExternalizedCheckpointSettings.none(), null, true));

        ActorGateway jmGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
        FiniteDuration timeout = new FiniteDuration(60, TimeUnit.SECONDS);
        JobManagerMessages.SubmitJob submitMessage =
            new JobManagerMessages.SubmitJob(testGraph, ListeningBehaviour.EXECUTION_RESULT);

        // submit is successful, but then the job blocks due to the invokable
        Future<Object> submitFuture = jmGateway.ask(submitMessage, timeout);
        Await.result(submitFuture, timeout);

        // get the execution graph and store the ExecutionGraph reference
        Future<Object> jobRequestFuture =
            jmGateway.ask(new JobManagerMessages.RequestJob(testGraph.getJobID()), timeout);
        ExecutionGraph graph = (ExecutionGraph)
            ((JobManagerMessages.JobFound) Await.result(jobRequestFuture, timeout)).executionGraph();
        assertNotNull(graph);

        // release the blocked invokable and wait for the job to reach its end state
        FailingBlockingInvokable.unblock();
        graph.waitUntilFinished();

        // verify that the coordinator was shut down
        CheckpointCoordinator coord = graph.getCheckpointCoordinator();
        assertTrue(coord == null || coord.isShutdown());
    } catch (Exception e) {
        // Keep the original exception as the cause: fail(e.getMessage()) kept only the
        // message (which may be null) and dropped the stack trace from the test failure.
        throw new AssertionError("Test failed with an unexpected exception: " + e, e);
    } finally {
        if (cluster != null) {
            cluster.shutdown();
            cluster.awaitTermination();
        }
    }
}
use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class LeaderChangeStateCleanupTest method testReelectionOfSameJobManager.
/**
 * Verifies that the same JobManager can be re-elected as leader. Although the identical JM
 * becomes the next leader, every running job must be cancelled properly, and all TMs must
 * disconnect from the leader and subsequently reconnect to it.
 *
 * @throws Exception if any of the awaited cluster operations fails or times out unexpectedly
 */
@Test
public void testReelectionOfSameJobManager() throws Exception {
    final UUID initialSessionId = UUID.randomUUID();
    final UUID reelectedSessionId = UUID.randomUUID();

    // elect JM(0), announce it to the retrieval listeners and wait for the TMs to register
    cluster.grantLeadership(0, initialSessionId);
    cluster.notifyRetrievalListeners(0, initialSessionId);
    cluster.waitForTaskManagersToBeRegistered(timeout);

    // submit blocking job
    cluster.submitJobDetached(job);

    final ActorGateway initialLeader = cluster.getLeaderGateway(timeout);

    final Future<Object> allVerticesRunning =
        initialLeader.ask(new WaitForAllVerticesToBeRunningOrFinished(job.getJobID()), timeout);
    Await.ready(allVerticesRunning, timeout);

    final Future<Object> jobRemoved =
        initialLeader.ask(new NotifyWhenJobRemoved(job.getJobID()), timeout);

    LOG.info("Make JM(0) again the leader. This should first revoke the leadership.");

    // make JM(0) again the leader --> this implies first a leadership revocation
    cluster.grantLeadership(0, reelectedSessionId);

    Await.ready(jobRemoved, timeout);

    LOG.info("Job removed.");

    // The TMs must not be able to re-register yet: they still hold the old leader
    // session ID, so waiting for their registration is expected to time out.
    final FiniteDuration shortTimeout = new FiniteDuration(10, TimeUnit.SECONDS);
    boolean registrationTimedOut = false;
    try {
        cluster.waitForTaskManagersToBeRegistered(shortTimeout);
    } catch (TimeoutException expected) {
        // expected exception since the TMs have still the old leader session ID
        registrationTimedOut = true;
    }
    if (!registrationTimedOut) {
        fail("TaskManager should not be able to register at JobManager.");
    }

    LOG.info("Notify TMs about the new (old) leader.");

    // notify the TMs about the new (old) leader
    cluster.notifyRetrievalListeners(0, reelectedSessionId);
    cluster.waitForTaskManagersToBeRegistered(timeout);

    final ActorGateway reelectedLeader = cluster.getLeaderGateway(timeout);

    // try to resubmit now the non-blocking job, it should complete successfully
    Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(false);
    final TestingLeaderRetrievalService leaderRetrieval = new TestingLeaderRetrievalService(
        reelectedLeader.path(), reelectedLeader.leaderSessionID());
    cluster.submitJobAndWait(job, false, timeout, leaderRetrieval);
}
Aggregations