use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class JobClientActorRecoveryITCase method testJobClientRecovery.
/**
* Tests wether the JobClientActor can connect to a newly elected leading job manager to obtain
* the JobExecutionResult. The submitted job blocks for the first execution attempt. The
* leading job manager will be killed so that the second job manager will be elected as the
* leader. The newly elected leader has to retrieve the checkpointed job from ZooKeeper
* and continue its execution. This time, the job does not block and, thus, can be finished.
* The execution result should be sent to the JobClientActor which originally submitted the
* job.
*
* @throws Exception
*/
@Test
public void testJobClientRecovery() throws Exception {
File rootFolder = tempFolder.getRoot();
Configuration config = ZooKeeperTestUtils.createZooKeeperHAConfig(zkServer.getConnectString(), rootFolder.getPath());
config.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, 2);
config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
final TestingCluster cluster = new TestingCluster(config);
cluster.start();
JobVertex blockingVertex = new JobVertex("Blocking Vertex");
blockingVertex.setInvokableClass(BlockingTask.class);
blockingVertex.setParallelism(1);
final JobGraph jobGraph = new JobGraph("Blocking Test Job", blockingVertex);
final Promise<JobExecutionResult> promise = new scala.concurrent.impl.Promise.DefaultPromise<>();
Deadline deadline = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();
try {
Thread submitter = new Thread(new Runnable() {
@Override
public void run() {
try {
JobExecutionResult result = cluster.submitJobAndWait(jobGraph, false);
promise.success(result);
} catch (Exception e) {
promise.failure(e);
}
}
});
submitter.start();
synchronized (BlockingTask.waitLock) {
while (BlockingTask.HasBlockedExecution < 1 && deadline.hasTimeLeft()) {
BlockingTask.waitLock.wait(deadline.timeLeft().toMillis());
}
}
if (deadline.isOverdue()) {
Assert.fail("The job has not blocked within the given deadline.");
}
ActorGateway gateway = cluster.getLeaderGateway(deadline.timeLeft());
gateway.tell(TestingJobManagerMessages.getDisablePostStop());
gateway.tell(PoisonPill.getInstance());
// if the job fails then an exception is thrown here
Await.result(promise.future(), deadline.timeLeft());
} finally {
cluster.shutdown();
}
}
use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class JobClientActorTest method testConnectionTimeoutAfterJobSubmission.
/** Tests that a {@link org.apache.flink.runtime.client.JobClientActorConnectionTimeoutException}
* is thrown after a successful job submission if the JobManager dies.
*
* @throws Exception
*/
@Test(expected = JobClientActorConnectionTimeoutException.class)
public void testConnectionTimeoutAfterJobSubmission() throws Exception {
FiniteDuration jobClientActorTimeout = new FiniteDuration(5, TimeUnit.SECONDS);
FiniteDuration timeout = jobClientActorTimeout.$times(2);
UUID leaderSessionID = UUID.randomUUID();
ActorRef jobManager = system.actorOf(Props.create(JobAcceptingActor.class, leaderSessionID));
TestingLeaderRetrievalService testingLeaderRetrievalService = new TestingLeaderRetrievalService(jobManager.path().toString(), leaderSessionID);
Props jobClientActorProps = JobSubmissionClientActor.createActorProps(testingLeaderRetrievalService, jobClientActorTimeout, false, clientConfig);
ActorRef jobClientActor = system.actorOf(jobClientActorProps);
Future<Object> jobExecutionResult = Patterns.ask(jobClientActor, new JobClientMessages.SubmitJobAndWait(testJobGraph), new Timeout(timeout));
Future<Object> waitFuture = Patterns.ask(jobManager, new RegisterTest(), new Timeout(timeout));
Await.result(waitFuture, timeout);
jobManager.tell(PoisonPill.getInstance(), ActorRef.noSender());
Await.result(jobExecutionResult, timeout);
}
use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class JobClientActorTest method testSubmissionTimeout.
/** Tests that a {@link JobClientActorSubmissionTimeoutException} is thrown when the job cannot
* be submitted by the JobSubmissionClientActor. This is here the case, because the started JobManager
* never replies to a {@link SubmitJob} message.
*
* @throws Exception
*/
@Test(expected = JobClientActorSubmissionTimeoutException.class)
public void testSubmissionTimeout() throws Exception {
FiniteDuration jobClientActorTimeout = new FiniteDuration(5, TimeUnit.SECONDS);
FiniteDuration timeout = jobClientActorTimeout.$times(2);
UUID leaderSessionID = UUID.randomUUID();
ActorRef jobManager = system.actorOf(Props.create(PlainActor.class, leaderSessionID));
TestingLeaderRetrievalService testingLeaderRetrievalService = new TestingLeaderRetrievalService(jobManager.path().toString(), leaderSessionID);
Props jobClientActorProps = JobSubmissionClientActor.createActorProps(testingLeaderRetrievalService, jobClientActorTimeout, false, clientConfig);
ActorRef jobClientActor = system.actorOf(jobClientActorProps);
Future<Object> jobExecutionResult = Patterns.ask(jobClientActor, new JobClientMessages.SubmitJobAndWait(testJobGraph), new Timeout(timeout));
Await.result(jobExecutionResult, timeout);
}
use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class CliFrontend method triggerSavepoint.
/**
* Sends a {@link org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint}
* message to the job manager.
*/
private int triggerSavepoint(SavepointOptions options, JobID jobId, String savepointDirectory) {
try {
ActorGateway jobManager = getJobManagerGateway(options);
logAndSysout("Triggering savepoint for job " + jobId + ".");
Future<Object> response = jobManager.ask(new TriggerSavepoint(jobId, Option.apply(savepointDirectory)), new FiniteDuration(1, TimeUnit.HOURS));
Object result;
try {
logAndSysout("Waiting for response...");
result = Await.result(response, FiniteDuration.Inf());
} catch (Exception e) {
throw new Exception("Triggering a savepoint for the job " + jobId + " failed.", e);
}
if (result instanceof TriggerSavepointSuccess) {
TriggerSavepointSuccess success = (TriggerSavepointSuccess) result;
logAndSysout("Savepoint completed. Path: " + success.savepointPath());
logAndSysout("You can resume your program from this savepoint with the run command.");
return 0;
} else if (result instanceof TriggerSavepointFailure) {
TriggerSavepointFailure failure = (TriggerSavepointFailure) result;
throw failure.cause();
} else {
throw new IllegalStateException("Unknown JobManager response of type " + result.getClass());
}
} catch (Throwable t) {
return handleError(t);
}
}
use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class FlinkClient method getTopologyJobId.
// Flink specific additional methods
/**
* Package internal method to get a Flink {@link JobID} from a Storm topology name.
*
* @param id
* The Storm topology name.
* @return Flink's internally used {@link JobID}.
*/
JobID getTopologyJobId(final String id) {
final Configuration configuration = GlobalConfiguration.loadConfiguration();
if (this.timeout != null) {
configuration.setString(ConfigConstants.AKKA_ASK_TIMEOUT, this.timeout);
}
try {
final ActorRef jobManager = this.getJobManager();
final FiniteDuration askTimeout = this.getTimeout();
final Future<Object> response = Patterns.ask(jobManager, JobManagerMessages.getRequestRunningJobsStatus(), new Timeout(askTimeout));
final Object result;
try {
result = Await.result(response, askTimeout);
} catch (final Exception e) {
throw new RuntimeException("Could not retrieve running jobs from the JobManager", e);
}
if (result instanceof RunningJobsStatus) {
final List<JobStatusMessage> jobs = ((RunningJobsStatus) result).getStatusMessages();
for (final JobStatusMessage status : jobs) {
if (status.getJobName().equals(id)) {
return status.getJobId();
}
}
} else {
throw new RuntimeException("ReqeustRunningJobs requires a response of type " + "RunningJobs. Instead the response is of type " + result.getClass() + ".");
}
} catch (final IOException e) {
throw new RuntimeException("Could not connect to Flink JobManager with address " + this.jobManagerHost + ":" + this.jobManagerPort, e);
}
return null;
}
Aggregations