use of org.apache.flink.runtime.messages.JobManagerMessages.JobStatusResponse in project flink by apache.
the class JobManagerHAJobGraphRecoveryITCase method testSubmitJobToNonLeader.
/**
 * Verifies that a job submitted to a non-leading JobManager is picked up by the leading
 * JobManager and afterwards no longer known to the non-leader.
 */
@Test
public void testSubmitJobToNonLeader() throws Exception {
    Configuration config = ZooKeeperTestUtils.createZooKeeperHAConfig(
            ZooKeeper.getConnectString(), FileStateBackendBasePath.getPath());

    // Cluster layout: two JobManagers and a single TaskManager
    config.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, 2);
    config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);

    TestingCluster flink = new TestingCluster(config, false, false);

    try {
        final Deadline deadline = TestTimeOut.fromNow();

        // Bring up the JobManagers and the TaskManager
        flink.start(true);

        JobGraph jobGraph = createBlockingJobGraph();

        List<ActorRef> bothJobManagers = flink.getJobManagersAsJava();
        ActorGateway leadingJobManager = flink.getLeaderGateway(deadline.timeLeft());

        // Whichever of the two JobManagers is not the leader is the one we submit to
        int nonLeaderIndex = bothJobManagers.get(0).equals(leadingJobManager.actor()) ? 1 : 0;
        ActorGateway nonLeadingJobManager =
                new AkkaActorGateway(bothJobManagers.get(nonLeaderIndex), null);

        log.info("Leading job manager: " + leadingJobManager);
        log.info("Non-leading job manager: " + nonLeadingJobManager);

        // Hand the job to the non-leader
        nonLeadingJobManager.tell(new SubmitJob(jobGraph, ListeningBehaviour.DETACHED));

        log.info("Submitted job graph to " + nonLeadingJobManager);

        // The job was submitted to the non-leading JM, but the **leading** JM is the one
        // asked for the RUNNING status. This is the behaviour under test.
        JobManagerActorTestUtils.waitForJobStatus(
                jobGraph.getJobID(), JobStatus.RUNNING, leadingJobManager, deadline.timeLeft());

        log.info("Wait that the non-leader removes the submitted job.");

        // Poll the **non-leading** JM until it reports that it does not know the job,
        // i.e. it has dropped the job graph from its local state.
        boolean removedFromNonLeader = false;
        while (deadline.hasTimeLeft()) {
            JobStatusResponse answer = JobManagerActorTestUtils.requestJobStatus(
                    jobGraph.getJobID(), nonLeadingJobManager, deadline.timeLeft());

            if (answer instanceof JobManagerMessages.JobNotFound) {
                removedFromNonLeader = true;
                break;
            }

            // Still known to the non-leader: log its current status and try again shortly
            JobManagerMessages.CurrentJobStatus current = (JobManagerMessages.CurrentJobStatus) answer;
            log.info(current.status().toString());
            Thread.sleep(100);
        }

        if (!removedFromNonLeader) {
            fail("Non-leading JM was still holding reference to the job graph.");
        }

        // Register for the removal notification before cancelling, then wait for it
        Future<Object> jobRemoved = leadingJobManager.ask(
                new TestingJobManagerMessages.NotifyWhenJobRemoved(jobGraph.getJobID()),
                deadline.timeLeft());

        leadingJobManager.tell(new JobManagerMessages.CancelJob(jobGraph.getJobID()));

        Await.ready(jobRemoved, deadline.timeLeft());
    } finally {
        flink.shutdown();
    }

    // After shutdown, no recovery state may be left behind
    verifyCleanRecoveryState(config);
}
use of org.apache.flink.runtime.messages.JobManagerMessages.JobStatusResponse in project flink by apache.
the class JobManagerActorTestUtils method requestJobStatus.
/**
 * Asks a job manager for the status of a job and returns the typed answer.
 *
 * @param jobId Job ID of the job to request the status of
 * @param jobManager Job manager actor to ask
 * @param timeout Timeout used both for the request and for awaiting its answer
 * @return The {@link JobStatusResponse} from the job manager
 * @throws Exception If there is no answer within the timeout, or an
 *     {@link IllegalStateException} if the answer is not a {@link JobStatusResponse}.
 */
public static JobStatusResponse requestJobStatus(JobID jobId, ActorGateway jobManager, FiniteDuration timeout) throws Exception {
    checkNotNull(jobId, "Job ID");
    checkNotNull(jobManager, "Job manager");
    checkNotNull(timeout, "Timeout");

    // Send the status request and wait for the job manager's answer
    final RequestJobStatus statusRequest = (RequestJobStatus) getRequestJobStatus(jobId);
    final Future<Object> pendingAnswer = jobManager.ask(statusRequest, timeout);
    final Object answer = Await.result(pendingAnswer, timeout);

    // Reject anything that is not a job status response
    if (!(answer instanceof JobStatusResponse)) {
        throw new IllegalStateException("Unexpected response.");
    }
    return (JobStatusResponse) answer;
}
use of org.apache.flink.runtime.messages.JobManagerMessages.JobStatusResponse in project flink by apache.
the class JobManagerActorTestUtils method waitForJobStatus.
/**
 * Waits until the job reports the expected {@link JobStatus}.
 *
 * <p>Repeatedly queries the JobManager via {@link RequestJobStatus} messages. Between two
 * queries the calling thread pauses for at most 100 ms (never longer than the time left
 * until the deadline), both when the job is not known yet and when it is known but has not
 * reached the expected status.
 *
 * @param jobId Job ID of the job to wait for
 * @param expectedJobStatus Expected job status
 * @param jobManager Job manager actor to ask
 * @param timeout Timeout after which the operation fails
 * @throws Exception If the job does not reach the expected status within the timeout
 *     (including the case that it is never found), if the job reaches a different globally
 *     terminal state, or if the job manager answers with an unexpected message.
 */
public static void waitForJobStatus(JobID jobId, JobStatus expectedJobStatus, ActorGateway jobManager, FiniteDuration timeout) throws Exception {
    checkNotNull(jobId, "Job ID");
    checkNotNull(expectedJobStatus, "Expected job status");
    checkNotNull(jobManager, "Job manager");
    checkNotNull(timeout, "Timeout");

    final Deadline deadline = timeout.fromNow();

    while (deadline.hasTimeLeft()) {
        // Request the job status
        JobStatusResponse response = requestJobStatus(jobId, jobManager, deadline.timeLeft());

        if (response instanceof CurrentJobStatus) {
            // Found the job
            JobStatus jobStatus = ((CurrentJobStatus) response).status();

            if (jobStatus == expectedJobStatus) {
                // OK, that's what we were waiting for
                return;
            } else if (jobStatus.isGloballyTerminalState()) {
                // The job can no longer change state, so further waiting is pointless
                throw new IllegalStateException("Job is in terminal state " + jobStatus + ", " + "but was waiting for " + expectedJobStatus + ".");
            }
            // Known but not yet in the expected state: fall through to the pause below
        } else if (!(response instanceof JobNotFound)) {
            throw new IllegalStateException("Unexpected response.");
        }

        // Pause before retrying instead of flooding the job manager with requests.
        // The remaining time can already be zero or negative here if the deadline
        // elapsed while waiting for the answer; Thread.sleep rejects negative values,
        // so only sleep while there is time left.
        long remainingMillis = deadline.timeLeft().toMillis();
        if (remainingMillis > 0) {
            Thread.sleep(Math.min(100L, remainingMillis));
        }
    }

    throw new IllegalStateException("Job " + jobId + " did not reach status " + expectedJobStatus + " within deadline.");
}
Aggregations