use of org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService in project flink by apache.
the class MiniCluster method waitUntilTaskManagerRegistrationsComplete.
/**
 * Blocks until at least as many TaskManagers are registered at the leading
 * ResourceManager as this mini cluster has task manager runners.
 *
 * <p>The method first retrieves the current ResourceManager leader through the
 * high-availability services, connects to it via the common RPC service and then
 * polls the number of registered TaskManagers until it reaches
 * {@code taskManagerRunners.length}.
 *
 * @throws Exception if the state check for a running cluster fails, if the leader
 *                   cannot be retrieved or connected to, if querying the
 *                   ResourceManager fails, or if the waiting thread is interrupted
 */
public void waitUntilTaskManagerRegistrationsComplete() throws Exception {
    LeaderRetrievalService rmMasterListener = null;
    Future<LeaderAddressAndId> addressAndIdFuture;
    try {
        // only the listener setup happens under the lock; the blocking waits below do not hold it
        synchronized (lock) {
            checkState(running, "FlinkMiniCluster is not running");
            OneTimeLeaderListenerFuture listenerFuture = new OneTimeLeaderListenerFuture();
            rmMasterListener = haServices.getResourceManagerLeaderRetriever();
            rmMasterListener.start(listenerFuture);
            addressAndIdFuture = listenerFuture.future();
        }
        final LeaderAddressAndId addressAndId = addressAndIdFuture.get();
        final ResourceManagerGateway resourceManager = commonRpcService.connect(addressAndId.leaderAddress(), ResourceManagerGateway.class).get();
        final int numTaskManagersToWaitFor = taskManagerRunners.length;
        // poll and wait until enough TaskManagers are available
        while (true) {
            int numTaskManagersAvailable = resourceManager.getNumberOfRegisteredTaskManagers(addressAndId.leaderId()).get();
            if (numTaskManagersAvailable >= numTaskManagersToWaitFor) {
                break;
            }
            Thread.sleep(2);
        }
    } finally {
        // best-effort cleanup: a failure to stop the listener must not hide the primary outcome
        try {
            if (rmMasterListener != null) {
                rmMasterListener.stop();
            }
        } catch (Exception e) {
            // pass the exception to the logger so the cause of the failed shutdown is not lost
            LOG.warn("Error shutting down leader listener for ResourceManager", e);
        }
    }
}
use of org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService in project flink by apache.
the class ClusterClient method retrieveJob.
/**
 * Reattaches to a running job with the supplied job id and waits for its result.
 *
 * @param jobID The job id of the job to attach to
 * @return The JobExecutionResult for the jobID
 * @throws JobExecutionException if the leader retrieval service or the JobManager gateway
 *                               cannot be obtained, or if an error occurs while
 *                               monitoring the job execution
 */
public JobExecutionResult retrieveJob(JobID jobID) throws JobExecutionException {
    final LeaderRetrievalService leaderRetrievalService;
    try {
        leaderRetrievalService = LeaderRetrievalUtils.createLeaderRetrievalService(flinkConfig);
    } catch (Exception e) {
        throw new JobRetrievalException(jobID, "Could not create the leader retrieval service", e);
    }
    ActorGateway jobManagerGateway;
    try {
        jobManagerGateway = getJobManagerGateway();
    } catch (Exception e) {
        // keep the original failure as cause so callers can see why the gateway lookup failed
        throw new JobRetrievalException(jobID, "Could not retrieve the JobManager Gateway", e);
    }
    final JobListeningContext listeningContext = JobClient.attachToRunningJob(jobID, jobManagerGateway, flinkConfig, actorSystemLoader.get(), leaderRetrievalService, timeout, printStatusDuringExecution);
    return JobClient.awaitJobResult(listeningContext);
}
use of org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService in project flink by apache.
the class ClusterClient method run.
/**
 * Submits the given JobGraph and blocks until the submission call returns a result.
 *
 * <p>The result is also stored in {@code lastJobExecutionResult} before it is returned.
 *
 * @param jobGraph The JobGraph to submit
 * @param classLoader User code class loader to deserialize the results and errors (may contain custom classes).
 * @return the JobExecutionResult of the submitted job
 * @throws ProgramInvocationException if the leader retrieval service cannot be created
 *                                    or the job execution fails
 */
public JobExecutionResult run(JobGraph jobGraph, ClassLoader classLoader) throws ProgramInvocationException {
    waitForClusterToBeReady();

    // Step 1: obtain the service used to locate the leader.
    final LeaderRetrievalService retrievalService;
    try {
        retrievalService = LeaderRetrievalUtils.createLeaderRetrievalService(flinkConfig, true);
    } catch (Exception creationFailure) {
        throw new ProgramInvocationException("Could not create the leader retrieval service", creationFailure);
    }

    // Step 2: announce the submission, submit, and remember the outcome.
    try {
        final String announcement = "Submitting job with JobID: " + jobGraph.getJobID() + ". Waiting for job completion.";
        logAndSysout(announcement);
        this.lastJobExecutionResult = JobClient.submitJobAndWait(
            actorSystemLoader.get(),
            flinkConfig,
            retrievalService,
            jobGraph,
            timeout,
            printStatusDuringExecution,
            classLoader);
        return this.lastJobExecutionResult;
    } catch (JobExecutionException executionFailure) {
        throw new ProgramInvocationException("The program execution failed: " + executionFailure.getMessage(), executionFailure);
    }
}
use of org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService in project flink by apache.
the class WebRuntimeMonitorITCase method testRedirectToLeader.
/**
 * Tests that the monitor associated with the following job manager redirects to the leader.
 *
 * <p>Two job managers with one web monitor each are started against a ZooKeeper test
 * server. The test checks that the leading monitor serves {@code index.html}, that the
 * following monitor answers with a temporary redirect pointing at the leader's port, and
 * that after shutting down the leader's actor system the former follower serves requests
 * itself.
 */
@Test
public void testRedirectToLeader() throws Exception {
    final Deadline deadline = TestTimeout.fromNow();
    ActorSystem[] jobManagerSystem = new ActorSystem[2];
    WebRuntimeMonitor[] webMonitor = new WebRuntimeMonitor[2];
    List<LeaderRetrievalService> leaderRetrievalServices = new ArrayList<>();
    try (TestingServer zooKeeper = new TestingServer()) {
        final Configuration config = ZooKeeperTestUtils.createZooKeeperHAConfig(zooKeeper.getConnectString(), temporaryFolder.getRoot().getPath());
        File logDir = temporaryFolder.newFolder();
        Path logFile = Files.createFile(new File(logDir, "jobmanager.log").toPath());
        Files.createFile(new File(logDir, "jobmanager.out").toPath());
        config.setInteger(ConfigConstants.JOB_MANAGER_WEB_PORT_KEY, 0);
        config.setString(ConfigConstants.JOB_MANAGER_WEB_LOG_PATH_KEY, logFile.toString());
        for (int i = 0; i < jobManagerSystem.length; i++) {
            jobManagerSystem[i] = AkkaUtils.createActorSystem(new Configuration(), new Some<>(new Tuple2<String, Object>("localhost", 0)));
        }
        for (int i = 0; i < webMonitor.length; i++) {
            LeaderRetrievalService lrs = ZooKeeperUtils.createLeaderRetrievalService(config);
            leaderRetrievalServices.add(lrs);
            webMonitor[i] = new WebRuntimeMonitor(config, lrs, jobManagerSystem[i]);
        }
        ActorRef[] jobManager = new ActorRef[2];
        String[] jobManagerAddress = new String[2];
        for (int i = 0; i < jobManager.length; i++) {
            Configuration jmConfig = config.clone();
            // the web monitor was bound to port 0 above; hand its actual port to the job manager
            jmConfig.setInteger(ConfigConstants.JOB_MANAGER_WEB_PORT_KEY, webMonitor[i].getServerPort());
            jobManager[i] = JobManager.startJobManagerActors(jmConfig, jobManagerSystem[i], TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), JobManager.class, MemoryArchivist.class)._1();
            jobManagerAddress[i] = AkkaUtils.getAkkaURL(jobManagerSystem[i], jobManager[i]);
            webMonitor[i].start(jobManagerAddress[i]);
        }
        LeaderRetrievalService lrs = ZooKeeperUtils.createLeaderRetrievalService(config);
        leaderRetrievalServices.add(lrs);
        TestingListener leaderListener = new TestingListener();
        lrs.start(leaderListener);
        leaderListener.waitForNewLeader(deadline.timeLeft().toMillis());
        String leaderAddress = leaderListener.getAddress();
        int leaderIndex = leaderAddress.equals(jobManagerAddress[0]) ? 0 : 1;
        int followerIndex = (leaderIndex + 1) % 2;
        ActorSystem leadingSystem = jobManagerSystem[leaderIndex];
        ActorSystem followerSystem = jobManagerSystem[followerIndex];
        WebMonitor leadingWebMonitor = webMonitor[leaderIndex];
        WebMonitor followerWebMonitor = webMonitor[followerIndex];
        // For test stability reason we have to wait until we are sure that both leader
        // listeners have been notified.
        JobManagerRetriever leadingRetriever = Whitebox.getInternalState(leadingWebMonitor, "retriever");
        JobManagerRetriever followerRetriever = Whitebox.getInternalState(followerWebMonitor, "retriever");
        // Wait for the initial notifications
        waitForLeaderNotification(leadingSystem, jobManager[leaderIndex], leadingRetriever, deadline);
        waitForLeaderNotification(leadingSystem, jobManager[leaderIndex], followerRetriever, deadline);
        try (HttpTestClient leaderClient = new HttpTestClient("localhost", leadingWebMonitor.getServerPort());
                HttpTestClient followingClient = new HttpTestClient("localhost", followerWebMonitor.getServerPort())) {
            // read the whole reference file; close the scanner so the file handle is released
            String expected;
            try (Scanner scanner = new Scanner(new File(MAIN_RESOURCES_PATH + "/index.html"))) {
                expected = scanner.useDelimiter("\\A").next();
            }
            // Request the file from the leading web server
            leaderClient.sendGetRequest("index.html", deadline.timeLeft());
            HttpTestClient.SimpleHttpResponse response = leaderClient.getNextResponse(deadline.timeLeft());
            assertEquals(HttpResponseStatus.OK, response.getStatus());
            assertEquals(MimeTypes.getMimeTypeForExtension("html"), response.getType());
            assertEquals(expected, response.getContent());
            // Request the file from the following web server
            followingClient.sendGetRequest("index.html", deadline.timeLeft());
            response = followingClient.getNextResponse(deadline.timeLeft());
            assertEquals(HttpResponseStatus.TEMPORARY_REDIRECT, response.getStatus());
            assertTrue(response.getLocation().contains(String.valueOf(leadingWebMonitor.getServerPort())));
            // Kill the leader
            leadingSystem.shutdown();
            // Wait for the notification of the follower
            waitForLeaderNotification(followerSystem, jobManager[followerIndex], followerRetriever, deadline);
            // Same request to the new leader
            followingClient.sendGetRequest("index.html", deadline.timeLeft());
            response = followingClient.getNextResponse(deadline.timeLeft());
            assertEquals(HttpResponseStatus.OK, response.getStatus());
            assertEquals(MimeTypes.getMimeTypeForExtension("html"), response.getType());
            assertEquals(expected, response.getContent());
            // Simple overview request
            followingClient.sendGetRequest("/overview", deadline.timeLeft());
            response = followingClient.getNextResponse(deadline.timeLeft());
            assertEquals(HttpResponseStatus.OK, response.getStatus());
            assertEquals(MimeTypes.getMimeTypeForExtension("json"), response.getType());
            assertTrue(response.getContent().contains("\"taskmanagers\":1") || response.getContent().contains("\"taskmanagers\":0"));
        }
    } finally {
        for (ActorSystem system : jobManagerSystem) {
            if (system != null) {
                system.shutdown();
            }
        }
        for (WebMonitor monitor : webMonitor) {
            // slots stay null when setup failed before the monitor was created; skipping them
            // avoids an NPE that would hide the original failure and skip the cleanup below
            if (monitor != null) {
                monitor.stop();
            }
        }
        for (LeaderRetrievalService lrs : leaderRetrievalServices) {
            lrs.stop();
        }
    }
}
use of org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService in project flink by apache.
the class WebRuntimeMonitorITCase method testLeaderNotAvailable.
/**
 * Tests that the web monitor answers with {@code SERVICE_UNAVAILABLE} and a plain-text
 * body containing "refresh" when it is started with a mocked leader retrieval service.
 *
 * <p>NOTE(review): the mocked {@code LeaderRetrievalService} presumably never reports a
 * leader, which is what puts the monitor into the "leader not available" state — confirm.
 */
@Test
public void testLeaderNotAvailable() throws Exception {
    final Deadline deadline = TestTimeout.fromNow();
    ActorSystem actorSystem = null;
    WebRuntimeMonitor webRuntimeMonitor = null;
    try (TestingServer zooKeeper = new TestingServer()) {
        File logDir = temporaryFolder.newFolder();
        Path logFile = Files.createFile(new File(logDir, "jobmanager.log").toPath());
        Files.createFile(new File(logDir, "jobmanager.out").toPath());
        final Configuration config = new Configuration();
        config.setInteger(ConfigConstants.JOB_MANAGER_WEB_PORT_KEY, 0);
        config.setString(ConfigConstants.JOB_MANAGER_WEB_LOG_PATH_KEY, logFile.toString());
        config.setString(HighAvailabilityOptions.HA_MODE, "ZOOKEEPER");
        config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, zooKeeper.getConnectString());
        actorSystem = AkkaUtils.createDefaultActorSystem();
        LeaderRetrievalService leaderRetrievalService = mock(LeaderRetrievalService.class);
        webRuntimeMonitor = new WebRuntimeMonitor(config, leaderRetrievalService, actorSystem);
        webRuntimeMonitor.start("akka://schmakka");
        try (HttpTestClient client = new HttpTestClient("localhost", webRuntimeMonitor.getServerPort())) {
            client.sendGetRequest("index.html", deadline.timeLeft());
            // bound the wait by the test deadline so a missing response fails the test instead of hanging it
            HttpTestClient.SimpleHttpResponse response = client.getNextResponse(deadline.timeLeft());
            assertEquals(HttpResponseStatus.SERVICE_UNAVAILABLE, response.getStatus());
            assertEquals(MimeTypes.getMimeTypeForExtension("txt"), response.getType());
            assertTrue(response.getContent().contains("refresh"));
        }
    } finally {
        if (actorSystem != null) {
            actorSystem.shutdown();
        }
        if (webRuntimeMonitor != null) {
            webRuntimeMonitor.stop();
        }
    }
}
Aggregations