use of akka.util.Timeout in project flink by apache.
the class YarnClusterClient method getNewMessages.
/**
 * Drains the messages that the local ApplicationClient actor currently has available.
 *
 * <p>The actor is asked repeatedly with a {@code LocalGetYarnMessage} request until it
 * answers with an empty {@code Option} or until a request fails. A failed request is
 * logged and ends the polling; the messages collected so far are still returned.
 *
 * @return the collected messages, each formatted as {@code "[<date>] <message>"};
 *         empty if nothing was retrieved
 * @throws IllegalStateException if the client is not connected to the ApplicationMaster
 * @throws RuntimeException if the client has already been shut down, or if the actor
 *         answers with something other than an {@code Option}
 */
@Override
public List<String> getNewMessages() {
    if (hasBeenShutdown()) {
        throw new RuntimeException("The YarnClusterClient has already been stopped");
    }
    if (!isConnected) {
        // The previous message claimed the opposite of the condition being reported.
        throw new IllegalStateException("The cluster has not been connected to the ApplicationMaster.");
    }
    List<String> ret = new ArrayList<String>();
    // get messages from ApplicationClient (locally)
    while (true) {
        Object result;
        try {
            Future<Object> response = Patterns.ask(
                applicationClient.get(),
                YarnMessages.getLocalGetYarnMessage(),
                new Timeout(akkaDuration));
            result = Await.result(response, akkaDuration);
        } catch (Exception ioe) {
            // Best effort: give up polling but hand back what was gathered so far.
            LOG.warn("Error retrieving the YARN messages locally", ioe);
            break;
        }
        if (!(result instanceof Option)) {
            throw new RuntimeException("LocalGetYarnMessage requires a response of type "
                + "Option. Instead the response is of type " + result.getClass() + ".");
        }
        Option<?> messageOption = (Option<?>) result;
        LOG.debug("Received message option {}", messageOption);
        if (messageOption.isEmpty()) {
            // An empty option ends the polling loop.
            break;
        }
        Object obj = messageOption.get();
        if (obj instanceof InfoMessage) {
            InfoMessage msg = (InfoMessage) obj;
            ret.add("[" + msg.date() + "] " + msg.message());
        } else {
            LOG.warn("LocalGetYarnMessage returned unexpected type: {}", messageOption);
        }
    }
    return ret;
}
use of akka.util.Timeout in project flink by apache.
the class YarnClusterClient method shutdownCluster.
/**
 * Shuts down the Yarn application.
 *
 * <p>Only the first invocation does any work; later calls return immediately. The steps,
 * in order, are:
 * <ol>
 *   <li>remove the client shutdown hook,</li>
 *   <li>send a {@code LocalStopYarnSession} request to the application client actor and
 *       wait for it to complete,</li>
 *   <li>delete the local Yarn properties file,</li>
 *   <li>delete the session files directory (if one is set),</li>
 *   <li>stop the polling runner,</li>
 *   <li>log the final application report, and</li>
 *   <li>stop the {@code yarnClient} and null the reference.</li>
 * </ol>
 * Failures in the individual steps are logged and do not abort the remaining steps.
 *
 * @throws IllegalStateException if the client is not connected to the ApplicationMaster
 */
public void shutdownCluster() {
    if (hasBeenShutDown.getAndSet(true)) {
        return;
    }
    if (!isConnected) {
        throw new IllegalStateException("The cluster has not been connected to the ApplicationMaster.");
    }
    try {
        Runtime.getRuntime().removeShutdownHook(clientShutdownHook);
    } catch (IllegalStateException ignored) {
        // we are already in the shutdown hook
    }
    LOG.info("Sending shutdown request to the Application Master");
    try {
        Future<Object> response = Patterns.ask(
            applicationClient.get(),
            new YarnMessages.LocalStopYarnSession(getApplicationStatus(), "Flink YARN Client requested shutdown"),
            new Timeout(akkaDuration));
        Await.ready(response, akkaDuration);
    } catch (Exception e) {
        LOG.warn("Error while stopping YARN cluster.", e);
    }
    try {
        File propertiesFile = FlinkYarnSessionCli.getYarnPropertiesLocation(flinkConfig);
        if (propertiesFile.isFile()) {
            if (propertiesFile.delete()) {
                LOG.info("Deleted Yarn properties file at {}", propertiesFile.getAbsoluteFile().toString());
            } else {
                LOG.warn("Couldn't delete Yarn properties file at {}", propertiesFile.getAbsoluteFile().toString());
            }
        }
    } catch (Exception e) {
        LOG.warn("Exception while deleting the JobManager address file", e);
    }
    if (sessionFilesDir != null) {
        LOG.info("Deleting files in {}", sessionFilesDir);
        // try-with-resources closes the file system handle even when the delete fails.
        try (FileSystem shutFS = FileSystem.get(hadoopConfig)) {
            // delete conf and jar file.
            shutFS.delete(sessionFilesDir, true);
        } catch (IOException e) {
            LOG.error("Could not delete the Flink jar and configuration files in HDFS..", e);
        }
    } else {
        LOG.warn("Session file directory not set. Not deleting session files");
    }
    try {
        pollingRunner.stopRunner();
        // Wait at most one second for the polling runner to finish.
        pollingRunner.join(1000);
    } catch (InterruptedException e) {
        LOG.warn("Shutdown of the polling runner was interrupted", e);
        // Restore the interrupt flag for callers further up the stack.
        Thread.currentThread().interrupt();
    }
    try {
        ApplicationReport appReport = yarnClient.getApplicationReport(appId);
        LOG.info("Application " + appId + " finished with state " + appReport.getYarnApplicationState()
            + " and final state " + appReport.getFinalApplicationStatus()
            + " at " + appReport.getFinishTime());
        if (appReport.getYarnApplicationState() == YarnApplicationState.FAILED
                || appReport.getYarnApplicationState() == YarnApplicationState.KILLED) {
            LOG.warn("Application failed. Diagnostics " + appReport.getDiagnostics());
            LOG.warn("If log aggregation is activated in the Hadoop cluster, we recommend to retrieve "
                + "the full application log using this command:" + System.lineSeparator()
                + "\tyarn logs -applicationId " + appReport.getApplicationId() + System.lineSeparator()
                + "(It sometimes takes a few seconds until the logs are aggregated)");
        }
    } catch (Exception e) {
        LOG.warn("Couldn't get final report", e);
    }
    LOG.info("YARN Client is shutting down");
    // actorRunner is using the yarnClient.
    yarnClient.stop();
    // set null to clearly see if somebody wants to access it afterwards.
    yarnClient = null;
}
use of akka.util.Timeout in project flink by apache.
the class JobClientActorTest method testConnectionTimeoutAfterJobSubmission.
/**
 * Verifies that a {@link org.apache.flink.runtime.client.JobClientActorConnectionTimeoutException}
 * is raised when the JobManager is killed after the job has been submitted to it.
 *
 * @throws Exception the expected connection timeout exception, or any unexpected failure
 */
@Test(expected = JobClientActorConnectionTimeoutException.class)
public void testConnectionTimeoutAfterJobSubmission() throws Exception {
    final FiniteDuration clientTimeout = new FiniteDuration(5, TimeUnit.SECONDS);
    // Futures are awaited for twice as long as the client actor's own timeout.
    final FiniteDuration awaitTimeout = clientTimeout.$times(2);
    final UUID sessionId = UUID.randomUUID();

    final ActorRef acceptingJobManager =
        system.actorOf(Props.create(JobAcceptingActor.class, sessionId));
    final TestingLeaderRetrievalService leaderRetrieval =
        new TestingLeaderRetrievalService(acceptingJobManager.path().toString(), sessionId);

    final ActorRef submissionClient = system.actorOf(
        JobSubmissionClientActor.createActorProps(leaderRetrieval, clientTimeout, false, clientConfig));

    final Future<Object> submissionResult = Patterns.ask(
        submissionClient,
        new JobClientMessages.SubmitJobAndWait(testJobGraph),
        new Timeout(awaitTimeout));

    // Block on the JobManager's reply to RegisterTest before killing it.
    final Future<Object> registration =
        Patterns.ask(acceptingJobManager, new RegisterTest(), new Timeout(awaitTimeout));
    Await.result(registration, awaitTimeout);

    acceptingJobManager.tell(PoisonPill.getInstance(), ActorRef.noSender());

    // Expected to throw the connection timeout exception declared on the annotation.
    Await.result(submissionResult, awaitTimeout);
}
use of akka.util.Timeout in project flink by apache.
the class JobClientActorTest method testSubmissionTimeout.
/**
 * Verifies that a {@link JobClientActorSubmissionTimeoutException} is raised when the
 * JobSubmissionClientActor cannot submit the job. The JobManager started here never
 * answers a {@link SubmitJob} message, so the submission has to time out.
 *
 * @throws Exception the expected submission timeout exception, or any unexpected failure
 */
@Test(expected = JobClientActorSubmissionTimeoutException.class)
public void testSubmissionTimeout() throws Exception {
    final FiniteDuration clientTimeout = new FiniteDuration(5, TimeUnit.SECONDS);
    // The future is awaited for twice as long as the client actor's own timeout.
    final FiniteDuration awaitTimeout = clientTimeout.$times(2);
    final UUID sessionId = UUID.randomUUID();

    final ActorRef silentJobManager = system.actorOf(Props.create(PlainActor.class, sessionId));
    final TestingLeaderRetrievalService leaderRetrieval =
        new TestingLeaderRetrievalService(silentJobManager.path().toString(), sessionId);

    final ActorRef submissionClient = system.actorOf(
        JobSubmissionClientActor.createActorProps(leaderRetrieval, clientTimeout, false, clientConfig));

    final Future<Object> submissionResult = Patterns.ask(
        submissionClient,
        new JobClientMessages.SubmitJobAndWait(testJobGraph),
        new Timeout(awaitTimeout));

    // Expected to throw the submission timeout exception declared on the annotation.
    Await.result(submissionResult, awaitTimeout);
}
use of akka.util.Timeout in project flink by apache.
the class FlinkClient method getTopologyJobId.
// Flink specific additional methods
/**
 * Package internal method to get a Flink {@link JobID} from a Storm topology name.
 *
 * <p>Asks the JobManager for the status of all running jobs and returns the ID of the
 * first job whose name equals the given topology name.
 *
 * @param id
 *            The Storm topology name.
 * @return Flink's internally used {@link JobID}, or {@code null} if no running job has
 *         the given name.
 * @throws RuntimeException
 *             if the JobManager cannot be reached, the request fails or times out, or
 *             the JobManager answers with an unexpected message type.
 */
JobID getTopologyJobId(final String id) {
    // NOTE(review): this configuration object is not read again in this method; it is kept
    // because loadConfiguration() may have side effects — confirm before removing.
    final Configuration configuration = GlobalConfiguration.loadConfiguration();
    if (this.timeout != null) {
        configuration.setString(ConfigConstants.AKKA_ASK_TIMEOUT, this.timeout);
    }
    try {
        final ActorRef jobManager = this.getJobManager();
        final FiniteDuration askTimeout = this.getTimeout();
        final Future<Object> response = Patterns.ask(
            jobManager,
            JobManagerMessages.getRequestRunningJobsStatus(),
            new Timeout(askTimeout));
        final Object result;
        try {
            result = Await.result(response, askTimeout);
        } catch (final Exception e) {
            throw new RuntimeException("Could not retrieve running jobs from the JobManager", e);
        }
        if (!(result instanceof RunningJobsStatus)) {
            // Name the request and the expected response type as they are actually used above.
            throw new RuntimeException("RequestRunningJobsStatus requires a response of type "
                + "RunningJobsStatus. Instead the response is of type " + result.getClass() + ".");
        }
        final List<JobStatusMessage> jobs = ((RunningJobsStatus) result).getStatusMessages();
        for (final JobStatusMessage status : jobs) {
            if (status.getJobName().equals(id)) {
                return status.getJobId();
            }
        }
    } catch (final IOException e) {
        throw new RuntimeException("Could not connect to Flink JobManager with address "
            + this.jobManagerHost + ":" + this.jobManagerPort, e);
    }
    // No running job carries the requested topology name.
    return null;
}
Aggregations