use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.
the class JobSubmissionClientActor method tryToSubmitJob.
/**
 * Asynchronously uploads the job's user jars to the job manager and then submits the job.
 *
 * <p>The work runs in a future on this actor's dispatcher. If the jar upload fails with an
 * {@link IOException}, a {@code JobResultFailure} wrapping a {@code JobSubmissionException}
 * is sent to this actor and the submission is aborted. Otherwise a {@code SubmitJob} message
 * is sent to the job manager and a one-shot {@code SubmissionTimeout} message is scheduled
 * for this actor after {@code timeout}.
 */
private void tryToSubmitJob() {
    LOG.info("Sending message to JobManager {} to submit job {} ({}) and wait for progress", jobManager.path().toString(), jobGraph.getName(), jobGraph.getJobID());
    // NOTE(review): getContext() is invoked below from the future's thread rather than from
    // the actor's own message handling; Akka documents the actor context as not thread-safe.
    // Consider capturing the scheduler/dispatcher before creating the future - TODO confirm.
    Futures.future(new Callable<Object>() {
        @Override
        public Object call() throws Exception {
            ActorGateway jobManagerGateway = new AkkaActorGateway(jobManager, leaderSessionID);
            LOG.info("Upload jar files to job manager {}.", jobManager.path());
            try {
                jobGraph.uploadUserJars(jobManagerGateway, timeout, clientConfig);
            } catch (IOException exception) {
                getSelf().tell(
                    decorateMessage(
                        new JobManagerMessages.JobResultFailure(
                            new SerializedThrowable(
                                new JobSubmissionException(
                                    jobGraph.getJobID(),
                                    "Could not upload the jar files to the job manager.",
                                    exception)))),
                    ActorRef.noSender());
                // The failure has already been reported to this actor; do not go on to
                // submit a job whose jars could not be uploaded.
                return null;
            }
            LOG.info("Submit job to the job manager {}.", jobManager.path());
            jobManager.tell(
                decorateMessage(
                    new JobManagerMessages.SubmitJob(
                        jobGraph,
                        ListeningBehaviour.EXECUTION_RESULT_AND_STATE_CHANGES)),
                getSelf());
            // issue a SubmissionTimeout message to check that we submit the job within
            // the given timeout
            getContext().system().scheduler().scheduleOnce(
                timeout,
                getSelf(),
                decorateMessage(JobClientMessages.getSubmissionTimeout()),
                getContext().dispatcher(),
                ActorRef.noSender());
            return null;
        }
    }, getContext().dispatcher());
}
use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.
the class CliFrontend method triggerSavepoint.
/**
 * Sends a {@link org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint}
 * message to the job manager and waits for the reply.
 *
 * @param options Options used to look up the job manager gateway.
 * @param jobId ID of the job for which the savepoint is triggered.
 * @param savepointDirectory Directory passed along with the request (wrapped in an
 *                           {@code Option}).
 * @return {@code 0} if the job manager reports success, otherwise the result of
 *         {@code handleError} for the encountered failure.
 */
private int triggerSavepoint(SavepointOptions options, JobID jobId, String savepointDirectory) {
    try {
        final ActorGateway gateway = getJobManagerGateway(options);

        logAndSysout("Triggering savepoint for job " + jobId + ".");

        final TriggerSavepoint request = new TriggerSavepoint(jobId, Option.apply(savepointDirectory));
        final Future<Object> pendingReply = gateway.ask(request, new FiniteDuration(1, TimeUnit.HOURS));

        final Object reply;
        try {
            logAndSysout("Waiting for response...");
            reply = Await.result(pendingReply, FiniteDuration.Inf());
        } catch (Exception e) {
            throw new Exception("Triggering a savepoint for the job " + jobId + " failed.", e);
        }

        // Failure reply: rethrow the reported cause so the outer handler deals with it.
        if (!(reply instanceof TriggerSavepointSuccess)) {
            if (reply instanceof TriggerSavepointFailure) {
                throw ((TriggerSavepointFailure) reply).cause();
            }
            throw new IllegalStateException("Unknown JobManager response of type " + reply.getClass());
        }

        final TriggerSavepointSuccess completed = (TriggerSavepointSuccess) reply;
        logAndSysout("Savepoint completed. Path: " + completed.savepointPath());
        logAndSysout("You can resume your program from this savepoint with the run command.");
        return 0;
    } catch (Throwable t) {
        return handleError(t);
    }
}
use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.
the class CliFrontend method stop.
/**
 * Executes the STOP action: parses the command line, resolves the job ID from the first
 * remaining argument and sends a {@code StopJob} request to the job manager.
 *
 * @param args Command line arguments for the stop action.
 * @return {@code 0} if help was printed or the request did not yield a
 *         {@code StoppingFailure}; otherwise the result of the matching error handler.
 */
protected int stop(String[] args) {
    LOG.info("Running 'stop' command.");

    final StopOptions options;
    try {
        options = CliFrontendParser.parseStopCommand(args);
    } catch (CliArgsException e) {
        return handleArgException(e);
    } catch (Throwable t) {
        return handleError(t);
    }

    // evaluate help flag
    if (options.isPrintHelp()) {
        CliFrontendParser.printHelpForStop();
        return 0;
    }

    // The job ID is expected as the first remaining argument.
    final String[] remainingArgs = options.getArgs();
    if (!(remainingArgs.length > 0)) {
        return handleArgException(new CliArgsException("Missing JobID"));
    }

    final JobID jobId;
    try {
        jobId = new JobID(StringUtils.hexStringToByte(remainingArgs[0]));
    } catch (Exception e) {
        return handleError(e);
    }

    try {
        final ActorGateway gateway = getJobManagerGateway(options);
        final Future<Object> pendingReply = gateway.ask(new StopJob(jobId), clientTimeout);
        final Object reply = Await.result(pendingReply, clientTimeout);

        if (!(reply instanceof StoppingFailure)) {
            return 0;
        }
        throw new Exception("Stopping the job with ID " + jobId + " failed.", ((StoppingFailure) reply).cause());
    } catch (Throwable t) {
        return handleError(t);
    }
}
use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.
the class ClusterClient method getAccumulators.
/**
 * Requests and returns the accumulators for the given job identifier. Accumulators can be
 * requested while a job is running or after it has finished.
 *
 * @param jobID The job identifier of a job.
 * @param loader The class loader for deserializing the accumulator results.
 * @return A Map containing the accumulator's name and its value.
 * @throws Exception If the request could not be sent, the job manager reported an error,
 *                   or an unexpected reply was received.
 */
public Map<String, Object> getAccumulators(JobID jobID, ClassLoader loader) throws Exception {
    final ActorGateway gateway = getJobManagerGateway();

    final Future<Object> pendingReply;
    try {
        pendingReply = gateway.ask(new RequestAccumulatorResults(jobID), timeout);
    } catch (Exception e) {
        throw new Exception("Failed to query the job manager gateway for accumulators.", e);
    }

    final Object reply = Await.result(pendingReply, timeout);

    if (reply instanceof AccumulatorResultsFound) {
        final AccumulatorResultsFound found = (AccumulatorResultsFound) reply;
        final Map<String, SerializedValue<Object>> serialized = found.result();
        return AccumulatorHelper.deserializeAccumulators(serialized, loader);
    }

    if (reply instanceof AccumulatorResultsErroneous) {
        // Propagate the error reported by the job manager as-is.
        throw ((AccumulatorResultsErroneous) reply).cause();
    }

    throw new Exception("Failed to fetch accumulators for the job " + jobID + ".");
}
use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.
the class ClusterClient method stop.
/**
 * Stops a program on Flink cluster whose job-manager is configured in this client's configuration.
 * Stopping works only for streaming programs. Be aware, that the program might continue to run for
 * a while after sending the stop command, because after sources stopped to emit data all operators
 * need to finish processing.
 *
 * @param jobId
 *            the job ID of the streaming program to stop
 * @throws Exception
 *             If the job ID is invalid (i.e., is unknown or refers to a batch job) or if sending the
 *             stop signal failed. That might be due to an I/O problem, i.e., the job-manager is
 *             unreachable. When the job manager reports a stopping failure, the reported cause is
 *             attached to the thrown exception.
 */
public void stop(final JobID jobId) throws Exception {
    final ActorGateway jobManagerGateway = getJobManagerGateway();

    final Future<Object> response;
    try {
        response = jobManagerGateway.ask(new JobManagerMessages.StopJob(jobId), timeout);
    } catch (final Exception e) {
        throw new ProgramInvocationException("Failed to query the job manager gateway.", e);
    }

    final Object result = Await.result(response, timeout);

    if (result instanceof JobManagerMessages.StoppingSuccess) {
        LOG.info("Job stopping with ID " + jobId + " succeeded.");
    } else if (result instanceof JobManagerMessages.StoppingFailure) {
        final Throwable t = ((JobManagerMessages.StoppingFailure) result).cause();
        LOG.info("Job stopping with ID " + jobId + " failed.", t);
        // Chain the original failure so callers keep its type and stack trace,
        // not just its message text.
        throw new Exception("Failed to stop the job because of \n" + t.getMessage(), t);
    } else {
        throw new Exception("Unknown message received while stopping: " + result.getClass().getName());
    }
}
Aggregations