Search in sources :

Example 51 with ActorGateway

use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.

the class JobSubmissionClientActor method tryToSubmitJob.

/**
 * Asynchronously uploads the job's user jars to the JobManager and then submits the job.
 *
 * <p>The work runs in a future on this actor's dispatcher. If the jar upload fails with an
 * {@link IOException}, a {@code JobResultFailure} wrapping a {@code JobSubmissionException}
 * is sent to this actor and the job is NOT submitted. Otherwise a {@code SubmitJob} message
 * is sent to the JobManager and a submission-timeout message is scheduled for this actor.
 */
private void tryToSubmitJob() {
    LOG.info("Sending message to JobManager {} to submit job {} ({}) and wait for progress", jobManager.path().toString(), jobGraph.getName(), jobGraph.getJobID());
    Futures.future(new Callable<Object>() {

        @Override
        public Object call() throws Exception {
            ActorGateway jobManagerGateway = new AkkaActorGateway(jobManager, leaderSessionID);
            LOG.info("Upload jar files to job manager {}.", jobManager.path());
            try {
                jobGraph.uploadUserJars(jobManagerGateway, timeout, clientConfig);
            } catch (IOException exception) {
                getSelf().tell(decorateMessage(new JobManagerMessages.JobResultFailure(new SerializedThrowable(new JobSubmissionException(jobGraph.getJobID(), "Could not upload the jar files to the job manager.", exception)))), ActorRef.noSender());
                // The failure has been reported to this actor; submitting a job whose
                // jars never reached the JobManager would be pointless, so stop here.
                return null;
            }
            LOG.info("Submit job to the job manager {}.", jobManager.path());
            jobManager.tell(decorateMessage(new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT_AND_STATE_CHANGES)), getSelf());
            // issue a SubmissionTimeout message to check that we submit the job within
            // the given timeout
            getContext().system().scheduler().scheduleOnce(timeout, getSelf(), decorateMessage(JobClientMessages.getSubmissionTimeout()), getContext().dispatcher(), ActorRef.noSender());
            return null;
        }
    }, getContext().dispatcher());
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) IOException(java.io.IOException) IOException(java.io.IOException) SerializedThrowable(org.apache.flink.runtime.util.SerializedThrowable)

Example 52 with ActorGateway

use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.

the class CliFrontend method triggerSavepoint.

/**
 * Sends a {@link org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint}
 * message to the job manager and waits for its answer.
 *
 * @param options            options used to obtain the job manager gateway
 * @param jobId              ID of the job to trigger the savepoint for
 * @param savepointDirectory target directory passed along in the message (wrapped via
 *                           {@code Option.apply})
 * @return {@code 0} if the savepoint completed, otherwise the value returned by
 *         {@code handleError} for the encountered problem
 */
private int triggerSavepoint(SavepointOptions options, JobID jobId, String savepointDirectory) {
    try {
        ActorGateway gateway = getJobManagerGateway(options);
        logAndSysout("Triggering savepoint for job " + jobId + ".");

        Future<Object> pending = gateway.ask(
                new TriggerSavepoint(jobId, Option.apply(savepointDirectory)),
                new FiniteDuration(1, TimeUnit.HOURS));

        Object reply;
        try {
            logAndSysout("Waiting for response...");
            reply = Await.result(pending, FiniteDuration.Inf());
        } catch (Exception e) {
            throw new Exception("Triggering a savepoint for the job " + jobId + " failed.", e);
        }

        if (reply instanceof TriggerSavepointSuccess) {
            String path = ((TriggerSavepointSuccess) reply).savepointPath();
            logAndSysout("Savepoint completed. Path: " + path);
            logAndSysout("You can resume your program from this savepoint with the run command.");
            return 0;
        }

        if (reply instanceof TriggerSavepointFailure) {
            // Rethrow the remote cause; it is handled by the outer catch below.
            throw ((TriggerSavepointFailure) reply).cause();
        }

        throw new IllegalStateException("Unknown JobManager response of type " + reply.getClass());
    } catch (Throwable t) {
        // Every failure, including the ones thrown above, is mapped to an exit code.
        return handleError(t);
    }
}
Also used : TriggerSavepointFailure(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepointFailure) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) TriggerSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint) FiniteDuration(scala.concurrent.duration.FiniteDuration) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) ProgramMissingJobException(org.apache.flink.client.program.ProgramMissingJobException) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) ProgramParametrizationException(org.apache.flink.client.program.ProgramParametrizationException) FileNotFoundException(java.io.FileNotFoundException) InvocationTargetException(java.lang.reflect.InvocationTargetException) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) CliArgsException(org.apache.flink.client.cli.CliArgsException) IOException(java.io.IOException) TriggerSavepointSuccess(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepointSuccess)

Example 53 with ActorGateway

use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.

the class CliFrontend method stop.

/**
 * Executes the STOP action: parses the arguments, resolves the job ID and asks the
 * job manager to stop that job.
 *
 * @param args Command line arguments for the stop action.
 * @return {@code 0} on success or when only help was printed; otherwise the value
 *         returned by {@code handleArgException} / {@code handleError}.
 */
protected int stop(String[] args) {
    LOG.info("Running 'stop' command.");

    StopOptions parsed;
    try {
        parsed = CliFrontendParser.parseStopCommand(args);
    } catch (CliArgsException e) {
        return handleArgException(e);
    } catch (Throwable t) {
        return handleError(t);
    }

    // evaluate help flag
    if (parsed.isPrintHelp()) {
        CliFrontendParser.printHelpForStop();
        return 0;
    }

    // The first remaining argument must be the hex-encoded job ID.
    String[] remaining = parsed.getArgs();
    if (remaining.length <= 0) {
        return handleArgException(new CliArgsException("Missing JobID"));
    }

    JobID jobId;
    try {
        jobId = new JobID(StringUtils.hexStringToByte(remaining[0]));
    } catch (Exception e) {
        return handleError(e);
    }

    try {
        ActorGateway gateway = getJobManagerGateway(parsed);
        Future<Object> pending = gateway.ask(new StopJob(jobId), clientTimeout);
        final Object reply = Await.result(pending, clientTimeout);

        if (reply instanceof StoppingFailure) {
            throw new Exception("Stopping the job with ID " + jobId + " failed.", ((StoppingFailure) reply).cause());
        }
        return 0;
    } catch (Throwable t) {
        return handleError(t);
    }
}
Also used : StopOptions(org.apache.flink.client.cli.StopOptions) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) StoppingFailure(org.apache.flink.runtime.messages.JobManagerMessages.StoppingFailure) CliArgsException(org.apache.flink.client.cli.CliArgsException) StopJob(org.apache.flink.runtime.messages.JobManagerMessages.StopJob) JobID(org.apache.flink.api.common.JobID) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) ProgramMissingJobException(org.apache.flink.client.program.ProgramMissingJobException) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) ProgramParametrizationException(org.apache.flink.client.program.ProgramParametrizationException) FileNotFoundException(java.io.FileNotFoundException) InvocationTargetException(java.lang.reflect.InvocationTargetException) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) CliArgsException(org.apache.flink.client.cli.CliArgsException) IOException(java.io.IOException)

Example 54 with ActorGateway

use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.

the class ClusterClient method getAccumulators.

/**
 * Requests and returns the accumulators for the given job identifier. Accumulators can be
 * requested while a job is running or after it has finished.
 *
 * @param jobID The job identifier of a job.
 * @param loader The class loader for deserializing the accumulator results.
 * @return A Map containing the accumulator's name and its value.
 * @throws Exception if the request cannot be sent, the job manager reports an error,
 *                   or an unexpected response is received.
 */
public Map<String, Object> getAccumulators(JobID jobID, ClassLoader loader) throws Exception {
    ActorGateway gateway = getJobManagerGateway();

    Future<Object> pending;
    try {
        pending = gateway.ask(new RequestAccumulatorResults(jobID), timeout);
    } catch (Exception e) {
        throw new Exception("Failed to query the job manager gateway for accumulators.", e);
    }

    Object reply = Await.result(pending, timeout);

    if (reply instanceof AccumulatorResultsFound) {
        AccumulatorResultsFound found = (AccumulatorResultsFound) reply;
        Map<String, SerializedValue<Object>> serialized = found.result();
        return AccumulatorHelper.deserializeAccumulators(serialized, loader);
    }

    if (reply instanceof AccumulatorResultsErroneous) {
        // Propagate the error reported by the job manager as-is.
        throw ((AccumulatorResultsErroneous) reply).cause();
    }

    throw new Exception("Failed to fetch accumulators for the job " + jobID + ".");
}
Also used : RequestAccumulatorResults(org.apache.flink.runtime.messages.accumulators.RequestAccumulatorResults) AccumulatorResultsFound(org.apache.flink.runtime.messages.accumulators.AccumulatorResultsFound) AccumulatorResultsErroneous(org.apache.flink.runtime.messages.accumulators.AccumulatorResultsErroneous) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) SerializedValue(org.apache.flink.util.SerializedValue) JobRetrievalException(org.apache.flink.runtime.client.JobRetrievalException) URISyntaxException(java.net.URISyntaxException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) IOException(java.io.IOException) CompilerException(org.apache.flink.optimizer.CompilerException)

Example 55 with ActorGateway

use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.

the class ClusterClient method stop.

/**
 * Stops a program on Flink cluster whose job-manager is configured in this client's configuration.
 * Stopping works only for streaming programs. Be aware, that the program might continue to run for
 * a while after sending the stop command, because after sources stopped to emit data all operators
 * need to finish processing.
 *
 * @param jobId
 *            the job ID of the streaming program to stop
 * @throws Exception
 *             If the job ID is invalid (ie, is unknown or refers to a batch job) or if sending the stop signal
 *             failed. That might be due to an I/O problem, ie, the job-manager is unreachable. When the
 *             job manager answers with a stopping failure, the reported cause is attached to the thrown
 *             exception.
 */
public void stop(final JobID jobId) throws Exception {
    final ActorGateway jobManagerGateway = getJobManagerGateway();
    final Future<Object> response;
    try {
        response = jobManagerGateway.ask(new JobManagerMessages.StopJob(jobId), timeout);
    } catch (final Exception e) {
        throw new ProgramInvocationException("Failed to query the job manager gateway.", e);
    }
    final Object result = Await.result(response, timeout);
    if (result instanceof JobManagerMessages.StoppingSuccess) {
        LOG.info("Job stopping with ID " + jobId + " succeeded.");
    } else if (result instanceof JobManagerMessages.StoppingFailure) {
        final Throwable t = ((JobManagerMessages.StoppingFailure) result).cause();
        LOG.info("Job stopping with ID " + jobId + " failed.", t);
        // Keep the original failure as the cause so callers see the full stack trace,
        // not just its message text.
        throw new Exception("Failed to stop the job because of \n" + t.getMessage(), t);
    } else {
        throw new Exception("Unknown message received while stopping: " + result.getClass().getName());
    }
}
Also used : ActorGateway(org.apache.flink.runtime.instance.ActorGateway) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) JobRetrievalException(org.apache.flink.runtime.client.JobRetrievalException) URISyntaxException(java.net.URISyntaxException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) IOException(java.io.IOException) CompilerException(org.apache.flink.optimizer.CompilerException)

Aggregations

ActorGateway (org.apache.flink.runtime.instance.ActorGateway)102 Test (org.junit.Test)81 Configuration (org.apache.flink.configuration.Configuration)44 AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway)41 FiniteDuration (scala.concurrent.duration.FiniteDuration)37 JobID (org.apache.flink.api.common.JobID)36 JavaTestKit (akka.testkit.JavaTestKit)34 ActorRef (akka.actor.ActorRef)30 IOException (java.io.IOException)26 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)25 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)22 JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages)22 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)20 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)17 TaskManagerServicesConfiguration (org.apache.flink.runtime.taskexecutor.TaskManagerServicesConfiguration)16 SubmitJob (org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob)15 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)14 TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor)14 TriggerSavepoint (org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint)13 SubmitTask (org.apache.flink.runtime.messages.TaskMessages.SubmitTask)13