Use of org.apache.flink.runtime.messages.JobManagerMessages.CancellationFailure in the Apache Flink project.
Class CliFrontend, method cancel.
/**
 * Executes the CANCEL action.
 *
 * <p>Parses the command line, determines the ID of the job to cancel, sends the
 * matching cancel message (plain or with savepoint) to the JobManager and waits
 * for its response.
 *
 * @param args Command line arguments for the cancel action.
 * @return 0 if only the help text was printed or the job was cancelled, 1 if no
 *         valid job ID could be determined, otherwise whatever the argument/error
 *         handlers return.
 */
protected int cancel(String[] args) {
    LOG.info("Running 'cancel' command.");

    CancelOptions options;
    try {
        options = CliFrontendParser.parseCancelCommand(args);
    } catch (CliArgsException argsProblem) {
        return handleArgException(argsProblem);
    } catch (Throwable unexpected) {
        return handleError(unexpected);
    }

    // A help request short-circuits the actual cancellation.
    if (options.isPrintHelp()) {
        CliFrontendParser.printHelpForCancel();
        return 0;
    }

    String[] positionalArgs = options.getArgs();
    boolean savepointRequested = options.isWithSavepoint();
    String savepointDir = options.getSavepointTargetDirectory();

    // Neither a positional argument nor a savepoint option value: nothing to take a job ID from.
    if (positionalArgs.length == 0 && savepointDir == null) {
        LOG.error("Missing JobID in the command line arguments.");
        System.out.println("Error: Specify a Job ID to cancel a job.");
        return 1;
    }

    JobID jobId;
    if (positionalArgs.length > 0) {
        // Form "cancel [-s <targetDir>] <jobID>": the job ID is the first remaining argument.
        try {
            jobId = new JobID(StringUtils.hexStringToByte(positionalArgs[0]));
        } catch (Exception invalidId) {
            LOG.error("Error: The value for the Job ID is not a valid ID.");
            System.out.println("Error: The value for the Job ID is not a valid ID.");
            return 1;
        }
    } else {
        // Form "cancel -s <jobID>": the value parsed as the savepoint directory is
        // tried as the job ID, and the default savepoint directory is used instead.
        try {
            jobId = new JobID(StringUtils.hexStringToByte(savepointDir));
            savepointDir = null;
        } catch (Exception invalidId) {
            LOG.error("Missing JobID in the command line arguments.");
            System.out.println("Error: Specify a Job ID to cancel a job.");
            return 1;
        }
    }

    try {
        ActorGateway jobManager = getJobManagerGateway(options);

        Object cancelMsg;
        if (!savepointRequested) {
            logAndSysout("Cancelling job " + jobId + ".");
            cancelMsg = new CancelJob(jobId);
        } else {
            logAndSysout(savepointDir == null
                ? "Cancelling job " + jobId + " with savepoint to default savepoint directory."
                : "Cancelling job " + jobId + " with savepoint to " + savepointDir + ".");
            cancelMsg = new CancelJobWithSavepoint(jobId, savepointDir);
        }

        Future<Object> pendingReply = jobManager.ask(cancelMsg, clientTimeout);
        final Object reply = Await.result(pendingReply, clientTimeout);

        // Anything other than a success is turned into an exception and routed
        // through the common error handler below.
        if (!(reply instanceof CancellationSuccess)) {
            if (reply instanceof CancellationFailure) {
                throw new Exception("Canceling the job with ID " + jobId + " failed.", ((CancellationFailure) reply).cause());
            }
            throw new IllegalStateException("Unexpected response: " + reply);
        }

        if (savepointRequested) {
            String savepointPath = ((CancellationSuccess) reply).savepointPath();
            logAndSysout("Cancelled job " + jobId + ". Savepoint stored in " + savepointPath + ".");
        } else {
            logAndSysout("Cancelled job " + jobId + ".");
        }
        return 0;
    } catch (Throwable failure) {
        return handleError(failure);
    }
}
Use of org.apache.flink.runtime.messages.JobManagerMessages.CancellationFailure in the Apache Flink project.
Class JobManagerTest, method testCancelWithSavepoint.
/**
 * Tests cancel-with-savepoint against a JobManager that has a default savepoint
 * directory configured: the cancel request is retried while it is declined because
 * not all required tasks are running, and the returned savepoint path must exist
 * on disk afterwards.
 */
@Test
public void testCancelWithSavepoint() throws Exception {
    File savepointRoot = tmpFolder.newFolder();
    FiniteDuration askTimeout = new FiniteDuration(30, TimeUnit.SECONDS);

    Configuration clusterConfig = new Configuration();
    clusterConfig.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, savepointRoot.getAbsolutePath());

    ActorSystem system = null;
    ActorGateway jmGateway = null;
    ActorGateway archiveGateway = null;
    ActorGateway tmGateway = null;

    try {
        system = AkkaUtils.createLocalActorSystem(new Configuration());

        // Start JobManager + archivist, then a TaskManager that looks up the JobManager by path.
        Tuple2<ActorRef, ActorRef> masterActors = JobManager.startJobManagerActors(
            clusterConfig,
            system,
            TestingUtils.defaultExecutor(),
            TestingUtils.defaultExecutor(),
            Option.apply("jm"),
            Option.apply("arch"),
            TestingJobManager.class,
            TestingMemoryArchivist.class);
        jmGateway = new AkkaActorGateway(masterActors._1(), null);
        archiveGateway = new AkkaActorGateway(masterActors._2(), null);

        ActorRef tmRef = TaskManager.startTaskManagerComponentsAndActor(
            clusterConfig,
            ResourceID.generate(),
            system,
            "localhost",
            Option.apply("tm"),
            Option.<LeaderRetrievalService>apply(new StandaloneLeaderRetrievalService(jmGateway.path())),
            true,
            TestingTaskManager.class);
        tmGateway = new AkkaActorGateway(tmRef, null);

        // Block until the TaskManager has registered at the JobManager.
        Object registeredRequest = new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jmGateway.actor());
        Await.ready(tmGateway.ask(registeredRequest, askTimeout), askTimeout);

        // Single-vertex job with a blocking stateful source.
        JobVertex source = new JobVertex("Source");
        source.setInvokableClass(BlockingStatefulInvokable.class);
        source.setParallelism(1);

        JobGraph graph = new JobGraph("TestingJob", source);
        JobSnapshottingSettings checkpointing = new JobSnapshottingSettings(
            Collections.singletonList(source.getID()),
            Collections.singletonList(source.getID()),
            Collections.singletonList(source.getID()),
            3600000,
            3600000,
            0,
            Integer.MAX_VALUE,
            ExternalizedCheckpointSettings.none(),
            null,
            true);
        graph.setSnapshotSettings(checkpointing);

        // Submit the job (detached) and wait for the acknowledgement.
        Object submitRequest = new JobManagerMessages.SubmitJob(graph, ListeningBehaviour.DETACHED);
        Await.result(jmGateway.ask(submitRequest, askTimeout), askTimeout);

        // Wait for all tasks to be running.
        Object runningRequest = new TestingJobManagerMessages.WaitForAllVerticesToBeRunning(graph.getJobID());
        Await.result(jmGateway.ask(runningRequest, askTimeout), askTimeout);

        // Register for the CANCELED status before triggering the cancellation.
        Object canceledRequest = new NotifyWhenJobStatus(graph.getJobID(), JobStatus.CANCELED);
        Future<Object> canceledNotification = jmGateway.ask(canceledRequest, askTimeout);

        // Cancel with savepoint, retrying up to 10 times while tasks are not yet all running.
        String savepointPath = null;
        for (int attempt = 0; attempt < 10; attempt++) {
            Object cancelRequest = new JobManagerMessages.CancelJobWithSavepoint(graph.getJobID(), null);
            CancellationResponse reply =
                (CancellationResponse) Await.result(jmGateway.ask(cancelRequest, askTimeout), askTimeout);

            if (!(reply instanceof CancellationFailure)) {
                savepointPath = ((CancellationSuccess) reply).savepointPath();
                break;
            }

            CancellationFailure failure = (CancellationFailure) reply;
            boolean tasksNotReady = failure.cause().getMessage()
                .contains(CheckpointDeclineReason.NOT_ALL_REQUIRED_TASKS_RUNNING.message());
            if (!tasksNotReady) {
                failure.cause().printStackTrace();
                fail("Failed to cancel job: " + failure.cause().getMessage());
            } else {
                // back off briefly before the next attempt
                Thread.sleep(200);
            }
        }

        // A savepoint path must have been reported.
        assertNotEquals("Savepoint not triggered", null, savepointPath);

        // Wait for the job status notification before inspecting the file system.
        Await.ready(canceledNotification, askTimeout);

        boolean savepointExists = new File(savepointPath).exists();
        assertEquals(true, savepointExists);
    } finally {
        if (system != null) {
            system.shutdown();
        }

        if (archiveGateway != null) {
            archiveGateway.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }

        if (jmGateway != null) {
            jmGateway.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }

        if (tmGateway != null) {
            tmGateway.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
    }
}
Use of org.apache.flink.runtime.messages.JobManagerMessages.CancellationFailure in the Apache Flink project.
Class JobManagerTest, method testCancelWithSavepointNoDirectoriesConfigured.
/**
 * Tests that cancel-with-savepoint answers with a meaningful failure when neither
 * a default savepoint directory is configured nor a target directory is passed
 * with the request.
 */
@Test
public void testCancelWithSavepointNoDirectoriesConfigured() throws Exception {
    FiniteDuration askTimeout = new FiniteDuration(30, TimeUnit.SECONDS);
    // Deliberately empty: no savepoint directory is set.
    Configuration clusterConfig = new Configuration();

    ActorSystem system = null;
    ActorGateway jmGateway = null;
    ActorGateway archiveGateway = null;
    ActorGateway tmGateway = null;

    try {
        system = AkkaUtils.createLocalActorSystem(new Configuration());

        // Start JobManager + archivist, then a TaskManager that looks up the JobManager by path.
        Tuple2<ActorRef, ActorRef> masterActors = JobManager.startJobManagerActors(
            clusterConfig,
            system,
            TestingUtils.defaultExecutor(),
            TestingUtils.defaultExecutor(),
            Option.apply("jm"),
            Option.apply("arch"),
            TestingJobManager.class,
            TestingMemoryArchivist.class);
        jmGateway = new AkkaActorGateway(masterActors._1(), null);
        archiveGateway = new AkkaActorGateway(masterActors._2(), null);

        ActorRef tmRef = TaskManager.startTaskManagerComponentsAndActor(
            clusterConfig,
            ResourceID.generate(),
            system,
            "localhost",
            Option.apply("tm"),
            Option.<LeaderRetrievalService>apply(new StandaloneLeaderRetrievalService(jmGateway.path())),
            true,
            TestingTaskManager.class);
        tmGateway = new AkkaActorGateway(tmRef, null);

        // Block until the TaskManager has registered at the JobManager.
        Object registeredRequest = new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jmGateway.actor());
        Await.ready(tmGateway.ask(registeredRequest, askTimeout), askTimeout);

        // Single-vertex job with a blocking stateful source.
        JobVertex source = new JobVertex("Source");
        source.setInvokableClass(BlockingStatefulInvokable.class);
        source.setParallelism(1);

        JobGraph graph = new JobGraph("TestingJob", source);
        JobSnapshottingSettings checkpointing = new JobSnapshottingSettings(
            Collections.singletonList(source.getID()),
            Collections.singletonList(source.getID()),
            Collections.singletonList(source.getID()),
            3600000,
            3600000,
            0,
            Integer.MAX_VALUE,
            ExternalizedCheckpointSettings.none(),
            null,
            true);
        graph.setSnapshotSettings(checkpointing);

        // Submit the job (detached) and wait for the acknowledgement.
        Object submitRequest = new JobManagerMessages.SubmitJob(graph, ListeningBehaviour.DETACHED);
        Await.result(jmGateway.ask(submitRequest, askTimeout), askTimeout);

        // Wait for all tasks to be running.
        Object runningRequest = new TestingJobManagerMessages.WaitForAllVerticesToBeRunning(graph.getJobID());
        Await.result(jmGateway.ask(runningRequest, askTimeout), askTimeout);

        // Cancel with savepoint, passing no target directory.
        Object cancelRequest = new JobManagerMessages.CancelJobWithSavepoint(graph.getJobID(), null);
        CancellationResponse reply =
            (CancellationResponse) Await.result(jmGateway.ask(cancelRequest, askTimeout), askTimeout);

        if (!(reply instanceof CancellationFailure)) {
            fail("Unexpected cancellation response from JobManager: " + reply);
        } else {
            // The failure must be an IllegalStateException that mentions the savepoint directory.
            CancellationFailure failure = (CancellationFailure) reply;
            assertTrue(failure.cause() instanceof IllegalStateException);
            assertTrue(failure.cause().getMessage().contains("savepoint directory"));
        }
    } finally {
        if (system != null) {
            system.shutdown();
        }

        if (archiveGateway != null) {
            archiveGateway.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }

        if (jmGateway != null) {
            jmGateway.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }

        if (tmGateway != null) {
            tmGateway.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
    }
}
Aggregations