Use of org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess in project flink by apache.
From class CliFrontend, method cancel.
/**
 * Executes the CANCEL action.
 *
 * @param args Command line arguments for the cancel action.
 */
protected int cancel(String[] args) {
	LOG.info("Running 'cancel' command.");

	CancelOptions options;
	try {
		options = CliFrontendParser.parseCancelCommand(args);
	} catch (CliArgsException e) {
		return handleArgException(e);
	} catch (Throwable t) {
		return handleError(t);
	}

	// evaluate help flag
	if (options.isPrintHelp()) {
		CliFrontendParser.printHelpForCancel();
		return 0;
	}

	String[] cleanedArgs = options.getArgs();

	boolean withSavepoint = options.isWithSavepoint();
	String targetDirectory = options.getSavepointTargetDirectory();

	JobID jobId;

	// Figure out the job ID. This is slightly tricky, because the optional
	// savepoint target directory makes the command line ambiguous:
	// - cancel -s <targetDir> <jobID> => custom target dir (parsed correctly)
	// - cancel -s <jobID>             => default target dir (<jobID> parsed as target dir)
	// - cancel <jobID>                => no target dir
	if (cleanedArgs.length > 0) {
		String jobIdString = cleanedArgs[0];
		try {
			jobId = new JobID(StringUtils.hexStringToByte(jobIdString));
		} catch (Exception e) {
			LOG.error("Error: The value for the Job ID is not a valid ID.");
			System.out.println("Error: The value for the Job ID is not a valid ID.");
			return 1;
		}
	} else if (targetDirectory != null) {
		// Try this for case: cancel -s <jobID> (default savepoint target dir)
		String jobIdString = targetDirectory;
		try {
			jobId = new JobID(StringUtils.hexStringToByte(jobIdString));
			targetDirectory = null;
		} catch (Exception e) {
			LOG.error("Missing JobID in the command line arguments.");
			System.out.println("Error: Specify a Job ID to cancel a job.");
			return 1;
		}
	} else {
		LOG.error("Missing JobID in the command line arguments.");
		System.out.println("Error: Specify a Job ID to cancel a job.");
		return 1;
	}

	try {
		ActorGateway jobManager = getJobManagerGateway(options);

		Object cancelMsg;
		if (withSavepoint) {
			if (targetDirectory == null) {
				logAndSysout("Cancelling job " + jobId + " with savepoint to default savepoint directory.");
			} else {
				logAndSysout("Cancelling job " + jobId + " with savepoint to " + targetDirectory + ".");
			}
			cancelMsg = new CancelJobWithSavepoint(jobId, targetDirectory);
		} else {
			logAndSysout("Cancelling job " + jobId + ".");
			cancelMsg = new CancelJob(jobId);
		}

		Future<Object> response = jobManager.ask(cancelMsg, clientTimeout);
		final Object rc = Await.result(response, clientTimeout);

		if (rc instanceof CancellationSuccess) {
			if (withSavepoint) {
				CancellationSuccess success = (CancellationSuccess) rc;
				String savepointPath = success.savepointPath();
				logAndSysout("Cancelled job " + jobId + ". Savepoint stored in " + savepointPath + ".");
			} else {
				logAndSysout("Cancelled job " + jobId + ".");
			}
		} else if (rc instanceof CancellationFailure) {
			throw new Exception("Canceling the job with ID " + jobId + " failed.", ((CancellationFailure) rc).cause());
		} else {
			throw new IllegalStateException("Unexpected response: " + rc);
		}

		return 0;
	} catch (Throwable t) {
		return handleError(t);
	}
}
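
As a quick illustration of the ID parsing above: a JobID is 16 bytes, so the CLI accepts a 32-character hex string. A minimal, self-contained sketch (the class name and sample ID below are made up for illustration):

import org.apache.flink.api.common.JobID;
import org.apache.flink.util.StringUtils;

public class JobIdParsingExample {
	public static void main(String[] args) {
		// 32 hex characters = 16 bytes, the size of a JobID
		String jobIdString = "a5210c067ee56a77ee6dd81459a4f8ac"; // hypothetical job ID
		JobID jobId = new JobID(StringUtils.hexStringToByte(jobIdString));
		System.out.println("Parsed job ID: " + jobId);
	}
}

Anything that does not parse as such a hex string falls through to the "not a valid ID" error path above.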
Use of org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess in project flink by apache.
From class JobCancellationWithSavepointHandlersTest, method testTriggerNewRequest.
/**
 * Tests triggering a new request and monitoring it.
 */
@Test
public void testTriggerNewRequest() throws Exception {
	JobID jobId = new JobID();
	ExecutionGraphHolder holder = mock(ExecutionGraphHolder.class);
	ExecutionGraph graph = mock(ExecutionGraph.class);
	CheckpointCoordinator coord = mock(CheckpointCoordinator.class);
	when(holder.getExecutionGraph(eq(jobId), any(ActorGateway.class))).thenReturn(graph);
	when(graph.getCheckpointCoordinator()).thenReturn(coord);

	JobCancellationWithSavepointHandlers handlers = new JobCancellationWithSavepointHandlers(holder, EC);
	JobCancellationWithSavepointHandlers.TriggerHandler trigger = handlers.getTriggerHandler();
	JobCancellationWithSavepointHandlers.InProgressHandler progress = handlers.getInProgressHandler();

	Map<String, String> params = new HashMap<>();
	params.put("jobid", jobId.toString());
	params.put("targetDirectory", "custom-directory");

	ActorGateway jobManager = mock(ActorGateway.class);

	// Successful
	Promise<Object> promise = new Promise.DefaultPromise<>();
	when(jobManager.ask(any(Object.class), any(FiniteDuration.class))).thenReturn(promise);

	// Trigger
	FullHttpResponse response = trigger.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
	verify(jobManager).ask(eq(new CancelJobWithSavepoint(jobId, "custom-directory")), any(FiniteDuration.class));

	String location = String.format("/jobs/%s/cancel-with-savepoint/in-progress/1", jobId);

	assertEquals(HttpResponseStatus.ACCEPTED, response.getStatus());
	assertEquals("application/json", response.headers().get(HttpHeaders.Names.CONTENT_TYPE));
	assertEquals(Integer.toString(response.content().readableBytes()), response.headers().get(HttpHeaders.Names.CONTENT_LENGTH));
	assertEquals(location, response.headers().get(HttpHeaders.Names.LOCATION));

	String json = response.content().toString(Charset.forName("UTF-8"));
	JsonNode root = new ObjectMapper().readTree(json);
	assertEquals("accepted", root.get("status").getValueAsText());
	assertEquals("1", root.get("request-id").getValueAsText());
	assertEquals(location, root.get("location").getValueAsText());

	// Trigger again
	response = trigger.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
	assertEquals(HttpResponseStatus.ACCEPTED, response.getStatus());
	assertEquals("application/json", response.headers().get(HttpHeaders.Names.CONTENT_TYPE));
	assertEquals(Integer.toString(response.content().readableBytes()), response.headers().get(HttpHeaders.Names.CONTENT_LENGTH));
	assertEquals(location, response.headers().get(HttpHeaders.Names.LOCATION));
	json = response.content().toString(Charset.forName("UTF-8"));
	root = new ObjectMapper().readTree(json);
	assertEquals("accepted", root.get("status").getValueAsText());
	assertEquals("1", root.get("request-id").getValueAsText());
	assertEquals(location, root.get("location").getValueAsText());

	// Only single actual request
	verify(jobManager).ask(eq(new CancelJobWithSavepoint(jobId, "custom-directory")), any(FiniteDuration.class));

	// Query progress
	params.put("requestId", "1");
	response = progress.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
	assertEquals(HttpResponseStatus.ACCEPTED, response.getStatus());
	assertEquals("application/json", response.headers().get(HttpHeaders.Names.CONTENT_TYPE));
	assertEquals(Integer.toString(response.content().readableBytes()), response.headers().get(HttpHeaders.Names.CONTENT_LENGTH));
	json = response.content().toString(Charset.forName("UTF-8"));
	root = new ObjectMapper().readTree(json);
	assertEquals("in-progress", root.get("status").getValueAsText());
	assertEquals("1", root.get("request-id").getValueAsText());

	// Complete
	promise.success(new CancellationSuccess(jobId, "_path-savepoint_"));
	response = progress.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
	assertEquals(HttpResponseStatus.CREATED, response.getStatus());
	assertEquals("application/json", response.headers().get(HttpHeaders.Names.CONTENT_TYPE));
	assertEquals(Integer.toString(response.content().readableBytes()), response.headers().get(HttpHeaders.Names.CONTENT_LENGTH));
	json = response.content().toString(Charset.forName("UTF-8"));
	root = new ObjectMapper().readTree(json);
	assertEquals("success", root.get("status").getValueAsText());
	assertEquals("1", root.get("request-id").getValueAsText());
	assertEquals("_path-savepoint_", root.get("savepoint-path").getValueAsText());

	// Query again, keep recent history
	response = progress.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
	assertEquals(HttpResponseStatus.CREATED, response.getStatus());
	assertEquals("application/json", response.headers().get(HttpHeaders.Names.CONTENT_TYPE));
	assertEquals(Integer.toString(response.content().readableBytes()), response.headers().get(HttpHeaders.Names.CONTENT_LENGTH));
	json = response.content().toString(Charset.forName("UTF-8"));
	root = new ObjectMapper().readTree(json);
	assertEquals("success", root.get("status").getValueAsText());
	assertEquals("1", root.get("request-id").getValueAsText());
	assertEquals("_path-savepoint_", root.get("savepoint-path").getValueAsText());

	// Query for unknown request
	params.put("requestId", "9929");
	response = progress.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
	assertEquals(HttpResponseStatus.BAD_REQUEST, response.getStatus());
	assertEquals("application/json", response.headers().get(HttpHeaders.Names.CONTENT_TYPE));
	assertEquals(Integer.toString(response.content().readableBytes()), response.headers().get(HttpHeaders.Names.CONTENT_LENGTH));
	json = response.content().toString(Charset.forName("UTF-8"));
	root = new ObjectMapper().readTree(json);
	assertEquals("failed", root.get("status").getValueAsText());
	assertEquals("9929", root.get("request-id").getValueAsText());
	assertEquals("Unknown job/request ID", root.get("cause").getValueAsText());
}
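
In client terms, the handler contract exercised above amounts to a trigger-then-poll HTTP exchange. A hedged sketch, assuming a JobManager web frontend on localhost:8081 and the target-directory trigger path (both assumptions; only the in-progress location format and the 202/201/400 status codes are asserted by the test):

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

public class CancelWithSavepointClientSketch {
	public static void main(String[] args) throws Exception {
		String base = "http://localhost:8081"; // assumed web frontend address
		String jobId = "a5210c067ee56a77ee6dd81459a4f8ac"; // hypothetical job ID

		// Trigger: answered with 202 ACCEPTED and a Location header of the form
		// /jobs/<jobId>/cancel-with-savepoint/in-progress/<requestId>.
		String trigger = base + "/jobs/" + jobId
			+ "/cancel-with-savepoint/target-directory/custom-directory"; // assumed trigger path
		HttpURLConnection conn = (HttpURLConnection) new URL(trigger).openConnection();
		System.out.println("trigger -> " + conn.getResponseCode());
		String location = conn.getHeaderField("Location");

		// Poll: 202 while in progress, 201 CREATED with a savepoint-path field on
		// success, 400 BAD_REQUEST for an unknown request id.
		conn = (HttpURLConnection) new URL(base + location).openConnection();
		System.out.println("progress -> " + conn.getResponseCode());
		try (BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
			reader.lines().forEach(System.out::println);
		}
	}
}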
Use of org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess in project flink by apache.
From class JobManagerTest, method testSavepointRestoreSettings.
/**
 * Tests that configured {@link SavepointRestoreSettings} are respected.
 */
@Test
public void testSavepointRestoreSettings() throws Exception {
	FiniteDuration timeout = new FiniteDuration(30, TimeUnit.SECONDS);
	ActorSystem actorSystem = null;
	ActorGateway jobManager = null;
	ActorGateway archiver = null;
	ActorGateway taskManager = null;
	try {
		actorSystem = AkkaUtils.createLocalActorSystem(new Configuration());

		Tuple2<ActorRef, ActorRef> master = JobManager.startJobManagerActors(
			new Configuration(),
			actorSystem,
			TestingUtils.defaultExecutor(),
			TestingUtils.defaultExecutor(),
			Option.apply("jm"),
			Option.apply("arch"),
			TestingJobManager.class,
			TestingMemoryArchivist.class);

		jobManager = new AkkaActorGateway(master._1(), null);
		archiver = new AkkaActorGateway(master._2(), null);

		Configuration tmConfig = new Configuration();
		tmConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 4);

		ActorRef taskManagerRef = TaskManager.startTaskManagerComponentsAndActor(
			tmConfig,
			ResourceID.generate(),
			actorSystem,
			"localhost",
			Option.apply("tm"),
			Option.<LeaderRetrievalService>apply(new StandaloneLeaderRetrievalService(jobManager.path())),
			true,
			TestingTaskManager.class);

		taskManager = new AkkaActorGateway(taskManagerRef, null);

		// Wait until connected
		Object msg = new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jobManager.actor());
		Await.ready(taskManager.ask(msg, timeout), timeout);

		// Create job graph
		JobVertex sourceVertex = new JobVertex("Source");
		sourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
		sourceVertex.setParallelism(1);

		JobGraph jobGraph = new JobGraph("TestingJob", sourceVertex);

		JobSnapshottingSettings snapshottingSettings = new JobSnapshottingSettings(
			Collections.singletonList(sourceVertex.getID()),
			Collections.singletonList(sourceVertex.getID()),
			Collections.singletonList(sourceVertex.getID()),
			Long.MAX_VALUE, // deactivated checkpointing
			360000,
			0,
			Integer.MAX_VALUE,
			ExternalizedCheckpointSettings.none(),
			null,
			true);

		jobGraph.setSnapshotSettings(snapshottingSettings);

		// Submit job graph
		msg = new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.DETACHED);
		Await.result(jobManager.ask(msg, timeout), timeout);

		// Wait for all tasks to be running
		msg = new TestingJobManagerMessages.WaitForAllVerticesToBeRunning(jobGraph.getJobID());
		Await.result(jobManager.ask(msg, timeout), timeout);

		// Trigger savepoint
		File targetDirectory = tmpFolder.newFolder();
		msg = new TriggerSavepoint(jobGraph.getJobID(), Option.apply(targetDirectory.getAbsolutePath()));
		Future<Object> future = jobManager.ask(msg, timeout);
		Object result = Await.result(future, timeout);
		String savepointPath = ((TriggerSavepointSuccess) result).savepointPath();

		// Cancel because of restarts
		msg = new TestingJobManagerMessages.NotifyWhenJobRemoved(jobGraph.getJobID());
		Future<?> removedFuture = jobManager.ask(msg, timeout);

		Future<?> cancelFuture = jobManager.ask(new CancelJob(jobGraph.getJobID()), timeout);
		Object response = Await.result(cancelFuture, timeout);
		assertTrue("Unexpected response: " + response, response instanceof CancellationSuccess);

		Await.ready(removedFuture, timeout);

		// Adjust the job (we need a new operator ID)
		JobVertex newSourceVertex = new JobVertex("NewSource");
		newSourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
		newSourceVertex.setParallelism(1);

		JobGraph newJobGraph = new JobGraph("NewTestingJob", newSourceVertex);

		JobSnapshottingSettings newSnapshottingSettings = new JobSnapshottingSettings(
			Collections.singletonList(newSourceVertex.getID()),
			Collections.singletonList(newSourceVertex.getID()),
			Collections.singletonList(newSourceVertex.getID()),
			Long.MAX_VALUE, // deactivated checkpointing
			360000,
			0,
			Integer.MAX_VALUE,
			ExternalizedCheckpointSettings.none(),
			null,
			true);

		newJobGraph.setSnapshotSettings(newSnapshottingSettings);

		SavepointRestoreSettings restoreSettings = SavepointRestoreSettings.forPath(savepointPath, false);
		newJobGraph.setSavepointRestoreSettings(restoreSettings);

		msg = new JobManagerMessages.SubmitJob(newJobGraph, ListeningBehaviour.DETACHED);
		response = Await.result(jobManager.ask(msg, timeout), timeout);
		assertTrue("Unexpected response: " + response, response instanceof JobManagerMessages.JobResultFailure);

		JobManagerMessages.JobResultFailure failure = (JobManagerMessages.JobResultFailure) response;
		Throwable cause = failure.cause().deserializeError(ClassLoader.getSystemClassLoader());
		assertTrue(cause instanceof IllegalStateException);
		assertTrue(cause.getMessage().contains("allowNonRestoredState"));

		// Wait until removed
		msg = new TestingJobManagerMessages.NotifyWhenJobRemoved(newJobGraph.getJobID());
		Await.ready(jobManager.ask(msg, timeout), timeout);

		// Resubmit, but allow non restored state now
		restoreSettings = SavepointRestoreSettings.forPath(savepointPath, true);
		newJobGraph.setSavepointRestoreSettings(restoreSettings);

		msg = new JobManagerMessages.SubmitJob(newJobGraph, ListeningBehaviour.DETACHED);
		response = Await.result(jobManager.ask(msg, timeout), timeout);
		assertTrue("Unexpected response: " + response, response instanceof JobManagerMessages.JobSubmitSuccess);
	} finally {
		if (actorSystem != null) {
			actorSystem.shutdown();
		}
		if (archiver != null) {
			archiver.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
		}
		if (jobManager != null) {
			jobManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
		}
		if (taskManager != null) {
			taskManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
		}
	}
}
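
The test unwraps only the success reply of TriggerSavepoint. For completeness, a hedged sketch of handling both reply types, reusing the names from the test body (TriggerSavepointFailure is assumed to be the failure counterpart in JobManagerMessages):

// Assumes jobManager, jobGraph, targetDirectory and timeout as set up above.
Object reply = Await.result(
	jobManager.ask(
		new TriggerSavepoint(jobGraph.getJobID(), Option.apply(targetDirectory.getAbsolutePath())),
		timeout),
	timeout);

if (reply instanceof TriggerSavepointSuccess) {
	String savepointPath = ((TriggerSavepointSuccess) reply).savepointPath();
	// restore later via SavepointRestoreSettings.forPath(savepointPath, ...)
} else if (reply instanceof TriggerSavepointFailure) {
	throw new RuntimeException("Savepoint failed", ((TriggerSavepointFailure) reply).cause());
}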
Use of org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess in project flink by apache.
From class AbstractQueryableStateITCase, method testQueryNonStartedJobState.
/**
 * Similar test to {@link #testValueState()}, but we already issue one
 * request before submitting the job, which fails.
 */
@Test
public void testQueryNonStartedJobState() throws Exception {
	// Config
	final Deadline deadline = TEST_TIMEOUT.fromNow();
	final int numElements = 1024;

	final QueryableStateClient client = new QueryableStateClient(cluster.configuration());

	JobID jobId = null;
	try {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStateBackend(stateBackend);
		env.setParallelism(NUM_SLOTS);
		// Very important, because the cluster is shared between tests and we
		// don't explicitly check that all slots are available before submitting.
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));

		DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));

		// Value state
		ValueStateDescriptor<Tuple2<Integer, Long>> valueState = new ValueStateDescriptor<>("any", source.getType(), null);

		QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source
			.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
				@Override
				public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
					return value.f0;
				}
			})
			.asQueryableState("hakuna", valueState);

		// Submit the job graph
		JobGraph jobGraph = env.getStreamGraph().getJobGraph();
		jobId = jobGraph.getJobID();

		// Now query
		long expected = numElements;

		// Query once before the job is submitted; this request fails.
		client.getKvState(
			jobId,
			queryableState.getQueryableStateName(),
			0,
			KvStateRequestSerializer.serializeKeyAndNamespace(
				0,
				queryableState.getKeySerializer(),
				VoidNamespace.INSTANCE,
				VoidNamespaceSerializer.INSTANCE));

		cluster.submitJobDetached(jobGraph);

		executeValueQuery(deadline, client, jobId, queryableState, expected);
	} finally {
		// Free cluster resources
		if (jobId != null) {
			Future<CancellationSuccess> cancellation = cluster
				.getLeaderGateway(deadline.timeLeft())
				.ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
				.mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
			Await.ready(cancellation, deadline.timeLeft());
		}
		client.shutDown();
	}
}
Use of org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess in project flink by apache.
From class AbstractQueryableStateITCase, method testValueStateDefault.
/**
 * Tests a simple value queryable state instance with a default value set.
 * Each source emits (subtaskIndex, 0)..(subtaskIndex, numElements) tuples,
 * the key is mapped to 1, but key 0 is queried, which should throw an
 * {@link UnknownKeyOrNamespace} exception.
 *
 * @throws UnknownKeyOrNamespace thrown due to querying a non-existent key
 */
@Test(expected = UnknownKeyOrNamespace.class)
public void testValueStateDefault() throws Exception, UnknownKeyOrNamespace {
	// Config
	final Deadline deadline = TEST_TIMEOUT.fromNow();
	final int numElements = 1024;

	final QueryableStateClient client = new QueryableStateClient(cluster.configuration());

	JobID jobId = null;
	try {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setStateBackend(stateBackend);
		env.setParallelism(NUM_SLOTS);
		// Very important, because the cluster is shared between tests and we
		// don't explicitly check that all slots are available before submitting.
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));

		DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));

		// Value state
		ValueStateDescriptor<Tuple2<Integer, Long>> valueState =
			new ValueStateDescriptor<>("any", source.getType(), Tuple2.of(0, 1337L));

		// only expose key "1"
		QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source
			.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
				@Override
				public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
					return 1;
				}
			})
			.asQueryableState("hakuna", valueState);

		// Submit the job graph
		JobGraph jobGraph = env.getStreamGraph().getJobGraph();
		jobId = jobGraph.getJobID();

		cluster.submitJobDetached(jobGraph);

		// Now query
		int key = 0;
		final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(
			key,
			queryableState.getKeySerializer(),
			VoidNamespace.INSTANCE,
			VoidNamespaceSerializer.INSTANCE);

		Future<byte[]> future = getKvStateWithRetries(
			client,
			jobId,
			queryableState.getQueryableStateName(),
			key,
			serializedKey,
			QUERY_RETRY_DELAY,
			true);

		Await.result(future, deadline.timeLeft());
	} finally {
		// Free cluster resources
		if (jobId != null) {
			Future<CancellationSuccess> cancellation = cluster
				.getLeaderGateway(deadline.timeLeft())
				.ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
				.mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
			Await.ready(cancellation, deadline.timeLeft());
		}
		client.shutDown();
	}
}
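
The tests above only serialize the request side. A hedged sketch of the matching read path, assuming client, jobId, queryableState and deadline as set up above (deserializeValue is assumed to be the counterpart of serializeKeyAndNamespace in KvStateRequestSerializer):

// Query key 0 once and decode the answer back into a tuple.
int key = 0;
byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(
	key,
	queryableState.getKeySerializer(),
	VoidNamespace.INSTANCE,
	VoidNamespaceSerializer.INSTANCE);

Future<byte[]> future = client.getKvState(
	jobId, queryableState.getQueryableStateName(), key, serializedKey);

byte[] serializedResult = Await.result(future, deadline.timeLeft());
Tuple2<Integer, Long> value = KvStateRequestSerializer.deserializeValue(
	serializedResult, queryableState.getValueSerializer());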