Use of org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess in project flink by apache.
From class CliFrontend, method cancel:
/**
* Executes the CANCEL action.
*
* @param args Command line arguments for the cancel action.
*/
protected int cancel(String[] args) {
    LOG.info("Running 'cancel' command.");

    CancelOptions options;
    try {
        options = CliFrontendParser.parseCancelCommand(args);
    } catch (CliArgsException e) {
        return handleArgException(e);
    } catch (Throwable t) {
        return handleError(t);
    }

    // evaluate help flag
    if (options.isPrintHelp()) {
        CliFrontendParser.printHelpForCancel();
        return 0;
    }

    String[] cleanedArgs = options.getArgs();
    boolean withSavepoint = options.isWithSavepoint();
    String targetDirectory = options.getSavepointTargetDirectory();

    JobID jobId;

    // Cancel-with-savepoint takes an optional target directory, so the parser
    // cannot always tell the job ID and the directory apart:
    // - cancel <jobID>                  => plain cancel
    // - cancel -s <targetDir> <jobID>   => custom target dir (parsed correctly)
    // - cancel -s <jobID>               => the job ID lands in the target
    //                                      directory option and is fixed up below
    if (cleanedArgs.length > 0) {
        String jobIdString = cleanedArgs[0];
        try {
            jobId = new JobID(StringUtils.hexStringToByte(jobIdString));
        } catch (Exception e) {
            LOG.error("Error: The value for the Job ID is not a valid ID.");
            System.out.println("Error: The value for the Job ID is not a valid ID.");
            return 1;
        }
    } else if (targetDirectory != null) {
        // Try this for the case: cancel -s <jobID> (default savepoint target dir)
        String jobIdString = targetDirectory;
        try {
            jobId = new JobID(StringUtils.hexStringToByte(jobIdString));
            targetDirectory = null;
        } catch (Exception e) {
            LOG.error("Missing JobID in the command line arguments.");
            System.out.println("Error: Specify a Job ID to cancel a job.");
            return 1;
        }
    } else {
        LOG.error("Missing JobID in the command line arguments.");
        System.out.println("Error: Specify a Job ID to cancel a job.");
        return 1;
    }

    try {
        ActorGateway jobManager = getJobManagerGateway(options);

        Object cancelMsg;
        if (withSavepoint) {
            if (targetDirectory == null) {
                logAndSysout("Cancelling job " + jobId + " with savepoint to default savepoint directory.");
            } else {
                logAndSysout("Cancelling job " + jobId + " with savepoint to " + targetDirectory + ".");
            }
            cancelMsg = new CancelJobWithSavepoint(jobId, targetDirectory);
        } else {
            logAndSysout("Cancelling job " + jobId + ".");
            cancelMsg = new CancelJob(jobId);
        }

        Future<Object> response = jobManager.ask(cancelMsg, clientTimeout);
        final Object rc = Await.result(response, clientTimeout);

        if (rc instanceof CancellationSuccess) {
            if (withSavepoint) {
                CancellationSuccess success = (CancellationSuccess) rc;
                String savepointPath = success.savepointPath();
                logAndSysout("Cancelled job " + jobId + ". Savepoint stored in " + savepointPath + ".");
            } else {
                logAndSysout("Cancelled job " + jobId + ".");
            }
        } else if (rc instanceof CancellationFailure) {
            throw new Exception("Canceling the job with ID " + jobId + " failed.", ((CancellationFailure) rc).cause());
        } else {
            throw new IllegalStateException("Unexpected response: " + rc);
        }
        return 0;
    } catch (Throwable t) {
        return handleError(t);
    }
}
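The branch order above means a 32-character hex string is always interpreted as a job ID before being treated as a savepoint directory. A minimal sketch of the round trip this relies on, using only Flink's public JobID and StringUtils APIs; the demo class itself is hypothetical, not from the Flink sources:

import org.apache.flink.api.common.JobID;
import org.apache.flink.util.StringUtils;

public class JobIdRoundTrip { // hypothetical demo class
    public static void main(String[] args) {
        // A JobID prints as 32 lowercase hex characters and can be rebuilt
        // from them; cancel() relies on this to tell a job ID from a path.
        JobID original = new JobID();
        String hex = original.toString();
        JobID parsed = new JobID(StringUtils.hexStringToByte(hex));
        System.out.println(hex + " round-trips: " + original.equals(parsed)); // true

        // A path like "hdfs:///savepoints" makes hexStringToByte throw, so
        // the fallback branch above keeps it as the savepoint directory.
    }
}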
Use of org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess in project flink by apache.
From class AbstractQueryableStateITCase, method testFoldingState:
/**
 * Tests a simple queryable state instance backed by folding state. Each
 * source emits (subtaskIndex, 0)..(subtaskIndex, numElements) tuples,
 * which are then queried. The folding state sums these up and maps them
 * to Strings. The test succeeds once each subtask index has been queried
 * with the result n*(n+1)/2 (as a String).
 */
@Test
public void testFoldingState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;

    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());

    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because the cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));

        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));

        // Folding state
        FoldingStateDescriptor<Tuple2<Integer, Long>, String> foldingState =
            new FoldingStateDescriptor<>("any", "0", new SumFold(), StringSerializer.INSTANCE);

        QueryableStateStream<Integer, String> queryableState = source
            .keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
                @Override
                public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                    return value.f0;
                }
            })
            .asQueryableState("pumba", foldingState);

        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);

        // Now query
        String expected = Integer.toString(numElements * (numElements + 1) / 2);
        for (int key = 0; key < NUM_SLOTS; key++) {
            final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(
                key, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE);

            boolean success = false;
            while (deadline.hasTimeLeft() && !success) {
                Future<byte[]> future = getKvStateWithRetries(
                    client, jobId, queryableState.getQueryableStateName(), key, serializedKey, QUERY_RETRY_DELAY, false);
                byte[] serializedValue = Await.result(future, deadline.timeLeft());
                String value = KvStateRequestSerializer.deserializeValue(serializedValue, queryableState.getValueSerializer());
                if (expected.equals(value)) {
                    success = true;
                } else {
                    // Retry
                    Thread.sleep(50);
                }
            }
            assertTrue("Did not succeed query", success);
        }
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster
                .getLeaderGateway(deadline.timeLeft())
                .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
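SumFold is referenced above but not included in this excerpt. A plausible reconstruction, assuming it folds the Long field of each tuple into a String-encoded running sum, so that with numElements = 1024 each key's state converges to 1024 * 1025 / 2 = 524800; the types are pinned down by the FoldingStateDescriptor<Tuple2<Integer, Long>, String> above:

import org.apache.flink.api.common.functions.FoldFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Hypothetical SumFold: the accumulator is a String holding the current sum,
// matching the "0" initial value and StringSerializer in the descriptor.
public class SumFold implements FoldFunction<Tuple2<Integer, Long>, String> {
    @Override
    public String fold(String accumulator, Tuple2<Integer, Long> value) throws Exception {
        long sum = Long.parseLong(accumulator) + value.f1;
        return Long.toString(sum);
    }
}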
Use of org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess in project flink by apache.
From class AbstractQueryableStateITCase, method testReducingState:
/**
 * Tests a simple queryable state instance backed by reducing state. Each
 * source emits (subtaskIndex, 0)..(subtaskIndex, numElements) tuples,
 * which are then queried. The reducing state instance sums these up. The
 * test succeeds once each subtask index has been queried with the result
 * n*(n+1)/2.
 */
@Test
public void testReducingState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;

    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());

    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because the cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));

        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));

        // Reducing state
        ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState =
            new ReducingStateDescriptor<>("any", new SumReduce(), source.getType());

        QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source
            .keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
                @Override
                public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                    return value.f0;
                }
            })
            .asQueryableState("jungle", reducingState);

        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);

        // Now query (the query helper retries until the job is running and
        // the state has converged)
        long expected = numElements * (numElements + 1) / 2;
        executeValueQuery(deadline, client, jobId, queryableState, expected);
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster
                .getLeaderGateway(deadline.timeLeft())
                .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
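SumReduce is likewise not part of this excerpt. A minimal sketch consistent with the ReducingStateDescriptor<Tuple2<Integer, Long>> above, assuming it keeps the key in f0 and sums the Long field:

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Hypothetical SumReduce: merges two tuples for the same key by adding f1.
public class SumReduce implements ReduceFunction<Tuple2<Integer, Long>> {
    @Override
    public Tuple2<Integer, Long> reduce(Tuple2<Integer, Long> a, Tuple2<Integer, Long> b) throws Exception {
        return new Tuple2<>(a.f0, a.f1 + b.f1);
    }
}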
Use of org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess in project flink by apache.
From class JobCancellationWithSavepointHandlersTest, method testSavepointDirectoryConfiguration:
/**
* Tests that the savepoint directory configuration is respected.
*/
@Test
public void testSavepointDirectoryConfiguration() throws Exception {
    long timeout = 128288238L;
    JobID jobId = new JobID();
    ExecutionGraphHolder holder = mock(ExecutionGraphHolder.class);
    ExecutionGraph graph = mock(ExecutionGraph.class);
    CheckpointCoordinator coord = mock(CheckpointCoordinator.class);
    when(holder.getExecutionGraph(eq(jobId), any(ActorGateway.class))).thenReturn(graph);
    when(graph.getCheckpointCoordinator()).thenReturn(coord);
    when(coord.getCheckpointTimeout()).thenReturn(timeout);

    JobCancellationWithSavepointHandlers handlers = new JobCancellationWithSavepointHandlers(holder, EC, "the-default-directory");
    JobCancellationWithSavepointHandlers.TriggerHandler handler = handlers.getTriggerHandler();

    Map<String, String> params = new HashMap<>();
    params.put("jobid", jobId.toString());

    ActorGateway jobManager = mock(ActorGateway.class);
    Future<Object> future = Futures.successful((Object) new CancellationSuccess(jobId, null));
    when(jobManager.ask(any(Object.class), any(FiniteDuration.class))).thenReturn(future);

    // 1. Use the targetDirectory path parameter
    params.put("targetDirectory", "custom-directory");
    handler.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
    verify(jobManager).ask(eq(new CancelJobWithSavepoint(jobId, "custom-directory")), eq(FiniteDuration.apply(timeout, "ms")));

    // 2. Use the configured default
    params.remove("targetDirectory");
    handler.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
    verify(jobManager).ask(eq(new CancelJobWithSavepoint(jobId, "the-default-directory")), eq(FiniteDuration.apply(timeout, "ms")));

    // 3. Neither parameter nor default configured: expect an exception
    handlers = new JobCancellationWithSavepointHandlers(holder, EC, null);
    handler = handlers.getTriggerHandler();
    try {
        handler.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
        fail("Did not throw expected Exception");
    } catch (Exception e) {
        IllegalStateException cause = (IllegalStateException) e.getCause();
        assertTrue(cause.getMessage().contains(ConfigConstants.SAVEPOINT_DIRECTORY_KEY));
    }
}
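The three numbered cases boil down to a precedence rule for choosing the savepoint directory. A hedged distillation of what the test verifies; pickTargetDirectory is a hypothetical helper, not the handler's actual code:

import org.apache.flink.configuration.ConfigConstants;

// Hypothetical precedence rule: an explicit request parameter wins, then the
// configured default; with neither, fail with an IllegalStateException that
// names the missing config key, as asserted in case 3 above.
static String pickTargetDirectory(String requestParam, String configuredDefault) {
    if (requestParam != null) {
        return requestParam;
    }
    if (configuredDefault != null) {
        return configuredDefault;
    }
    throw new IllegalStateException(
        "No savepoint directory configured. Please set " + ConfigConstants.SAVEPOINT_DIRECTORY_KEY + ".");
}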
Use of org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess in project flink by apache.
From class JobCancellationWithSavepointHandlersTest, method testAskTimeoutEqualsCheckpointTimeout:
/**
* Tests that the cancellation ask timeout respects the checkpoint timeout.
* Otherwise, AskTimeoutExceptions are bound to happen for large state.
*/
@Test
public void testAskTimeoutEqualsCheckpointTimeout() throws Exception {
    long timeout = 128288238L;
    JobID jobId = new JobID();
    ExecutionGraphHolder holder = mock(ExecutionGraphHolder.class);
    ExecutionGraph graph = mock(ExecutionGraph.class);
    CheckpointCoordinator coord = mock(CheckpointCoordinator.class);
    when(holder.getExecutionGraph(eq(jobId), any(ActorGateway.class))).thenReturn(graph);
    when(graph.getCheckpointCoordinator()).thenReturn(coord);
    when(coord.getCheckpointTimeout()).thenReturn(timeout);

    JobCancellationWithSavepointHandlers handlers = new JobCancellationWithSavepointHandlers(holder, EC);
    JobCancellationWithSavepointHandlers.TriggerHandler handler = handlers.getTriggerHandler();

    Map<String, String> params = new HashMap<>();
    params.put("jobid", jobId.toString());
    params.put("targetDirectory", "placeholder");

    ActorGateway jobManager = mock(ActorGateway.class);
    Future<Object> future = Futures.successful((Object) new CancellationSuccess(jobId, null));
    when(jobManager.ask(any(Object.class), any(FiniteDuration.class))).thenReturn(future);

    handler.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);

    verify(jobManager).ask(any(CancelJobWithSavepoint.class), eq(FiniteDuration.apply(timeout, "ms")));
}
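The essential wiring being verified: the ask timeout is derived from the job's checkpoint timeout rather than a fixed client-side default, so a savepoint over large state is not cut short by Akka's ask timeout. A minimal sketch under that reading; variable names are illustrative, not the handler's actual code:

// Derive the ask timeout from the checkpoint timeout (milliseconds), then use
// it for the cancel-with-savepoint ask, as the verify(...) above expects.
long checkpointTimeoutMs = graph.getCheckpointCoordinator().getCheckpointTimeout();
FiniteDuration askTimeout = FiniteDuration.apply(checkpointTimeoutMs, "ms");
Future<Object> result = jobManager.ask(new CancelJobWithSavepoint(jobId, "placeholder"), askTimeout);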