Search in sources:

Example 1 with CancelJobWithSavepoint

Use of org.apache.flink.runtime.messages.JobManagerMessages.CancelJobWithSavepoint in the Apache Flink project.

From the class JobCancellationWithSavepointHandlersTest, method testSavepointDirectoryConfiguration.

/**
 * Tests that the savepoint directory configuration is respected.
 *
 * <p>Covers three cases: an explicit {@code targetDirectory} path parameter,
 * falling back to the configured default directory, and failing with an
 * {@link IllegalStateException} when neither is available.
 */
@Test
public void testSavepointDirectoryConfiguration() throws Exception {
    long timeout = 128288238L;
    JobID jobId = new JobID();
    // Wire up mocks so the handler resolves a graph with a checkpoint
    // coordinator whose timeout drives the ask() duration below.
    ExecutionGraphHolder holder = mock(ExecutionGraphHolder.class);
    ExecutionGraph graph = mock(ExecutionGraph.class);
    CheckpointCoordinator coord = mock(CheckpointCoordinator.class);
    when(holder.getExecutionGraph(eq(jobId), any(ActorGateway.class))).thenReturn(graph);
    when(graph.getCheckpointCoordinator()).thenReturn(coord);
    when(coord.getCheckpointTimeout()).thenReturn(timeout);
    JobCancellationWithSavepointHandlers handlers = new JobCancellationWithSavepointHandlers(holder, EC, "the-default-directory");
    JobCancellationWithSavepointHandlers.TriggerHandler handler = handlers.getTriggerHandler();
    Map<String, String> params = new HashMap<>();
    params.put("jobid", jobId.toString());
    ActorGateway jobManager = mock(ActorGateway.class);
    Future<Object> future = Futures.successful((Object) new CancellationSuccess(jobId, null));
    when(jobManager.ask(any(Object.class), any(FiniteDuration.class))).thenReturn(future);
    // 1. Use targetDirectory path param
    params.put("targetDirectory", "custom-directory");
    handler.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
    verify(jobManager).ask(eq(new CancelJobWithSavepoint(jobId, "custom-directory")), eq(FiniteDuration.apply(timeout, "ms")));
    // 2. Use default
    params.remove("targetDirectory");
    handler.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
    verify(jobManager).ask(eq(new CancelJobWithSavepoint(jobId, "the-default-directory")), eq(FiniteDuration.apply(timeout, "ms")));
    // 3. Throw Exception — no default directory configured and no
    // targetDirectory parameter supplied.
    handlers = new JobCancellationWithSavepointHandlers(holder, EC, null);
    handler = handlers.getTriggerHandler();
    try {
        handler.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
        fail("Did not throw expected test Exception");
    } catch (Exception e) {
        // Assert the cause type explicitly: an unguarded cast would turn an
        // unexpected cause into an opaque ClassCastException instead of a
        // clear assertion failure.
        assertEquals(IllegalStateException.class, e.getCause().getClass());
        // The message must point the user at the missing configuration key.
        assertEquals(true, e.getCause().getMessage().contains(ConfigConstants.SAVEPOINT_DIRECTORY_KEY));
    }
}
Also used : HashMap(java.util.HashMap) FiniteDuration(scala.concurrent.duration.FiniteDuration) CancelJobWithSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.CancelJobWithSavepoint) ExecutionGraphHolder(org.apache.flink.runtime.webmonitor.ExecutionGraphHolder) CheckpointCoordinator(org.apache.flink.runtime.checkpoint.CheckpointCoordinator) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 2 with CancelJobWithSavepoint

Use of org.apache.flink.runtime.messages.JobManagerMessages.CancelJobWithSavepoint in the Apache Flink project.

From the class JobCancellationWithSavepointHandlersTest, method testFailedCancellation.

/**
 * Tests the response when a cancellation request fails: the in-progress
 * handler must report HTTP 500 with a JSON body carrying the failure cause.
 */
@Test
public void testFailedCancellation() throws Exception {
    JobID jobId = new JobID();
    // Mock wiring: the holder resolves a graph with a checkpoint coordinator.
    ExecutionGraphHolder graphHolder = mock(ExecutionGraphHolder.class);
    ExecutionGraph executionGraph = mock(ExecutionGraph.class);
    CheckpointCoordinator checkpointCoordinator = mock(CheckpointCoordinator.class);
    when(graphHolder.getExecutionGraph(eq(jobId), any(ActorGateway.class))).thenReturn(executionGraph);
    when(executionGraph.getCheckpointCoordinator()).thenReturn(checkpointCoordinator);
    JobCancellationWithSavepointHandlers handlers = new JobCancellationWithSavepointHandlers(graphHolder, EC);
    JobCancellationWithSavepointHandlers.TriggerHandler triggerHandler = handlers.getTriggerHandler();
    JobCancellationWithSavepointHandlers.InProgressHandler progressHandler = handlers.getInProgressHandler();
    Map<String, String> pathParams = new HashMap<>();
    pathParams.put("jobid", jobId.toString());
    pathParams.put("targetDirectory", "custom-directory");
    ActorGateway jobManagerGateway = mock(ActorGateway.class);
    // Every ask() to the job manager completes with a failed future.
    Future<Object> failedFuture = Futures.failed(new Exception("Test Exception"));
    when(jobManagerGateway.ask(any(Object.class), any(FiniteDuration.class))).thenReturn(failedFuture);
    // Trigger the cancellation and check the expected message was sent.
    triggerHandler.handleRequest(pathParams, Collections.<String, String>emptyMap(), jobManagerGateway);
    verify(jobManagerGateway).ask(eq(new CancelJobWithSavepoint(jobId, "custom-directory")), any(FiniteDuration.class));
    // Query the progress of the (failed) request.
    pathParams.put("requestId", "1");
    FullHttpResponse progressResponse = progressHandler.handleRequest(pathParams, Collections.<String, String>emptyMap(), jobManagerGateway);
    assertEquals(HttpResponseStatus.INTERNAL_SERVER_ERROR, progressResponse.getStatus());
    assertEquals("application/json", progressResponse.headers().get(HttpHeaders.Names.CONTENT_TYPE));
    assertEquals(Integer.toString(progressResponse.content().readableBytes()), progressResponse.headers().get(HttpHeaders.Names.CONTENT_LENGTH));
    // The JSON body must identify the request and carry the failure cause.
    String responseJson = progressResponse.content().toString(Charset.forName("UTF-8"));
    JsonNode jsonRoot = new ObjectMapper().readTree(responseJson);
    assertEquals("failed", jsonRoot.get("status").getValueAsText());
    assertEquals("1", jsonRoot.get("request-id").getValueAsText());
    assertEquals("Test Exception", jsonRoot.get("cause").getValueAsText());
}
Also used : HashMap(java.util.HashMap) FiniteDuration(scala.concurrent.duration.FiniteDuration) CancelJobWithSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.CancelJobWithSavepoint) JsonNode(org.codehaus.jackson.JsonNode) ExecutionGraphHolder(org.apache.flink.runtime.webmonitor.ExecutionGraphHolder) CheckpointCoordinator(org.apache.flink.runtime.checkpoint.CheckpointCoordinator) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) FullHttpResponse(io.netty.handler.codec.http.FullHttpResponse) JobID(org.apache.flink.api.common.JobID) ObjectMapper(org.codehaus.jackson.map.ObjectMapper) Test(org.junit.Test)

Example 3 with CancelJobWithSavepoint

Use of org.apache.flink.runtime.messages.JobManagerMessages.CancelJobWithSavepoint in the Apache Flink project.

From the class CliFrontend, method cancel.

/**
	 * Executes the CANCEL action.
	 *
	 * <p>Supported invocations:
	 * <ul>
	 *   <li>{@code cancel <jobID>} — plain cancellation</li>
	 *   <li>{@code cancel -s <targetDir> <jobID>} — cancel with savepoint to a custom directory</li>
	 *   <li>{@code cancel -s <jobID>} — cancel with savepoint to the default directory</li>
	 * </ul>
	 *
	 * @param args Command line arguments for the cancel action.
	 * @return 0 on success; a non-zero error code otherwise.
	 */
protected int cancel(String[] args) {
    LOG.info("Running 'cancel' command.");
    CancelOptions options;
    try {
        options = CliFrontendParser.parseCancelCommand(args);
    } catch (CliArgsException e) {
        return handleArgException(e);
    } catch (Throwable t) {
        return handleError(t);
    }
    // evaluate help flag
    if (options.isPrintHelp()) {
        CliFrontendParser.printHelpForCancel();
        return 0;
    }
    String[] cleanedArgs = options.getArgs();
    boolean withSavepoint = options.isWithSavepoint();
    String targetDirectory = options.getSavepointTargetDirectory();
    JobID jobId;
    // - cancel -s <targetDir> <jobID> => custom target dir (parsed correctly)
    if (cleanedArgs.length > 0) {
        // Positional argument present: treat it as the job ID.
        String jobIdString = cleanedArgs[0];
        try {
            jobId = new JobID(StringUtils.hexStringToByte(jobIdString));
        } catch (Exception e) {
            LOG.error("Error: The value for the Job ID is not a valid ID.");
            System.out.println("Error: The value for the Job ID is not a valid ID.");
            return 1;
        }
    } else if (targetDirectory != null) {
        // Try this for case: cancel -s <jobID> (default savepoint target dir)
        // The option parser consumed the job ID as the value of -s; if the
        // "directory" parses as a job ID, re-interpret it and fall back to
        // the default savepoint directory.
        String jobIdString = targetDirectory;
        try {
            jobId = new JobID(StringUtils.hexStringToByte(jobIdString));
            targetDirectory = null;
        } catch (Exception e) {
            LOG.error("Missing JobID in the command line arguments.");
            System.out.println("Error: Specify a Job ID to cancel a job.");
            return 1;
        }
    } else {
        // Neither a positional job ID nor a -s value was given.
        LOG.error("Missing JobID in the command line arguments.");
        System.out.println("Error: Specify a Job ID to cancel a job.");
        return 1;
    }
    try {
        ActorGateway jobManager = getJobManagerGateway(options);
        Object cancelMsg;
        if (withSavepoint) {
            if (targetDirectory == null) {
                logAndSysout("Cancelling job " + jobId + " with savepoint to default savepoint directory.");
            } else {
                logAndSysout("Cancelling job " + jobId + " with savepoint to " + targetDirectory + ".");
            }
            cancelMsg = new CancelJobWithSavepoint(jobId, targetDirectory);
        } else {
            logAndSysout("Cancelling job " + jobId + ".");
            cancelMsg = new CancelJob(jobId);
        }
        // Send the cancel message and block for the job manager's answer.
        Future<Object> response = jobManager.ask(cancelMsg, clientTimeout);
        final Object rc = Await.result(response, clientTimeout);
        if (rc instanceof CancellationSuccess) {
            if (withSavepoint) {
                CancellationSuccess success = (CancellationSuccess) rc;
                String savepointPath = success.savepointPath();
                logAndSysout("Cancelled job " + jobId + ". Savepoint stored in " + savepointPath + ".");
            } else {
                logAndSysout("Cancelled job " + jobId + ".");
            }
        } else if (rc instanceof CancellationFailure) {
            // Propagate the remote failure cause to the generic error handler.
            throw new Exception("Canceling the job with ID " + jobId + " failed.", ((CancellationFailure) rc).cause());
        } else {
            throw new IllegalStateException("Unexpected response: " + rc);
        }
        return 0;
    } catch (Throwable t) {
        return handleError(t);
    }
}
Also used : CancelOptions(org.apache.flink.client.cli.CancelOptions) CancelJobWithSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.CancelJobWithSavepoint) CliArgsException(org.apache.flink.client.cli.CliArgsException) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) ProgramMissingJobException(org.apache.flink.client.program.ProgramMissingJobException) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) ProgramParametrizationException(org.apache.flink.client.program.ProgramParametrizationException) FileNotFoundException(java.io.FileNotFoundException) InvocationTargetException(java.lang.reflect.InvocationTargetException) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) CliArgsException(org.apache.flink.client.cli.CliArgsException) IOException(java.io.IOException) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) CancellationFailure(org.apache.flink.runtime.messages.JobManagerMessages.CancellationFailure) CancelJob(org.apache.flink.runtime.messages.JobManagerMessages.CancelJob) JobID(org.apache.flink.api.common.JobID)

Example 4 with CancelJobWithSavepoint

Use of org.apache.flink.runtime.messages.JobManagerMessages.CancelJobWithSavepoint in the Apache Flink project.

From the class JobCancellationWithSavepointHandlersTest, method testTriggerNewRequest.

/**
 * Tests triggering a new request and monitoring it.
 *
 * <p>Walks the full life cycle: trigger (ACCEPTED), duplicate trigger is
 * deduplicated, in-progress query (ACCEPTED), completion (CREATED), repeated
 * query of the recent history (CREATED), and an unknown request ID
 * (BAD_REQUEST).
 */
@Test
public void testTriggerNewRequest() throws Exception {
    JobID jobId = new JobID();
    // Mock wiring: the holder resolves a graph with a checkpoint coordinator.
    ExecutionGraphHolder holder = mock(ExecutionGraphHolder.class);
    ExecutionGraph graph = mock(ExecutionGraph.class);
    CheckpointCoordinator coord = mock(CheckpointCoordinator.class);
    when(holder.getExecutionGraph(eq(jobId), any(ActorGateway.class))).thenReturn(graph);
    when(graph.getCheckpointCoordinator()).thenReturn(coord);
    JobCancellationWithSavepointHandlers handlers = new JobCancellationWithSavepointHandlers(holder, EC);
    JobCancellationWithSavepointHandlers.TriggerHandler trigger = handlers.getTriggerHandler();
    JobCancellationWithSavepointHandlers.InProgressHandler progress = handlers.getInProgressHandler();
    Map<String, String> params = new HashMap<>();
    params.put("jobid", jobId.toString());
    params.put("targetDirectory", "custom-directory");
    ActorGateway jobManager = mock(ActorGateway.class);
    // Completed manually later to drive the in-progress -> success transition.
    Promise<Object> promise = new Promise.DefaultPromise<>();
    when(jobManager.ask(any(Object.class), any(FiniteDuration.class))).thenReturn(promise);
    // Trigger
    FullHttpResponse response = trigger.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
    verify(jobManager).ask(eq(new CancelJobWithSavepoint(jobId, "custom-directory")), any(FiniteDuration.class));
    String location = String.format("/jobs/%s/cancel-with-savepoint/in-progress/1", jobId);
    JsonNode root = assertJsonResponse(response, HttpResponseStatus.ACCEPTED);
    assertEquals(location, response.headers().get(HttpHeaders.Names.LOCATION));
    assertEquals("accepted", root.get("status").getValueAsText());
    assertEquals("1", root.get("request-id").getValueAsText());
    assertEquals(location, root.get("location").getValueAsText());
    // Trigger again: the pending request is reused, not re-sent.
    response = trigger.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
    root = assertJsonResponse(response, HttpResponseStatus.ACCEPTED);
    assertEquals(location, response.headers().get(HttpHeaders.Names.LOCATION));
    assertEquals("accepted", root.get("status").getValueAsText());
    assertEquals("1", root.get("request-id").getValueAsText());
    assertEquals(location, root.get("location").getValueAsText());
    // Only single actual request
    verify(jobManager).ask(eq(new CancelJobWithSavepoint(jobId, "custom-directory")), any(FiniteDuration.class));
    // Query progress
    params.put("requestId", "1");
    response = progress.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
    root = assertJsonResponse(response, HttpResponseStatus.ACCEPTED);
    assertEquals("in-progress", root.get("status").getValueAsText());
    assertEquals("1", root.get("request-id").getValueAsText());
    // Complete
    promise.success(new CancellationSuccess(jobId, "_path-savepoint_"));
    response = progress.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
    root = assertJsonResponse(response, HttpResponseStatus.CREATED);
    assertEquals("success", root.get("status").getValueAsText());
    assertEquals("1", root.get("request-id").getValueAsText());
    assertEquals("_path-savepoint_", root.get("savepoint-path").getValueAsText());
    // Query again, keep recent history
    response = progress.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
    root = assertJsonResponse(response, HttpResponseStatus.CREATED);
    assertEquals("success", root.get("status").getValueAsText());
    assertEquals("1", root.get("request-id").getValueAsText());
    assertEquals("_path-savepoint_", root.get("savepoint-path").getValueAsText());
    // Query for unknown request
    params.put("requestId", "9929");
    response = progress.handleRequest(params, Collections.<String, String>emptyMap(), jobManager);
    root = assertJsonResponse(response, HttpResponseStatus.BAD_REQUEST);
    assertEquals("failed", root.get("status").getValueAsText());
    assertEquals("9929", root.get("request-id").getValueAsText());
    assertEquals("Unknown job/request ID", root.get("cause").getValueAsText());
}

/**
 * Asserts the invariants shared by every handler response (status code,
 * JSON content type, correct content length) and returns the parsed JSON
 * body for further per-case assertions.
 */
private static JsonNode assertJsonResponse(FullHttpResponse response, HttpResponseStatus expectedStatus) throws Exception {
    assertEquals(expectedStatus, response.getStatus());
    assertEquals("application/json", response.headers().get(HttpHeaders.Names.CONTENT_TYPE));
    assertEquals(Integer.toString(response.content().readableBytes()), response.headers().get(HttpHeaders.Names.CONTENT_LENGTH));
    String json = response.content().toString(Charset.forName("UTF-8"));
    return new ObjectMapper().readTree(json);
}
Also used : HashMap(java.util.HashMap) FiniteDuration(scala.concurrent.duration.FiniteDuration) CancelJobWithSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.CancelJobWithSavepoint) JsonNode(org.codehaus.jackson.JsonNode) ExecutionGraphHolder(org.apache.flink.runtime.webmonitor.ExecutionGraphHolder) CheckpointCoordinator(org.apache.flink.runtime.checkpoint.CheckpointCoordinator) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) FullHttpResponse(io.netty.handler.codec.http.FullHttpResponse) JobID(org.apache.flink.api.common.JobID) ObjectMapper(org.codehaus.jackson.map.ObjectMapper) Test(org.junit.Test)

Aggregations

JobID (org.apache.flink.api.common.JobID)4 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)4 CancelJobWithSavepoint (org.apache.flink.runtime.messages.JobManagerMessages.CancelJobWithSavepoint)4 HashMap (java.util.HashMap)3 CheckpointCoordinator (org.apache.flink.runtime.checkpoint.CheckpointCoordinator)3 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)3 CancellationSuccess (org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess)3 ExecutionGraphHolder (org.apache.flink.runtime.webmonitor.ExecutionGraphHolder)3 Test (org.junit.Test)3 FiniteDuration (scala.concurrent.duration.FiniteDuration)3 FullHttpResponse (io.netty.handler.codec.http.FullHttpResponse)2 JsonNode (org.codehaus.jackson.JsonNode)2 ObjectMapper (org.codehaus.jackson.map.ObjectMapper)2 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 InvocationTargetException (java.lang.reflect.InvocationTargetException)1 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)1 CancelOptions (org.apache.flink.client.cli.CancelOptions)1 CliArgsException (org.apache.flink.client.cli.CliArgsException)1 ProgramInvocationException (org.apache.flink.client.program.ProgramInvocationException)1