Search in sources :

Example 11 with JobStatusMessage

Use of org.apache.flink.runtime.client.JobStatusMessage in project flink by apache.

From the class ClassLoaderITCase, method testDisposeSavepointWithCustomKvState.

/**
 * Tests disposal of a savepoint, which contains custom user code KvState.
 *
 * <p>The test starts the packaged program from {@code CUSTOM_KV_STATE_JAR_PATH} on a background
 * thread, polls the cluster until a job reports {@code RUNNING}, triggers a savepoint (retrying
 * up to 20 times until one succeeds), disposes that savepoint, cancels the job and finally
 * checks that the invoking thread has terminated.
 *
 * @throws Exception if any cluster interaction fails or times out
 */
@Test
public void testDisposeSavepointWithCustomKvState() throws Exception {
    ClusterClient<?> clusterClient = new MiniClusterClient(new Configuration(), miniClusterResource.getMiniCluster());
    // Single time budget shared by every blocking call in this test
    Deadline deadline = new FiniteDuration(100, TimeUnit.SECONDS).fromNow();
    File checkpointDir = FOLDER.newFolder();
    File outputDir = FOLDER.newFolder();
    final PackagedProgram program = PackagedProgram.newBuilder()
            .setJarFile(new File(CUSTOM_KV_STATE_JAR_PATH))
            .setArguments(new String[] {
                String.valueOf(parallelism),
                checkpointDir.toURI().toString(),
                "5000",
                outputDir.toURI().toString(),
                // Disable unaligned checkpoints as this test is
                // triggering concurrent savepoints/checkpoints
                "false"
            })
            .build();
    TestStreamEnvironment.setAsContext(miniClusterResource.getMiniCluster(), parallelism, Collections.singleton(new Path(CUSTOM_KV_STATE_JAR_PATH)), Collections.emptyList());
    // Execute detached
    Thread invokeThread = new Thread(() -> {
        try {
            program.invokeInteractiveModeForExecution();
        } catch (ProgramInvocationException ex) {
            // The job is cancelled at the end of the test, so a cancellation is expected here;
            // anything else is worth reporting. (instanceof is already false for a null cause.)
            if (!(ex.getCause() instanceof JobCancellationException)) {
                LOG.error("Program invocation failed", ex);
            }
        }
    });
    LOG.info("Starting program invoke thread");
    invokeThread.start();
    // The job ID
    JobID jobId = null;
    LOG.info("Waiting for job status running.");
    // Wait for running job
    while (jobId == null && deadline.hasTimeLeft()) {
        Collection<JobStatusMessage> jobs = clusterClient.listJobs().get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        for (JobStatusMessage job : jobs) {
            if (job.getJobState() == JobStatus.RUNNING) {
                jobId = job.getJobId();
                LOG.info("Job running. ID: " + jobId);
                break;
            }
        }
        // Retry if job is not available yet
        if (jobId == null) {
            Thread.sleep(100L);
        }
    }
    // Fail fast with a precise message instead of passing a null job ID to the savepoint calls
    assertNotNull("Job did not reach RUNNING state before the deadline", jobId);
    // Trigger savepoint; stop retrying as soon as one attempt succeeds so that no
    // additional, never-disposed savepoints are created
    String savepointPath = null;
    for (int i = 0; i < 20 && savepointPath == null; i++) {
        LOG.info("Triggering savepoint (" + (i + 1) + "/20).");
        try {
            savepointPath = clusterClient.triggerSavepoint(jobId, null, SavepointFormatType.CANONICAL).get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        } catch (Exception cause) {
            LOG.info("Failed to trigger savepoint. Retrying...", cause);
            // This can fail if the operators are not opened yet
            Thread.sleep(500);
        }
    }
    assertNotNull("Failed to trigger savepoint", savepointPath);
    clusterClient.disposeSavepoint(savepointPath).get();
    clusterClient.cancel(jobId).get();
    // make sure, the execution is finished to not influence other test methods.
    // Thread.join rejects negative timeouts and treats 0 as "wait forever", so clamp to >= 1 ms.
    invokeThread.join(Math.max(1L, deadline.timeLeft().toMillis()));
    assertFalse("Program invoke thread still running", invokeThread.isAlive());
}
Also used : Path(org.apache.flink.core.fs.Path) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) Configuration(org.apache.flink.configuration.Configuration) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) MiniClusterClient(org.apache.flink.client.program.MiniClusterClient) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) SuccessException(org.apache.flink.test.util.SuccessException) JobCancellationException(org.apache.flink.runtime.client.JobCancellationException) IOException(java.io.IOException) JobCancellationException(org.apache.flink.runtime.client.JobCancellationException) PackagedProgram(org.apache.flink.client.program.PackagedProgram) JobStatusMessage(org.apache.flink.runtime.client.JobStatusMessage) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) File(java.io.File) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

JobStatusMessage (org.apache.flink.runtime.client.JobStatusMessage)11 JobID (org.apache.flink.api.common.JobID)6 ProgramInvocationException (org.apache.flink.client.program.ProgramInvocationException)6 Configuration (org.apache.flink.configuration.Configuration)4 FileNotFoundException (java.io.FileNotFoundException)3 IOException (java.io.IOException)3 Collection (java.util.Collection)3 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)3 ProgramMissingJobException (org.apache.flink.client.program.ProgramMissingJobException)3 ProgramParametrizationException (org.apache.flink.client.program.ProgramParametrizationException)3 Test (org.junit.Test)3 FiniteDuration (scala.concurrent.duration.FiniteDuration)3 Timeout (akka.util.Timeout)2 File (java.io.File)2 UndeclaredThrowableException (java.lang.reflect.UndeclaredThrowableException)2 URI (java.net.URI)2 SimpleDateFormat (java.text.SimpleDateFormat)2 ArrayList (java.util.ArrayList)2 Collections (java.util.Collections)2 Comparator (java.util.Comparator)2