Search in sources :

Example 6 with MiniClusterWithClientResource

use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.

the class ResumeCheckpointManuallyITCase method testExternalizedCheckpoints.

/**
 * Starts a mini cluster that uses the given checkpoint directory and runs the job three times
 * in a row: the first run starts fresh, and each following run is handed the externalized
 * checkpoint path returned by the run before it.
 *
 * @param checkpointDir directory configured as the cluster's checkpoints directory
 * @param zooKeeperQuorum ZooKeeper quorum to configure; when {@code null} no high-availability
 *     options are set
 * @param backend state backend passed on to every job run
 * @param localRecovery value written to the local-recovery option
 * @throws Exception if preparing the cluster or any of the job runs fails
 */
private void testExternalizedCheckpoints(File checkpointDir, String zooKeeperQuorum, StateBackend backend, boolean localRecovery) throws Exception {
    final File savepointDir = temporaryFolder.newFolder();

    final Configuration config = new Configuration();
    config.setString(CheckpointingOptions.CHECKPOINTS_DIRECTORY, checkpointDir.toURI().toString());
    config.setString(CheckpointingOptions.SAVEPOINT_DIRECTORY, savepointDir.toURI().toString());
    config.setBoolean(CheckpointingOptions.LOCAL_RECOVERY, localRecovery);

    // Configure DFS DSTL for this test as it might produce too much GC pressure if
    // ChangelogStateBackend is used.
    // This is done unconditionally on the cluster level because randomization currently happens
    // on the job level (environment), while this factory can only be set on the cluster level.
    FsStateChangelogStorageFactory.configure(config, temporaryFolder.newFolder());

    // Only switch to ZooKeeper recovery mode when a quorum was supplied.
    if (zooKeeperQuorum != null) {
        final File haStorageDir = temporaryFolder.newFolder();
        config.setString(HighAvailabilityOptions.HA_MODE, "ZOOKEEPER");
        config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, zooKeeperQuorum);
        config.setString(HighAvailabilityOptions.HA_STORAGE_PATH, haStorageDir.toURI().toString());
    }

    final MiniClusterResourceConfiguration clusterSettings =
            new MiniClusterResourceConfiguration.Builder()
                    .setConfiguration(config)
                    .setNumberTaskManagers(NUM_TASK_MANAGERS)
                    .setNumberSlotsPerTaskManager(SLOTS_PER_TASK_MANAGER)
                    .build();
    final MiniClusterWithClientResource cluster = new MiniClusterWithClientResource(clusterSettings);
    cluster.before();
    final ClusterClient<?> client = cluster.getClusterClient();

    try {
        // main test sequence:  start job -> eCP -> restore job -> eCP -> restore job
        String restorePath = null;
        for (int run = 0; run < 3; run++) {
            restorePath = runJobAndGetExternalizedCheckpoint(backend, checkpointDir, restorePath, client);
            assertNotNull(restorePath);
        }
    } finally {
        cluster.after();
    }
}
Also used : MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) Configuration(org.apache.flink.configuration.Configuration) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) File(java.io.File)

Example 7 with MiniClusterWithClientResource

use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.

the class SavepointITCase method testTriggerSavepointWithCheckpointingDisabled.

/**
 * Submits a job graph consisting of a single blocking vertex and expects a savepoint request for
 * it to fail: the resulting {@link ExecutionException} is checked for an
 * {@link IllegalStateException}, the job ID, and the text "is not a streaming job".
 */
@Test
public void testTriggerSavepointWithCheckpointingDisabled() throws Exception {
    // Job under test: one blocking no-op vertex with parallelism 1.
    final JobVertex blockingVertex = new JobVertex("Blocking vertex");
    blockingVertex.setInvokableClass(BlockingNoOpInvokable.class);
    blockingVertex.setParallelism(1);
    final JobGraph graph = JobGraphTestUtils.streamingJobGraph(blockingVertex);

    // Cluster: one task manager with one slot and an otherwise empty configuration.
    final MiniClusterResourceConfiguration clusterSettings =
            new MiniClusterResourceConfiguration.Builder()
                    .setConfiguration(new Configuration())
                    .setNumberTaskManagers(1)
                    .setNumberSlotsPerTaskManager(1)
                    .build();
    final MiniClusterWithClientResource cluster = new MiniClusterWithClientResource(clusterSettings);
    cluster.before();
    final ClusterClient<?> client = cluster.getClusterClient();

    try {
        client.submitJob(graph).get();
        // triggerSavepoint is only available after all tasks are running
        waitForAllTaskRunning(cluster.getMiniCluster(), graph.getJobID(), false);
        client.triggerSavepoint(graph.getJobID(), null, SavepointFormatType.CANONICAL).get();
        // Reaching this line means the savepoint request unexpectedly succeeded.
        fail();
    } catch (ExecutionException expected) {
        assertThrowable(expected, IllegalStateException.class);
        assertThrowableWithMessage(expected, graph.getJobID().toString());
        assertThrowableWithMessage(expected, "is not a streaming job");
    } finally {
        cluster.after();
    }
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) Configuration(org.apache.flink.configuration.Configuration) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) ExecutionException(java.util.concurrent.ExecutionException) Test(org.junit.Test)

Example 8 with MiniClusterWithClientResource

use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.

the class SavepointITCase method restoreJobAndVerifyState.

/**
 * Starts a cluster from the given factory, submits a job configured with the given savepoint
 * restore settings, waits for the restore and progress latches of {@code StatefulCounter},
 * cancels the job, and runs the supplied post-cancel checks against the cluster.
 *
 * @param clusterFactory supplies the cluster resource to run the job on
 * @param parallelism parallelism used for the job graph and for resetting the counter helpers
 * @param savepointRestoreSettings restore settings applied to the job graph before submission
 * @param postCancelChecks checks executed after the job has been observed as {@code CANCELED}
 * @throws Exception if submission, restore, cancellation, or the checks fail, or if the job
 *     does not reach {@code CANCELED} before the 30 second deadline
 */
private void restoreJobAndVerifyState(MiniClusterResourceFactory clusterFactory, int parallelism, SavepointRestoreSettings savepointRestoreSettings, PostCancelChecker postCancelChecks) throws Exception {
    final JobGraph jobGraph = createJobGraph(parallelism, 0, 1000);
    jobGraph.setSavepointRestoreSettings(savepointRestoreSettings);
    final JobID jobId = jobGraph.getJobID();
    StatefulCounter.resetForTest(parallelism);
    MiniClusterWithClientResource cluster = clusterFactory.get();
    cluster.before();
    ClusterClient<?> client = cluster.getClusterClient();
    try {
        client.submitJob(jobGraph).get();
        // Await state is restored
        StatefulCounter.getRestoreLatch().await();
        // Await some progress after restore
        StatefulCounter.getProgressLatch().await();
        client.cancel(jobId).get();
        // Poll the job status until it is CANCELED. The returned future must be awaited;
        // otherwise the checks below could run while the job is still cancelling.
        FutureUtils.retrySuccessfulWithDelay(
                        () -> client.getJobStatus(jobId),
                        Time.milliseconds(50),
                        Deadline.now().plus(Duration.ofSeconds(30)),
                        status -> status == JobStatus.CANCELED,
                        TestingUtils.defaultScheduledExecutor())
                .get();
        postCancelChecks.check(cluster);
    } finally {
        cluster.after();
        StatefulCounter.resetForTest(parallelism);
    }
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) JobID(org.apache.flink.api.common.JobID)

Example 9 with MiniClusterWithClientResource

use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.

the class SavepointITCase method testTriggerSavepointWithoutCheckpointBaseLocations.

/**
 * Runs a job with checkpointing disabled, triggers a savepoint for it, cancels the job, and
 * asserts that {@code checkpointDir} contains no files afterwards. The savepoint, if one was
 * created, is disposed before the cluster is shut down.
 */
@Test
public void testTriggerSavepointWithoutCheckpointBaseLocations() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getCheckpointConfig().disableCheckpointing();
    env.setParallelism(1);
    env.addSource(new IntegerStreamSource()).addSink(new DiscardingSink<>());
    JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    Configuration config = getFileBasedCheckpointsConfig();
    config.addAll(jobGraph.getJobConfiguration());
    MiniClusterWithClientResource cluster = new MiniClusterWithClientResource(new MiniClusterResourceConfiguration.Builder().setConfiguration(config).setNumberTaskManagers(1).setNumberSlotsPerTaskManager(1).build());
    cluster.before();
    ClusterClient<?> client = cluster.getClusterClient();
    String savepointPath = null;
    try {
        client.submitJob(jobGraph).get();
        waitForAllTaskRunning(cluster.getMiniCluster(), jobGraph.getJobID(), false);
        savepointPath = client.triggerSavepoint(jobGraph.getJobID(), null, SavepointFormatType.CANONICAL).get();
        assertNotNull(savepointPath);
        client.cancel(jobGraph.getJobID()).get();
        // checkpoint directory should not be initialized
        assertEquals(0, Objects.requireNonNull(checkpointDir.listFiles()).length);
    } finally {
        try {
            if (null != savepointPath) {
                // Wait for the disposal to complete; an un-awaited request would race with
                // the cluster shutdown below.
                client.disposeSavepoint(savepointPath).get();
            }
        } finally {
            // Always tear the cluster down, even if disposing the savepoint failed.
            cluster.after();
        }
    }
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) Configuration(org.apache.flink.configuration.Configuration) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 10 with MiniClusterWithClientResource

use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.

the class SavepointITCase method testCanRestoreWithModifiedStatelessOperators.

/**
 * FLINK-5985
 *
 * <p>This test ensures we can restore from a savepoint under modifications to the job graph
 * that only concern stateless operators.
 *
 * <p>It runs in two phases, each on its own cluster: the first phase runs the original job and
 * takes a savepoint; the second phase submits a job whose stateless map operators differ and
 * waits until the stateful counter reports both a restore and further progress.
 */
@Test
public void testCanRestoreWithModifiedStatelessOperators() throws Exception {
    // Config
    int numTaskManagers = 2;
    int numSlotsPerTaskManager = 2;
    int parallelism = 2;
    // Test deadline
    final Deadline deadline = Deadline.now().plus(Duration.ofMinutes(5));
    // Flink configuration
    final Configuration config = new Configuration();
    config.setString(CheckpointingOptions.SAVEPOINT_DIRECTORY, savepointDir.toURI().toString());
    String savepointPath;
    LOG.info("Flink configuration: {}.", config);
    // Start Flink
    MiniClusterWithClientResource cluster = new MiniClusterWithClientResource(new MiniClusterResourceConfiguration.Builder().setConfiguration(config).setNumberTaskManagers(numTaskManagers).setNumberSlotsPerTaskManager(numSlotsPerTaskManager).build());
    // The original logged a shutdown message here although the cluster is being started.
    LOG.info("Starting Flink cluster.");
    cluster.before();
    ClusterClient<?> client = cluster.getClusterClient();
    try {
        final StatefulCounter statefulCounter = new StatefulCounter();
        StatefulCounter.resetForTest(parallelism);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(parallelism);
        // Original topology: stateless map -> stateful counter -> stateless map.
        env.addSource(new InfiniteTestSource()).shuffle().map(value -> 4 * value).shuffle().map(statefulCounter).uid("statefulCounter").shuffle().map(value -> 2 * value).addSink(new DiscardingSink<>());
        JobGraph originalJobGraph = env.getStreamGraph().getJobGraph();
        JobID jobID = client.submitJob(originalJobGraph).get();
        // wait for the Tasks to be ready
        waitForAllTaskRunning(cluster.getMiniCluster(), jobID, false);
        assertTrue(StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS));
        savepointPath = client.triggerSavepoint(jobID, null, SavepointFormatType.CANONICAL).get();
        LOG.info("Retrieved savepoint: {}.", savepointPath);
    } finally {
        // Shut down the Flink cluster (thereby canceling the job)
        LOG.info("Shutting down Flink cluster.");
        cluster.after();
    }
    // create a new MiniCluster to make sure we start with completely
    // new resources
    cluster = new MiniClusterWithClientResource(new MiniClusterResourceConfiguration.Builder().setConfiguration(config).setNumberTaskManagers(numTaskManagers).setNumberSlotsPerTaskManager(numSlotsPerTaskManager).build());
    LOG.info("Restarting Flink cluster.");
    cluster.before();
    client = cluster.getClusterClient();
    try {
        // Reset static test helpers
        StatefulCounter.resetForTest(parallelism);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(parallelism);
        // Modified topology: the stateful counter keeps its uid, while the stateless maps
        // around it differ from the original job (none before it, an identity map after it).
        env.addSource(new InfiniteTestSource()).shuffle().map(new StatefulCounter()).uid("statefulCounter").shuffle().map(value -> value).addSink(new DiscardingSink<>());
        JobGraph modifiedJobGraph = env.getStreamGraph().getJobGraph();
        // Set the savepoint path
        modifiedJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
        LOG.info("Resubmitting job {} with savepoint path {} in detached mode.", modifiedJobGraph.getJobID(), savepointPath);
        // Submit the job
        client.submitJob(modifiedJobGraph).get();
        // Await state is restored
        assertTrue(StatefulCounter.getRestoreLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS));
        // Await some progress after restore
        assertTrue(StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS));
    } finally {
        cluster.after();
    }
}
Also used : Arrays(java.util.Arrays) SharedObjects(org.apache.flink.testutils.junit.SharedObjects) MemorySize(org.apache.flink.configuration.MemorySize) EmptyRequestBody(org.apache.flink.runtime.rest.messages.EmptyRequestBody) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) ExceptionUtils.findThrowable(org.apache.flink.util.ExceptionUtils.findThrowable) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) TestUtils.submitJobAndWaitForResult(org.apache.flink.test.util.TestUtils.submitJobAndWaitForResult) FSDataOutputStream(org.apache.flink.core.fs.FSDataOutputStream) CheckpointListener(org.apache.flink.api.common.state.CheckpointListener) Duration(java.time.Duration) Map(java.util.Map) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph) ExceptionUtils.assertThrowable(org.apache.flink.util.ExceptionUtils.assertThrowable) RichSourceFunction(org.apache.flink.streaming.api.functions.source.RichSourceFunction) Path(java.nio.file.Path) StateSnapshotContext(org.apache.flink.runtime.state.StateSnapshotContext) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) BoundedOneInput(org.apache.flink.streaming.api.operators.BoundedOneInput) FileSystemFactory(org.apache.flink.core.fs.FileSystemFactory) CountDownLatch(java.util.concurrent.CountDownLatch) JobMessageParameters(org.apache.flink.runtime.rest.messages.JobMessageParameters) Stream(java.util.stream.Stream) ValueState(org.apache.flink.api.common.state.ValueState) ClusterClient(org.apache.flink.client.program.ClusterClient) Assert.assertFalse(org.junit.Assert.assertFalse) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) Time(org.apache.flink.api.common.time.Time) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) FlinkException(org.apache.flink.util.FlinkException) LocalFileSystem(org.apache.flink.core.fs.local.LocalFileSystem) 
JobStatus(org.apache.flink.api.common.JobStatus) KeyedProcessFunction(org.apache.flink.streaming.api.functions.KeyedProcessFunction) TypeSafeDiagnosingMatcher(org.hamcrest.TypeSafeDiagnosingMatcher) TaskManagerOptions(org.apache.flink.configuration.TaskManagerOptions) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Collector(org.apache.flink.util.Collector) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) Before(org.junit.Before) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) Files(java.nio.file.Files) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream) File(java.io.File) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) ExecutionException(java.util.concurrent.ExecutionException) JobID(org.apache.flink.api.common.JobID) Paths(java.nio.file.Paths) Matcher(org.hamcrest.Matcher) Assert(org.junit.Assert) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) Assert.assertEquals(org.junit.Assert.assertEquals) StateBackendOptions(org.apache.flink.configuration.StateBackendOptions) EntropyInjectingTestFileSystem(org.apache.flink.testutils.EntropyInjectingTestFileSystem) Deadline(org.apache.flink.api.common.time.Deadline) ExceptionUtils.findThrowableWithMessage(org.apache.flink.util.ExceptionUtils.findThrowableWithMessage) ClusterOptions(org.apache.flink.configuration.ClusterOptions) FileUtils(org.apache.flink.util.FileUtils) URISyntaxException(java.net.URISyntaxException) BiFunction(java.util.function.BiFunction) 
JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) LoggerFactory(org.slf4j.LoggerFactory) BlockingNoOpInvokable(org.apache.flink.runtime.testtasks.BlockingNoOpInvokable) Random(java.util.Random) FunctionSnapshotContext(org.apache.flink.runtime.state.FunctionSnapshotContext) EmbeddedRocksDBStateBackend(org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend) MapFunction(org.apache.flink.api.common.functions.MapFunction) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) Assert.assertThat(org.junit.Assert.assertThat) ListState(org.apache.flink.api.common.state.ListState) CommonTestUtils.waitForAllTaskRunning(org.apache.flink.runtime.testutils.CommonTestUtils.waitForAllTaskRunning) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) TestLogger(org.apache.flink.util.TestLogger) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) Assert.fail(org.junit.Assert.fail) URI(java.net.URI) KeySelector(org.apache.flink.api.java.functions.KeySelector) CheckpointedFunction(org.apache.flink.streaming.api.checkpoint.CheckpointedFunction) FunctionInitializationContext(org.apache.flink.runtime.state.FunctionInitializationContext) Collection(java.util.Collection) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) CheckpointingOptions(org.apache.flink.configuration.CheckpointingOptions) Objects(java.util.Objects) TestingUtils(org.apache.flink.testutils.TestingUtils) List(java.util.List) FileSystem(org.apache.flink.core.fs.FileSystem) FlinkJobNotFoundException(org.apache.flink.runtime.messages.FlinkJobNotFoundException) Optional(java.util.Optional) CheckpointConfig(org.apache.flink.streaming.api.environment.CheckpointConfig) ParallelSourceFunction(org.apache.flink.streaming.api.functions.source.ParallelSourceFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) 
RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) IterativeStream(org.apache.flink.streaming.api.datastream.IterativeStream) CompletableFuture(java.util.concurrent.CompletableFuture) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) RestClusterClient(org.apache.flink.client.program.rest.RestClusterClient) RestoreMode(org.apache.flink.runtime.jobgraph.RestoreMode) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) CompletableFuture.allOf(java.util.concurrent.CompletableFuture.allOf) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) JobDetailsHeaders(org.apache.flink.runtime.rest.messages.job.JobDetailsHeaders) SharedReference(org.apache.flink.testutils.junit.SharedReference) Description(org.hamcrest.Description) Logger(org.slf4j.Logger) LocalRecoverableWriter(org.apache.flink.core.fs.local.LocalRecoverableWriter) DiscardingSink(org.apache.flink.streaming.api.functions.sink.DiscardingSink) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) ExceptionUtils.assertThrowableWithMessage(org.apache.flink.util.ExceptionUtils.assertThrowableWithMessage) DataStream(org.apache.flink.streaming.api.datastream.DataStream) TimeUnit(java.util.concurrent.TimeUnit) Rule(org.junit.Rule) Ignore(org.junit.Ignore) ListCheckpointed(org.apache.flink.streaming.api.checkpoint.ListCheckpointed) FileVisitOption(java.nio.file.FileVisitOption) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) 
Configuration(org.apache.flink.configuration.Configuration) Deadline(org.apache.flink.api.common.time.Deadline) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

MiniClusterWithClientResource (org.apache.flink.test.util.MiniClusterWithClientResource)34 MiniClusterResourceConfiguration (org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration)26 Configuration (org.apache.flink.configuration.Configuration)24 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)14 Before (org.junit.Before)13 Test (org.junit.Test)13 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)12 JobID (org.apache.flink.api.common.JobID)9 File (java.io.File)8 ExecutionException (java.util.concurrent.ExecutionException)7 JobExecutionException (org.apache.flink.runtime.client.JobExecutionException)7 IOException (java.io.IOException)6 Optional (java.util.Optional)6 RichMapFunction (org.apache.flink.api.common.functions.RichMapFunction)6 List (java.util.List)5 CheckpointingOptions (org.apache.flink.configuration.CheckpointingOptions)5 MemorySize (org.apache.flink.configuration.MemorySize)5 StateBackendOptions (org.apache.flink.configuration.StateBackendOptions)5 FlinkJobNotFoundException (org.apache.flink.runtime.messages.FlinkJobNotFoundException)5 StreamGraph (org.apache.flink.streaming.api.graph.StreamGraph)5