Search in sources :

Example 1 with CommonTestUtils.waitForAllTaskRunning

use of org.apache.flink.runtime.testutils.CommonTestUtils.waitForAllTaskRunning in project flink by apache.

the class SavepointITCase method testTriggerSavepointAndResumeWithNoClaim.

@Test
@Ignore("Disabling this test because it regularly fails on AZP. See FLINK-25427.")
public void testTriggerSavepointAndResumeWithNoClaim() throws Exception {
    final int numTaskManagers = 2;
    final int numSlotsPerTaskManager = 2;
    final int parallelism = numTaskManagers * numSlotsPerTaskManager;
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(new EmbeddedRocksDBStateBackend(true));
    env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
    env.getCheckpointConfig().setCheckpointStorage(folder.newFolder().toURI());
    env.setParallelism(parallelism);
    final SharedReference<CountDownLatch> counter = sharedObjects.add(new CountDownLatch(10_000));
    env.fromSequence(1, Long.MAX_VALUE).keyBy(i -> i % parallelism).process(new KeyedProcessFunction<Long, Long, Long>() {

        private ListState<Long> last;

        @Override
        public void open(Configuration parameters) {
            // we use list state here to create sst files of a significant size
            // if sst files do not reach certain thresholds they are not stored
            // in files, but as a byte stream in checkpoints metadata
            last = getRuntimeContext().getListState(new ListStateDescriptor<>("last", BasicTypeInfo.LONG_TYPE_INFO));
        }

        @Override
        public void processElement(Long value, KeyedProcessFunction<Long, Long, Long>.Context ctx, Collector<Long> out) throws Exception {
            last.add(value);
            out.collect(value);
        }
    }).addSink(new SinkFunction<Long>() {

        @Override
        public void invoke(Long value) {
            counter.consumeSync(CountDownLatch::countDown);
        }
    }).setParallelism(1);
    final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    MiniClusterWithClientResource cluster = new MiniClusterWithClientResource(new MiniClusterResourceConfiguration.Builder().setNumberTaskManagers(numTaskManagers).setNumberSlotsPerTaskManager(numSlotsPerTaskManager).build());
    cluster.before();
    try {
        final JobID jobID1 = new JobID();
        jobGraph.setJobID(jobID1);
        cluster.getClusterClient().submitJob(jobGraph).get();
        CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), jobID1, false);
        // wait for some records to be processed before taking the checkpoint
        counter.get().await();
        final String firstCheckpoint = cluster.getMiniCluster().triggerCheckpoint(jobID1).get();
        cluster.getClusterClient().cancel(jobID1).get();
        jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(firstCheckpoint, false, RestoreMode.NO_CLAIM));
        final JobID jobID2 = new JobID();
        jobGraph.setJobID(jobID2);
        cluster.getClusterClient().submitJob(jobGraph).get();
        CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), jobID2, false);
        String secondCheckpoint = cluster.getMiniCluster().triggerCheckpoint(jobID2).get();
        cluster.getClusterClient().cancel(jobID2).get();
        // delete the checkpoint we restored from
        FileUtils.deleteDirectory(Paths.get(new URI(firstCheckpoint)).getParent().toFile());
        // we should be able to restore from the second checkpoint even though it has been built
        // on top of the first checkpoint
        jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(secondCheckpoint, false, RestoreMode.NO_CLAIM));
        final JobID jobID3 = new JobID();
        jobGraph.setJobID(jobID3);
        cluster.getClusterClient().submitJob(jobGraph).get();
        CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), jobID3, false);
    } finally {
        cluster.after();
    }
}
Also used : Arrays(java.util.Arrays) SharedObjects(org.apache.flink.testutils.junit.SharedObjects) MemorySize(org.apache.flink.configuration.MemorySize) EmptyRequestBody(org.apache.flink.runtime.rest.messages.EmptyRequestBody) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) ExceptionUtils.findThrowable(org.apache.flink.util.ExceptionUtils.findThrowable) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) TestUtils.submitJobAndWaitForResult(org.apache.flink.test.util.TestUtils.submitJobAndWaitForResult) FSDataOutputStream(org.apache.flink.core.fs.FSDataOutputStream) CheckpointListener(org.apache.flink.api.common.state.CheckpointListener) Duration(java.time.Duration) Map(java.util.Map) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph) ExceptionUtils.assertThrowable(org.apache.flink.util.ExceptionUtils.assertThrowable) RichSourceFunction(org.apache.flink.streaming.api.functions.source.RichSourceFunction) Path(java.nio.file.Path) StateSnapshotContext(org.apache.flink.runtime.state.StateSnapshotContext) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) BoundedOneInput(org.apache.flink.streaming.api.operators.BoundedOneInput) FileSystemFactory(org.apache.flink.core.fs.FileSystemFactory) CountDownLatch(java.util.concurrent.CountDownLatch) JobMessageParameters(org.apache.flink.runtime.rest.messages.JobMessageParameters) Stream(java.util.stream.Stream) ValueState(org.apache.flink.api.common.state.ValueState) ClusterClient(org.apache.flink.client.program.ClusterClient) Assert.assertFalse(org.junit.Assert.assertFalse) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) Time(org.apache.flink.api.common.time.Time) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) FlinkException(org.apache.flink.util.FlinkException) LocalFileSystem(org.apache.flink.core.fs.local.LocalFileSystem) JobStatus(org.apache.flink.api.common.JobStatus) KeyedProcessFunction(org.apache.flink.streaming.api.functions.KeyedProcessFunction) TypeSafeDiagnosingMatcher(org.hamcrest.TypeSafeDiagnosingMatcher) TaskManagerOptions(org.apache.flink.configuration.TaskManagerOptions) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Collector(org.apache.flink.util.Collector) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) Before(org.junit.Before) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) Files(java.nio.file.Files) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream) File(java.io.File) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) ExecutionException(java.util.concurrent.ExecutionException) JobID(org.apache.flink.api.common.JobID) Paths(java.nio.file.Paths) Matcher(org.hamcrest.Matcher) Assert(org.junit.Assert) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) Assert.assertEquals(org.junit.Assert.assertEquals) StateBackendOptions(org.apache.flink.configuration.StateBackendOptions) EntropyInjectingTestFileSystem(org.apache.flink.testutils.EntropyInjectingTestFileSystem) Deadline(org.apache.flink.api.common.time.Deadline) ExceptionUtils.findThrowableWithMessage(org.apache.flink.util.ExceptionUtils.findThrowableWithMessage) ClusterOptions(org.apache.flink.configuration.ClusterOptions) FileUtils(org.apache.flink.util.FileUtils) URISyntaxException(java.net.URISyntaxException) BiFunction(java.util.function.BiFunction) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) LoggerFactory(org.slf4j.LoggerFactory) BlockingNoOpInvokable(org.apache.flink.runtime.testtasks.BlockingNoOpInvokable) Random(java.util.Random) FunctionSnapshotContext(org.apache.flink.runtime.state.FunctionSnapshotContext) EmbeddedRocksDBStateBackend(org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend) MapFunction(org.apache.flink.api.common.functions.MapFunction) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) Assert.assertThat(org.junit.Assert.assertThat) ListState(org.apache.flink.api.common.state.ListState) CommonTestUtils.waitForAllTaskRunning(org.apache.flink.runtime.testutils.CommonTestUtils.waitForAllTaskRunning) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) TestLogger(org.apache.flink.util.TestLogger) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) Assert.fail(org.junit.Assert.fail) URI(java.net.URI) KeySelector(org.apache.flink.api.java.functions.KeySelector) CheckpointedFunction(org.apache.flink.streaming.api.checkpoint.CheckpointedFunction) FunctionInitializationContext(org.apache.flink.runtime.state.FunctionInitializationContext) Collection(java.util.Collection) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) CheckpointingOptions(org.apache.flink.configuration.CheckpointingOptions) Objects(java.util.Objects) TestingUtils(org.apache.flink.testutils.TestingUtils) List(java.util.List) FileSystem(org.apache.flink.core.fs.FileSystem) FlinkJobNotFoundException(org.apache.flink.runtime.messages.FlinkJobNotFoundException) Optional(java.util.Optional) CheckpointConfig(org.apache.flink.streaming.api.environment.CheckpointConfig) ParallelSourceFunction(org.apache.flink.streaming.api.functions.source.ParallelSourceFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) IterativeStream(org.apache.flink.streaming.api.datastream.IterativeStream) CompletableFuture(java.util.concurrent.CompletableFuture) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) RestClusterClient(org.apache.flink.client.program.rest.RestClusterClient) RestoreMode(org.apache.flink.runtime.jobgraph.RestoreMode) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) CompletableFuture.allOf(java.util.concurrent.CompletableFuture.allOf) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) JobDetailsHeaders(org.apache.flink.runtime.rest.messages.job.JobDetailsHeaders) SharedReference(org.apache.flink.testutils.junit.SharedReference) Description(org.hamcrest.Description) Logger(org.slf4j.Logger) LocalRecoverableWriter(org.apache.flink.core.fs.local.LocalRecoverableWriter) DiscardingSink(org.apache.flink.streaming.api.functions.sink.DiscardingSink) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) ExceptionUtils.assertThrowableWithMessage(org.apache.flink.util.ExceptionUtils.assertThrowableWithMessage) DataStream(org.apache.flink.streaming.api.datastream.DataStream) TimeUnit(java.util.concurrent.TimeUnit) Rule(org.junit.Rule) Ignore(org.junit.Ignore) ListCheckpointed(org.apache.flink.streaming.api.checkpoint.ListCheckpointed) FileVisitOption(java.nio.file.FileVisitOption) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) Configuration(org.apache.flink.configuration.Configuration) KeyedProcessFunction(org.apache.flink.streaming.api.functions.KeyedProcessFunction) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) CountDownLatch(java.util.concurrent.CountDownLatch) URI(java.net.URI) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) FlinkException(org.apache.flink.util.FlinkException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) URISyntaxException(java.net.URISyntaxException) FileNotFoundException(java.io.FileNotFoundException) FlinkJobNotFoundException(org.apache.flink.runtime.messages.FlinkJobNotFoundException) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) EmbeddedRocksDBStateBackend(org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

File (java.io.File)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 URI (java.net.URI)1 URISyntaxException (java.net.URISyntaxException)1 FileVisitOption (java.nio.file.FileVisitOption)1 Files (java.nio.file.Files)1 Path (java.nio.file.Path)1 Paths (java.nio.file.Paths)1 Duration (java.time.Duration)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 List (java.util.List)1 Map (java.util.Map)1 Objects (java.util.Objects)1 Optional (java.util.Optional)1 Random (java.util.Random)1 CompletableFuture (java.util.concurrent.CompletableFuture)1 CompletableFuture.allOf (java.util.concurrent.CompletableFuture.allOf)1