Search in sources :

Example 1 with FS_SMALL_FILE_THRESHOLD

use of org.apache.flink.configuration.CheckpointingOptions.FS_SMALL_FILE_THRESHOLD in project flink by apache.

the class SavepointDeepCopyTest method testSavepointDeepCopy.

/**
 * Tests that creating a savepoint from an existing one performs a deep copy of its state files.
 *
 * <p>The test proceeds as follows:
 *
 * <ul>
 *   <li>create {@code savepoint1} with operator {@code Operator1} and make sure its directory
 *       contains more than one file (i.e. state files in addition to _metadata)
 *   <li>create {@code savepoint2} from {@code savepoint1} by adding a new operator {@code
 *       Operator2}
 *   <li>check that every file name found in {@code savepoint1}'s directory is also present in
 *       {@code savepoint2}'s directory
 *   <li>read the state of {@code Operator1} from {@code savepoint2} and make sure the number of
 *       keys remains the same
 * </ul>
 *
 * @throws Exception if bootstrapping, writing, listing or reading a savepoint fails
 */
@Test
public void testSavepointDeepCopy() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<String> words = env.fromElements(TEXT.split(" "));
    StateBootstrapTransformation<String> transformation =
            OperatorTransformation.bootstrapWith(words)
                    .keyBy(e -> e)
                    .transform(new WordMapBootstrapper());

    // create savepoint1 containing only Operator1
    File savepointUrl1 = createAndRegisterTempFile(new AbstractID().toHexString());
    String savepointPath1 = savepointUrl1.getPath();
    SavepointWriter.newSavepoint(backend, 128)
            .withConfiguration(FS_SMALL_FILE_THRESHOLD, FILE_STATE_SIZE_THRESHOLD)
            .withOperator("Operator1", transformation)
            .write(savepointPath1);
    env.execute("bootstrap savepoint1");

    // List the directory once; file names within a single directory are unique, so the size of
    // this set equals the number of directory entries.
    Set<String> stateFiles1 = listFileNames(savepointPath1);
    Assert.assertTrue(
            "Failed to bootstrap savepoint1 with additional state files",
            stateFiles1.size() > 1);

    // create savepoint2 from savepoint1 created above
    File savepointUrl2 = createAndRegisterTempFile(new AbstractID().toHexString());
    String savepointPath2 = savepointUrl2.getPath();
    SavepointWriter savepoint2 =
            SavepointWriter.fromExistingSavepoint(savepointPath1, backend)
                    .withConfiguration(FS_SMALL_FILE_THRESHOLD, FILE_STATE_SIZE_THRESHOLD);
    savepoint2.withOperator("Operator2", transformation).write(savepointPath2);
    env.execute("create savepoint2");

    Set<String> stateFiles2 = listFileNames(savepointPath2);
    Assert.assertTrue(
            "Failed to create savepoint2 from savepoint1 with additional state files",
            stateFiles2.size() > 1);
    assertThat(
            "At least one state file in savepoint1 are not in savepoint2",
            stateFiles1,
            everyItem(isIn(stateFiles2)));

    // Read the state of "Operator1" back from savepoint2. Operator1 was not touched when
    // savepoint2 was created, so the number of keys must remain the same.
    long actualKeyNum =
            JobResultRetriever.collect(
                            SavepointReader.read(env, savepointPath2, backend)
                                    .readKeyedState("Operator1", new ReadFunction()))
                    .size();
    long expectedKeyNum = Arrays.stream(TEXT.split(" ")).distinct().count();
    Assert.assertEquals(
            "Unexpected number of keys in the state of Operator1", expectedKeyNum, actualKeyNum);
}

/**
 * Returns the names of all entries directly inside the given directory.
 *
 * <p>The stream returned by {@code Files.list} holds an open directory handle, so it is closed
 * here via try-with-resources instead of being left to leak.
 *
 * @param directory path of the directory to list
 * @return the file names (last path element) of the directory's entries
 * @throws java.io.IOException if the directory cannot be opened or read
 */
private static Set<String> listFileNames(String directory) throws java.io.IOException {
    try (java.util.stream.Stream<java.nio.file.Path> entries =
            Files.list(Paths.get(directory))) {
        return entries.map(entry -> entry.getFileName().toString())
                .collect(Collectors.toSet());
    }
}
Also used : Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RunWith(org.junit.runner.RunWith) JobResultRetriever(org.apache.flink.state.api.utils.JobResultRetriever) EmbeddedRocksDBStateBackend(org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend) MemorySize(org.apache.flink.configuration.MemorySize) KeyedStateReaderFunction(org.apache.flink.state.api.functions.KeyedStateReaderFunction) Assert.assertThat(org.junit.Assert.assertThat) StateBackend(org.apache.flink.runtime.state.StateBackend) Matchers.everyItem(org.hamcrest.Matchers.everyItem) Collector(org.apache.flink.util.Collector) KeyedStateBootstrapFunction(org.apache.flink.state.api.functions.KeyedStateBootstrapFunction) Matchers.isIn(org.hamcrest.Matchers.isIn) Parameterized(org.junit.runners.Parameterized) AbstractTestBase(org.apache.flink.test.util.AbstractTestBase) Types(org.apache.flink.api.common.typeinfo.Types) Files(java.nio.file.Files) AbstractID(org.apache.flink.util.AbstractID) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) Collection(java.util.Collection) Configuration(org.apache.flink.configuration.Configuration) Set(java.util.Set) Test(org.junit.Test) FS_SMALL_FILE_THRESHOLD(org.apache.flink.configuration.CheckpointingOptions.FS_SMALL_FILE_THRESHOLD) Collectors(java.util.stream.Collectors) File(java.io.File) DataStream(org.apache.flink.streaming.api.datastream.DataStream) ValueState(org.apache.flink.api.common.state.ValueState) Paths(java.nio.file.Paths) HashMapStateBackend(org.apache.flink.runtime.state.hashmap.HashMapStateBackend) RandomStringUtils(org.apache.commons.lang3.RandomStringUtils) Assert(org.junit.Assert) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) File(java.io.File) AbstractID(org.apache.flink.util.AbstractID) Test(org.junit.Test)

Aggregations

File (java.io.File)1 Files (java.nio.file.Files)1 Paths (java.nio.file.Paths)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1 Set (java.util.Set)1 Collectors (java.util.stream.Collectors)1 RandomStringUtils (org.apache.commons.lang3.RandomStringUtils)1 ValueState (org.apache.flink.api.common.state.ValueState)1 ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor)1 Types (org.apache.flink.api.common.typeinfo.Types)1 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)1 FS_SMALL_FILE_THRESHOLD (org.apache.flink.configuration.CheckpointingOptions.FS_SMALL_FILE_THRESHOLD)1 Configuration (org.apache.flink.configuration.Configuration)1 MemorySize (org.apache.flink.configuration.MemorySize)1 EmbeddedRocksDBStateBackend (org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend)1 StateBackend (org.apache.flink.runtime.state.StateBackend)1 HashMapStateBackend (org.apache.flink.runtime.state.hashmap.HashMapStateBackend)1 KeyedStateBootstrapFunction (org.apache.flink.state.api.functions.KeyedStateBootstrapFunction)1 KeyedStateReaderFunction (org.apache.flink.state.api.functions.KeyedStateReaderFunction)1