Use of org.apache.flink.configuration.CheckpointingOptions.FS_SMALL_FILE_THRESHOLD in project flink by apache.
From the class SavepointDeepCopyTest, method testSavepointDeepCopy.
/**
 * Test savepoint deep copy. This method tests the savepoint deep copy by:
 *
 * <ul>
 *   <li>creating {@code savepoint1} with operator {@code Operator1} and making sure its
 *       directory contains more than one file (i.e. state files in addition to _metadata)
 *   <li>creating {@code savepoint2} from {@code savepoint1} by adding a new operator {@code
 *       Operator2}
 *   <li>checking that every file name found in {@code savepoint1}'s directory is also present
 *       in {@code savepoint2}'s directory
 *   <li>reading the state of {@code Operator1} from {@code savepoint2} and making sure the
 *       number of keys remains the same
 * </ul>
 *
 * @throws Exception if anything goes wrong while writing, listing or reading the savepoints
 */
@Test
public void testSavepointDeepCopy() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<String> words = env.fromElements(TEXT.split(" "));
    StateBootstrapTransformation<String> transformation =
            OperatorTransformation.bootstrapWith(words)
                    .keyBy(e -> e)
                    .transform(new WordMapBootstrapper());

    // create savepoint1 containing only "Operator1"
    File savepointUrl1 = createAndRegisterTempFile(new AbstractID().toHexString());
    String savepointPath1 = savepointUrl1.getPath();
    // NOTE(review): FILE_STATE_SIZE_THRESHOLD is presumably chosen small enough that state is
    // written to separate files rather than inlined into _metadata - confirm against its
    // definition.
    SavepointWriter.newSavepoint(backend, 128)
            .withConfiguration(FS_SMALL_FILE_THRESHOLD, FILE_STATE_SIZE_THRESHOLD)
            .withOperator("Operator1", transformation)
            .write(savepointPath1);
    env.execute("bootstrap savepoint1");

    // List the directory once; entry names are unique within a directory, so the size of the
    // name set equals the number of entries.
    Set<String> stateFiles1 = listFileNames(savepointPath1);
    Assert.assertTrue(
            "Failed to bootstrap savepoint1 with additional state files",
            stateFiles1.size() > 1);

    // create savepoint2 from savepoint1 created above
    File savepointUrl2 = createAndRegisterTempFile(new AbstractID().toHexString());
    String savepointPath2 = savepointUrl2.getPath();
    SavepointWriter savepoint2 =
            SavepointWriter.fromExistingSavepoint(savepointPath1, backend)
                    .withConfiguration(FS_SMALL_FILE_THRESHOLD, FILE_STATE_SIZE_THRESHOLD);
    savepoint2.withOperator("Operator2", transformation).write(savepointPath2);
    env.execute("create savepoint2");

    Set<String> stateFiles2 = listFileNames(savepointPath2);
    Assert.assertTrue(
            "Failed to create savepoint2 from savepoint1 with additional state files",
            stateFiles2.size() > 1);
    assertThat(
            "At least one state file in savepoint1 are not in savepoint2",
            stateFiles1,
            everyItem(isIn(stateFiles2)));

    // Read the state of "Operator1" (which has not been touched/changed when savepoint2 was
    // created) from savepoint2 and make sure the number of keys remains the same.
    long actualKeyNum =
            JobResultRetriever.collect(
                            SavepointReader.read(env, savepointPath2, backend)
                                    .readKeyedState("Operator1", new ReadFunction()))
                    .size();
    long expectedKeyNum = Arrays.stream(TEXT.split(" ")).distinct().count();
    Assert.assertEquals(
            "Unexpected number of keys in the state of Operator1",
            expectedKeyNum,
            actualKeyNum);
}

/**
 * Returns the file names of all entries directly inside the given directory.
 *
 * <p>The stream returned by {@code Files.list} keeps the directory open until it is closed, so
 * it is consumed inside a try-with-resources block.
 *
 * @param directory path of the directory to list
 * @return the set of entry names (last path element of each entry) in that directory
 * @throws java.io.IOException if the directory cannot be opened or read
 */
private static Set<String> listFileNames(String directory) throws java.io.IOException {
    try (java.util.stream.Stream<java.nio.file.Path> entries =
            Files.list(Paths.get(directory))) {
        return entries.map(path -> path.getFileName().toString()).collect(Collectors.toSet());
    }
}
Aggregations