Use of org.apache.flink.connector.testframe.external.ExternalSystemSplitDataWriter in project flink by apache.
The example below is the restartFromSavepoint method of the class SourceTestSuiteBase.
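For context, ExternalSystemSplitDataWriter is the test framework's hook for writing records into a single split (e.g. one partition or shard) of the external system under test. The sketch below is a minimal, hypothetical in-memory implementation; it assumes the interface exposes writeRecords(List<T>) plus an inherited close(), and the InMemorySplitDataWriter name and its field are illustrative only, not part of the Flink sources.

import java.util.ArrayList;
import java.util.List;

import org.apache.flink.connector.testframe.external.ExternalSystemSplitDataWriter;

/** Hypothetical writer that stores the records of one split in memory. */
public class InMemorySplitDataWriter<T> implements ExternalSystemSplitDataWriter<T> {

    private final List<T> split = new ArrayList<>();

    @Override
    public void writeRecords(List<T> records) {
        // Append the generated test records to this split of the "external system".
        split.addAll(records);
    }

    @Override
    public void close() {
        // Nothing to release in memory; a real writer would flush and close its client here.
    }

    /** Exposes the written records so a test can assert against them. */
    public List<T> getSplit() {
        return split;
    }
}

The test method itself follows.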
private void restartFromSavepoint(
        TestEnvironment testEnv,
        DataStreamSourceExternalContext<T> externalContext,
        CheckpointingMode semantic,
        final int splitNumber,
        final int beforeParallelism,
        final int afterParallelism)
        throws Exception {
    // Step 1: Preparation
    TestingSourceSettings sourceSettings =
            TestingSourceSettings.builder()
                    .setBoundedness(Boundedness.CONTINUOUS_UNBOUNDED)
                    .setCheckpointingMode(semantic)
                    .build();
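    // The source must be unbounded: stopWithSavepoint in Step 4 can only stop a
    // still-running streaming job, so the source must not finish on its own.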
    TestEnvironmentSettings envOptions =
            TestEnvironmentSettings.builder()
                    .setConnectorJarPaths(externalContext.getConnectorJarPaths())
                    .build();
    // Step 2: Generate test data
    final List<ExternalSystemSplitDataWriter<T>> writers = new ArrayList<>();
    final List<List<T>> testRecordCollections = new ArrayList<>();
    for (int i = 0; i < splitNumber; i++) {
        writers.add(externalContext.createSourceSplitDataWriter(sourceSettings));
        testRecordCollections.add(
                generateTestDataForWriter(externalContext, sourceSettings, i, writers.get(i)));
    }
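    // The writers are kept around deliberately: Step 5 reuses them to append a
    // second batch of records to the same splits after the savepoint is taken.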
    // Step 3: Build and execute Flink job
    final StreamExecutionEnvironment execEnv = testEnv.createExecutionEnvironment(envOptions);
    execEnv.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    execEnv.enableCheckpointing(50);
    execEnv.setRestartStrategy(RestartStrategies.noRestart());
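    // Restarts are disabled so that an unexpected failure fails the test fast
    // instead of being masked by automatic recovery.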
    DataStreamSource<T> source =
            execEnv.fromSource(
                            tryCreateSource(externalContext, sourceSettings),
                            WatermarkStrategy.noWatermarks(),
                            "Tested Source")
                    .setParallelism(beforeParallelism);
    CollectIteratorBuilder<T> iteratorBuilder = addCollectSink(source);
    final JobClient jobClient = execEnv.executeAsync("Restart Test");
    // Step 4: Check the result and stop Flink job with a savepoint
    CollectResultIterator<T> iterator = null;
    try {
        iterator = iteratorBuilder.build(jobClient);
        checkResultWithSemantic(
                iterator, testRecordCollections, semantic, getTestDataSize(testRecordCollections));
    } catch (Exception e) {
        killJob(jobClient);
        throw e;
    }
    String savepointPath =
            jobClient
                    .stopWithSavepoint(true, testEnv.getCheckpointUri(), SavepointFormatType.CANONICAL)
                    .get(30, TimeUnit.SECONDS);
    waitForJobStatus(
            jobClient,
            Collections.singletonList(JobStatus.FINISHED),
            Deadline.fromNow(DEFAULT_JOB_STATUS_CHANGE_TIMEOUT));
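    // Reaching FINISHED confirms that stop-with-savepoint completed; the returned
    // savepointPath is fed into the restarted job in Step 6.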
    // Step 5: Generate new test data
    final List<List<T>> newTestRecordCollections = new ArrayList<>();
    for (int i = 0; i < splitNumber; i++) {
        newTestRecordCollections.add(
                generateTestDataForWriter(externalContext, sourceSettings, i, writers.get(i)));
    }
    // Step 6: Restart the Flink job with the savepoint
    TestEnvironmentSettings restartEnvOptions =
            TestEnvironmentSettings.builder()
                    .setConnectorJarPaths(externalContext.getConnectorJarPaths())
                    .setSavepointRestorePath(savepointPath)
                    .build();
    final StreamExecutionEnvironment restartEnv =
            testEnv.createExecutionEnvironment(restartEnvOptions);
    restartEnv.enableCheckpointing(500);
    restartEnv.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    DataStreamSource<T> restartSource =
            restartEnv.fromSource(
                            tryCreateSource(externalContext, sourceSettings),
                            WatermarkStrategy.noWatermarks(),
                            "Tested Source")
                    .setParallelism(afterParallelism);
    addCollectSink(restartSource);
    final JobClient restartJobClient = restartEnv.executeAsync("Restart Test");
    waitForJobStatus(
            restartJobClient,
            Collections.singletonList(JobStatus.RUNNING),
            Deadline.fromNow(DEFAULT_JOB_STATUS_CHANGE_TIMEOUT));
    try {
        iterator.setJobClient(restartJobClient);
        /*
         * Use the same iterator as the previous run, because the CollectStreamSink will
         * snapshot its state and recover from it.
         *
         * The fetcher in CollectResultIterator is responsible for communicating with
         * the CollectSinkFunction, and handles the result via the
         * CheckpointedCollectResultBuffer under EXACTLY_ONCE semantics.
         */
        checkResultWithSemantic(
                iterator,
                newTestRecordCollections,
                semantic,
                getTestDataSize(newTestRecordCollections));
    } finally {
        // Clean up
        killJob(restartJobClient);
        iterator.close();
    }
}
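The generateTestDataForWriter helper used in Steps 2 and 5 is where the ExternalSystemSplitDataWriter actually comes into play. A plausible sketch, assuming the helper delegates record generation to the external context and then pushes the batch through the writer; the generateTestData signature and the random seed argument are assumptions:

protected List<T> generateTestDataForWriter(
        DataStreamSourceExternalContext<T> externalContext,
        TestingSourceSettings sourceSettings,
        int splitIndex,
        ExternalSystemSplitDataWriter<T> writer) {
    // Let the connector-specific context generate records for this split...
    final List<T> testRecords =
            externalContext.generateTestData(
                    sourceSettings, splitIndex, ThreadLocalRandom.current().nextLong());
    // ...write them into the external system, and return them as the expected result.
    writer.writeRecords(testRecords);
    return testRecords;
}

Returning the written batch lets the caller use the same list both as input to the external system and as the expected output for checkResultWithSemantic.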