use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.
the class ResumeCheckpointManuallyITCase method testExternalizedCheckpoints.
/**
 * Starts a mini cluster configured with checkpoint and savepoint directories and runs the job
 * three times in a row via {@code runJobAndGetExternalizedCheckpoint}: the first run starts
 * without a checkpoint, each later run is handed the externalized checkpoint returned by the
 * run before it. Every returned checkpoint is asserted to be non-null.
 *
 * @param checkpointDir directory configured as the cluster's checkpoints directory
 * @param zooKeeperQuorum ZooKeeper quorum; when non-null the cluster is configured for
 *     ZooKeeper high availability, otherwise no HA options are set
 * @param backend state backend passed through to every job run
 * @param localRecovery value written to the local recovery option
 * @throws Exception if the cluster cannot be set up or a job run fails
 */
private void testExternalizedCheckpoints(File checkpointDir, String zooKeeperQuorum, StateBackend backend, boolean localRecovery) throws Exception {
    final Configuration clusterConfig = new Configuration();
    final File savepointFolder = temporaryFolder.newFolder();
    clusterConfig.setString(CheckpointingOptions.CHECKPOINTS_DIRECTORY, checkpointDir.toURI().toString());
    clusterConfig.setString(CheckpointingOptions.SAVEPOINT_DIRECTORY, savepointFolder.toURI().toString());
    clusterConfig.setBoolean(CheckpointingOptions.LOCAL_RECOVERY, localRecovery);

    // Use the file-system based DSTL for this test: ChangelogStateBackend might otherwise
    // produce too much GC pressure. This is done unconditionally on the cluster level because
    // randomization currently happens per job (environment), whereas this factory can only be
    // set for the whole cluster.
    FsStateChangelogStorageFactory.configure(clusterConfig, temporaryFolder.newFolder());

    // Optionally switch the cluster to ZooKeeper recovery mode.
    final boolean useZooKeeper = zooKeeperQuorum != null;
    if (useZooKeeper) {
        final File haStorageFolder = temporaryFolder.newFolder();
        clusterConfig.setString(HighAvailabilityOptions.HA_MODE, "ZOOKEEPER");
        clusterConfig.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, zooKeeperQuorum);
        clusterConfig.setString(HighAvailabilityOptions.HA_STORAGE_PATH, haStorageFolder.toURI().toString());
    }

    final MiniClusterResourceConfiguration resourceConfig =
            new MiniClusterResourceConfiguration.Builder()
                    .setConfiguration(clusterConfig)
                    .setNumberTaskManagers(NUM_TASK_MANAGERS)
                    .setNumberSlotsPerTaskManager(SLOTS_PER_TASK_MANAGER)
                    .build();
    MiniClusterWithClientResource miniCluster = new MiniClusterWithClientResource(resourceConfig);
    miniCluster.before();
    ClusterClient<?> clusterClient = miniCluster.getClusterClient();
    try {
        // main test sequence: start job -> eCP -> restore job -> eCP -> restore job
        String latestExternalCheckpoint = null;
        for (int run = 0; run < 3; run++) {
            // The first run gets no checkpoint; later runs resume from the previous one.
            latestExternalCheckpoint =
                    runJobAndGetExternalizedCheckpoint(
                            backend, checkpointDir, latestExternalCheckpoint, clusterClient);
            assertNotNull(latestExternalCheckpoint);
        }
    } finally {
        miniCluster.after();
    }
}
use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.
the class SavepointITCase method testTriggerSavepointWithCheckpointingDisabled.
/**
 * Submits a job consisting of a single blocking no-op vertex to a one-slot mini cluster and
 * triggers a savepoint once all tasks are running. The trigger is expected to fail with an
 * {@link ExecutionException}; the failure is checked for an {@link IllegalStateException}, the
 * job id, and the text "is not a streaming job".
 */
@Test
public void testTriggerSavepointWithCheckpointingDisabled() throws Exception {
    // Cluster setup: one task manager with a single slot and a default configuration.
    final Configuration configuration = new Configuration();
    final MiniClusterResourceConfiguration resourceConfiguration =
            new MiniClusterResourceConfiguration.Builder()
                    .setConfiguration(configuration)
                    .setNumberTaskManagers(1)
                    .setNumberSlotsPerTaskManager(1)
                    .build();
    final MiniClusterWithClientResource miniCluster =
            new MiniClusterWithClientResource(resourceConfiguration);
    miniCluster.before();
    final ClusterClient<?> clusterClient = miniCluster.getClusterClient();

    // Job under test: a single blocking vertex with parallelism 1.
    final JobVertex blockingVertex = new JobVertex("Blocking vertex");
    blockingVertex.setInvokableClass(BlockingNoOpInvokable.class);
    blockingVertex.setParallelism(1);
    final JobGraph jobGraph = JobGraphTestUtils.streamingJobGraph(blockingVertex);

    try {
        clusterClient.submitJob(jobGraph).get();
        // A savepoint can only be triggered once every task is running.
        waitForAllTaskRunning(miniCluster.getMiniCluster(), jobGraph.getJobID(), false);
        clusterClient
                .triggerSavepoint(jobGraph.getJobID(), null, SavepointFormatType.CANONICAL)
                .get();
        // Reaching this point means the savepoint unexpectedly succeeded.
        fail();
    } catch (ExecutionException expected) {
        assertThrowable(expected, IllegalStateException.class);
        assertThrowableWithMessage(expected, jobGraph.getJobID().toString());
        assertThrowableWithMessage(expected, "is not a streaming job");
    } finally {
        miniCluster.after();
    }
}
use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.
the class SavepointITCase method restoreJobAndVerifyState.
/**
 * Submits a job with the given savepoint restore settings on a cluster obtained from the
 * factory, waits until state has been restored and some progress was made, cancels the job,
 * waits for it to reach {@code CANCELED}, and then runs the supplied post-cancel checks.
 *
 * <p>The cluster is always shut down and the {@code StatefulCounter} test state is always
 * reset afterwards, even if a step fails.
 *
 * @param clusterFactory supplies the mini cluster resource the job is run on
 * @param parallelism parallelism used to create the job graph and to reset the
 *     {@code StatefulCounter} test state
 * @param savepointRestoreSettings restore settings applied to the job graph before submission
 * @param postCancelChecks checks executed against the cluster after the job was cancelled
 * @throws Exception if submission, cancellation, waiting for the {@code CANCELED} status
 *     (including running out of the 30 second deadline), or the checks fail
 */
private void restoreJobAndVerifyState(MiniClusterResourceFactory clusterFactory, int parallelism, SavepointRestoreSettings savepointRestoreSettings, PostCancelChecker postCancelChecks) throws Exception {
    final JobGraph jobGraph = createJobGraph(parallelism, 0, 1000);
    jobGraph.setSavepointRestoreSettings(savepointRestoreSettings);
    final JobID jobId = jobGraph.getJobID();
    StatefulCounter.resetForTest(parallelism);
    MiniClusterWithClientResource cluster = clusterFactory.get();
    cluster.before();
    ClusterClient<?> client = cluster.getClusterClient();
    try {
        client.submitJob(jobGraph).get();
        // Await state is restored
        StatefulCounter.getRestoreLatch().await();
        // Await some progress after restore
        StatefulCounter.getProgressLatch().await();
        client.cancel(jobId).get();
        // Block until the job status is CANCELED. The retry helper only returns a future, so
        // without get() the checks below could run while the job is still cancelling.
        FutureUtils.retrySuccessfulWithDelay(
                        () -> client.getJobStatus(jobId),
                        Time.milliseconds(50),
                        Deadline.now().plus(Duration.ofSeconds(30)),
                        status -> status == JobStatus.CANCELED,
                        TestingUtils.defaultScheduledExecutor())
                .get();
        postCancelChecks.check(cluster);
    } finally {
        cluster.after();
        StatefulCounter.resetForTest(parallelism);
    }
}
use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.
the class SavepointITCase method testTriggerSavepointWithoutCheckpointBaseLocations.
/**
 * Runs a job with checkpointing disabled on a cluster configured via
 * {@code getFileBasedCheckpointsConfig()}, triggers a savepoint, cancels the job and asserts
 * that the checkpoint directory contains no files afterwards.
 *
 * <p>A savepoint that was created is disposed during cleanup, and the disposal is awaited
 * before the cluster is shut down.
 */
@Test
public void testTriggerSavepointWithoutCheckpointBaseLocations() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getCheckpointConfig().disableCheckpointing();
    env.setParallelism(1);
    env.addSource(new IntegerStreamSource()).addSink(new DiscardingSink<>());
    JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    Configuration config = getFileBasedCheckpointsConfig();
    config.addAll(jobGraph.getJobConfiguration());
    MiniClusterWithClientResource cluster =
            new MiniClusterWithClientResource(
                    new MiniClusterResourceConfiguration.Builder()
                            .setConfiguration(config)
                            .setNumberTaskManagers(1)
                            .setNumberSlotsPerTaskManager(1)
                            .build());
    cluster.before();
    ClusterClient<?> client = cluster.getClusterClient();
    String savepointPath = null;
    try {
        client.submitJob(jobGraph).get();
        // triggerSavepoint is only available after all tasks are running
        waitForAllTaskRunning(cluster.getMiniCluster(), jobGraph.getJobID(), false);
        savepointPath =
                client.triggerSavepoint(jobGraph.getJobID(), null, SavepointFormatType.CANONICAL)
                        .get();
        assertNotNull(savepointPath);
        client.cancel(jobGraph.getJobID()).get();
        // checkpoint directory should not be initialized
        assertEquals(0, Objects.requireNonNull(checkpointDir.listFiles()).length);
    } finally {
        try {
            if (null != savepointPath) {
                // disposeSavepoint is asynchronous: wait for it so the disposal cannot race
                // with the cluster shutdown below and a failed disposal is not dropped.
                client.disposeSavepoint(savepointPath).get();
            }
        } finally {
            // Shut the cluster down even if the disposal failed.
            cluster.after();
        }
    }
}
use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.
the class SavepointITCase method testCanRestoreWithModifiedStatelessOperators.
/**
 * FLINK-5985
 *
 * <p>This test ensures we can restore from a savepoint under modifications to the job graph
 * that only concern stateless operators.
 *
 * <p>A first cluster runs the original job and takes a savepoint once the stateful counter has
 * made progress. A second, freshly created cluster then runs a job whose stateless map
 * operators differ, restoring from that savepoint, and the test waits for the counter state to
 * be restored and for further progress. All waits share a five minute deadline.
 */
@Test
public void testCanRestoreWithModifiedStatelessOperators() throws Exception {
    // Config
    int numTaskManagers = 2;
    int numSlotsPerTaskManager = 2;
    int parallelism = 2;
    // Test deadline
    final Deadline deadline = Deadline.now().plus(Duration.ofMinutes(5));
    // Flink configuration
    final Configuration config = new Configuration();
    config.setString(CheckpointingOptions.SAVEPOINT_DIRECTORY, savepointDir.toURI().toString());
    String savepointPath;
    LOG.info("Flink configuration: {}.", config);
    // Start Flink
    MiniClusterWithClientResource cluster =
            new MiniClusterWithClientResource(
                    new MiniClusterResourceConfiguration.Builder()
                            .setConfiguration(config)
                            .setNumberTaskManagers(numTaskManagers)
                            .setNumberSlotsPerTaskManager(numSlotsPerTaskManager)
                            .build());
    LOG.info("Starting Flink cluster.");
    cluster.before();
    ClusterClient<?> client = cluster.getClusterClient();
    try {
        final StatefulCounter statefulCounter = new StatefulCounter();
        StatefulCounter.resetForTest(parallelism);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(parallelism);
        // Original job: stateless maps before and after the stateful counter.
        env.addSource(new InfiniteTestSource())
                .shuffle()
                .map(value -> 4 * value)
                .shuffle()
                .map(statefulCounter)
                .uid("statefulCounter")
                .shuffle()
                .map(value -> 2 * value)
                .addSink(new DiscardingSink<>());
        JobGraph originalJobGraph = env.getStreamGraph().getJobGraph();
        JobID jobID = client.submitJob(originalJobGraph).get();
        // wait for the Tasks to be ready
        waitForAllTaskRunning(cluster.getMiniCluster(), jobID, false);
        assertTrue(
                StatefulCounter.getProgressLatch()
                        .await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS));
        savepointPath = client.triggerSavepoint(jobID, null, SavepointFormatType.CANONICAL).get();
        LOG.info("Retrieved savepoint: {}.", savepointPath);
    } finally {
        // Shut down the Flink cluster (thereby canceling the job)
        LOG.info("Shutting down Flink cluster.");
        cluster.after();
    }
    // create a new MiniCluster to make sure we start with completely
    // new resources
    cluster =
            new MiniClusterWithClientResource(
                    new MiniClusterResourceConfiguration.Builder()
                            .setConfiguration(config)
                            .setNumberTaskManagers(numTaskManagers)
                            .setNumberSlotsPerTaskManager(numSlotsPerTaskManager)
                            .build());
    LOG.info("Restarting Flink cluster.");
    cluster.before();
    client = cluster.getClusterClient();
    try {
        // Reset static test helpers
        StatefulCounter.resetForTest(parallelism);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(parallelism);
        // Generate a modified job graph that differs only in its stateless operators: the
        // map in front of the counter is gone and the one behind it is now an identity map.
        // The stateful counter keeps its uid so its state can be matched on restore.
        env.addSource(new InfiniteTestSource())
                .shuffle()
                .map(new StatefulCounter())
                .uid("statefulCounter")
                .shuffle()
                .map(value -> value)
                .addSink(new DiscardingSink<>());
        JobGraph modifiedJobGraph = env.getStreamGraph().getJobGraph();
        // Set the savepoint path
        modifiedJobGraph.setSavepointRestoreSettings(
                SavepointRestoreSettings.forPath(savepointPath));
        LOG.info(
                "Resubmitting job {} with savepoint path {} in detached mode.",
                modifiedJobGraph.getJobID(),
                savepointPath);
        // Submit the job
        client.submitJob(modifiedJobGraph).get();
        // Await state is restored
        assertTrue(
                StatefulCounter.getRestoreLatch()
                        .await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS));
        // Await some progress after restore
        assertTrue(
                StatefulCounter.getProgressLatch()
                        .await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS));
    } finally {
        cluster.after();
    }
}
Aggregations