use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.
the class SavepointITCase method testTriggerSavepointForNonExistingJob.
/**
 * Triggers a savepoint for a job ID that was never submitted to the cluster and expects the
 * returned future to fail with a {@link FlinkJobNotFoundException}; the failure is additionally
 * checked against the job ID string via {@code assertThrowableWithMessage}.
 *
 * @throws Exception if the cluster cannot be started or the request fails in an unexpected way
 */
@Test
public void testTriggerSavepointForNonExistingJob() throws Exception {
    // Config
    final int numTaskManagers = 1;
    final int numSlotsPerTaskManager = 1;

    final Configuration config = new Configuration();
    config.setString(CheckpointingOptions.SAVEPOINT_DIRECTORY, savepointDir.toURI().toString());

    final MiniClusterWithClientResource cluster =
            new MiniClusterWithClientResource(
                    new MiniClusterResourceConfiguration.Builder()
                            .setConfiguration(config)
                            .setNumberTaskManagers(numTaskManagers)
                            .setNumberSlotsPerTaskManager(numSlotsPerTaskManager)
                            .build());

    // Freshly created ID; this test never submits a job with it.
    final JobID jobID = new JobID();

    cluster.before();
    try {
        // Obtain the client inside the try block so that cluster.after() still runs
        // if the client cannot be obtained after the cluster has been started.
        final ClusterClient<?> client = cluster.getClusterClient();

        // NOTE(review): the null argument is presumably the target directory, falling back
        // to the SAVEPOINT_DIRECTORY configured above - confirm against ClusterClient.
        client.triggerSavepoint(jobID, null, SavepointFormatType.CANONICAL).get();
        fail("Triggering a savepoint for a non-existing job should fail.");
    } catch (ExecutionException e) {
        assertThrowable(e, FlinkJobNotFoundException.class);
        assertThrowableWithMessage(e, jobID.toString());
    } finally {
        cluster.after();
    }
}
use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.
the class SavepointITCase method testStopWithSavepointWithDrainGlobalFailoverIfSavepointAborted.
/**
 * Submits an infinite job, requests stop-with-savepoint against a {@link PathFailingFileSystem}
 * path, expects that request to fail with an "Expected IO exception", and then waits until all
 * tasks of the job are running again.
 *
 * @throws Exception if the cluster, the job submission, or one of the waits fails unexpectedly
 */
@Test
public void testStopWithSavepointWithDrainGlobalFailoverIfSavepointAborted() throws Exception {
    final int parallelism = 2;

    // Register the path pattern that the failing file system should reject.
    PathFailingFileSystem.resetFailingPath(savepointDir.getAbsolutePath() + ".*/_metadata");

    final MiniClusterResourceConfiguration clusterConfiguration =
            new MiniClusterResourceConfiguration.Builder()
                    .setNumberSlotsPerTaskManager(parallelism)
                    .build();
    final MiniClusterWithClientResource cluster =
            new MiniClusterWithClientResource(clusterConfiguration);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.getConfig()
            .setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 0L));
    env.addSource(new InfiniteTestSource())
            .name("Infinite test source")
            .addSink(new DiscardingSink<>());

    final JobGraph jobGraph = env.getStreamGraph().getJobGraph();

    cluster.before();
    try {
        final ClusterClient<?> client = cluster.getClusterClient();
        client.submitJob(jobGraph).get();
        waitUntilAllTasksAreRunning(cluster.getRestClusterClient(), jobGraph.getJobID());

        final String failingSavepointTarget =
                PathFailingFileSystem.SCHEME + "://" + savepointDir.getAbsolutePath();
        try {
            client.stopWithSavepoint(
                            jobGraph.getJobID(),
                            true,
                            failingSavepointTarget,
                            SavepointFormatType.CANONICAL)
                    .get();
            fail("The future should fail exceptionally.");
        } catch (ExecutionException ex) {
            // Only the injected IO failure is expected; anything else is propagated.
            final boolean isExpectedFailure =
                    findThrowableWithMessage(ex, "Expected IO exception").isPresent();
            if (!isExpectedFailure) {
                throw ex;
            }
        }

        // The job must come back up completely after the failed savepoint.
        waitUntilAllTasksAreRunning(cluster.getRestClusterClient(), jobGraph.getJobID());
    } finally {
        cluster.after();
    }
}
use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.
the class SavepointITCase method testStopSavepointWithBoundedInput.
/**
 * For each {@link ChainingStrategy}: builds a job containing a
 * {@link BoundedPassThroughOperator}, runs it on a cluster obtained from a
 * {@code MiniClusterResourceFactory}, stops it with a savepoint, and asserts that
 * {@code BoundedPassThroughOperator.inputEnded} is {@code false} afterwards.
 *
 * @throws Exception if the cluster, the job submission, or the stop request fails
 */
@Test
public void testStopSavepointWithBoundedInput() throws Exception {
    final int numTaskManagers = 2;
    final int numSlotsPerTaskManager = 2;

    for (ChainingStrategy chainingStrategy : ChainingStrategy.values()) {
        final MiniClusterResourceFactory clusterFactory =
                new MiniClusterResourceFactory(
                        numTaskManagers,
                        numSlotsPerTaskManager,
                        getFileBasedCheckpointsConfig());

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        BoundedPassThroughOperator<Integer> operator =
                new BoundedPassThroughOperator<>(chainingStrategy);
        DataStream<Integer> stream =
                env.addSource(new InfiniteTestSource())
                        .transform("pass-through", BasicTypeInfo.INT_TYPE_INFO, operator);
        stream.addSink(new DiscardingSink<>());

        final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        final JobID jobId = jobGraph.getJobID();

        MiniClusterWithClientResource cluster = clusterFactory.get();
        cluster.before();
        try {
            // Obtain the client inside the try block so that cluster.after() still runs
            // if the client cannot be obtained after the cluster has been started.
            final ClusterClient<?> client = cluster.getClusterClient();

            BoundedPassThroughOperator.resetForTest(1, true);

            client.submitJob(jobGraph).get();

            // Wait for the operator's progress latch before checking that tasks are running.
            BoundedPassThroughOperator.getProgressLatch().await();
            waitForAllTaskRunning(cluster.getMiniCluster(), jobId, false);

            // NOTE(review): 'false' is presumably the drain flag and 'null' the target
            // directory - confirm against ClusterClient#stopWithSavepoint.
            client.stopWithSavepoint(jobId, false, null, SavepointFormatType.CANONICAL).get();

            Assert.assertFalse(
                    "input ended with chainingStrategy " + chainingStrategy,
                    BoundedPassThroughOperator.inputEnded);
        } finally {
            cluster.after();
        }
    }
}
use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.
the class SavepointITCase method testTriggerSavepointAndResumeWithNoClaim.
/**
 * Runs the same job graph three times under fresh job IDs: the first run produces a checkpoint,
 * the second run restores from it with {@link RestoreMode#NO_CLAIM} and produces a second
 * checkpoint, and, after the parent directory of the first checkpoint path has been deleted,
 * the third run restores from the second checkpoint with {@link RestoreMode#NO_CLAIM}.
 *
 * @throws Exception if any cluster operation, checkpoint, or wait fails
 */
@Test
@Ignore("Disabling this test because it regularly fails on AZP. See FLINK-25427.")
public void testTriggerSavepointAndResumeWithNoClaim() throws Exception {
    final int numTaskManagers = 2;
    final int numSlotsPerTaskManager = 2;
    final int parallelism = numTaskManagers * numSlotsPerTaskManager;

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(new EmbeddedRocksDBStateBackend(true));
    env.getCheckpointConfig()
            .enableExternalizedCheckpoints(
                    CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
    env.getCheckpointConfig().setCheckpointStorage(folder.newFolder().toURI());
    env.setParallelism(parallelism);

    final SharedReference<CountDownLatch> counter =
            sharedObjects.add(new CountDownLatch(10_000));

    // Appends every incoming value to keyed list state and forwards it unchanged.
    final KeyedProcessFunction<Long, Long, Long> listStateWriter =
            new KeyedProcessFunction<Long, Long, Long>() {
                private ListState<Long> seenValues;

                @Override
                public void open(Configuration parameters) {
                    // we use list state here to create sst files of a significant size
                    // if sst files do not reach certain thresholds they are not stored
                    // in files, but as a byte stream in checkpoints metadata
                    seenValues =
                            getRuntimeContext()
                                    .getListState(
                                            new ListStateDescriptor<>(
                                                    "last", BasicTypeInfo.LONG_TYPE_INFO));
                }

                @Override
                public void processElement(
                        Long value,
                        KeyedProcessFunction<Long, Long, Long>.Context ctx,
                        Collector<Long> out)
                        throws Exception {
                    seenValues.add(value);
                    out.collect(value);
                }
            };

    // Counts the shared latch down once per record that reaches the sink.
    final SinkFunction<Long> countingSink =
            new SinkFunction<Long>() {
                @Override
                public void invoke(Long value) {
                    counter.consumeSync(CountDownLatch::countDown);
                }
            };

    env.fromSequence(1, Long.MAX_VALUE)
            .keyBy(i -> i % parallelism)
            .process(listStateWriter)
            .addSink(countingSink)
            .setParallelism(1);

    final JobGraph jobGraph = env.getStreamGraph().getJobGraph();

    final MiniClusterWithClientResource cluster =
            new MiniClusterWithClientResource(
                    new MiniClusterResourceConfiguration.Builder()
                            .setNumberTaskManagers(numTaskManagers)
                            .setNumberSlotsPerTaskManager(numSlotsPerTaskManager)
                            .build());
    cluster.before();
    try {
        // --- first run: produce the initial checkpoint ---
        final JobID firstRunId = new JobID();
        jobGraph.setJobID(firstRunId);
        cluster.getClusterClient().submitJob(jobGraph).get();
        CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), firstRunId, false);

        // wait for some records to be processed before taking the checkpoint
        counter.get().await();
        final String firstCheckpoint =
                cluster.getMiniCluster().triggerCheckpoint(firstRunId).get();
        cluster.getClusterClient().cancel(firstRunId).get();

        // --- second run: restore from the first checkpoint, take another one ---
        jobGraph.setSavepointRestoreSettings(
                SavepointRestoreSettings.forPath(firstCheckpoint, false, RestoreMode.NO_CLAIM));
        final JobID secondRunId = new JobID();
        jobGraph.setJobID(secondRunId);
        cluster.getClusterClient().submitJob(jobGraph).get();
        CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), secondRunId, false);

        final String secondCheckpoint =
                cluster.getMiniCluster().triggerCheckpoint(secondRunId).get();
        cluster.getClusterClient().cancel(secondRunId).get();

        // delete the checkpoint we restored from
        FileUtils.deleteDirectory(Paths.get(new URI(firstCheckpoint)).getParent().toFile());

        // --- third run ---
        // we should be able to restore from the second checkpoint even though it has been
        // built on top of the first checkpoint
        jobGraph.setSavepointRestoreSettings(
                SavepointRestoreSettings.forPath(secondCheckpoint, false, RestoreMode.NO_CLAIM));
        final JobID thirdRunId = new JobID();
        jobGraph.setJobID(thirdRunId);
        cluster.getClusterClient().submitJob(jobGraph).get();
        CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), thirdRunId, false);
    } finally {
        cluster.after();
    }
}
use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.
the class NotifyCheckpointAbortedITCase method setup.
/**
 * Prepares each test: creates a new checkpoint folder, starts a mini cluster with one
 * TaskManager and one slot (local recovery enabled, HA mode set to {@link TestingHAFactory}),
 * and calls {@code reset()} on the test source, map, sink and completed-checkpoint store.
 *
 * @throws Exception if the temporary folder cannot be created or the cluster fails to start
 */
@Before
public void setup() throws Exception {
    final Configuration clusterConfig = new Configuration();
    clusterConfig.setBoolean(CheckpointingOptions.LOCAL_RECOVERY, true);
    clusterConfig.setString(HighAvailabilityOptions.HA_MODE, TestingHAFactory.class.getName());

    checkpointPath = new Path(TEMPORARY_FOLDER.newFolder().toURI());

    final MiniClusterResourceConfiguration resourceConfig =
            new MiniClusterResourceConfiguration.Builder()
                    .setConfiguration(clusterConfig)
                    .setNumberTaskManagers(1)
                    .setNumberSlotsPerTaskManager(1)
                    .build();
    cluster = new MiniClusterWithClientResource(resourceConfig);
    cluster.before();

    // Reset the test helpers before each test.
    NormalSource.reset();
    NormalMap.reset();
    DeclineSink.reset();
    TestingCompletedCheckpointStore.reset();
}
Aggregations