Use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.
Class SavepointITCase, method testSubmitWithUnknownSavepointPath.
@Test
public void testSubmitWithUnknownSavepointPath() throws Exception {
    // Config
    int numTaskManagers = 1;
    int numSlotsPerTaskManager = 1;
    int parallelism = numTaskManagers * numSlotsPerTaskManager;
    final Configuration config = new Configuration();
    config.setString(CheckpointingOptions.SAVEPOINT_DIRECTORY, savepointDir.toURI().toString());
    MiniClusterWithClientResource cluster =
            new MiniClusterWithClientResource(
                    new MiniClusterResourceConfiguration.Builder()
                            .setConfiguration(config)
                            .setNumberTaskManagers(numTaskManagers)
                            .setNumberSlotsPerTaskManager(numSlotsPerTaskManager)
                            .build());
    cluster.before();
    ClusterClient<?> client = cluster.getClusterClient();
    try {
        // High value to ensure timeouts if restarted.
        int numberOfRetries = 1000;
        // Submit the job.
        // Long delay to ensure that the test times out if the job
        // manager tries to restart the job.
        final JobGraph jobGraph = createJobGraph(parallelism, numberOfRetries, 3600000);
        // Set a non-existing savepoint path.
        jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath("unknown path"));
        assertEquals("unknown path", jobGraph.getSavepointRestoreSettings().getRestorePath());
        LOG.info("Submitting job " + jobGraph.getJobID() + " in detached mode.");
        try {
            submitJobAndWaitForResult(client, jobGraph, getClass().getClassLoader());
        } catch (Exception e) {
            Optional<JobExecutionException> expectedJobExecutionException =
                    findThrowable(e, JobExecutionException.class);
            Optional<FileNotFoundException> expectedFileNotFoundException =
                    findThrowable(e, FileNotFoundException.class);
            if (!(expectedJobExecutionException.isPresent()
                    && expectedFileNotFoundException.isPresent())) {
                throw e;
            }
        }
    } finally {
        cluster.after();
    }
}
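The test above drives the resource lifecycle by hand with before() and after() because it needs a custom Configuration per test. Since MiniClusterWithClientResource is a JUnit external resource, the more common pattern is to let JUnit manage the lifecycle via @ClassRule. A minimal sketch, assuming JUnit 4; the test class and method names are our own, not from SavepointITCase:

import org.apache.flink.client.program.ClusterClient;
import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration;
import org.apache.flink.test.util.MiniClusterWithClientResource;

import org.junit.ClassRule;
import org.junit.Test;

public class MiniClusterRuleSketch {

    // JUnit invokes before()/after() around the test class, so no manual
    // lifecycle handling or try/finally block is needed.
    @ClassRule
    public static final MiniClusterWithClientResource MINI_CLUSTER =
            new MiniClusterWithClientResource(
                    new MiniClusterResourceConfiguration.Builder()
                            .setNumberTaskManagers(1)
                            .setNumberSlotsPerTaskManager(1)
                            .build());

    @Test
    public void testAgainstSharedCluster() throws Exception {
        ClusterClient<?> client = MINI_CLUSTER.getClusterClient();
        // Submit a JobGraph against the shared mini cluster here.
    }
}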
Use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.
Class SavepointITCase, method testStopWithFailingSourceInOnePipeline.
/**
 * Covers FLINK-21030.
 *
 * <p>Tests the handling of a failure that happens while stopping an embarrassingly parallel
 * job with a savepoint. The test expects the stop action to fail and all executions to be in
 * state {@code RUNNING} afterwards.
 *
 * @param failingSource the failing {@link SourceFunction} used in one of the two pipelines.
 * @param savepointDir the directory into which the savepoint is written.
 * @param expectedMaximumNumberOfRestarts the maximum number of restarts allowed by the restart
 *     strategy.
 * @param exceptionAssertion asserts the client-call exception to verify that the right error
 *     was handled.
 * @throws Exception if an error occurred while running the test.
 * @see SavepointITCase#failingPipelineLatch the latch used to trigger the successful start of
 *     the pipeline that fails later on.
 * @see SavepointITCase#succeedingPipelineLatch the latch that triggers the successful start of
 *     the succeeding pipeline.
 */
private static void testStopWithFailingSourceInOnePipeline(
        InfiniteTestSource failingSource,
        File savepointDir,
        int expectedMaximumNumberOfRestarts,
        BiFunction<JobID, ExecutionException, Boolean> exceptionAssertion)
        throws Exception {
    MiniClusterWithClientResource cluster =
            new MiniClusterWithClientResource(
                    new MiniClusterResourceConfiguration.Builder().build());
    failingPipelineLatch = new OneShotLatch();
    succeedingPipelineLatch = new OneShotLatch();
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.getConfig()
            .setRestartStrategy(
                    RestartStrategies.fixedDelayRestart(expectedMaximumNumberOfRestarts, 0));
    env.addSource(failingSource)
            .name("Failing Source")
            .map(value -> {
                failingPipelineLatch.trigger();
                return value;
            })
            .addSink(new DiscardingSink<>());
    env.addSource(new InfiniteTestSource())
            .name("Succeeding Source")
            .map(value -> {
                succeedingPipelineLatch.trigger();
                return value;
            })
            .addSink(new DiscardingSink<>());
    final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    cluster.before();
    try {
        ClusterClient<?> client = cluster.getClusterClient();
        JobID jobID = client.submitJob(jobGraph).get();
        // We need to wait for both pipelines to be in state RUNNING because that's the only
        // state which allows creating a savepoint.
        failingPipelineLatch.await();
        succeedingPipelineLatch.await();
        waitForAllTaskRunning(cluster.getMiniCluster(), jobID, false);
        try {
            client.stopWithSavepoint(
                            jobGraph.getJobID(),
                            false,
                            savepointDir.getAbsolutePath(),
                            SavepointFormatType.CANONICAL)
                    .get();
            fail("The future should fail exceptionally.");
        } catch (ExecutionException e) {
            assertThrowable(e, ex -> exceptionAssertion.apply(jobGraph.getJobID(), e));
        }
        waitUntilAllTasksAreRunning(cluster.getRestClusterClient(), jobGraph.getJobID());
    } finally {
        cluster.after();
    }
}
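The concrete failure behavior of the failingSource parameter is not shown on this page. As a rough illustration only, a source that fails while its state is being snapshotted could look like the sketch below; the class name and failure message are assumptions, and this is not the actual InfiniteTestSource from SavepointITCase:

import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

public class FailingDuringSnapshotSource
        implements SourceFunction<Integer>, CheckpointedFunction {

    private volatile boolean running = true;

    @Override
    public void run(SourceContext<Integer> ctx) throws Exception {
        // Emit records forever so the pipeline stays RUNNING until stopped.
        while (running) {
            synchronized (ctx.getCheckpointLock()) {
                ctx.collect(1);
            }
            Thread.sleep(10);
        }
    }

    @Override
    public void cancel() {
        running = false;
    }

    @Override
    public void snapshotState(FunctionSnapshotContext context) throws Exception {
        // Failing here makes the stop-with-savepoint attempt fail; the test
        // then expects the restart strategy to bring the job back to RUNNING.
        throw new Exception("Expected failure during snapshot");
    }

    @Override
    public void initializeState(FunctionInitializationContext context) {
        // No state to restore in this sketch.
    }
}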
Use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.
Class SavepointITCase, method submitJobAndTakeSavepoint.
private String submitJobAndTakeSavepoint(
        MiniClusterResourceFactory clusterFactory, int parallelism) throws Exception {
    final JobGraph jobGraph = createJobGraph(parallelism, 0, 1000);
    final JobID jobId = jobGraph.getJobID();
    StatefulCounter.resetForTest(parallelism);
    MiniClusterWithClientResource cluster = clusterFactory.get();
    cluster.before();
    ClusterClient<?> client = cluster.getClusterClient();
    try {
        client.submitJob(jobGraph).get();
        waitForAllTaskRunning(cluster.getMiniCluster(), jobId, false);
        StatefulCounter.getProgressLatch().await();
        return client.cancelWithSavepoint(jobId, null, SavepointFormatType.CANONICAL).get();
    } finally {
        cluster.after();
        StatefulCounter.resetForTest(parallelism);
    }
}
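The returned string is the savepoint path. The companion restore step feeds it back through SavepointRestoreSettings, the same mechanism used in testSavepointForJobWithIteration further down this page. A minimal sketch of such a restore helper; the method name is an assumption, not the actual helper in SavepointITCase:

private static void restoreJobFromSavepoint(
        ClusterClient<?> client, JobGraph jobGraph, String savepointPath) throws Exception {
    // Attach the savepoint path to the job graph and resubmit; the job then
    // initializes its state from the savepoint instead of starting empty.
    jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
    client.submitJob(jobGraph).get();
}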
Use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.
Class SavepointITCase, method testStopWithSavepointForFlip27Source.
private void testStopWithSavepointForFlip27Source(boolean drain) throws Exception {
    final int numTaskManagers = 2;
    final int numSlotsPerTaskManager = 2;
    final MiniClusterResourceFactory clusterFactory =
            new MiniClusterResourceFactory(
                    numTaskManagers, numSlotsPerTaskManager, getFileBasedCheckpointsConfig());
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    BoundedPassThroughOperator<Long> operator =
            new BoundedPassThroughOperator<>(ChainingStrategy.ALWAYS);
    DataStream<Long> stream =
            env.fromSequence(0, Long.MAX_VALUE)
                    .transform("pass-through", BasicTypeInfo.LONG_TYPE_INFO, operator);
    stream.addSink(new DiscardingSink<>());
    final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    final JobID jobId = jobGraph.getJobID();
    MiniClusterWithClientResource cluster = clusterFactory.get();
    cluster.before();
    ClusterClient<?> client = cluster.getClusterClient();
    try {
        BoundedPassThroughOperator.resetForTest(1, true);
        client.submitJob(jobGraph).get();
        BoundedPassThroughOperator.getProgressLatch().await();
        waitForAllTaskRunning(cluster.getMiniCluster(), jobId, false);
        client.stopWithSavepoint(jobId, drain, null, SavepointFormatType.CANONICAL).get();
        if (drain) {
            Assert.assertTrue(BoundedPassThroughOperator.inputEnded);
        } else {
            Assert.assertFalse(BoundedPassThroughOperator.inputEnded);
        }
    } finally {
        cluster.after();
    }
}
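The drain flag is the boolean passed to ClusterClient#stopWithSavepoint (named advanceToEndOfEventTime in the versions we have seen): when set, sources finish their input before the savepoint is taken, which is why inputEnded is asserted only in the drain case. A usage sketch; the helper name and target directory are assumptions:

static String stopAndDrain(ClusterClient<?> client, JobID jobId, String targetDir)
        throws Exception {
    // drain=true: sources end their input before the savepoint is drawn;
    // passing null for targetDir would fall back to the configured default.
    return client.stopWithSavepoint(jobId, true, targetDir, SavepointFormatType.CANONICAL).get();
}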
Use of org.apache.flink.test.util.MiniClusterWithClientResource in project flink by apache.
Class SavepointITCase, method testSavepointForJobWithIteration.
@Test
public void testSavepointForJobWithIteration() throws Exception {
    for (int i = 0; i < ITER_TEST_PARALLELISM; ++i) {
        iterTestSnapshotWait[i] = new OneShotLatch();
        iterTestRestoreWait[i] = new OneShotLatch();
        iterTestCheckpointVerify[i] = 0;
    }
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final IntegerStreamSource source = new IntegerStreamSource();
    IterativeStream<Integer> iteration =
            env.addSource(source)
                    .flatMap(
                            new RichFlatMapFunction<Integer, Integer>() {
                                private static final long serialVersionUID = 1L;

                                @Override
                                public void flatMap(Integer in, Collector<Integer> clctr)
                                        throws Exception {
                                    clctr.collect(in);
                                }
                            })
                    .setParallelism(ITER_TEST_PARALLELISM)
                    .keyBy(
                            new KeySelector<Integer, Object>() {
                                private static final long serialVersionUID = 1L;

                                @Override
                                public Object getKey(Integer value) throws Exception {
                                    return value;
                                }
                            })
                    .flatMap(new DuplicateFilter())
                    .setParallelism(ITER_TEST_PARALLELISM)
                    .iterate();
    DataStream<Integer> iterationBody =
            iteration
                    .map(
                            new MapFunction<Integer, Integer>() {
                                private static final long serialVersionUID = 1L;

                                @Override
                                public Integer map(Integer value) throws Exception {
                                    return value;
                                }
                            })
                    .setParallelism(ITER_TEST_PARALLELISM);
    iteration.closeWith(iterationBody);
    StreamGraph streamGraph = env.getStreamGraph();
    JobGraph jobGraph = streamGraph.getJobGraph();
    Configuration config = getFileBasedCheckpointsConfig();
    config.addAll(jobGraph.getJobConfiguration());
    config.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, MemorySize.ZERO);
    MiniClusterWithClientResource cluster =
            new MiniClusterWithClientResource(
                    new MiniClusterResourceConfiguration.Builder()
                            .setConfiguration(config)
                            .setNumberTaskManagers(1)
                            .setNumberSlotsPerTaskManager(2 * jobGraph.getMaximumParallelism())
                            .build());
    cluster.before();
    ClusterClient<?> client = cluster.getClusterClient();
    String savepointPath = null;
    try {
        client.submitJob(jobGraph).get();
        waitForAllTaskRunning(cluster.getMiniCluster(), jobGraph.getJobID(), false);
        for (OneShotLatch latch : iterTestSnapshotWait) {
            latch.await();
        }
        savepointPath =
                client.triggerSavepoint(jobGraph.getJobID(), null, SavepointFormatType.CANONICAL)
                        .get();
        client.cancel(jobGraph.getJobID()).get();
        while (!client.getJobStatus(jobGraph.getJobID()).get().isGloballyTerminalState()) {
            Thread.sleep(100);
        }
        jobGraph = streamGraph.getJobGraph();
        jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
        client.submitJob(jobGraph).get();
        for (OneShotLatch latch : iterTestRestoreWait) {
            latch.await();
        }
        client.cancel(jobGraph.getJobID()).get();
        while (!client.getJobStatus(jobGraph.getJobID()).get().isGloballyTerminalState()) {
            Thread.sleep(100);
        }
    } finally {
        if (null != savepointPath) {
            client.disposeSavepoint(savepointPath);
        }
        cluster.after();
    }
}
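The two busy-wait loops in this test are identical and could be factored into a small helper. The body is taken directly from the code above; only the method name is our own:

private static void waitUntilGloballyTerminal(ClusterClient<?> client, JobID jobId)
        throws Exception {
    // Poll the job status every 100 ms until it reaches a globally terminal
    // state (e.g. FINISHED, CANCELED, or FAILED).
    while (!client.getJobStatus(jobId).get().isGloballyTerminalState()) {
        Thread.sleep(100);
    }
}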