Use of org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy in the Apache Flink project.
Class ExecutionGraphRestartTest, method testCancelWhileFailing.
/**
 * Tests that a job which is currently FAILING can still be cancelled.
 *
 * <p>The graph's {@code jobVertexInFinalState()} is stubbed to do nothing, so the job
 * stays in FAILING after the instance has been marked dead. The test then checks that
 * {@code cancel()} moves the job to CANCELLING and that invoking the real
 * {@code jobVertexInFinalState()} afterwards results in CANCELED.
 */
@Test
public void testCancelWhileFailing() throws Exception {
    // We want to manually control the restart and delay
    RestartStrategy restartStrategy = new InfiniteDelayRestartStrategy();
    Tuple2<ExecutionGraph, Instance> executionGraphInstanceTuple = createSpyExecutionGraph(restartStrategy);
    ExecutionGraph executionGraph = executionGraphInstanceTuple.f0;
    Instance instance = executionGraphInstanceTuple.f1;

    // Suppress the final-state callback so the job status is not advanced automatically
    doNothing().when(executionGraph).jobVertexInFinalState();

    // Kill the instance...
    instance.markDead();

    Deadline deadline = TestingUtils.TESTING_DURATION().fromNow();

    // ...and wait for all vertices to be in state FAILED or CANCELED. The
    // jobVertexInFinalState does nothing, that's why we don't wait on the
    // job status.
    boolean success = false;
    while (deadline.hasTimeLeft() && !success) {
        success = true;
        for (ExecutionVertex vertex : executionGraph.getAllExecutionVertices()) {
            ExecutionState state = vertex.getExecutionState();
            if (state != ExecutionState.FAILED && state != ExecutionState.CANCELED) {
                success = false;
                Thread.sleep(100);
                break;
            }
        }
    }

    // Fail fast with a clear message if the deadline expired before every vertex
    // terminated; otherwise the assertions below would run against an unsettled graph.
    if (!success) {
        throw new AssertionError("Timed out waiting for all execution vertices to reach FAILED or CANCELED.");
    }

    // Still in failing
    assertEquals(JobStatus.FAILING, executionGraph.getState());

    // The cancel call needs to change the state to CANCELLING
    executionGraph.cancel();
    assertEquals(JobStatus.CANCELLING, executionGraph.getState());

    // Restore the real callback and finalize the job state
    doCallRealMethod().when(executionGraph).jobVertexInFinalState();
    executionGraph.jobVertexInFinalState();
    assertEquals(JobStatus.CANCELED, executionGraph.getState());
}
Use of org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy in the Apache Flink project.
Class ExecutionGraphRestartTest, method testFailExecutionGraphAfterCancel.
/**
 * Verifies that {@link ExecutionGraph#fail(Throwable)} still takes effect on a graph
 * that has already been cancelled.
 *
 * <p>Expected status sequence: CREATED, RUNNING, CANCELLING (after cancel), FAILING
 * (after fail) and finally RESTARTING once cancelling of the execution has completed.
 */
@Test
public void testFailExecutionGraphAfterCancel() throws Exception {
    // One instance with two slots, registered at a fresh scheduler
    SimpleActorGateway actorGateway = new SimpleActorGateway(TestingUtils.directExecutionContext());
    ActorTaskManagerGateway taskManagerGateway = new ActorTaskManagerGateway(actorGateway);
    Instance taskManager = ExecutionGraphTestUtils.getInstance(taskManagerGateway, 2);

    Scheduler slotScheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    slotScheduler.newInstanceAvailable(taskManager);

    // Single-vertex job with parallelism 1
    JobVertex testVertex = newJobVertex("Test Vertex", 1, NoOpInvokable.class);

    ExecutionConfig config = new ExecutionConfig();
    config.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, Integer.MAX_VALUE));

    JobGraph job = new JobGraph("Test Job", testVertex);
    job.setExecutionConfig(config);

    // The graph itself is built with an InfiniteDelayRestartStrategy
    ExecutionGraph graph = newExecutionGraph(new InfiniteDelayRestartStrategy(), slotScheduler);
    graph.attachJobGraph(job.getVerticesSortedTopologicallyFromSources());
    assertEquals(JobStatus.CREATED, graph.getState());

    graph.scheduleForExecution();
    assertEquals(JobStatus.RUNNING, graph.getState());

    // Cancel first, then fail immediately afterwards
    // (this mimics e.g. a concurrent slot release)
    graph.cancel();
    assertEquals(JobStatus.CANCELLING, graph.getState());

    graph.fail(new Exception("Test Exception"));
    assertEquals(JobStatus.FAILING, graph.getState());

    // Completing the cancellation of the first vertex's current attempt
    // is expected to move the job on to RESTARTING
    graph.getAllExecutionVertices().iterator().next().getCurrentExecutionAttempt().cancelingComplete();
    assertEquals(JobStatus.RESTARTING, graph.getState());
}
Use of org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy in the Apache Flink project.
Class ExecutionGraphRestartTest, method testCancelWhileRestarting.
/**
 * Verifies that cancelling a job in state RESTARTING aborts the restart: the job
 * becomes CANCELED and a subsequent {@code restart()} call leaves it CANCELED.
 */
@Test
public void testCancelWhileRestarting() throws Exception {
    // The restart is triggered by hand further below, so use a strategy
    // that never restarts on its own
    RestartStrategy infiniteDelay = new InfiniteDelayRestartStrategy();
    Tuple2<ExecutionGraph, Instance> graphAndInstance = createExecutionGraph(infiniteDelay);
    ExecutionGraph graph = graphAndInstance.f0;
    Instance taskManager = graphAndInstance.f1;

    // Losing the instance should drive the job into RESTARTING
    taskManager.markDead();

    Deadline timeout = TestingUtils.TESTING_DURATION().fromNow();
    while (timeout.hasTimeLeft()) {
        if (graph.getState() == JobStatus.RESTARTING) {
            break;
        }
        Thread.sleep(100);
    }
    assertEquals(JobStatus.RESTARTING, graph.getState());

    // Cancelling must abort the pending restart
    graph.cancel();
    assertEquals(JobStatus.CANCELED, graph.getState());

    // An explicit restart after the abort must not change the state
    graph.restart();
    assertEquals(JobStatus.CANCELED, graph.getState());
}
Use of org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy in the Apache Flink project.
Class ExecutionGraphRestartTest, method testFailExecutionAfterCancel.
/**
 * Verifies that a cancelled graph is not restarted when its current execution
 * attempts are failed after the cancel call. This can happen when a slot is
 * released concurrently with cancellation.
 *
 * <p>The job is expected to be CANCELED after the executions have been failed, and to
 * remain CANCELED after cancelling of the execution has completed.
 */
@Test
public void testFailExecutionAfterCancel() throws Exception {
    // One instance with two slots, registered at a fresh scheduler
    SimpleActorGateway actorGateway = new SimpleActorGateway(TestingUtils.directExecutionContext());
    ActorTaskManagerGateway taskManagerGateway = new ActorTaskManagerGateway(actorGateway);
    Instance taskManager = ExecutionGraphTestUtils.getInstance(taskManagerGateway, 2);

    Scheduler slotScheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    slotScheduler.newInstanceAvailable(taskManager);

    // Single-vertex job with parallelism 1
    JobVertex testVertex = newJobVertex("Test Vertex", 1, NoOpInvokable.class);

    ExecutionConfig config = new ExecutionConfig();
    config.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, Integer.MAX_VALUE));

    JobGraph job = new JobGraph("Test Job", testVertex);
    job.setExecutionConfig(config);

    // The graph itself is built with an InfiniteDelayRestartStrategy
    ExecutionGraph graph = newExecutionGraph(new InfiniteDelayRestartStrategy(), slotScheduler);
    graph.attachJobGraph(job.getVerticesSortedTopologicallyFromSources());
    assertEquals(JobStatus.CREATED, graph.getState());

    graph.scheduleForExecution();
    assertEquals(JobStatus.RUNNING, graph.getState());

    // Cancel, then fail every current attempt right away
    // (this mimics e.g. a concurrent slot release)
    graph.cancel();
    for (ExecutionVertex executionVertex : graph.getAllExecutionVertices()) {
        Execution currentAttempt = executionVertex.getCurrentExecutionAttempt();
        currentAttempt.fail(new Exception("Test Exception"));
    }
    assertEquals(JobStatus.CANCELED, graph.getState());

    // Completing the cancellation afterwards must not change the final state
    graph.getAllExecutionVertices().iterator().next().getCurrentExecutionAttempt().cancelingComplete();
    assertEquals(JobStatus.CANCELED, graph.getState());
}
Use of org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy in the Apache Flink project.
Class ExecutionGraphSignalsTest, method testFailureWhileRestarting.
/**
 * Checks that a failure reported while the job is RESTARTING re-triggers the restart
 * logic, so that the job only becomes FAILED once the restart strategy no longer
 * permits another restart.
 */
@Test
public void testFailureWhileRestarting() throws IllegalAccessException, NoSuchFieldException, InterruptedException {
    // Swap in a restart strategy constructed with the argument 1 via reflection
    Field strategyField = eg.getClass().getDeclaredField("restartStrategy");
    strategyField.setAccessible(true);
    strategyField.set(eg, new InfiniteDelayRestartStrategy(1));

    // Force the graph into RESTARTING.
    // NOTE(review): 'f' is defined outside this method; presumably it is the reflective
    // handle to the graph's job status field - confirm against the test setup.
    f.set(eg, JobStatus.RESTARTING);

    // First failure: one restart attempt is still available, so we stay in RESTARTING
    Exception firstFailure = new Exception("Test");
    eg.fail(firstFailure);
    assertEquals(JobStatus.RESTARTING, eg.getState());

    // Second failure: the restart attempts are depleted, so the job must end up FAILED
    Exception secondFailure = new Exception("Test");
    eg.fail(secondFailure);
    assertEquals(JobStatus.FAILED, eg.getState());
}
Aggregations