use of org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway in project flink by apache.
the class ExecutionGraphRestartTest method testFailExecutionAfterCancel.
/**
* Tests that a graph is not restarted after cancellation via a call to
* {@link ExecutionGraph#fail(Throwable)}. This can happen when a slot is
* released concurrently with cancellation.
*/
@Test
public void testFailExecutionAfterCancel() throws Exception {
Instance instance = ExecutionGraphTestUtils.getInstance(new ActorTaskManagerGateway(new SimpleActorGateway(TestingUtils.directExecutionContext())), 2);
Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
scheduler.newInstanceAvailable(instance);
JobVertex vertex = newJobVertex("Test Vertex", 1, NoOpInvokable.class);
ExecutionConfig executionConfig = new ExecutionConfig();
executionConfig.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, Integer.MAX_VALUE));
JobGraph jobGraph = new JobGraph("Test Job", vertex);
jobGraph.setExecutionConfig(executionConfig);
ExecutionGraph eg = newExecutionGraph(new InfiniteDelayRestartStrategy(), scheduler);
eg.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources());
assertEquals(JobStatus.CREATED, eg.getState());
eg.scheduleForExecution();
assertEquals(JobStatus.RUNNING, eg.getState());
// Fail right after cancel (for example with concurrent slot release)
eg.cancel();
for (ExecutionVertex v : eg.getAllExecutionVertices()) {
v.getCurrentExecutionAttempt().fail(new Exception("Test Exception"));
}
assertEquals(JobStatus.CANCELED, eg.getState());
Execution execution = eg.getAllExecutionVertices().iterator().next().getCurrentExecutionAttempt();
execution.cancelingComplete();
assertEquals(JobStatus.CANCELED, eg.getState());
}
use of org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway in project flink by apache.
the class ExecutionGraphRestartTest method testFailWhileRestarting.
@Test
public void testFailWhileRestarting() throws Exception {
Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
Instance instance = ExecutionGraphTestUtils.getInstance(new ActorTaskManagerGateway(new SimpleActorGateway(TestingUtils.directExecutionContext())), NUM_TASKS);
scheduler.newInstanceAvailable(instance);
// Blocking program
ExecutionGraph executionGraph = new ExecutionGraph(TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), new JobID(), "TestJob", new Configuration(), new SerializedValue<>(new ExecutionConfig()), AkkaUtils.getDefaultTimeout(), // We want to manually control the restart and delay
new InfiniteDelayRestartStrategy(), scheduler);
JobVertex jobVertex = new JobVertex("NoOpInvokable");
jobVertex.setInvokableClass(NoOpInvokable.class);
jobVertex.setParallelism(NUM_TASKS);
JobGraph jobGraph = new JobGraph("TestJob", jobVertex);
executionGraph.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources());
assertEquals(JobStatus.CREATED, executionGraph.getState());
executionGraph.scheduleForExecution();
assertEquals(JobStatus.RUNNING, executionGraph.getState());
// Kill the instance and wait for the job to restart
instance.markDead();
Deadline deadline = TestingUtils.TESTING_DURATION().fromNow();
while (deadline.hasTimeLeft() && executionGraph.getState() != JobStatus.RESTARTING) {
Thread.sleep(100);
}
assertEquals(JobStatus.RESTARTING, executionGraph.getState());
// The restarting should not fail with an ordinary exception
executionGraph.fail(new Exception("Test exception"));
assertEquals(JobStatus.RESTARTING, executionGraph.getState());
// but it should fail when sending a SuppressRestartsException
executionGraph.fail(new SuppressRestartsException(new Exception("Test exception")));
assertEquals(JobStatus.FAILED, executionGraph.getState());
// The restart has been aborted
executionGraph.restart();
assertEquals(JobStatus.FAILED, executionGraph.getState());
}
use of org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway in project flink by apache.
the class ExecutionGraphRestartTest method testSuspendWhileRestarting.
/**
* Tests that a suspend call while restarting a job, will abort the restarting.
*
* @throws Exception
*/
@Test
public void testSuspendWhileRestarting() throws Exception {
FiniteDuration timeout = new FiniteDuration(1, TimeUnit.MINUTES);
Deadline deadline = timeout.fromNow();
Instance instance = ExecutionGraphTestUtils.getInstance(new ActorTaskManagerGateway(new SimpleActorGateway(TestingUtils.directExecutionContext())), NUM_TASKS);
Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
scheduler.newInstanceAvailable(instance);
JobVertex sender = new JobVertex("Task");
sender.setInvokableClass(NoOpInvokable.class);
sender.setParallelism(NUM_TASKS);
JobGraph jobGraph = new JobGraph("Pointwise job", sender);
ControllableRestartStrategy controllableRestartStrategy = new ControllableRestartStrategy(timeout);
ExecutionGraph eg = new ExecutionGraph(TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), new JobID(), "Test job", new Configuration(), new SerializedValue<>(new ExecutionConfig()), AkkaUtils.getDefaultTimeout(), controllableRestartStrategy, scheduler);
eg.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources());
assertEquals(JobStatus.CREATED, eg.getState());
eg.scheduleForExecution();
assertEquals(JobStatus.RUNNING, eg.getState());
instance.markDead();
Await.ready(controllableRestartStrategy.getReachedCanRestart(), deadline.timeLeft());
assertEquals(JobStatus.RESTARTING, eg.getState());
eg.suspend(new Exception("Test exception"));
assertEquals(JobStatus.SUSPENDED, eg.getState());
controllableRestartStrategy.unlockRestart();
Await.ready(controllableRestartStrategy.getRestartDone(), deadline.timeLeft());
assertEquals(JobStatus.SUSPENDED, eg.getState());
}
use of org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway in project flink by apache.
the class ExecutionVertexDeploymentTest method testFailExternallyDuringDeploy.
@Test
public void testFailExternallyDuringDeploy() {
try {
final JobVertexID jid = new JobVertexID();
final TestingUtils.QueuedActionExecutionContext ec = TestingUtils.queuedActionExecutionContext();
final TestingUtils.ActionQueue queue = ec.actionQueue();
final ExecutionJobVertex ejv = getExecutionVertex(jid, ec);
final ExecutionVertex vertex = new ExecutionVertex(ejv, 0, new IntermediateResult[0], AkkaUtils.getDefaultTimeout());
final Instance instance = getInstance(new ActorTaskManagerGateway(new SimpleActorGateway(TestingUtils.directExecutionContext())));
final SimpleSlot slot = instance.allocateSimpleSlot(ejv.getJobId());
assertEquals(ExecutionState.CREATED, vertex.getExecutionState());
vertex.deployToSlot(slot);
assertEquals(ExecutionState.DEPLOYING, vertex.getExecutionState());
Exception testError = new Exception("test error");
vertex.fail(testError);
assertEquals(ExecutionState.FAILED, vertex.getExecutionState());
assertEquals(testError, vertex.getFailureCause());
queue.triggerNextAction();
queue.triggerNextAction();
assertTrue(vertex.getStateTimestamp(ExecutionState.CREATED) > 0);
assertTrue(vertex.getStateTimestamp(ExecutionState.DEPLOYING) > 0);
assertTrue(vertex.getStateTimestamp(ExecutionState.FAILED) > 0);
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway in project flink by apache.
the class ExecutionVertexDeploymentTest method testDeployCall.
@Test
public void testDeployCall() {
try {
final JobVertexID jid = new JobVertexID();
final ExecutionJobVertex ejv = getExecutionVertex(jid);
// mock taskmanager to simply accept the call
Instance instance = getInstance(new ActorTaskManagerGateway(new SimpleActorGateway(TestingUtils.directExecutionContext())));
final SimpleSlot slot = instance.allocateSimpleSlot(ejv.getJobId());
final ExecutionVertex vertex = new ExecutionVertex(ejv, 0, new IntermediateResult[0], AkkaUtils.getDefaultTimeout());
assertEquals(ExecutionState.CREATED, vertex.getExecutionState());
vertex.deployToSlot(slot);
assertEquals(ExecutionState.DEPLOYING, vertex.getExecutionState());
// no repeated scheduling
try {
vertex.deployToSlot(slot);
fail("Scheduled from wrong state");
} catch (IllegalStateException e) {
// as expected
}
assertNull(vertex.getFailureCause());
assertTrue(vertex.getStateTimestamp(ExecutionState.CREATED) > 0);
assertTrue(vertex.getStateTimestamp(ExecutionState.DEPLOYING) > 0);
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
Aggregations