Use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache, in the class ExecutionVertexStopTest, method testStopRpc:
@Test
public void testStopRpc() throws Exception {
    final JobVertexID jid = new JobVertexID();
    final ExecutionJobVertex ejv = getExecutionVertex(jid);
    final ExecutionVertex vertex = new ExecutionVertex(ejv, 0, new IntermediateResult[0], AkkaUtils.getDefaultTimeout());
    final ExecutionAttemptID execId = vertex.getCurrentExecutionAttempt().getAttemptId();

    setVertexState(vertex, ExecutionState.SCHEDULED);
    assertEquals(ExecutionState.SCHEDULED, vertex.getExecutionState());

    final ActorGateway gateway = new StopSequenceInstanceGateway(TestingUtils.defaultExecutionContext());
    Instance instance = getInstance(new ActorTaskManagerGateway(gateway));
    SimpleSlot slot = instance.allocateSimpleSlot(new JobID());

    vertex.deployToSlot(slot);

    receivedStopSignal = false;
    vertex.stop();
    assertTrue(receivedStopSignal);
}
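The StopSequenceInstanceGateway test double and the receivedStopSignal flag are defined elsewhere in ExecutionVertexStopTest and are not shown on this page. A minimal sketch of such a gateway is given below; it assumes the test-scoped BaseTestingActorGateway helper (which, in the Flink test code of this era, implements ActorGateway by funnelling every ask/tell through a single handleMessage(Object) callback) as well as the TaskMessages.StopTask message and Messages.getAcknowledge(). Treat the class and message names other than ActorGateway itself as assumptions inferred from how the test uses the gateway.

// Sketch only; the real helper is a static nested class of ExecutionVertexStopTest,
// which also declares the static receivedStopSignal flag used below.
// Imports assumed at the top of the test file:
import org.apache.flink.runtime.instance.BaseTestingActorGateway;
import org.apache.flink.runtime.messages.Messages;
import org.apache.flink.runtime.messages.TaskMessages;
import scala.concurrent.ExecutionContext;

public static class StopSequenceInstanceGateway extends BaseTestingActorGateway {

    private static final long serialVersionUID = 1L;

    public StopSequenceInstanceGateway(ExecutionContext executionContext) {
        super(executionContext);
    }

    @Override
    public Object handleMessage(Object message) throws Exception {
        if (message instanceof TaskMessages.StopTask) {
            // record that the ExecutionVertex sent a stop RPC to the fake TaskManager
            receivedStopSignal = true;
            return Messages.getAcknowledge();
        }
        return null;
    }
}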
Use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache, in the class ExecutionVertexCancelTest, method testCancelConcurrentlyToDeploying_CallsNotOvertaking:
@Test
public void testCancelConcurrentlyToDeploying_CallsNotOvertaking() {
    try {
        final JobVertexID jid = new JobVertexID();
        final TestingUtils.QueuedActionExecutionContext executionContext = TestingUtils.queuedActionExecutionContext();
        final TestingUtils.ActionQueue actions = executionContext.actionQueue();
        final ExecutionJobVertex ejv = getExecutionVertex(jid, executionContext);
        final ExecutionVertex vertex = new ExecutionVertex(ejv, 0, new IntermediateResult[0], AkkaUtils.getDefaultTimeout());

        setVertexState(vertex, ExecutionState.SCHEDULED);
        assertEquals(ExecutionState.SCHEDULED, vertex.getExecutionState());

        ActorGateway actorGateway = new CancelSequenceActorGateway(executionContext, 2);
        Instance instance = getInstance(new ActorTaskManagerGateway(actorGateway));
        SimpleSlot slot = instance.allocateSimpleSlot(new JobID());

        vertex.deployToSlot(slot);
        assertEquals(ExecutionState.DEPLOYING, vertex.getExecutionState());

        vertex.cancel();
        assertEquals(ExecutionState.CANCELING, vertex.getExecutionState());

        // first action happens (deploy)
        actions.triggerNextAction();
        assertEquals(ExecutionState.CANCELING, vertex.getExecutionState());

        // the deploy call found itself in canceling after it returned and needs to send a cancel call
        // the call did not yet execute, so it is still in canceling
        assertEquals(ExecutionState.CANCELING, vertex.getExecutionState());

        // second action happens (cancel call from cancel function)
        actions.triggerNextAction();

        // TaskManager reports back (canceling done)
        vertex.getCurrentExecutionAttempt().cancelingComplete();

        // should properly set state to cancelled
        assertEquals(ExecutionState.CANCELED, vertex.getExecutionState());

        // trigger the correction canceling call
        actions.triggerNextAction();
        assertEquals(ExecutionState.CANCELED, vertex.getExecutionState());

        assertTrue(slot.isReleased());
        assertNull(vertex.getFailureCause());
        assertTrue(vertex.getStateTimestamp(ExecutionState.CREATED) > 0);
        assertTrue(vertex.getStateTimestamp(ExecutionState.CANCELING) > 0);
        assertTrue(vertex.getStateTimestamp(ExecutionState.CANCELED) > 0);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
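The controlled interleaving in this test comes from TestingUtils.queuedActionExecutionContext(): instead of running the deploy and cancel RPC follow-ups immediately, the execution context queues them, and actions.triggerNextAction() runs exactly one queued step at a time, which is how the test can assert on the intermediate CANCELING states. As a rough, self-contained illustration of that idea (not Flink's actual helper), a queuing executor can be sketched like this:

import java.util.ArrayDeque;
import java.util.Queue;
import java.util.concurrent.Executor;

/**
 * Illustrative stand-in for Flink's QueuedActionExecutionContext: submitted
 * actions are buffered and only run when the test explicitly triggers them,
 * which lets a test freeze and step through asynchronous call sequences.
 */
public class QueuedExecutor implements Executor {

    private final Queue<Runnable> actions = new ArrayDeque<>();

    @Override
    public synchronized void execute(Runnable command) {
        // do not run the action yet; park it until the test asks for it
        actions.add(command);
    }

    /** Runs exactly one pending action, mirroring actionQueue().triggerNextAction(). */
    public void triggerNextAction() {
        Runnable next;
        synchronized (this) {
            next = actions.poll();
        }
        if (next == null) {
            throw new IllegalStateException("No queued action to trigger");
        }
        next.run();
    }
}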
Use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache, in the class CoordinatorShutdownTest, method testCoordinatorShutsDownOnSuccess:
@Test
public void testCoordinatorShutsDownOnSuccess() {
    LocalFlinkMiniCluster cluster = null;
    try {
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
        cluster = new LocalFlinkMiniCluster(config, true);
        cluster.start();

        // build a test graph with snapshotting enabled
        JobVertex vertex = new JobVertex("Test Vertex");
        vertex.setInvokableClass(BlockingInvokable.class);
        List<JobVertexID> vertexIdList = Collections.singletonList(vertex.getID());

        JobGraph testGraph = new JobGraph("test job", vertex);
        testGraph.setSnapshotSettings(new JobSnapshottingSettings(vertexIdList, vertexIdList, vertexIdList, 5000, 60000, 0L, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), null, true));

        ActorGateway jmGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
        FiniteDuration timeout = new FiniteDuration(60, TimeUnit.SECONDS);
        JobManagerMessages.SubmitJob submitMessage = new JobManagerMessages.SubmitJob(testGraph, ListeningBehaviour.EXECUTION_RESULT);

        // submit is successful, but then the job blocks due to the invokable
        Future<Object> submitFuture = jmGateway.ask(submitMessage, timeout);
        Await.result(submitFuture, timeout);

        // get the execution graph and store the ExecutionGraph reference
        Future<Object> jobRequestFuture = jmGateway.ask(new JobManagerMessages.RequestJob(testGraph.getJobID()), timeout);
        ExecutionGraph graph = (ExecutionGraph) ((JobManagerMessages.JobFound) Await.result(jobRequestFuture, timeout)).executionGraph();
        assertNotNull(graph);

        BlockingInvokable.unblock();
        graph.waitUntilFinished();

        // verify that the coordinator was shut down
        CheckpointCoordinator coord = graph.getCheckpointCoordinator();
        assertTrue(coord == null || coord.isShutdown());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    } finally {
        if (cluster != null) {
            cluster.shutdown();
            cluster.awaitTermination();
        }
    }
}
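BlockingInvokable is an inner class of CoordinatorShutdownTest that is not shown on this page; the test only needs a task that stays running until BlockingInvokable.unblock() is called, so the ExecutionGraph can be fetched while the job is still alive. A sketch of such an invokable, assuming the Flink 1.2-era AbstractInvokable contract (no-argument constructor, single invoke() method), might look like the following; the real class may differ in detail.

import java.util.concurrent.CountDownLatch;

import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;

/** Illustrative blocking task: invoke() parks until the test calls unblock(). */
public class BlockingInvokable extends AbstractInvokable {

    // shared by the single task instance and the test, both in the mini-cluster JVM
    private static final CountDownLatch BLOCKER = new CountDownLatch(1);

    @Override
    public void invoke() throws Exception {
        // hold the job in a running state until the test releases it
        BLOCKER.await();
    }

    public static void unblock() {
        BLOCKER.countDown();
    }
}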
Use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache, in the class JobClientActorRecoveryITCase, method testJobClientRecovery:
/**
* Tests whether the JobClientActor can connect to a newly elected leading job manager to obtain
* the JobExecutionResult. The submitted job blocks for the first execution attempt. The
* leading job manager will be killed so that the second job manager will be elected as the
* leader. The newly elected leader has to retrieve the checkpointed job from ZooKeeper
* and continue its execution. This time, the job does not block and, thus, can be finished.
* The execution result should be sent to the JobClientActor which originally submitted the
* job.
*
* @throws Exception
*/
@Test
public void testJobClientRecovery() throws Exception {
    File rootFolder = tempFolder.getRoot();
    Configuration config = ZooKeeperTestUtils.createZooKeeperHAConfig(zkServer.getConnectString(), rootFolder.getPath());
    config.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, 2);
    config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);

    final TestingCluster cluster = new TestingCluster(config);
    cluster.start();

    JobVertex blockingVertex = new JobVertex("Blocking Vertex");
    blockingVertex.setInvokableClass(BlockingTask.class);
    blockingVertex.setParallelism(1);
    final JobGraph jobGraph = new JobGraph("Blocking Test Job", blockingVertex);

    final Promise<JobExecutionResult> promise = new scala.concurrent.impl.Promise.DefaultPromise<>();
    Deadline deadline = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();

    try {
        Thread submitter = new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    JobExecutionResult result = cluster.submitJobAndWait(jobGraph, false);
                    promise.success(result);
                } catch (Exception e) {
                    promise.failure(e);
                }
            }
        });
        submitter.start();

        synchronized (BlockingTask.waitLock) {
            while (BlockingTask.HasBlockedExecution < 1 && deadline.hasTimeLeft()) {
                BlockingTask.waitLock.wait(deadline.timeLeft().toMillis());
            }
        }

        if (deadline.isOverdue()) {
            Assert.fail("The job has not blocked within the given deadline.");
        }

        ActorGateway gateway = cluster.getLeaderGateway(deadline.timeLeft());
        gateway.tell(TestingJobManagerMessages.getDisablePostStop());
        gateway.tell(PoisonPill.getInstance());

        // if the job fails then an exception is thrown here
        Await.result(promise.future(), deadline.timeLeft());
    } finally {
        cluster.shutdown();
    }
}
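BlockingTask, its waitLock monitor, and its HasBlockedExecution counter are defined elsewhere in JobClientActorRecoveryITCase. Based purely on how the test above uses them, the task could be sketched as follows: the first execution attempt signals the waiting test thread and then blocks until it is torn down when the leading JobManager is killed, while the recovered attempt sees the incremented counter and runs straight through. This is an illustrative reconstruction, not the actual Flink test class.

import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;

/**
 * Illustrative version of the BlockingTask used above. The first attempt blocks
 * so the test can kill the leading JobManager; the recovered attempt finishes.
 */
public class BlockingTask extends AbstractInvokable {

    public static final Object waitLock = new Object();
    public static volatile int HasBlockedExecution = 0;

    @Override
    public void invoke() throws Exception {
        synchronized (waitLock) {
            if (HasBlockedExecution == 0) {
                HasBlockedExecution++;
                // wake up the test thread that polls HasBlockedExecution
                waitLock.notifyAll();
                // block until this attempt is cancelled/interrupted when the leader dies;
                // the re-executed attempt sees HasBlockedExecution >= 1 and returns at once
                waitLock.wait();
            }
        }
    }
}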
Use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache, in the class ClusterShutdownITCase, method testClusterShutdownWithoutResourceManager:
/**
 * Tests a faked cluster shutdown procedure without the ResourceManager.
 */
@Test
public void testClusterShutdownWithoutResourceManager() {
    new JavaTestKit(system) {
        {
            new Within(duration("30 seconds")) {
                @Override
                protected void run() {
                    ActorGateway me = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());

                    // start a job manager which doesn't shut down the actor system
                    ActorGateway jobManager = TestingUtils.createJobManager(system, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), config, "jobmanager1");

                    // tell the JobManager to inform us of shutdown actions
                    jobManager.tell(TestingMessages.getNotifyOfComponentShutdown(), me);

                    // register a TaskManager
                    ActorGateway taskManager = TestingUtils.createTaskManager(system, jobManager, config, true, true);

                    // tell the TaskManager to inform us of TaskManager shutdowns
                    taskManager.tell(TestingMessages.getNotifyOfComponentShutdown(), me);

                    // no resource manager connected
                    jobManager.tell(new StopCluster(ApplicationStatus.SUCCEEDED, "Shutting down."), me);

                    expectMsgAllOf(
                        new TestingMessages.ComponentShutdown(taskManager.actor()),
                        new TestingMessages.ComponentShutdown(jobManager.actor()),
                        StopClusterSuccessful.getInstance());
                }
            };
        }
    };
}
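TestingUtils.createForwardingActor wraps the JavaTestKit's test actor in an ActorGateway, so every shutdown notification the JobManager and TaskManager send to me lands in the test probe's mailbox, where expectMsgAllOf can assert on it. Conceptually the forwarder is nothing more than an actor like the sketch below (a hypothetical stand-in written against the Akka 2.3/2.4 Java API, not Flink's actual helper):

import akka.actor.ActorRef;
import akka.actor.Props;
import akka.actor.UntypedActor;

/** Hypothetical forwarder: relays every message to a target, preserving the original sender. */
public class ForwardingActor extends UntypedActor {

    private final ActorRef target;

    public ForwardingActor(ActorRef target) {
        this.target = target;
    }

    @Override
    public void onReceive(Object message) {
        // forward keeps the original sender, so replies still reach the right actor
        target.forward(message, getContext());
    }

    public static Props props(ActorRef target) {
        return Props.create(ForwardingActor.class, target);
    }
}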