use of akka.actor.ActorRef in project flink by apache.
the class TestingYarnFlinkResourceManager method handleMessage.
/**
 * Handles the testing-only messages of this resource manager and hands every other
 * message to the parent implementation.
 *
 * <ul>
 *   <li>{@code RequestNumberOfRegisteredResources}: replies with the current number of
 *       started task managers.</li>
 *   <li>{@code NotifyWhenResourcesRegistered}: replies {@code true} immediately if at least
 *       the requested number of task managers has been started; otherwise the request is
 *       parked in the waiting queue together with its sender.</li>
 *   <li>{@code NotifyResourceStarted}: processed by the parent first, afterwards every
 *       waiter at the head of the queue whose requested number is reached gets {@code true}.</li>
 * </ul>
 *
 * @param message the message received by this actor
 */
@Override
protected void handleMessage(Object message) {
    if (message instanceof RequestNumberOfRegisteredResources) {
        getSender().tell(getNumberOfStartedTaskManagers(), getSelf());
        return;
    }

    if (message instanceof NotifyWhenResourcesRegistered) {
        final NotifyWhenResourcesRegistered request = (NotifyWhenResourcesRegistered) message;
        if (getNumberOfStartedTaskManagers() >= request.getNumberResources()) {
            // Enough task managers are already running: answer right away.
            getSender().tell(true, getSelf());
        } else {
            // Not there yet: remember who asked and for how many resources.
            waitingQueue.offer(Tuple2.of(request.getNumberResources(), getSender()));
        }
        return;
    }

    // Everything else, including NotifyResourceStarted, is handled by the parent class.
    super.handleMessage(message);

    if (message instanceof NotifyResourceStarted) {
        // The started count may have changed; release waiters from the head of the queue.
        while (!waitingQueue.isEmpty()) {
            if (waitingQueue.peek().f0 > getNumberOfStartedTaskManagers()) {
                break;
            }
            ActorRef waiter = waitingQueue.poll().f1;
            waiter.tell(true, getSelf());
        }
    }
}
use of akka.actor.ActorRef in project flink by apache.
the class UtilsTest method testYarnFlinkResourceManagerJobManagerLostLeadership.
/**
 * Starts a {@code TestingYarnFlinkResourceManager} against mocked YARN clients, lets it
 * register at a forwarding "leader" actor and waits until {@code numInitialTaskManagers}
 * resources are registered. The leader is then withdrawn via
 * {@code notifyListener(null, null)} and the same leader is announced again. After the
 * resource manager re-registered and the containers were reported as started once more,
 * the number of registered resources must still equal {@code numInitialTaskManagers}.
 */
@Test
public void testYarnFlinkResourceManagerJobManagerLostLeadership() throws Exception {
new JavaTestKit(system) {
{
// Single deadline shared by all waits in this test.
final Deadline deadline = new FiniteDuration(3, TimeUnit.MINUTES).fromNow();
Configuration flinkConfig = new Configuration();
YarnConfiguration yarnConfig = new YarnConfiguration();
TestingLeaderRetrievalService leaderRetrievalService = new TestingLeaderRetrievalService();
String applicationMasterHostName = "localhost";
String webInterfaceURL = "foobar";
ContaineredTaskManagerParameters taskManagerParameters = new ContaineredTaskManagerParameters(1l, 1l, 1l, 1, new HashMap<String, String>());
ContainerLaunchContext taskManagerLaunchContext = mock(ContainerLaunchContext.class);
int yarnHeartbeatIntervalMillis = 1000;
int maxFailedContainers = 10;
int numInitialTaskManagers = 5;
final YarnResourceManagerCallbackHandler callbackHandler = new YarnResourceManagerCallbackHandler();
// The YARN resource manager and node manager clients are Mockito mocks.
AMRMClientAsync<AMRMClient.ContainerRequest> resourceManagerClient = mock(AMRMClientAsync.class);
NMClient nodeManagerClient = mock(NMClient.class);
UUID leaderSessionID = UUID.randomUUID();
// Prepare one testing container per initial task manager.
final List<Container> containerList = new ArrayList<>();
for (int i = 0; i < numInitialTaskManagers; i++) {
containerList.add(new TestingContainer("container_" + i, "localhost"));
}
// Each addContainerRequest call on the mock hands the next prepared container to the
// callback handler (one container per request) until the list is exhausted.
doAnswer(new Answer() {
int counter = 0;
@Override
public Object answer(InvocationOnMock invocation) throws Throwable {
if (counter < containerList.size()) {
callbackHandler.onContainersAllocated(Collections.singletonList(containerList.get(counter++)));
}
return null;
}
}).when(resourceManagerClient).addContainerRequest(Matchers.any(AMRMClient.ContainerRequest.class));
ActorRef resourceManager = null;
ActorRef leader1;
try {
// The leader is a ForwardingActor created with this test kit's ref.
// NOTE(review): presumably it forwards received messages to the test kit, which is
// what the expectMsgClass calls below rely on - confirm against TestingUtils.ForwardingActor.
leader1 = system.actorOf(Props.create(TestingUtils.ForwardingActor.class, getRef(), Option.apply(leaderSessionID)));
resourceManager = system.actorOf(Props.create(TestingYarnFlinkResourceManager.class, flinkConfig, yarnConfig, leaderRetrievalService, applicationMasterHostName, webInterfaceURL, taskManagerParameters, taskManagerLaunchContext, yarnHeartbeatIntervalMillis, maxFailedContainers, numInitialTaskManagers, callbackHandler, resourceManagerClient, nodeManagerClient));
// Announce leader1 as the current leader.
leaderRetrievalService.notifyListener(leader1.path().toString(), leaderSessionID);
final AkkaActorGateway leader1Gateway = new AkkaActorGateway(leader1, leaderSessionID);
final AkkaActorGateway resourceManagerGateway = new AkkaActorGateway(resourceManager, leaderSessionID);
// Each startContainer call on the mocked node manager client sends a
// NotifyResourceStarted for that container to the resource manager, with
// leader1Gateway passed as the sender.
doAnswer(new Answer() {
@Override
public Object answer(InvocationOnMock invocation) throws Throwable {
Container container = (Container) invocation.getArguments()[0];
resourceManagerGateway.tell(new NotifyResourceStarted(YarnFlinkResourceManager.extractResourceID(container)), leader1Gateway);
return null;
}
}).when(nodeManagerClient).startContainer(Matchers.any(Container.class), Matchers.any(ContainerLaunchContext.class));
// First registration attempt must arrive; answer it with a success message that
// carries an empty list.
expectMsgClass(deadline.timeLeft(), RegisterResourceManager.class);
resourceManagerGateway.tell(new RegisterResourceManagerSuccessful(leader1, Collections.EMPTY_LIST));
// Expect one Acknowledge per container.
for (int i = 0; i < containerList.size(); i++) {
expectMsgClass(deadline.timeLeft(), Acknowledge.class);
}
// Wait until the resource manager reports numInitialTaskManagers registered resources.
Future<Object> taskManagerRegisteredFuture = resourceManagerGateway.ask(new NotifyWhenResourcesRegistered(numInitialTaskManagers), deadline.timeLeft());
Await.ready(taskManagerRegisteredFuture, deadline.timeLeft());
// Withdraw the leader (null address and session id), then announce the same leader again.
leaderRetrievalService.notifyListener(null, null);
leaderRetrievalService.notifyListener(leader1.path().toString(), leaderSessionID);
// The resource manager has to register again after the leader came back.
expectMsgClass(deadline.timeLeft(), RegisterResourceManager.class);
resourceManagerGateway.tell(new RegisterResourceManagerSuccessful(leader1, Collections.EMPTY_LIST));
// Report every container as started once more, this time directly from the test.
for (Container container : containerList) {
resourceManagerGateway.tell(new NotifyResourceStarted(YarnFlinkResourceManager.extractResourceID(container)), leader1Gateway);
}
for (int i = 0; i < containerList.size(); i++) {
expectMsgClass(deadline.timeLeft(), Acknowledge.class);
}
// The number of registered resources must be unchanged by the leadership change.
Future<Object> numberOfRegisteredResourcesFuture = resourceManagerGateway.ask(RequestNumberOfRegisteredResources.Instance, deadline.timeLeft());
int numberOfRegisteredResources = (Integer) Await.result(numberOfRegisteredResourcesFuture, deadline.timeLeft());
assertEquals(numInitialTaskManagers, numberOfRegisteredResources);
} finally {
// Stop the resource manager actor if it was created.
if (resourceManager != null) {
resourceManager.tell(PoisonPill.getInstance(), ActorRef.noSender());
}
}
}
};
}
use of akka.actor.ActorRef in project flink by apache.
the class JobManagerTest method testSavepointRestoreSettings.
/**
 * Tests that configured {@link SavepointRestoreSettings} are respected.
 *
 * <p>A first job is submitted, a savepoint is triggered for it and the job is cancelled.
 * A second job with a different vertex is then submitted with restore settings pointing
 * at that savepoint. With the flag passed to {@code forPath} set to {@code false} the
 * submission must fail with an {@link IllegalStateException} whose message mentions
 * {@code allowNonRestoredState}; with the flag set to {@code true} it must succeed.
 *
 * @throws Exception if any actor interaction fails or times out
 */
@Test
public void testSavepointRestoreSettings() throws Exception {
    FiniteDuration timeout = new FiniteDuration(30, TimeUnit.SECONDS);

    ActorSystem actorSystem = null;
    ActorGateway jobManager = null;
    ActorGateway archiver = null;
    ActorGateway taskManager = null;
    try {
        actorSystem = AkkaUtils.createLocalActorSystem(new Configuration());

        Tuple2<ActorRef, ActorRef> master = JobManager.startJobManagerActors(
            new Configuration(),
            actorSystem,
            TestingUtils.defaultExecutor(),
            TestingUtils.defaultExecutor(),
            Option.apply("jm"),
            Option.apply("arch"),
            TestingJobManager.class,
            TestingMemoryArchivist.class);
        jobManager = new AkkaActorGateway(master._1(), null);
        archiver = new AkkaActorGateway(master._2(), null);

        Configuration tmConfig = new Configuration();
        tmConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 4);
        ActorRef taskManagerRef = TaskManager.startTaskManagerComponentsAndActor(
            tmConfig,
            ResourceID.generate(),
            actorSystem,
            "localhost",
            Option.apply("tm"),
            Option.<LeaderRetrievalService>apply(new StandaloneLeaderRetrievalService(jobManager.path())),
            true,
            TestingTaskManager.class);
        taskManager = new AkkaActorGateway(taskManagerRef, null);

        // Wait until connected
        Object msg = new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jobManager.actor());
        Await.ready(taskManager.ask(msg, timeout), timeout);

        // Create job graph
        JobVertex sourceVertex = new JobVertex("Source");
        sourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
        sourceVertex.setParallelism(1);
        JobGraph jobGraph = new JobGraph("TestingJob", sourceVertex);
        JobSnapshottingSettings snapshottingSettings = new JobSnapshottingSettings(
            Collections.singletonList(sourceVertex.getID()),
            Collections.singletonList(sourceVertex.getID()),
            Collections.singletonList(sourceVertex.getID()),
            // deactivated checkpointing
            Long.MAX_VALUE,
            360000,
            0,
            Integer.MAX_VALUE,
            ExternalizedCheckpointSettings.none(),
            null,
            true);
        jobGraph.setSnapshotSettings(snapshottingSettings);

        // Submit job graph
        msg = new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.DETACHED);
        Await.result(jobManager.ask(msg, timeout), timeout);

        // Wait for all tasks to be running
        msg = new TestingJobManagerMessages.WaitForAllVerticesToBeRunning(jobGraph.getJobID());
        Await.result(jobManager.ask(msg, timeout), timeout);

        // Trigger savepoint
        File targetDirectory = tmpFolder.newFolder();
        msg = new TriggerSavepoint(jobGraph.getJobID(), Option.apply(targetDirectory.getAbsolutePath()));
        Future<Object> future = jobManager.ask(msg, timeout);
        Object result = Await.result(future, timeout);
        // Check the reply type before casting so that a failed savepoint is reported as a
        // readable assertion failure instead of a ClassCastException.
        assertTrue("Unexpected response: " + result, result instanceof TriggerSavepointSuccess);
        String savepointPath = ((TriggerSavepointSuccess) result).savepointPath();

        // Cancel the first job and wait until it has been removed
        // (original note: "Cancel because of restarts")
        msg = new TestingJobManagerMessages.NotifyWhenJobRemoved(jobGraph.getJobID());
        Future<?> removedFuture = jobManager.ask(msg, timeout);
        Future<?> cancelFuture = jobManager.ask(new CancelJob(jobGraph.getJobID()), timeout);
        Object response = Await.result(cancelFuture, timeout);
        assertTrue("Unexpected response: " + response, response instanceof CancellationSuccess);
        Await.ready(removedFuture, timeout);

        // Adjust the job (we need a new operator ID)
        JobVertex newSourceVertex = new JobVertex("NewSource");
        newSourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
        newSourceVertex.setParallelism(1);
        JobGraph newJobGraph = new JobGraph("NewTestingJob", newSourceVertex);
        JobSnapshottingSettings newSnapshottingSettings = new JobSnapshottingSettings(
            Collections.singletonList(newSourceVertex.getID()),
            Collections.singletonList(newSourceVertex.getID()),
            Collections.singletonList(newSourceVertex.getID()),
            // deactivated checkpointing
            Long.MAX_VALUE,
            360000,
            0,
            Integer.MAX_VALUE,
            ExternalizedCheckpointSettings.none(),
            null,
            true);
        newJobGraph.setSnapshotSettings(newSnapshottingSettings);

        // First attempt: non-restored state is not allowed, so the submission must fail
        SavepointRestoreSettings restoreSettings = SavepointRestoreSettings.forPath(savepointPath, false);
        newJobGraph.setSavepointRestoreSettings(restoreSettings);
        msg = new JobManagerMessages.SubmitJob(newJobGraph, ListeningBehaviour.DETACHED);
        response = Await.result(jobManager.ask(msg, timeout), timeout);
        assertTrue("Unexpected response: " + response, response instanceof JobManagerMessages.JobResultFailure);
        JobManagerMessages.JobResultFailure failure = (JobManagerMessages.JobResultFailure) response;
        Throwable cause = failure.cause().deserializeError(ClassLoader.getSystemClassLoader());
        assertTrue("Unexpected failure cause: " + cause, cause instanceof IllegalStateException);
        assertTrue(cause.getMessage().contains("allowNonRestoredState"));

        // Wait until removed
        msg = new TestingJobManagerMessages.NotifyWhenJobRemoved(newJobGraph.getJobID());
        Await.ready(jobManager.ask(msg, timeout), timeout);

        // Resubmit, but allow non restored state now
        restoreSettings = SavepointRestoreSettings.forPath(savepointPath, true);
        newJobGraph.setSavepointRestoreSettings(restoreSettings);
        msg = new JobManagerMessages.SubmitJob(newJobGraph, ListeningBehaviour.DETACHED);
        response = Await.result(jobManager.ask(msg, timeout), timeout);
        assertTrue("Unexpected response: " + response, response instanceof JobManagerMessages.JobSubmitSuccess);
    } finally {
        // Tear down the actor system and every actor that was started
        if (actorSystem != null) {
            actorSystem.shutdown();
        }
        if (archiver != null) {
            archiver.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
        if (jobManager != null) {
            jobManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
        if (taskManager != null) {
            taskManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
    }
}
use of akka.actor.ActorRef in project flink by apache.
the class JobManagerTest method testCancelWithSavepoint.
/**
 * Submits a blocking job, cancels it with a savepoint (retrying up to ten times while the
 * job manager answers that not all required tasks are running yet) and verifies that a
 * savepoint path was returned, that the job reached the CANCELED status and that the
 * savepoint path exists on the file system.
 *
 * @throws Exception if any actor interaction fails or times out
 */
@Test
public void testCancelWithSavepoint() throws Exception {
    File defaultSavepointDir = tmpFolder.newFolder();
    FiniteDuration timeout = new FiniteDuration(30, TimeUnit.SECONDS);

    Configuration config = new Configuration();
    config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, defaultSavepointDir.getAbsolutePath());

    ActorSystem actorSystem = null;
    ActorGateway jobManager = null;
    ActorGateway archiver = null;
    ActorGateway taskManager = null;

    try {
        actorSystem = AkkaUtils.createLocalActorSystem(new Configuration());

        Tuple2<ActorRef, ActorRef> master = JobManager.startJobManagerActors(
            config,
            actorSystem,
            TestingUtils.defaultExecutor(),
            TestingUtils.defaultExecutor(),
            Option.apply("jm"),
            Option.apply("arch"),
            TestingJobManager.class,
            TestingMemoryArchivist.class);
        jobManager = new AkkaActorGateway(master._1(), null);
        archiver = new AkkaActorGateway(master._2(), null);

        ActorRef taskManagerRef = TaskManager.startTaskManagerComponentsAndActor(
            config,
            ResourceID.generate(),
            actorSystem,
            "localhost",
            Option.apply("tm"),
            Option.<LeaderRetrievalService>apply(new StandaloneLeaderRetrievalService(jobManager.path())),
            true,
            TestingTaskManager.class);
        taskManager = new AkkaActorGateway(taskManagerRef, null);

        // Block until the task manager has registered at the job manager
        Object awaitRegistration = new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jobManager.actor());
        Await.ready(taskManager.ask(awaitRegistration, timeout), timeout);

        // Build a single-vertex job with a blocking stateful invokable
        JobVertex sourceVertex = new JobVertex("Source");
        sourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
        sourceVertex.setParallelism(1);

        JobGraph jobGraph = new JobGraph("TestingJob", sourceVertex);
        jobGraph.setSnapshotSettings(new JobSnapshottingSettings(
            Collections.singletonList(sourceVertex.getID()),
            Collections.singletonList(sourceVertex.getID()),
            Collections.singletonList(sourceVertex.getID()),
            3600000,
            3600000,
            0,
            Integer.MAX_VALUE,
            ExternalizedCheckpointSettings.none(),
            null,
            true));

        // Submit the job
        Object submitJob = new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.DETACHED);
        Await.result(jobManager.ask(submitJob, timeout), timeout);

        // Block until every vertex is running
        Object awaitRunning = new TestingJobManagerMessages.WaitForAllVerticesToBeRunning(jobGraph.getJobID());
        Await.result(jobManager.ask(awaitRunning, timeout), timeout);

        // Ask to be notified once the job is cancelled
        Object awaitCanceled = new NotifyWhenJobStatus(jobGraph.getJobID(), JobStatus.CANCELED);
        Future<Object> canceledFuture = jobManager.ask(awaitCanceled, timeout);

        // Cancel with savepoint, retrying while tasks are not yet ready for a checkpoint
        String savepointPath = null;
        for (int attempt = 0; attempt < 10; attempt++) {
            Object cancelRequest = new JobManagerMessages.CancelJobWithSavepoint(jobGraph.getJobID(), null);
            Future<Object> cancelFuture = jobManager.ask(cancelRequest, timeout);
            CancellationResponse cancelResponse = (CancellationResponse) Await.result(cancelFuture, timeout);

            if (!(cancelResponse instanceof CancellationFailure)) {
                savepointPath = ((CancellationSuccess) cancelResponse).savepointPath();
                break;
            }

            CancellationFailure cancelFailure = (CancellationFailure) cancelResponse;
            boolean tasksNotReady = cancelFailure.cause().getMessage()
                .contains(CheckpointDeclineReason.NOT_ALL_REQUIRED_TASKS_RUNNING.message());
            if (tasksNotReady) {
                // wait and retry
                Thread.sleep(200);
            } else {
                cancelFailure.cause().printStackTrace();
                fail("Failed to cancel job: " + cancelFailure.cause().getMessage());
            }
        }

        // A savepoint path must have been returned by one of the attempts
        assertNotEquals("Savepoint not triggered", null, savepointPath);

        // Wait for the job status change
        Await.ready(canceledFuture, timeout);

        // The savepoint must exist on disk
        File savepointFile = new File(savepointPath);
        assertEquals(true, savepointFile.exists());
    } finally {
        if (actorSystem != null) {
            actorSystem.shutdown();
        }

        // Same order as before: archiver, job manager, task manager
        for (ActorGateway gateway : new ActorGateway[] {archiver, jobManager, taskManager}) {
            if (gateway != null) {
                gateway.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
            }
        }
    }
}
use of akka.actor.ActorRef in project flink by apache.
the class JobManagerTest method testSavepointWithDeactivatedPeriodicCheckpointing.
/**
 * Tests that we can trigger a savepoint when periodic checkpoints are disabled.
 *
 * <p>The job is submitted with {@code Long.MAX_VALUE} in the position marked
 * "deactivated checkpointing" of its snapshotting settings. A savepoint into a fresh
 * target directory is then triggered, and the test expects a
 * {@code TriggerSavepointSuccess} reply and exactly one entry in that directory.
 *
 * @throws Exception if any actor interaction fails or times out
 */
@Test
public void testSavepointWithDeactivatedPeriodicCheckpointing() throws Exception {
    File defaultSavepointDir = tmpFolder.newFolder();
    FiniteDuration timeout = new FiniteDuration(30, TimeUnit.SECONDS);
    Configuration config = new Configuration();
    config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, defaultSavepointDir.getAbsolutePath());

    ActorSystem actorSystem = null;
    ActorGateway jobManager = null;
    ActorGateway archiver = null;
    ActorGateway taskManager = null;
    try {
        actorSystem = AkkaUtils.createLocalActorSystem(new Configuration());

        Tuple2<ActorRef, ActorRef> master = JobManager.startJobManagerActors(
            config,
            actorSystem,
            TestingUtils.defaultExecutor(),
            TestingUtils.defaultExecutor(),
            Option.apply("jm"),
            Option.apply("arch"),
            TestingJobManager.class,
            TestingMemoryArchivist.class);
        jobManager = new AkkaActorGateway(master._1(), null);
        archiver = new AkkaActorGateway(master._2(), null);

        ActorRef taskManagerRef = TaskManager.startTaskManagerComponentsAndActor(
            config,
            ResourceID.generate(),
            actorSystem,
            "localhost",
            Option.apply("tm"),
            Option.<LeaderRetrievalService>apply(new StandaloneLeaderRetrievalService(jobManager.path())),
            true,
            TestingTaskManager.class);
        taskManager = new AkkaActorGateway(taskManagerRef, null);

        // Wait until connected
        Object msg = new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jobManager.actor());
        Await.ready(taskManager.ask(msg, timeout), timeout);

        // Create job graph
        JobVertex sourceVertex = new JobVertex("Source");
        sourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
        sourceVertex.setParallelism(1);
        JobGraph jobGraph = new JobGraph("TestingJob", sourceVertex);
        JobSnapshottingSettings snapshottingSettings = new JobSnapshottingSettings(
            Collections.singletonList(sourceVertex.getID()),
            Collections.singletonList(sourceVertex.getID()),
            Collections.singletonList(sourceVertex.getID()),
            // deactivated checkpointing
            Long.MAX_VALUE,
            360000,
            0,
            Integer.MAX_VALUE,
            ExternalizedCheckpointSettings.none(),
            null,
            true);
        jobGraph.setSnapshotSettings(snapshottingSettings);

        // Submit job graph
        msg = new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.DETACHED);
        Await.result(jobManager.ask(msg, timeout), timeout);

        // Wait for all tasks to be running
        msg = new TestingJobManagerMessages.WaitForAllVerticesToBeRunning(jobGraph.getJobID());
        Await.result(jobManager.ask(msg, timeout), timeout);

        // Trigger savepoint (the job keeps running; it is not cancelled here)
        File targetDirectory = tmpFolder.newFolder();
        msg = new TriggerSavepoint(jobGraph.getJobID(), Option.apply(targetDirectory.getAbsolutePath()));
        Future<Object> future = jobManager.ask(msg, timeout);
        Object result = Await.result(future, timeout);
        assertTrue("Did not trigger savepoint", result instanceof TriggerSavepointSuccess);

        // File.listFiles() returns null when the directory cannot be listed; fail with a
        // clear message instead of a NullPointerException in that case.
        File[] savepointFiles = targetDirectory.listFiles();
        assertTrue("Could not list savepoint target directory", savepointFiles != null);
        assertEquals(1, savepointFiles.length);
    } finally {
        // Tear down the actor system and every actor that was started
        if (actorSystem != null) {
            actorSystem.shutdown();
        }
        if (archiver != null) {
            archiver.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
        if (jobManager != null) {
            jobManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
        if (taskManager != null) {
            taskManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
    }
}
Aggregations