use of org.apache.flink.runtime.instance.AkkaActorGateway in project flink by apache.
the class ChaosMonkeyITCase method submitJobGraph.
// - Utilities ---------------------------------------------------------------------------------
/**
 * Sends a detached {@code SubmitJob} message for the given job graph to a JobManager process.
 *
 * <p>The message is delivered with a one-way {@code tell}; this method does not wait for or
 * inspect any reply.
 *
 * @param jobGraph the job graph to submit
 * @param jobManager the JobManager process whose actor receives the submission
 * @param leaderListener supplies the leader session ID that the gateway is created with
 * @param actorSystem actor system passed to {@code jobManager.getActorRef}
 * @param timeout timeout passed to {@code jobManager.getActorRef}
 * @throws Exception propagated from resolving the actor reference or sending the message
 */
private void submitJobGraph(JobGraph jobGraph, JobManagerProcess jobManager, TestingListener leaderListener, ActorSystem actorSystem, FiniteDuration timeout) throws Exception {
    // Resolve the target actor first, then read the session ID the listener currently reports.
    final ActorRef targetRef = jobManager.getActorRef(actorSystem, timeout);
    final UUID leaderSessionId = leaderListener.getLeaderSessionID();

    final AkkaActorGateway gateway = new AkkaActorGateway(targetRef, leaderSessionId);
    final JobManagerMessages.SubmitJob submission =
            new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.DETACHED);

    gateway.tell(submission);
}
use of org.apache.flink.runtime.instance.AkkaActorGateway in project flink by apache.
the class ChaosMonkeyITCase method waitForJobRemoved.
/**
 * Polls the JobManager's archive actor until it reports a status for the given job.
 *
 * <p>The archive actor is obtained by asking the JobManager with the
 * {@code RequestArchive} message. The archive is then queried repeatedly: while it answers
 * {@code JobNotFound} the method sleeps 100 ms and retries; any other response ends the wait.
 *
 * <p>If the timeout elapses while the archive still answers {@code JobNotFound}, the method
 * returns normally without signalling the timeout.
 *
 * @param jobId ID of the job to wait for
 * @param jobManager the JobManager process to query
 * @param actorSystem actor system passed to {@code jobManager.getActorRef}
 * @param timeout used for the actor lookup, the archive request, and the overall polling deadline
 * @throws Exception propagated from the actor lookup, the blocking awaits, or the sleep
 */
private void waitForJobRemoved(JobID jobId, JobManagerProcess jobManager, ActorSystem actorSystem, FiniteDuration timeout) throws Exception {
    final ActorRef jmRef = jobManager.getActorRef(actorSystem, timeout);
    final AkkaActorGateway jmGateway = new AkkaActorGateway(jmRef, null);

    // Ask the JobManager for its archive actor and block until the answer arrives.
    final Future<Object> archiveReply = jmGateway.ask(JobManagerMessages.getRequestArchive(), timeout);
    final Object rawReply = Await.result(archiveReply, timeout);
    final JobManagerMessages.ResponseArchive archiveResponse = (JobManagerMessages.ResponseArchive) rawReply;
    final ActorRef archiveRef = archiveResponse.actor();
    final AkkaActorGateway archiveGateway = new AkkaActorGateway(archiveRef, null);

    final Deadline pollDeadline = timeout.fromNow();
    while (pollDeadline.hasTimeLeft()) {
        final JobManagerMessages.JobStatusResponse status =
                JobManagerActorTestUtils.requestJobStatus(jobId, archiveGateway, pollDeadline.timeLeft());

        if (!(status instanceof JobManagerMessages.JobNotFound)) {
            // The archive knows the job now; nothing more to wait for.
            return;
        }

        Thread.sleep(100);
    }
}
use of org.apache.flink.runtime.instance.AkkaActorGateway in project flink by apache.
the class ChaosMonkeyITCase method requestJobStatus.
/**
 * Asks the given JobManager process for the current status of a job.
 *
 * @param jobId ID of the job to query
 * @param jobManager the JobManager process to query
 * @param actorSystem actor system passed to {@code jobManager.getActorRef}
 * @param timeout timeout for the actor lookup and the status request
 * @return the status carried by a {@code CurrentJobStatus} response, or
 *         {@link JobStatus#RESTARTING} when the JobManager answers {@code JobNotFound}
 * @throws IllegalStateException if the response is neither of the two expected types
 * @throws Exception propagated from the actor lookup or the status request
 */
private JobStatus requestJobStatus(JobID jobId, JobManagerProcess jobManager, ActorSystem actorSystem, FiniteDuration timeout) throws Exception {
    final AkkaActorGateway gateway =
            new AkkaActorGateway(jobManager.getActorRef(actorSystem, timeout), null);

    final JobManagerMessages.JobStatusResponse answer =
            JobManagerActorTestUtils.requestJobStatus(jobId, gateway, timeout);

    if (answer instanceof JobManagerMessages.CurrentJobStatus) {
        return ((JobManagerMessages.CurrentJobStatus) answer).status();
    }

    if (answer instanceof JobManagerMessages.JobNotFound) {
        // An unknown job is reported to the caller as RESTARTING.
        return JobStatus.RESTARTING;
    }

    throw new IllegalStateException("Unexpected response from JobManager");
}
use of org.apache.flink.runtime.instance.AkkaActorGateway in project flink by apache.
the class LocalFlinkMiniClusterITCase method testLocalFlinkMiniClusterWithMultipleTaskManagers.
/**
 * Starts a {@link LocalFlinkMiniCluster} with several TaskManagers, checks that the JobManager
 * reports the expected number of TaskManagers and slots, and then verifies that no thread
 * started during the test (other than those matching {@code ALLOWED_THREAD_PREFIXES}) is
 * still alive after the cluster has been stopped.
 *
 * <p>The thread check is skipped (with a message on {@code System.err}) if the global
 * execution context's executor cannot be shut down reflectively.
 */
@Test
public void testLocalFlinkMiniClusterWithMultipleTaskManagers() {
    final ActorSystem system = ActorSystem.create("Testkit", AkkaUtils.getDefaultAkkaConfig());
    LocalFlinkMiniCluster miniCluster = null;
    final int numTMs = 3;
    final int numSlots = 14;
    // gather the threads that already exist
    final Set<Thread> threadsBefore = new HashSet<>();
    {
        final Thread[] allThreads = new Thread[Thread.activeCount()];
        Thread.enumerate(allThreads);
        // unfilled (null) slots may end up in the set; that is harmless for the lookups below
        threadsBefore.addAll(Arrays.asList(allThreads));
    }
    try {
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTMs);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlots);
        miniCluster = new LocalFlinkMiniCluster(config, true);
        miniCluster.start();
        final ActorGateway jmGateway = miniCluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
        new JavaTestKit(system) {
            {
                // replies from the JobManager are routed to the test kit's own actor
                final ActorGateway selfGateway = new AkkaActorGateway(getRef(), null);
                new Within(TestingUtils.TESTING_DURATION()) {
                    @Override
                    protected void run() {
                        jmGateway.tell(JobManagerMessages.getRequestNumberRegisteredTaskManager(), selfGateway);
                        expectMsgEquals(TestingUtils.TESTING_DURATION(), numTMs);
                        jmGateway.tell(JobManagerMessages.getRequestTotalNumberOfSlots(), selfGateway);
                        expectMsgEquals(TestingUtils.TESTING_DURATION(), numTMs * numSlots);
                    }
                };
            }
        };
    } finally {
        if (miniCluster != null) {
            miniCluster.stop();
            miniCluster.awaitTermination();
        }
        JavaTestKit.shutdownActorSystem(system);
        system.awaitTermination();
    }
    // shut down the global execution context, to make sure it does not affect this testing
    try {
        Field f = ExecutionContextImpl.class.getDeclaredField("executor");
        f.setAccessible(true);
        Object exec = ExecutionContext$.MODULE$.global();
        ForkJoinPool executor = (ForkJoinPool) f.get(exec);
        executor.shutdownNow();
    } catch (Exception e) {
        System.err.println("Cannot test proper thread shutdown for local execution.");
        return;
    }
    // check for remaining threads
    // we need to check repeatedly for a while, because some threads shut down slowly
    long deadline = System.currentTimeMillis() + 30000;
    boolean foundThreads = true;
    String threadName = "";
    while (System.currentTimeMillis() < deadline) {
        // check that no additional threads remain
        final Thread[] threadsAfter = new Thread[Thread.activeCount()];
        Thread.enumerate(threadsAfter);
        foundThreads = false;
        for (Thread t : threadsAfter) {
            // Thread.activeCount() is only an estimate: if threads terminate before
            // Thread.enumerate() runs, the trailing array slots stay null. Skip them
            // instead of failing with a NullPointerException.
            if (t != null && t.isAlive() && !threadsBefore.contains(t)) {
                // this thread was not there before. check if it is allowed
                boolean allowed = false;
                for (String prefix : ALLOWED_THREAD_PREFIXES) {
                    if (t.getName().startsWith(prefix)) {
                        allowed = true;
                        break;
                    }
                }
                if (!allowed) {
                    foundThreads = true;
                    threadName = t.toString();
                    break;
                }
            }
        }
        if (foundThreads) {
            try {
                Thread.sleep(500);
            } catch (InterruptedException ignored) {
                // best effort: keep polling until the deadline
            }
        } else {
            break;
        }
    }
    if (foundThreads) {
        fail("Thread " + threadName + " was started by the mini cluster, but not shut down");
    }
}
use of org.apache.flink.runtime.instance.AkkaActorGateway in project flink by apache.
the class JobManagerTest method testSavepointRestoreSettings.
/**
 * Tests that configured {@link SavepointRestoreSettings} are respected.
 *
 * <p>The test runs a job with a single blocking source vertex, triggers a savepoint and
 * cancels the job. It then submits a second job with a freshly created vertex that restores
 * from that savepoint:
 * <ul>
 *   <li>with {@code SavepointRestoreSettings.forPath(savepointPath, false)} the submission is
 *       expected to answer with a {@code JobResultFailure} whose cause is an
 *       {@link IllegalStateException} mentioning {@code allowNonRestoredState};</li>
 *   <li>with {@code SavepointRestoreSettings.forPath(savepointPath, true)} the submission is
 *       expected to answer with {@code JobSubmitSuccess}.</li>
 * </ul>
 */
@Test
public void testSavepointRestoreSettings() throws Exception {
// Single timeout used for every ask and every blocking await in this test.
FiniteDuration timeout = new FiniteDuration(30, TimeUnit.SECONDS);
ActorSystem actorSystem = null;
ActorGateway jobManager = null;
ActorGateway archiver = null;
ActorGateway taskManager = null;
try {
actorSystem = AkkaUtils.createLocalActorSystem(new Configuration());
// Start the JobManager ("jm") and archivist ("arch") actors using the testing actor classes.
Tuple2<ActorRef, ActorRef> master = JobManager.startJobManagerActors(new Configuration(), actorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), Option.apply("jm"), Option.apply("arch"), TestingJobManager.class, TestingMemoryArchivist.class);
jobManager = new AkkaActorGateway(master._1(), null);
archiver = new AkkaActorGateway(master._2(), null);
Configuration tmConfig = new Configuration();
tmConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 4);
// Start a TaskManager ("tm") whose leader retrieval service points at the JobManager's path.
ActorRef taskManagerRef = TaskManager.startTaskManagerComponentsAndActor(tmConfig, ResourceID.generate(), actorSystem, "localhost", Option.apply("tm"), Option.<LeaderRetrievalService>apply(new StandaloneLeaderRetrievalService(jobManager.path())), true, TestingTaskManager.class);
taskManager = new AkkaActorGateway(taskManagerRef, null);
// Wait until connected
Object msg = new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jobManager.actor());
Await.ready(taskManager.ask(msg, timeout), timeout);
// Create job graph
JobVertex sourceVertex = new JobVertex("Source");
sourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
sourceVertex.setParallelism(1);
JobGraph jobGraph = new JobGraph("TestingJob", sourceVertex);
JobSnapshottingSettings snapshottingSettings = new JobSnapshottingSettings(Collections.singletonList(sourceVertex.getID()), Collections.singletonList(sourceVertex.getID()), Collections.singletonList(sourceVertex.getID()), // deactivated checkpointing
Long.MAX_VALUE, 360000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), null, true);
jobGraph.setSnapshotSettings(snapshottingSettings);
// Submit job graph
msg = new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.DETACHED);
Await.result(jobManager.ask(msg, timeout), timeout);
// Wait for all tasks to be running
msg = new TestingJobManagerMessages.WaitForAllVerticesToBeRunning(jobGraph.getJobID());
Await.result(jobManager.ask(msg, timeout), timeout);
// Trigger savepoint
File targetDirectory = tmpFolder.newFolder();
msg = new TriggerSavepoint(jobGraph.getJobID(), Option.apply(targetDirectory.getAbsolutePath()));
Future<Object> future = jobManager.ask(msg, timeout);
Object result = Await.result(future, timeout);
// The cast fails the test with a ClassCastException if the savepoint did not succeed.
String savepointPath = ((TriggerSavepointSuccess) result).savepointPath();
// Cancel because of restarts
// The removal notification is requested before the cancel is sent, so the removal cannot be missed.
msg = new TestingJobManagerMessages.NotifyWhenJobRemoved(jobGraph.getJobID());
Future<?> removedFuture = jobManager.ask(msg, timeout);
Future<?> cancelFuture = jobManager.ask(new CancelJob(jobGraph.getJobID()), timeout);
Object response = Await.result(cancelFuture, timeout);
assertTrue("Unexpected response: " + response, response instanceof CancellationSuccess);
Await.ready(removedFuture, timeout);
// Adjust the job (we need a new operator ID)
JobVertex newSourceVertex = new JobVertex("NewSource");
newSourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
newSourceVertex.setParallelism(1);
JobGraph newJobGraph = new JobGraph("NewTestingJob", newSourceVertex);
JobSnapshottingSettings newSnapshottingSettings = new JobSnapshottingSettings(Collections.singletonList(newSourceVertex.getID()), Collections.singletonList(newSourceVertex.getID()), Collections.singletonList(newSourceVertex.getID()), // deactivated checkpointing
Long.MAX_VALUE, 360000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), null, true);
newJobGraph.setSnapshotSettings(newSnapshottingSettings);
// First attempt: restore with the second forPath argument set to false; submission must fail.
SavepointRestoreSettings restoreSettings = SavepointRestoreSettings.forPath(savepointPath, false);
newJobGraph.setSavepointRestoreSettings(restoreSettings);
msg = new JobManagerMessages.SubmitJob(newJobGraph, ListeningBehaviour.DETACHED);
response = Await.result(jobManager.ask(msg, timeout), timeout);
assertTrue("Unexpected response: " + response, response instanceof JobManagerMessages.JobResultFailure);
JobManagerMessages.JobResultFailure failure = (JobManagerMessages.JobResultFailure) response;
Throwable cause = failure.cause().deserializeError(ClassLoader.getSystemClassLoader());
assertTrue(cause instanceof IllegalStateException);
assertTrue(cause.getMessage().contains("allowNonRestoredState"));
// Wait until removed
msg = new TestingJobManagerMessages.NotifyWhenJobRemoved(newJobGraph.getJobID());
Await.ready(jobManager.ask(msg, timeout), timeout);
// Resubmit, but allow non restored state now
restoreSettings = SavepointRestoreSettings.forPath(savepointPath, true);
newJobGraph.setSavepointRestoreSettings(restoreSettings);
msg = new JobManagerMessages.SubmitJob(newJobGraph, ListeningBehaviour.DETACHED);
response = Await.result(jobManager.ask(msg, timeout), timeout);
assertTrue("Unexpected response: " + response, response instanceof JobManagerMessages.JobSubmitSuccess);
} finally {
// Cleanup of everything that was started; each reference is null if setup failed before it was assigned.
// NOTE(review): the actor system shutdown is requested before the PoisonPills are sent
// to the individual actors - confirm that this ordering is intended.
if (actorSystem != null) {
actorSystem.shutdown();
}
if (archiver != null) {
archiver.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
}
if (jobManager != null) {
jobManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
}
if (taskManager != null) {
taskManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
}
}
}
Aggregations