Search in sources :

Example 61 with ActorSystem

use of akka.actor.ActorSystem in project flink by apache.

the class LocalFlinkMiniClusterITCase method testLocalFlinkMiniClusterWithMultipleTaskManagers.

/**
 * Starts a {@link LocalFlinkMiniCluster} with several TaskManagers, asks the JobManager for
 * the number of registered TaskManagers and the total number of slots, and finally verifies
 * that no threads started during the test (other than those whose names match
 * {@code ALLOWED_THREAD_PREFIXES}) are still alive after the cluster was shut down.
 */
@Test
public void testLocalFlinkMiniClusterWithMultipleTaskManagers() {
    final ActorSystem system = ActorSystem.create("Testkit", AkkaUtils.getDefaultAkkaConfig());
    LocalFlinkMiniCluster miniCluster = null;
    final int numTMs = 3;
    final int numSlots = 14;
    // gather the threads that already exist
    final Set<Thread> threadsBefore = new HashSet<>();
    {
        // activeCount() is only an estimate; unused trailing slots stay null, which is
        // harmless here because the set is only used for contains() checks
        final Thread[] allThreads = new Thread[Thread.activeCount()];
        Thread.enumerate(allThreads);
        threadsBefore.addAll(Arrays.asList(allThreads));
    }
    try {
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTMs);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlots);
        miniCluster = new LocalFlinkMiniCluster(config, true);
        miniCluster.start();
        final ActorGateway jmGateway = miniCluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
        new JavaTestKit(system) {

            {
                // gateway wrapping the test kit's own actor, used as the reply-to address
                final ActorGateway selfGateway = new AkkaActorGateway(getRef(), null);
                new Within(TestingUtils.TESTING_DURATION()) {

                    @Override
                    protected void run() {
                        jmGateway.tell(JobManagerMessages.getRequestNumberRegisteredTaskManager(), selfGateway);
                        expectMsgEquals(TestingUtils.TESTING_DURATION(), numTMs);
                        jmGateway.tell(JobManagerMessages.getRequestTotalNumberOfSlots(), selfGateway);
                        expectMsgEquals(TestingUtils.TESTING_DURATION(), numTMs * numSlots);
                    }
                };
            }
        };
    } finally {
        if (miniCluster != null) {
            miniCluster.stop();
            miniCluster.awaitTermination();
        }
        JavaTestKit.shutdownActorSystem(system);
        system.awaitTermination();
    }
    // shut down the global execution context, to make sure it does not affect this testing
    try {
        Field f = ExecutionContextImpl.class.getDeclaredField("executor");
        f.setAccessible(true);
        Object exec = ExecutionContext$.MODULE$.global();
        ForkJoinPool executor = (ForkJoinPool) f.get(exec);
        executor.shutdownNow();
    } catch (Exception e) {
        // without access to the global executor the thread check below is skipped entirely
        System.err.println("Cannot test proper thread shutdown for local execution.");
        return;
    }
    // check for remaining threads
    // we need to check repeatedly for a while, because some threads shut down slowly
    long deadline = System.currentTimeMillis() + 30000;
    boolean foundThreads = true;
    String threadName = "";
    while (System.currentTimeMillis() < deadline) {
        // check that no additional threads remain
        final Thread[] threadsAfter = new Thread[Thread.activeCount()];
        Thread.enumerate(threadsAfter);
        foundThreads = false;
        for (Thread t : threadsAfter) {
            // Thread.enumerate() leaves slots null when threads terminated between the
            // activeCount() and enumerate() calls; skip them instead of failing with an NPE
            if (t == null) {
                continue;
            }
            if (t.isAlive() && !threadsBefore.contains(t)) {
                // this thread was not there before. check if it is allowed
                boolean allowed = false;
                for (String prefix : ALLOWED_THREAD_PREFIXES) {
                    if (t.getName().startsWith(prefix)) {
                        allowed = true;
                        break;
                    }
                }
                if (!allowed) {
                    foundThreads = true;
                    threadName = t.toString();
                    break;
                }
            }
        }
        if (foundThreads) {
            try {
                Thread.sleep(500);
            } catch (InterruptedException ignored) {
                // deliberately ignored: keep polling until the deadline expires
            }
        } else {
            break;
        }
    }
    if (foundThreads) {
        fail("Thread " + threadName + " was started by the mini cluster, but not shut down");
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) Configuration(org.apache.flink.configuration.Configuration) LocalFlinkMiniCluster(org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster) Field(java.lang.reflect.Field) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) JavaTestKit(akka.testkit.JavaTestKit) HashSet(java.util.HashSet) ForkJoinPool(scala.concurrent.forkjoin.ForkJoinPool) Test(org.junit.Test)

Example 62 with ActorSystem

use of akka.actor.ActorSystem in project flink by apache.

the class TaskManagerFailureRecoveryITCase method testRestartWithFailingTaskManager.

/**
 * Runs a small batch program on a {@link LocalFlinkMiniCluster}, starts an additional
 * TaskManager while the mappers are blocked, kills the originally started TaskManagers and
 * checks that the program still finishes without an exception after the restart.
 */
@Test
public void testRestartWithFailingTaskManager() {
    final int PARALLELISM = 4;
    LocalFlinkMiniCluster cluster = null;
    ActorSystem additionalSystem = null;
    try {
        // two TaskManagers with PARALLELISM slots each
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 2);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, PARALLELISM);
        config.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, 16);
        config.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_INTERVAL, "500 ms");
        config.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_PAUSE, "20 s");
        config.setInteger(ConfigConstants.AKKA_WATCH_THRESHOLD, 20);
        cluster = new LocalFlinkMiniCluster(config, false);
        cluster.start();
        // for the result
        // NOTE(review): resultCollection is handed to the output format but never inspected
        // in this method; only the absence of an exception is asserted
        List<Long> resultCollection = new ArrayList<Long>();
        final ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
        env.setParallelism(PARALLELISM);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 1000));
        env.getConfig().disableSysoutLogging();
        // program: sequence 1..10 -> FailingMapper -> sum -> local collection
        env.generateSequence(1, 10).map(new FailingMapper<Long>()).reduce(new ReduceFunction<Long>() {

            @Override
            public Long reduce(Long value1, Long value2) {
                return value1 + value2;
            }
        }).output(new LocalCollectionOutputFormat<Long>(resultCollection));
        // simple reference (atomic does not matter) to pass back an exception from the trigger thread
        final AtomicReference<Throwable> ref = new AtomicReference<Throwable>();
        // trigger the execution from a separate thread, so we are available to tamper with the
        // cluster during the execution
        Thread trigger = new Thread("program trigger") {

            @Override
            public void run() {
                try {
                    env.execute();
                } catch (Throwable t) {
                    ref.set(t);
                }
            }
        };
        trigger.setDaemon(true);
        trigger.start();
        // the mappers in turn are waiting
        // (one queue element is taken per parallel mapper instance)
        for (int i = 0; i < PARALLELISM; i++) {
            FailingMapper.TASK_TO_COORD_QUEUE.take();
        }
        // bring up one more task manager and wait for it to appear
        {
            additionalSystem = cluster.startTaskManagerActorSystem(2);
            ActorRef additionalTaskManager = cluster.startTaskManager(2, additionalSystem);
            Object message = TaskManagerMessages.getNotifyWhenRegisteredAtJobManagerMessage();
            Future<Object> future = Patterns.ask(additionalTaskManager, message, 30000);
            try {
                Await.result(future, new FiniteDuration(30000, TimeUnit.MILLISECONDS));
            } catch (TimeoutException e) {
                fail("The additional TaskManager did not come up within 30 seconds");
            }
        }
        // kill the two other TaskManagers
        // NOTE(review): presumably getTaskManagersAsJava() returns only the TaskManagers the
        // cluster started itself, not the additional one — confirm
        for (ActorRef tm : cluster.getTaskManagersAsJava()) {
            tm.tell(PoisonPill.getInstance(), null);
        }
        // wait for the next set of mappers (the recovery ones) to come online
        for (int i = 0; i < PARALLELISM; i++) {
            FailingMapper.TASK_TO_COORD_QUEUE.take();
        }
        // tell the mappers that they may continue this time
        for (int i = 0; i < PARALLELISM; i++) {
            FailingMapper.COORD_TO_TASK_QUEUE.add(new Object());
        }
        // wait for the program to finish
        trigger.join();
        // surface any exception that env.execute() threw on the trigger thread
        if (ref.get() != null) {
            Throwable t = ref.get();
            t.printStackTrace();
            fail("Program execution caused an exception: " + t.getMessage());
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    } finally {
        // release the extra actor system and the mini cluster regardless of the test outcome
        if (additionalSystem != null) {
            additionalSystem.shutdown();
        }
        if (cluster != null) {
            cluster.stop();
        }
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) ArrayList(java.util.ArrayList) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) FiniteDuration(scala.concurrent.duration.FiniteDuration) AtomicReference(java.util.concurrent.atomic.AtomicReference) TimeoutException(java.util.concurrent.TimeoutException) LocalFlinkMiniCluster(org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster) Future(scala.concurrent.Future) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)

Example 63 with ActorSystem

use of akka.actor.ActorSystem in project flink by apache.

the class JobManagerTest method testSavepointRestoreSettings.

/**
 * Tests that configured {@link SavepointRestoreSettings} are respected.
 *
 * <p>The test runs a job, triggers a savepoint and cancels the job. It then submits a job
 * with a new vertex that restores from that savepoint: with the second argument of
 * {@code SavepointRestoreSettings.forPath} set to {@code false} the submission must fail
 * with an {@link IllegalStateException} mentioning "allowNonRestoredState"; with it set to
 * {@code true} the submission must succeed.
 */
@Test
public void testSavepointRestoreSettings() throws Exception {
    FiniteDuration timeout = new FiniteDuration(30, TimeUnit.SECONDS);
    ActorSystem actorSystem = null;
    ActorGateway jobManager = null;
    ActorGateway archiver = null;
    ActorGateway taskManager = null;
    try {
        actorSystem = AkkaUtils.createLocalActorSystem(new Configuration());
        // start JobManager ("jm") and archivist ("arch") actors in the local actor system
        Tuple2<ActorRef, ActorRef> master = JobManager.startJobManagerActors(new Configuration(), actorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), Option.apply("jm"), Option.apply("arch"), TestingJobManager.class, TestingMemoryArchivist.class);
        jobManager = new AkkaActorGateway(master._1(), null);
        archiver = new AkkaActorGateway(master._2(), null);
        Configuration tmConfig = new Configuration();
        tmConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 4);
        ActorRef taskManagerRef = TaskManager.startTaskManagerComponentsAndActor(tmConfig, ResourceID.generate(), actorSystem, "localhost", Option.apply("tm"), Option.<LeaderRetrievalService>apply(new StandaloneLeaderRetrievalService(jobManager.path())), true, TestingTaskManager.class);
        taskManager = new AkkaActorGateway(taskManagerRef, null);
        // Wait until connected
        Object msg = new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jobManager.actor());
        Await.ready(taskManager.ask(msg, timeout), timeout);
        // Create job graph
        JobVertex sourceVertex = new JobVertex("Source");
        sourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
        sourceVertex.setParallelism(1);
        JobGraph jobGraph = new JobGraph("TestingJob", sourceVertex);
        JobSnapshottingSettings snapshottingSettings = new JobSnapshottingSettings(Collections.singletonList(sourceVertex.getID()), Collections.singletonList(sourceVertex.getID()), Collections.singletonList(sourceVertex.getID()), // deactivated checkpointing
        Long.MAX_VALUE, 360000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), null, true);
        jobGraph.setSnapshotSettings(snapshottingSettings);
        // Submit job graph
        msg = new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.DETACHED);
        Await.result(jobManager.ask(msg, timeout), timeout);
        // Wait for all tasks to be running
        msg = new TestingJobManagerMessages.WaitForAllVerticesToBeRunning(jobGraph.getJobID());
        Await.result(jobManager.ask(msg, timeout), timeout);
        // Trigger savepoint
        File targetDirectory = tmpFolder.newFolder();
        msg = new TriggerSavepoint(jobGraph.getJobID(), Option.apply(targetDirectory.getAbsolutePath()));
        Future<Object> future = jobManager.ask(msg, timeout);
        Object result = Await.result(future, timeout);
        // a non-success reply surfaces here as a ClassCastException
        String savepointPath = ((TriggerSavepointSuccess) result).savepointPath();
        // Cancel because of restarts
        // (register for the removal notification first, then send the cancel request)
        msg = new TestingJobManagerMessages.NotifyWhenJobRemoved(jobGraph.getJobID());
        Future<?> removedFuture = jobManager.ask(msg, timeout);
        Future<?> cancelFuture = jobManager.ask(new CancelJob(jobGraph.getJobID()), timeout);
        Object response = Await.result(cancelFuture, timeout);
        assertTrue("Unexpected response: " + response, response instanceof CancellationSuccess);
        Await.ready(removedFuture, timeout);
        // Adjust the job (we need a new operator ID)
        JobVertex newSourceVertex = new JobVertex("NewSource");
        newSourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
        newSourceVertex.setParallelism(1);
        JobGraph newJobGraph = new JobGraph("NewTestingJob", newSourceVertex);
        JobSnapshottingSettings newSnapshottingSettings = new JobSnapshottingSettings(Collections.singletonList(newSourceVertex.getID()), Collections.singletonList(newSourceVertex.getID()), Collections.singletonList(newSourceVertex.getID()), // deactivated checkpointing
        Long.MAX_VALUE, 360000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), null, true);
        newJobGraph.setSnapshotSettings(newSnapshottingSettings);
        // First attempt: restore from the savepoint without allowing non-restored state
        SavepointRestoreSettings restoreSettings = SavepointRestoreSettings.forPath(savepointPath, false);
        newJobGraph.setSavepointRestoreSettings(restoreSettings);
        msg = new JobManagerMessages.SubmitJob(newJobGraph, ListeningBehaviour.DETACHED);
        response = Await.result(jobManager.ask(msg, timeout), timeout);
        // the submission is expected to fail with an IllegalStateException that names the setting
        assertTrue("Unexpected response: " + response, response instanceof JobManagerMessages.JobResultFailure);
        JobManagerMessages.JobResultFailure failure = (JobManagerMessages.JobResultFailure) response;
        Throwable cause = failure.cause().deserializeError(ClassLoader.getSystemClassLoader());
        assertTrue(cause instanceof IllegalStateException);
        assertTrue(cause.getMessage().contains("allowNonRestoredState"));
        // Wait until removed
        msg = new TestingJobManagerMessages.NotifyWhenJobRemoved(newJobGraph.getJobID());
        Await.ready(jobManager.ask(msg, timeout), timeout);
        // Resubmit, but allow non restored state now
        restoreSettings = SavepointRestoreSettings.forPath(savepointPath, true);
        newJobGraph.setSavepointRestoreSettings(restoreSettings);
        msg = new JobManagerMessages.SubmitJob(newJobGraph, ListeningBehaviour.DETACHED);
        response = Await.result(jobManager.ask(msg, timeout), timeout);
        assertTrue("Unexpected response: " + response, response instanceof JobManagerMessages.JobSubmitSuccess);
    } finally {
        // NOTE(review): the actor system is shut down before the PoisonPills are sent to
        // its actors — confirm whether this ordering is intended
        if (actorSystem != null) {
            actorSystem.shutdown();
        }
        if (archiver != null) {
            archiver.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
        if (jobManager != null) {
            jobManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
        if (taskManager != null) {
            taskManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) JobSubmitSuccess(org.apache.flink.runtime.messages.JobManagerMessages.JobSubmitSuccess) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) CancelJob(org.apache.flink.runtime.messages.JobManagerMessages.CancelJob) WaitForAllVerticesToBeRunning(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning) JobSnapshottingSettings(org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) TriggerSavepointSuccess(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepointSuccess) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) StandaloneLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.StandaloneLeaderRetrievalService) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) TriggerSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint) File(java.io.File) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) Test(org.junit.Test)

Example 64 with ActorSystem

use of akka.actor.ActorSystem in project flink by apache.

the class JobManagerTest method testCancelWithSavepoint.

/**
 * Tests cancelling a running job with a savepoint: the cancel request (sent with a
 * {@code null} target directory while a default savepoint directory is configured) must
 * eventually succeed, the job must reach {@code JobStatus.CANCELED}, and the returned
 * savepoint path must exist on disk.
 */
@Test
public void testCancelWithSavepoint() throws Exception {
    File defaultSavepointDir = tmpFolder.newFolder();
    FiniteDuration timeout = new FiniteDuration(30, TimeUnit.SECONDS);
    Configuration config = new Configuration();
    config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, defaultSavepointDir.getAbsolutePath());
    ActorSystem actorSystem = null;
    ActorGateway jobManager = null;
    ActorGateway archiver = null;
    ActorGateway taskManager = null;
    try {
        actorSystem = AkkaUtils.createLocalActorSystem(new Configuration());
        // start JobManager ("jm") and archivist ("arch") actors with the savepoint directory configured
        Tuple2<ActorRef, ActorRef> master = JobManager.startJobManagerActors(config, actorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), Option.apply("jm"), Option.apply("arch"), TestingJobManager.class, TestingMemoryArchivist.class);
        jobManager = new AkkaActorGateway(master._1(), null);
        archiver = new AkkaActorGateway(master._2(), null);
        ActorRef taskManagerRef = TaskManager.startTaskManagerComponentsAndActor(config, ResourceID.generate(), actorSystem, "localhost", Option.apply("tm"), Option.<LeaderRetrievalService>apply(new StandaloneLeaderRetrievalService(jobManager.path())), true, TestingTaskManager.class);
        taskManager = new AkkaActorGateway(taskManagerRef, null);
        // Wait until connected
        Object msg = new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jobManager.actor());
        Await.ready(taskManager.ask(msg, timeout), timeout);
        // Create job graph
        JobVertex sourceVertex = new JobVertex("Source");
        sourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
        sourceVertex.setParallelism(1);
        JobGraph jobGraph = new JobGraph("TestingJob", sourceVertex);
        JobSnapshottingSettings snapshottingSettings = new JobSnapshottingSettings(Collections.singletonList(sourceVertex.getID()), Collections.singletonList(sourceVertex.getID()), Collections.singletonList(sourceVertex.getID()), 3600000, 3600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), null, true);
        jobGraph.setSnapshotSettings(snapshottingSettings);
        // Submit job graph
        msg = new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.DETACHED);
        Await.result(jobManager.ask(msg, timeout), timeout);
        // Wait for all tasks to be running
        msg = new TestingJobManagerMessages.WaitForAllVerticesToBeRunning(jobGraph.getJobID());
        Await.result(jobManager.ask(msg, timeout), timeout);
        // Notify when cancelled
        // (registered before the cancel request is sent)
        msg = new NotifyWhenJobStatus(jobGraph.getJobID(), JobStatus.CANCELED);
        Future<Object> cancelled = jobManager.ask(msg, timeout);
        // Cancel with savepoint
        // Up to 10 attempts: a decline because not all required tasks are running is
        // retried after a short sleep; any other failure fails the test immediately.
        String savepointPath = null;
        for (int i = 0; i < 10; i++) {
            msg = new JobManagerMessages.CancelJobWithSavepoint(jobGraph.getJobID(), null);
            CancellationResponse cancelResp = (CancellationResponse) Await.result(jobManager.ask(msg, timeout), timeout);
            if (cancelResp instanceof CancellationFailure) {
                CancellationFailure failure = (CancellationFailure) cancelResp;
                if (failure.cause().getMessage().contains(CheckpointDeclineReason.NOT_ALL_REQUIRED_TASKS_RUNNING.message())) {
                    // wait and retry
                    Thread.sleep(200);
                } else {
                    failure.cause().printStackTrace();
                    fail("Failed to cancel job: " + failure.cause().getMessage());
                }
            } else {
                savepointPath = ((CancellationSuccess) cancelResp).savepointPath();
                break;
            }
        }
        // Verify savepoint path
        // (still null if all attempts were declined)
        assertNotEquals("Savepoint not triggered", null, savepointPath);
        // Wait for job status change
        Await.ready(cancelled, timeout);
        File savepointFile = new File(savepointPath);
        assertEquals(true, savepointFile.exists());
    } finally {
        // NOTE(review): the actor system is shut down before the PoisonPills are sent to
        // its actors — confirm whether this ordering is intended
        if (actorSystem != null) {
            actorSystem.shutdown();
        }
        if (archiver != null) {
            archiver.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
        if (jobManager != null) {
            jobManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
        if (taskManager != null) {
            taskManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) WaitForAllVerticesToBeRunning(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning) JobSnapshottingSettings(org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) TriggerSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) StandaloneLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.StandaloneLeaderRetrievalService) CancellationFailure(org.apache.flink.runtime.messages.JobManagerMessages.CancellationFailure) File(java.io.File) NotifyWhenJobStatus(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.NotifyWhenJobStatus) CancellationResponse(org.apache.flink.runtime.messages.JobManagerMessages.CancellationResponse) Test(org.junit.Test)

Example 65 with ActorSystem

use of akka.actor.ActorSystem in project flink by apache.

the class JobManagerTest method testSavepointWithDeactivatedPeriodicCheckpointing.

/**
 * Tests that we can trigger a savepoint when periodic checkpoints are disabled.
 *
 * <p>The job is submitted with snapshot settings marked as "deactivated checkpointing";
 * a savepoint is then triggered into a fresh directory, and the test expects a
 * {@code TriggerSavepointSuccess} reply and exactly one entry in that directory.
 */
@Test
public void testSavepointWithDeactivatedPeriodicCheckpointing() throws Exception {
    File defaultSavepointDir = tmpFolder.newFolder();
    FiniteDuration timeout = new FiniteDuration(30, TimeUnit.SECONDS);
    Configuration config = new Configuration();
    config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, defaultSavepointDir.getAbsolutePath());
    ActorSystem actorSystem = null;
    ActorGateway jobManager = null;
    ActorGateway archiver = null;
    ActorGateway taskManager = null;
    try {
        actorSystem = AkkaUtils.createLocalActorSystem(new Configuration());
        // start JobManager ("jm") and archivist ("arch") actors in the local actor system
        Tuple2<ActorRef, ActorRef> master = JobManager.startJobManagerActors(config, actorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), Option.apply("jm"), Option.apply("arch"), TestingJobManager.class, TestingMemoryArchivist.class);
        jobManager = new AkkaActorGateway(master._1(), null);
        archiver = new AkkaActorGateway(master._2(), null);
        ActorRef taskManagerRef = TaskManager.startTaskManagerComponentsAndActor(config, ResourceID.generate(), actorSystem, "localhost", Option.apply("tm"), Option.<LeaderRetrievalService>apply(new StandaloneLeaderRetrievalService(jobManager.path())), true, TestingTaskManager.class);
        taskManager = new AkkaActorGateway(taskManagerRef, null);
        // Wait until connected
        Object msg = new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jobManager.actor());
        Await.ready(taskManager.ask(msg, timeout), timeout);
        // Create job graph
        JobVertex sourceVertex = new JobVertex("Source");
        sourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
        sourceVertex.setParallelism(1);
        JobGraph jobGraph = new JobGraph("TestingJob", sourceVertex);
        JobSnapshottingSettings snapshottingSettings = new JobSnapshottingSettings(Collections.singletonList(sourceVertex.getID()), Collections.singletonList(sourceVertex.getID()), Collections.singletonList(sourceVertex.getID()), // deactivated checkpointing
        Long.MAX_VALUE, 360000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), null, true);
        jobGraph.setSnapshotSettings(snapshottingSettings);
        // Submit job graph
        msg = new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.DETACHED);
        Await.result(jobManager.ask(msg, timeout), timeout);
        // Wait for all tasks to be running
        msg = new TestingJobManagerMessages.WaitForAllVerticesToBeRunning(jobGraph.getJobID());
        Await.result(jobManager.ask(msg, timeout), timeout);
        // Trigger savepoint into a dedicated target directory (the job is not cancelled here)
        File targetDirectory = tmpFolder.newFolder();
        msg = new TriggerSavepoint(jobGraph.getJobID(), Option.apply(targetDirectory.getAbsolutePath()));
        Future<Object> future = jobManager.ask(msg, timeout);
        Object result = Await.result(future, timeout);
        assertTrue("Did not trigger savepoint", result instanceof TriggerSavepointSuccess);
        // the target directory was freshly created, so exactly one entry is expected
        assertEquals(1, targetDirectory.listFiles().length);
    } finally {
        // NOTE(review): the actor system is shut down before the PoisonPills are sent to
        // its actors — confirm whether this ordering is intended
        if (actorSystem != null) {
            actorSystem.shutdown();
        }
        if (archiver != null) {
            archiver.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
        if (jobManager != null) {
            jobManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
        if (taskManager != null) {
            taskManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) JobSnapshottingSettings(org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) TriggerSavepointSuccess(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepointSuccess) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) StandaloneLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.StandaloneLeaderRetrievalService) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) TriggerSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint) File(java.io.File) WaitForAllVerticesToBeRunning(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning) Test(org.junit.Test)

Aggregations

ActorSystem (akka.actor.ActorSystem)91 ActorRef (akka.actor.ActorRef)54 Test (org.junit.Test)51 Configuration (org.apache.flink.configuration.Configuration)27 FiniteDuration (scala.concurrent.duration.FiniteDuration)12 File (java.io.File)11 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)11 LeaderRetrievalService (org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService)11 Props (akka.actor.Props)10 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)10 TestActorRef (akka.testkit.TestActorRef)8 IOException (java.io.IOException)8 AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway)8 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)8 Deadline (scala.concurrent.duration.Deadline)8 AddressFromURIString (akka.actor.AddressFromURIString)7 ActorMaterializer (akka.stream.ActorMaterializer)7 Materializer (akka.stream.Materializer)7 Sink (akka.stream.javadsl.Sink)7 Source (akka.stream.javadsl.Source)7