Search in sources :

Example 11 with FiniteDuration

use of scala.concurrent.duration.FiniteDuration in project flink by apache.

the class AbstractTaskManagerProcessFailureRecoveryTest method waitUntilNumTaskManagersAreRegistered.

/**
 * Polls the JobManager until it reports exactly {@code numExpected} registered TaskManagers.
 *
 * <p>Each poll asks the JobManager with a 10 ms timeout. A poll that times out is retried,
 * and polls are spaced at least one poll interval apart. If the expected number is not
 * reported before {@code maxDelayMillis} has elapsed, the test is failed.
 *
 * @param jobManager the JobManager actor to query
 * @param numExpected the exact number of registered TaskManagers to wait for
 * @param maxDelayMillis the maximum time to keep polling, in milliseconds
 * @throws Exception if the ask fails with anything other than a timeout, or if the thread
 *     is interrupted while sleeping between polls
 */
protected void waitUntilNumTaskManagersAreRegistered(ActorRef jobManager, int numExpected, long maxDelayMillis) throws Exception {
    // 10 ms = 10,000,000 nanos
    final long pollInterval = 10_000_000;
    // The same timeout is used for every poll, so it is built once rather than per iteration.
    final FiniteDuration timeout = new FiniteDuration(pollInterval, TimeUnit.NANOSECONDS);
    // toNanos saturates instead of silently wrapping around for very large delays.
    final long deadline = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(maxDelayMillis);
    long time;
    // nanoTime values may wrap around, so they are compared through their difference
    // (deadline - now > 0) and never directly (now < deadline).
    while (deadline - (time = System.nanoTime()) > 0) {
        try {
            Future<?> result = Patterns.ask(jobManager, JobManagerMessages.getRequestNumberRegisteredTaskManager(), new Timeout(timeout));
            int numTMs = (Integer) Await.result(result, timeout);
            if (numTMs == numExpected) {
                return;
            }
        } catch (TimeoutException e) {
            // ignore and retry
        } catch (ClassCastException e) {
            fail("Wrong response: " + e.getMessage());
        }
        // Sleep for whatever is left of the poll interval so polls are not issued back to back.
        long timePassed = System.nanoTime() - time;
        long remainingMillis = (pollInterval - timePassed) / 1_000_000;
        if (remainingMillis > 0) {
            Thread.sleep(remainingMillis);
        }
    }
    fail("The TaskManagers did not register within the expected time (" + maxDelayMillis + "msecs)");
}
Also used : Timeout(akka.util.Timeout) FiniteDuration(scala.concurrent.duration.FiniteDuration) TimeoutException(java.util.concurrent.TimeoutException)

Example 12 with FiniteDuration

use of scala.concurrent.duration.FiniteDuration in project flink by apache.

the class WebFrontendITCase method testStopYarn.

/**
 * Submits a stoppable job and keeps issuing the {@code /jobs/<jid>/yarn-stop} request until
 * the cluster reports no running jobs, validating every HTTP response along the way.
 *
 * @throws Exception if submitting the job, talking to the web server, or sleeping fails
 */
@Test
public void testStopYarn() throws Exception {
    // this only works if there is no active job at this point
    assertTrue(cluster.getCurrentlyRunningJobsJava().isEmpty());
    // Create a task
    final JobVertex sender = new JobVertex("Sender");
    sender.setParallelism(2);
    sender.setInvokableClass(StoppableInvokable.class);
    final JobGraph jobGraph = new JobGraph("Stoppable streaming test job", sender);
    final JobID jid = jobGraph.getJobID();
    cluster.submitJobDetached(jobGraph);
    // wait for job to show up
    while (cluster.getCurrentlyRunningJobsJava().isEmpty()) {
        Thread.sleep(10);
    }
    final FiniteDuration testTimeout = new FiniteDuration(2, TimeUnit.MINUTES);
    final Deadline deadline = testTimeout.fromNow();
    while (!cluster.getCurrentlyRunningJobsJava().isEmpty()) {
        // Fail with a clear message once the time budget is used up, instead of handing
        // negative timeouts to the HTTP client on every further iteration.
        assertTrue("The job was not stopped within " + testTimeout, deadline.hasTimeLeft());
        try (HttpTestClient client = new HttpTestClient("localhost", port)) {
            // Request the file from the web server
            client.sendGetRequest("/jobs/" + jid + "/yarn-stop", deadline.timeLeft());
            HttpTestClient.SimpleHttpResponse response = client.getNextResponse(deadline.timeLeft());
            assertEquals(HttpResponseStatus.OK, response.getStatus());
            // expected value first, so that a failure message reports the two sides correctly
            assertEquals(MimeTypes.getMimeTypeForExtension("json"), response.getType());
            assertEquals("{}", response.getContent());
        }
        Thread.sleep(20);
    }
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) HttpTestClient(org.apache.flink.runtime.webmonitor.testutils.HttpTestClient) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 13 with FiniteDuration

use of scala.concurrent.duration.FiniteDuration in project flink by apache.

the class TaskManagerProcessReapingTestBase method testReapProcessOnFailure.

/**
 * Starts JobManager and ResourceManager actors in an actor system bound to localhost,
 * launches a TaskManager in a separate JVM process, and checks that this process exits with
 * {@code TaskManager.RUNTIME_FAILURE_RETURN_CODE()} after the
 * {@code onTaskManagerProcessRunning} hook has been invoked on the TaskManager actor.
 *
 * <p>The test is skipped (returns early) when no java executable can be found. The child
 * process and the actor system are always cleaned up in the {@code finally} block.
 */
@Test
public void testReapProcessOnFailure() {
    Process taskManagerProcess = null;
    ActorSystem jmActorSystem = null;
    // collects the output of the TaskManager process so it can be printed on failure
    final StringWriter processOutput = new StringWriter();
    try {
        String javaCommand = getJavaCommandPath();
        // run this test only if the java command is available on this machine
        if (javaCommand == null) {
            System.out.println("---- Skipping TaskManagerProcessReapingTest : Could not find java executable ----");
            return;
        }
        // create a logging file for the process
        File tempLogFile = File.createTempFile("testlogconfig", "properties");
        tempLogFile.deleteOnExit();
        CommonTestUtils.printLog4jDebugConfig(tempLogFile);
        final int jobManagerPort = NetUtils.getAvailablePort();
        // start a JobManager
        Tuple2<String, Object> localAddress = new Tuple2<String, Object>("localhost", jobManagerPort);
        jmActorSystem = AkkaUtils.createActorSystem(new Configuration(), new Some<Tuple2<String, Object>>(localAddress));
        ActorRef jmActor = JobManager.startJobManagerActors(new Configuration(), jmActorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), JobManager.class, MemoryArchivist.class)._1;
        // start a ResourceManager
        StandaloneLeaderRetrievalService standaloneLeaderRetrievalService = new StandaloneLeaderRetrievalService(AkkaUtils.getAkkaURL(jmActorSystem, jmActor));
        FlinkResourceManager.startResourceManagerActors(new Configuration(), jmActorSystem, standaloneLeaderRetrievalService, StandaloneResourceManager.class);
        final int taskManagerPort = NetUtils.getAvailablePort();
        // start the task manager process
        String[] command = new String[] { javaCommand, "-Dlog.level=DEBUG", "-Dlog4j.configuration=file:" + tempLogFile.getAbsolutePath(), "-Xms256m", "-Xmx256m", "-classpath", getCurrentClasspath(), TaskManagerTestEntryPoint.class.getName(), String.valueOf(jobManagerPort), String.valueOf(taskManagerPort) };
        ProcessBuilder bld = new ProcessBuilder(command);
        taskManagerProcess = bld.start();
        // hook the process' stderr up to processOutput; the forwarder is not referenced again
        // (presumably it pumps the stream on its own thread -- TODO confirm)
        new PipeForwarder(taskManagerProcess.getErrorStream(), processOutput);
        // grab the reference to the TaskManager. try multiple times, until the process
        // is started and the TaskManager is up
        String taskManagerActorName = String.format("akka.tcp://flink@%s/user/%s", "localhost:" + taskManagerPort, TaskManager.TASK_MANAGER_NAME());
        ActorRef taskManagerRef = null;
        Throwable lastError = null;
        // up to 40 lookup attempts, pausing 500 ms after each failed one
        for (int i = 0; i < 40; i++) {
            try {
                taskManagerRef = TaskManager.getTaskManagerRemoteReference(taskManagerActorName, jmActorSystem, new FiniteDuration(25, TimeUnit.SECONDS));
                break;
            } catch (Throwable t) {
                // TaskManager probably not ready yet
                lastError = t;
            }
            Thread.sleep(500);
        }
        // the process must still be running at this point, whether or not the lookup succeeded
        assertTrue("TaskManager process died", isProcessAlive(taskManagerProcess));
        if (taskManagerRef == null) {
            if (lastError != null) {
                lastError.printStackTrace();
            }
            fail("TaskManager process did not launch the TaskManager properly. Failed to look up " + taskManagerActorName);
        }
        // kill the TaskManager actor
        onTaskManagerProcessRunning(taskManagerRef);
        // wait for max 10 seconds (10000 ms) for the process to terminate
        {
            long now = System.currentTimeMillis();
            long deadline = now + 10000;
            while (now < deadline && isProcessAlive(taskManagerProcess)) {
                Thread.sleep(100);
                now = System.currentTimeMillis();
            }
        }
        assertFalse("TaskManager process did not terminate upon actor death", isProcessAlive(taskManagerProcess));
        int returnCode = taskManagerProcess.exitValue();
        assertEquals("TaskManager died, but not because of the process reaper", TaskManager.RUNTIME_FAILURE_RETURN_CODE(), returnCode);
        onTaskManagerProcessTerminated(processOutput.toString());
    } catch (Exception e) {
        // report unexpected exceptions as a test failure, together with the process output
        e.printStackTrace();
        printProcessLog(processOutput.toString());
        fail(e.getMessage());
    } catch (Error e) {
        // errors (including failed assertions) are rethrown after dumping the process output
        e.printStackTrace();
        printProcessLog(processOutput.toString());
        throw e;
    } finally {
        // always clean up the child process and the actor system
        if (taskManagerProcess != null) {
            taskManagerProcess.destroy();
        }
        if (jmActorSystem != null) {
            jmActorSystem.shutdown();
        }
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) MemoryArchivist(org.apache.flink.runtime.jobmanager.MemoryArchivist) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) FiniteDuration(scala.concurrent.duration.FiniteDuration) JobManager(org.apache.flink.runtime.jobmanager.JobManager) IOException(java.io.IOException) Some(scala.Some) StringWriter(java.io.StringWriter) Tuple2(scala.Tuple2) StandaloneLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.StandaloneLeaderRetrievalService) File(java.io.File) Test(org.junit.Test)

Example 14 with FiniteDuration

use of scala.concurrent.duration.FiniteDuration in project flink by apache.

the class TaskManagerRegistrationTest method testTaskManagerNoExcessiveRegistrationMessages.

/**
 * Checks that a TaskManager whose registration keeps being refused does not flood the
 * JobManager with {@code RegisterTaskManager} messages.
 *
 * <p>The test kit's own actor plays the JobManager: for five seconds every registration
 * attempt is refused, then the attempts arriving afterwards are collected and their count
 * is compared against an upper bound computed from the configured pauses.
 */
@Test
public void testTaskManagerNoExcessiveRegistrationMessages() throws Exception {
    new JavaTestKit(actorSystem) {

        {
            ActorGateway jm = null;
            ActorGateway taskManager = null;
            try {
                final long maxDelay = 30000;
                final long initialRegistrationPause = 100;
                final long refusedRegistrationPause = 500;
                final FiniteDuration testDuration = new FiniteDuration(5, TimeUnit.SECONDS);

                jm = TestingUtils.createForwardingActor(actorSystem, getTestActor(), Option.<String>empty());

                final Configuration tmConfig = new Configuration(config);
                tmConfig.setString(ConfigConstants.TASK_MANAGER_REFUSED_REGISTRATION_PAUSE, refusedRegistrationPause + " ms");
                tmConfig.setString(ConfigConstants.TASK_MANAGER_INITIAL_REGISTRATION_PAUSE, initialRegistrationPause + " ms");

                // the test actor (the test kit) acts as the JobManager so that the
                // registration messages can be intercepted
                taskManager = createTaskManager(actorSystem, jm, tmConfig, true, false);

                final Deadline refusalDeadline = testDuration.fromNow();
                try {
                    while (refusalDeadline.hasTimeLeft()) {
                        // wait for the next registration attempt ...
                        expectMsgClass(refusalDeadline.timeLeft(), RegisterTaskManager.class);
                        // ... and turn it down
                        taskManager.tell(new RefuseRegistration(new Exception("test reason")), jm);
                    }
                } catch (AssertionError error) {
                    // ignore since it simply means that we have used up all our time
                }

                // collect the registration attempts that still arrive after the refusal phase
                final RegisterTaskManager[] received = new ReceiveWhile<RegisterTaskManager>(RegisterTaskManager.class, testDuration) {

                    @Override
                    protected RegisterTaskManager match(Object msg) throws Exception {
                        if (!(msg instanceof RegisterTaskManager)) {
                            throw noMatch();
                        }
                        return (RegisterTaskManager) msg;
                    }
                }.get();

                // upper bound: a base-2 logarithmic term capped by maxDelay, plus one message
                // per maxDelay of any remaining time, doubled as a safety margin
                final long timeoutMillis = testDuration.toMillis();
                final double ln2 = Math.log(2);
                final int maxExponent = (int) Math.floor(Math.log((double) maxDelay / initialRegistrationPause + 1) / ln2);
                final int exponent = (int) Math.ceil(Math.log((double) timeoutMillis / initialRegistrationPause + 1) / ln2);
                final int exp = Math.min(maxExponent, exponent);
                final long difference = timeoutMillis - (initialRegistrationPause * (1 << exp));

                int expectedMessages = exp;
                if (difference > 0) {
                    expectedMessages = (int) (expectedMessages + Math.ceil((double) difference / maxDelay));
                }
                final int maxAllowedMessages = expectedMessages * 2;

                assertTrue("The number of RegisterTaskManager messages #" + received.length + " should be less than #" + maxAllowedMessages, received.length <= maxAllowedMessages);
            } finally {
                stopActor(taskManager);
                stopActor(jm);
            }
        }
    };
}
Also used : RegisterTaskManager(org.apache.flink.runtime.messages.RegistrationMessages.RegisterTaskManager) Configuration(org.apache.flink.configuration.Configuration) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) RefuseRegistration(org.apache.flink.runtime.messages.RegistrationMessages.RefuseRegistration) InvalidActorNameException(akka.actor.InvalidActorNameException) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) JavaTestKit(akka.testkit.JavaTestKit) Test(org.junit.Test)

Example 15 with FiniteDuration

use of scala.concurrent.duration.FiniteDuration in project flink by apache.

the class TaskManagerRegistrationTest method testDelayedRegistration.

/**
 * Verifies that a TaskManager which is started before its JobManager is available still
 * registers once the JobManager has come up.
 */
@Test
public void testDelayedRegistration() {
    new JavaTestKit(actorSystem) {

        {
            // three times the regular timeout
            final FiniteDuration delayedTimeout = timeout.$times(3);
            ActorGateway jobManager = null;
            ActorGateway taskManager = null;
            try {
                // bring up the TaskManager first and point it at the regular JobManager akka
                // URL, although no JobManager is running there yet
                taskManager = createTaskManager(actorSystem, JobManager.getLocalJobManagerAkkaURL(Option.<String>empty()), new Configuration(), true, false);

                // give it some time for (unsuccessful) registration attempts
                Thread.sleep(6000);

                // only now start the JobManager under the regular akka URL
                jobManager = createJobManager(actorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), new Configuration());

                // NOTE(review): the resource manager is started twice with identical
                // arguments -- looks like an accidental duplicate; confirm before removing
                startResourceManager(config, jobManager.actor());
                startResourceManager(config, jobManager.actor());

                // ask the TaskManager to notify us once it is registered at the JobManager
                final Object response = Await.result(taskManager.ask(TaskManagerMessages.getNotifyWhenRegisteredAtJobManagerMessage(), delayedTimeout), delayedTimeout);

                // compare by class: a workaround for how Java interacts with scala case objects
                final Object expectedReply = TaskManagerMessages.getRegisteredAtJobManagerMessage();
                assertTrue(expectedReply.getClass().isInstance(response));
            } catch (Exception e) {
                e.printStackTrace();
                fail(e.getMessage());
            } finally {
                stopActor(taskManager);
                stopActor(jobManager);
            }
        }
    };
}
Also used : Configuration(org.apache.flink.configuration.Configuration) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) FiniteDuration(scala.concurrent.duration.FiniteDuration) JavaTestKit(akka.testkit.JavaTestKit) InvalidActorNameException(akka.actor.InvalidActorNameException) Test(org.junit.Test)

Aggregations

FiniteDuration (scala.concurrent.duration.FiniteDuration)77 Test (org.junit.Test)61 Configuration (org.apache.flink.configuration.Configuration)37 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)30 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)27 ActorRef (akka.actor.ActorRef)25 Deadline (scala.concurrent.duration.Deadline)24 JobID (org.apache.flink.api.common.JobID)19 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)19 JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages)17 TestingJobManagerMessages (org.apache.flink.runtime.testingUtils.TestingJobManagerMessages)13 ActorSystem (akka.actor.ActorSystem)12 JavaTestKit (akka.testkit.JavaTestKit)11 Timeout (akka.util.Timeout)11 File (java.io.File)11 TimeoutException (java.util.concurrent.TimeoutException)11 AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway)11 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)11 Props (akka.actor.Props)10 IOException (java.io.IOException)10