use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class AbstractTaskManagerProcessFailureRecoveryTest method waitUntilNumTaskManagersAreRegistered.
/**
 * Polls the JobManager until it reports exactly {@code numExpected} registered
 * TaskManagers, and fails the test if that does not happen within {@code maxDelayMillis}.
 *
 * <p>Every poll asks the JobManager with a timeout of one poll interval (10 ms). A poll that
 * times out is retried; a reply that is not an {@code Integer} fails the test. Whatever is
 * left of the poll interval after a poll is slept away before the next attempt.
 *
 * @param jobManager the actor that is asked for the number of registered TaskManagers
 * @param numExpected the number of registered TaskManagers to wait for
 * @param maxDelayMillis the maximum time to keep polling, in milliseconds
 * @throws Exception if waiting for the reply fails for a reason other than a timeout
 */
protected void waitUntilNumTaskManagersAreRegistered(ActorRef jobManager, int numExpected, long maxDelayMillis) throws Exception {
    // 10 ms = 10,000,000 nanos
    final long pollInterval = 10_000_000;
    final long deadline = System.nanoTime() + maxDelayMillis * 1_000_000;

    // the ask/await timeout never changes, so it is built once instead of once per poll
    final FiniteDuration timeout = new FiniteDuration(pollInterval, TimeUnit.NANOSECONDS);

    long time;
    // System.nanoTime() values may wrap around, so two readings must be compared through
    // their difference and never directly with '<'
    while ((time = System.nanoTime()) - deadline < 0) {
        try {
            Future<?> result = Patterns.ask(jobManager, JobManagerMessages.getRequestNumberRegisteredTaskManager(), new Timeout(timeout));
            int numTMs = (Integer) Await.result(result, timeout);
            if (numTMs == numExpected) {
                return;
            }
        } catch (TimeoutException e) {
            // ignore and retry
        } catch (ClassCastException e) {
            fail("Wrong response: " + e.getMessage());
        }

        // sleep away the unused part of the poll interval (sub-millisecond rests are skipped)
        long timePassed = System.nanoTime() - time;
        long remainingMillis = (pollInterval - timePassed) / 1_000_000;
        if (remainingMillis > 0) {
            Thread.sleep(remainingMillis);
        }
    }

    fail("The TaskManagers did not register within the expected time (" + maxDelayMillis + "msecs)");
}
use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class WebFrontendITCase method testStopYarn.
/**
 * Submits a stoppable job and then keeps requesting {@code /jobs/<jobId>/yarn-stop} from the
 * web frontend until the cluster no longer reports a running job. Every response must be
 * an HTTP 200 with the JSON mime type and the body <code>{}</code>.
 *
 * @throws Exception if submitting the job, talking to the web frontend or sleeping fails
 */
@Test
public void testStopYarn() throws Exception {
    // this only works if there is no active job at this point
    assertTrue(cluster.getCurrentlyRunningJobsJava().isEmpty());

    // Create a task
    final JobVertex sender = new JobVertex("Sender");
    sender.setParallelism(2);
    sender.setInvokableClass(StoppableInvokable.class);

    final JobGraph jobGraph = new JobGraph("Stoppable streaming test job", sender);
    final JobID jid = jobGraph.getJobID();

    cluster.submitJobDetached(jobGraph);

    // wait for job to show up
    while (cluster.getCurrentlyRunningJobsJava().isEmpty()) {
        Thread.sleep(10);
    }

    final FiniteDuration testTimeout = new FiniteDuration(2, TimeUnit.MINUTES);
    final Deadline deadline = testTimeout.fromNow();

    while (!cluster.getCurrentlyRunningJobsJava().isEmpty()) {
        // without this check an expired deadline would hand a negative timeout to the
        // HTTP client instead of failing the test with a clear message
        assertTrue("The job was not stopped within " + testTimeout, deadline.hasTimeLeft());

        try (HttpTestClient client = new HttpTestClient("localhost", port)) {
            // Request the file from the web server
            client.sendGetRequest("/jobs/" + jid + "/yarn-stop", deadline.timeLeft());

            HttpTestClient.SimpleHttpResponse response = client.getNextResponse(deadline.timeLeft());

            // expected value first, so that a failure message reads the right way round
            assertEquals(HttpResponseStatus.OK, response.getStatus());
            assertEquals(MimeTypes.getMimeTypeForExtension("json"), response.getType());
            assertEquals("{}", response.getContent());
        }

        Thread.sleep(20);
    }
}
use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class TaskManagerProcessReapingTestBase method testReapProcessOnFailure.
/**
 * Starts a JobManager and a ResourceManager in this JVM, launches a TaskManager in a
 * separate Java process, hands the TaskManager's actor reference to
 * {@code onTaskManagerProcessRunning}, and then checks that the process terminates within
 * 10 seconds with the exit code {@code TaskManager.RUNTIME_FAILURE_RETURN_CODE()}.
 *
 * <p>The test is skipped (returns early) when no java executable can be found. The child
 * process and the actor system are cleaned up in the {@code finally} block.
 */
@Test
public void testReapProcessOnFailure() {
Process taskManagerProcess = null;
ActorSystem jmActorSystem = null;
// collects the error-stream output of the TaskManager process for diagnostics
final StringWriter processOutput = new StringWriter();
try {
String javaCommand = getJavaCommandPath();
// check that we run this test only if the java command
// is available on this machine
if (javaCommand == null) {
System.out.println("---- Skipping TaskManagerProcessReapingTest : Could not find java executable ----");
return;
}
// create a logging file for the process
File tempLogFile = File.createTempFile("testlogconfig", "properties");
tempLogFile.deleteOnExit();
CommonTestUtils.printLog4jDebugConfig(tempLogFile);
final int jobManagerPort = NetUtils.getAvailablePort();
// start a JobManager
Tuple2<String, Object> localAddress = new Tuple2<String, Object>("localhost", jobManagerPort);
jmActorSystem = AkkaUtils.createActorSystem(new Configuration(), new Some<Tuple2<String, Object>>(localAddress));
ActorRef jmActor = JobManager.startJobManagerActors(new Configuration(), jmActorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), JobManager.class, MemoryArchivist.class)._1;
// start a ResourceManager
StandaloneLeaderRetrievalService standaloneLeaderRetrievalService = new StandaloneLeaderRetrievalService(AkkaUtils.getAkkaURL(jmActorSystem, jmActor));
FlinkResourceManager.startResourceManagerActors(new Configuration(), jmActorSystem, standaloneLeaderRetrievalService, StandaloneResourceManager.class);
final int taskManagerPort = NetUtils.getAvailablePort();
// start the task manager process
String[] command = new String[] { javaCommand, "-Dlog.level=DEBUG", "-Dlog4j.configuration=file:" + tempLogFile.getAbsolutePath(), "-Xms256m", "-Xmx256m", "-classpath", getCurrentClasspath(), TaskManagerTestEntryPoint.class.getName(), String.valueOf(jobManagerPort), String.valueOf(taskManagerPort) };
ProcessBuilder bld = new ProcessBuilder(command);
taskManagerProcess = bld.start();
// connect the process's error stream to processOutput; the forwarder instance is not kept
// NOTE(review): presumably PipeForwarder copies in a background thread - confirm
new PipeForwarder(taskManagerProcess.getErrorStream(), processOutput);
// grab the reference to the TaskManager. try multiple times, until the process
// is started and the TaskManager is up
String taskManagerActorName = String.format("akka.tcp://flink@%s/user/%s", "localhost:" + taskManagerPort, TaskManager.TASK_MANAGER_NAME());
ActorRef taskManagerRef = null;
Throwable lastError = null;
// up to 40 lookup attempts (25 s timeout each), pausing 500 ms after a failed one
for (int i = 0; i < 40; i++) {
try {
taskManagerRef = TaskManager.getTaskManagerRemoteReference(taskManagerActorName, jmActorSystem, new FiniteDuration(25, TimeUnit.SECONDS));
break;
} catch (Throwable t) {
// TaskManager probably not ready yet
lastError = t;
}
Thread.sleep(500);
}
assertTrue("TaskManager process died", isProcessAlive(taskManagerProcess));
if (taskManagerRef == null) {
// print the most recent lookup failure before failing, to aid debugging
if (lastError != null) {
lastError.printStackTrace();
}
fail("TaskManager process did not launch the TaskManager properly. Failed to look up " + taskManagerActorName);
}
// kill the TaskManager actor
onTaskManagerProcessRunning(taskManagerRef);
// wait for max 10 seconds for the process to terminate
{
long now = System.currentTimeMillis();
long deadline = now + 10000;
while (now < deadline && isProcessAlive(taskManagerProcess)) {
Thread.sleep(100);
now = System.currentTimeMillis();
}
}
assertFalse("TaskManager process did not terminate upon actor death", isProcessAlive(taskManagerProcess));
int returnCode = taskManagerProcess.exitValue();
assertEquals("TaskManager died, but not because of the process reaper", TaskManager.RUNTIME_FAILURE_RETURN_CODE(), returnCode);
onTaskManagerProcessTerminated(processOutput.toString());
} catch (Exception e) {
// unexpected exception: dump the stack trace and the process output, then fail the test
e.printStackTrace();
printProcessLog(processOutput.toString());
fail(e.getMessage());
} catch (Error e) {
// errors (including assertion failures): dump the process output and rethrow unchanged
e.printStackTrace();
printProcessLog(processOutput.toString());
throw e;
} finally {
// always clean up the child process and the actor system
if (taskManagerProcess != null) {
taskManagerProcess.destroy();
}
if (jmActorSystem != null) {
jmActorSystem.shutdown();
}
}
}
use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class TaskManagerRegistrationTest method testTaskManagerNoExcessiveRegistrationMessages.
/**
 * Checks that a TaskManager whose registration keeps being refused does not flood the
 * JobManager with registration messages.
 *
 * <p>The test kit's own actor plays the JobManager. For 5 seconds every registration attempt
 * is refused; afterwards the registration messages arriving within another 5-second
 * window are collected and their number is compared against an upper bound derived from
 * the configured registration pauses.
 */
@Test
public void testTaskManagerNoExcessiveRegistrationMessages() throws Exception {
    new JavaTestKit(actorSystem) {
        {
            ActorGateway jm = null;
            ActorGateway taskManager = null;

            try {
                final FiniteDuration window = new FiniteDuration(5, TimeUnit.SECONDS);
                final long refusedPauseMillis = 500;
                final long initialPauseMillis = 100;
                final long maxDelayMillis = 30000;

                // the test actor (the test kit) acts as the JobManager so that the
                // registration messages can be intercepted
                jm = TestingUtils.createForwardingActor(actorSystem, getTestActor(), Option.<String>empty());
                final ActorGateway jmGateway = jm;

                Configuration tmConfig = new Configuration(config);
                tmConfig.setString(ConfigConstants.TASK_MANAGER_REFUSED_REGISTRATION_PAUSE, refusedPauseMillis + " ms");
                tmConfig.setString(ConfigConstants.TASK_MANAGER_INITIAL_REGISTRATION_PAUSE, initialPauseMillis + " ms");

                taskManager = createTaskManager(actorSystem, jmGateway, tmConfig, true, false);
                final ActorGateway taskManagerGateway = taskManager;

                // phase 1: refuse every registration attempt until the window has elapsed
                final Deadline refusalDeadline = window.fromNow();
                try {
                    while (refusalDeadline.hasTimeLeft()) {
                        // wait for the next registration attempt ...
                        expectMsgClass(refusalDeadline.timeLeft(), RegisterTaskManager.class);
                        // ... and decline it
                        taskManagerGateway.tell(new RefuseRegistration(new Exception("test reason")), jmGateway);
                    }
                } catch (AssertionError error) {
                    // expected: no further message arrived before the time ran out
                }

                // phase 2: collect the registration messages that still arrive
                RegisterTaskManager[] received = new ReceiveWhile<RegisterTaskManager>(RegisterTaskManager.class, window) {
                    @Override
                    protected RegisterTaskManager match(Object msg) throws Exception {
                        if (!(msg instanceof RegisterTaskManager)) {
                            throw noMatch();
                        }
                        return (RegisterTaskManager) msg;
                    }
                }.get();

                // NOTE(review): the bound presumably models pauses that double from the
                // initial pause up to maxDelayMillis - confirm against the TaskManager
                final double ln2 = Math.log(2);
                final long windowMillis = window.toMillis();

                int cappedExponent = (int) Math.floor(Math.log((double) maxDelayMillis / initialPauseMillis + 1) / ln2);
                int windowExponent = (int) Math.ceil(Math.log((double) windowMillis / initialPauseMillis + 1) / ln2);
                int exp = Math.min(cappedExponent, windowExponent);

                long uncoveredMillis = windowMillis - (initialPauseMillis * (1 << exp));

                int expectedMessages = exp;
                if (uncoveredMillis > 0) {
                    expectedMessages = (int) (expectedMessages + Math.ceil((double) uncoveredMillis / maxDelayMillis));
                }

                // allow twice the computed number before calling it excessive
                int upperBound = expectedMessages * 2;

                assertTrue("The number of RegisterTaskManager messages #" + received.length + " should be less than #" + upperBound, received.length <= upperBound);
            } finally {
                stopActor(taskManager);
                stopActor(jm);
            }
        }
    };
}
use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class TaskManagerRegistrationTest method testDelayedRegistration.
/**
 * A test that verifies that a TaskManager which is started before the JobManager is
 * available still registers at the JobManager once that one has been started.
 */
@Test
public void testDelayedRegistration() {
    new JavaTestKit(actorSystem) {
        {
            ActorGateway jobManager = null;
            ActorGateway taskManager = null;

            // three times the regular timeout for the confirmation request below
            FiniteDuration delayedTimeout = timeout.$times(3);

            try {
                // start a TaskManager that tries to register at the JobManager before the JobManager is
                // available. we give it the regular JobManager akka URL
                taskManager = createTaskManager(actorSystem, JobManager.getLocalJobManagerAkkaURL(Option.<String>empty()), new Configuration(), true, false);

                // let it try for a bit
                Thread.sleep(6000);

                // now start the JobManager, with the regular akka URL
                jobManager = createJobManager(actorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), new Configuration());

                // a single ResourceManager is started for the JobManager (the call used to be
                // duplicated, which started a second one for the same JobManager)
                startResourceManager(config, jobManager.actor());

                // check that the TaskManager is registered
                Future<Object> responseFuture = taskManager.ask(TaskManagerMessages.getNotifyWhenRegisteredAtJobManagerMessage(), delayedTimeout);
                Object response = Await.result(responseFuture, delayedTimeout);

                // this is a hack to work around the way Java can interact with scala case objects
                Class<?> confirmClass = TaskManagerMessages.getRegisteredAtJobManagerMessage().getClass();
                // isInstance is false for null, so no separate null check is needed
                assertTrue(confirmClass.isInstance(response));
            } catch (Exception e) {
                e.printStackTrace();
                fail(e.getMessage());
            } finally {
                stopActor(taskManager);
                stopActor(jobManager);
            }
        }
    };
}
Aggregations