use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class ZooKeeperLeaderElectionTest method testZooKeeperReelection.
/**
* Tests repeatedly the reelection of still available LeaderContender. After a contender has
* been elected as the leader, it is removed. This forces the ZooKeeperLeaderElectionService
* to elect a new leader.
*/
@Test
public void testZooKeeperReelection() throws Exception {
Configuration configuration = new Configuration();
configuration.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, testingServer.getConnectString());
configuration.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");
Deadline deadline = new FiniteDuration(5, TimeUnit.MINUTES).fromNow();
int num = 20;
ZooKeeperLeaderElectionService[] leaderElectionService = new ZooKeeperLeaderElectionService[num];
TestingContender[] contenders = new TestingContender[num];
ZooKeeperLeaderRetrievalService leaderRetrievalService = null;
TestingListener listener = new TestingListener();
try {
leaderRetrievalService = ZooKeeperUtils.createLeaderRetrievalService(configuration);
LOG.debug("Start leader retrieval service for the TestingListener.");
leaderRetrievalService.start(listener);
for (int i = 0; i < num; i++) {
leaderElectionService[i] = ZooKeeperUtils.createLeaderElectionService(configuration);
contenders[i] = new TestingContender(TEST_URL + "_" + i, leaderElectionService[i]);
LOG.debug("Start leader election service for contender #{}.", i);
leaderElectionService[i].start(contenders[i]);
}
String pattern = TEST_URL + "_" + "(\\d+)";
Pattern regex = Pattern.compile(pattern);
int numberSeenLeaders = 0;
while (deadline.hasTimeLeft() && numberSeenLeaders < num) {
LOG.debug("Wait for new leader #{}.", numberSeenLeaders);
String address = listener.waitForNewLeader(deadline.timeLeft().toMillis());
Matcher m = regex.matcher(address);
if (m.find()) {
int index = Integer.parseInt(m.group(1));
TestingContender contender = contenders[index];
// check that the retrieval service has retrieved the correct leader
if (address.equals(contender.getAddress()) && listener.getLeaderSessionID().equals(contender.getLeaderSessionID())) {
// kill the election service of the leader
LOG.debug("Stop leader election service of contender #{}.", numberSeenLeaders);
leaderElectionService[index].stop();
leaderElectionService[index] = null;
numberSeenLeaders++;
}
} else {
fail("Did not find the leader's index.");
}
}
assertFalse(deadline.isOverdue());
assertEquals(num, numberSeenLeaders);
} finally {
if (leaderRetrievalService != null) {
leaderRetrievalService.stop();
}
for (ZooKeeperLeaderElectionService electionService : leaderElectionService) {
if (electionService != null) {
electionService.stop();
}
}
}
}
use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class ZooKeeperLeaderRetrievalTest method testConnectingAddressRetrievalWithDelayedLeaderElection.
/**
* Tests that LeaderRetrievalUtils.findConnectingAdress finds the correct connecting address
* in case of an old leader address in ZooKeeper and a subsequent election of a new leader.
* The findConnectingAddress should block until the new leader has been elected and his
* address has been written to ZooKeeper.
*/
@Test
public void testConnectingAddressRetrievalWithDelayedLeaderElection() throws Exception {
FiniteDuration timeout = new FiniteDuration(1, TimeUnit.MINUTES);
Configuration config = new Configuration();
long sleepingTime = 1000;
config.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");
config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, testingServer.getConnectString());
LeaderElectionService leaderElectionService = null;
LeaderElectionService faultyLeaderElectionService;
ServerSocket serverSocket;
InetAddress localHost;
Thread thread;
CuratorFramework[] client = new CuratorFramework[2];
try {
client[0] = ZooKeeperUtils.startCuratorFramework(config);
client[1] = ZooKeeperUtils.startCuratorFramework(config);
String wrongHostPort = NetUtils.unresolvedHostAndPortToNormalizedString("1.1.1.1", 1234);
String wrongAddress = JobManager.getRemoteJobManagerAkkaURL(AkkaUtils.getAkkaProtocol(config), wrongHostPort, Option.<String>empty());
try {
localHost = InetAddress.getLocalHost();
serverSocket = new ServerSocket(0, 50, localHost);
} catch (UnknownHostException e) {
// may happen if disconnected. skip test.
System.err.println("Skipping 'testNetworkInterfaceSelection' test.");
return;
} catch (IOException e) {
// may happen in certain test setups, skip test.
System.err.println("Skipping 'testNetworkInterfaceSelection' test.");
return;
}
InetSocketAddress correctInetSocketAddress = new InetSocketAddress(localHost, serverSocket.getLocalPort());
String hostPort = NetUtils.unresolvedHostAndPortToNormalizedString(localHost.getHostName(), correctInetSocketAddress.getPort());
String correctAddress = JobManager.getRemoteJobManagerAkkaURL(AkkaUtils.getAkkaProtocol(config), hostPort, Option.<String>empty());
faultyLeaderElectionService = ZooKeeperUtils.createLeaderElectionService(client[0], config);
TestingContender wrongLeaderAddressContender = new TestingContender(wrongAddress, faultyLeaderElectionService);
faultyLeaderElectionService.start(wrongLeaderAddressContender);
FindConnectingAddress findConnectingAddress = new FindConnectingAddress(config, timeout);
thread = new Thread(findConnectingAddress);
thread.start();
leaderElectionService = ZooKeeperUtils.createLeaderElectionService(client[1], config);
TestingContender correctLeaderAddressContender = new TestingContender(correctAddress, leaderElectionService);
Thread.sleep(sleepingTime);
faultyLeaderElectionService.stop();
leaderElectionService.start(correctLeaderAddressContender);
thread.join();
InetAddress result = findConnectingAddress.getInetAddress();
// check that we can connect to the localHost
Socket socket = new Socket();
try {
// port 0 = let the OS choose the port
SocketAddress bindP = new InetSocketAddress(result, 0);
// machine
socket.bind(bindP);
socket.connect(correctInetSocketAddress, 1000);
} finally {
socket.close();
}
} finally {
if (leaderElectionService != null) {
leaderElectionService.stop();
}
if (client[0] != null) {
client[0].close();
}
if (client[1] != null) {
client[1].close();
}
}
}
use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class ZooKeeperLeaderRetrievalTest method testTimeoutOfFindConnectingAddress.
/**
* Tests that the LeaderRetrievalUtils.findConnectingAddress stops trying to find the
* connecting address if no leader address has been specified. The call should return
* then InetAddress.getLocalHost().
*/
@Test
public void testTimeoutOfFindConnectingAddress() throws Exception {
Configuration config = new Configuration();
config.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");
config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, testingServer.getConnectString());
FiniteDuration timeout = new FiniteDuration(10, TimeUnit.SECONDS);
LeaderRetrievalService leaderRetrievalService = LeaderRetrievalUtils.createLeaderRetrievalService(config);
InetAddress result = LeaderRetrievalUtils.findConnectingAddress(leaderRetrievalService, timeout);
assertEquals(InetAddress.getLocalHost(), result);
}
use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class OneInputStreamTaskTest method testSnapshottingAndRestoring.
/**
* Tests that the stream operator can snapshot and restore the operator state of chained
* operators
*/
@Test
public void testSnapshottingAndRestoring() throws Exception {
final Deadline deadline = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();
final OneInputStreamTask<String, String> streamTask = new OneInputStreamTask<String, String>();
final OneInputStreamTaskTestHarness<String, String> testHarness = new OneInputStreamTaskTestHarness<String, String>(streamTask, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
testHarness.setupOutputForSingletonOperatorChain();
IdentityKeySelector<String> keySelector = new IdentityKeySelector<>();
testHarness.configureForKeyedStream(keySelector, BasicTypeInfo.STRING_TYPE_INFO);
long checkpointId = 1L;
long checkpointTimestamp = 1L;
long recoveryTimestamp = 3L;
long seed = 2L;
int numberChainedTasks = 11;
StreamConfig streamConfig = testHarness.getStreamConfig();
configureChainedTestingStreamOperator(streamConfig, numberChainedTasks, seed, recoveryTimestamp);
AcknowledgeStreamMockEnvironment env = new AcknowledgeStreamMockEnvironment(testHarness.jobConfig, testHarness.taskConfig, testHarness.executionConfig, testHarness.memorySize, new MockInputSplitProvider(), testHarness.bufferSize);
// reset number of restore calls
TestingStreamOperator.numberRestoreCalls = 0;
testHarness.invoke(env);
testHarness.waitForTaskRunning(deadline.timeLeft().toMillis());
CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, checkpointTimestamp);
while (!streamTask.triggerCheckpoint(checkpointMetaData, CheckpointOptions.forFullCheckpoint())) ;
// since no state was set, there shouldn't be restore calls
assertEquals(0, TestingStreamOperator.numberRestoreCalls);
env.getCheckpointLatch().await();
assertEquals(checkpointId, env.getCheckpointId());
testHarness.endInput();
testHarness.waitForTaskCompletion(deadline.timeLeft().toMillis());
final OneInputStreamTask<String, String> restoredTask = new OneInputStreamTask<String, String>();
restoredTask.setInitialState(new TaskStateHandles(env.getCheckpointStateHandles()));
final OneInputStreamTaskTestHarness<String, String> restoredTaskHarness = new OneInputStreamTaskTestHarness<String, String>(restoredTask, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
restoredTaskHarness.configureForKeyedStream(keySelector, BasicTypeInfo.STRING_TYPE_INFO);
StreamConfig restoredTaskStreamConfig = restoredTaskHarness.getStreamConfig();
configureChainedTestingStreamOperator(restoredTaskStreamConfig, numberChainedTasks, seed, recoveryTimestamp);
TestingStreamOperator.numberRestoreCalls = 0;
restoredTaskHarness.invoke();
restoredTaskHarness.endInput();
restoredTaskHarness.waitForTaskCompletion(deadline.timeLeft().toMillis());
// restore of every chained operator should have been called
assertEquals(numberChainedTasks, TestingStreamOperator.numberRestoreCalls);
TestingStreamOperator.numberRestoreCalls = 0;
}
use of scala.concurrent.duration.FiniteDuration in project flink by apache.
the class JobClient method awaitJobResult.
/**
* Given a JobListeningContext, awaits the result of the job execution that this context is bound to
* @param listeningContext The listening context of the job execution
* @return The result of the execution
* @throws JobExecutionException if anything goes wrong while monitoring the job
*/
public static JobExecutionResult awaitJobResult(JobListeningContext listeningContext) throws JobExecutionException {
final JobID jobID = listeningContext.getJobID();
final ActorRef jobClientActor = listeningContext.getJobClientActor();
final Future<Object> jobSubmissionFuture = listeningContext.getJobResultFuture();
final FiniteDuration askTimeout = listeningContext.getTimeout();
// retrieves class loader if necessary
final ClassLoader classLoader = listeningContext.getClassLoader();
// ping the JobClientActor from time to time to check if it is still running
while (!jobSubmissionFuture.isCompleted()) {
try {
Await.ready(jobSubmissionFuture, askTimeout);
} catch (InterruptedException e) {
throw new JobExecutionException(jobID, "Interrupted while waiting for job completion.");
} catch (TimeoutException e) {
try {
Await.result(Patterns.ask(jobClientActor, // Ping the Actor to see if it is alive
new Identify(true), Timeout.durationToTimeout(askTimeout)), askTimeout);
// we got a reply, continue waiting for the job result
} catch (Exception eInner) {
// thus the health check failed
if (!jobSubmissionFuture.isCompleted()) {
throw new JobExecutionException(jobID, "JobClientActor seems to have died before the JobExecutionResult could be retrieved.", eInner);
}
}
}
}
final Object answer;
try {
// we have already awaited the result, zero time to wait here
answer = Await.result(jobSubmissionFuture, Duration.Zero());
} catch (Throwable throwable) {
throw new JobExecutionException(jobID, "Couldn't retrieve the JobExecutionResult from the JobManager.", throwable);
} finally {
// failsafe shutdown of the client actor
jobClientActor.tell(PoisonPill.getInstance(), ActorRef.noSender());
}
// second block handles the actual response
if (answer instanceof JobManagerMessages.JobResultSuccess) {
LOG.info("Job execution complete");
SerializedJobExecutionResult result = ((JobManagerMessages.JobResultSuccess) answer).result();
if (result != null) {
try {
return result.toJobExecutionResult(classLoader);
} catch (Throwable t) {
throw new JobExecutionException(jobID, "Job was successfully executed but JobExecutionResult could not be deserialized.");
}
} else {
throw new JobExecutionException(jobID, "Job was successfully executed but result contained a null JobExecutionResult.");
}
} else if (answer instanceof JobManagerMessages.JobResultFailure) {
LOG.info("Job execution failed");
SerializedThrowable serThrowable = ((JobManagerMessages.JobResultFailure) answer).cause();
if (serThrowable != null) {
Throwable cause = serThrowable.deserializeError(classLoader);
if (cause instanceof JobExecutionException) {
throw (JobExecutionException) cause;
} else {
throw new JobExecutionException(jobID, "Job execution failed", cause);
}
} else {
throw new JobExecutionException(jobID, "Job execution failed with null as failure cause.");
}
} else if (answer instanceof JobManagerMessages.JobNotFound) {
throw new JobRetrievalException(((JobManagerMessages.JobNotFound) answer).jobID(), "Couldn't retrieve Job " + jobID + " because it was not running.");
} else {
throw new JobExecutionException(jobID, "Unknown answer from JobManager after submitting the job: " + answer);
}
}
Aggregations