
Example 6 with FiniteDuration

Use of scala.concurrent.duration.FiniteDuration in the Apache Flink project.

Class ZooKeeperLeaderElectionTest, method testZooKeeperReelection.

/**
 * Repeatedly tests the re-election of the still available LeaderContenders. After a contender
 * has been elected as the leader, it is removed. This forces the ZooKeeperLeaderElectionService
 * to elect a new leader.
 */
@Test
public void testZooKeeperReelection() throws Exception {
    Configuration configuration = new Configuration();
    configuration.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, testingServer.getConnectString());
    configuration.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");
    Deadline deadline = new FiniteDuration(5, TimeUnit.MINUTES).fromNow();
    int num = 20;
    ZooKeeperLeaderElectionService[] leaderElectionService = new ZooKeeperLeaderElectionService[num];
    TestingContender[] contenders = new TestingContender[num];
    ZooKeeperLeaderRetrievalService leaderRetrievalService = null;
    TestingListener listener = new TestingListener();
    try {
        leaderRetrievalService = ZooKeeperUtils.createLeaderRetrievalService(configuration);
        LOG.debug("Start leader retrieval service for the TestingListener.");
        leaderRetrievalService.start(listener);
        for (int i = 0; i < num; i++) {
            leaderElectionService[i] = ZooKeeperUtils.createLeaderElectionService(configuration);
            contenders[i] = new TestingContender(TEST_URL + "_" + i, leaderElectionService[i]);
            LOG.debug("Start leader election service for contender #{}.", i);
            leaderElectionService[i].start(contenders[i]);
        }
        String pattern = TEST_URL + "_" + "(\\d+)";
        Pattern regex = Pattern.compile(pattern);
        int numberSeenLeaders = 0;
        while (deadline.hasTimeLeft() && numberSeenLeaders < num) {
            LOG.debug("Wait for new leader #{}.", numberSeenLeaders);
            String address = listener.waitForNewLeader(deadline.timeLeft().toMillis());
            Matcher m = regex.matcher(address);
            if (m.find()) {
                int index = Integer.parseInt(m.group(1));
                TestingContender contender = contenders[index];
                // check that the retrieval service has retrieved the correct leader
                if (address.equals(contender.getAddress()) && listener.getLeaderSessionID().equals(contender.getLeaderSessionID())) {
                    // kill the election service of the leader
                    LOG.debug("Stop leader election service of contender #{}.", numberSeenLeaders);
                    leaderElectionService[index].stop();
                    leaderElectionService[index] = null;
                    numberSeenLeaders++;
                }
            } else {
                fail("Did not find the leader's index.");
            }
        }
        assertFalse(deadline.isOverdue());
        assertEquals(num, numberSeenLeaders);
    } finally {
        if (leaderRetrievalService != null) {
            leaderRetrievalService.stop();
        }
        for (ZooKeeperLeaderElectionService electionService : leaderElectionService) {
            if (electionService != null) {
                electionService.stop();
            }
        }
    }
}
Also used: Pattern (java.util.regex.Pattern), Configuration (org.apache.flink.configuration.Configuration), Matcher (java.util.regex.Matcher), Deadline (scala.concurrent.duration.Deadline), FiniteDuration (scala.concurrent.duration.FiniteDuration), ZooKeeperLeaderRetrievalService (org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalService), Test (org.junit.Test)
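The timing pattern behind this test is worth isolating: a FiniteDuration is turned into a Deadline with fromNow(), and every blocking wait inside the loop is bounded by the time that is still left of that single budget. Below is a minimal, self-contained sketch of that pattern; the simulated Thread.sleep stands in for listener.waitForNewLeader from the test above, and the class name is illustrative, not Flink code.

import java.util.concurrent.TimeUnit;

import scala.concurrent.duration.Deadline;
import scala.concurrent.duration.FiniteDuration;

public class DeadlineLoopSketch {

    public static void main(String[] args) throws Exception {
        // One budget for the whole loop, not a per-iteration timeout.
        Deadline deadline = new FiniteDuration(5, TimeUnit.MINUTES).fromNow();

        int seen = 0;
        final int expected = 20;

        while (deadline.hasTimeLeft() && seen < expected) {
            // Bound each blocking call by the remaining budget, as the test does with
            // listener.waitForNewLeader(deadline.timeLeft().toMillis()).
            long remainingMillis = deadline.timeLeft().toMillis();
            Thread.sleep(Math.min(10L, remainingMillis)); // simulated wait
            seen++;
        }

        // isOverdue() distinguishes "all leaders seen" from "ran out of time".
        if (deadline.isOverdue() || seen < expected) {
            throw new IllegalStateException("Timed out after seeing " + seen + " leaders.");
        }
    }
}

The assertions at the end of the test (assertFalse(deadline.isOverdue()) and assertEquals(num, numberSeenLeaders)) are the JUnit form of that final check.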

Example 7 with FiniteDuration

Use of scala.concurrent.duration.FiniteDuration in the Apache Flink project.

Class ZooKeeperLeaderRetrievalTest, method testConnectingAddressRetrievalWithDelayedLeaderElection.

/**
 * Tests that LeaderRetrievalUtils.findConnectingAddress finds the correct connecting address
 * in case of an old leader address in ZooKeeper and a subsequent election of a new leader.
 * The findConnectingAddress call should block until the new leader has been elected and its
 * address has been written to ZooKeeper.
 */
@Test
public void testConnectingAddressRetrievalWithDelayedLeaderElection() throws Exception {
    FiniteDuration timeout = new FiniteDuration(1, TimeUnit.MINUTES);
    Configuration config = new Configuration();
    long sleepingTime = 1000;
    config.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");
    config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, testingServer.getConnectString());
    LeaderElectionService leaderElectionService = null;
    LeaderElectionService faultyLeaderElectionService;
    ServerSocket serverSocket;
    InetAddress localHost;
    Thread thread;
    CuratorFramework[] client = new CuratorFramework[2];
    try {
        client[0] = ZooKeeperUtils.startCuratorFramework(config);
        client[1] = ZooKeeperUtils.startCuratorFramework(config);
        String wrongHostPort = NetUtils.unresolvedHostAndPortToNormalizedString("1.1.1.1", 1234);
        String wrongAddress = JobManager.getRemoteJobManagerAkkaURL(AkkaUtils.getAkkaProtocol(config), wrongHostPort, Option.<String>empty());
        try {
            localHost = InetAddress.getLocalHost();
            serverSocket = new ServerSocket(0, 50, localHost);
        } catch (UnknownHostException e) {
            // may happen if disconnected. skip test.
            System.err.println("Skipping 'testConnectingAddressRetrievalWithDelayedLeaderElection' test.");
            return;
        } catch (IOException e) {
            // may happen in certain test setups, skip test.
            System.err.println("Skipping 'testConnectingAddressRetrievalWithDelayedLeaderElection' test.");
            return;
        }
        InetSocketAddress correctInetSocketAddress = new InetSocketAddress(localHost, serverSocket.getLocalPort());
        String hostPort = NetUtils.unresolvedHostAndPortToNormalizedString(localHost.getHostName(), correctInetSocketAddress.getPort());
        String correctAddress = JobManager.getRemoteJobManagerAkkaURL(AkkaUtils.getAkkaProtocol(config), hostPort, Option.<String>empty());
        faultyLeaderElectionService = ZooKeeperUtils.createLeaderElectionService(client[0], config);
        TestingContender wrongLeaderAddressContender = new TestingContender(wrongAddress, faultyLeaderElectionService);
        faultyLeaderElectionService.start(wrongLeaderAddressContender);
        FindConnectingAddress findConnectingAddress = new FindConnectingAddress(config, timeout);
        thread = new Thread(findConnectingAddress);
        thread.start();
        leaderElectionService = ZooKeeperUtils.createLeaderElectionService(client[1], config);
        TestingContender correctLeaderAddressContender = new TestingContender(correctAddress, leaderElectionService);
        Thread.sleep(sleepingTime);
        faultyLeaderElectionService.stop();
        leaderElectionService.start(correctLeaderAddressContender);
        thread.join();
        InetAddress result = findConnectingAddress.getInetAddress();
        // check that we can connect to the localHost
        Socket socket = new Socket();
        try {
            // port 0 = let the OS choose the port
            SocketAddress bindP = new InetSocketAddress(result, 0);
            // bind to the address that was found on this machine before connecting
            socket.bind(bindP);
            socket.connect(correctInetSocketAddress, 1000);
        } finally {
            socket.close();
        }
    } finally {
        if (leaderElectionService != null) {
            leaderElectionService.stop();
        }
        if (client[0] != null) {
            client[0].close();
        }
        if (client[1] != null) {
            client[1].close();
        }
    }
}
Also used: Configuration (org.apache.flink.configuration.Configuration), UnknownHostException (java.net.UnknownHostException), InetSocketAddress (java.net.InetSocketAddress), FiniteDuration (scala.concurrent.duration.FiniteDuration), ServerSocket (java.net.ServerSocket), IOException (java.io.IOException), CuratorFramework (org.apache.curator.framework.CuratorFramework), SocketAddress (java.net.SocketAddress), InetAddress (java.net.InetAddress), Socket (java.net.Socket), Test (org.junit.Test)
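The connectivity check at the end of the test is a small pattern of its own: open a ServerSocket on an OS-chosen port, bind a client Socket to the candidate local address (again port 0), and connect with an explicit timeout. A stripped-down sketch of just that check, using only standard JDK classes; the class name and setup are illustrative, not taken from the Flink test fixtures.

import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.ServerSocket;
import java.net.Socket;

public class ConnectivityCheckSketch {

    public static void main(String[] args) throws Exception {
        InetAddress localHost = InetAddress.getLocalHost();

        // Server side: port 0 lets the OS pick a free port, backlog of 50.
        try (ServerSocket serverSocket = new ServerSocket(0, 50, localHost)) {
            InetSocketAddress target =
                new InetSocketAddress(localHost, serverSocket.getLocalPort());

            // Client side: bind to the candidate local address on an ephemeral port,
            // then connect with a 1 second timeout, mirroring the test above.
            try (Socket socket = new Socket()) {
                socket.bind(new InetSocketAddress(localHost, 0));
                socket.connect(target, 1000);
                System.out.println("Connected via " + socket.getLocalAddress());
            }
        }
    }
}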

Example 8 with FiniteDuration

Use of scala.concurrent.duration.FiniteDuration in the Apache Flink project.

Class ZooKeeperLeaderRetrievalTest, method testTimeoutOfFindConnectingAddress.

/**
 * Tests that LeaderRetrievalUtils.findConnectingAddress stops trying to find the
 * connecting address if no leader address has been specified. The call should then return
 * InetAddress.getLocalHost().
 */
@Test
public void testTimeoutOfFindConnectingAddress() throws Exception {
    Configuration config = new Configuration();
    config.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");
    config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, testingServer.getConnectString());
    FiniteDuration timeout = new FiniteDuration(10, TimeUnit.SECONDS);
    LeaderRetrievalService leaderRetrievalService = LeaderRetrievalUtils.createLeaderRetrievalService(config);
    InetAddress result = LeaderRetrievalUtils.findConnectingAddress(leaderRetrievalService, timeout);
    assertEquals(InetAddress.getLocalHost(), result);
}
Also used: Configuration (org.apache.flink.configuration.Configuration), LeaderRetrievalService (org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService), FiniteDuration (scala.concurrent.duration.FiniteDuration), InetAddress (java.net.InetAddress), Test (org.junit.Test)
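In this example the FiniteDuration is consumed directly as a blocking timeout rather than being turned into a Deadline first. A tiny sketch of the two ways these tests consume such a duration, converting it to milliseconds for Java APIs and converting it to a Deadline for bounded loops; only the Scala duration calls already shown above are used, and the class name is illustrative.

import java.util.concurrent.TimeUnit;

import scala.concurrent.duration.Deadline;
import scala.concurrent.duration.FiniteDuration;

public class FiniteDurationUsageSketch {

    public static void main(String[] args) {
        FiniteDuration timeout = new FiniteDuration(10, TimeUnit.SECONDS);

        // 1) Hand the value to a Java API that expects milliseconds.
        long millis = timeout.toMillis(); // 10000
        System.out.println("timeout in ms: " + millis);

        // 2) Turn it into a Deadline when a loop needs a shrinking budget.
        Deadline deadline = timeout.fromNow();
        System.out.println("time left in ms: " + deadline.timeLeft().toMillis());
    }
}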

Example 9 with FiniteDuration

Use of scala.concurrent.duration.FiniteDuration in the Apache Flink project.

Class OneInputStreamTaskTest, method testSnapshottingAndRestoring.

/**
 * Tests that the stream operator can snapshot and restore the operator state of chained
 * operators.
 */
@Test
public void testSnapshottingAndRestoring() throws Exception {
    final Deadline deadline = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();
    final OneInputStreamTask<String, String> streamTask = new OneInputStreamTask<String, String>();
    final OneInputStreamTaskTestHarness<String, String> testHarness = new OneInputStreamTaskTestHarness<String, String>(streamTask, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
    testHarness.setupOutputForSingletonOperatorChain();
    IdentityKeySelector<String> keySelector = new IdentityKeySelector<>();
    testHarness.configureForKeyedStream(keySelector, BasicTypeInfo.STRING_TYPE_INFO);
    long checkpointId = 1L;
    long checkpointTimestamp = 1L;
    long recoveryTimestamp = 3L;
    long seed = 2L;
    int numberChainedTasks = 11;
    StreamConfig streamConfig = testHarness.getStreamConfig();
    configureChainedTestingStreamOperator(streamConfig, numberChainedTasks, seed, recoveryTimestamp);
    AcknowledgeStreamMockEnvironment env = new AcknowledgeStreamMockEnvironment(testHarness.jobConfig, testHarness.taskConfig, testHarness.executionConfig, testHarness.memorySize, new MockInputSplitProvider(), testHarness.bufferSize);
    // reset number of restore calls
    TestingStreamOperator.numberRestoreCalls = 0;
    testHarness.invoke(env);
    testHarness.waitForTaskRunning(deadline.timeLeft().toMillis());
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, checkpointTimestamp);
    // retry until the task accepts the checkpoint trigger
    while (!streamTask.triggerCheckpoint(checkpointMetaData, CheckpointOptions.forFullCheckpoint())) {
        // keep trying
    }
    // since no state was set, there shouldn't be restore calls
    assertEquals(0, TestingStreamOperator.numberRestoreCalls);
    env.getCheckpointLatch().await();
    assertEquals(checkpointId, env.getCheckpointId());
    testHarness.endInput();
    testHarness.waitForTaskCompletion(deadline.timeLeft().toMillis());
    final OneInputStreamTask<String, String> restoredTask = new OneInputStreamTask<String, String>();
    restoredTask.setInitialState(new TaskStateHandles(env.getCheckpointStateHandles()));
    final OneInputStreamTaskTestHarness<String, String> restoredTaskHarness = new OneInputStreamTaskTestHarness<String, String>(restoredTask, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
    restoredTaskHarness.configureForKeyedStream(keySelector, BasicTypeInfo.STRING_TYPE_INFO);
    StreamConfig restoredTaskStreamConfig = restoredTaskHarness.getStreamConfig();
    configureChainedTestingStreamOperator(restoredTaskStreamConfig, numberChainedTasks, seed, recoveryTimestamp);
    TestingStreamOperator.numberRestoreCalls = 0;
    restoredTaskHarness.invoke();
    restoredTaskHarness.endInput();
    restoredTaskHarness.waitForTaskCompletion(deadline.timeLeft().toMillis());
    // restore of every chained operator should have been called
    assertEquals(numberChainedTasks, TestingStreamOperator.numberRestoreCalls);
    TestingStreamOperator.numberRestoreCalls = 0;
}
Also used: Deadline (scala.concurrent.duration.Deadline), FiniteDuration (scala.concurrent.duration.FiniteDuration), StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig), CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData), TaskStateHandles (org.apache.flink.runtime.state.TaskStateHandles), MockInputSplitProvider (org.apache.flink.runtime.operators.testutils.MockInputSplitProvider), Test (org.junit.Test)

Example 10 with FiniteDuration

Use of scala.concurrent.duration.FiniteDuration in the Apache Flink project.

Class JobClient, method awaitJobResult.

/**
 * Given a JobListeningContext, awaits the result of the job execution that this context is bound to.
 *
 * @param listeningContext The listening context of the job execution
 * @return The result of the execution
 * @throws JobExecutionException if anything goes wrong while monitoring the job
 */
public static JobExecutionResult awaitJobResult(JobListeningContext listeningContext) throws JobExecutionException {
    final JobID jobID = listeningContext.getJobID();
    final ActorRef jobClientActor = listeningContext.getJobClientActor();
    final Future<Object> jobSubmissionFuture = listeningContext.getJobResultFuture();
    final FiniteDuration askTimeout = listeningContext.getTimeout();
    // retrieves class loader if necessary
    final ClassLoader classLoader = listeningContext.getClassLoader();
    // ping the JobClientActor from time to time to check if it is still running
    while (!jobSubmissionFuture.isCompleted()) {
        try {
            Await.ready(jobSubmissionFuture, askTimeout);
        } catch (InterruptedException e) {
            throw new JobExecutionException(jobID, "Interrupted while waiting for job completion.");
        } catch (TimeoutException e) {
            try {
                // ping the actor to see if it is still alive
                Await.result(
                        Patterns.ask(jobClientActor, new Identify(true), Timeout.durationToTimeout(askTimeout)),
                        askTimeout);
                // we got a reply, continue waiting for the job result
            } catch (Exception eInner) {
                // the JobClientActor did not reply in time, thus the health check failed
                if (!jobSubmissionFuture.isCompleted()) {
                    throw new JobExecutionException(jobID, "JobClientActor seems to have died before the JobExecutionResult could be retrieved.", eInner);
                }
            }
        }
    }
    final Object answer;
    try {
        // we have already awaited the result, zero time to wait here
        answer = Await.result(jobSubmissionFuture, Duration.Zero());
    } catch (Throwable throwable) {
        throw new JobExecutionException(jobID, "Couldn't retrieve the JobExecutionResult from the JobManager.", throwable);
    } finally {
        // failsafe shutdown of the client actor
        jobClientActor.tell(PoisonPill.getInstance(), ActorRef.noSender());
    }
    // second block handles the actual response
    if (answer instanceof JobManagerMessages.JobResultSuccess) {
        LOG.info("Job execution complete");
        SerializedJobExecutionResult result = ((JobManagerMessages.JobResultSuccess) answer).result();
        if (result != null) {
            try {
                return result.toJobExecutionResult(classLoader);
            } catch (Throwable t) {
                throw new JobExecutionException(jobID, "Job was successfully executed but JobExecutionResult could not be deserialized.");
            }
        } else {
            throw new JobExecutionException(jobID, "Job was successfully executed but result contained a null JobExecutionResult.");
        }
    } else if (answer instanceof JobManagerMessages.JobResultFailure) {
        LOG.info("Job execution failed");
        SerializedThrowable serThrowable = ((JobManagerMessages.JobResultFailure) answer).cause();
        if (serThrowable != null) {
            Throwable cause = serThrowable.deserializeError(classLoader);
            if (cause instanceof JobExecutionException) {
                throw (JobExecutionException) cause;
            } else {
                throw new JobExecutionException(jobID, "Job execution failed", cause);
            }
        } else {
            throw new JobExecutionException(jobID, "Job execution failed with null as failure cause.");
        }
    } else if (answer instanceof JobManagerMessages.JobNotFound) {
        throw new JobRetrievalException(((JobManagerMessages.JobNotFound) answer).jobID(), "Couldn't retrieve Job " + jobID + " because it was not running.");
    } else {
        throw new JobExecutionException(jobID, "Unknown answer from JobManager after submitting the job: " + answer);
    }
}
Also used: ActorRef (akka.actor.ActorRef), JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages), FiniteDuration (scala.concurrent.duration.FiniteDuration), Identify (akka.actor.Identify), TimeoutException (java.util.concurrent.TimeoutException), IOException (java.io.IOException), FlinkUserCodeClassLoader (org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoader), SerializedThrowable (org.apache.flink.runtime.util.SerializedThrowable), JobID (org.apache.flink.api.common.JobID)
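The wait-and-ping structure above is reusable on its own: bound every Await.ready by the same FiniteDuration, treat a TimeoutException as "not finished yet", and only then run a liveness check before waiting again. Below is a self-contained sketch of that loop; the Promise, the background thread, and the class name are stand-ins for the job-submission future and the JobClientActor, not Flink code.

import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import scala.concurrent.Await;
import scala.concurrent.Future;
import scala.concurrent.Promise;
import scala.concurrent.duration.Duration;
import scala.concurrent.duration.FiniteDuration;

public class AwaitPollingSketch {

    public static void main(String[] args) throws Exception {
        FiniteDuration askTimeout = new FiniteDuration(2, TimeUnit.SECONDS);

        // Placeholder for the job-submission future; in JobClient it comes
        // from the JobListeningContext.
        Promise<Object> promise = scala.concurrent.Promise$.MODULE$.apply();
        Future<Object> resultFuture = promise.future();

        // Complete the future from another thread after a short delay,
        // standing in for the JobManager eventually answering.
        new Thread(() -> {
            try {
                Thread.sleep(3000);
            } catch (InterruptedException ignored) {
                Thread.currentThread().interrupt();
            }
            promise.success("done");
        }).start();

        // Poll: each Await.ready is bounded by askTimeout; a TimeoutException
        // only means "not finished yet". This is the spot where JobClient
        // pings the JobClientActor before waiting again.
        while (!resultFuture.isCompleted()) {
            try {
                Await.ready(resultFuture, askTimeout);
            } catch (TimeoutException e) {
                // a liveness check would go here
            }
        }

        // The future is known to be complete, so no additional waiting is needed.
        Object answer = Await.result(resultFuture, Duration.Zero());
        System.out.println(answer);
    }
}

Once isCompleted() returns true, Await.result with Duration.Zero() is safe, which is exactly what awaitJobResult does before handing the answer to the response-handling block.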

Aggregations

FiniteDuration (scala.concurrent.duration.FiniteDuration): 77
Test (org.junit.Test): 61
Configuration (org.apache.flink.configuration.Configuration): 37
ActorGateway (org.apache.flink.runtime.instance.ActorGateway): 30
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 27
ActorRef (akka.actor.ActorRef): 25
Deadline (scala.concurrent.duration.Deadline): 24
JobID (org.apache.flink.api.common.JobID): 19
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 19
JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages): 17
TestingJobManagerMessages (org.apache.flink.runtime.testingUtils.TestingJobManagerMessages): 13
ActorSystem (akka.actor.ActorSystem): 12
JavaTestKit (akka.testkit.JavaTestKit): 11
Timeout (akka.util.Timeout): 11
File (java.io.File): 11
TimeoutException (java.util.concurrent.TimeoutException): 11
AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway): 11
JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID): 11
Props (akka.actor.Props): 10
IOException (java.io.IOException): 10