Example 21 with Deadline

Use of org.apache.flink.api.common.time.Deadline in project flink by apache.

The class TestJobExecutor, method waitForFailover.

private void waitForFailover(BlockingQueue<TestEvent> queue) throws Exception {
    int timeoutMs = 10_000;
    Deadline deadline = Deadline.fromNow(Duration.ofMillis(timeoutMs));
    String operatorId = null;
    int subtaskId = -1;
    int attemptNumber = -1;
    while (deadline.hasTimeLeft()) {
        // Bound each blocking poll by the time remaining on the overall deadline.
        TestEvent e = queue.poll(deadline.timeLeft().toMillis(), MILLISECONDS);
        if (e instanceof TestCommandAckEvent) {
            TestCommandAckEvent ack = (TestCommandAckEvent) e;
            if (ack.getCommand() == FAIL) {
                // Remember which subtask acknowledged the FAIL command.
                operatorId = ack.operatorId;
                subtaskId = ack.subtaskIndex;
                attemptNumber = ack.getAttemptNumber();
            }
        } else if (e instanceof OperatorStartedEvent && operatorId != null) {
            OperatorStartedEvent started = (OperatorStartedEvent) e;
            // Done once the same subtask has restarted with a newer attempt number.
            if (started.operatorId.equals(operatorId)
                    && started.subtaskIndex == subtaskId
                    && started.getAttemptNumber() >= attemptNumber) {
                return;
            }
        }
    }
    throw new TimeoutException("No subtask restarted in " + timeoutMs + "ms");
}
Also used : TestEvent(org.apache.flink.runtime.operators.lifecycle.event.TestEvent) Deadline(org.apache.flink.api.common.time.Deadline) TestCommandAckEvent(org.apache.flink.runtime.operators.lifecycle.event.TestCommandAckEvent) OperatorStartedEvent(org.apache.flink.runtime.operators.lifecycle.event.OperatorStartedEvent) TimeoutException(java.util.concurrent.TimeoutException)
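
These examples share a common bounded-wait pattern: create the Deadline once, then cap every blocking wait with the time remaining, so the whole loop honors a single overall timeout. Below is a minimal, self-contained sketch of that pattern; the queue, element type, and helper name are illustrative and not taken from the Flink sources, only the Deadline calls above are.

import java.time.Duration;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.flink.api.common.time.Deadline;

public class DeadlineBoundedPollSketch {

    /** Polls the queue until the expected element arrives or the overall deadline expires. */
    static String pollUntil(BlockingQueue<String> queue, String expected, Duration timeout)
            throws InterruptedException, TimeoutException {
        Deadline deadline = Deadline.fromNow(timeout);
        while (deadline.hasTimeLeft()) {
            // Each blocking poll may wait at most as long as the deadline still allows.
            String element = queue.poll(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
            if (expected.equals(element)) {
                return element;
            }
        }
        throw new TimeoutException("Expected element did not arrive within " + timeout);
    }
}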

Example 22 with Deadline

Use of org.apache.flink.api.common.time.Deadline in project flink by apache.

The class YarnTestBase, method waitApplicationFinishedElseKillIt.

protected void waitApplicationFinishedElseKillIt(
        ApplicationId applicationId,
        Duration timeout,
        YarnClusterDescriptor yarnClusterDescriptor,
        int sleepIntervalInMS)
        throws Exception {
    Deadline deadline = Deadline.now().plus(timeout);
    YarnApplicationState state = getYarnClient().getApplicationReport(applicationId).getYarnApplicationState();
    // Poll the application state until FINISHED; fail fast on FAILED/KILLED and
    // kill the cluster if the deadline passes first.
    while (state != YarnApplicationState.FINISHED) {
        if (state == YarnApplicationState.FAILED || state == YarnApplicationState.KILLED) {
            Assert.fail("Application became FAILED or KILLED while expecting FINISHED");
        }
        if (deadline.isOverdue()) {
            yarnClusterDescriptor.killCluster(applicationId);
            Assert.fail("Application didn't finish before timeout");
        }
        sleep(sleepIntervalInMS);
        state = getYarnClient().getApplicationReport(applicationId).getYarnApplicationState();
    }
}
Also used : Deadline(org.apache.flink.api.common.time.Deadline) YarnApplicationState(org.apache.hadoop.yarn.api.records.YarnApplicationState)
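
Unlike the blocking-poll variant in the previous example, this YARN test builds the deadline with Deadline.now().plus(timeout) and checks isOverdue() between fixed sleep intervals. A hedged sketch of that polling style, with a hypothetical condition supplier standing in for the YARN state check:

import java.time.Duration;
import java.util.concurrent.TimeoutException;
import java.util.function.Supplier;
import org.apache.flink.api.common.time.Deadline;

public class DeadlinePollingSketch {

    /** Re-evaluates a condition at a fixed interval until it holds or the deadline passes. */
    static void waitForCondition(Supplier<Boolean> condition, Duration timeout, long sleepIntervalMs)
            throws InterruptedException, TimeoutException {
        // Built the same way as in the test above; equivalent to Deadline.fromNow(timeout).
        Deadline deadline = Deadline.now().plus(timeout);
        while (!condition.get()) {
            if (deadline.isOverdue()) {
                throw new TimeoutException("Condition not met within " + timeout);
            }
            Thread.sleep(sleepIntervalMs);
        }
    }
}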

Example 23 with Deadline

Use of org.apache.flink.api.common.time.Deadline in project flink by apache.

The class ZooKeeperLeaderElectionTest, method testZooKeeperReelection.

/**
 * Repeatedly tests the reelection of the still available LeaderContenders. After a contender
 * has been elected leader, its election service is stopped, which forces the
 * DefaultLeaderElectionService to elect a new leader among the remaining contenders.
 */
@Test
public void testZooKeeperReelection() throws Exception {
    Deadline deadline = Deadline.fromNow(Duration.ofMinutes(5L));
    int num = 10;
    DefaultLeaderElectionService[] leaderElectionService = new DefaultLeaderElectionService[num];
    TestingContender[] contenders = new TestingContender[num];
    DefaultLeaderRetrievalService leaderRetrievalService = null;
    TestingListener listener = new TestingListener();
    try {
        leaderRetrievalService = ZooKeeperUtils.createLeaderRetrievalService(curatorFrameworkWrapper.asCuratorFramework());
        LOG.debug("Start leader retrieval service for the TestingListener.");
        leaderRetrievalService.start(listener);
        for (int i = 0; i < num; i++) {
            leaderElectionService[i] = ZooKeeperUtils.createLeaderElectionService(curatorFrameworkWrapper.asCuratorFramework());
            contenders[i] = new TestingContender(createAddress(i), leaderElectionService[i]);
            LOG.debug("Start leader election service for contender #{}.", i);
            leaderElectionService[i].start(contenders[i]);
        }
        String pattern = LEADER_ADDRESS + "_" + "(\\d+)";
        Pattern regex = Pattern.compile(pattern);
        int numberSeenLeaders = 0;
        while (deadline.hasTimeLeft() && numberSeenLeaders < num) {
            LOG.debug("Wait for new leader #{}.", numberSeenLeaders);
            String address = listener.waitForNewLeader(deadline.timeLeft().toMillis());
            Matcher m = regex.matcher(address);
            if (m.find()) {
                int index = Integer.parseInt(m.group(1));
                TestingContender contender = contenders[index];
                // check that the retrieval service has retrieved the correct leader
                if (address.equals(createAddress(index))
                        && listener.getLeaderSessionID().equals(contender.getLeaderSessionID())) {
                    // kill the election service of the leader
                    LOG.debug("Stop leader election service of contender #{}.", numberSeenLeaders);
                    leaderElectionService[index].stop();
                    leaderElectionService[index] = null;
                    numberSeenLeaders++;
                }
            } else {
                fail("Did not find the leader's index.");
            }
        }
        assertFalse("Did not complete the leader reelection in time.", deadline.isOverdue());
        assertEquals(num, numberSeenLeaders);
    } finally {
        if (leaderRetrievalService != null) {
            leaderRetrievalService.stop();
        }
        for (DefaultLeaderElectionService electionService : leaderElectionService) {
            if (electionService != null) {
                electionService.stop();
            }
        }
    }
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) Deadline(org.apache.flink.api.common.time.Deadline) Mockito.anyString(org.mockito.Mockito.anyString) DefaultLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.DefaultLeaderRetrievalService) Test(org.junit.Test)

Example 24 with Deadline

Use of org.apache.flink.api.common.time.Deadline in project flink by apache.

The class SnapshotMigrationTestBase, method restoreAndExecute.

@SafeVarargs
protected final void restoreAndExecute(StreamExecutionEnvironment env, String snapshotPath, Tuple2<String, Integer>... expectedAccumulators) throws Exception {
    final Deadline deadLine = Deadline.fromNow(Duration.ofMinutes(5));
    ClusterClient<?> client = miniClusterResource.getClusterClient();
    // Submit the job
    JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(snapshotPath));
    JobID jobID = client.submitJob(jobGraph).get();
    boolean done = false;
    while (deadLine.hasTimeLeft()) {
        try {
            CompletableFuture<JobStatus> jobStatusFuture = client.getJobStatus(jobID);
            JobStatus jobStatus = jobStatusFuture.get(5, TimeUnit.SECONDS);
            if (jobStatus == JobStatus.FAILED) {
                LOG.warn("Job reached status failed", client.requestJobResult(jobID).get().getSerializedThrowable().get().deserializeError(ClassLoader.getSystemClassLoader()));
            }
            assertNotEquals(JobStatus.FAILED, jobStatus);
        } catch (Exception e) {
            fail("Could not connect to job: " + e);
        }
        Thread.sleep(100);
        Map<String, Object> accumulators = client.getAccumulators(jobID).get();
        boolean allDone = true;
        for (Tuple2<String, Integer> acc : expectedAccumulators) {
            Object numFinished = accumulators.get(acc.f0);
            if (numFinished == null) {
                allDone = false;
                break;
            }
            if (!numFinished.equals(acc.f1)) {
                allDone = false;
                break;
            }
        }
        if (allDone) {
            done = true;
            break;
        }
    }
    if (!done) {
        fail("Did not see the expected accumulator results within time limit.");
    }
}
Also used : Deadline(org.apache.flink.api.common.time.Deadline) JobStatus(org.apache.flink.api.common.JobStatus) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobID(org.apache.flink.api.common.JobID)

Example 25 with Deadline

Use of org.apache.flink.api.common.time.Deadline in project flink by apache.

The class SnapshotMigrationTestBase, method executeAndSnapshot.

@SafeVarargs
protected final void executeAndSnapshot(StreamExecutionEnvironment env, String snapshotPath, SnapshotType snapshotType, Tuple2<String, Integer>... expectedAccumulators) throws Exception {
    final Deadline deadLine = Deadline.fromNow(Duration.ofMinutes(5));
    ClusterClient<?> client = miniClusterResource.getClusterClient();
    // Submit the job
    JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    JobID jobID = client.submitJob(jobGraph).get();
    LOG.info("Submitted job {} and waiting...", jobID);
    boolean done = false;
    while (deadLine.hasTimeLeft()) {
        Thread.sleep(100);
        Map<String, Object> accumulators = client.getAccumulators(jobID).get();
        boolean allDone = true;
        for (Tuple2<String, Integer> acc : expectedAccumulators) {
            Object accumOpt = accumulators.get(acc.f0);
            if (accumOpt == null) {
                allDone = false;
                break;
            }
            Integer numFinished = (Integer) accumOpt;
            if (!numFinished.equals(acc.f1)) {
                allDone = false;
                break;
            }
        }
        if (allDone) {
            done = true;
            break;
        }
    }
    if (!done) {
        fail("Did not see the expected accumulator results within time limit.");
    }
    LOG.info("Triggering snapshot.");
    CompletableFuture<String> snapshotPathFuture;
    switch(snapshotType) {
        case SAVEPOINT_CANONICAL:
            snapshotPathFuture = client.triggerSavepoint(jobID, null, SavepointFormatType.CANONICAL);
            break;
        case SAVEPOINT_NATIVE:
            snapshotPathFuture = client.triggerSavepoint(jobID, null, SavepointFormatType.NATIVE);
            break;
        case CHECKPOINT:
            snapshotPathFuture = miniClusterResource.getMiniCluster().triggerCheckpoint(jobID);
            break;
        default:
            throw new UnsupportedOperationException("Snapshot type not supported/implemented.");
    }
    String jobmanagerSnapshotPath = snapshotPathFuture.get(deadLine.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
    File jobManagerSnapshot = new File(new URI(jobmanagerSnapshotPath).getPath());
    // savepoints were changed to be directories in Flink 1.3
    if (jobManagerSnapshot.isDirectory()) {
        FileUtils.moveDirectory(jobManagerSnapshot, new File(snapshotPath));
    } else {
        FileUtils.moveFile(jobManagerSnapshot, new File(snapshotPath));
    }
}
Also used : Deadline(org.apache.flink.api.common.time.Deadline) URI(java.net.URI) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) File(java.io.File) JobID(org.apache.flink.api.common.JobID)
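
A third recurring use, visible at the end of executeAndSnapshot above, is spending only the remaining deadline when waiting for a single future. A minimal sketch of that call; the helper class and method names are hypothetical:

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.flink.api.common.time.Deadline;

public class DeadlineFutureSketch {

    /** Waits for the future, but only for as long as the shared deadline still allows. */
    static <T> T getWithinDeadline(CompletableFuture<T> future, Deadline deadline)
            throws InterruptedException, ExecutionException, TimeoutException {
        return future.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
    }
}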

Aggregations

Deadline (org.apache.flink.api.common.time.Deadline): 75 uses
Test (org.junit.Test): 34 uses
JobID (org.apache.flink.api.common.JobID): 29 uses
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 26 uses
Duration (java.time.Duration): 19 uses
Configuration (org.apache.flink.configuration.Configuration): 15 uses
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 14 uses
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 13 uses
IOException (java.io.IOException): 12 uses
ExecutionException (java.util.concurrent.ExecutionException): 12 uses
KeySelector (org.apache.flink.api.java.functions.KeySelector): 12 uses
AtomicLong (java.util.concurrent.atomic.AtomicLong): 11 uses
MiniCluster (org.apache.flink.runtime.minicluster.MiniCluster): 10 uses
File (java.io.File): 9 uses
TimeUnit (java.util.concurrent.TimeUnit): 9 uses
JobStatus (org.apache.flink.api.common.JobStatus): 9 uses
List (java.util.List): 8 uses
Test (org.junit.jupiter.api.Test): 8 uses
CompletableFuture (java.util.concurrent.CompletableFuture): 7 uses
CountDownLatch (java.util.concurrent.CountDownLatch): 7 uses