Use of org.apache.flink.api.common.time.Deadline in project flink by apache.
The class TestJobExecutor, method waitForFailover.
private void waitForFailover(BlockingQueue<TestEvent> queue) throws Exception {
    int timeoutMs = 10_000;
    Deadline deadline = Deadline.fromNow(Duration.ofMillis(timeoutMs));
    String operatorId = null;
    int subtaskId = -1;
    int attemptNumber = -1;
    while (deadline.hasTimeLeft()) {
        TestEvent e = queue.poll(deadline.timeLeft().toMillis(), MILLISECONDS);
        if (e instanceof TestCommandAckEvent) {
            TestCommandAckEvent ack = (TestCommandAckEvent) e;
            if (ack.getCommand() == FAIL) {
                // Remember which subtask attempt acknowledged the FAIL command.
                operatorId = ack.operatorId;
                subtaskId = ack.subtaskIndex;
                attemptNumber = ack.getAttemptNumber();
            }
        } else if (e instanceof OperatorStartedEvent && operatorId != null) {
            OperatorStartedEvent started = (OperatorStartedEvent) e;
            // Failover is done once that subtask reports started again with an
            // attempt number no older than the failed attempt.
            if (started.operatorId.equals(operatorId)
                    && started.subtaskIndex == subtaskId
                    && started.getAttemptNumber() >= attemptNumber) {
                return;
            }
        }
    }
    throw new TimeoutException("No subtask restarted in " + timeoutMs + "ms");
}
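Note the pattern: the Deadline is computed once, and each blocking poll is bounded by deadline.timeLeft() rather than the full timeout, so every iteration draws from one shared 10-second budget. A minimal sketch of that pattern in isolation; the pollUntil helper and its Predicate-based condition are illustrative assumptions, not Flink API:

import java.time.Duration;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.function.Predicate;
import org.apache.flink.api.common.time.Deadline;

final class DeadlinePolling {

    // Polls the queue until an element matches the condition or the budget runs out.
    static <T> T pollUntil(BlockingQueue<T> queue, Predicate<T> condition, Duration timeout)
            throws InterruptedException, TimeoutException {
        Deadline deadline = Deadline.fromNow(timeout);
        while (deadline.hasTimeLeft()) {
            // Each poll blocks for at most the remaining time, not the full timeout.
            T element = queue.poll(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
            if (element != null && condition.test(element)) {
                return element;
            }
        }
        throw new TimeoutException("Condition not met within " + timeout);
    }
}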
Use of org.apache.flink.api.common.time.Deadline in project flink by apache.
The class YarnTestBase, method waitApplicationFinishedElseKillIt.
protected void waitApplicationFinishedElseKillIt(
        ApplicationId applicationId,
        Duration timeout,
        YarnClusterDescriptor yarnClusterDescriptor,
        int sleepIntervalInMS)
        throws Exception {
    Deadline deadline = Deadline.now().plus(timeout);
    YarnApplicationState state =
            getYarnClient().getApplicationReport(applicationId).getYarnApplicationState();
    while (state != YarnApplicationState.FINISHED) {
        if (state == YarnApplicationState.FAILED || state == YarnApplicationState.KILLED) {
            Assert.fail("Application became FAILED or KILLED while expecting FINISHED");
        }
        if (deadline.isOverdue()) {
            yarnClusterDescriptor.killCluster(applicationId);
            Assert.fail("Application didn't finish before timeout");
        }
        sleep(sleepIntervalInMS);
        state = getYarnClient().getApplicationReport(applicationId).getYarnApplicationState();
    }
}
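Deadline.now().plus(timeout) builds the same kind of deadline as Deadline.fromNow(timeout); here it is combined with a fixed sleep interval and an isOverdue() check, which suits slow external systems like YARN where each status probe is cheap but state changes take seconds. A minimal sketch of the same shape; waitUntil and the BooleanSupplier probe are assumptions for illustration:

import java.time.Duration;
import java.util.concurrent.TimeoutException;
import java.util.function.BooleanSupplier;
import org.apache.flink.api.common.time.Deadline;

final class IntervalPolling {

    // Probes at a fixed interval until the condition holds or the deadline passes.
    static void waitUntil(BooleanSupplier condition, Duration timeout, long sleepIntervalMs)
            throws InterruptedException, TimeoutException {
        Deadline deadline = Deadline.now().plus(timeout);
        while (!condition.getAsBoolean()) {
            if (deadline.isOverdue()) {
                throw new TimeoutException("Condition not met within " + timeout);
            }
            Thread.sleep(sleepIntervalMs);
        }
    }
}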
Use of org.apache.flink.api.common.time.Deadline in project flink by apache.
The class ZooKeeperLeaderElectionTest, method testZooKeeperReelection.
/**
 * Repeatedly tests the reelection of the remaining available LeaderContenders. After a
 * contender has been elected as the leader, its election service is stopped, which forces the
 * DefaultLeaderElectionService to elect a new leader.
 */
@Test
public void testZooKeeperReelection() throws Exception {
    Deadline deadline = Deadline.fromNow(Duration.ofMinutes(5L));
    int num = 10;
    DefaultLeaderElectionService[] leaderElectionService = new DefaultLeaderElectionService[num];
    TestingContender[] contenders = new TestingContender[num];
    DefaultLeaderRetrievalService leaderRetrievalService = null;
    TestingListener listener = new TestingListener();
    try {
        leaderRetrievalService =
                ZooKeeperUtils.createLeaderRetrievalService(
                        curatorFrameworkWrapper.asCuratorFramework());
        LOG.debug("Start leader retrieval service for the TestingListener.");
        leaderRetrievalService.start(listener);
        for (int i = 0; i < num; i++) {
            leaderElectionService[i] =
                    ZooKeeperUtils.createLeaderElectionService(
                            curatorFrameworkWrapper.asCuratorFramework());
            contenders[i] = new TestingContender(createAddress(i), leaderElectionService[i]);
            LOG.debug("Start leader election service for contender #{}.", i);
            leaderElectionService[i].start(contenders[i]);
        }
        String pattern = LEADER_ADDRESS + "_" + "(\\d+)";
        Pattern regex = Pattern.compile(pattern);
        int numberSeenLeaders = 0;
        while (deadline.hasTimeLeft() && numberSeenLeaders < num) {
            LOG.debug("Wait for new leader #{}.", numberSeenLeaders);
            String address = listener.waitForNewLeader(deadline.timeLeft().toMillis());
            Matcher m = regex.matcher(address);
            if (m.find()) {
                int index = Integer.parseInt(m.group(1));
                TestingContender contender = contenders[index];
                // Check that the retrieval service has retrieved the correct leader.
                if (address.equals(createAddress(index))
                        && listener.getLeaderSessionID().equals(contender.getLeaderSessionID())) {
                    // Kill the election service of the current leader to force a reelection.
                    LOG.debug("Stop leader election service of contender #{}.", index);
                    leaderElectionService[index].stop();
                    leaderElectionService[index] = null;
                    numberSeenLeaders++;
                }
            } else {
                fail("Did not find the leader's index.");
            }
        }
        assertFalse("Did not complete the leader reelection in time.", deadline.isOverdue());
        assertEquals(num, numberSeenLeaders);
    } finally {
        if (leaderRetrievalService != null) {
            leaderRetrievalService.stop();
        }
        for (DefaultLeaderElectionService electionService : leaderElectionService) {
            if (electionService != null) {
                electionService.stop();
            }
        }
    }
}
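Here a single five-minute Deadline spans all ten reelection rounds: each waitForNewLeader call receives only deadline.timeLeft().toMillis(), so the sequential waits can never add up to more than the overall budget. A minimal sketch of that budget-sharing idea; the Step interface and runAll are illustrative, not Flink API:

import java.time.Duration;
import java.util.concurrent.TimeoutException;
import org.apache.flink.api.common.time.Deadline;

final class SharedTimeBudget {

    interface Step {
        // Blocks for at most timeoutMs milliseconds.
        void await(long timeoutMs) throws Exception;
    }

    // Runs dependent steps under one shared deadline instead of N independent timeouts.
    static void runAll(Iterable<Step> steps, Duration totalBudget) throws Exception {
        Deadline deadline = Deadline.fromNow(totalBudget);
        for (Step step : steps) {
            if (!deadline.hasTimeLeft()) {
                throw new TimeoutException("Budget exhausted by earlier steps");
            }
            // Later steps get only the time the earlier ones left over.
            step.await(deadline.timeLeft().toMillis());
        }
    }
}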
Use of org.apache.flink.api.common.time.Deadline in project flink by apache.
The class SnapshotMigrationTestBase, method restoreAndExecute.
@SafeVarargs
protected final void restoreAndExecute(
        StreamExecutionEnvironment env,
        String snapshotPath,
        Tuple2<String, Integer>... expectedAccumulators)
        throws Exception {
    final Deadline deadLine = Deadline.fromNow(Duration.ofMinutes(5));
    ClusterClient<?> client = miniClusterResource.getClusterClient();
    // Submit the job.
    JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(snapshotPath));
    JobID jobID = client.submitJob(jobGraph).get();
    boolean done = false;
    while (deadLine.hasTimeLeft()) {
        try {
            CompletableFuture<JobStatus> jobStatusFuture = client.getJobStatus(jobID);
            JobStatus jobStatus = jobStatusFuture.get(5, TimeUnit.SECONDS);
            if (jobStatus == JobStatus.FAILED) {
                LOG.warn(
                        "Job reached status failed",
                        client.requestJobResult(jobID)
                                .get()
                                .getSerializedThrowable()
                                .get()
                                .deserializeError(ClassLoader.getSystemClassLoader()));
            }
            assertNotEquals(JobStatus.FAILED, jobStatus);
        } catch (Exception e) {
            fail("Could not connect to job: " + e);
        }
        Thread.sleep(100);
        Map<String, Object> accumulators = client.getAccumulators(jobID).get();
        boolean allDone = true;
        for (Tuple2<String, Integer> acc : expectedAccumulators) {
            Object numFinished = accumulators.get(acc.f0);
            if (numFinished == null || !numFinished.equals(acc.f1)) {
                allDone = false;
                break;
            }
        }
        if (allDone) {
            done = true;
            break;
        }
    }
    if (!done) {
        fail("Did not see the expected accumulator results within time limit.");
    }
}
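This loop layers two timeouts: each status fetch is individually capped at 5 seconds so a hung RPC surfaces quickly, while the outer Deadline caps the total wait at 5 minutes. A minimal sketch of the two-level structure; awaitDone and the status Supplier are assumptions for illustration:

import java.time.Duration;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.function.Supplier;
import org.apache.flink.api.common.time.Deadline;

final class TwoLevelTimeouts {

    // Polls an async status source until it reports done, under an outer deadline.
    static void awaitDone(Supplier<CompletableFuture<Boolean>> fetchStatus, Duration budget)
            throws Exception {
        Deadline deadline = Deadline.fromNow(budget);
        while (deadline.hasTimeLeft()) {
            // Inner timeout: one slow fetch surfaces after 5 seconds instead of hanging.
            if (fetchStatus.get().get(5, TimeUnit.SECONDS)) {
                return;
            }
            Thread.sleep(100);
        }
        throw new TimeoutException("Not done within " + budget);
    }
}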
Use of org.apache.flink.api.common.time.Deadline in project flink by apache.
The class SnapshotMigrationTestBase, method executeAndSnapshot.
@SafeVarargs
protected final void executeAndSnapshot(
        StreamExecutionEnvironment env,
        String snapshotPath,
        SnapshotType snapshotType,
        Tuple2<String, Integer>... expectedAccumulators)
        throws Exception {
    final Deadline deadLine = Deadline.fromNow(Duration.ofMinutes(5));
    ClusterClient<?> client = miniClusterResource.getClusterClient();
    // Submit the job.
    JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    JobID jobID = client.submitJob(jobGraph).get();
    LOG.info("Submitted job {} and waiting...", jobID);
    boolean done = false;
    while (deadLine.hasTimeLeft()) {
        Thread.sleep(100);
        Map<String, Object> accumulators = client.getAccumulators(jobID).get();
        boolean allDone = true;
        for (Tuple2<String, Integer> acc : expectedAccumulators) {
            Object accumOpt = accumulators.get(acc.f0);
            if (accumOpt == null) {
                allDone = false;
                break;
            }
            Integer numFinished = (Integer) accumOpt;
            if (!numFinished.equals(acc.f1)) {
                allDone = false;
                break;
            }
        }
        if (allDone) {
            done = true;
            break;
        }
    }
    if (!done) {
        fail("Did not see the expected accumulator results within time limit.");
    }
    LOG.info("Triggering snapshot.");
    CompletableFuture<String> snapshotPathFuture;
    switch (snapshotType) {
        case SAVEPOINT_CANONICAL:
            snapshotPathFuture = client.triggerSavepoint(jobID, null, SavepointFormatType.CANONICAL);
            break;
        case SAVEPOINT_NATIVE:
            snapshotPathFuture = client.triggerSavepoint(jobID, null, SavepointFormatType.NATIVE);
            break;
        case CHECKPOINT:
            snapshotPathFuture = miniClusterResource.getMiniCluster().triggerCheckpoint(jobID);
            break;
        default:
            throw new UnsupportedOperationException("Snapshot type not supported/implemented.");
    }
    String jobmanagerSnapshotPath =
            snapshotPathFuture.get(deadLine.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
    File jobManagerSnapshot = new File(new URI(jobmanagerSnapshotPath).getPath());
    // Savepoints were changed to be directories in Flink 1.3.
    if (jobManagerSnapshot.isDirectory()) {
        FileUtils.moveDirectory(jobManagerSnapshot, new File(snapshotPath));
    } else {
        FileUtils.moveFile(jobManagerSnapshot, new File(snapshotPath));
    }
}
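The final get reuses the same Deadline that governed the polling loop: snapshotPathFuture.get(deadLine.timeLeft().toMillis(), TimeUnit.MILLISECONDS) grants the snapshot trigger only whatever time the accumulator wait left over. A minimal sketch of carrying one budget across a polling phase and a final blocking await; awaitWithRemainder is an illustrative name:

import java.time.Duration;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;
import org.apache.flink.api.common.time.Deadline;

final class DeadlineAcrossPhases {

    // Spends part of one budget polling, then the remainder on a final blocking get.
    static String awaitWithRemainder(CompletableFuture<String> work, Duration budget)
            throws Exception {
        Deadline deadline = Deadline.fromNow(budget);

        // Phase 1: cheap polling until the work completes or the budget runs low.
        while (deadline.hasTimeLeft() && !work.isDone()) {
            Thread.sleep(100);
        }

        // Phase 2: if the budget is exhausted and the work is not done, get() throws
        // TimeoutException immediately, so this can never turn into an unbounded wait.
        return work.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
    }
}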