Use of scala.concurrent.duration.Deadline in project flink by apache.
The class WebFrontendITCase, method testStopYarn.
@Test
public void testStopYarn() throws Exception {
    // this only works if there is no active job at this point
    assertTrue(cluster.getCurrentlyRunningJobsJava().isEmpty());

    // Create a task
    final JobVertex sender = new JobVertex("Sender");
    sender.setParallelism(2);
    sender.setInvokableClass(StoppableInvokable.class);

    final JobGraph jobGraph = new JobGraph("Stoppable streaming test job", sender);
    final JobID jid = jobGraph.getJobID();
    cluster.submitJobDetached(jobGraph);

    // wait for job to show up
    while (cluster.getCurrentlyRunningJobsJava().isEmpty()) {
        Thread.sleep(10);
    }

    final FiniteDuration testTimeout = new FiniteDuration(2, TimeUnit.MINUTES);
    final Deadline deadline = testTimeout.fromNow();

    while (!cluster.getCurrentlyRunningJobsJava().isEmpty()) {
        try (HttpTestClient client = new HttpTestClient("localhost", port)) {
            // Request the file from the web server
            client.sendGetRequest("/jobs/" + jid + "/yarn-stop", deadline.timeLeft());
            HttpTestClient.SimpleHttpResponse response = client.getNextResponse(deadline.timeLeft());

            assertEquals(HttpResponseStatus.OK, response.getStatus());
            assertEquals(response.getType(), MimeTypes.getMimeTypeForExtension("json"));
            assertEquals("{}", response.getContent());
        }
        Thread.sleep(20);
    }
}
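The snippet above shows the pattern that recurs in all of the examples on this page: a FiniteDuration is turned into an absolute Deadline with fromNow(), and each subsequent blocking call receives only the remaining budget via timeLeft(). A minimal, self-contained sketch of that pattern (the polling loop, timeout values and the conditionReached() helper are illustrative, not taken from the Flink tests):

import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import scala.concurrent.duration.Deadline;
import scala.concurrent.duration.FiniteDuration;

public class DeadlinePollingSketch {

    public static void main(String[] args) throws Exception {
        // Overall time budget for the whole polling loop.
        final FiniteDuration timeout = new FiniteDuration(2, TimeUnit.MINUTES);
        // A Deadline is an absolute point in time; timeLeft() shrinks as time passes.
        final Deadline deadline = timeout.fromNow();

        while (deadline.hasTimeLeft()) {
            if (conditionReached()) {
                return;
            }
            // Each blocking step would get only the remaining budget, e.g.
            // Await.result(someFuture, deadline.timeLeft());
            Thread.sleep(20);
        }
        throw new TimeoutException("condition not reached within " + timeout);
    }

    // Hypothetical condition; the real tests poll the cluster or query state here.
    private static boolean conditionReached() {
        return false;
    }
}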
Use of scala.concurrent.duration.Deadline in project flink by apache.
The class TaskManagerRegistrationTest, method testTaskManagerNoExcessiveRegistrationMessages.
/**
 * Tests that the TaskManager does not send an excessive amount of registration messages to
 * the job manager if its registration was rejected.
 */
@Test
public void testTaskManagerNoExcessiveRegistrationMessages() throws Exception {
    new JavaTestKit(actorSystem) {
        {
            ActorGateway jm = null;
            ActorGateway taskManager = null;
            try {
                FiniteDuration timeout = new FiniteDuration(5, TimeUnit.SECONDS);

                jm = TestingUtils.createForwardingActor(actorSystem, getTestActor(), Option.<String>empty());
                final ActorGateway jmGateway = jm;

                long refusedRegistrationPause = 500;
                long initialRegistrationPause = 100;
                long maxDelay = 30000;

                Configuration tmConfig = new Configuration(config);
                tmConfig.setString(ConfigConstants.TASK_MANAGER_REFUSED_REGISTRATION_PAUSE, refusedRegistrationPause + " ms");
                tmConfig.setString(ConfigConstants.TASK_MANAGER_INITIAL_REGISTRATION_PAUSE, initialRegistrationPause + " ms");

                // we make the test actor (the test kit) the JobManager to intercept
                // the messages
                taskManager = createTaskManager(actorSystem, jmGateway, tmConfig, true, false);
                final ActorGateway taskManagerGateway = taskManager;

                final Deadline deadline = timeout.fromNow();
                try {
                    while (deadline.hasTimeLeft()) {
                        // the TaskManager should try to register
                        expectMsgClass(deadline.timeLeft(), RegisterTaskManager.class);
                        // we decline the registration
                        taskManagerGateway.tell(new RefuseRegistration(new Exception("test reason")), jmGateway);
                    }
                } catch (AssertionError error) {
                    // ignore since it simply means that we have used up all our time
                }

                RegisterTaskManager[] registerTaskManagerMessages =
                    new ReceiveWhile<RegisterTaskManager>(RegisterTaskManager.class, timeout) {
                        @Override
                        protected RegisterTaskManager match(Object msg) throws Exception {
                            if (msg instanceof RegisterTaskManager) {
                                return (RegisterTaskManager) msg;
                            } else {
                                throw noMatch();
                            }
                        }
                    }.get();

                int maxExponent = (int) Math.floor(Math.log(((double) maxDelay / initialRegistrationPause + 1)) / Math.log(2));
                int exponent = (int) Math.ceil(Math.log(((double) timeout.toMillis() / initialRegistrationPause + 1)) / Math.log(2));
                int exp = Math.min(maxExponent, exponent);

                long difference = timeout.toMillis() - (initialRegistrationPause * (1 << exp));

                int numberRegisterTaskManagerMessages = exp;
                if (difference > 0) {
                    numberRegisterTaskManagerMessages += Math.ceil((double) difference / maxDelay);
                }

                int maxExpectedNumberOfRegisterTaskManagerMessages = numberRegisterTaskManagerMessages * 2;

                assertTrue(
                    "The number of RegisterTaskManager messages #" + registerTaskManagerMessages.length
                        + " should be less than #" + maxExpectedNumberOfRegisterTaskManagerMessages,
                    registerTaskManagerMessages.length <= maxExpectedNumberOfRegisterTaskManagerMessages);
            } finally {
                stopActor(taskManager);
                stopActor(jm);
            }
        }
    };
}
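The message-count bound computed near the end of the test is easier to follow with the concrete values used above (initialRegistrationPause = 100 ms, maxDelay = 30000 ms, timeout = 5 s). A small stand-alone sketch of the same arithmetic (the class name, printed wording and comments are mine, not from the test):

public class RegistrationBackoffBound {

    public static void main(String[] args) {
        long initialRegistrationPause = 100;   // ms, the arithmetic assumes the pause doubles each round
        long maxDelay = 30_000;                // ms, cap on the pause
        long timeoutMillis = 5_000;            // ms, how long the test keeps refusing registrations

        // Doublings of initialRegistrationPause before the pause would exceed maxDelay.
        int maxExponent = (int) Math.floor(Math.log((double) maxDelay / initialRegistrationPause + 1) / Math.log(2));
        // Doublings that fit into the test timeout.
        int exponent = (int) Math.ceil(Math.log((double) timeoutMillis / initialRegistrationPause + 1) / Math.log(2));
        int exp = Math.min(maxExponent, exponent);

        // Time left after the exponential phase would be spent at the capped pause.
        long difference = timeoutMillis - initialRegistrationPause * (1L << exp);

        int expectedMessages = exp;
        if (difference > 0) {
            expectedMessages += Math.ceil((double) difference / maxDelay);
        }

        // The test allows a factor of two of slack on top of this estimate.
        System.out.println("expected ~" + expectedMessages + " messages, assertion bound = " + expectedMessages * 2);
        // With the values above: exponent = ceil(log2(51)) = 6, maxExponent = floor(log2(301)) = 8,
        // so exp = 6, difference = 5000 - 6400 < 0, and the bound is 12.
    }
}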
Use of scala.concurrent.duration.Deadline in project flink by apache.
The class AbstractQueryableStateITCase, method testFoldingState.
/**
 * Tests a simple queryable state instance backed by folding state. Each source emits
 * (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
 * queried. The folding state sums these up and maps them to Strings. The
 * test succeeds once each subtask index has been queried with the result n*(n+1)/2
 * (as a String).
 */
@Test
public void testFoldingState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();

    final int numElements = 1024;

    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());

    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));

        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));

        // Folding state
        FoldingStateDescriptor<Tuple2<Integer, Long>, String> foldingState =
            new FoldingStateDescriptor<>("any", "0", new SumFold(), StringSerializer.INSTANCE);

        QueryableStateStream<Integer, String> queryableState = source.keyBy(
            new KeySelector<Tuple2<Integer, Long>, Integer>() {
                @Override
                public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                    return value.f0;
                }
            }).asQueryableState("pumba", foldingState);

        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);

        // Now query
        String expected = Integer.toString(numElements * (numElements + 1) / 2);
        for (int key = 0; key < NUM_SLOTS; key++) {
            final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(
                key, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE);

            boolean success = false;
            while (deadline.hasTimeLeft() && !success) {
                Future<byte[]> future = getKvStateWithRetries(
                    client, jobId, queryableState.getQueryableStateName(), key, serializedKey, QUERY_RETRY_DELAY, false);
                byte[] serializedValue = Await.result(future, deadline.timeLeft());

                String value = KvStateRequestSerializer.deserializeValue(serializedValue, queryableState.getValueSerializer());
                if (expected.equals(value)) {
                    success = true;
                } else {
                    // Retry
                    Thread.sleep(50);
                }
            }
            assertTrue("Did not succeed query", success);
        }
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster
                .getLeaderGateway(deadline.timeLeft())
                .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
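SumFold is defined elsewhere in the test class and is not shown on this page. Given the description above (the folding state starts at "0", sums the emitted counter values and keeps the result as a String), a fold function consistent with that behaviour could look like the following sketch; it is an assumption about the helper, not the project's actual implementation:

import org.apache.flink.api.common.functions.FoldFunction;
import org.apache.flink.api.java.tuple.Tuple2;

/**
 * Folds (subtaskIndex, value) tuples into a running sum kept as a String,
 * matching FoldingStateDescriptor<>("any", "0", new SumFold(), StringSerializer.INSTANCE) above.
 */
public class SumFold implements FoldFunction<Tuple2<Integer, Long>, String> {

    @Override
    public String fold(String accumulator, Tuple2<Integer, Long> value) throws Exception {
        long sum = Long.parseLong(accumulator) + value.f1;
        return Long.toString(sum);
    }
}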
Use of scala.concurrent.duration.Deadline in project flink by apache.
The class AbstractQueryableStateITCase, method testReducingState.
/**
 * Tests a simple queryable state instance backed by reducing state. Each source emits
 * (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
 * queried. The reducing state instance sums these up. The test succeeds
 * once each subtask index has been queried with the result n*(n+1)/2.
 */
@Test
public void testReducingState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();

    final int numElements = 1024;

    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());

    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));

        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));

        // Reducing state
        ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState =
            new ReducingStateDescriptor<>("any", new SumReduce(), source.getType());

        QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(
            new KeySelector<Tuple2<Integer, Long>, Integer>() {
                @Override
                public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                    return value.f0;
                }
            }).asQueryableState("jungle", reducingState);

        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);

        // Wait until job is running
        // Now query
        long expected = numElements * (numElements + 1) / 2;

        executeValueQuery(deadline, client, jobId, queryableState, expected);
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster
                .getLeaderGateway(deadline.timeLeft())
                .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
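As with SumFold above, SumReduce is defined elsewhere in the test class. A reduce function consistent with the description (summing the counter field while keeping the subtask index as the key) could look like this sketch, again an assumption rather than the project's actual implementation:

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;

/**
 * Sums the counter field of two (subtaskIndex, value) tuples, keeping the key,
 * matching ReducingStateDescriptor<>("any", new SumReduce(), source.getType()) above.
 */
public class SumReduce implements ReduceFunction<Tuple2<Integer, Long>> {

    @Override
    public Tuple2<Integer, Long> reduce(Tuple2<Integer, Long> a, Tuple2<Integer, Long> b) throws Exception {
        return new Tuple2<>(a.f0, a.f1 + b.f1);
    }
}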
Use of scala.concurrent.duration.Deadline in project flink by apache.
The class RescalingITCase, method testSavepointRescalingPartitionedOperatorState.
/**
 * Tests rescaling of partitioned operator state. More specifically, we test the mechanism with {@link ListCheckpointed},
 * as it subsumes {@link org.apache.flink.streaming.api.checkpoint.CheckpointedFunction}.
 */
public void testSavepointRescalingPartitionedOperatorState(boolean scaleOut, OperatorCheckpointMethod checkpointMethod) throws Exception {
    final int parallelism = scaleOut ? numSlots : numSlots / 2;
    final int parallelism2 = scaleOut ? numSlots / 2 : numSlots;
    final int maxParallelism = 13;

    FiniteDuration timeout = new FiniteDuration(3, TimeUnit.MINUTES);
    Deadline deadline = timeout.fromNow();

    JobID jobID = null;
    ActorGateway jobManager = null;

    int counterSize = Math.max(parallelism, parallelism2);

    if (checkpointMethod == OperatorCheckpointMethod.CHECKPOINTED_FUNCTION
            || checkpointMethod == OperatorCheckpointMethod.CHECKPOINTED_FUNCTION_BROADCAST) {
        PartitionedStateSource.CHECK_CORRECT_SNAPSHOT = new int[counterSize];
        PartitionedStateSource.CHECK_CORRECT_RESTORE = new int[counterSize];
    } else {
        PartitionedStateSourceListCheckpointed.CHECK_CORRECT_SNAPSHOT = new int[counterSize];
        PartitionedStateSourceListCheckpointed.CHECK_CORRECT_RESTORE = new int[counterSize];
    }

    try {
        jobManager = cluster.getLeaderGateway(deadline.timeLeft());

        JobGraph jobGraph = createJobGraphWithOperatorState(parallelism, maxParallelism, checkpointMethod);
        jobID = jobGraph.getJobID();

        cluster.submitJobDetached(jobGraph);

        Object savepointResponse = null;

        // wait until the operator is started
        StateSourceBase.workStartedLatch.await();

        while (deadline.hasTimeLeft()) {
            Future<Object> savepointPathFuture = jobManager.ask(
                new JobManagerMessages.TriggerSavepoint(jobID, Option.<String>empty()), deadline.timeLeft());
            FiniteDuration waitingTime = new FiniteDuration(10, TimeUnit.SECONDS);
            savepointResponse = Await.result(savepointPathFuture, waitingTime);

            if (savepointResponse instanceof JobManagerMessages.TriggerSavepointSuccess) {
                break;
            }
            System.out.println(savepointResponse);
        }

        assertTrue(savepointResponse instanceof JobManagerMessages.TriggerSavepointSuccess);

        final String savepointPath = ((JobManagerMessages.TriggerSavepointSuccess) savepointResponse).savepointPath();

        Future<Object> jobRemovedFuture = jobManager.ask(
            new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), deadline.timeLeft());

        Future<Object> cancellationResponseFuture = jobManager.ask(
            new JobManagerMessages.CancelJob(jobID), deadline.timeLeft());
        Object cancellationResponse = Await.result(cancellationResponseFuture, deadline.timeLeft());

        assertTrue(cancellationResponse instanceof JobManagerMessages.CancellationSuccess);

        Await.ready(jobRemovedFuture, deadline.timeLeft());

        // job successfully removed
        jobID = null;

        JobGraph scaledJobGraph = createJobGraphWithOperatorState(parallelism2, maxParallelism, checkpointMethod);
        scaledJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
        jobID = scaledJobGraph.getJobID();

        cluster.submitJobAndWait(scaledJobGraph, false);

        int sumExp = 0;
        int sumAct = 0;

        if (checkpointMethod == OperatorCheckpointMethod.CHECKPOINTED_FUNCTION) {
            for (int c : PartitionedStateSource.CHECK_CORRECT_SNAPSHOT) {
                sumExp += c;
            }
            for (int c : PartitionedStateSource.CHECK_CORRECT_RESTORE) {
                sumAct += c;
            }
        } else if (checkpointMethod == OperatorCheckpointMethod.CHECKPOINTED_FUNCTION_BROADCAST) {
            for (int c : PartitionedStateSource.CHECK_CORRECT_SNAPSHOT) {
                sumExp += c;
            }
            for (int c : PartitionedStateSource.CHECK_CORRECT_RESTORE) {
                sumAct += c;
            }
            sumExp *= parallelism2;
        } else {
            for (int c : PartitionedStateSourceListCheckpointed.CHECK_CORRECT_SNAPSHOT) {
                sumExp += c;
            }
            for (int c : PartitionedStateSourceListCheckpointed.CHECK_CORRECT_RESTORE) {
                sumAct += c;
            }
        }

        assertEquals(sumExp, sumAct);
        jobID = null;
    } finally {
        // clear any left overs from a possibly failed job
        if (jobID != null && jobManager != null) {
            Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), timeout);
            try {
                Await.ready(jobRemovedFuture, timeout);
            } catch (TimeoutException | InterruptedException ie) {
                fail("Failed while cleaning up the cluster.");
            }
        }
    }
}
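Note that this method carries no @Test annotation: it is a parameterized helper that concrete test methods call with a scaling direction and a checkpoint method. Hypothetical driver tests (the method names are illustrative, not copied from RescalingITCase, and they rely on the enclosing test class) might look like:

@Test
public void testSavepointRescalingOutPartitionedOperatorState() throws Exception {
    testSavepointRescalingPartitionedOperatorState(true, OperatorCheckpointMethod.CHECKPOINTED_FUNCTION);
}

@Test
public void testSavepointRescalingInPartitionedOperatorState() throws Exception {
    testSavepointRescalingPartitionedOperatorState(false, OperatorCheckpointMethod.CHECKPOINTED_FUNCTION);
}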