use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.
the class TaskCancelAsyncProducerConsumerITCase method testCancelAsyncProducerAndConsumer.
/**
 * Tests that a task waiting on an async producer/consumer that is stuck
 * in a blocking buffer request can be properly cancelled.
 *
 * <p>This is currently required for the Flink Kafka sources, which spawn
 * a separate Thread consuming from Kafka and producing the intermediate
 * streams in the spawned Thread instead of the main task Thread.
 *
 * <p>The test starts a cluster with one TaskManager, one slot and only
 * 8 network buffers of 4096 bytes, submits a producer/consumer job, polls
 * until the producer thread is in a blocking buffer request and the consumer
 * thread is in state {@code WAITING}, cancels the job and finally checks that
 * both async threads recorded an {@link IllegalStateException}.
 *
 * @throws Exception if cluster start-up, job submission or waiting on a future fails
 */
@Test
public void testCancelAsyncProducerAndConsumer() throws Exception {
    Deadline deadline = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();
    TestingCluster flink = null;
    try {
        // Cluster
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SEGMENT_SIZE_KEY, 4096);
        config.setInteger(ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY, 8);
        flink = new TestingCluster(config, true);
        flink.start();

        // Job with async producer and consumer
        JobVertex producer = new JobVertex("AsyncProducer");
        producer.setParallelism(1);
        producer.setInvokableClass(AsyncProducer.class);
        JobVertex consumer = new JobVertex("AsyncConsumer");
        consumer.setParallelism(1);
        consumer.setInvokableClass(AsyncConsumer.class);
        consumer.connectNewDataSetAsInput(producer, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
        SlotSharingGroup slot = new SlotSharingGroup(producer.getID(), consumer.getID());
        producer.setSlotSharingGroup(slot);
        consumer.setSlotSharingGroup(slot);
        JobGraph jobGraph = new JobGraph(producer, consumer);

        // Submit job and wait until running
        ActorGateway jobManager = flink.getLeaderGateway(deadline.timeLeft());
        flink.submitJobDetached(jobGraph);
        Object msg = new WaitForAllVerticesToBeRunning(jobGraph.getJobID());
        Future<?> runningFuture = jobManager.ask(msg, deadline.timeLeft());
        Await.ready(runningFuture, deadline.timeLeft());

        // Register for the CANCELED notification before cancelling, so it cannot be missed
        msg = new NotifyWhenJobStatus(jobGraph.getJobID(), JobStatus.CANCELED);
        Future<?> cancelledFuture = jobManager.ask(msg, deadline.timeLeft());

        // Poll (at most 50 times, 500 ms apart) until the producer is stuck in a buffer request
        boolean producerBlocked = false;
        for (int i = 0; i < 50; i++) {
            Thread thread = ASYNC_PRODUCER_THREAD;
            if (thread != null && thread.isAlive()) {
                StackTraceElement[] stackTrace = thread.getStackTrace();
                producerBlocked = isInBlockingBufferRequest(stackTrace);
            }
            if (producerBlocked) {
                break;
            } else {
                // Retry
                Thread.sleep(500);
            }
        }

        // Verify that async producer is in blocking request. The diagnostic is built only on
        // failure and from the *producer* thread; it must tolerate a thread that never started.
        String producerTrace = "";
        if (!producerBlocked) {
            Thread producerThread = ASYNC_PRODUCER_THREAD;
            producerTrace = (producerThread == null)
                    ? "<producer thread was never started>"
                    : Arrays.toString(producerThread.getStackTrace());
        }
        assertTrue("Producer thread is not blocked: " + producerTrace, producerBlocked);

        // Poll (at most 50 times, 500 ms apart) until the consumer thread is parked
        boolean consumerWaiting = false;
        for (int i = 0; i < 50; i++) {
            Thread thread = ASYNC_CONSUMER_THREAD;
            if (thread != null && thread.isAlive()) {
                consumerWaiting = thread.getState() == Thread.State.WAITING;
            }
            if (consumerWaiting) {
                break;
            } else {
                // Retry
                Thread.sleep(500);
            }
        }

        // Verify that async consumer is in state WAITING
        assertTrue("Consumer thread is not blocked.", consumerWaiting);

        // Cancel and wait for both the cancel acknowledgement and the CANCELED status
        msg = new CancelJob(jobGraph.getJobID());
        Future<?> cancelFuture = jobManager.ask(msg, deadline.timeLeft());
        Await.ready(cancelFuture, deadline.timeLeft());
        Await.ready(cancelledFuture, deadline.timeLeft());

        // Verify the expected Exceptions
        assertNotNull(ASYNC_PRODUCER_EXCEPTION);
        assertEquals(IllegalStateException.class, ASYNC_PRODUCER_EXCEPTION.getClass());
        assertNotNull(ASYNC_CONSUMER_EXCEPTION);
        assertEquals(IllegalStateException.class, ASYNC_CONSUMER_EXCEPTION.getClass());
    } finally {
        if (flink != null) {
            flink.shutdown();
        }
    }
}
use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.
the class TaskInputSplitProviderTest method testRequestNextInputSplitWithInvalidExecutionID.
/**
 * Requests an input split through a {@code TaskInputSplitProvider} that talks to a
 * {@code NullInputSplitGateway}, using freshly generated job, vertex and execution
 * attempt IDs, and expects the provider to hand back {@code null}.
 *
 * @throws InputSplitProviderException if the provider fails to request the split
 */
@Test
public void testRequestNextInputSplitWithInvalidExecutionID() throws InputSplitProviderException {
    // Fresh IDs that are not tied to any running job or execution in this test
    final JobID job = new JobID();
    final JobVertexID vertex = new JobVertexID();
    final ExecutionAttemptID attempt = new ExecutionAttemptID();
    final FiniteDuration askTimeout = new FiniteDuration(10, TimeUnit.SECONDS);

    // NOTE(review): NullInputSplitGateway is defined elsewhere; presumably it answers
    // split requests without a split — confirm against its implementation.
    final ActorGateway nullSplitGateway = new NullInputSplitGateway();

    final TaskInputSplitProvider splitProvider =
            new TaskInputSplitProvider(nullSplitGateway, job, vertex, attempt, askTimeout);

    final ClassLoader userCodeClassLoader = getClass().getClassLoader();
    final InputSplit split = splitProvider.getNextInputSplit(userCodeClassLoader);

    // No split must be returned
    assertTrue(null == split);
}
use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.
the class TaskManagerRegistrationTest method testSimpleRegistration.
/**
 * Starts one JobManager and two TaskManagers, then checks that each TaskManager
 * confirms its registration and that the JobManager reports exactly two
 * registered TaskManagers.
 */
@Test
public void testSimpleRegistration() {
    new JavaTestKit(actorSystem) {
        {
            ActorGateway jm = null;
            ActorGateway firstTm = null;
            ActorGateway secondTm = null;
            try {
                // Bring up a plain JobManager together with a resource manager
                jm = createJobManager(actorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), config);
                startResourceManager(config, jm.actor());

                // Both TaskManagers start registering on their own once created
                firstTm = createTaskManager(actorSystem, jm, config, true, false);
                secondTm = createTaskManager(actorSystem, jm, config, true, false);

                // Ask each TaskManager to notify us once it is registered
                Future<Object> firstRegistered =
                        firstTm.ask(TaskManagerMessages.getNotifyWhenRegisteredAtJobManagerMessage(), timeout);
                Future<Object> secondRegistered =
                        secondTm.ask(TaskManagerMessages.getNotifyWhenRegisteredAtJobManagerMessage(), timeout);
                Object firstReply = Await.result(firstRegistered, timeout);
                Object secondReply = Await.result(secondRegistered, timeout);

                // Scala case objects are awkward to reference from Java, so compare by class
                Class<?> expectedReplyType = TaskManagerMessages.getRegisteredAtJobManagerMessage().getClass();
                assertTrue(expectedReplyType.isInstance(firstReply));
                assertTrue(expectedReplyType.isInstance(secondReply));

                // The JobManager must know about exactly two TaskManagers
                Future<Object> registeredCountFuture =
                        jm.ask(JobManagerMessages.getRequestNumberRegisteredTaskManager(), timeout);
                Integer registeredCount = (Integer) Await.result(registeredCountFuture, timeout);
                assertEquals(2, registeredCount.intValue());
            } catch (Exception e) {
                e.printStackTrace();
                fail(e.getMessage());
            } finally {
                stopActor(firstTm);
                stopActor(secondTm);
                stopActor(jm);
            }
        }
    };
}
use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.
the class TaskManagerRegistrationTest method testTaskManagerResumesConnectAfterJobManagerFailure.
/**
 * Validate that the TaskManager attempts to re-connect after it lost the connection
 * to the JobManager.
 *
 * <p>A forwarding actor named {@code "ForwardingJobManager"} relays everything to the
 * test kit's test actor, which plays the JobManager. After the first registration is
 * acknowledged, the forwarding actor is stopped and a second one is created under the
 * same name; the TaskManager is then expected to send a new registration.
 */
@Test
public void testTaskManagerResumesConnectAfterJobManagerFailure() {
    new JavaTestKit(actorSystem) {
        {
            ActorGateway fakeJobManager1Gateway = null;
            ActorGateway fakeJobManager2Gateway = null;
            ActorGateway taskManagerGateway = null;
            final String JOB_MANAGER_NAME = "ForwardingJobManager";
            try {
                fakeJobManager1Gateway = TestingUtils.createForwardingActor(actorSystem, getTestActor(), Option.apply(JOB_MANAGER_NAME));
                final ActorGateway fakeJM1Gateway = fakeJobManager1Gateway;

                // we make the test actor (the test kit) the JobManager to intercept
                // the messages
                taskManagerGateway = createTaskManager(actorSystem, fakeJobManager1Gateway, config, true, false);
                final ActorGateway tm = taskManagerGateway;

                // validate initial registration
                new Within(timeout) {
                    @Override
                    protected void run() {
                        // the TaskManager should try to register
                        expectMsgClass(RegisterTaskManager.class);
                        // we accept the registration
                        tm.tell(new AcknowledgeRegistration(new InstanceID(), 45234), fakeJM1Gateway);
                    }
                };

                // kill the first forwarding JobManager
                watch(fakeJobManager1Gateway.actor());
                stopActor(fakeJobManager1Gateway.actor());

                new Within(timeout) {
                    @Override
                    protected void run() {
                        Object message = null;
                        // skip any other messages that are still queued up in the testing
                        // actor's mailbox until the Terminated notification shows up
                        // (instanceof is false for null, so no separate null check is needed)
                        while (!(message instanceof Terminated)) {
                            message = receiveOne(timeout);
                        }
                        Terminated terminatedMessage = (Terminated) message;
                        assertEquals(fakeJM1Gateway.actor(), terminatedMessage.actor());
                    }
                };

                // already stopped above; clear it so the finally block does not stop it again
                fakeJobManager1Gateway = null;

                // now start the second fake JobManager and expect that
                // the TaskManager registers again
                // the second fake JM needs to have the same actor URL
                // since we cannot reliably wait until the actor is unregistered (name is
                // available again) we loop with multiple tries for 20 seconds
                long deadline = 20000000000L + System.nanoTime();
                do {
                    try {
                        fakeJobManager2Gateway = TestingUtils.createForwardingActor(actorSystem, getTestActor(), Option.apply(JOB_MANAGER_NAME));
                    } catch (InvalidActorNameException e) {
                        // wait and retry
                        Thread.sleep(100);
                    }
                } while (fakeJobManager2Gateway == null && System.nanoTime() < deadline);

                // fail with a clear message instead of an unrelated timeout further down
                if (fakeJobManager2Gateway == null) {
                    fail("Could not start the second forwarding JobManager under the name '"
                            + JOB_MANAGER_NAME + "' within 20 seconds.");
                }
                final ActorGateway fakeJM2GatewayClosure = fakeJobManager2Gateway;

                // expect the next registration
                new Within(timeout) {
                    @Override
                    protected void run() {
                        expectMsgClass(RegisterTaskManager.class);
                        // we accept the registration
                        tm.tell(new AcknowledgeRegistration(new InstanceID(), 45234), fakeJM2GatewayClosure);
                    }
                };
            } catch (Throwable e) {
                e.printStackTrace();
                fail(e.getMessage());
            } finally {
                stopActor(taskManagerGateway);
                stopActor(fakeJobManager1Gateway);
                stopActor(fakeJobManager2Gateway);
            }
        }
    };
}
use of org.apache.flink.runtime.instance.ActorGateway in project flink by apache.
the class TaskManagerRegistrationTest method testCheckForValidRegistrationSessionIDs.
/**
 * Checks that a TaskManager ignores an {@code AcknowledgeRegistration} carrying a
 * wrong leader session ID and accepts the one carrying the expected ID (here
 * {@code null}). The test kit's test actor acts as the JobManager.
 */
@Test
public void testCheckForValidRegistrationSessionIDs() {
    new JavaTestKit(actorSystem) {
        {
            ActorGateway taskManagerGateway = null;
            try {
                // the test actor (the test kit) plays the JobManager so that it sees
                // every message the TaskManager sends
                taskManagerGateway = createTaskManager(actorSystem, getTestActor(), config, true, false);
                final ActorRef taskManagerRef = taskManagerGateway.actor();
                final UUID wrongSessionId = UUID.randomUUID();
                final UUID expectedSessionId = null;

                new Within(timeout) {
                    @Override
                    protected void run() {
                        taskManagerRef.tell(TaskManagerMessages.getNotifyWhenRegisteredAtJobManagerMessage(), getTestActor());

                        // first message must be a registration attempt wrapped with the expected session ID
                        LeaderSessionMessage registration = expectMsgClass(LeaderSessionMessage.class);
                        assertTrue(expectedSessionId == registration.leaderSessionID());
                        assertTrue(registration.message() instanceof RegisterTaskManager);
                        final ActorRef registrant = getLastSender();

                        // acknowledgement with the wrong session ID: expected to be dropped
                        registrant.tell(
                                new LeaderSessionMessage(wrongSessionId, new AcknowledgeRegistration(new InstanceID(), 1)),
                                getTestActor());
                        // acknowledgement with the expected session ID
                        registrant.tell(
                                new LeaderSessionMessage(expectedSessionId, new AcknowledgeRegistration(new InstanceID(), 1)),
                                getTestActor());

                        // drain the mailbox until the registration confirmation arrives
                        final Class<?> confirmationType = TaskManagerMessages.getRegisteredAtJobManagerMessage().getClass();
                        Object received;
                        do {
                            received = receiveOne(TestingUtils.TESTING_DURATION());
                        } while (received == null || !confirmationType.equals(received.getClass()));

                        // the TaskManager must now report the expected leader session ID
                        registrant.tell(JobManagerMessages.getRequestLeaderSessionID(), getTestActor());
                        expectMsgEquals(new JobManagerMessages.ResponseLeaderSessionID(expectedSessionId));
                    }
                };
            } catch (Throwable e) {
                e.printStackTrace();
                fail(e.getMessage());
            } finally {
                stopActor(taskManagerGateway);
            }
        }
    };
}
Aggregations