Use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.
The class TaskCancelAsyncProducerConsumerITCase, method testCancelAsyncProducerAndConsumer:
/**
 * Tests that a task waiting on an async producer/consumer that is stuck
 * in a blocking buffer request can be properly cancelled.
 *
 * <p>This is currently required for the Flink Kafka sources, which spawn
 * a separate Thread consuming from Kafka and producing the intermediate
 * streams in the spawned Thread instead of the main task Thread.
 */
@Test
public void testCancelAsyncProducerAndConsumer() throws Exception {
    Deadline deadline = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();
    TestingCluster flink = null;
    try {
        // Cluster
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SEGMENT_SIZE_KEY, 4096);
        config.setInteger(ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY, 8);
        flink = new TestingCluster(config, true);
        flink.start();
        // Job with async producer and consumer
        JobVertex producer = new JobVertex("AsyncProducer");
        producer.setParallelism(1);
        producer.setInvokableClass(AsyncProducer.class);
        JobVertex consumer = new JobVertex("AsyncConsumer");
        consumer.setParallelism(1);
        consumer.setInvokableClass(AsyncConsumer.class);
        consumer.connectNewDataSetAsInput(producer, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
        SlotSharingGroup slot = new SlotSharingGroup(producer.getID(), consumer.getID());
        producer.setSlotSharingGroup(slot);
        consumer.setSlotSharingGroup(slot);
        JobGraph jobGraph = new JobGraph(producer, consumer);
        // Submit job and wait until running
        ActorGateway jobManager = flink.getLeaderGateway(deadline.timeLeft());
        flink.submitJobDetached(jobGraph);
        Object msg = new WaitForAllVerticesToBeRunning(jobGraph.getJobID());
        Future<?> runningFuture = jobManager.ask(msg, deadline.timeLeft());
        Await.ready(runningFuture, deadline.timeLeft());
        // Wait for blocking requests, cancel and wait for cancellation
        msg = new NotifyWhenJobStatus(jobGraph.getJobID(), JobStatus.CANCELED);
        Future<?> cancelledFuture = jobManager.ask(msg, deadline.timeLeft());
        boolean producerBlocked = false;
        for (int i = 0; i < 50; i++) {
            Thread thread = ASYNC_PRODUCER_THREAD;
            if (thread != null && thread.isAlive()) {
                StackTraceElement[] stackTrace = thread.getStackTrace();
                producerBlocked = isInBlockingBufferRequest(stackTrace);
            }
            if (producerBlocked) {
                break;
            } else {
                // Retry
                Thread.sleep(500);
            }
        }
        // Verify that the async producer is stuck in a blocking buffer request
        assertTrue("Producer thread is not blocked: " + Arrays.toString(ASYNC_PRODUCER_THREAD.getStackTrace()), producerBlocked);
        boolean consumerWaiting = false;
        for (int i = 0; i < 50; i++) {
            Thread thread = ASYNC_CONSUMER_THREAD;
            if (thread != null && thread.isAlive()) {
                consumerWaiting = thread.getState() == Thread.State.WAITING;
            }
            if (consumerWaiting) {
                break;
            } else {
                // Retry
                Thread.sleep(500);
            }
        }
        // Verify that the async consumer is waiting for input
        assertTrue("Consumer thread is not blocked.", consumerWaiting);
        msg = new CancelJob(jobGraph.getJobID());
        Future<?> cancelFuture = jobManager.ask(msg, deadline.timeLeft());
        Await.ready(cancelFuture, deadline.timeLeft());
        Await.ready(cancelledFuture, deadline.timeLeft());
        // Verify the expected Exceptions
        assertNotNull(ASYNC_PRODUCER_EXCEPTION);
        assertEquals(IllegalStateException.class, ASYNC_PRODUCER_EXCEPTION.getClass());
        assertNotNull(ASYNC_CONSUMER_EXCEPTION);
        assertEquals(IllegalStateException.class, ASYNC_CONSUMER_EXCEPTION.getClass());
    } finally {
        if (flink != null) {
            flink.shutdown();
        }
    }
}
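The helper isInBlockingBufferRequest is not part of this excerpt. A minimal sketch of what such a stack-trace check can look like, assuming the blocked producer bottoms out in Object.wait() inside Flink's LocalBufferPool; the exact frames inspected here are an assumption, not the verbatim helper:

// Sketch only: the real helper lives in TaskCancelAsyncProducerConsumerITCase;
// the frames checked (Object.wait() called from LocalBufferPool) are assumed.
private static boolean isInBlockingBufferRequest(StackTraceElement[] stackTrace) {
    if (stackTrace.length < 3) {
        return false;
    }
    // A producer stuck in a blocking buffer request waits on the buffer pool's
    // monitor, so the top frame is Object.wait() entered from LocalBufferPool.
    return stackTrace[0].getMethodName().equals("wait")
        && stackTrace[1].getClassName().endsWith("LocalBufferPool");
}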
Use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.
The class RescalingITCase, method setup:
@BeforeClass
public static void setup() throws Exception {
    Configuration config = new Configuration();
    config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTaskManagers);
    config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, slotsPerTaskManager);
    final File checkpointDir = temporaryFolder.newFolder();
    final File savepointDir = temporaryFolder.newFolder();
    config.setString(CoreOptions.STATE_BACKEND, "filesystem");
    config.setString(FsStateBackendFactory.CHECKPOINT_DIRECTORY_URI_CONF_KEY, checkpointDir.toURI().toString());
    config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, savepointDir.toURI().toString());
    cluster = new TestingCluster(config);
    cluster.start();
}
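The fields referenced above (numTaskManagers, slotsPerTaskManager, temporaryFolder, cluster) are declared elsewhere in RescalingITCase. A hedged sketch of the assumed companion declarations and a matching teardown; the concrete values are illustrative, not taken from the actual test:

// Illustrative companion declarations (values are assumptions):
private static final int numTaskManagers = 2;
private static final int slotsPerTaskManager = 2;
private static TestingCluster cluster;

@ClassRule
public static TemporaryFolder temporaryFolder = new TemporaryFolder();

@AfterClass
public static void teardown() throws Exception {
    if (cluster != null) {
        cluster.stop();
    }
}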
Use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.
The class JobManagerTest, method testStopSignalFail:
@Test
public void testStopSignalFail() throws Exception {
    new JavaTestKit(system) {
        {
            new Within(duration("15 seconds")) {

                @Override
                protected void run() {
                    // Setup
                    TestingCluster cluster = null;
                    try {
                        cluster = startTestingCluster(2, 1, DEFAULT_AKKA_ASK_TIMEOUT());
                        // Create a task
                        final JobVertex sender = new JobVertex("Sender");
                        sender.setParallelism(1);
                        // just block
                        sender.setInvokableClass(BlockingNoOpInvokable.class);
                        final JobGraph jobGraph = new JobGraph("Non-Stoppable batching test job", sender);
                        final JobID jid = jobGraph.getJobID();
                        final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
                        // we can set the leader session ID to None because we don't use this gateway to send messages
                        final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), null);
                        // Submit the job and wait for all vertices to be running
                        jobManagerGateway.tell(new SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT), testActorGateway);
                        expectMsgClass(JobSubmitSuccess.class);
                        jobManagerGateway.tell(new WaitForAllVerticesToBeRunning(jid), testActorGateway);
                        expectMsgClass(AllVerticesRunning.class);
                        jobManagerGateway.tell(new StopJob(jid), testActorGateway);
                        // - The test ----------------------------------------------------------------------
                        expectMsgClass(StoppingFailure.class);
                        jobManagerGateway.tell(new RequestExecutionGraph(jid), testActorGateway);
                        expectMsgClass(ExecutionGraphFound.class);
                    } finally {
                        if (cluster != null) {
                            cluster.shutdown();
                        }
                    }
                }
            };
        }
    };
}
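The StoppingFailure reply is expected because BlockingNoOpInvokable does not implement Flink's StoppableTask interface, so the stop signal cannot be delivered to the running task. As a hedged contrast, assuming a test invokable that does implement StoppableTask (such as the StoppableInvokable used elsewhere in Flink's JobManager tests), the happy path would look like:

// Contrast sketch (assumption: StoppableInvokable implements StoppableTask):
// stopping succeeds when every task of the job is stoppable.
sender.setInvokableClass(StoppableInvokable.class);
jobManagerGateway.tell(new StopJob(jid), testActorGateway);
expectMsgClass(StoppingSuccess.class);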
Use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.
The class PartialConsumePipelinedResultTest, method setUp:
@BeforeClass
public static void setUp() throws Exception {
    final Configuration config = new Configuration();
    config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, NUMBER_OF_TMS);
    config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, NUMBER_OF_SLOTS_PER_TM);
    config.setString(ConfigConstants.AKKA_ASK_TIMEOUT, TestingUtils.DEFAULT_AKKA_ASK_TIMEOUT());
    config.setInteger(ConfigConstants.TASK_MANAGER_NETWORK_NUM_BUFFERS_KEY, NUMBER_OF_NETWORK_BUFFERS);
    flink = new TestingCluster(config, true);
    flink.start();
}
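As above, the constants and the flink field are declared elsewhere in PartialConsumePipelinedResultTest. A sketch of the assumed declarations plus a matching teardown; the values are illustrative:

// Illustrative companion declarations (values are assumptions):
private static final int NUMBER_OF_TMS = 1;
private static final int NUMBER_OF_SLOTS_PER_TM = 1;
private static final int NUMBER_OF_NETWORK_BUFFERS = 128;
private static TestingCluster flink;

@AfterClass
public static void tearDown() throws Exception {
    if (flink != null) {
        flink.shutdown();
    }
}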
Use of org.apache.flink.runtime.testingUtils.TestingCluster in project flink by apache.
The class ZooKeeperLeaderElectionITCase, method testJobExecutionOnClusterWithLeaderReelection:
/**
 * Tests that a job can be executed after a new leader has been elected. For all leaders
 * except the last one, the job is blocking. The JobManager is terminated while executing
 * the blocking job. Once only one JobManager is left, the test checks that a non-blocking
 * job can be successfully executed.
 */
@Test
public void testJobExecutionOnClusterWithLeaderReelection() throws Exception {
    int numJMs = 10;
    int numTMs = 2;
    int numSlotsPerTM = 3;
    int parallelism = numTMs * numSlotsPerTM;
    File rootFolder = tempFolder.getRoot();
    Configuration configuration = ZooKeeperTestUtils.createZooKeeperHAConfig(zkServer.getConnectString(), rootFolder.getPath());
    configuration.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, numJMs);
    configuration.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTMs);
    configuration.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlotsPerTM);
// we "effectively" disable the automatic RecoverAllJobs message and sent it manually to make
// sure that all TMs have registered to the JM prior to issueing the RecoverAllJobs message
configuration.setString(ConfigConstants.AKKA_ASK_TIMEOUT, AkkaUtils.INF_TIMEOUT().toString());
    Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(true);
    JobVertex sender = new JobVertex("sender");
    JobVertex receiver = new JobVertex("receiver");
    sender.setInvokableClass(Tasks.Sender.class);
    receiver.setInvokableClass(Tasks.BlockingOnceReceiver.class);
    sender.setParallelism(parallelism);
    receiver.setParallelism(parallelism);
    receiver.connectNewDataSetAsInput(sender, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
    SlotSharingGroup slotSharingGroup = new SlotSharingGroup();
    sender.setSlotSharingGroup(slotSharingGroup);
    receiver.setSlotSharingGroup(slotSharingGroup);
    final JobGraph graph = new JobGraph("Blocking test job", sender, receiver);
    final TestingCluster cluster = new TestingCluster(configuration);
    ActorSystem clientActorSystem = null;
    Thread thread = null;
    JobSubmitterRunnable jobSubmission = null;
    try {
        cluster.start();
        clientActorSystem = cluster.startJobClientActorSystem(graph.getJobID());
        final ActorSystem clientAS = clientActorSystem;
        jobSubmission = new JobSubmitterRunnable(clientAS, cluster, graph);
        thread = new Thread(jobSubmission);
        thread.start();
        Deadline deadline = timeout.$times(3).fromNow();
        // Kill all JobManagers except for the last one
        for (int i = 0; i < numJMs; i++) {
            ActorGateway jm = cluster.getLeaderGateway(deadline.timeLeft());
            cluster.waitForTaskManagersToBeRegisteredAtJobManager(jm.actor());
            // recover all jobs, sent manually
            log.info("Sent recover all jobs manually to job manager {}.", jm.path());
            jm.tell(JobManagerMessages.getRecoverAllJobs());
            if (i < numJMs - 1) {
                Future<Object> future = jm.ask(new WaitForAllVerticesToBeRunningOrFinished(graph.getJobID()), deadline.timeLeft());
                Await.ready(future, deadline.timeLeft());
                cluster.clearLeader();
                if (i == numJMs - 2) {
                    Tasks.BlockingOnceReceiver$.MODULE$.blocking_$eq(false);
                }
                log.info("Kill job manager {}.", jm.path());
                jm.tell(TestingJobManagerMessages.getDisablePostStop());
                jm.tell(Kill.getInstance());
            }
        }
        log.info("Waiting for submitter thread to terminate.");
        thread.join(deadline.timeLeft().toMillis());
        log.info("Submitter thread has terminated.");
        if (thread.isAlive()) {
fail("The job submission thread did not stop (meaning it did not succeeded in" + "executing the test job.");
        }
        Await.result(jobSubmission.resultPromise.future(), deadline.timeLeft());
    } finally {
        if (clientActorSystem != null) {
            cluster.shutdownJobClientActorSystem(clientActorSystem);
        }
        if (thread != null && thread.isAlive()) {
            jobSubmission.finished = true;
        }
        cluster.stop();
    }
}
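JobSubmitterRunnable is a helper that is not included in this excerpt; the test only relies on its constructor shape and its resultPromise and finished members. A minimal hedged sketch under the assumption that the helper submits the graph and blocks until completion (the real test routes the submission through JobClient using the dedicated client actor system, whose exact signature is version-dependent):

// Hedged sketch of the helper used above; the real class in Flink's test
// sources may differ in how it performs the submission.
public static class JobSubmitterRunnable implements Runnable {

    // Scala promise completed with the job result; the test awaits its future.
    final Promise<JobExecutionResult> resultPromise = new scala.concurrent.impl.Promise.DefaultPromise<>();

    // Set by the test in its finally block to tell the submitter to give up.
    volatile boolean finished = false;

    private final ActorSystem clientActorSystem;
    private final TestingCluster cluster;
    private final JobGraph graph;

    public JobSubmitterRunnable(ActorSystem actorSystem, TestingCluster cluster, JobGraph graph) {
        this.clientActorSystem = actorSystem;
        this.cluster = cluster;
        this.graph = graph;
    }

    @Override
    public void run() {
        try {
            // Assumption: submit-and-wait via the mini cluster; the real test
            // uses JobClient together with clientActorSystem instead.
            JobExecutionResult result = cluster.submitJobAndWait(graph, false);
            resultPromise.success(result);
        } catch (Exception e) {
            resultPromise.failure(e);
        }
    }
}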