Example use of org.apache.flink.runtime.testingUtils.TestingCluster in the Apache Flink project.
Class JobManagerTest, method testRequestPartitionStateMoreRecentExecutionAttempt.
/**
 * Tests the JobManager response when the execution is not registered with
 * the ExecutionGraph anymore and a new execution attempt is available.
 *
 * <p>The test submits a job with one finishing and one blocking producer, waits until the
 * finishing producer's vertex reports {@code FINISHED}, resets that vertex for a new execution
 * and then requests the partition producer state using the ID of the old execution attempt.
 * The request is expected to fail with a {@code PartitionProducerDisposedException}.
 */
@Test
public void testRequestPartitionStateMoreRecentExecutionAttempt() throws Exception {
    new JavaTestKit(system) {
        {
            new Within(duration("15 seconds")) {
                @Override
                protected void run() {
                    // Setup
                    TestingCluster cluster = null;
                    try {
                        cluster = startTestingCluster(4, 1, DEFAULT_AKKA_ASK_TIMEOUT());
                        final IntermediateDataSetID rid = new IntermediateDataSetID();

                        // Create a task that just finishes
                        final JobVertex sender = new JobVertex("Sender");
                        sender.setParallelism(1);
                        sender.setInvokableClass(NoOpInvokable.class);
                        sender.createAndAddResultDataSet(rid, PIPELINED);

                        // Create a second task that just blocks
                        final JobVertex sender2 = new JobVertex("Blocking Sender");
                        sender2.setParallelism(1);
                        sender2.setInvokableClass(BlockingNoOpInvokable.class);
                        sender2.createAndAddResultDataSet(new IntermediateDataSetID(), PIPELINED);

                        final JobGraph jobGraph = new JobGraph("Fast finishing producer test job", sender, sender2);
                        final JobID jid = jobGraph.getJobID();
                        final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());

                        // we can set the leader session ID to None because we don't use this gateway to send messages
                        final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), null);

                        // Submit the job and wait for all vertices to be running
                        jobManagerGateway.tell(new SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT), testActorGateway);
                        expectMsgClass(JobManagerMessages.JobSubmitSuccess.class);
                        jobManagerGateway.tell(new WaitForAllVerticesToBeRunningOrFinished(jid), testActorGateway);
                        expectMsgClass(TestingJobManagerMessages.AllVerticesRunning.class);

                        // Fetch the execution graph to inspect the sender's single subtask
                        Future<Object> egFuture = jobManagerGateway.ask(new RequestExecutionGraph(jobGraph.getJobID()), remaining());
                        ExecutionGraphFound egFound = (ExecutionGraphFound) Await.result(egFuture, remaining());
                        ExecutionGraph eg = (ExecutionGraph) egFound.executionGraph();
                        ExecutionVertex vertex = eg.getJobVertex(sender.getID()).getTaskVertices()[0];

                        // Poll until the sender has finished. The loop is bounded by the time
                        // left in the enclosing Within block so that a vertex which never
                        // finishes fails the test instead of looping forever.
                        final long pollDeadlineNanos = System.nanoTime() + remaining().toNanos();
                        while (vertex.getExecutionState() != ExecutionState.FINISHED) {
                            if (System.nanoTime() - pollDeadlineNanos >= 0) {
                                fail("Sender did not reach state FINISHED in time, current state: " + vertex.getExecutionState());
                            }
                            Thread.sleep(1);
                        }

                        // Remember the partition ID that belongs to the finished (old) attempt
                        IntermediateResultPartition partition = vertex.getProducedPartitions().values().iterator().next();
                        ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), vertex.getCurrentExecutionAttempt().getAttemptId());

                        // Reset execution => new execution attempt
                        vertex.resetForNewExecution();

                        // Producer finished, request state
                        Object request = new JobManagerMessages.RequestPartitionProducerState(jid, rid, partitionId);
                        Future<?> producerStateFuture = jobManagerGateway.ask(request, getRemainingTime());
                        try {
                            Await.result(producerStateFuture, getRemainingTime());
                            fail("Did not fail with expected Exception");
                        } catch (PartitionProducerDisposedException ignored) {
                            // expected: the requested attempt is no longer the current one
                        }
                    } catch (Exception e) {
                        // run() cannot declare checked exceptions; rethrow as an assertion
                        // failure that keeps the original exception as its cause.
                        throw new AssertionError("Test failed with unexpected exception: " + e.getMessage(), e);
                    } finally {
                        if (cluster != null) {
                            cluster.shutdown();
                        }
                    }
                }
            };
        }
    };
}
Example use of org.apache.flink.runtime.testingUtils.TestingCluster in the Apache Flink project.
Class JobManagerHAJobGraphRecoveryITCase, method testJobPersistencyWhenJobManagerShutdown.
// ---------------------------------------------------------------------------------------------
/**
 * Verifies that the persisted data of an HA job is kept when the JobManager is shut down
 * while the job is still running.
 */
@Test
public void testJobPersistencyWhenJobManagerShutdown() throws Exception {
    final String zkQuorum = ZooKeeper.getConnectString();
    final String haStoragePath = FileStateBackendBasePath.getPath();
    final Configuration haConfig = ZooKeeperTestUtils.createZooKeeperHAConfig(zkQuorum, haStoragePath);

    // A single JobManager and a single TaskManager
    haConfig.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, 1);
    haConfig.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);

    final TestingCluster testingCluster = new TestingCluster(haConfig, false, false);
    try {
        final Deadline testDeadline = TestTimeOut.fromNow();

        // Bring up the JobManager and the TaskManager
        testingCluster.start(true);

        final JobGraph blockingJob = createBlockingJobGraph();

        // Guard against shut down races with a restart strategy: if the TM fails
        // before the JM, the job might otherwise be failed, which would lead to
        // removal of its state.
        final ExecutionConfig executionConfig = new ExecutionConfig();
        executionConfig.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 100));
        blockingJob.setExecutionConfig(executionConfig);

        final ActorGateway leader = testingCluster.getLeaderGateway(testDeadline.timeLeft());

        // Submit in detached mode, then block until the job is reported as RUNNING
        leader.tell(new SubmitJob(blockingJob, ListeningBehaviour.DETACHED));
        JobManagerActorTestUtils.waitForJobStatus(
                blockingJob.getJobID(), JobStatus.RUNNING, leader, testDeadline.timeLeft());
    } finally {
        testingCluster.shutdown();
    }

    // After the JM has been shut down, the persisted job data must still be
    // present in ZooKeeper.
    verifyRecoveryState(haConfig);
}
Example use of org.apache.flink.runtime.testingUtils.TestingCluster in the Apache Flink project.
Class JobManagerHAJobGraphRecoveryITCase, method testSubmitJobToNonLeader.
/**
 * Tests that submissions to non-leaders are handled.
 *
 * <p>A job is submitted to the non-leading one of two JobManagers. The test then checks that
 * the leading JobManager reports the job as RUNNING, that the non-leading JobManager
 * eventually answers with {@code JobNotFound}, and that the recovery state is clean after
 * the job has been cancelled and removed.
 */
@Test
public void testSubmitJobToNonLeader() throws Exception {
    final Configuration haConfig = ZooKeeperTestUtils.createZooKeeperHAConfig(
            ZooKeeper.getConnectString(), FileStateBackendBasePath.getPath());

    // Two JobManagers (one leader, one standby) and one TaskManager
    haConfig.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, 2);
    haConfig.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);

    final TestingCluster testingCluster = new TestingCluster(haConfig, false, false);
    try {
        final Deadline testDeadline = TestTimeOut.fromNow();

        // Bring up the JobManagers and the TaskManager
        testingCluster.start(true);

        final JobGraph blockingJob = createBlockingJobGraph();

        final List<ActorRef> jobManagerActors = testingCluster.getJobManagersAsJava();
        final ActorGateway leadingJobManager = testingCluster.getLeaderGateway(testDeadline.timeLeft());

        // Whichever of the two actors is not the leader becomes the submission target
        final int nonLeaderIndex = jobManagerActors.get(0).equals(leadingJobManager.actor()) ? 1 : 0;
        final ActorGateway nonLeadingJobManager =
                new AkkaActorGateway(jobManagerActors.get(nonLeaderIndex), null);

        log.info("Leading job manager: " + leadingJobManager);
        log.info("Non-leading job manager: " + nonLeadingJobManager);

        // Submit the job to the non-leader
        nonLeadingJobManager.tell(new SubmitJob(blockingJob, ListeningBehaviour.DETACHED));
        log.info("Submitted job graph to " + nonLeadingJobManager);

        // The behaviour under test: the job was handed to the non-leading JM, yet it is
        // the *leading* JM that is asked for (and must report) the RUNNING status.
        JobManagerActorTestUtils.waitForJobStatus(
                blockingJob.getJobID(), JobStatus.RUNNING, leadingJobManager, testDeadline.timeLeft());

        log.info("Wait that the non-leader removes the submitted job.");

        // Poll the **non-leading** JM until it no longer knows the job graph or the
        // deadline expires.
        boolean jobDropped = false;
        while (testDeadline.hasTimeLeft()) {
            final JobStatusResponse response = JobManagerActorTestUtils.requestJobStatus(
                    blockingJob.getJobID(), nonLeadingJobManager, testDeadline.timeLeft());

            if (response instanceof JobManagerMessages.JobNotFound) {
                jobDropped = true;
                break;
            }

            final JobManagerMessages.CurrentJobStatus current =
                    (JobManagerMessages.CurrentJobStatus) response;
            log.info(current.status().toString());
            Thread.sleep(100);
        }

        if (!jobDropped) {
            fail("Non-leading JM was still holding reference to the job graph.");
        }

        // Register for the removal notification first, then cancel and wait for it
        final Future<Object> removalFuture = leadingJobManager.ask(
                new TestingJobManagerMessages.NotifyWhenJobRemoved(blockingJob.getJobID()),
                testDeadline.timeLeft());
        leadingJobManager.tell(new JobManagerMessages.CancelJob(blockingJob.getJobID()));
        Await.ready(removalFuture, testDeadline.timeLeft());
    } finally {
        testingCluster.shutdown();
    }

    // Nothing may be left behind in the recovery state
    verifyCleanRecoveryState(haConfig);
}
Example use of org.apache.flink.runtime.testingUtils.TestingCluster in the Apache Flink project.
Class ZooKeeperLeaderElectionITCase, method testTaskManagerRegistrationAtReelectedLeader.
/**
 * Checks that, after the current leader has been terminated, all TaskManagers register at
 * the newly elected leader.
 *
 * <p>The check is repeated once per configured JobManager; each round ends by sending a
 * {@code PoisonPill} to the leader that was just verified.
 */
@Test
public void testTaskManagerRegistrationAtReelectedLeader() throws Exception {
    final int jobManagerCount = 10;
    final int taskManagerCount = 3;

    final File haStorageDir = tempFolder.getRoot();
    final Configuration haConfig = ZooKeeperTestUtils.createZooKeeperHAConfig(
            zkServer.getConnectString(), haStorageDir.getPath());
    haConfig.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, jobManagerCount);
    haConfig.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, taskManagerCount);

    final TestingCluster testingCluster = new TestingCluster(haConfig);
    try {
        testingCluster.start();

        for (int round = 1; round <= jobManagerCount; round++) {
            final ActorGateway currentLeader = testingCluster.getLeaderGateway(timeout);

            // Block until the TaskManagers have registered at the current leader
            testingCluster.waitForTaskManagersToBeRegisteredAtJobManager(currentLeader.actor());

            // Ask the leader how many TaskManagers it knows and compare with the configured number
            final Future<Object> countFuture = currentLeader.ask(
                    JobManagerMessages.getRequestNumberRegisteredTaskManager(), timeout);
            final int registeredCount = (Integer) Await.result(countFuture, timeout);
            assertEquals(taskManagerCount, registeredCount);

            // Clear the cluster's leader, then terminate the JobManager that was just checked
            testingCluster.clearLeader();
            currentLeader.tell(PoisonPill.getInstance());
        }
    } finally {
        testingCluster.stop();
    }
}
Example use of org.apache.flink.runtime.testingUtils.TestingCluster in the Apache Flink project.
Class AbstractQueryableStateITCase, method setup.
/**
 * Creates and starts the shared {@code TestingCluster} and the test actor system once
 * before the tests of this class run.
 *
 * <p>The cluster is configured with {@code NUM_TMS} TaskManagers offering
 * {@code NUM_SLOTS_PER_TM} slots each, an enabled queryable state server, and a single
 * network thread for both the queryable state client and server.
 *
 * <p>Any exception during setup is rethrown as an {@link AssertionError} that carries the
 * original exception as its cause, so the root cause shows up in the test report.
 */
@BeforeClass
public static void setup() {
    try {
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, 4);
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, NUM_TMS);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, NUM_SLOTS_PER_TM);
        config.setInteger(QueryableStateOptions.CLIENT_NETWORK_THREADS, 1);
        config.setBoolean(QueryableStateOptions.SERVER_ENABLE, true);
        config.setInteger(QueryableStateOptions.SERVER_NETWORK_THREADS, 1);

        cluster = new TestingCluster(config, false);
        cluster.start(true);

        TEST_ACTOR_SYSTEM = AkkaUtils.createDefaultActorSystem();
    } catch (Exception e) {
        // Keep the signature free of checked exceptions, but do not lose the cause.
        throw new AssertionError("Failed to set up the test cluster: " + e.getMessage(), e);
    }
}
Aggregations