use of org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph in project flink by apache.
the class BackPressureStatsTrackerITCase method testBackPressuredProducer.
/**
 * Tests a simple fake-back pressured task. Back pressure is assumed when
 * sampled stack traces are in blocking buffer requests.
 *
 * <p>The test runs in three phases:
 * <ol>
 *   <li>All buffers of the test buffer pool are requested up front, the job is submitted
 *       and every subtask is expected to report a back pressure ratio of 1.0.</li>
 *   <li>The buffers are recycled and every subtask is expected to report a ratio of 0.0.</li>
 *   <li>The job is cancelled and triggering another sample for the removed job is
 *       expected to be rejected.</li>
 * </ol>
 *
 * @throws Exception propagated from setting up the test environment
 */
@Test
public void testBackPressuredProducer() throws Exception {
    new JavaTestKit(testActorSystem) {
        {
            final FiniteDuration deadline = new FiniteDuration(60, TimeUnit.SECONDS);

            // The JobGraph
            final JobGraph jobGraph = new JobGraph();
            final int parallelism = 4;

            final JobVertex task = new JobVertex("Task");
            task.setInvokableClass(BackPressuredTask.class);
            task.setParallelism(parallelism);

            jobGraph.addVertex(task);

            ActorGateway jobManager = null;
            ActorGateway taskManager = null;

            //
            // 1) Consume all buffers at first (no buffers for the test task)
            //
            testBufferPool = networkBufferPool.createBufferPool(1, Integer.MAX_VALUE);
            final List<Buffer> buffers = new ArrayList<>();
            while (true) {
                Buffer buffer = testBufferPool.requestBuffer();
                if (buffer != null) {
                    buffers.add(buffer);
                } else {
                    break;
                }
            }

            try {
                jobManager = TestingUtils.createJobManager(
                        testActorSystem,
                        TestingUtils.defaultExecutor(),
                        TestingUtils.defaultExecutor(),
                        new Configuration());

                final Configuration config = new Configuration();
                config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, parallelism);

                taskManager = TestingUtils.createTaskManager(
                        testActorSystem, jobManager, config, true, true);

                // Final alias so the anonymous Within block can capture the gateway.
                final ActorGateway jm = jobManager;

                new Within(deadline) {
                    @Override
                    protected void run() {
                        try {
                            ActorGateway testActor = new AkkaActorGateway(getTestActor(), null);

                            // Submit the job and wait until it is running
                            JobClient.submitJobDetached(
                                    jm,
                                    config,
                                    jobGraph,
                                    deadline,
                                    ClassLoader.getSystemClassLoader());

                            jm.tell(new WaitForAllVerticesToBeRunning(jobGraph.getJobID()), testActor);

                            expectMsgEquals(new AllVerticesRunning(jobGraph.getJobID()));

                            // Get the ExecutionGraph
                            jm.tell(new RequestExecutionGraph(jobGraph.getJobID()), testActor);

                            ExecutionGraphFound executionGraphResponse =
                                    expectMsgClass(ExecutionGraphFound.class);

                            ExecutionGraph executionGraph =
                                    (ExecutionGraph) executionGraphResponse.executionGraph();
                            ExecutionJobVertex vertex = executionGraph.getJobVertex(task.getID());

                            StackTraceSampleCoordinator coordinator = new StackTraceSampleCoordinator(
                                    testActorSystem.dispatcher(), 60000);

                            // Verify back pressure (clean up interval can be ignored)
                            BackPressureStatsTracker statsTracker = new BackPressureStatsTracker(
                                    coordinator,
                                    100 * 1000,
                                    20,
                                    Time.milliseconds(10L));

                            int numAttempts = 10;

                            int nextSampleId = 0;

                            // Verify that all tasks are back pressured. The check is
                            // retried because a sample may be taken before every task
                            // is blocked requesting the buffer.
                            for (int attempt = 0; attempt < numAttempts; attempt++) {
                                try {
                                    OperatorBackPressureStats stats = triggerStatsSample(statsTracker, vertex);

                                    // Each attempt is expected to consume one sample ID.
                                    assertEquals(nextSampleId + attempt, stats.getSampleId());
                                    assertEquals(parallelism, stats.getNumberOfSubTasks());
                                    assertEquals(1.0, stats.getMaxBackPressureRatio(), 0.0);

                                    for (int i = 0; i < parallelism; i++) {
                                        assertEquals(1.0, stats.getBackPressureRatio(i), 0.0);
                                    }

                                    nextSampleId = stats.getSampleId() + 1;

                                    break;
                                } catch (Throwable t) {
                                    if (attempt == numAttempts - 1) {
                                        // Out of retries: surface the last failure.
                                        throw t;
                                    } else {
                                        Thread.sleep(500);
                                    }
                                }
                            }

                            //
                            // 2) Release all buffers and let the tasks grab one
                            //
                            for (Buffer buf : buffers) {
                                buf.recycle();
                            }
                            // These buffers are no longer held by the test. Forget them so
                            // the cleanup in the outer finally does not recycle them twice.
                            buffers.clear();

                            // Wait until the pool reports enough available segments. The
                            // tasks grab them and then immediately release them.
                            while (testBufferPool.getNumberOfAvailableMemorySegments() < 100) {
                                Thread.sleep(100);
                            }

                            // Verify that no task is back pressured any more.
                            for (int attempt = 0; attempt < numAttempts; attempt++) {
                                try {
                                    OperatorBackPressureStats stats = triggerStatsSample(statsTracker, vertex);

                                    assertEquals(nextSampleId + attempt, stats.getSampleId());
                                    assertEquals(parallelism, stats.getNumberOfSubTasks());

                                    // Verify that no task is back pressured
                                    for (int i = 0; i < parallelism; i++) {
                                        assertEquals(0.0, stats.getBackPressureRatio(i), 0.0);
                                    }

                                    break;
                                } catch (Throwable t) {
                                    if (attempt == numAttempts - 1) {
                                        // Out of retries: surface the last failure.
                                        throw t;
                                    } else {
                                        Thread.sleep(500);
                                    }
                                }
                            }

                            // Shut down: ask to be notified once the job has been removed
                            jm.tell(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobGraph.getJobID()), testActor);

                            // Cancel job
                            jm.tell(new JobManagerMessages.CancelJob(jobGraph.getJobID()));

                            // Response to removal notification
                            expectMsgEquals(true);

                            //
                            // 3) Trigger stats for archived job
                            //
                            statsTracker.invalidateOperatorStatsCache();
                            assertFalse("Unexpected trigger", statsTracker.triggerStackTraceSample(vertex));

                        } catch (Exception e) {
                            e.printStackTrace();
                            fail(e.getMessage());
                        }
                    }
                };
            } finally {
                TestingUtils.stopActor(jobManager);
                TestingUtils.stopActor(taskManager);

                // Only buffers still held by the test remain in the list at this point
                // (i.e. when the test failed before phase 2 released them).
                for (Buffer buf : buffers) {
                    buf.recycle();
                }

                testBufferPool.lazyDestroy();
            }
        }
    };
}
use of org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph in project flink by apache.
the class JobManagerTest method testRequestPartitionStateUnregisteredExecution.
/**
 * Tests the JobManager response when the execution is not registered with
 * the ExecutionGraph.
 *
 * <p>A job with one immediately finishing producer and one blocking producer is
 * submitted. Once the finishing producer has reached {@code FINISHED}, the state of
 * its result partition is requested and expected to be {@code FINISHED}.
 */
@Test
public void testRequestPartitionStateUnregisteredExecution() throws Exception {
    new JavaTestKit(system) {
        {
            new Within(duration("15 seconds")) {
                @Override
                protected void run() {
                    TestingCluster testingCluster = null;

                    try {
                        testingCluster = startTestingCluster(4, 1, DEFAULT_AKKA_ASK_TIMEOUT());

                        final IntermediateDataSetID resultId = new IntermediateDataSetID();

                        // Producer that finishes right away
                        final JobVertex finishingSender = new JobVertex("Sender");
                        finishingSender.setParallelism(1);
                        finishingSender.setInvokableClass(NoOpInvokable.class);
                        finishingSender.createAndAddResultDataSet(resultId, PIPELINED);

                        // Producer that keeps blocking
                        final JobVertex blockingSender = new JobVertex("Blocking Sender");
                        blockingSender.setParallelism(1);
                        blockingSender.setInvokableClass(BlockingNoOpInvokable.class);
                        blockingSender.createAndAddResultDataSet(new IntermediateDataSetID(), PIPELINED);

                        final JobGraph graph = new JobGraph(
                                "Fast finishing producer test job", finishingSender, blockingSender);
                        final JobID jobId = graph.getJobID();

                        final ActorGateway jmGateway =
                                testingCluster.getLeaderGateway(TestingUtils.TESTING_DURATION());

                        // The leader session ID may be null: this gateway only receives replies
                        final ActorGateway replyTo = new AkkaActorGateway(getTestActor(), null);

                        // Submit and wait until every vertex is running (or already finished)
                        jmGateway.tell(new SubmitJob(graph, ListeningBehaviour.EXECUTION_RESULT), replyTo);
                        expectMsgClass(JobSubmitSuccess.class);

                        jmGateway.tell(new WaitForAllVerticesToBeRunningOrFinished(jobId), replyTo);
                        expectMsgClass(AllVerticesRunning.class);

                        // Fetch the ExecutionGraph to get hold of the producer's runtime state
                        Future<Object> graphFuture =
                                jmGateway.ask(new RequestExecutionGraph(graph.getJobID()), remaining());
                        ExecutionGraphFound graphFound =
                                (ExecutionGraphFound) Await.result(graphFuture, remaining());
                        ExecutionGraph executionGraph = (ExecutionGraph) graphFound.executionGraph();

                        ExecutionVertex producerVertex =
                                executionGraph.getJobVertex(finishingSender.getID()).getTaskVertices()[0];

                        // Poll until the fast producer is done
                        while (ExecutionState.FINISHED != producerVertex.getExecutionState()) {
                            Thread.sleep(1);
                        }

                        IntermediateResultPartition producedPartition =
                                producerVertex.getProducedPartitions().values().iterator().next();

                        ResultPartitionID producedPartitionId = new ResultPartitionID(
                                producedPartition.getPartitionId(),
                                producerVertex.getCurrentExecutionAttempt().getAttemptId());

                        // Producer finished, request state
                        Object stateRequest =
                                new RequestPartitionProducerState(jobId, resultId, producedPartitionId);

                        Future<Object> rawStateFuture = jmGateway.ask(stateRequest, getRemainingTime());
                        Future<ExecutionState> stateFuture = rawStateFuture.mapTo(
                                ClassTag$.MODULE$.<ExecutionState>apply(ExecutionState.class));

                        assertEquals(
                                ExecutionState.FINISHED,
                                Await.result(stateFuture, getRemainingTime()));
                    } catch (Exception e) {
                        e.printStackTrace();
                        fail(e.getMessage());
                    } finally {
                        if (testingCluster != null) {
                            testingCluster.shutdown();
                        }
                    }
                }
            };
        }
    };
}
use of org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph in project flink by apache.
the class JobManagerTest method testRequestPartitionState.
/**
 * Tests responses to partition state requests.
 *
 * <p>Three cases are covered against a running job with a single blocking producer:
 * every {@link ExecutionState} set on the producer vertex is echoed back, a request
 * for an unknown execution fails, and a request for an unknown job fails.
 */
@Test
public void testRequestPartitionState() throws Exception {
    new JavaTestKit(system) {
        {
            new Within(duration("15 seconds")) {
                @Override
                protected void run() {
                    TestingCluster testingCluster = null;

                    try {
                        testingCluster = startTestingCluster(2, 1, DEFAULT_AKKA_ASK_TIMEOUT());

                        final IntermediateDataSetID resultId = new IntermediateDataSetID();

                        // Single producer task that simply blocks
                        final JobVertex blockingSender = new JobVertex("Sender");
                        blockingSender.setParallelism(1);
                        blockingSender.setInvokableClass(BlockingNoOpInvokable.class);
                        blockingSender.createAndAddResultDataSet(resultId, PIPELINED);

                        final JobGraph graph = new JobGraph("Blocking test job", blockingSender);
                        final JobID jobId = graph.getJobID();

                        final ActorGateway jmGateway =
                                testingCluster.getLeaderGateway(TestingUtils.TESTING_DURATION());

                        // The leader session ID may be null: this gateway only receives replies
                        final ActorGateway replyTo = new AkkaActorGateway(getTestActor(), null);

                        // Submit and wait until every vertex is running
                        jmGateway.tell(new SubmitJob(graph, ListeningBehaviour.EXECUTION_RESULT), replyTo);
                        expectMsgClass(JobSubmitSuccess.class);

                        jmGateway.tell(new WaitForAllVerticesToBeRunningOrFinished(jobId), replyTo);
                        expectMsgClass(AllVerticesRunning.class);

                        // Mock execution ID of the task that would request the partition state
                        final ExecutionAttemptID requestingTask = new ExecutionAttemptID();

                        // Runtime information comes from the ExecutionGraph
                        jmGateway.tell(new RequestExecutionGraph(jobId), replyTo);
                        final ExecutionGraphFound graphFound = expectMsgClass(ExecutionGraphFound.class);
                        final ExecutionGraph executionGraph = (ExecutionGraph) graphFound.executionGraph();

                        final ExecutionVertex producerVertex =
                                executionGraph.getJobVertex(blockingSender.getID()).getTaskVertices()[0];

                        final IntermediateResultPartition producedPartition =
                                producerVertex.getProducedPartitions().values().iterator().next();

                        final ResultPartitionID producedPartitionId = new ResultPartitionID(
                                producedPartition.getPartitionId(),
                                producerVertex.getCurrentExecutionAttempt().getAttemptId());

                        // - The test ----------------------------------------------------------

                        // 1. All execution states: whatever is set on the vertex is reported
                        final RequestPartitionProducerState knownPartitionRequest =
                                new RequestPartitionProducerState(jobId, resultId, producedPartitionId);

                        for (ExecutionState expectedState : ExecutionState.values()) {
                            ExecutionGraphTestUtils.setVertexState(producerVertex, expectedState);

                            Future<ExecutionState> stateFuture = jmGateway
                                    .ask(knownPartitionRequest, getRemainingTime())
                                    .mapTo(ClassTag$.MODULE$.<ExecutionState>apply(ExecutionState.class));

                            ExecutionState reportedState = Await.result(stateFuture, getRemainingTime());
                            assertEquals(expectedState, reportedState);
                        }

                        // 2. Non-existing execution
                        final RequestPartitionProducerState unknownExecutionRequest =
                                new RequestPartitionProducerState(jobId, resultId, new ResultPartitionID());
                        final Future<?> unknownExecutionFuture =
                                jmGateway.ask(unknownExecutionRequest, getRemainingTime());

                        try {
                            Await.result(unknownExecutionFuture, getRemainingTime());
                            fail("Did not fail with expected RuntimeException");
                        } catch (RuntimeException e) {
                            // The cause must be exactly an IllegalArgumentException
                            assertEquals(IllegalArgumentException.class, e.getCause().getClass());
                        }

                        // 3. Non-existing job
                        final RequestPartitionProducerState unknownJobRequest =
                                new RequestPartitionProducerState(new JobID(), resultId, new ResultPartitionID());
                        final Future<?> unknownJobFuture =
                                jmGateway.ask(unknownJobRequest, getRemainingTime());

                        try {
                            Await.result(unknownJobFuture, getRemainingTime());
                            fail("Did not fail with expected IllegalArgumentException");
                        } catch (IllegalArgumentException ignored) {
                            // expected
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                        fail(e.getMessage());
                    } finally {
                        if (testingCluster != null) {
                            testingCluster.shutdown();
                        }
                    }
                }
            };
        }
    };
}
use of org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph in project flink by apache.
the class JobManagerTest method testRequestPartitionStateMoreRecentExecutionAttempt.
/**
 * Tests the JobManager response when the execution is not registered with
 * the ExecutionGraph anymore and a new execution attempt is available.
 *
 * <p>The producer vertex is reset for a new execution after it finished; a state
 * request that still names the old attempt is expected to fail with a
 * {@code PartitionProducerDisposedException}.
 */
@Test
public void testRequestPartitionStateMoreRecentExecutionAttempt() throws Exception {
    new JavaTestKit(system) {
        {
            new Within(duration("15 seconds")) {
                @Override
                protected void run() {
                    TestingCluster testingCluster = null;

                    try {
                        testingCluster = startTestingCluster(4, 1, DEFAULT_AKKA_ASK_TIMEOUT());

                        final IntermediateDataSetID resultId = new IntermediateDataSetID();

                        // Producer that finishes right away
                        final JobVertex finishingSender = new JobVertex("Sender");
                        finishingSender.setParallelism(1);
                        finishingSender.setInvokableClass(NoOpInvokable.class);
                        finishingSender.createAndAddResultDataSet(resultId, PIPELINED);

                        // Producer that keeps blocking
                        final JobVertex blockingSender = new JobVertex("Blocking Sender");
                        blockingSender.setParallelism(1);
                        blockingSender.setInvokableClass(BlockingNoOpInvokable.class);
                        blockingSender.createAndAddResultDataSet(new IntermediateDataSetID(), PIPELINED);

                        final JobGraph graph = new JobGraph(
                                "Fast finishing producer test job", finishingSender, blockingSender);
                        final JobID jobId = graph.getJobID();

                        final ActorGateway jmGateway =
                                testingCluster.getLeaderGateway(TestingUtils.TESTING_DURATION());

                        // The leader session ID may be null: this gateway only receives replies
                        final ActorGateway replyTo = new AkkaActorGateway(getTestActor(), null);

                        // Submit and wait until every vertex is running (or already finished)
                        jmGateway.tell(new SubmitJob(graph, ListeningBehaviour.EXECUTION_RESULT), replyTo);
                        expectMsgClass(JobManagerMessages.JobSubmitSuccess.class);

                        jmGateway.tell(new WaitForAllVerticesToBeRunningOrFinished(jobId), replyTo);
                        expectMsgClass(TestingJobManagerMessages.AllVerticesRunning.class);

                        // Fetch the ExecutionGraph to get hold of the producer's runtime state
                        Future<Object> graphFuture =
                                jmGateway.ask(new RequestExecutionGraph(graph.getJobID()), remaining());
                        ExecutionGraphFound graphFound =
                                (ExecutionGraphFound) Await.result(graphFuture, remaining());
                        ExecutionGraph executionGraph = (ExecutionGraph) graphFound.executionGraph();

                        ExecutionVertex producerVertex =
                                executionGraph.getJobVertex(finishingSender.getID()).getTaskVertices()[0];

                        // Poll until the fast producer is done
                        while (ExecutionState.FINISHED != producerVertex.getExecutionState()) {
                            Thread.sleep(1);
                        }

                        IntermediateResultPartition producedPartition =
                                producerVertex.getProducedPartitions().values().iterator().next();

                        // Captured before the reset, so it names the old execution attempt
                        ResultPartitionID stalePartitionId = new ResultPartitionID(
                                producedPartition.getPartitionId(),
                                producerVertex.getCurrentExecutionAttempt().getAttemptId());

                        // Reset execution => new execution attempt
                        producerVertex.resetForNewExecution();

                        // Request the state using the partition ID of the previous attempt
                        Object stateRequest = new JobManagerMessages.RequestPartitionProducerState(
                                jobId, resultId, stalePartitionId);

                        Future<?> stateFuture = jmGateway.ask(stateRequest, getRemainingTime());

                        try {
                            Await.result(stateFuture, getRemainingTime());
                            fail("Did not fail with expected Exception");
                        } catch (PartitionProducerDisposedException ignored) {
                            // expected
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                        fail(e.getMessage());
                    } finally {
                        if (testingCluster != null) {
                            testingCluster.shutdown();
                        }
                    }
                }
            };
        }
    };
}
use of org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph in project flink by apache.
the class StackTraceSampleCoordinatorITCase method testTaskClearedWhileSampling.
/**
 * Tests that a cleared task is answered with a partial success response.
 *
 * <p>A single blocking task is submitted, a stack trace sample is triggered on it and
 * the job is cancelled while the sample is (hopefully) still in progress. The
 * submit/sample/cancel cycle is retried up to {@code maxAttempts} times, doubling the
 * pause before cancelling each time, until the sample future completes without an
 * exception.
 */
@Test
public void testTaskClearedWhileSampling() throws Exception {
new JavaTestKit(testActorSystem) {
{
final FiniteDuration deadline = new FiniteDuration(60, TimeUnit.SECONDS);
// The JobGraph
final JobGraph jobGraph = new JobGraph();
final int parallelism = 1;
final JobVertex task = new JobVertex("Task");
task.setInvokableClass(BlockingNoOpInvokable.class);
task.setParallelism(parallelism);
jobGraph.addVertex(task);
ActorGateway jobManger = null;
ActorGateway taskManager = null;
try {
jobManger = TestingUtils.createJobManager(testActorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), new Configuration());
final Configuration config = new Configuration();
config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, parallelism);
taskManager = TestingUtils.createTaskManager(testActorSystem, jobManger, config, true, true);
// Final alias so the anonymous Within block can capture the gateway.
final ActorGateway jm = jobManger;
new Within(deadline) {
@Override
protected void run() {
try {
ActorGateway testActor = new AkkaActorGateway(getTestActor(), null);
int maxAttempts = 10;
int sleepTime = 100;
// Each attempt resubmits the same JobGraph; sleepTime doubles per attempt so
// later attempts give the sampling more time before the job is cancelled.
for (int i = 0; i < maxAttempts; i++, sleepTime *= 2) {
// Submit the job and wait until it is running
JobClient.submitJobDetached(jm, config, jobGraph, deadline, ClassLoader.getSystemClassLoader());
jm.tell(new WaitForAllVerticesToBeRunning(jobGraph.getJobID()), testActor);
expectMsgEquals(new AllVerticesRunning(jobGraph.getJobID()));
// Get the ExecutionGraph
jm.tell(new RequestExecutionGraph(jobGraph.getJobID()), testActor);
ExecutionGraphFound executionGraphResponse = expectMsgClass(ExecutionGraphFound.class);
ExecutionGraph executionGraph = (ExecutionGraph) executionGraphResponse.executionGraph();
ExecutionJobVertex vertex = executionGraph.getJobVertex(task.getID());
StackTraceSampleCoordinator coordinator = new StackTraceSampleCoordinator(testActorSystem.dispatcher(), 60000);
// NOTE(review): the original comment here is truncated ("sampling."). The very
// large second argument is presumably the number of samples, chosen so that the
// job is likely to be removed while sampling is still running - confirm against
// StackTraceSampleCoordinator#triggerStackTraceSample.
Future<StackTraceSample> sampleFuture = coordinator.triggerStackTraceSample(vertex.getTaskVertices(), // tasks to sample
21474700 * 100, Time.milliseconds(10L), 0);
// Wait before cancelling so that some samples
// are actually taken.
Thread.sleep(sleepTime);
// Cancel job. The removal notification is requested first so that the
// finally block below can wait for the job to be removed.
scala.concurrent.Future<?> removeFuture = jm.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobGraph.getJobID()), remaining());
jm.tell(new JobManagerMessages.CancelJob(jobGraph.getJobID()));
try {
// Throws Exception on failure
sampleFuture.get(remaining().toMillis(), TimeUnit.MILLISECONDS);
// The sample completed without an exception, so stop retrying.
// NOTE(review): the original comment is truncated ("partial result.");
// presumably this is the expected partial result - confirm.
break;
} catch (Throwable t) {
// We were too fast in cancelling the job.
// Fall through and retry.
} finally {
// Always wait for the removal before the next attempt resubmits the job.
Await.ready(removeFuture, remaining());
}
}
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
};
} finally {
TestingUtils.stopActor(jobManger);
TestingUtils.stopActor(taskManager);
}
}
};
}
Aggregations