Use of org.apache.flink.runtime.jobgraph.JobVertexID in the Apache Flink project: class TaskExecutorTest, method testSubmitTaskBeforeAcceptSlot.
/**
* Tests that the task executor can handle a SubmitTask request that arrives before the OfferSlot response.
*/
@Test
public void testSubmitTaskBeforeAcceptSlot() throws Exception {
final JobID jobId = new JobID();
final TestingSerialRpcService rpc = new TestingSerialRpcService();
final Configuration configuration = new Configuration();
final TaskManagerConfiguration taskManagerConfiguration = TaskManagerConfiguration.fromConfiguration(configuration);
final ResourceID resourceId = new ResourceID("foobar");
final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(resourceId, InetAddress.getLoopbackAddress(), 1234);
final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
final TimerService<AllocationID> timerService = mock(TimerService.class);
final TaskSlotTable taskSlotTable = new TaskSlotTable(Arrays.asList(mock(ResourceProfile.class), mock(ResourceProfile.class)), timerService);
final JobManagerTable jobManagerTable = new JobManagerTable();
final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation);
final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
final String resourceManagerAddress = "rm";
final UUID resourceManagerLeaderId = UUID.randomUUID();
final String jobManagerAddress = "jm";
final UUID jobManagerLeaderId = UUID.randomUUID();
final LeaderRetrievalService resourceManagerLeaderRetrievalService = new TestingLeaderRetrievalService(resourceManagerAddress, resourceManagerLeaderId);
final LeaderRetrievalService jobManagerLeaderRetrievalService = new TestingLeaderRetrievalService(jobManagerAddress, jobManagerLeaderId);
haServices.setResourceManagerLeaderRetriever(resourceManagerLeaderRetrievalService);
haServices.setJobMasterLeaderRetriever(jobId, jobManagerLeaderRetrievalService);
final ResourceManagerGateway resourceManagerGateway = mock(ResourceManagerGateway.class);
final InstanceID registrationId = new InstanceID();
when(resourceManagerGateway.registerTaskExecutor(eq(resourceManagerLeaderId), any(String.class), eq(resourceId), any(SlotReport.class), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new TaskExecutorRegistrationSuccess(registrationId, 1000L)));
final ResourceID jmResourceId = new ResourceID(jobManagerAddress);
final int blobPort = 42;
final AllocationID allocationId1 = new AllocationID();
final AllocationID allocationId2 = new AllocationID();
final SlotOffer offer1 = new SlotOffer(allocationId1, 0, ResourceProfile.UNKNOWN);
final JobMasterGateway jobMasterGateway = mock(JobMasterGateway.class);
when(jobMasterGateway.registerTaskManager(any(String.class), eq(taskManagerLocation), eq(jobManagerLeaderId), any(Time.class))).thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(new JMTMRegistrationSuccess(jmResourceId, blobPort)));
when(jobMasterGateway.getHostname()).thenReturn(jobManagerAddress);
rpc.registerGateway(resourceManagerAddress, resourceManagerGateway);
rpc.registerGateway(jobManagerAddress, jobMasterGateway);
final LibraryCacheManager libraryCacheManager = mock(LibraryCacheManager.class);
when(libraryCacheManager.getClassLoader(eq(jobId))).thenReturn(getClass().getClassLoader());
final JobManagerConnection jobManagerConnection = new JobManagerConnection(jobId, jmResourceId, jobMasterGateway, jobManagerLeaderId, mock(TaskManagerActions.class), mock(CheckpointResponder.class), libraryCacheManager, mock(ResultPartitionConsumableNotifier.class), mock(PartitionProducerStateChecker.class));
jobManagerTable.put(jobId, jobManagerConnection);
try {
final TaskExecutor taskManager = new TaskExecutor(taskManagerConfiguration, taskManagerLocation, rpc, mock(MemoryManager.class), mock(IOManager.class), mock(NetworkEnvironment.class), haServices, mock(HeartbeatServices.class, RETURNS_MOCKS), mock(MetricRegistry.class), mock(TaskManagerMetricGroup.class), mock(BroadcastVariableManager.class), mock(FileCache.class), taskSlotTable, jobManagerTable, jobLeaderService, testingFatalErrorHandler);
taskManager.start();
taskSlotTable.allocateSlot(0, jobId, allocationId1, Time.milliseconds(10000L));
taskSlotTable.allocateSlot(1, jobId, allocationId2, Time.milliseconds(10000L));
final JobVertexID jobVertexId = new JobVertexID();
JobInformation jobInformation = new JobInformation(jobId, name.getMethodName(), new SerializedValue<>(new ExecutionConfig()), new Configuration(), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList());
TaskInformation taskInformation = new TaskInformation(jobVertexId, "test task", 1, 1, TestInvokable.class.getName(), new Configuration());
SerializedValue<JobInformation> serializedJobInformation = new SerializedValue<>(jobInformation);
SerializedValue<TaskInformation> serializedJobVertexInformation = new SerializedValue<>(taskInformation);
final TaskDeploymentDescriptor tdd = new TaskDeploymentDescriptor(serializedJobInformation, serializedJobVertexInformation, new ExecutionAttemptID(), allocationId1, 0, 0, 0, null, Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList());
CompletableFuture<Iterable<SlotOffer>> offerResultFuture = new FlinkCompletableFuture<>();
// submit task first and then return acceptance response
when(jobMasterGateway.offerSlots(any(ResourceID.class), any(Iterable.class), eq(jobManagerLeaderId), any(Time.class))).thenReturn(offerResultFuture);
// we have to add the job after starting the TaskExecutor, because otherwise the job leader
// service has not been properly started. This will also offer the slots to the job master.
jobLeaderService.addJob(jobId, jobManagerAddress);
verify(jobMasterGateway).offerSlots(any(ResourceID.class), any(Iterable.class), eq(jobManagerLeaderId), any(Time.class));
// submit the task without having acknowledged the offered slots
taskManager.submitTask(tdd, jobManagerLeaderId);
// acknowledge the offered slots
offerResultFuture.complete(Collections.singleton(offer1));
verify(resourceManagerGateway).notifySlotAvailable(eq(resourceManagerLeaderId), eq(registrationId), eq(new SlotID(resourceId, 1)));
assertTrue(taskSlotTable.existsActiveSlot(jobId, allocationId1));
assertFalse(taskSlotTable.existsActiveSlot(jobId, allocationId2));
assertTrue(taskSlotTable.isSlotFree(1));
// check if a concurrent error occurred
testingFatalErrorHandler.rethrowError();
} finally {
rpc.stopService();
}
}
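A minimal, hedged sketch of the JobVertexID pattern the test above relies on: ids are generated randomly and identify a vertex inside TaskInformation. The hex round-trip via fromHexString is an assumption about this Flink version; if that helper is absent, the last two lines can be dropped.
// Sketch only: JobVertexID creation and (assumed) hex round-trip.
JobVertexID vertexId = new JobVertexID();               // random, statistically unique id, as in the test
String hex = vertexId.toString();                       // 32-character hex representation
JobVertexID parsed = JobVertexID.fromHexString(hex);    // assumed helper; parses the hex form back
assertEquals(vertexId, parsed);                         // round-trip yields an equal id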
Use of org.apache.flink.runtime.jobgraph.JobVertexID in the Apache Flink project: class TaskAsyncCallTest, method createTask.
private static Task createTask() throws Exception {
LibraryCacheManager libCache = mock(LibraryCacheManager.class);
when(libCache.getClassLoader(any(JobID.class))).thenReturn(ClassLoader.getSystemClassLoader());
ResultPartitionManager partitionManager = mock(ResultPartitionManager.class);
ResultPartitionConsumableNotifier consumableNotifier = mock(ResultPartitionConsumableNotifier.class);
PartitionProducerStateChecker partitionProducerStateChecker = mock(PartitionProducerStateChecker.class);
Executor executor = mock(Executor.class);
NetworkEnvironment networkEnvironment = mock(NetworkEnvironment.class);
when(networkEnvironment.getResultPartitionManager()).thenReturn(partitionManager);
when(networkEnvironment.getDefaultIOMode()).thenReturn(IOManager.IOMode.SYNC);
when(networkEnvironment.createKvStateTaskRegistry(any(JobID.class), any(JobVertexID.class))).thenReturn(mock(TaskKvStateRegistry.class));
JobInformation jobInformation = new JobInformation(new JobID(), "Job Name", new SerializedValue<>(new ExecutionConfig()), new Configuration(), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList());
TaskInformation taskInformation = new TaskInformation(new JobVertexID(), "Test Task", 1, 1, CheckpointsInOrderInvokable.class.getName(), new Configuration());
return new Task(jobInformation, taskInformation, new ExecutionAttemptID(), new AllocationID(), 0, 0, Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), 0, new TaskStateHandles(), mock(MemoryManager.class), mock(IOManager.class), networkEnvironment, mock(BroadcastVariableManager.class), mock(TaskManagerActions.class), mock(InputSplitProvider.class), mock(CheckpointResponder.class), libCache, mock(FileCache.class), new TestingTaskManagerRuntimeInfo(), mock(TaskMetricGroup.class), consumableNotifier, partitionProducerStateChecker, executor);
}
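A hedged usage sketch for the helper above, assuming the Task lifecycle methods used elsewhere in these tests (startTaskThread, cancelExecution, getExecutingThread, getExecutionState); it is an illustration of how such a mocked task is typically driven, not part of the original test.
// Sketch only: drive the mocked task through start, cancel, and termination.
Task task = createTask();
task.startTaskThread();                                  // runs the invokable on a dedicated thread
// ... interact with the running task, e.g. trigger asynchronous checkpoint calls ...
task.cancelExecution();                                  // request cancellation
task.getExecutingThread().join();                        // wait for the task thread to terminate
assertEquals(ExecutionState.CANCELED, task.getExecutionState());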
Use of org.apache.flink.runtime.jobgraph.JobVertexID in the Apache Flink project: class TaskManagerTest, method testTriggerStackTraceSampleMessage.
// ------------------------------------------------------------------------
// Stack trace sample
// ------------------------------------------------------------------------
/**
* Tests sampling of task stack traces.
*/
@Test
@SuppressWarnings("unchecked")
public void testTriggerStackTraceSampleMessage() throws Exception {
new JavaTestKit(system) {
{
ActorGateway taskManagerActorGateway = null;
// We need this to be a JM that responds to update messages, for robustness
// on Travis (jobs may need to be resubmitted in step (4)).
ActorRef jm = system.actorOf(Props.create(new SimpleLookupJobManagerCreator(null)));
ActorGateway jobManagerActorGateway = new AkkaActorGateway(jm, null);
final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), leaderSessionID);
try {
final ActorGateway jobManager = jobManagerActorGateway;
final ActorGateway taskManager = TestingUtils.createTaskManager(system, jobManager, new Configuration(), true, false);
final JobID jobId = new JobID();
// Single blocking task
final TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(jobId, "Job", new JobVertexID(), new ExecutionAttemptID(), new SerializedValue<>(new ExecutionConfig()), "Task", 1, 0, 1, 0, new Configuration(), new Configuration(), BlockingNoOpInvokable.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), Collections.<BlobKey>emptyList(), Collections.<URL>emptyList(), 0);
// Submit the task
new Within(d) {
@Override
protected void run() {
try {
// Make sure to register
Future<?> connectFuture = taskManager.ask(new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jobManager.actor()), remaining());
Await.ready(connectFuture, remaining());
Future<Object> taskRunningFuture = taskManager.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(tdd.getExecutionAttemptId()), timeout);
taskManager.tell(new SubmitTask(tdd));
Await.ready(taskRunningFuture, d);
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
};
//
// 1) Trigger sample for non-existing task
//
new Within(d) {
@Override
protected void run() {
try {
ExecutionAttemptID taskId = new ExecutionAttemptID();
taskManager.tell(new TriggerStackTraceSample(112223, taskId, 100, timeD, 0), testActorGateway);
// Receive the expected message (heartbeat races possible)
Object[] msg = receiveN(1);
while (!(msg[0] instanceof Status.Failure)) {
msg = receiveN(1);
}
Status.Failure response = (Status.Failure) msg[0];
assertEquals(IllegalStateException.class, response.cause().getClass());
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
};
//
// 2) Trigger sample for the blocking task
//
new Within(d) {
@Override
protected void run() {
boolean success = false;
Throwable lastError = null;
for (int i = 0; i < 100 && !success; i++) {
try {
int numSamples = 5;
taskManager.tell(new TriggerStackTraceSample(19230, tdd.getExecutionAttemptId(), numSamples, Time.milliseconds(100L), 0), testActorGateway);
// Receive the expected message (heartbeat races possible)
Object[] msg = receiveN(1);
while (!(msg[0] instanceof StackTraceSampleResponse)) {
msg = receiveN(1);
}
StackTraceSampleResponse response = (StackTraceSampleResponse) msg[0];
// ---- Verify response ----
assertEquals(19230, response.getSampleId());
assertEquals(tdd.getExecutionAttemptId(), response.getExecutionAttemptID());
List<StackTraceElement[]> traces = response.getSamples();
assertEquals("Number of samples", numSamples, traces.size());
for (StackTraceElement[] trace : traces) {
// Look for BlockingNoOpInvokable#invoke
for (StackTraceElement elem : trace) {
if (elem.getClassName().equals(BlockingNoOpInvokable.class.getName())) {
assertEquals("invoke", elem.getMethodName());
success = true;
break;
}
}
assertTrue("Unexpected stack trace: " + Arrays.toString(trace), success);
}
} catch (Throwable t) {
lastError = t;
LOG.warn("Failed to find invokable.", t);
}
try {
Thread.sleep(100);
} catch (InterruptedException e) {
LOG.error("Interrupted while sleeping before retry.", e);
break;
}
}
if (!success) {
if (lastError == null) {
fail("Failed to find invokable");
} else {
fail(lastError.getMessage());
}
}
}
};
//
// 3) Trigger sample for the blocking task with max depth
//
new Within(d) {
@Override
protected void run() {
try {
int numSamples = 5;
int maxDepth = 2;
taskManager.tell(new TriggerStackTraceSample(1337, tdd.getExecutionAttemptId(), numSamples, Time.milliseconds(100L), maxDepth), testActorGateway);
// Receive the expected message (heartbeat races possible)
Object[] msg = receiveN(1);
while (!(msg[0] instanceof StackTraceSampleResponse)) {
msg = receiveN(1);
}
StackTraceSampleResponse response = (StackTraceSampleResponse) msg[0];
// ---- Verify response ----
assertEquals(1337, response.getSampleId());
assertEquals(tdd.getExecutionAttemptId(), response.getExecutionAttemptID());
List<StackTraceElement[]> traces = response.getSamples();
assertEquals("Number of samples", numSamples, traces.size());
for (StackTraceElement[] trace : traces) {
assertEquals("Max depth", maxDepth, trace.length);
}
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
};
//
// 4) Trigger sample for the blocking task, but cancel it during sampling
//
new Within(d) {
@Override
protected void run() {
try {
int maxAttempts = 10;
int sleepTime = 100;
for (int i = 0; i < maxAttempts; i++, sleepTime *= 2) {
// Trigger many samples in order to cancel the task
// during a sample
taskManager.tell(new TriggerStackTraceSample(44, tdd.getExecutionAttemptId(), Integer.MAX_VALUE, Time.milliseconds(10L), 0), testActorGateway);
Thread.sleep(sleepTime);
Future<?> removeFuture = taskManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobId), remaining());
// Cancel the task
taskManager.tell(new CancelTask(tdd.getExecutionAttemptId()));
// Receive the expected message (heartbeat races possible)
while (true) {
Object[] msg = receiveN(1);
if (msg[0] instanceof StackTraceSampleResponse) {
StackTraceSampleResponse response = (StackTraceSampleResponse) msg[0];
assertEquals(tdd.getExecutionAttemptId(), response.getExecutionAttemptID());
assertEquals(44, response.getSampleId());
// Done
return;
} else if (msg[0] instanceof Failure) {
// Wait for removal before resubmitting
Await.ready(removeFuture, remaining());
Future<?> taskRunningFuture = taskManager.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(tdd.getExecutionAttemptId()), timeout);
// Resubmit
taskManager.tell(new SubmitTask(tdd));
Await.ready(taskRunningFuture, remaining());
// Retry the sample message
break;
} else {
// Different message
continue;
}
}
}
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
};
} finally {
TestingUtils.stopActor(taskManagerActorGateway);
TestingUtils.stopActor(jobManagerActorGateway);
}
}
};
}
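As a plain-JDK illustration (not Flink API) of what a single stack trace sample corresponds to, including the maxDepth truncation verified in step (3) above:
// Sketch only: take one sample of a thread and truncate it to maxDepth frames.
static StackTraceElement[] sampleThread(Thread taskThread, int maxDepth) {
    StackTraceElement[] trace = taskThread.getStackTrace();
    if (maxDepth > 0 && trace.length > maxDepth) {
        trace = Arrays.copyOf(trace, maxDepth); // keep only the top-most frames
    }
    return trace;
}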
Use of org.apache.flink.runtime.jobgraph.JobVertexID in the Apache Flink project: class TaskManagerTest, method testCancellingDependentAndStateUpdateFails.
@Test
public void testCancellingDependentAndStateUpdateFails() {
// This test creates two tasks: the sender sends data but fails to send the
// state update back to the job manager; the second task blocks until it is canceled.
new JavaTestKit(system) {
{
ActorGateway jobManager = null;
ActorGateway taskManager = null;
final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), leaderSessionID);
try {
final JobID jid = new JobID();
JobVertexID vid1 = new JobVertexID();
JobVertexID vid2 = new JobVertexID();
final ExecutionAttemptID eid1 = new ExecutionAttemptID();
final ExecutionAttemptID eid2 = new ExecutionAttemptID();
ActorRef jm = system.actorOf(Props.create(new SimpleLookupFailingUpdateJobManagerCreator(leaderSessionID, eid2)));
jobManager = new AkkaActorGateway(jm, leaderSessionID);
taskManager = TestingUtils.createTaskManager(system, jobManager, new Configuration(), true, true);
final ActorGateway tm = taskManager;
IntermediateResultPartitionID partitionId = new IntermediateResultPartitionID();
List<ResultPartitionDeploymentDescriptor> irpdd = new ArrayList<ResultPartitionDeploymentDescriptor>();
irpdd.add(new ResultPartitionDeploymentDescriptor(new IntermediateDataSetID(), partitionId, ResultPartitionType.PIPELINED, 1, 1, true));
InputGateDeploymentDescriptor ircdd = new InputGateDeploymentDescriptor(new IntermediateDataSetID(), ResultPartitionType.PIPELINED, 0, new InputChannelDeploymentDescriptor[] { new InputChannelDeploymentDescriptor(new ResultPartitionID(partitionId, eid1), ResultPartitionLocation.createLocal()) });
final TaskDeploymentDescriptor tdd1 = createTaskDeploymentDescriptor(jid, "TestJob", vid1, eid1, new SerializedValue<>(new ExecutionConfig()), "Sender", 1, 0, 1, 0, new Configuration(), new Configuration(), Tasks.Sender.class.getName(), irpdd, Collections.<InputGateDeploymentDescriptor>emptyList(), new ArrayList<BlobKey>(), Collections.<URL>emptyList(), 0);
final TaskDeploymentDescriptor tdd2 = createTaskDeploymentDescriptor(jid, "TestJob", vid2, eid2, new SerializedValue<>(new ExecutionConfig()), "Receiver", 7, 2, 7, 0, new Configuration(), new Configuration(), Tasks.BlockingReceiver.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.singletonList(ircdd), new ArrayList<BlobKey>(), Collections.<URL>emptyList(), 0);
new Within(d) {
@Override
protected void run() {
try {
Future<Object> t1Running = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(eid1), timeout);
Future<Object> t2Running = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(eid2), timeout);
tm.tell(new SubmitTask(tdd2), testActorGateway);
tm.tell(new SubmitTask(tdd1), testActorGateway);
expectMsgEquals(Acknowledge.get());
expectMsgEquals(Acknowledge.get());
Await.ready(t1Running, d);
Await.ready(t2Running, d);
tm.tell(TestingTaskManagerMessages.getRequestRunningTasksMessage(), testActorGateway);
Map<ExecutionAttemptID, Task> tasks = expectMsgClass(TestingTaskManagerMessages.ResponseRunningTasks.class).asJava();
Task t1 = tasks.get(eid1);
Task t2 = tasks.get(eid2);
tm.tell(new CancelTask(eid2), testActorGateway);
expectMsgEquals(Acknowledge.get());
if (t2 != null) {
Future<Object> response = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskRemoved(eid2), timeout);
Await.ready(response, d);
}
if (t1 != null) {
if (t1.getExecutionState() == ExecutionState.RUNNING) {
tm.tell(new CancelTask(eid1), testActorGateway);
expectMsgEquals(Acknowledge.get());
}
Future<Object> response = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskRemoved(eid1), timeout);
Await.ready(response, d);
}
tm.tell(TestingTaskManagerMessages.getRequestRunningTasksMessage(), testActorGateway);
tasks = expectMsgClass(TestingTaskManagerMessages.ResponseRunningTasks.class).asJava();
assertEquals(0, tasks.size());
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
};
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
} finally {
// shut down the actors
TestingUtils.stopActor(taskManager);
TestingUtils.stopActor(jobManager);
}
}
};
}
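The producer/consumer wiring used in this test, shown in isolation for readability: the receiver's input channel references the sender's partition through a ResultPartitionID built from the partition id and the producer's execution attempt. These are the same constructor calls as in the test body above, just extracted.
// Sketch only: local pipelined wiring between a sender and a receiver.
IntermediateResultPartitionID partitionId = new IntermediateResultPartitionID();
ExecutionAttemptID producerAttempt = new ExecutionAttemptID();
ResultPartitionDeploymentDescriptor producerSide = new ResultPartitionDeploymentDescriptor(new IntermediateDataSetID(), partitionId, ResultPartitionType.PIPELINED, 1, 1, true);
InputGateDeploymentDescriptor consumerSide = new InputGateDeploymentDescriptor(new IntermediateDataSetID(), ResultPartitionType.PIPELINED, 0, new InputChannelDeploymentDescriptor[] { new InputChannelDeploymentDescriptor(new ResultPartitionID(partitionId, producerAttempt), ResultPartitionLocation.createLocal()) });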
Use of org.apache.flink.runtime.jobgraph.JobVertexID in the Apache Flink project: class TaskManagerTest, method testSubmitAndExecuteTask.
@Test
public void testSubmitAndExecuteTask() throws IOException {
new JavaTestKit(system) {
{
ActorGateway taskManager = null;
final ActorGateway jobManager = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
try {
taskManager = TestingUtils.createTaskManager(system, jobManager, new Configuration(), true, false);
final ActorGateway tm = taskManager;
// handle the registration
new Within(d) {
@Override
protected void run() {
expectMsgClass(RegistrationMessages.RegisterTaskManager.class);
final InstanceID iid = new InstanceID();
assertEquals(tm.actor(), getLastSender());
tm.tell(new RegistrationMessages.AcknowledgeRegistration(iid, 12345), jobManager);
}
};
final JobID jid = new JobID();
final JobVertexID vid = new JobVertexID();
final ExecutionAttemptID eid = new ExecutionAttemptID();
final SerializedValue<ExecutionConfig> executionConfig = new SerializedValue<>(new ExecutionConfig());
final TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(jid, "TestJob", vid, eid, executionConfig, "TestTask", 7, 2, 7, 0, new Configuration(), new Configuration(), TestInvokableCorrect.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), new ArrayList<BlobKey>(), Collections.<URL>emptyList(), 0);
new Within(d) {
@Override
protected void run() {
tm.tell(new SubmitTask(tdd), jobManager);
// TaskManager should acknowledge the submission
// heartbeats may be interleaved
long deadline = System.currentTimeMillis() + 10000;
do {
Object message = receiveOne(d);
if (message.equals(Acknowledge.get())) {
break;
}
} while (System.currentTimeMillis() < deadline);
// task should have switched to running
Object toRunning = new TaskMessages.UpdateTaskExecutionState(new TaskExecutionState(jid, eid, ExecutionState.RUNNING));
// task should have switched to finished
Object toFinished = new TaskMessages.UpdateTaskExecutionState(new TaskExecutionState(jid, eid, ExecutionState.FINISHED));
deadline = System.currentTimeMillis() + 10000;
do {
Object message = receiveOne(d);
if (message.equals(toRunning)) {
break;
} else if (!(message instanceof TaskManagerMessages.Heartbeat)) {
fail("Unexpected message: " + message);
}
} while (System.currentTimeMillis() < deadline);
deadline = System.currentTimeMillis() + 10000;
do {
Object message = receiveOne(d);
if (message.equals(toFinished)) {
break;
} else if (!(message instanceof TaskManagerMessages.Heartbeat)) {
fail("Unexpected message: " + message);
}
} while (System.currentTimeMillis() < deadline);
}
};
} finally {
// shut down the actors
TestingUtils.stopActor(taskManager);
TestingUtils.stopActor(jobManager);
}
}
};
}
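The receive loops above tolerate interleaved heartbeat messages; as a hedged sketch, that pattern could be extracted into a small helper inside the JavaTestKit block (receiveOne, d, and TaskManagerMessages.Heartbeat come from the surrounding test; the helper name is hypothetical).
// Sketch only: wait for an expected message, ignoring heartbeats, until a deadline.
boolean awaitMessage(Object expected, long timeoutMillis) {
    long deadline = System.currentTimeMillis() + timeoutMillis;
    do {
        Object message = receiveOne(d);
        if (expected.equals(message)) {
            return true;                            // got the message we were waiting for
        } else if (!(message instanceof TaskManagerMessages.Heartbeat)) {
            fail("Unexpected message: " + message); // anything other than a heartbeat is an error
        }
    } while (System.currentTimeMillis() < deadline);
    return false;                                   // deadline passed without the expected message
}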