use of org.apache.flink.api.common.JobID in project flink by apache.
the class AbstractExecutionGraphRequestHandler method handleJsonRequest.
@Override
public String handleJsonRequest(Map<String, String> pathParams, Map<String, String> queryParams, ActorGateway jobManager) throws Exception {
String jidString = pathParams.get("jobid");
if (jidString == null) {
throw new RuntimeException("JobId parameter missing");
}
JobID jid;
try {
jid = JobID.fromHexString(jidString);
} catch (Exception e) {
throw new RuntimeException("Invalid JobID string '" + jidString + "': " + e.getMessage());
}
AccessExecutionGraph eg = executionGraphHolder.getExecutionGraph(jid, jobManager);
if (eg == null) {
throw new NotFoundException("Could not find job with id " + jid);
}
return handleRequest(eg, pathParams);
}
use of org.apache.flink.api.common.JobID in project flink by apache.
the class TaskSlotTable method freeSlot.
/**
* Tries to free the slot. If the slot is empty it will set the state of the task slot to free
* and return its index. If the slot is not empty, then it will set the state of the task slot
* to releasing, fail all tasks and return -1.
*
* @param allocationId identifying the task slot to be freed
* @param cause to fail the tasks with if slot is not empty
* @throws SlotNotFoundException if there is not task slot for the given allocation id
* @return Index of the freed slot if the slot could be freed; otherwise -1
*/
public int freeSlot(AllocationID allocationId, Throwable cause) throws SlotNotFoundException {
checkInit();
if (LOG.isDebugEnabled()) {
LOG.debug("Free slot {}.", allocationId, cause);
} else {
LOG.info("Free slot {}.", allocationId);
}
TaskSlot taskSlot = getTaskSlot(allocationId);
if (taskSlot != null) {
LOG.info("Free slot {}.", allocationId, cause);
final JobID jobId = taskSlot.getJobId();
if (taskSlot.markFree()) {
// remove the allocation id to task slot mapping
allocationIDTaskSlotMap.remove(allocationId);
// unregister a potential timeout
timerService.unregisterTimeout(allocationId);
Set<AllocationID> slots = slotsPerJob.get(jobId);
if (slots == null) {
throw new IllegalStateException("There are no more slots allocated for the job " + jobId + ". This indicates a programming bug.");
}
slots.remove(allocationId);
if (slots.isEmpty()) {
slotsPerJob.remove(jobId);
}
return taskSlot.getIndex();
} else {
// we couldn't free the task slot because it still contains task, fail the tasks
// and set the slot state to releasing so that it gets eventually freed
taskSlot.markReleasing();
Iterator<Task> taskIterator = taskSlot.getTasks();
while (taskIterator.hasNext()) {
taskIterator.next().failExternally(cause);
}
return -1;
}
} else {
throw new SlotNotFoundException(allocationId);
}
}
use of org.apache.flink.api.common.JobID in project flink by apache.
the class ResourceManager method requestSlot.
/**
* Requests a slot from the resource manager.
*
* @param slotRequest Slot request
* @return Slot assignment
*/
@RpcMethod
public RMSlotRequestReply requestSlot(UUID jobMasterLeaderID, UUID resourceManagerLeaderID, SlotRequest slotRequest) {
log.info("Request slot with profile {} for job {} with allocation id {}.", slotRequest.getResourceProfile(), slotRequest.getJobId(), slotRequest.getAllocationId());
JobID jobId = slotRequest.getJobId();
JobManagerRegistration jobManagerRegistration = jobManagerRegistrations.get(jobId);
if (jobManagerRegistration != null && jobMasterLeaderID.equals(jobManagerRegistration.getLeaderID()) && resourceManagerLeaderID.equals(leaderSessionId)) {
return slotManager.requestSlot(slotRequest);
} else {
log.info("Ignoring slot request for unknown JobMaster with JobID {}", jobId);
return new RMSlotRequestRejected(slotRequest.getAllocationId());
}
}
use of org.apache.flink.api.common.JobID in project flink by apache.
the class JobManagerTest method testKvStateMessages.
/**
* Tests that the JobManager handles {@link org.apache.flink.runtime.query.KvStateMessage}
* instances as expected.
*/
@Test
public void testKvStateMessages() throws Exception {
Deadline deadline = new FiniteDuration(100, TimeUnit.SECONDS).fromNow();
Configuration config = new Configuration();
config.setString(ConfigConstants.AKKA_ASK_TIMEOUT, "100ms");
UUID leaderSessionId = null;
ActorGateway jobManager = new AkkaActorGateway(JobManager.startJobManagerActors(config, system, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), TestingJobManager.class, MemoryArchivist.class)._1(), leaderSessionId);
LeaderRetrievalService leaderRetrievalService = new StandaloneLeaderRetrievalService(AkkaUtils.getAkkaURL(system, jobManager.actor()));
Configuration tmConfig = new Configuration();
tmConfig.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, 4);
tmConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 8);
ActorRef taskManager = TaskManager.startTaskManagerComponentsAndActor(tmConfig, ResourceID.generate(), system, "localhost", scala.Option.<String>empty(), scala.Option.apply(leaderRetrievalService), true, TestingTaskManager.class);
Future<Object> registrationFuture = jobManager.ask(new NotifyWhenAtLeastNumTaskManagerAreRegistered(1), deadline.timeLeft());
Await.ready(registrationFuture, deadline.timeLeft());
//
// Location lookup
//
LookupKvStateLocation lookupNonExistingJob = new LookupKvStateLocation(new JobID(), "any-name");
Future<KvStateLocation> lookupFuture = jobManager.ask(lookupNonExistingJob, deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));
try {
Await.result(lookupFuture, deadline.timeLeft());
fail("Did not throw expected Exception");
} catch (IllegalStateException ignored) {
// Expected
}
JobGraph jobGraph = new JobGraph("croissant");
JobVertex jobVertex1 = new JobVertex("cappuccino");
jobVertex1.setParallelism(4);
jobVertex1.setMaxParallelism(16);
jobVertex1.setInvokableClass(BlockingNoOpInvokable.class);
JobVertex jobVertex2 = new JobVertex("americano");
jobVertex2.setParallelism(4);
jobVertex2.setMaxParallelism(16);
jobVertex2.setInvokableClass(BlockingNoOpInvokable.class);
jobGraph.addVertex(jobVertex1);
jobGraph.addVertex(jobVertex2);
Future<JobSubmitSuccess> submitFuture = jobManager.ask(new SubmitJob(jobGraph, ListeningBehaviour.DETACHED), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<JobSubmitSuccess>apply(JobSubmitSuccess.class));
Await.result(submitFuture, deadline.timeLeft());
Object lookupUnknownRegistrationName = new LookupKvStateLocation(jobGraph.getJobID(), "unknown");
lookupFuture = jobManager.ask(lookupUnknownRegistrationName, deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));
try {
Await.result(lookupFuture, deadline.timeLeft());
fail("Did not throw expected Exception");
} catch (UnknownKvStateLocation ignored) {
// Expected
}
//
// Registration
//
NotifyKvStateRegistered registerNonExistingJob = new NotifyKvStateRegistered(new JobID(), new JobVertexID(), new KeyGroupRange(0, 0), "any-name", new KvStateID(), new KvStateServerAddress(InetAddress.getLocalHost(), 1233));
jobManager.tell(registerNonExistingJob);
LookupKvStateLocation lookupAfterRegistration = new LookupKvStateLocation(registerNonExistingJob.getJobId(), registerNonExistingJob.getRegistrationName());
lookupFuture = jobManager.ask(lookupAfterRegistration, deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));
try {
Await.result(lookupFuture, deadline.timeLeft());
fail("Did not throw expected Exception");
} catch (IllegalStateException ignored) {
// Expected
}
NotifyKvStateRegistered registerForExistingJob = new NotifyKvStateRegistered(jobGraph.getJobID(), jobVertex1.getID(), new KeyGroupRange(0, 0), "register-me", new KvStateID(), new KvStateServerAddress(InetAddress.getLocalHost(), 1293));
jobManager.tell(registerForExistingJob);
lookupAfterRegistration = new LookupKvStateLocation(registerForExistingJob.getJobId(), registerForExistingJob.getRegistrationName());
lookupFuture = jobManager.ask(lookupAfterRegistration, deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));
KvStateLocation location = Await.result(lookupFuture, deadline.timeLeft());
assertNotNull(location);
assertEquals(jobGraph.getJobID(), location.getJobId());
assertEquals(jobVertex1.getID(), location.getJobVertexId());
assertEquals(jobVertex1.getMaxParallelism(), location.getNumKeyGroups());
assertEquals(1, location.getNumRegisteredKeyGroups());
KeyGroupRange keyGroupRange = registerForExistingJob.getKeyGroupRange();
assertEquals(1, keyGroupRange.getNumberOfKeyGroups());
assertEquals(registerForExistingJob.getKvStateId(), location.getKvStateID(keyGroupRange.getStartKeyGroup()));
assertEquals(registerForExistingJob.getKvStateServerAddress(), location.getKvStateServerAddress(keyGroupRange.getStartKeyGroup()));
//
// Unregistration
//
NotifyKvStateUnregistered unregister = new NotifyKvStateUnregistered(registerForExistingJob.getJobId(), registerForExistingJob.getJobVertexId(), registerForExistingJob.getKeyGroupRange(), registerForExistingJob.getRegistrationName());
jobManager.tell(unregister);
lookupFuture = jobManager.ask(lookupAfterRegistration, deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));
try {
Await.result(lookupFuture, deadline.timeLeft());
fail("Did not throw expected Exception");
} catch (UnknownKvStateLocation ignored) {
// Expected
}
//
// Duplicate registration fails task
//
NotifyKvStateRegistered register = new NotifyKvStateRegistered(jobGraph.getJobID(), jobVertex1.getID(), new KeyGroupRange(0, 0), "duplicate-me", new KvStateID(), new KvStateServerAddress(InetAddress.getLocalHost(), 1293));
NotifyKvStateRegistered duplicate = new NotifyKvStateRegistered(jobGraph.getJobID(), // <--- different operator, but...
jobVertex2.getID(), new KeyGroupRange(0, 0), // ...same name
"duplicate-me", new KvStateID(), new KvStateServerAddress(InetAddress.getLocalHost(), 1293));
Future<TestingJobManagerMessages.JobStatusIs> failedFuture = jobManager.ask(new NotifyWhenJobStatus(jobGraph.getJobID(), JobStatus.FAILED), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<JobStatusIs>apply(JobStatusIs.class));
jobManager.tell(register);
jobManager.tell(duplicate);
// Wait for failure
JobStatusIs jobStatus = Await.result(failedFuture, deadline.timeLeft());
assertEquals(JobStatus.FAILED, jobStatus.state());
}
use of org.apache.flink.api.common.JobID in project flink by apache.
the class JobManagerTest method testRequestPartitionStateUnregisteredExecution.
/**
* Tests the JobManager response when the execution is not registered with
* the ExecutionGraph.
*/
@Test
public void testRequestPartitionStateUnregisteredExecution() throws Exception {
new JavaTestKit(system) {
{
new Within(duration("15 seconds")) {
@Override
protected void run() {
// Setup
TestingCluster cluster = null;
try {
cluster = startTestingCluster(4, 1, DEFAULT_AKKA_ASK_TIMEOUT());
final IntermediateDataSetID rid = new IntermediateDataSetID();
// Create a task
final JobVertex sender = new JobVertex("Sender");
sender.setParallelism(1);
// just finish
sender.setInvokableClass(NoOpInvokable.class);
sender.createAndAddResultDataSet(rid, PIPELINED);
final JobVertex sender2 = new JobVertex("Blocking Sender");
sender2.setParallelism(1);
// just block
sender2.setInvokableClass(BlockingNoOpInvokable.class);
sender2.createAndAddResultDataSet(new IntermediateDataSetID(), PIPELINED);
final JobGraph jobGraph = new JobGraph("Fast finishing producer test job", sender, sender2);
final JobID jid = jobGraph.getJobID();
final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
// we can set the leader session ID to None because we don't use this gateway to send messages
final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), null);
// Submit the job and wait for all vertices to be running
jobManagerGateway.tell(new SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT), testActorGateway);
expectMsgClass(JobSubmitSuccess.class);
jobManagerGateway.tell(new WaitForAllVerticesToBeRunningOrFinished(jid), testActorGateway);
expectMsgClass(AllVerticesRunning.class);
Future<Object> egFuture = jobManagerGateway.ask(new RequestExecutionGraph(jobGraph.getJobID()), remaining());
ExecutionGraphFound egFound = (ExecutionGraphFound) Await.result(egFuture, remaining());
ExecutionGraph eg = (ExecutionGraph) egFound.executionGraph();
ExecutionVertex vertex = eg.getJobVertex(sender.getID()).getTaskVertices()[0];
while (vertex.getExecutionState() != ExecutionState.FINISHED) {
Thread.sleep(1);
}
IntermediateResultPartition partition = vertex.getProducedPartitions().values().iterator().next();
ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), vertex.getCurrentExecutionAttempt().getAttemptId());
// Producer finished, request state
Object request = new RequestPartitionProducerState(jid, rid, partitionId);
Future<ExecutionState> producerStateFuture = jobManagerGateway.ask(request, getRemainingTime()).mapTo(ClassTag$.MODULE$.<ExecutionState>apply(ExecutionState.class));
assertEquals(ExecutionState.FINISHED, Await.result(producerStateFuture, getRemainingTime()));
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
};
}
};
}
Aggregations