Search in sources :

Example 1 with JobID

use of org.apache.flink.api.common.JobID in project flink by apache.

the class AbstractExecutionGraphRequestHandler method handleJsonRequest.

/**
 * Resolves the job named by the {@code jobid} path parameter and delegates to
 * {@code handleRequest} with that job's execution graph.
 *
 * @param pathParams path parameters; must contain the hex string of the job ID under {@code "jobid"}
 * @param queryParams query parameters (not read by this method)
 * @param jobManager gateway handed to the execution graph holder for the lookup
 * @return whatever {@code handleRequest} produces for the resolved execution graph
 * @throws RuntimeException if the {@code jobid} parameter is missing or cannot be parsed
 * @throws NotFoundException if the holder returns no execution graph for the job ID
 */
@Override
public String handleJsonRequest(Map<String, String> pathParams, Map<String, String> queryParams, ActorGateway jobManager) throws Exception {
    String jidString = pathParams.get("jobid");
    if (jidString == null) {
        throw new RuntimeException("JobId parameter missing");
    }
    JobID jid;
    try {
        jid = JobID.fromHexString(jidString);
    } catch (Exception e) {
        // Chain the original exception so the parse failure's stack trace is not lost.
        throw new RuntimeException("Invalid JobID string '" + jidString + "': " + e.getMessage(), e);
    }
    AccessExecutionGraph eg = executionGraphHolder.getExecutionGraph(jid, jobManager);
    if (eg == null) {
        throw new NotFoundException("Could not find job with id " + jid);
    }
    return handleRequest(eg, pathParams);
}
Also used : NotFoundException(org.apache.flink.runtime.webmonitor.NotFoundException) AccessExecutionGraph(org.apache.flink.runtime.executiongraph.AccessExecutionGraph) JobID(org.apache.flink.api.common.JobID)

Example 2 with JobID

use of org.apache.flink.api.common.JobID in project flink by apache.

the class TaskSlotTable method freeSlot.

/**
 * Tries to free the slot. If the slot is empty it will set the state of the task slot to free
 * and return its index. If the slot is not empty, then it will set the state of the task slot
 * to releasing, fail all tasks and return -1.
 *
 * @param allocationId identifying the task slot to be freed
 * @param cause to fail the tasks with if slot is not empty
 * @throws SlotNotFoundException if there is no task slot for the given allocation id
 * @throws IllegalStateException if the freed slot's job has no slot set registered in
 *         {@code slotsPerJob}
 * @return Index of the freed slot if the slot could be freed; otherwise -1
 */
public int freeSlot(AllocationID allocationId, Throwable cause) throws SlotNotFoundException {
    checkInit();
    // Log once: the cause (and its stack trace) is attached only at debug level,
    // while info level gets the short form without the throwable.
    if (LOG.isDebugEnabled()) {
        LOG.debug("Free slot {}.", allocationId, cause);
    } else {
        LOG.info("Free slot {}.", allocationId);
    }
    TaskSlot taskSlot = getTaskSlot(allocationId);
    if (taskSlot == null) {
        throw new SlotNotFoundException(allocationId);
    }
    final JobID jobId = taskSlot.getJobId();
    if (taskSlot.markFree()) {
        // remove the allocation id to task slot mapping
        allocationIDTaskSlotMap.remove(allocationId);
        // unregister a potential timeout
        timerService.unregisterTimeout(allocationId);
        Set<AllocationID> slots = slotsPerJob.get(jobId);
        if (slots == null) {
            throw new IllegalStateException("There are no more slots allocated for the job " + jobId + ". This indicates a programming bug.");
        }
        slots.remove(allocationId);
        // drop the per-job entry once its last slot is gone
        if (slots.isEmpty()) {
            slotsPerJob.remove(jobId);
        }
        return taskSlot.getIndex();
    }
    // we couldn't free the task slot because it still contains task, fail the tasks
    // and set the slot state to releasing so that it gets eventually freed
    taskSlot.markReleasing();
    Iterator<Task> taskIterator = taskSlot.getTasks();
    while (taskIterator.hasNext()) {
        taskIterator.next().failExternally(cause);
    }
    return -1;
}
Also used : Task(org.apache.flink.runtime.taskmanager.Task) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) JobID(org.apache.flink.api.common.JobID)

Example 3 with JobID

use of org.apache.flink.api.common.JobID in project flink by apache.

the class ResourceManager method requestSlot.

/**
 * Handles a slot request sent to the resource manager.
 *
 * <p>The request is forwarded to the slot manager only if a job manager is registered for the
 * request's job, the given job master leader ID equals the registered leader ID, and the given
 * resource manager leader ID equals this resource manager's leader session ID. In every other
 * case the request is rejected.
 *
 * @param jobMasterLeaderID leader ID of the requesting job master
 * @param resourceManagerLeaderID leader ID the caller expects this resource manager to have
 * @param slotRequest Slot request
 * @return the slot manager's reply if the request was accepted; otherwise a rejection carrying
 *         the request's allocation id
 */
@RpcMethod
public RMSlotRequestReply requestSlot(UUID jobMasterLeaderID, UUID resourceManagerLeaderID, SlotRequest slotRequest) {
    log.info("Request slot with profile {} for job {} with allocation id {}.", slotRequest.getResourceProfile(), slotRequest.getJobId(), slotRequest.getAllocationId());

    final JobID requestedJobId = slotRequest.getJobId();
    final JobManagerRegistration registration = jobManagerRegistrations.get(requestedJobId);

    // Short-circuit order matters: the registration must be non-null before its leader ID is read.
    final boolean accepted = registration != null
        && jobMasterLeaderID.equals(registration.getLeaderID())
        && resourceManagerLeaderID.equals(leaderSessionId);

    if (!accepted) {
        log.info("Ignoring slot request for unknown JobMaster with JobID {}", requestedJobId);
        return new RMSlotRequestRejected(slotRequest.getAllocationId());
    }

    return slotManager.requestSlot(slotRequest);
}
Also used : RMSlotRequestRejected(org.apache.flink.runtime.resourcemanager.messages.jobmanager.RMSlotRequestRejected) JobID(org.apache.flink.api.common.JobID) JobManagerRegistration(org.apache.flink.runtime.resourcemanager.registration.JobManagerRegistration) RpcMethod(org.apache.flink.runtime.rpc.RpcMethod)

Example 4 with JobID

use of org.apache.flink.api.common.JobID in project flink by apache.

the class JobManagerTest method testKvStateMessages.

/**
 * Tests that the JobManager handles {@link org.apache.flink.runtime.query.KvStateMessage}
 * instances as expected.
 *
 * <p>The test walks through four phases against a single JobManager: location lookup,
 * registration, unregistration, and duplicate registration. Each phase sends messages and
 * checks either the returned {@code KvStateLocation} or the exception type of the failed
 * lookup.
 */
@Test
public void testKvStateMessages() throws Exception {
    // Overall time budget shared by all asks/awaits in this test.
    Deadline deadline = new FiniteDuration(100, TimeUnit.SECONDS).fromNow();
    Configuration config = new Configuration();
    config.setString(ConfigConstants.AKKA_ASK_TIMEOUT, "100ms");
    UUID leaderSessionId = null;
    ActorGateway jobManager = new AkkaActorGateway(JobManager.startJobManagerActors(config, system, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), TestingJobManager.class, MemoryArchivist.class)._1(), leaderSessionId);
    LeaderRetrievalService leaderRetrievalService = new StandaloneLeaderRetrievalService(AkkaUtils.getAkkaURL(system, jobManager.actor()));
    // TaskManager configured with 8 task slots, matching the 2 x 4 parallel subtasks submitted below.
    Configuration tmConfig = new Configuration();
    tmConfig.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, 4);
    tmConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 8);
    ActorRef taskManager = TaskManager.startTaskManagerComponentsAndActor(tmConfig, ResourceID.generate(), system, "localhost", scala.Option.<String>empty(), scala.Option.apply(leaderRetrievalService), true, TestingTaskManager.class);
    // Block until the JobManager answers the "at least 1 TaskManager registered" notification.
    Future<Object> registrationFuture = jobManager.ask(new NotifyWhenAtLeastNumTaskManagerAreRegistered(1), deadline.timeLeft());
    Await.ready(registrationFuture, deadline.timeLeft());
    //
    // Location lookup
    //
    // Lookup with a fresh JobID that was never submitted: expected to fail with IllegalStateException.
    LookupKvStateLocation lookupNonExistingJob = new LookupKvStateLocation(new JobID(), "any-name");
    Future<KvStateLocation> lookupFuture = jobManager.ask(lookupNonExistingJob, deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));
    try {
        Await.result(lookupFuture, deadline.timeLeft());
        fail("Did not throw expected Exception");
    } catch (IllegalStateException ignored) {
        // Expected
    }
    // Submit a job with two blocking vertices (parallelism 4, max parallelism 16 each).
    JobGraph jobGraph = new JobGraph("croissant");
    JobVertex jobVertex1 = new JobVertex("cappuccino");
    jobVertex1.setParallelism(4);
    jobVertex1.setMaxParallelism(16);
    jobVertex1.setInvokableClass(BlockingNoOpInvokable.class);
    JobVertex jobVertex2 = new JobVertex("americano");
    jobVertex2.setParallelism(4);
    jobVertex2.setMaxParallelism(16);
    jobVertex2.setInvokableClass(BlockingNoOpInvokable.class);
    jobGraph.addVertex(jobVertex1);
    jobGraph.addVertex(jobVertex2);
    Future<JobSubmitSuccess> submitFuture = jobManager.ask(new SubmitJob(jobGraph, ListeningBehaviour.DETACHED), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<JobSubmitSuccess>apply(JobSubmitSuccess.class));
    Await.result(submitFuture, deadline.timeLeft());
    // Lookup for the submitted job but with a name nobody registered: expected UnknownKvStateLocation.
    Object lookupUnknownRegistrationName = new LookupKvStateLocation(jobGraph.getJobID(), "unknown");
    lookupFuture = jobManager.ask(lookupUnknownRegistrationName, deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));
    try {
        Await.result(lookupFuture, deadline.timeLeft());
        fail("Did not throw expected Exception");
    } catch (UnknownKvStateLocation ignored) {
        // Expected
    }
    //
    // Registration
    //
    // Registering state for a job that does not exist: the follow-up lookup is still expected
    // to fail with IllegalStateException.
    NotifyKvStateRegistered registerNonExistingJob = new NotifyKvStateRegistered(new JobID(), new JobVertexID(), new KeyGroupRange(0, 0), "any-name", new KvStateID(), new KvStateServerAddress(InetAddress.getLocalHost(), 1233));
    jobManager.tell(registerNonExistingJob);
    LookupKvStateLocation lookupAfterRegistration = new LookupKvStateLocation(registerNonExistingJob.getJobId(), registerNonExistingJob.getRegistrationName());
    lookupFuture = jobManager.ask(lookupAfterRegistration, deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));
    try {
        Await.result(lookupFuture, deadline.timeLeft());
        fail("Did not throw expected Exception");
    } catch (IllegalStateException ignored) {
        // Expected
    }
    // Registering state for the submitted job: the lookup must now succeed and reflect the registration.
    NotifyKvStateRegistered registerForExistingJob = new NotifyKvStateRegistered(jobGraph.getJobID(), jobVertex1.getID(), new KeyGroupRange(0, 0), "register-me", new KvStateID(), new KvStateServerAddress(InetAddress.getLocalHost(), 1293));
    jobManager.tell(registerForExistingJob);
    lookupAfterRegistration = new LookupKvStateLocation(registerForExistingJob.getJobId(), registerForExistingJob.getRegistrationName());
    lookupFuture = jobManager.ask(lookupAfterRegistration, deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));
    KvStateLocation location = Await.result(lookupFuture, deadline.timeLeft());
    assertNotNull(location);
    assertEquals(jobGraph.getJobID(), location.getJobId());
    assertEquals(jobVertex1.getID(), location.getJobVertexId());
    // The location's key group count is expected to equal the vertex's max parallelism.
    assertEquals(jobVertex1.getMaxParallelism(), location.getNumKeyGroups());
    // Only the single key group of range (0, 0) was registered.
    assertEquals(1, location.getNumRegisteredKeyGroups());
    KeyGroupRange keyGroupRange = registerForExistingJob.getKeyGroupRange();
    assertEquals(1, keyGroupRange.getNumberOfKeyGroups());
    assertEquals(registerForExistingJob.getKvStateId(), location.getKvStateID(keyGroupRange.getStartKeyGroup()));
    assertEquals(registerForExistingJob.getKvStateServerAddress(), location.getKvStateServerAddress(keyGroupRange.getStartKeyGroup()));
    //
    // Unregistration
    //
    // After unregistering the only key group, the same lookup is expected to fail with
    // UnknownKvStateLocation.
    NotifyKvStateUnregistered unregister = new NotifyKvStateUnregistered(registerForExistingJob.getJobId(), registerForExistingJob.getJobVertexId(), registerForExistingJob.getKeyGroupRange(), registerForExistingJob.getRegistrationName());
    jobManager.tell(unregister);
    lookupFuture = jobManager.ask(lookupAfterRegistration, deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));
    try {
        Await.result(lookupFuture, deadline.timeLeft());
        fail("Did not throw expected Exception");
    } catch (UnknownKvStateLocation ignored) {
        // Expected
    }
    //
    // Duplicate registration fails task
    //
    NotifyKvStateRegistered register = new NotifyKvStateRegistered(jobGraph.getJobID(), jobVertex1.getID(), new KeyGroupRange(0, 0), "duplicate-me", new KvStateID(), new KvStateServerAddress(InetAddress.getLocalHost(), 1293));
    // The duplicate targets a different operator (jobVertex2 instead of jobVertex1) but reuses
    // the same registration name "duplicate-me".
    NotifyKvStateRegistered duplicate = new NotifyKvStateRegistered(jobGraph.getJobID(),
    jobVertex2.getID(), new KeyGroupRange(0, 0),
    "duplicate-me", new KvStateID(), new KvStateServerAddress(InetAddress.getLocalHost(), 1293));
    // Subscribe to the FAILED status before sending the registrations so the notification is not missed.
    Future<TestingJobManagerMessages.JobStatusIs> failedFuture = jobManager.ask(new NotifyWhenJobStatus(jobGraph.getJobID(), JobStatus.FAILED), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<JobStatusIs>apply(JobStatusIs.class));
    jobManager.tell(register);
    jobManager.tell(duplicate);
    // Wait for failure
    JobStatusIs jobStatus = Await.result(failedFuture, deadline.timeLeft());
    assertEquals(JobStatus.FAILED, jobStatus.state());
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) Configuration(org.apache.flink.configuration.Configuration) UnknownKvStateLocation(org.apache.flink.runtime.query.UnknownKvStateLocation) ActorRef(akka.actor.ActorRef) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) KvStateServerAddress(org.apache.flink.runtime.query.KvStateServerAddress) LookupKvStateLocation(org.apache.flink.runtime.query.KvStateMessage.LookupKvStateLocation) KvStateLocation(org.apache.flink.runtime.query.KvStateLocation) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) JobSubmitSuccess(org.apache.flink.runtime.messages.JobManagerMessages.JobSubmitSuccess) KvStateID(org.apache.flink.runtime.query.KvStateID) UUID(java.util.UUID) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) NotifyKvStateRegistered(org.apache.flink.runtime.query.KvStateMessage.NotifyKvStateRegistered) NotifyKvStateUnregistered(org.apache.flink.runtime.query.KvStateMessage.NotifyKvStateUnregistered) JobStatusIs(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.JobStatusIs) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) StandaloneLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.StandaloneLeaderRetrievalService) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) NotifyWhenAtLeastNumTaskManagerAreRegistered(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.NotifyWhenAtLeastNumTaskManagerAreRegistered) JobID(org.apache.flink.api.common.JobID) NotifyWhenJobStatus(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.NotifyWhenJobStatus) Test(org.junit.Test)

Example 5 with JobID

use of org.apache.flink.api.common.JobID in project flink by apache.

the class JobManagerTest method testRequestPartitionStateUnregisteredExecution.

/**
 * Tests the JobManager response when the execution is not registered with
 * the ExecutionGraph.
 *
 * <p>The test submits a job with one vertex that finishes immediately and one that blocks,
 * waits until the first vertex is FINISHED, then asks the JobManager for the producer state
 * of that vertex's partition and asserts that the answer is {@code ExecutionState.FINISHED}.
 */
@Test
public void testRequestPartitionStateUnregisteredExecution() throws Exception {
    new JavaTestKit(system) {

        {
            // The whole scenario runs inside a 15 second Within block.
            new Within(duration("15 seconds")) {

                @Override
                protected void run() {
                    // Setup
                    TestingCluster cluster = null;
                    try {
                        cluster = startTestingCluster(4, 1, DEFAULT_AKKA_ASK_TIMEOUT());
                        // ID of the result data set produced by the fast-finishing sender; used in the state request below.
                        final IntermediateDataSetID rid = new IntermediateDataSetID();
                        // Create a task
                        final JobVertex sender = new JobVertex("Sender");
                        sender.setParallelism(1);
                        // just finish
                        sender.setInvokableClass(NoOpInvokable.class);
                        sender.createAndAddResultDataSet(rid, PIPELINED);
                        final JobVertex sender2 = new JobVertex("Blocking Sender");
                        sender2.setParallelism(1);
                        // just block
                        sender2.setInvokableClass(BlockingNoOpInvokable.class);
                        sender2.createAndAddResultDataSet(new IntermediateDataSetID(), PIPELINED);
                        final JobGraph jobGraph = new JobGraph("Fast finishing producer test job", sender, sender2);
                        final JobID jid = jobGraph.getJobID();
                        final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
                        // we can set the leader session ID to None because we don't use this gateway to send messages
                        final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), null);
                        // Submit the job and wait for all vertices to be running
                        jobManagerGateway.tell(new SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT), testActorGateway);
                        expectMsgClass(JobSubmitSuccess.class);
                        jobManagerGateway.tell(new WaitForAllVerticesToBeRunningOrFinished(jid), testActorGateway);
                        expectMsgClass(AllVerticesRunning.class);
                        // Fetch the execution graph to inspect the sender's execution vertex directly.
                        Future<Object> egFuture = jobManagerGateway.ask(new RequestExecutionGraph(jobGraph.getJobID()), remaining());
                        ExecutionGraphFound egFound = (ExecutionGraphFound) Await.result(egFuture, remaining());
                        ExecutionGraph eg = (ExecutionGraph) egFound.executionGraph();
                        ExecutionVertex vertex = eg.getJobVertex(sender.getID()).getTaskVertices()[0];
                        // Poll until the fast sender reports FINISHED.
                        // NOTE(review): this loop has no exit condition other than FINISHED; if the vertex
                        // ends in another terminal state it would spin indefinitely - confirm this is acceptable.
                        while (vertex.getExecutionState() != ExecutionState.FINISHED) {
                            Thread.sleep(1);
                        }
                        // Build the partition ID from the sender's first produced partition and its current attempt.
                        IntermediateResultPartition partition = vertex.getProducedPartitions().values().iterator().next();
                        ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), vertex.getCurrentExecutionAttempt().getAttemptId());
                        // Producer finished, request state
                        Object request = new RequestPartitionProducerState(jid, rid, partitionId);
                        Future<ExecutionState> producerStateFuture = jobManagerGateway.ask(request, getRemainingTime()).mapTo(ClassTag$.MODULE$.<ExecutionState>apply(ExecutionState.class));
                        assertEquals(ExecutionState.FINISHED, Await.result(producerStateFuture, getRemainingTime()));
                    } catch (Exception e) {
                        // Any exception is turned into a test failure carrying the exception message.
                        e.printStackTrace();
                        fail(e.getMessage());
                    } finally {
                        // Shut the cluster down whether the test passed or failed.
                        if (cluster != null) {
                            cluster.shutdown();
                        }
                    }
                }
            };
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) RequestPartitionProducerState(org.apache.flink.runtime.messages.JobManagerMessages.RequestPartitionProducerState) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) WaitForAllVerticesToBeRunningOrFinished(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunningOrFinished) TestingUtils.startTestingCluster(org.apache.flink.runtime.testingUtils.TestingUtils.startTestingCluster) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) IntermediateResultPartition(org.apache.flink.runtime.executiongraph.IntermediateResultPartition) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) ExecutionGraphFound(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.ExecutionGraphFound) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

JobID (org.apache.flink.api.common.JobID)335 Test (org.junit.Test)274 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)88 IOException (java.io.IOException)74 Configuration (org.apache.flink.configuration.Configuration)72 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)61 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)48 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)47 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)44 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)42 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)38 ArrayList (java.util.ArrayList)37 MetricRegistry (org.apache.flink.runtime.metrics.MetricRegistry)32 KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange)31 HashMap (java.util.HashMap)29 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)29 FiniteDuration (scala.concurrent.duration.FiniteDuration)28 IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID)24 File (java.io.File)23 UUID (java.util.UUID)23