Search in sources :

Example 1 with ExecutionState

use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.

the class SubtasksTimesHandler method createSubtaskTimesJson.

public static String createSubtaskTimesJson(AccessExecutionJobVertex jobVertex) throws IOException {
    final long now = System.currentTimeMillis();
    StringWriter writer = new StringWriter();
    JsonGenerator gen = JsonFactory.jacksonFactory.createGenerator(writer);
    gen.writeStartObject();
    gen.writeStringField("id", jobVertex.getJobVertexId().toString());
    gen.writeStringField("name", jobVertex.getName());
    gen.writeNumberField("now", now);
    gen.writeArrayFieldStart("subtasks");
    int num = 0;
    for (AccessExecutionVertex vertex : jobVertex.getTaskVertices()) {
        long[] timestamps = vertex.getCurrentExecutionAttempt().getStateTimestamps();
        ExecutionState status = vertex.getExecutionState();
        long scheduledTime = timestamps[ExecutionState.SCHEDULED.ordinal()];
        long start = scheduledTime > 0 ? scheduledTime : -1;
        long end = status.isTerminal() ? timestamps[status.ordinal()] : now;
        long duration = start >= 0 ? end - start : -1L;
        gen.writeStartObject();
        gen.writeNumberField("subtask", num++);
        TaskManagerLocation location = vertex.getCurrentAssignedResourceLocation();
        String locationString = location == null ? "(unassigned)" : location.getHostname();
        gen.writeStringField("host", locationString);
        gen.writeNumberField("duration", duration);
        gen.writeObjectFieldStart("timestamps");
        for (ExecutionState state : ExecutionState.values()) {
            gen.writeNumberField(state.name(), timestamps[state.ordinal()]);
        }
        gen.writeEndObject();
        gen.writeEndObject();
    }
    gen.writeEndArray();
    gen.writeEndObject();
    gen.close();
    return writer.toString();
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) StringWriter(java.io.StringWriter) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex)

Example 2 with ExecutionState

use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.

the class SubtaskExecutionAttemptDetailsHandler method createAttemptDetailsJson.

public static String createAttemptDetailsJson(AccessExecution execAttempt, String jobID, String vertexID, @Nullable MetricFetcher fetcher) throws IOException {
    StringWriter writer = new StringWriter();
    JsonGenerator gen = JsonFactory.jacksonFactory.createGenerator(writer);
    final ExecutionState status = execAttempt.getState();
    final long now = System.currentTimeMillis();
    TaskManagerLocation location = execAttempt.getAssignedResourceLocation();
    String locationString = location == null ? "(unassigned)" : location.getHostname();
    long startTime = execAttempt.getStateTimestamp(ExecutionState.DEPLOYING);
    if (startTime == 0) {
        startTime = -1;
    }
    long endTime = status.isTerminal() ? execAttempt.getStateTimestamp(status) : -1;
    long duration = startTime > 0 ? ((endTime > 0 ? endTime : now) - startTime) : -1;
    gen.writeStartObject();
    gen.writeNumberField("subtask", execAttempt.getParallelSubtaskIndex());
    gen.writeStringField("status", status.name());
    gen.writeNumberField("attempt", execAttempt.getAttemptNumber());
    gen.writeStringField("host", locationString);
    gen.writeNumberField("start-time", startTime);
    gen.writeNumberField("end-time", endTime);
    gen.writeNumberField("duration", duration);
    MutableIOMetrics counts = new MutableIOMetrics();
    counts.addIOMetrics(execAttempt, fetcher, jobID, vertexID);
    counts.writeIOMetricsAsJson(gen);
    gen.writeEndObject();
    gen.close();
    return writer.toString();
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) MutableIOMetrics(org.apache.flink.runtime.webmonitor.utils.MutableIOMetrics) StringWriter(java.io.StringWriter) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) JsonGenerator(com.fasterxml.jackson.core.JsonGenerator)

Example 3 with ExecutionState

use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.

the class JobManagerTest method testRequestPartitionStateUnregisteredExecution.

/**
	 * Tests the JobManager response when the execution is not registered with
	 * the ExecutionGraph.
	 */
@Test
public void testRequestPartitionStateUnregisteredExecution() throws Exception {
    new JavaTestKit(system) {

        {
            new Within(duration("15 seconds")) {

                @Override
                protected void run() {
                    // Setup
                    TestingCluster cluster = null;
                    try {
                        cluster = startTestingCluster(4, 1, DEFAULT_AKKA_ASK_TIMEOUT());
                        final IntermediateDataSetID rid = new IntermediateDataSetID();
                        // Create a task
                        final JobVertex sender = new JobVertex("Sender");
                        sender.setParallelism(1);
                        // just finish
                        sender.setInvokableClass(NoOpInvokable.class);
                        sender.createAndAddResultDataSet(rid, PIPELINED);
                        final JobVertex sender2 = new JobVertex("Blocking Sender");
                        sender2.setParallelism(1);
                        // just block
                        sender2.setInvokableClass(BlockingNoOpInvokable.class);
                        sender2.createAndAddResultDataSet(new IntermediateDataSetID(), PIPELINED);
                        final JobGraph jobGraph = new JobGraph("Fast finishing producer test job", sender, sender2);
                        final JobID jid = jobGraph.getJobID();
                        final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
                        // we can set the leader session ID to None because we don't use this gateway to send messages
                        final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), null);
                        // Submit the job and wait for all vertices to be running
                        jobManagerGateway.tell(new SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT), testActorGateway);
                        expectMsgClass(JobSubmitSuccess.class);
                        jobManagerGateway.tell(new WaitForAllVerticesToBeRunningOrFinished(jid), testActorGateway);
                        expectMsgClass(AllVerticesRunning.class);
                        Future<Object> egFuture = jobManagerGateway.ask(new RequestExecutionGraph(jobGraph.getJobID()), remaining());
                        ExecutionGraphFound egFound = (ExecutionGraphFound) Await.result(egFuture, remaining());
                        ExecutionGraph eg = (ExecutionGraph) egFound.executionGraph();
                        ExecutionVertex vertex = eg.getJobVertex(sender.getID()).getTaskVertices()[0];
                        while (vertex.getExecutionState() != ExecutionState.FINISHED) {
                            Thread.sleep(1);
                        }
                        IntermediateResultPartition partition = vertex.getProducedPartitions().values().iterator().next();
                        ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), vertex.getCurrentExecutionAttempt().getAttemptId());
                        // Producer finished, request state
                        Object request = new RequestPartitionProducerState(jid, rid, partitionId);
                        Future<ExecutionState> producerStateFuture = jobManagerGateway.ask(request, getRemainingTime()).mapTo(ClassTag$.MODULE$.<ExecutionState>apply(ExecutionState.class));
                        assertEquals(ExecutionState.FINISHED, Await.result(producerStateFuture, getRemainingTime()));
                    } catch (Exception e) {
                        e.printStackTrace();
                        fail(e.getMessage());
                    } finally {
                        if (cluster != null) {
                            cluster.shutdown();
                        }
                    }
                }
            };
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) RequestPartitionProducerState(org.apache.flink.runtime.messages.JobManagerMessages.RequestPartitionProducerState) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) WaitForAllVerticesToBeRunningOrFinished(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunningOrFinished) TestingUtils.startTestingCluster(org.apache.flink.runtime.testingUtils.TestingUtils.startTestingCluster) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) IntermediateResultPartition(org.apache.flink.runtime.executiongraph.IntermediateResultPartition) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) ExecutionGraphFound(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.ExecutionGraphFound) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 4 with ExecutionState

use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.

the class JobManagerTest method testRequestPartitionState.

/**
	 * Tests responses to partition state requests.
	 */
@Test
public void testRequestPartitionState() throws Exception {
    new JavaTestKit(system) {

        {
            new Within(duration("15 seconds")) {

                @Override
                protected void run() {
                    // Setup
                    TestingCluster cluster = null;
                    try {
                        cluster = startTestingCluster(2, 1, DEFAULT_AKKA_ASK_TIMEOUT());
                        final IntermediateDataSetID rid = new IntermediateDataSetID();
                        // Create a task
                        final JobVertex sender = new JobVertex("Sender");
                        sender.setParallelism(1);
                        // just block
                        sender.setInvokableClass(BlockingNoOpInvokable.class);
                        sender.createAndAddResultDataSet(rid, PIPELINED);
                        final JobGraph jobGraph = new JobGraph("Blocking test job", sender);
                        final JobID jid = jobGraph.getJobID();
                        final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
                        // we can set the leader session ID to None because we don't use this gateway to send messages
                        final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), null);
                        // Submit the job and wait for all vertices to be running
                        jobManagerGateway.tell(new SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT), testActorGateway);
                        expectMsgClass(JobSubmitSuccess.class);
                        jobManagerGateway.tell(new WaitForAllVerticesToBeRunningOrFinished(jid), testActorGateway);
                        expectMsgClass(AllVerticesRunning.class);
                        // This is the mock execution ID of the task requesting the state of the partition
                        final ExecutionAttemptID receiver = new ExecutionAttemptID();
                        // Request the execution graph to get the runtime info
                        jobManagerGateway.tell(new RequestExecutionGraph(jid), testActorGateway);
                        final ExecutionGraph eg = (ExecutionGraph) expectMsgClass(ExecutionGraphFound.class).executionGraph();
                        final ExecutionVertex vertex = eg.getJobVertex(sender.getID()).getTaskVertices()[0];
                        final IntermediateResultPartition partition = vertex.getProducedPartitions().values().iterator().next();
                        final ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), vertex.getCurrentExecutionAttempt().getAttemptId());
                        // - The test ----------------------------------------------------------------------
                        // 1. All execution states
                        RequestPartitionProducerState request = new RequestPartitionProducerState(jid, rid, partitionId);
                        for (ExecutionState state : ExecutionState.values()) {
                            ExecutionGraphTestUtils.setVertexState(vertex, state);
                            Future<ExecutionState> futurePartitionState = jobManagerGateway.ask(request, getRemainingTime()).mapTo(ClassTag$.MODULE$.<ExecutionState>apply(ExecutionState.class));
                            ExecutionState resp = Await.result(futurePartitionState, getRemainingTime());
                            assertEquals(state, resp);
                        }
                        // 2. Non-existing execution
                        request = new RequestPartitionProducerState(jid, rid, new ResultPartitionID());
                        Future<?> futurePartitionState = jobManagerGateway.ask(request, getRemainingTime());
                        try {
                            Await.result(futurePartitionState, getRemainingTime());
                            fail("Did not fail with expected RuntimeException");
                        } catch (RuntimeException e) {
                            assertEquals(IllegalArgumentException.class, e.getCause().getClass());
                        }
                        // 3. Non-existing job
                        request = new RequestPartitionProducerState(new JobID(), rid, new ResultPartitionID());
                        futurePartitionState = jobManagerGateway.ask(request, getRemainingTime());
                        try {
                            Await.result(futurePartitionState, getRemainingTime());
                            fail("Did not fail with expected IllegalArgumentException");
                        } catch (IllegalArgumentException ignored) {
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                        fail(e.getMessage());
                    } finally {
                        if (cluster != null) {
                            cluster.shutdown();
                        }
                    }
                }
            };
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) RequestPartitionProducerState(org.apache.flink.runtime.messages.JobManagerMessages.RequestPartitionProducerState) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) WaitForAllVerticesToBeRunningOrFinished(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunningOrFinished) TestingUtils.startTestingCluster(org.apache.flink.runtime.testingUtils.TestingUtils.startTestingCluster) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) IntermediateResultPartition(org.apache.flink.runtime.executiongraph.IntermediateResultPartition) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 5 with ExecutionState

use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.

the class Execution method deployToSlot.

public void deployToSlot(final SimpleSlot slot) throws JobException {
    checkNotNull(slot);
    // The more general check is the timeout of the deployment call
    if (!slot.isAlive()) {
        throw new JobException("Target slot (TaskManager) for deployment is no longer alive.");
    }
    // make sure exactly one deployment call happens from the correct state
    // note: the transition from CREATED to DEPLOYING is for testing purposes only
    ExecutionState previous = this.state;
    if (previous == SCHEDULED || previous == CREATED) {
        if (!transitionState(previous, DEPLOYING)) {
            // this should actually not happen and indicates a race somewhere else
            throw new IllegalStateException("Cannot deploy task: Concurrent deployment call race.");
        }
    } else {
        // vertex may have been cancelled, or it was already scheduled
        throw new IllegalStateException("The vertex must be in CREATED or SCHEDULED state to be deployed. Found state " + previous);
    }
    try {
        // good, we are allowed to deploy
        if (!slot.setExecutedVertex(this)) {
            throw new JobException("Could not assign the ExecutionVertex to the slot " + slot);
        }
        this.assignedResource = slot;
        // race double check, did we fail/cancel and do we need to release the slot?
        if (this.state != DEPLOYING) {
            slot.releaseSlot();
            return;
        }
        if (LOG.isInfoEnabled()) {
            LOG.info(String.format("Deploying %s (attempt #%d) to %s", vertex.getSimpleName(), attemptNumber, getAssignedResourceLocation().getHostname()));
        }
        final TaskDeploymentDescriptor deployment = vertex.createDeploymentDescriptor(attemptId, slot, taskState, attemptNumber);
        // register this execution at the execution graph, to receive call backs
        vertex.getExecutionGraph().registerExecution(this);
        final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway();
        final Future<Acknowledge> submitResultFuture = taskManagerGateway.submitTask(deployment, timeout);
        submitResultFuture.exceptionallyAsync(new ApplyFunction<Throwable, Void>() {

            @Override
            public Void apply(Throwable failure) {
                if (failure instanceof TimeoutException) {
                    String taskname = vertex.getTaskNameWithSubtaskIndex() + " (" + attemptId + ')';
                    markFailed(new Exception("Cannot deploy task " + taskname + " - TaskManager (" + getAssignedResourceLocation() + ") not responding after a timeout of " + timeout, failure));
                } else {
                    markFailed(failure);
                }
                return null;
            }
        }, executor);
    } catch (Throwable t) {
        markFailed(t);
        ExceptionUtils.rethrow(t);
    }
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) TimeoutException(java.util.concurrent.TimeoutException) JobException(org.apache.flink.runtime.JobException) JobException(org.apache.flink.runtime.JobException) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) TimeoutException(java.util.concurrent.TimeoutException)

Aggregations

ExecutionState (org.apache.flink.runtime.execution.ExecutionState)26 Test (org.junit.Test)11 ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID)6 TaskManagerLocation (org.apache.flink.runtime.taskmanager.TaskManagerLocation)6 JsonGenerator (com.fasterxml.jackson.core.JsonGenerator)5 IOException (java.io.IOException)5 StringWriter (java.io.StringWriter)5 TimeoutException (java.util.concurrent.TimeoutException)5 JobID (org.apache.flink.api.common.JobID)5 AccessExecutionVertex (org.apache.flink.runtime.executiongraph.AccessExecutionVertex)5 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)4 HashMap (java.util.HashMap)3 JobException (org.apache.flink.runtime.JobException)3 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)3 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)3 IntermediateResultPartition (org.apache.flink.runtime.executiongraph.IntermediateResultPartition)3 SimpleSlot (org.apache.flink.runtime.instance.SimpleSlot)3 ConnectionID (org.apache.flink.runtime.io.network.ConnectionID)3 IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID)3 MutableIOMetrics (org.apache.flink.runtime.webmonitor.utils.MutableIOMetrics)3