Example 11 with ExecutionState

Use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.

From the class JobManagerTest, method testRequestPartitionState.

/**
 * Tests responses to partition state requests.
 */
@Test
public void testRequestPartitionState() throws Exception {
    new JavaTestKit(system) {

        {
            new Within(duration("15 seconds")) {

                @Override
                protected void run() {
                    // Setup
                    TestingCluster cluster = null;
                    try {
                        cluster = startTestingCluster(2, 1, DEFAULT_AKKA_ASK_TIMEOUT());
                        final IntermediateDataSetID rid = new IntermediateDataSetID();
                        // Create a task
                        final JobVertex sender = new JobVertex("Sender");
                        sender.setParallelism(1);
                        // just block
                        sender.setInvokableClass(BlockingNoOpInvokable.class);
                        sender.createAndAddResultDataSet(rid, PIPELINED);
                        final JobGraph jobGraph = new JobGraph("Blocking test job", sender);
                        final JobID jid = jobGraph.getJobID();
                        final ActorGateway jobManagerGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
                        // we can set the leader session ID to None because we don't use this gateway to send messages
                        final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), null);
                        // Submit the job and wait for all vertices to be running
                        jobManagerGateway.tell(new SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT), testActorGateway);
                        expectMsgClass(JobSubmitSuccess.class);
                        jobManagerGateway.tell(new WaitForAllVerticesToBeRunningOrFinished(jid), testActorGateway);
                        expectMsgClass(AllVerticesRunning.class);
                        // This is the mock execution ID of the task requesting the state of the partition
                        final ExecutionAttemptID receiver = new ExecutionAttemptID();
                        // Request the execution graph to get the runtime info
                        jobManagerGateway.tell(new RequestExecutionGraph(jid), testActorGateway);
                        final ExecutionGraph eg = (ExecutionGraph) expectMsgClass(ExecutionGraphFound.class).executionGraph();
                        final ExecutionVertex vertex = eg.getJobVertex(sender.getID()).getTaskVertices()[0];
                        final IntermediateResultPartition partition = vertex.getProducedPartitions().values().iterator().next();
                        final ResultPartitionID partitionId = new ResultPartitionID(partition.getPartitionId(), vertex.getCurrentExecutionAttempt().getAttemptId());
                        // - The test ----------------------------------------------------------------------
                        // 1. All execution states
                        RequestPartitionProducerState request = new RequestPartitionProducerState(jid, rid, partitionId);
                        for (ExecutionState state : ExecutionState.values()) {
                            ExecutionGraphTestUtils.setVertexState(vertex, state);
                            Future<ExecutionState> futurePartitionState = jobManagerGateway.ask(request, getRemainingTime()).mapTo(ClassTag$.MODULE$.<ExecutionState>apply(ExecutionState.class));
                            ExecutionState resp = Await.result(futurePartitionState, getRemainingTime());
                            assertEquals(state, resp);
                        }
                        // 2. Non-existing execution
                        request = new RequestPartitionProducerState(jid, rid, new ResultPartitionID());
                        Future<?> futurePartitionState = jobManagerGateway.ask(request, getRemainingTime());
                        try {
                            Await.result(futurePartitionState, getRemainingTime());
                            fail("Did not fail with expected RuntimeException");
                        } catch (RuntimeException e) {
                            assertEquals(IllegalArgumentException.class, e.getCause().getClass());
                        }
                        // 3. Non-existing job
                        request = new RequestPartitionProducerState(new JobID(), rid, new ResultPartitionID());
                        futurePartitionState = jobManagerGateway.ask(request, getRemainingTime());
                        try {
                            Await.result(futurePartitionState, getRemainingTime());
                            fail("Did not fail with expected IllegalArgumentException");
                        } catch (IllegalArgumentException ignored) {
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                        fail(e.getMessage());
                    } finally {
                        if (cluster != null) {
                            cluster.shutdown();
                        }
                    }
                }
            };
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) RequestPartitionProducerState(org.apache.flink.runtime.messages.JobManagerMessages.RequestPartitionProducerState) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) WaitForAllVerticesToBeRunningOrFinished(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunningOrFinished) TestingUtils.startTestingCluster(org.apache.flink.runtime.testingUtils.TestingUtils.startTestingCluster) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) IntermediateResultPartition(org.apache.flink.runtime.executiongraph.IntermediateResultPartition) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) RequestExecutionGraph(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestExecutionGraph) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) JavaTestKit(akka.testkit.JavaTestKit) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
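
The test above drives the producer vertex through every ExecutionState value before asking the JobManager for the partition producer state. As a minimal standalone sketch (independent of the test harness above, with an illustrative class name), iterating the enum and checking its terminal flag looks roughly like this; only ExecutionState itself is used:

import org.apache.flink.runtime.execution.ExecutionState;

public class ExecutionStateSketch {

    public static void main(String[] args) {
        // ExecutionState is a plain enum (e.g. CREATED, DEPLOYING, RUNNING, FINISHED,
        // CANCELED, FAILED), so values() covers every state the test can put the
        // producer vertex into via ExecutionGraphTestUtils.setVertexState.
        for (ExecutionState state : ExecutionState.values()) {
            // isTerminal() is the same check the web handlers below use to decide
            // whether a subtask has reached a final state.
            System.out.println(state.name() + " terminal=" + state.isTerminal());
        }
    }
}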

Example 12 with ExecutionState

Use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.

From the class JobDetailsHandler, method createJobDetailsJson.

public static String createJobDetailsJson(AccessExecutionGraph graph, @Nullable MetricFetcher fetcher) throws IOException {
    final StringWriter writer = new StringWriter();
    final JsonGenerator gen = JsonFactory.jacksonFactory.createGenerator(writer);
    final long now = System.currentTimeMillis();
    gen.writeStartObject();
    // basic info
    gen.writeStringField("jid", graph.getJobID().toString());
    gen.writeStringField("name", graph.getJobName());
    gen.writeBooleanField("isStoppable", graph.isStoppable());
    gen.writeStringField("state", graph.getState().name());
    // times and duration
    final long jobStartTime = graph.getStatusTimestamp(JobStatus.CREATED);
    final long jobEndTime = graph.getState().isGloballyTerminalState() ? graph.getStatusTimestamp(graph.getState()) : -1L;
    gen.writeNumberField("start-time", jobStartTime);
    gen.writeNumberField("end-time", jobEndTime);
    gen.writeNumberField("duration", (jobEndTime > 0 ? jobEndTime : now) - jobStartTime);
    gen.writeNumberField("now", now);
    // timestamps
    gen.writeObjectFieldStart("timestamps");
    for (JobStatus status : JobStatus.values()) {
        gen.writeNumberField(status.name(), graph.getStatusTimestamp(status));
    }
    gen.writeEndObject();
    // job vertices
    int[] jobVerticesPerState = new int[ExecutionState.values().length];
    gen.writeArrayFieldStart("vertices");
    for (AccessExecutionJobVertex ejv : graph.getVerticesTopologically()) {
        int[] tasksPerState = new int[ExecutionState.values().length];
        long startTime = Long.MAX_VALUE;
        long endTime = 0;
        boolean allFinished = true;
        for (AccessExecutionVertex vertex : ejv.getTaskVertices()) {
            final ExecutionState state = vertex.getExecutionState();
            tasksPerState[state.ordinal()]++;
            // take the earliest start time
            long started = vertex.getStateTimestamp(ExecutionState.DEPLOYING);
            if (started > 0) {
                startTime = Math.min(startTime, started);
            }
            allFinished &= state.isTerminal();
            endTime = Math.max(endTime, vertex.getStateTimestamp(state));
        }
        long duration;
        if (startTime < Long.MAX_VALUE) {
            if (allFinished) {
                duration = endTime - startTime;
            } else {
                endTime = -1L;
                duration = now - startTime;
            }
        } else {
            startTime = -1L;
            endTime = -1L;
            duration = -1L;
        }
        ExecutionState jobVertexState = ExecutionJobVertex.getAggregateJobVertexState(tasksPerState, ejv.getParallelism());
        jobVerticesPerState[jobVertexState.ordinal()]++;
        gen.writeStartObject();
        gen.writeStringField("id", ejv.getJobVertexId().toString());
        gen.writeStringField("name", ejv.getName());
        gen.writeNumberField("parallelism", ejv.getParallelism());
        gen.writeStringField("status", jobVertexState.name());
        gen.writeNumberField("start-time", startTime);
        gen.writeNumberField("end-time", endTime);
        gen.writeNumberField("duration", duration);
        gen.writeObjectFieldStart("tasks");
        for (ExecutionState state : ExecutionState.values()) {
            gen.writeNumberField(state.name(), tasksPerState[state.ordinal()]);
        }
        gen.writeEndObject();
        MutableIOMetrics counts = new MutableIOMetrics();
        for (AccessExecutionVertex vertex : ejv.getTaskVertices()) {
            counts.addIOMetrics(vertex.getCurrentExecutionAttempt(), fetcher, graph.getJobID().toString(), ejv.getJobVertexId().toString());
        }
        counts.writeIOMetricsAsJson(gen);
        gen.writeEndObject();
    }
    gen.writeEndArray();
    gen.writeObjectFieldStart("status-counts");
    for (ExecutionState state : ExecutionState.values()) {
        gen.writeNumberField(state.name(), jobVerticesPerState[state.ordinal()]);
    }
    gen.writeEndObject();
    gen.writeFieldName("plan");
    gen.writeRawValue(graph.getJsonPlan());
    gen.writeEndObject();
    gen.close();
    return writer.toString();
}
Also used : JobStatus(org.apache.flink.runtime.jobgraph.JobStatus) AccessExecutionJobVertex(org.apache.flink.runtime.executiongraph.AccessExecutionJobVertex) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) MutableIOMetrics(org.apache.flink.runtime.webmonitor.utils.MutableIOMetrics) StringWriter(java.io.StringWriter) JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex)
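
createJobDetailsJson counts subtasks per state with an int array indexed by ExecutionState.ordinal() and then derives an aggregate vertex state from it. A stripped-down sketch of just that counting pattern (the class name and the hard-coded subtask states are purely illustrative):

import org.apache.flink.runtime.execution.ExecutionState;

public class StateHistogramSketch {

    public static void main(String[] args) {
        // One counter slot per enum constant, indexed by ordinal(), as in the handler.
        int[] tasksPerState = new int[ExecutionState.values().length];

        // Hypothetical subtask states, standing in for vertex.getExecutionState().
        ExecutionState[] subtaskStates = {
                ExecutionState.RUNNING, ExecutionState.RUNNING, ExecutionState.FINISHED
        };
        for (ExecutionState state : subtaskStates) {
            tasksPerState[state.ordinal()]++;
        }

        // The handler then emits one numeric JSON field per state name in this order.
        for (ExecutionState state : ExecutionState.values()) {
            System.out.println(state.name() + ": " + tasksPerState[state.ordinal()]);
        }
    }
}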

Example 13 with ExecutionState

Use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.

From the class JobVertexDetailsHandler, method createVertexDetailsJson.

public static String createVertexDetailsJson(AccessExecutionJobVertex jobVertex, String jobID, @Nullable MetricFetcher fetcher) throws IOException {
    final long now = System.currentTimeMillis();
    StringWriter writer = new StringWriter();
    JsonGenerator gen = JsonFactory.jacksonFactory.createGenerator(writer);
    gen.writeStartObject();
    gen.writeStringField("id", jobVertex.getJobVertexId().toString());
    gen.writeStringField("name", jobVertex.getName());
    gen.writeNumberField("parallelism", jobVertex.getParallelism());
    gen.writeNumberField("now", now);
    gen.writeArrayFieldStart("subtasks");
    int num = 0;
    for (AccessExecutionVertex vertex : jobVertex.getTaskVertices()) {
        final ExecutionState status = vertex.getExecutionState();
        TaskManagerLocation location = vertex.getCurrentAssignedResourceLocation();
        String locationString = location == null ? "(unassigned)" : location.getHostname() + ":" + location.dataPort();
        long startTime = vertex.getStateTimestamp(ExecutionState.DEPLOYING);
        if (startTime == 0) {
            startTime = -1;
        }
        long endTime = status.isTerminal() ? vertex.getStateTimestamp(status) : -1;
        long duration = startTime > 0 ? ((endTime > 0 ? endTime : now) - startTime) : -1;
        gen.writeStartObject();
        gen.writeNumberField("subtask", num);
        gen.writeStringField("status", status.name());
        gen.writeNumberField("attempt", vertex.getCurrentExecutionAttempt().getAttemptNumber());
        gen.writeStringField("host", locationString);
        gen.writeNumberField("start-time", startTime);
        gen.writeNumberField("end-time", endTime);
        gen.writeNumberField("duration", duration);
        MutableIOMetrics counts = new MutableIOMetrics();
        counts.addIOMetrics(vertex.getCurrentExecutionAttempt(), fetcher, jobID, jobVertex.getJobVertexId().toString());
        counts.writeIOMetricsAsJson(gen);
        gen.writeEndObject();
        num++;
    }
    gen.writeEndArray();
    gen.writeEndObject();
    gen.close();
    return writer.toString();
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) MutableIOMetrics(org.apache.flink.runtime.webmonitor.utils.MutableIOMetrics) StringWriter(java.io.StringWriter) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex)
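
The per-subtask timing in createVertexDetailsJson follows a simple convention: a missing DEPLOYING timestamp is reported as -1, a non-terminal state leaves the end time at -1, and the duration then falls back to the current time. A small sketch of only that arithmetic, using hypothetical timestamps and no Flink types:

public class SubtaskDurationSketch {

    // Mirrors the start/end/duration convention in the handler above:
    // startTime <= 0 means "never deployed", endTime <= 0 means "not yet terminal".
    static long duration(long startTime, long endTime, long now) {
        if (startTime <= 0) {
            return -1L;
        }
        return (endTime > 0 ? endTime : now) - startTime;
    }

    public static void main(String[] args) {
        long now = System.currentTimeMillis();
        System.out.println(duration(-1L, -1L, now));           // never deployed -> -1
        System.out.println(duration(now - 5_000L, -1L, now));  // still running  -> ~5000 ms
        System.out.println(duration(1_000L, 4_500L, now));     // finished       -> 3500 ms
    }
}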

Example 14 with ExecutionState

Use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.

From the class JobVertexTaskManagersHandler, method createVertexDetailsByTaskManagerJson.

public static String createVertexDetailsByTaskManagerJson(AccessExecutionJobVertex jobVertex, String jobID, @Nullable MetricFetcher fetcher) throws IOException {
    StringWriter writer = new StringWriter();
    JsonGenerator gen = JsonFactory.jacksonFactory.createGenerator(writer);
    // Build a map that groups tasks by TaskManager
    Map<String, List<AccessExecutionVertex>> taskManagerVertices = new HashMap<>();
    for (AccessExecutionVertex vertex : jobVertex.getTaskVertices()) {
        TaskManagerLocation location = vertex.getCurrentAssignedResourceLocation();
        String taskManager = location == null ? "(unassigned)" : location.getHostname() + ":" + location.dataPort();
        List<AccessExecutionVertex> vertices = taskManagerVertices.get(taskManager);
        if (vertices == null) {
            vertices = new ArrayList<>();
            taskManagerVertices.put(taskManager, vertices);
        }
        vertices.add(vertex);
    }
    // Build JSON response
    final long now = System.currentTimeMillis();
    gen.writeStartObject();
    gen.writeStringField("id", jobVertex.getJobVertexId().toString());
    gen.writeStringField("name", jobVertex.getName());
    gen.writeNumberField("now", now);
    gen.writeArrayFieldStart("taskmanagers");
    for (Map.Entry<String, List<AccessExecutionVertex>> entry : taskManagerVertices.entrySet()) {
        String host = entry.getKey();
        List<AccessExecutionVertex> taskVertices = entry.getValue();
        int[] tasksPerState = new int[ExecutionState.values().length];
        long startTime = Long.MAX_VALUE;
        long endTime = 0;
        boolean allFinished = true;
        MutableIOMetrics counts = new MutableIOMetrics();
        for (AccessExecutionVertex vertex : taskVertices) {
            final ExecutionState state = vertex.getExecutionState();
            tasksPerState[state.ordinal()]++;
            // take the earliest start time
            long started = vertex.getStateTimestamp(ExecutionState.DEPLOYING);
            if (started > 0) {
                startTime = Math.min(startTime, started);
            }
            allFinished &= state.isTerminal();
            endTime = Math.max(endTime, vertex.getStateTimestamp(state));
            counts.addIOMetrics(vertex.getCurrentExecutionAttempt(), fetcher, jobID, jobVertex.getJobVertexId().toString());
        }
        long duration;
        if (startTime < Long.MAX_VALUE) {
            if (allFinished) {
                duration = endTime - startTime;
            } else {
                endTime = -1L;
                duration = now - startTime;
            }
        } else {
            startTime = -1L;
            endTime = -1L;
            duration = -1L;
        }
        ExecutionState jobVertexState = ExecutionJobVertex.getAggregateJobVertexState(tasksPerState, taskVertices.size());
        gen.writeStartObject();
        gen.writeStringField("host", host);
        gen.writeStringField("status", jobVertexState.name());
        gen.writeNumberField("start-time", startTime);
        gen.writeNumberField("end-time", endTime);
        gen.writeNumberField("duration", duration);
        counts.writeIOMetricsAsJson(gen);
        gen.writeObjectFieldStart("status-counts");
        for (ExecutionState state : ExecutionState.values()) {
            gen.writeNumberField(state.name(), tasksPerState[state.ordinal()]);
        }
        gen.writeEndObject();
        gen.writeEndObject();
    }
    gen.writeEndArray();
    gen.writeEndObject();
    gen.close();
    return writer.toString();
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) MutableIOMetrics(org.apache.flink.runtime.webmonitor.utils.MutableIOMetrics) HashMap(java.util.HashMap) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) StringWriter(java.io.StringWriter) JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) ArrayList(java.util.ArrayList) List(java.util.List) Map(java.util.Map) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex)
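
The grouping step above builds the per-TaskManager lists with an explicit get/null-check/put sequence. On Java 8 and later the same grouping is often written with Map.computeIfAbsent; a sketch of that alternative, with plain strings standing in for the "host:dataPort" keys and subtask indices standing in for the vertices (all names here are illustrative):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class GroupByTaskManagerSketch {

    public static void main(String[] args) {
        // Hypothetical locations, standing in for vertex.getCurrentAssignedResourceLocation().
        String[] subtaskLocations = {"tm-1:6121", "tm-2:6121", "tm-1:6121", "(unassigned)"};

        Map<String, List<Integer>> verticesPerTaskManager = new HashMap<>();
        for (int subtask = 0; subtask < subtaskLocations.length; subtask++) {
            // computeIfAbsent replaces the get/null-check/put block used in the handler.
            verticesPerTaskManager
                    .computeIfAbsent(subtaskLocations[subtask], k -> new ArrayList<>())
                    .add(subtask);
        }

        verticesPerTaskManager.forEach((host, subtasks) ->
                System.out.println(host + " -> " + subtasks));
    }
}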

Example 15 with ExecutionState

Use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.

From the class ExecutionVertex, method resetForNewExecution.

// --------------------------------------------------------------------------------------------
//   Actions
// --------------------------------------------------------------------------------------------
public void resetForNewExecution() {
    LOG.debug("Resetting execution vertex {} for new execution.", getSimpleName());
    synchronized (priorExecutions) {
        Execution execution = currentExecution;
        ExecutionState state = execution.getState();
        if (state == FINISHED || state == CANCELED || state == FAILED) {
            priorExecutions.add(execution);
            currentExecution = new Execution(getExecutionGraph().getFutureExecutor(), this, execution.getAttemptNumber() + 1, System.currentTimeMillis(), timeout);
            CoLocationGroup grp = jobVertex.getCoLocationGroup();
            if (grp != null) {
                this.locationConstraint = grp.getLocationConstraint(subTaskIndex);
            }
        } else {
            throw new IllegalStateException("Cannot reset a vertex that is in state " + state);
        }
    }
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) CoLocationGroup(org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup)
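
resetForNewExecution only archives the current attempt when its state is FINISHED, CANCELED, or FAILED; any other state triggers the IllegalStateException above. A minimal sketch of that guard in isolation (illustrative class and method names), using only the ExecutionState enum:

import org.apache.flink.runtime.execution.ExecutionState;

public class ResetGuardSketch {

    // Same precondition the method above enforces before creating a new Execution attempt.
    static void checkResettable(ExecutionState state) {
        if (state != ExecutionState.FINISHED
                && state != ExecutionState.CANCELED
                && state != ExecutionState.FAILED) {
            throw new IllegalStateException("Cannot reset a vertex that is in state " + state);
        }
    }

    public static void main(String[] args) {
        checkResettable(ExecutionState.FAILED);   // passes
        checkResettable(ExecutionState.RUNNING);  // throws IllegalStateException
    }
}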

Aggregations

ExecutionState (org.apache.flink.runtime.execution.ExecutionState): 26
Test (org.junit.Test): 11
ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID): 6
TaskManagerLocation (org.apache.flink.runtime.taskmanager.TaskManagerLocation): 6
JsonGenerator (com.fasterxml.jackson.core.JsonGenerator): 5
IOException (java.io.IOException): 5
StringWriter (java.io.StringWriter): 5
TimeoutException (java.util.concurrent.TimeoutException): 5
JobID (org.apache.flink.api.common.JobID): 5
AccessExecutionVertex (org.apache.flink.runtime.executiongraph.AccessExecutionVertex): 5
ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID): 4
HashMap (java.util.HashMap): 3
JobException (org.apache.flink.runtime.JobException): 3
ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID): 3
ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex): 3
IntermediateResultPartition (org.apache.flink.runtime.executiongraph.IntermediateResultPartition): 3
SimpleSlot (org.apache.flink.runtime.instance.SimpleSlot): 3
ConnectionID (org.apache.flink.runtime.io.network.ConnectionID): 3
IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID): 3
MutableIOMetrics (org.apache.flink.runtime.webmonitor.utils.MutableIOMetrics): 3