Search in sources:

Example 1 with JobStatus

use of org.apache.flink.runtime.jobgraph.JobStatus in project flink by apache.

The following snippet is taken from the class WebMonitorMessagesTest, method randomJobDetails.

/**
 * Builds an array of randomized {@link JobDetails} for message-serialization tests.
 *
 * @param rnd source of randomness; the draw order is fixed, so a seeded Random
 *            yields reproducible details
 * @return between 0 and 9 randomly populated job detail entries
 */
private JobDetails[] randomJobDetails(Random rnd) {
    final JobDetails[] result = new JobDetails[rnd.nextInt(10)];
    for (int i = 0; i < result.length; i++) {
        // random subtask count per execution state, summed into a total
        final int[] verticesPerState = new int[ExecutionState.values().length];
        int totalVertices = 0;
        for (int s = 0; s < verticesPerState.length; s++) {
            final int stateCount = rnd.nextInt(55);
            verticesPerState[s] = stateCount;
            totalVertices += stateCount;
        }
        final long startTime = rnd.nextLong();
        // roughly half the jobs are "still running" (endTime == -1)
        final long endTime = rnd.nextBoolean() ? -1L : startTime + rnd.nextInt();
        final long lastModified = endTime == -1 ? startTime + rnd.nextInt() : endTime;
        final String jobName = new GenericMessageTester.StringInstantiator().instantiate(rnd);
        final JobID jobId = new JobID();
        final JobStatus jobStatus = JobStatus.values()[rnd.nextInt(JobStatus.values().length)];
        result[i] = new JobDetails(
                jobId, jobName, startTime, endTime, jobStatus, lastModified, verticesPerState, totalVertices);
    }
    return result;
}
Also used : JobStatus(org.apache.flink.runtime.jobgraph.JobStatus) JobDetails(org.apache.flink.runtime.messages.webmonitor.JobDetails) RequestJobDetails(org.apache.flink.runtime.messages.webmonitor.RequestJobDetails) JobID(org.apache.flink.api.common.JobID)

Example 2 with JobStatus

use of org.apache.flink.runtime.jobgraph.JobStatus in project flink by apache.

The following snippet is taken from the class ExecutionGraph, method cancel.

/**
 * Cancels the job by moving the execution graph into a cancelling or canceled
 * state, depending on the state it is currently in.
 *
 * <p>Implemented as a compare-and-swap retry loop: the current state is
 * re-read and the transition re-attempted until one transition succeeds or the
 * state turns out to require no action.
 */
public void cancel() {
    while (true) {
        // snapshot the state; transitionState() presumably only succeeds if the
        // state is still unchanged (CAS) — on contention we loop and retry
        JobStatus current = state;
        if (current == JobStatus.RUNNING || current == JobStatus.CREATED) {
            // regular case: flip to CANCELLING and cancel every job vertex
            if (transitionState(current, JobStatus.CANCELLING)) {
                for (ExecutionJobVertex ejv : verticesInCreationOrder) {
                    ejv.cancel();
                }
                return;
            }
        } else // FAILING: only the status is switched here (note: no per-vertex
        // cancel call in this branch); the loop's other paths expect
        // all vertices to be in their final state.
        if (current == JobStatus.FAILING) {
            if (transitionState(current, JobStatus.CANCELLING)) {
                return;
            }
        } else // RESTARTING: presumably no tasks are executing during a restart
        // (TODO confirm), so the graph can transition directly
        // into the canceled state.
        if (current == JobStatus.RESTARTING) {
            synchronized (progressLock) {
                if (transitionState(current, JobStatus.CANCELED)) {
                    postRunCleanup();
                    // wake any threads waiting on job progress
                    progressLock.notifyAll();
                    LOG.info("Canceled during restart.");
                    return;
                }
            }
        } else {
            // no need to treat other states (already terminal or cancelling)
            return;
        }
    }
}
Also used : JobStatus(org.apache.flink.runtime.jobgraph.JobStatus)

Example 3 with JobStatus

use of org.apache.flink.runtime.jobgraph.JobStatus in project flink by apache.

The following snippet is taken from the class JobDetailsHandlerTest, method compareJobDetails.

/**
 * Verifies that the given JSON job-details response matches the expected
 * {@link AccessExecutionGraph}.
 *
 * <p>The checks fall into four groups, each delegated to a helper: the job
 * summary fields, the per-vertex details (the test fixture is expected to
 * contain exactly one vertex), the aggregated status counts, and the JSON plan.
 *
 * @param originalJob the execution graph the JSON was generated from
 * @param json the JSON job-details response to verify
 * @throws IOException if the JSON cannot be parsed
 */
private static void compareJobDetails(AccessExecutionGraph originalJob, String json) throws IOException {
    JsonNode result = ArchivedJobGenerationUtils.mapper.readTree(json);
    assertJobSummaryEquals(originalJob, result);
    ArrayNode tasks = (ArrayNode) result.get("vertices");
    int x = 0;
    for (AccessExecutionJobVertex expectedTask : originalJob.getVerticesTopologically()) {
        assertVertexEquals(expectedTask, tasks.get(x));
        x++;
    }
    // the generated test job is expected to contain exactly one job vertex
    Assert.assertEquals(1, tasks.size());
    assertStatusCountsEqual(result.get("status-counts"));
    Assert.assertEquals(ArchivedJobGenerationUtils.mapper.readTree(originalJob.getJsonPlan()), result.get("plan"));
}

/** Checks the top-level job summary fields: id, name, state, times, and per-status timestamps. */
private static void assertJobSummaryEquals(AccessExecutionGraph originalJob, JsonNode result) {
    Assert.assertEquals(originalJob.getJobID().toString(), result.get("jid").asText());
    Assert.assertEquals(originalJob.getJobName(), result.get("name").asText());
    Assert.assertEquals(originalJob.isStoppable(), result.get("isStoppable").asBoolean());
    Assert.assertEquals(originalJob.getState().name(), result.get("state").asText());
    Assert.assertEquals(originalJob.getStatusTimestamp(JobStatus.CREATED), result.get("start-time").asLong());
    Assert.assertEquals(originalJob.getStatusTimestamp(originalJob.getState()), result.get("end-time").asLong());
    Assert.assertEquals(originalJob.getStatusTimestamp(originalJob.getState()) - originalJob.getStatusTimestamp(JobStatus.CREATED), result.get("duration").asLong());
    JsonNode timestamps = result.get("timestamps");
    for (JobStatus status : JobStatus.values()) {
        Assert.assertEquals(originalJob.getStatusTimestamp(status), timestamps.get(status.name()).asLong());
    }
}

/** Checks one vertex entry: identity, timings, per-state subtask counts, and aggregated I/O metrics. */
private static void assertVertexEquals(AccessExecutionJobVertex expectedTask, JsonNode task) {
    Assert.assertEquals(expectedTask.getJobVertexId().toString(), task.get("id").asText());
    Assert.assertEquals(expectedTask.getName(), task.get("name").asText());
    Assert.assertEquals(expectedTask.getParallelism(), task.get("parallelism").asInt());
    Assert.assertEquals(expectedTask.getAggregateState().name(), task.get("status").asText());
    // hard-coded timings presumably come from the ArchivedJobGenerationUtils fixture — confirm there
    Assert.assertEquals(3, task.get("start-time").asLong());
    Assert.assertEquals(5, task.get("end-time").asLong());
    Assert.assertEquals(2, task.get("duration").asLong());
    // the fixture job has a single subtask, expected to be FINISHED
    JsonNode subtasksPerState = task.get("tasks");
    Assert.assertEquals(0, subtasksPerState.get(ExecutionState.CREATED.name()).asInt());
    Assert.assertEquals(0, subtasksPerState.get(ExecutionState.SCHEDULED.name()).asInt());
    Assert.assertEquals(0, subtasksPerState.get(ExecutionState.DEPLOYING.name()).asInt());
    Assert.assertEquals(0, subtasksPerState.get(ExecutionState.RUNNING.name()).asInt());
    Assert.assertEquals(1, subtasksPerState.get(ExecutionState.FINISHED.name()).asInt());
    Assert.assertEquals(0, subtasksPerState.get(ExecutionState.CANCELING.name()).asInt());
    Assert.assertEquals(0, subtasksPerState.get(ExecutionState.CANCELED.name()).asInt());
    Assert.assertEquals(0, subtasksPerState.get(ExecutionState.FAILED.name()).asInt());
    // aggregate the expected I/O metrics over all subtasks and compare
    long expectedNumBytesIn = 0;
    long expectedNumBytesOut = 0;
    long expectedNumRecordsIn = 0;
    long expectedNumRecordsOut = 0;
    for (AccessExecutionVertex vertex : expectedTask.getTaskVertices()) {
        IOMetrics ioMetrics = vertex.getCurrentExecutionAttempt().getIOMetrics();
        expectedNumBytesIn += ioMetrics.getNumBytesInLocal() + ioMetrics.getNumBytesInRemote();
        expectedNumBytesOut += ioMetrics.getNumBytesOut();
        expectedNumRecordsIn += ioMetrics.getNumRecordsIn();
        expectedNumRecordsOut += ioMetrics.getNumRecordsOut();
    }
    JsonNode metrics = task.get("metrics");
    Assert.assertEquals(expectedNumBytesIn, metrics.get("read-bytes").asLong());
    Assert.assertEquals(expectedNumBytesOut, metrics.get("write-bytes").asLong());
    Assert.assertEquals(expectedNumRecordsIn, metrics.get("read-records").asLong());
    Assert.assertEquals(expectedNumRecordsOut, metrics.get("write-records").asLong());
}

/** Checks the aggregated per-state vertex counts (fixture expectation: one RUNNING vertex). */
private static void assertStatusCountsEqual(JsonNode statusCounts) {
    Assert.assertEquals(0, statusCounts.get(ExecutionState.CREATED.name()).asInt());
    Assert.assertEquals(0, statusCounts.get(ExecutionState.SCHEDULED.name()).asInt());
    Assert.assertEquals(0, statusCounts.get(ExecutionState.DEPLOYING.name()).asInt());
    Assert.assertEquals(1, statusCounts.get(ExecutionState.RUNNING.name()).asInt());
    Assert.assertEquals(0, statusCounts.get(ExecutionState.FINISHED.name()).asInt());
    Assert.assertEquals(0, statusCounts.get(ExecutionState.CANCELING.name()).asInt());
    Assert.assertEquals(0, statusCounts.get(ExecutionState.CANCELED.name()).asInt());
    Assert.assertEquals(0, statusCounts.get(ExecutionState.FAILED.name()).asInt());
}
Also used : JobStatus(org.apache.flink.runtime.jobgraph.JobStatus) AccessExecutionJobVertex(org.apache.flink.runtime.executiongraph.AccessExecutionJobVertex) JsonNode(com.fasterxml.jackson.databind.JsonNode) ArrayNode(com.fasterxml.jackson.databind.node.ArrayNode) IOMetrics(org.apache.flink.runtime.executiongraph.IOMetrics) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex)

Example 4 with JobStatus

use of org.apache.flink.runtime.jobgraph.JobStatus in project flink by apache.

The following snippet is taken from the class JobDetailsHandler, method createJobDetailsJson.

/**
 * Renders the job details of the given execution graph as the JSON payload of
 * the job-details REST handler.
 *
 * <p>The payload contains the basic job info, per-status timestamps, one entry
 * per job vertex (with per-state subtask counts and aggregated I/O metrics),
 * the aggregated vertex status counts, and the raw JSON plan.
 *
 * @param graph the execution graph to render
 * @param fetcher optional metric fetcher used for the I/O metrics; may be null
 * @return the JSON representation of the job details
 * @throws IOException if writing the JSON fails
 */
public static String createJobDetailsJson(AccessExecutionGraph graph, @Nullable MetricFetcher fetcher) throws IOException {
    final StringWriter writer = new StringWriter();
    final JsonGenerator gen = JsonFactory.jacksonFactory.createGenerator(writer);
    // a single "now" so that all durations in the payload are consistent
    final long now = System.currentTimeMillis();
    gen.writeStartObject();
    // basic info
    gen.writeStringField("jid", graph.getJobID().toString());
    gen.writeStringField("name", graph.getJobName());
    gen.writeBooleanField("isStoppable", graph.isStoppable());
    gen.writeStringField("state", graph.getState().name());
    // times and duration: the end time is only set once the job is globally terminal
    final long jobStartTime = graph.getStatusTimestamp(JobStatus.CREATED);
    final long jobEndTime = graph.getState().isGloballyTerminalState()
            ? graph.getStatusTimestamp(graph.getState())
            : -1L;
    gen.writeNumberField("start-time", jobStartTime);
    gen.writeNumberField("end-time", jobEndTime);
    gen.writeNumberField("duration", (jobEndTime > 0 ? jobEndTime : now) - jobStartTime);
    gen.writeNumberField("now", now);
    // timestamp of every job status transition
    gen.writeObjectFieldStart("timestamps");
    for (JobStatus status : JobStatus.values()) {
        gen.writeNumberField(status.name(), graph.getStatusTimestamp(status));
    }
    gen.writeEndObject();
    // job vertices, counting their aggregate states along the way
    final int[] jobVerticesPerState = new int[ExecutionState.values().length];
    gen.writeArrayFieldStart("vertices");
    for (AccessExecutionJobVertex ejv : graph.getVerticesTopologically()) {
        final ExecutionState jobVertexState = writeJobVertexDetails(gen, ejv, graph, fetcher, now);
        jobVerticesPerState[jobVertexState.ordinal()]++;
    }
    gen.writeEndArray();
    gen.writeObjectFieldStart("status-counts");
    for (ExecutionState state : ExecutionState.values()) {
        gen.writeNumberField(state.name(), jobVerticesPerState[state.ordinal()]);
    }
    gen.writeEndObject();
    gen.writeFieldName("plan");
    gen.writeRawValue(graph.getJsonPlan());
    gen.writeEndObject();
    gen.close();
    return writer.toString();
}

/**
 * Writes the JSON object for a single job vertex and returns its aggregate state.
 *
 * @param gen generator to write to, positioned inside the "vertices" array
 * @param ejv the job vertex to render
 * @param graph the enclosing execution graph (source of the job id)
 * @param fetcher optional metric fetcher; may be null
 * @param now timestamp used for durations of vertices that are still running
 * @return the aggregate execution state of the vertex
 * @throws IOException if writing the JSON fails
 */
private static ExecutionState writeJobVertexDetails(
        JsonGenerator gen,
        AccessExecutionJobVertex ejv,
        AccessExecutionGraph graph,
        @Nullable MetricFetcher fetcher,
        long now) throws IOException {
    final int[] tasksPerState = new int[ExecutionState.values().length];
    long startTime = Long.MAX_VALUE;
    long endTime = 0;
    boolean allFinished = true;
    for (AccessExecutionVertex vertex : ejv.getTaskVertices()) {
        final ExecutionState state = vertex.getExecutionState();
        tasksPerState[state.ordinal()]++;
        // take the earliest start time (first DEPLOYING transition)
        long started = vertex.getStateTimestamp(ExecutionState.DEPLOYING);
        if (started > 0) {
            startTime = Math.min(startTime, started);
        }
        allFinished &= state.isTerminal();
        endTime = Math.max(endTime, vertex.getStateTimestamp(state));
    }
    long duration;
    if (startTime < Long.MAX_VALUE) {
        if (allFinished) {
            duration = endTime - startTime;
        } else {
            // still running: report no end time and measure against "now"
            endTime = -1L;
            duration = now - startTime;
        }
    } else {
        // no subtask has started deploying yet
        startTime = -1L;
        endTime = -1L;
        duration = -1L;
    }
    final ExecutionState jobVertexState =
            ExecutionJobVertex.getAggregateJobVertexState(tasksPerState, ejv.getParallelism());
    gen.writeStartObject();
    gen.writeStringField("id", ejv.getJobVertexId().toString());
    gen.writeStringField("name", ejv.getName());
    gen.writeNumberField("parallelism", ejv.getParallelism());
    gen.writeStringField("status", jobVertexState.name());
    gen.writeNumberField("start-time", startTime);
    gen.writeNumberField("end-time", endTime);
    gen.writeNumberField("duration", duration);
    gen.writeObjectFieldStart("tasks");
    for (ExecutionState state : ExecutionState.values()) {
        gen.writeNumberField(state.name(), tasksPerState[state.ordinal()]);
    }
    gen.writeEndObject();
    // aggregate I/O metrics over all subtasks; the ids are loop-invariant, so compute them once
    final String jobId = graph.getJobID().toString();
    final String vertexId = ejv.getJobVertexId().toString();
    final MutableIOMetrics counts = new MutableIOMetrics();
    for (AccessExecutionVertex vertex : ejv.getTaskVertices()) {
        counts.addIOMetrics(vertex.getCurrentExecutionAttempt(), fetcher, jobId, vertexId);
    }
    counts.writeIOMetricsAsJson(gen);
    gen.writeEndObject();
    return jobVertexState;
}
Also used : JobStatus(org.apache.flink.runtime.jobgraph.JobStatus) AccessExecutionJobVertex(org.apache.flink.runtime.executiongraph.AccessExecutionJobVertex) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) MutableIOMetrics(org.apache.flink.runtime.webmonitor.utils.MutableIOMetrics) StringWriter(java.io.StringWriter) JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex)

Example 5 with JobStatus

use of org.apache.flink.runtime.jobgraph.JobStatus in project flink by apache.

The following snippet is taken from the class ExecutionGraph, method jobVertexInFinalState.

/**
 * Called each time a job vertex reaches its final state. Once the last vertex
 * has done so, drives the whole job into its terminal status (FINISHED,
 * CANCELED, or restart/failure handling) and wakes threads waiting on
 * {@code progressLock}.
 *
 * <p>The terminal transition runs as a compare-and-swap retry loop because the
 * job status may be changed concurrently; transitionState() failures simply
 * cause a re-read of the state.
 */
void jobVertexInFinalState() {
    synchronized (progressLock) {
        // guard against being called more often than there are vertices
        if (numFinishedJobVertices >= verticesInCreationOrder.size()) {
            throw new IllegalStateException("All vertices are already finished, cannot transition vertex to finished.");
        }
        numFinishedJobVertices++;
        if (numFinishedJobVertices == verticesInCreationOrder.size()) {
            // we are done, transition to the final state
            JobStatus current;
            while (true) {
                current = this.state;
                if (current == JobStatus.RUNNING) {
                    if (transitionState(current, JobStatus.FINISHED)) {
                        postRunCleanup();
                        break;
                    }
                } else if (current == JobStatus.CANCELLING) {
                    if (transitionState(current, JobStatus.CANCELED)) {
                        postRunCleanup();
                        break;
                    }
                } else if (current == JobStatus.FAILING) {
                    // a false return presumably indicates a
                    // concurrent job status change, let's check again
                    if (tryRestartOrFail()) {
                        break;
                    }
                } else if (current == JobStatus.SUSPENDED) {
                    // we've already cleaned up when entering the SUSPENDED state
                    break;
                } else if (current.isGloballyTerminalState()) {
                    // unexpected but harmless: just log and stop retrying
                    LOG.warn("Job has entered globally terminal state without waiting for all " + "job vertices to reach final state.");
                    break;
                } else {
                    // any other state at this point is a bug; fail the job
                    fail(new Exception("ExecutionGraph went into final state from state " + current));
                    break;
                }
            }
            // done transitioning the state
            // also, notify waiters
            progressLock.notifyAll();
        }
    }
}
Also used : JobStatus(org.apache.flink.runtime.jobgraph.JobStatus) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) StoppingException(org.apache.flink.runtime.StoppingException) NoResourceAvailableException(org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException) JobException(org.apache.flink.runtime.JobException) NoSuchElementException(java.util.NoSuchElementException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)

Aggregations

JobStatus (org.apache.flink.runtime.jobgraph.JobStatus)12 JobID (org.apache.flink.api.common.JobID)3 AccessExecutionJobVertex (org.apache.flink.runtime.executiongraph.AccessExecutionJobVertex)3 AccessExecutionVertex (org.apache.flink.runtime.executiongraph.AccessExecutionVertex)3 JobDetails (org.apache.flink.runtime.messages.webmonitor.JobDetails)3 Test (org.junit.Test)3 ArrayList (java.util.ArrayList)2 ExecutionState (org.apache.flink.runtime.execution.ExecutionState)2 SuppressRestartsException (org.apache.flink.runtime.execution.SuppressRestartsException)2 RequestJobDetails (org.apache.flink.runtime.messages.webmonitor.RequestJobDetails)2 Deadline (scala.concurrent.duration.Deadline)2 ActorSystem (akka.actor.ActorSystem)1 JsonGenerator (com.fasterxml.jackson.core.JsonGenerator)1 JsonNode (com.fasterxml.jackson.databind.JsonNode)1 ArrayNode (com.fasterxml.jackson.databind.node.ArrayNode)1 File (java.io.File)1 IOException (java.io.IOException)1 StringWriter (java.io.StringWriter)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1