use of org.apache.flink.runtime.jobgraph.JobStatus in project flink by apache.
the class WebMonitorMessagesTest method randomJobDetails.
/**
 * Creates an array of randomly populated {@link JobDetails} instances.
 *
 * <p>The array length is drawn from {@code [0, 10)}. For every entry, a per-state
 * vertex count in {@code [0, 55)} is drawn for each {@link ExecutionState}, and the
 * total is the sum of those counts. The end time is either {@code -1} or an offset
 * from the random start time; the last-modified time equals the end time unless the
 * end time is {@code -1}, in which case it is another offset from the start time.
 *
 * @param rnd source of randomness for all drawn values (the {@link JobID} is
 *            created through its own no-arg constructor)
 * @return the generated array, possibly empty
 */
private JobDetails[] randomJobDetails(Random rnd) {
    final JobDetails[] generated = new JobDetails[rnd.nextInt(10)];

    int slot = 0;
    while (slot < generated.length) {
        // Draw one vertex count per execution state and keep a running total.
        final int[] verticesPerState = new int[ExecutionState.values().length];
        int vertexSum = 0;
        for (int s = 0; s < verticesPerState.length; s++) {
            verticesPerState[s] = rnd.nextInt(55);
            vertexSum += verticesPerState[s];
        }

        final long startTime = rnd.nextLong();

        // -1 is the value used when no end time is generated.
        final long endTime;
        if (rnd.nextBoolean()) {
            endTime = -1L;
        } else {
            endTime = startTime + rnd.nextInt();
        }

        // Compare against the value itself (not the coin flip): a drawn end time may equal -1 too.
        final long lastModified;
        if (endTime == -1) {
            lastModified = startTime + rnd.nextInt();
        } else {
            lastModified = endTime;
        }

        final String jobName = new GenericMessageTester.StringInstantiator().instantiate(rnd);
        final JobID jobId = new JobID();

        final JobStatus[] allStatuses = JobStatus.values();
        final JobStatus jobStatus = allStatuses[rnd.nextInt(JobStatus.values().length)];

        generated[slot] = new JobDetails(
                jobId, jobName, startTime, endTime, jobStatus, lastModified, verticesPerState, vertexSum);
        slot++;
    }

    return generated;
}
use of org.apache.flink.runtime.jobgraph.JobStatus in project flink by apache.
the class ExecutionGraph method cancel.
/**
 * Requests cancellation of this execution graph, acting on the current job status.
 *
 * <p>The method loops, re-reading {@code state} on every pass, until one
 * {@code transitionState(...)} call returns {@code true} or the status is one that
 * needs no handling:
 * <ul>
 * <li>{@code RUNNING} / {@code CREATED}: switch to {@code CANCELLING} and call
 * {@code cancel()} on every job vertex in {@code verticesInCreationOrder}.</li>
 * <li>{@code FAILING}: switch to {@code CANCELLING} without cancelling vertices here.</li>
 * <li>{@code RESTARTING}: while holding {@code progressLock}, switch directly to
 * {@code CANCELED}, run {@code postRunCleanup()} and notify all threads waiting on
 * {@code progressLock}.</li>
 * <li>Any other status: return immediately.</li>
 * </ul>
 */
public void cancel() {
// Retry until a transition succeeds or the status needs no handling.
while (true) {
// Snapshot the status once per pass; the same value is checked and handed to transitionState.
JobStatus current = state;
if (current == JobStatus.RUNNING || current == JobStatus.CREATED) {
if (transitionState(current, JobStatus.CANCELLING)) {
// Transition succeeded: forward the cancel request to every job vertex.
for (ExecutionJobVertex ejv : verticesInCreationOrder) {
ejv.cancel();
}
return;
}
// Transition returned false (presumably the status changed concurrently - confirm); loop again.
} else // FAILING: only switch to CANCELLING; no vertex is cancelled from here.
// NOTE(review): presumably the executions are already being cancelled by the failure path
// and the job then waits for all vertices to be in their final state - confirm.
if (current == JobStatus.FAILING) {
if (transitionState(current, JobStatus.CANCELLING)) {
return;
}
} else // RESTARTING: go straight into the canceled state.
// NOTE(review): presumably safe because no vertex is running during a restart - confirm.
if (current == JobStatus.RESTARTING) {
// The transition, cleanup and notification all happen while holding progressLock.
synchronized (progressLock) {
if (transitionState(current, JobStatus.CANCELED)) {
postRunCleanup();
// Wake up every thread waiting on progressLock.
progressLock.notifyAll();
LOG.info("Canceled during restart.");
return;
}
}
} else {
// no need to treat other states
return;
}
}
}
use of org.apache.flink.runtime.jobgraph.JobStatus in project flink by apache.
the class JobDetailsHandlerTest method compareJobDetails.
/**
 * Asserts that a job-details JSON document matches the given execution graph.
 *
 * <p>Checks, in order: the basic job fields, start/end time and duration, the
 * per-status timestamps, every vertex entry (identity, fixed times {@code 3}/{@code 5}/
 * {@code 2}, per-state subtask counts with a single {@code FINISHED} subtask, and the
 * summed I/O metrics), that exactly one vertex entry exists, the job-level
 * {@code status-counts} (a single {@code RUNNING} entry), and the JSON plan.
 *
 * @param originalJob the execution graph the JSON was generated from
 * @param json the JSON document to verify
 * @throws IOException if the JSON document or the job's JSON plan cannot be parsed
 */
private static void compareJobDetails(AccessExecutionGraph originalJob, String json) throws IOException {
    final JsonNode root = ArchivedJobGenerationUtils.mapper.readTree(json);

    // ---- basic job information ----
    Assert.assertEquals(originalJob.getJobID().toString(), root.get("jid").asText());
    Assert.assertEquals(originalJob.getJobName(), root.get("name").asText());
    Assert.assertEquals(originalJob.isStoppable(), root.get("isStoppable").asBoolean());
    Assert.assertEquals(originalJob.getState().name(), root.get("state").asText());

    // ---- start time, end time, duration ----
    final long expectedStart = originalJob.getStatusTimestamp(JobStatus.CREATED);
    Assert.assertEquals(expectedStart, root.get("start-time").asLong());

    final long expectedEnd = originalJob.getStatusTimestamp(originalJob.getState());
    Assert.assertEquals(expectedEnd, root.get("end-time").asLong());

    final long expectedDuration =
            originalJob.getStatusTimestamp(originalJob.getState()) - originalJob.getStatusTimestamp(JobStatus.CREATED);
    Assert.assertEquals(expectedDuration, root.get("duration").asLong());

    // ---- one timestamp per job status ----
    final JsonNode timestampNode = root.get("timestamps");
    final JobStatus[] allStatuses = JobStatus.values();
    for (int i = 0; i < allStatuses.length; i++) {
        final JobStatus jobStatus = allStatuses[i];
        Assert.assertEquals(
                originalJob.getStatusTimestamp(jobStatus),
                timestampNode.get(jobStatus.name()).asLong());
    }

    // ---- vertices, compared position by position in topological order ----
    final ArrayNode vertexArray = (ArrayNode) root.get("vertices");
    int position = 0;
    for (AccessExecutionJobVertex expectedVertex : originalJob.getVerticesTopologically()) {
        final JsonNode vertexNode = vertexArray.get(position++);

        Assert.assertEquals(expectedVertex.getJobVertexId().toString(), vertexNode.get("id").asText());
        Assert.assertEquals(expectedVertex.getName(), vertexNode.get("name").asText());
        Assert.assertEquals(expectedVertex.getParallelism(), vertexNode.get("parallelism").asInt());
        Assert.assertEquals(expectedVertex.getAggregateState().name(), vertexNode.get("status").asText());

        // Fixed values expected from the test fixture.
        Assert.assertEquals(3, vertexNode.get("start-time").asLong());
        Assert.assertEquals(5, vertexNode.get("end-time").asLong());
        Assert.assertEquals(2, vertexNode.get("duration").asLong());

        final JsonNode subtaskCounts = vertexNode.get("tasks");
        Assert.assertEquals(0, subtaskCounts.get(ExecutionState.CREATED.name()).asInt());
        Assert.assertEquals(0, subtaskCounts.get(ExecutionState.SCHEDULED.name()).asInt());
        Assert.assertEquals(0, subtaskCounts.get(ExecutionState.DEPLOYING.name()).asInt());
        Assert.assertEquals(0, subtaskCounts.get(ExecutionState.RUNNING.name()).asInt());
        Assert.assertEquals(1, subtaskCounts.get(ExecutionState.FINISHED.name()).asInt());
        Assert.assertEquals(0, subtaskCounts.get(ExecutionState.CANCELING.name()).asInt());
        Assert.assertEquals(0, subtaskCounts.get(ExecutionState.CANCELED.name()).asInt());
        Assert.assertEquals(0, subtaskCounts.get(ExecutionState.FAILED.name()).asInt());

        // Sum the I/O metrics of the current attempt of every subtask.
        long bytesIn = 0;
        long bytesOut = 0;
        long recordsIn = 0;
        long recordsOut = 0;
        for (AccessExecutionVertex subtask : expectedVertex.getTaskVertices()) {
            final IOMetrics io = subtask.getCurrentExecutionAttempt().getIOMetrics();
            bytesIn += io.getNumBytesInLocal() + io.getNumBytesInRemote();
            bytesOut += io.getNumBytesOut();
            recordsIn += io.getNumRecordsIn();
            recordsOut += io.getNumRecordsOut();
        }

        final JsonNode metricsNode = vertexNode.get("metrics");
        Assert.assertEquals(bytesIn, metricsNode.get("read-bytes").asLong());
        Assert.assertEquals(bytesOut, metricsNode.get("write-bytes").asLong());
        Assert.assertEquals(recordsIn, metricsNode.get("read-records").asLong());
        Assert.assertEquals(recordsOut, metricsNode.get("write-records").asLong());
    }
    Assert.assertEquals(1, vertexArray.size());

    // ---- job-level counts of vertices per aggregated state ----
    final JsonNode vertexStateCounts = root.get("status-counts");
    Assert.assertEquals(0, vertexStateCounts.get(ExecutionState.CREATED.name()).asInt());
    Assert.assertEquals(0, vertexStateCounts.get(ExecutionState.SCHEDULED.name()).asInt());
    Assert.assertEquals(0, vertexStateCounts.get(ExecutionState.DEPLOYING.name()).asInt());
    Assert.assertEquals(1, vertexStateCounts.get(ExecutionState.RUNNING.name()).asInt());
    Assert.assertEquals(0, vertexStateCounts.get(ExecutionState.FINISHED.name()).asInt());
    Assert.assertEquals(0, vertexStateCounts.get(ExecutionState.CANCELING.name()).asInt());
    Assert.assertEquals(0, vertexStateCounts.get(ExecutionState.CANCELED.name()).asInt());
    Assert.assertEquals(0, vertexStateCounts.get(ExecutionState.FAILED.name()).asInt());

    // ---- plan ----
    Assert.assertEquals(
            ArchivedJobGenerationUtils.mapper.readTree(originalJob.getJsonPlan()),
            root.get("plan"));
}
use of org.apache.flink.runtime.jobgraph.JobStatus in project flink by apache.
the class JobDetailsHandler method createJobDetailsJson.
/**
 * Serializes the details of an execution graph into a JSON string.
 *
 * <p>The document contains, in this order: {@code jid}, {@code name},
 * {@code isStoppable}, {@code state}, {@code start-time}, {@code end-time},
 * {@code duration}, {@code now}, a {@code timestamps} object with one entry per
 * {@link JobStatus}, a {@code vertices} array with one object per job vertex, a
 * {@code status-counts} object with the number of job vertices per aggregated
 * {@link ExecutionState}, and the raw JSON {@code plan}.
 *
 * <p>{@code -1} is written for a job end time when the job is not in a globally
 * terminal state, for a vertex end time when not all of its subtasks are in a terminal
 * state, and for a vertex's start time, end time and duration when none of its
 * subtasks has a positive {@code DEPLOYING} timestamp.
 *
 * @param graph the execution graph to describe
 * @param fetcher passed through to {@code MutableIOMetrics.addIOMetrics}; may be {@code null}
 * @return the JSON document as a string
 * @throws IOException if writing to the JSON generator fails
 */
public static String createJobDetailsJson(AccessExecutionGraph graph, @Nullable MetricFetcher fetcher) throws IOException {
    final StringWriter out = new StringWriter();
    final JsonGenerator json = JsonFactory.jacksonFactory.createGenerator(out);
    final long now = System.currentTimeMillis();

    json.writeStartObject();

    // ---- basic info ----
    json.writeStringField("jid", graph.getJobID().toString());
    json.writeStringField("name", graph.getJobName());
    json.writeBooleanField("isStoppable", graph.isStoppable());
    json.writeStringField("state", graph.getState().name());

    // ---- times and duration ----
    final long jobStart = graph.getStatusTimestamp(JobStatus.CREATED);
    final long jobEnd;
    if (graph.getState().isGloballyTerminalState()) {
        jobEnd = graph.getStatusTimestamp(graph.getState());
    } else {
        jobEnd = -1L;
    }
    // Jobs without a positive end time are measured up to "now".
    final long jobDurationEnd = jobEnd > 0 ? jobEnd : now;

    json.writeNumberField("start-time", jobStart);
    json.writeNumberField("end-time", jobEnd);
    json.writeNumberField("duration", jobDurationEnd - jobStart);
    json.writeNumberField("now", now);

    // ---- timestamps ----
    json.writeObjectFieldStart("timestamps");
    for (JobStatus jobStatus : JobStatus.values()) {
        json.writeNumberField(jobStatus.name(), graph.getStatusTimestamp(jobStatus));
    }
    json.writeEndObject();

    // ---- job vertices ----
    final int[] vertexCountPerState = new int[ExecutionState.values().length];
    json.writeArrayFieldStart("vertices");

    for (AccessExecutionJobVertex jobVertex : graph.getVerticesTopologically()) {
        final int[] subtaskCountPerState = new int[ExecutionState.values().length];
        long vertexStart = Long.MAX_VALUE;
        long vertexEnd = 0;
        boolean everySubtaskTerminal = true;

        for (AccessExecutionVertex subtask : jobVertex.getTaskVertices()) {
            final ExecutionState subtaskState = subtask.getExecutionState();
            subtaskCountPerState[subtaskState.ordinal()]++;

            // Earliest positive DEPLOYING timestamp marks the vertex start.
            final long deployedAt = subtask.getStateTimestamp(ExecutionState.DEPLOYING);
            if (deployedAt > 0 && deployedAt < vertexStart) {
                vertexStart = deployedAt;
            }

            if (!subtaskState.isTerminal()) {
                everySubtaskTerminal = false;
            }

            // Latest timestamp of any subtask's current state marks the vertex end.
            final long reachedCurrentStateAt = subtask.getStateTimestamp(subtaskState);
            if (reachedCurrentStateAt > vertexEnd) {
                vertexEnd = reachedCurrentStateAt;
            }
        }

        final long vertexDuration;
        if (vertexStart == Long.MAX_VALUE) {
            // No subtask reported a positive DEPLOYING timestamp.
            vertexStart = -1L;
            vertexEnd = -1L;
            vertexDuration = -1L;
        } else if (everySubtaskTerminal) {
            vertexDuration = vertexEnd - vertexStart;
        } else {
            // Still in progress: no end time, duration counted up to "now".
            vertexEnd = -1L;
            vertexDuration = now - vertexStart;
        }

        final ExecutionState aggregatedState =
                ExecutionJobVertex.getAggregateJobVertexState(subtaskCountPerState, jobVertex.getParallelism());
        vertexCountPerState[aggregatedState.ordinal()]++;

        json.writeStartObject();
        json.writeStringField("id", jobVertex.getJobVertexId().toString());
        json.writeStringField("name", jobVertex.getName());
        json.writeNumberField("parallelism", jobVertex.getParallelism());
        json.writeStringField("status", aggregatedState.name());
        json.writeNumberField("start-time", vertexStart);
        json.writeNumberField("end-time", vertexEnd);
        json.writeNumberField("duration", vertexDuration);

        json.writeObjectFieldStart("tasks");
        for (ExecutionState executionState : ExecutionState.values()) {
            json.writeNumberField(executionState.name(), subtaskCountPerState[executionState.ordinal()]);
        }
        json.writeEndObject();

        final MutableIOMetrics ioTotals = new MutableIOMetrics();
        for (AccessExecutionVertex subtask : jobVertex.getTaskVertices()) {
            ioTotals.addIOMetrics(
                    subtask.getCurrentExecutionAttempt(),
                    fetcher,
                    graph.getJobID().toString(),
                    jobVertex.getJobVertexId().toString());
        }
        ioTotals.writeIOMetricsAsJson(json);

        json.writeEndObject();
    }
    json.writeEndArray();

    // ---- number of job vertices per aggregated state ----
    json.writeObjectFieldStart("status-counts");
    for (ExecutionState executionState : ExecutionState.values()) {
        json.writeNumberField(executionState.name(), vertexCountPerState[executionState.ordinal()]);
    }
    json.writeEndObject();

    // ---- plan (already JSON, written verbatim) ----
    json.writeFieldName("plan");
    json.writeRawValue(graph.getJsonPlan());

    json.writeEndObject();
    json.close();

    return out.toString();
}
use of org.apache.flink.runtime.jobgraph.JobStatus in project flink by apache.
the class ExecutionGraph method jobVertexInFinalState.
/**
 * Records that one more job vertex has reached a final state and, once the counter
 * equals the number of vertices in {@code verticesInCreationOrder}, moves the job to
 * its follow-up status.
 *
 * <p>All work is done while holding {@code progressLock}. When the last vertex is
 * counted, the method loops over the current job status until one branch breaks out:
 * <ul>
 * <li>{@code RUNNING}: switch to {@code FINISHED} and run {@code postRunCleanup()}.</li>
 * <li>{@code CANCELLING}: switch to {@code CANCELED} and run {@code postRunCleanup()}.</li>
 * <li>{@code FAILING}: call {@code tryRestartOrFail()}; retry the loop if it returns {@code false}.</li>
 * <li>{@code SUSPENDED}: nothing to do.</li>
 * <li>Any globally terminal status: log a warning.</li>
 * <li>Anything else: call {@code fail(...)} with an exception naming the status.</li>
 * </ul>
 * Afterwards all threads waiting on {@code progressLock} are notified.
 *
 * @throws IllegalStateException if the finished-vertex counter is already at or above
 *         the number of job vertices when this method is called
 */
void jobVertexInFinalState() {
synchronized (progressLock) {
// Guard against counting more finished vertices than exist.
if (numFinishedJobVertices >= verticesInCreationOrder.size()) {
throw new IllegalStateException("All vertices are already finished, cannot transition vertex to finished.");
}
numFinishedJobVertices++;
// Only the call that counts the last vertex drives the job status transition.
if (numFinishedJobVertices == verticesInCreationOrder.size()) {
// we are done, transition to the final state
JobStatus current;
// Re-read the status on every pass; a transition that returns false leads to another pass.
while (true) {
current = this.state;
if (current == JobStatus.RUNNING) {
if (transitionState(current, JobStatus.FINISHED)) {
postRunCleanup();
break;
}
} else if (current == JobStatus.CANCELLING) {
if (transitionState(current, JobStatus.CANCELED)) {
postRunCleanup();
break;
}
} else if (current == JobStatus.FAILING) {
// tryRestartOrFail() returning true ends the loop; false falls through to another pass.
if (tryRestartOrFail()) {
break;
}
// concurrent job status change, let's check again
} else if (current == JobStatus.SUSPENDED) {
// we've already cleaned up when entering the SUSPENDED state
break;
} else if (current.isGloballyTerminalState()) {
// Already in a globally terminal status: only log, no transition and no cleanup here.
LOG.warn("Job has entered globally terminal state without waiting for all " + "job vertices to reach final state.");
break;
} else {
// Any remaining status is unexpected at this point: fail the job, reporting the status.
fail(new Exception("ExecutionGraph went into final state from state " + current));
break;
}
}
// done transitioning the state
// also, notify waiters
progressLock.notifyAll();
}
}
}
Aggregations