Search in sources :

Example 26 with TaskManagerLocation

use of org.apache.flink.runtime.taskmanager.TaskManagerLocation in project flink by apache.

the class JobExceptionsHandler method createJobExceptionsJson.

/**
 * Renders the failure information of an execution graph as a JSON string.
 *
 * <p>The resulting object contains an optional {@code "root-exception"} field (written only
 * when the graph reports a failure cause other than
 * {@code ExceptionUtils.STRINGIFIED_NULL_EXCEPTION}), an {@code "all-exceptions"} array with
 * one entry per failed execution vertex, and a {@code "truncated"} flag. At most
 * {@code MAX_NUMBER_EXCEPTION_TO_REPORT} vertex failures are listed; the flag is set to
 * {@code true} when a further failure was found after that limit had been reached.
 *
 * @param graph the execution graph whose failures are reported
 * @return the JSON document as a string
 * @throws IOException propagated from the JSON generator
 */
public static String createJobExceptionsJson(AccessExecutionGraph graph) throws IOException {
    final StringWriter out = new StringWriter();
    final JsonGenerator json = JsonFactory.jacksonFactory.createGenerator(out);

    json.writeStartObject();

    // The job-level failure cause is emitted first, and only if there is a real one.
    final String rootCause = graph.getFailureCauseAsString();
    final boolean hasRootCause = rootCause != null && !rootCause.equals(ExceptionUtils.STRINGIFIED_NULL_EXCEPTION);
    if (hasRootCause) {
        json.writeStringField("root-exception", rootCause);
    }

    // Then the per-vertex failures, capped at MAX_NUMBER_EXCEPTION_TO_REPORT entries.
    json.writeArrayFieldStart("all-exceptions");
    int reported = 0;
    boolean hitLimit = false;
    for (AccessExecutionVertex vertex : graph.getAllExecutionVertices()) {
        final String failure = vertex.getFailureCauseAsString();
        if (failure == null || failure.equals(ExceptionUtils.STRINGIFIED_NULL_EXCEPTION)) {
            // this vertex has nothing to report
            continue;
        }
        if (reported >= MAX_NUMBER_EXCEPTION_TO_REPORT) {
            // one more failure exists than we are willing to list
            hitLimit = true;
            break;
        }

        final TaskManagerLocation assigned = vertex.getCurrentAssignedResourceLocation();
        final String where;
        if (assigned == null) {
            where = "(unassigned)";
        } else {
            where = assigned.getFQDNHostname() + ":" + assigned.dataPort();
        }

        json.writeStartObject();
        json.writeStringField("exception", failure);
        json.writeStringField("task", vertex.getTaskNameWithSubtaskIndex());
        json.writeStringField("location", where);
        json.writeEndObject();
        reported++;
    }
    json.writeEndArray();

    json.writeBooleanField("truncated", hitLimit);
    json.writeEndObject();
    json.close();

    return out.toString();
}
Also used : StringWriter(java.io.StringWriter) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex)

Example 27 with TaskManagerLocation

use of org.apache.flink.runtime.taskmanager.TaskManagerLocation in project flink by apache.

the class JobVertexDetailsHandler method createVertexDetailsJson.

/**
 * Renders the details of a job vertex and each of its subtasks as a JSON string.
 *
 * <p>The top-level object carries the vertex {@code "id"}, {@code "name"},
 * {@code "parallelism"} and the current wall-clock time as {@code "now"}. The
 * {@code "subtasks"} array holds one object per task vertex with its running index, state,
 * attempt number, host, start/end time, duration and IO metrics. A value of {@code -1} is
 * written for a start time, end time or duration that is not available.
 *
 * @param jobVertex the job vertex to describe
 * @param jobID the job id, passed through to the IO metrics collection
 * @param fetcher metric fetcher passed through to the IO metrics collection; may be null
 * @return the JSON document as a string
 * @throws IOException propagated from the JSON generator
 */
public static String createVertexDetailsJson(AccessExecutionJobVertex jobVertex, String jobID, @Nullable MetricFetcher fetcher) throws IOException {
    final long now = System.currentTimeMillis();
    final StringWriter out = new StringWriter();
    final JsonGenerator json = JsonFactory.jacksonFactory.createGenerator(out);

    json.writeStartObject();
    json.writeStringField("id", jobVertex.getJobVertexId().toString());
    json.writeStringField("name", jobVertex.getName());
    json.writeNumberField("parallelism", jobVertex.getParallelism());
    json.writeNumberField("now", now);

    json.writeArrayFieldStart("subtasks");
    int subtaskIndex = 0;
    for (AccessExecutionVertex subtask : jobVertex.getTaskVertices()) {
        final ExecutionState state = subtask.getExecutionState();

        final TaskManagerLocation assigned = subtask.getCurrentAssignedResourceLocation();
        final String host;
        if (assigned == null) {
            host = "(unassigned)";
        } else {
            host = assigned.getHostname() + ":" + assigned.dataPort();
        }

        // A DEPLOYING timestamp of 0 is reported as -1.
        final long deployedAt = subtask.getStateTimestamp(ExecutionState.DEPLOYING);
        final long startTime = (deployedAt == 0) ? -1 : deployedAt;

        // Only a terminal state yields an end time.
        final long endTime;
        if (state.isTerminal()) {
            endTime = subtask.getStateTimestamp(state);
        } else {
            endTime = -1;
        }

        // Duration runs until the end time if there is one, otherwise until "now".
        final long duration;
        if (startTime > 0) {
            final long until = (endTime > 0) ? endTime : now;
            duration = until - startTime;
        } else {
            duration = -1;
        }

        json.writeStartObject();
        json.writeNumberField("subtask", subtaskIndex);
        json.writeStringField("status", state.name());
        json.writeNumberField("attempt", subtask.getCurrentExecutionAttempt().getAttemptNumber());
        json.writeStringField("host", host);
        json.writeNumberField("start-time", startTime);
        json.writeNumberField("end-time", endTime);
        json.writeNumberField("duration", duration);

        final MutableIOMetrics ioMetrics = new MutableIOMetrics();
        ioMetrics.addIOMetrics(subtask.getCurrentExecutionAttempt(), fetcher, jobID, jobVertex.getJobVertexId().toString());
        ioMetrics.writeIOMetricsAsJson(json);

        json.writeEndObject();
        subtaskIndex++;
    }
    json.writeEndArray();

    json.writeEndObject();
    json.close();

    return out.toString();
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) MutableIOMetrics(org.apache.flink.runtime.webmonitor.utils.MutableIOMetrics) StringWriter(java.io.StringWriter) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex)

Example 28 with TaskManagerLocation

use of org.apache.flink.runtime.taskmanager.TaskManagerLocation in project flink by apache.

the class JobVertexTaskManagersHandler method createVertexDetailsByTaskManagerJson.

/**
 * Renders the details of a job vertex, aggregated per TaskManager, as a JSON string.
 *
 * <p>Task vertices are grouped by the {@code host:port} string of their currently assigned
 * location (or {@code "(unassigned)"} when there is none). For every group the
 * {@code "taskmanagers"} array receives one object with the aggregated state, the earliest
 * start time, the latest end time, the duration, the summed IO metrics and a
 * {@code "status-counts"} object with the number of tasks per {@link ExecutionState}.
 * A value of {@code -1} is written for times that are not available.
 *
 * @param jobVertex the job vertex to describe
 * @param jobID the job id, passed through to the IO metrics collection
 * @param fetcher metric fetcher passed through to the IO metrics collection; may be null
 * @return the JSON document as a string
 * @throws IOException propagated from the JSON generator
 */
public static String createVertexDetailsByTaskManagerJson(AccessExecutionJobVertex jobVertex, String jobID, @Nullable MetricFetcher fetcher) throws IOException {
    final StringWriter out = new StringWriter();
    final JsonGenerator json = JsonFactory.jacksonFactory.createGenerator(out);

    // Step 1: bucket the task vertices by the TaskManager they are assigned to.
    final Map<String, List<AccessExecutionVertex>> verticesByTaskManager = new HashMap<>();
    for (AccessExecutionVertex subtask : jobVertex.getTaskVertices()) {
        final TaskManagerLocation assigned = subtask.getCurrentAssignedResourceLocation();
        final String key;
        if (assigned == null) {
            key = "(unassigned)";
        } else {
            key = assigned.getHostname() + ":" + assigned.dataPort();
        }
        if (!verticesByTaskManager.containsKey(key)) {
            verticesByTaskManager.put(key, new ArrayList<AccessExecutionVertex>());
        }
        verticesByTaskManager.get(key).add(subtask);
    }

    // Step 2: write the response, one array element per TaskManager bucket.
    final long now = System.currentTimeMillis();
    json.writeStartObject();
    json.writeStringField("id", jobVertex.getJobVertexId().toString());
    json.writeStringField("name", jobVertex.getName());
    json.writeNumberField("now", now);

    json.writeArrayFieldStart("taskmanagers");
    for (Map.Entry<String, List<AccessExecutionVertex>> group : verticesByTaskManager.entrySet()) {
        final List<AccessExecutionVertex> members = group.getValue();

        final int[] countPerState = new int[ExecutionState.values().length];
        final MutableIOMetrics ioTotals = new MutableIOMetrics();
        long earliestStart = Long.MAX_VALUE;
        long latestEnd = 0;
        boolean everyTaskTerminal = true;

        for (AccessExecutionVertex member : members) {
            final ExecutionState current = member.getExecutionState();
            countPerState[current.ordinal()]++;

            // keep the smallest positive DEPLOYING timestamp
            final long deployedAt = member.getStateTimestamp(ExecutionState.DEPLOYING);
            if (deployedAt > 0 && deployedAt < earliestStart) {
                earliestStart = deployedAt;
            }

            if (!current.isTerminal()) {
                everyTaskTerminal = false;
            }

            // keep the largest timestamp of the state each task is currently in
            final long enteredCurrent = member.getStateTimestamp(current);
            if (enteredCurrent > latestEnd) {
                latestEnd = enteredCurrent;
            }

            ioTotals.addIOMetrics(member.getCurrentExecutionAttempt(), fetcher, jobID, jobVertex.getJobVertexId().toString());
        }

        final long duration;
        if (earliestStart == Long.MAX_VALUE) {
            // no task in this bucket has a positive DEPLOYING timestamp
            earliestStart = -1L;
            latestEnd = -1L;
            duration = -1L;
        } else if (everyTaskTerminal) {
            duration = latestEnd - earliestStart;
        } else {
            // at least one task is not terminal: no end time, duration measured up to "now"
            latestEnd = -1L;
            duration = now - earliestStart;
        }

        final ExecutionState aggregated = ExecutionJobVertex.getAggregateJobVertexState(countPerState, members.size());

        json.writeStartObject();
        json.writeStringField("host", group.getKey());
        json.writeStringField("status", aggregated.name());
        json.writeNumberField("start-time", earliestStart);
        json.writeNumberField("end-time", latestEnd);
        json.writeNumberField("duration", duration);
        ioTotals.writeIOMetricsAsJson(json);

        json.writeObjectFieldStart("status-counts");
        for (ExecutionState each : ExecutionState.values()) {
            json.writeNumberField(each.name(), countPerState[each.ordinal()]);
        }
        json.writeEndObject();

        json.writeEndObject();
    }
    json.writeEndArray();

    json.writeEndObject();
    json.close();

    return out.toString();
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) MutableIOMetrics(org.apache.flink.runtime.webmonitor.utils.MutableIOMetrics) HashMap(java.util.HashMap) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) StringWriter(java.io.StringWriter) JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) ArrayList(java.util.ArrayList) List(java.util.List) Map(java.util.Map) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex)

Example 29 with TaskManagerLocation

use of org.apache.flink.runtime.taskmanager.TaskManagerLocation in project flink by apache.

the class SubtasksAllAccumulatorsHandler method createSubtasksAccumulatorsJson.

/**
 * Renders the user accumulators of every subtask of a job vertex as a JSON string.
 *
 * <p>The top-level object carries the vertex {@code "id"} and {@code "parallelism"}. The
 * {@code "subtasks"} array holds one object per task vertex with its running index, the
 * attempt number of its current execution attempt, its host name (or
 * {@code "(unassigned)"} when no location is assigned) and a {@code "user-accumulators"}
 * array of name/type/value triples.
 *
 * @param jobVertex the job vertex whose subtask accumulators are reported
 * @return the JSON document as a string
 * @throws IOException propagated from the JSON generator
 */
public static String createSubtasksAccumulatorsJson(AccessExecutionJobVertex jobVertex) throws IOException {
    final StringWriter out = new StringWriter();
    final JsonGenerator json = JsonFactory.jacksonFactory.createGenerator(out);

    json.writeStartObject();
    json.writeStringField("id", jobVertex.getJobVertexId().toString());
    json.writeNumberField("parallelism", jobVertex.getParallelism());

    json.writeArrayFieldStart("subtasks");
    int subtaskIndex = 0;
    for (AccessExecutionVertex subtask : jobVertex.getTaskVertices()) {
        final TaskManagerLocation assigned = subtask.getCurrentAssignedResourceLocation();
        final String host;
        if (assigned == null) {
            host = "(unassigned)";
        } else {
            host = assigned.getHostname();
        }

        json.writeStartObject();
        json.writeNumberField("subtask", subtaskIndex);
        subtaskIndex++;
        json.writeNumberField("attempt", subtask.getCurrentExecutionAttempt().getAttemptNumber());
        json.writeStringField("host", host);

        final StringifiedAccumulatorResult[] accumulators = subtask.getCurrentExecutionAttempt().getUserAccumulatorsStringified();
        json.writeArrayFieldStart("user-accumulators");
        for (int i = 0; i < accumulators.length; i++) {
            final StringifiedAccumulatorResult accumulator = accumulators[i];
            json.writeStartObject();
            json.writeStringField("name", accumulator.getName());
            json.writeStringField("type", accumulator.getType());
            json.writeStringField("value", accumulator.getValue());
            json.writeEndObject();
        }
        json.writeEndArray();

        json.writeEndObject();
    }
    json.writeEndArray();

    json.writeEndObject();
    json.close();

    return out.toString();
}
Also used : StringWriter(java.io.StringWriter) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) StringifiedAccumulatorResult(org.apache.flink.runtime.accumulators.StringifiedAccumulatorResult) JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) AccessExecutionVertex(org.apache.flink.runtime.executiongraph.AccessExecutionVertex)

Example 30 with TaskManagerLocation

use of org.apache.flink.runtime.taskmanager.TaskManagerLocation in project flink by apache.

the class ArchivedJobGenerationUtils method generateArchivedJob.

/**
 * Builds one archived attempt, subtask, task and job, bottom-up, and stores them in
 * {@code originalAttempt}, {@code originalSubtask}, {@code originalTask} and
 * {@code originalJob}. Each level is constructed from the level built just before it.
 *
 * @throws Exception declared by the original signature; the visible calls include
 *     {@code InetAddress.getLocalHost()}, which can fail to resolve the local host
 */
private static void generateArchivedJob() throws Exception {
    final StringifiedAccumulatorResult firstAccumulator = new StringifiedAccumulatorResult("name1", "type1", "value1");
    final StringifiedAccumulatorResult secondAccumulator = new StringifiedAccumulatorResult("name2", "type2", "value2");

    // Attempt: a FINISHED execution with a failure cause, placed on the local host.
    final TaskManagerLocation taskManagerLocation =
        new TaskManagerLocation(new ResourceID("hello"), InetAddress.getLocalHost(), 1234);
    final long[] attemptTimestamps = new long[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
    originalAttempt = new ArchivedExecutionBuilder()
        .setStateTimestamps(attemptTimestamps)
        .setParallelSubtaskIndex(1)
        .setAttemptNumber(0)
        .setAssignedResourceLocation(taskManagerLocation)
        .setUserAccumulators(new StringifiedAccumulatorResult[] { firstAccumulator, secondAccumulator })
        .setState(ExecutionState.FINISHED)
        .setFailureCause("attemptException")
        .build();

    // Subtask: wraps the attempt above as its current execution.
    originalSubtask = new ArchivedExecutionVertexBuilder()
        .setSubtaskIndex(originalAttempt.getParallelSubtaskIndex())
        .setTaskNameWithSubtask("hello(1/1)")
        .setCurrentExecution(originalAttempt)
        .build();

    // Task: a job vertex whose only task vertex is the subtask above.
    originalTask = new ArchivedExecutionJobVertexBuilder()
        .setTaskVertices(new ArchivedExecutionVertex[] { originalSubtask })
        .build();

    // Job: a FINISHED graph containing just that task, keyed by its vertex id.
    final Map<JobVertexID, ArchivedExecutionJobVertex> tasksById = new HashMap<>();
    tasksById.put(originalTask.getJobVertexId(), originalTask);
    final long[] jobTimestamps = new long[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    originalJob = new ArchivedExecutionGraphBuilder()
        .setJobID(new JobID())
        .setTasks(tasksById)
        .setFailureCause("jobException")
        .setState(JobStatus.FINISHED)
        .setStateTimestamps(jobTimestamps)
        .setArchivedUserAccumulators(new StringifiedAccumulatorResult[] { firstAccumulator, secondAccumulator })
        .build();
}
Also used : ArchivedExecutionJobVertex(org.apache.flink.runtime.executiongraph.ArchivedExecutionJobVertex) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) HashMap(java.util.HashMap) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) StringifiedAccumulatorResult(org.apache.flink.runtime.accumulators.StringifiedAccumulatorResult) JobID(org.apache.flink.api.common.JobID)

Aggregations

TaskManagerLocation (org.apache.flink.runtime.taskmanager.TaskManagerLocation)59 Test (org.junit.Test)30 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)25 SimpleSlot (org.apache.flink.runtime.instance.SimpleSlot)16 JobID (org.apache.flink.api.common.JobID)14 Instance (org.apache.flink.runtime.instance.Instance)12 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)11 InetAddress (java.net.InetAddress)10 MetricRegistry (org.apache.flink.runtime.metrics.MetricRegistry)10 UUID (java.util.UUID)9 Time (org.apache.flink.api.common.time.Time)9 BroadcastVariableManager (org.apache.flink.runtime.broadcast.BroadcastVariableManager)9 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)9 FileCache (org.apache.flink.runtime.filecache.FileCache)9 HeartbeatServices (org.apache.flink.runtime.heartbeat.HeartbeatServices)9 IOManager (org.apache.flink.runtime.io.disk.iomanager.IOManager)9 NetworkEnvironment (org.apache.flink.runtime.io.network.NetworkEnvironment)9 ActorTaskManagerGateway (org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway)9 MemoryManager (org.apache.flink.runtime.memory.MemoryManager)9 TaskManagerMetricGroup (org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup)9