Example 26 with Instance

Use of org.apache.flink.runtime.instance.Instance in project flink by apache.

In the class ExecutionVertexDeploymentTest, the method testDeployFailedAsynchronously:

@Test
public void testDeployFailedAsynchronously() {
    try {
        final JobVertexID jid = new JobVertexID();
        final ExecutionJobVertex ejv = getExecutionVertex(jid);
        final ExecutionVertex vertex = new ExecutionVertex(ejv, 0, new IntermediateResult[0], AkkaUtils.getDefaultTimeout());
        final Instance instance = getInstance(new ActorTaskManagerGateway(new SimpleFailingActorGateway(TestingUtils.directExecutionContext())));
        final SimpleSlot slot = instance.allocateSimpleSlot(ejv.getJobId());
        assertEquals(ExecutionState.CREATED, vertex.getExecutionState());
        vertex.deployToSlot(slot);
        // poll for up to one second (100 x 10 ms) until the asynchronous failure has taken effect
        for (int i = 0; i < 100; i++) {
            if (vertex.getExecutionState() == ExecutionState.FAILED && vertex.getFailureCause() != null) {
                break;
            } else {
                Thread.sleep(10);
            }
        }
        assertEquals(ExecutionState.FAILED, vertex.getExecutionState());
        assertNotNull(vertex.getFailureCause());
        assertTrue(vertex.getFailureCause().getMessage().contains(ERROR_MESSAGE));
        assertTrue(vertex.getStateTimestamp(ExecutionState.CREATED) > 0);
        assertTrue(vertex.getStateTimestamp(ExecutionState.DEPLOYING) > 0);
        assertTrue(vertex.getStateTimestamp(ExecutionState.FAILED) > 0);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Instance(org.apache.flink.runtime.instance.Instance) ExecutionGraphTestUtils.getInstance(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getInstance) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) SimpleFailingActorGateway(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleFailingActorGateway) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) ExecutionGraphTestUtils.getExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getExecutionVertex) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) Test(org.junit.Test)
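
The loop above gives the asynchronous failure up to one second (100 iterations of 10 ms) to take effect before the assertions run. Tests that repeat this pattern could factor it into a small helper; the following is a minimal sketch under that assumption, and waitUntilExecutionState is a hypothetical name, not part of Flink's test utilities.

private static boolean waitUntilExecutionState(
        ExecutionVertex vertex, ExecutionState expected, long timeoutMillis)
        throws InterruptedException {
    // poll the vertex until it reaches the expected state or the deadline passes
    final long deadline = System.currentTimeMillis() + timeoutMillis;
    while (System.currentTimeMillis() < deadline) {
        if (vertex.getExecutionState() == expected) {
            return true;
        }
        Thread.sleep(10);
    }
    return vertex.getExecutionState() == expected;
}

With it, the wait in the test collapses to assertTrue(waitUntilExecutionState(vertex, ExecutionState.FAILED, 1000)).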

Example 27 with Instance

Use of org.apache.flink.runtime.instance.Instance in project flink by apache.

In the class ExecutionGraphTestUtils, the method getInstance:

public static Instance getInstance(final TaskManagerGateway gateway, final int numberOfSlots) throws Exception {
    ResourceID resourceID = ResourceID.generate();
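    // 4 CPU cores, 2 GB physical memory, 1 GB JVM heap, 512 MB managed memory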
    HardwareDescription hardwareDescription = new HardwareDescription(4, 2L * 1024 * 1024 * 1024, 1024 * 1024 * 1024, 512 * 1024 * 1024);
    InetAddress address = InetAddress.getByName("127.0.0.1");
    TaskManagerLocation connection = new TaskManagerLocation(resourceID, address, 10001);
    return new Instance(gateway, connection, new InstanceID(), hardwareDescription, numberOfSlots);
}
Also used : HardwareDescription(org.apache.flink.runtime.instance.HardwareDescription) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Instance(org.apache.flink.runtime.instance.Instance) InstanceID(org.apache.flink.runtime.instance.InstanceID) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) InetAddress(java.net.InetAddress)
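
A minimal usage sketch, assuming a TaskManagerGateway and a JobID are available in the enclosing test (as in the deployment tests above):

Instance instance = getInstance(gateway, 4);
SimpleSlot slot = instance.allocateSimpleSlot(jobId);
assertNotNull(slot);
// one of the four slots is now taken
assertEquals(3, instance.getNumberOfAvailableSlots());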

Example 28 with Instance

Use of org.apache.flink.runtime.instance.Instance in project flink by apache.

In the class ExecutionGraphDeploymentTest, the method testBuildDeploymentDescriptor:

@Test
public void testBuildDeploymentDescriptor() {
    try {
        final JobID jobId = new JobID();
        final JobVertexID jid1 = new JobVertexID();
        final JobVertexID jid2 = new JobVertexID();
        final JobVertexID jid3 = new JobVertexID();
        final JobVertexID jid4 = new JobVertexID();
        JobVertex v1 = new JobVertex("v1", jid1);
        JobVertex v2 = new JobVertex("v2", jid2);
        JobVertex v3 = new JobVertex("v3", jid3);
        JobVertex v4 = new JobVertex("v4", jid4);
        v1.setParallelism(10);
        v2.setParallelism(10);
        v3.setParallelism(10);
        v4.setParallelism(10);
        v1.setInvokableClass(BatchTask.class);
        v2.setInvokableClass(BatchTask.class);
        v3.setInvokableClass(BatchTask.class);
        v4.setInvokableClass(BatchTask.class);
        v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
        v3.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
        v4.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
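        // assemble a standalone ExecutionGraph for the test: default executors, empty job
        // and execution config, default timeout, no restarts, and a local scheduler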
        ExecutionGraph eg = new ExecutionGraph(TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), jobId, "some job", new Configuration(), new SerializedValue<>(new ExecutionConfig()), AkkaUtils.getDefaultTimeout(), new NoRestartStrategy(), new Scheduler(TestingUtils.defaultExecutionContext()));
        List<JobVertex> ordered = Arrays.asList(v1, v2, v3, v4);
        eg.attachJobGraph(ordered);
        ExecutionJobVertex ejv = eg.getAllVertices().get(jid2);
        ExecutionVertex vertex = ejv.getTaskVertices()[3];
        ExecutionGraphTestUtils.SimpleActorGateway instanceGateway = new ExecutionGraphTestUtils.SimpleActorGateway(TestingUtils.directExecutionContext());
        final Instance instance = getInstance(new ActorTaskManagerGateway(instanceGateway));
        final SimpleSlot slot = instance.allocateSimpleSlot(jobId);
        assertEquals(ExecutionState.CREATED, vertex.getExecutionState());
        vertex.deployToSlot(slot);
        assertEquals(ExecutionState.DEPLOYING, vertex.getExecutionState());
        TaskDeploymentDescriptor descr = instanceGateway.lastTDD;
        assertNotNull(descr);
        JobInformation jobInformation = descr.getSerializedJobInformation().deserializeValue(getClass().getClassLoader());
        TaskInformation taskInformation = descr.getSerializedTaskInformation().deserializeValue(getClass().getClassLoader());
        assertEquals(jobId, jobInformation.getJobId());
        assertEquals(jid2, taskInformation.getJobVertexId());
        assertEquals(3, descr.getSubtaskIndex());
        assertEquals(10, taskInformation.getNumberOfSubtasks());
        assertEquals(BatchTask.class.getName(), taskInformation.getInvokableClassName());
        assertEquals("v2", taskInformation.getTaskName());
        Collection<ResultPartitionDeploymentDescriptor> producedPartitions = descr.getProducedPartitions();
        Collection<InputGateDeploymentDescriptor> consumedPartitions = descr.getInputGates();
        assertEquals(2, producedPartitions.size());
        assertEquals(1, consumedPartitions.size());
        Iterator<ResultPartitionDeploymentDescriptor> iteratorProducedPartitions = producedPartitions.iterator();
        Iterator<InputGateDeploymentDescriptor> iteratorConsumedPartitions = consumedPartitions.iterator();
        assertEquals(10, iteratorProducedPartitions.next().getNumberOfSubpartitions());
        assertEquals(10, iteratorProducedPartitions.next().getNumberOfSubpartitions());
        assertEquals(10, iteratorConsumedPartitions.next().getInputChannelDeploymentDescriptors().length);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) Configuration(org.apache.flink.configuration.Configuration) Instance(org.apache.flink.runtime.instance.Instance) ExecutionGraphTestUtils.getInstance(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getInstance) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) BatchTask(org.apache.flink.runtime.operators.BatchTask) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) NoRestartStrategy(org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
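
Because v2 feeds both v3 and v4 it produces two result partitions, and because it consumes only v1's output it has a single input gate; that is what the size assertions above verify. The two next() calls on the producer iterator perform the same per-partition check that a loop states a little more directly; an equivalent sketch over the collections already declared above:

// every produced partition of v2 is consumed ALL_TO_ALL by 10 subtasks,
// so each partition must expose 10 subpartitions
for (ResultPartitionDeploymentDescriptor produced : producedPartitions) {
    assertEquals(10, produced.getNumberOfSubpartitions());
}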

Example 29 with Instance

Use of org.apache.flink.runtime.instance.Instance in project flink by apache.

In the class TaskManagerLogHandler, the method respondAsLeader:

/**
 * Serves the response when running with the leading JobManager.
 */
@Override
protected void respondAsLeader(final ChannelHandlerContext ctx, final Routed routed, final ActorGateway jobManager) {
    if (cache == null) {
        scala.concurrent.Future<Object> portFuture = jobManager.ask(JobManagerMessages.getRequestBlobManagerPort(), timeout);
        scala.concurrent.Future<BlobCache> cacheFuture = portFuture.map(new Mapper<Object, BlobCache>() {

            @Override
            public BlobCache checkedApply(Object result) throws IOException {
                Option<String> hostOption = jobManager.actor().path().address().host();
                String host = hostOption.isDefined() ? hostOption.get() : "localhost";
                int port = (int) result;
                return new BlobCache(new InetSocketAddress(host, port), config);
            }
        }, executor);
        cache = new FlinkFuture<>(cacheFuture);
    }
    final String taskManagerID = routed.pathParams().get(TaskManagersHandler.TASK_MANAGER_ID_KEY);
    final HttpRequest request = routed.request();
    // fetch the TaskManager log only if no other request is currently fetching it
    if (lastRequestPending.putIfAbsent(taskManagerID, true) == null) {
        try {
            InstanceID instanceID = new InstanceID(StringUtils.hexStringToByte(taskManagerID));
            scala.concurrent.Future<JobManagerMessages.TaskManagerInstance> scalaTaskManagerFuture = jobManager.ask(new JobManagerMessages.RequestTaskManagerInstance(instanceID), timeout).mapTo(ClassTag$.MODULE$.<JobManagerMessages.TaskManagerInstance>apply(JobManagerMessages.TaskManagerInstance.class));
            Future<JobManagerMessages.TaskManagerInstance> taskManagerFuture = new FlinkFuture<>(scalaTaskManagerFuture);
            Future<BlobKey> blobKeyFuture = taskManagerFuture.thenCompose(new ApplyFunction<JobManagerMessages.TaskManagerInstance, Future<BlobKey>>() {

                @Override
                public Future<BlobKey> apply(JobManagerMessages.TaskManagerInstance value) {
                    Instance taskManager = value.instance().get();
                    if (serveLogFile) {
                        return taskManager.getTaskManagerGateway().requestTaskManagerLog(timeTimeout);
                    } else {
                        return taskManager.getTaskManagerGateway().requestTaskManagerStdout(timeTimeout);
                    }
                }
            });
            Future<String> logPathFuture = blobKeyFuture.thenCombine(cache, new BiFunction<BlobKey, BlobCache, Tuple2<BlobKey, BlobCache>>() {

                @Override
                public Tuple2<BlobKey, BlobCache> apply(BlobKey blobKey, BlobCache blobCache) {
                    return Tuple2.of(blobKey, blobCache);
                }
            }).thenComposeAsync(new ApplyFunction<Tuple2<BlobKey, BlobCache>, Future<String>>() {

                @Override
                public Future<String> apply(Tuple2<BlobKey, BlobCache> value) {
                    final BlobKey blobKey = value.f0;
                    final BlobCache blobCache = value.f1;
                    // delete the previous log file if it differs from the current one
                    HashMap<String, BlobKey> lastSubmittedFile = serveLogFile ? lastSubmittedLog : lastSubmittedStdout;
                    if (lastSubmittedFile.containsKey(taskManagerID)) {
                        if (!blobKey.equals(lastSubmittedFile.get(taskManagerID))) {
                            try {
                                blobCache.deleteGlobal(lastSubmittedFile.get(taskManagerID));
                            } catch (IOException e) {
                                return FlinkCompletableFuture.completedExceptionally(new Exception("Could not delete file for " + taskManagerID + '.', e));
                            }
                            lastSubmittedFile.put(taskManagerID, blobKey);
                        }
                    } else {
                        lastSubmittedFile.put(taskManagerID, blobKey);
                    }
                    try {
                        return FlinkCompletableFuture.completed(blobCache.getURL(blobKey).getFile());
                    } catch (IOException e) {
                        return FlinkCompletableFuture.completedExceptionally(new Exception("Could not retrieve blob for " + blobKey + '.', e));
                    }
                }
            }, executor);
            logPathFuture.exceptionally(new ApplyFunction<Throwable, Void>() {

                @Override
                public Void apply(Throwable failure) {
                    display(ctx, request, "Fetching TaskManager log failed.");
                    LOG.error("Fetching TaskManager log failed.", failure);
                    lastRequestPending.remove(taskManagerID);
                    return null;
                }
            });
            logPathFuture.thenAccept(new AcceptFunction<String>() {

                @Override
                public void accept(String filePath) {
                    File file = new File(filePath);
                    final RandomAccessFile raf;
                    try {
                        raf = new RandomAccessFile(file, "r");
                    } catch (FileNotFoundException e) {
                        display(ctx, request, "Displaying TaskManager log failed.");
                        LOG.error("Displaying TaskManager log failed.", e);
                        return;
                    }
                    long fileLength;
                    try {
                        fileLength = raf.length();
                    } catch (IOException ioe) {
                        display(ctx, request, "Displaying TaskManager log failed.");
                        LOG.error("Displaying TaskManager log failed.", ioe);
                        try {
                            raf.close();
                        } catch (IOException e) {
                            LOG.error("Could not close random access file.", e);
                        }
                        return;
                    }
                    final FileChannel fc = raf.getChannel();
                    HttpResponse response = new DefaultHttpResponse(HTTP_1_1, OK);
                    response.headers().set(CONTENT_TYPE, "text/plain");
                    if (HttpHeaders.isKeepAlive(request)) {
                        response.headers().set(CONNECTION, HttpHeaders.Values.KEEP_ALIVE);
                    }
                    HttpHeaders.setContentLength(response, fileLength);
                    // write the initial line and the header.
                    ctx.write(response);
                    // write the content.
                    ChannelFuture lastContentFuture;
                    final GenericFutureListener<io.netty.util.concurrent.Future<? super Void>> completionListener = new GenericFutureListener<io.netty.util.concurrent.Future<? super Void>>() {

                        @Override
                        public void operationComplete(io.netty.util.concurrent.Future<? super Void> future) throws Exception {
                            lastRequestPending.remove(taskManagerID);
                            fc.close();
                            raf.close();
                        }
                    };
                    if (ctx.pipeline().get(SslHandler.class) == null) {
                        ctx.write(new DefaultFileRegion(fc, 0, fileLength), ctx.newProgressivePromise()).addListener(completionListener);
                        lastContentFuture = ctx.writeAndFlush(LastHttpContent.EMPTY_LAST_CONTENT);
                    } else {
                        try {
                            lastContentFuture = ctx.writeAndFlush(new HttpChunkedInput(new ChunkedFile(raf, 0, fileLength, 8192)), ctx.newProgressivePromise()).addListener(completionListener);
                        } catch (IOException e) {
                            display(ctx, request, "Displaying TaskManager log failed.");
                            LOG.warn("Could not write http data.", e);
                            return;
                        }
                    // HttpChunkedInput will write the end marker (LastHttpContent) for us.
                    }
                    // close the connection, if no keep-alive is needed
                    if (!HttpHeaders.isKeepAlive(request)) {
                        lastContentFuture.addListener(ChannelFutureListener.CLOSE);
                    }
                }
            });
        } catch (Exception e) {
            display(ctx, request, "Error: " + e.getMessage());
            LOG.error("Fetching TaskManager log failed.", e);
            lastRequestPending.remove(taskManagerID);
        }
    } else {
        display(ctx, request, "loading...");
    }
}
Also used : InstanceID(org.apache.flink.runtime.instance.InstanceID) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) InetSocketAddress(java.net.InetSocketAddress) FileNotFoundException(java.io.FileNotFoundException) FlinkFuture(org.apache.flink.runtime.concurrent.impl.FlinkFuture) BlobKey(org.apache.flink.runtime.blob.BlobKey) GenericFutureListener(io.netty.util.concurrent.GenericFutureListener) ChannelFuture(io.netty.channel.ChannelFuture) ChunkedFile(io.netty.handler.stream.ChunkedFile) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) DefaultFileRegion(io.netty.channel.DefaultFileRegion) RandomAccessFile(java.io.RandomAccessFile) Option(scala.Option) File(java.io.File) Instance(org.apache.flink.runtime.instance.Instance) BlobCache(org.apache.flink.runtime.blob.BlobCache) HttpChunkedInput(io.netty.handler.codec.http.HttpChunkedInput) HttpRequest(io.netty.handler.codec.http.HttpRequest) FileChannel(java.nio.channels.FileChannel) DefaultHttpResponse(io.netty.handler.codec.http.DefaultHttpResponse) HttpResponse(io.netty.handler.codec.http.HttpResponse) IOException(java.io.IOException) BiFunction(org.apache.flink.runtime.concurrent.BiFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Future(org.apache.flink.runtime.concurrent.Future) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture)
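
The handler is written against Flink's legacy concurrent API (FlinkFuture, ApplyFunction, BiFunction wrappers around Scala futures). Purely as an illustration of the same composition shape with the JDK's CompletableFuture: fetchBlobKey, resolveLogPath, and serveFile below are hypothetical stand-ins for the gateway request, the BlobCache lookup, and the file-serving logic above, and cacheFuture is assumed to be a CompletableFuture<BlobCache> rather than the Scala future used in the handler.

import java.util.concurrent.CompletableFuture;

// illustration only: fetch the blob key, pair it with the cache,
// resolve the local file path, then serve the file or report the failure
CompletableFuture<String> logPath = fetchBlobKey(taskManagerID)
        .thenCombine(cacheFuture, (blobKey, blobCache) -> Tuple2.of(blobKey, blobCache))
        .thenComposeAsync(pair -> resolveLogPath(pair.f0, pair.f1), executor);

logPath.whenComplete((path, failure) -> {
    if (failure != null) {
        display(ctx, request, "Fetching TaskManager log failed.");
        lastRequestPending.remove(taskManagerID);
    } else {
        serveFile(ctx, request, path);
    }
});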

Example 30 with Instance

Use of org.apache.flink.runtime.instance.Instance in project flink by apache.

In the class TaskManagersHandler, the method handleJsonRequest:

@Override
public String handleJsonRequest(Map<String, String> pathParams, Map<String, String> queryParams, ActorGateway jobManager) throws Exception {
    try {
        if (jobManager != null) {
            // Whether the metrics of one task manager or of all task managers are
            // requested, we return them in an array; this avoids unnecessary code
            // complexity. If only one task manager is requested, we fetch only its metrics.
            final List<Instance> instances = new ArrayList<>();
            if (pathParams.containsKey(TASK_MANAGER_ID_KEY)) {
                try {
                    InstanceID instanceID = new InstanceID(StringUtils.hexStringToByte(pathParams.get(TASK_MANAGER_ID_KEY)));
                    Future<Object> future = jobManager.ask(new JobManagerMessages.RequestTaskManagerInstance(instanceID), timeout);
                    TaskManagerInstance instance = (TaskManagerInstance) Await.result(future, timeout);
                    if (instance.instance().nonEmpty()) {
                        instances.add(instance.instance().get());
                    }
                } catch (IllegalArgumentException e) {
                    // the id string was invalid; keep the list empty
                }
            } else {
                Future<Object> future = jobManager.ask(JobManagerMessages.getRequestRegisteredTaskManagers(), timeout);
                RegisteredTaskManagers taskManagers = (RegisteredTaskManagers) Await.result(future, timeout);
                instances.addAll(taskManagers.asJavaCollection());
            }
            StringWriter writer = new StringWriter();
            JsonGenerator gen = JsonFactory.jacksonFactory.createGenerator(writer);
            gen.writeStartObject();
            gen.writeArrayFieldStart("taskmanagers");
            for (Instance instance : instances) {
                gen.writeStartObject();
                gen.writeStringField("id", instance.getId().toString());
                gen.writeStringField("path", instance.getTaskManagerGateway().getAddress());
                gen.writeNumberField("dataPort", instance.getTaskManagerLocation().dataPort());
                gen.writeNumberField("timeSinceLastHeartbeat", instance.getLastHeartBeat());
                gen.writeNumberField("slotsNumber", instance.getTotalNumberOfSlots());
                gen.writeNumberField("freeSlots", instance.getNumberOfAvailableSlots());
                gen.writeNumberField("cpuCores", instance.getResources().getNumberOfCPUCores());
                gen.writeNumberField("physicalMemory", instance.getResources().getSizeOfPhysicalMemory());
                gen.writeNumberField("freeMemory", instance.getResources().getSizeOfJvmHeap());
                gen.writeNumberField("managedMemory", instance.getResources().getSizeOfManagedMemory());
                // only send metrics when a single task manager's metrics are requested
                if (pathParams.containsKey(TASK_MANAGER_ID_KEY)) {
                    fetcher.update();
                    MetricStore.TaskManagerMetricStore metrics = fetcher.getMetricStore().getTaskManagerMetricStore(instance.getId().toString());
                    if (metrics != null) {
                        gen.writeObjectFieldStart("metrics");
                        long heapUsed = Long.valueOf(metrics.getMetric("Status.JVM.Memory.Heap.Used", "0"));
                        long heapCommitted = Long.valueOf(metrics.getMetric("Status.JVM.Memory.Heap.Committed", "0"));
                        long heapTotal = Long.valueOf(metrics.getMetric("Status.JVM.Memory.Heap.Max", "0"));
                        gen.writeNumberField("heapCommitted", heapCommitted);
                        gen.writeNumberField("heapUsed", heapUsed);
                        gen.writeNumberField("heapMax", heapTotal);
                        long nonHeapUsed = Long.valueOf(metrics.getMetric("Status.JVM.Memory.NonHeap.Used", "0"));
                        long nonHeapCommitted = Long.valueOf(metrics.getMetric("Status.JVM.Memory.NonHeap.Committed", "0"));
                        long nonHeapTotal = Long.valueOf(metrics.getMetric("Status.JVM.Memory.NonHeap.Max", "0"));
                        gen.writeNumberField("nonHeapCommitted", nonHeapCommitted);
                        gen.writeNumberField("nonHeapUsed", nonHeapUsed);
                        gen.writeNumberField("nonHeapMax", nonHeapTotal);
                        gen.writeNumberField("totalCommitted", heapCommitted + nonHeapCommitted);
                        gen.writeNumberField("totalUsed", heapUsed + nonHeapUsed);
                        gen.writeNumberField("totalMax", heapTotal + nonHeapTotal);
                        long directCount = Long.valueOf(metrics.getMetric("Status.JVM.Memory.Direct.Count", "0"));
                        long directUsed = Long.valueOf(metrics.getMetric("Status.JVM.Memory.Direct.MemoryUsed", "0"));
                        long directMax = Long.valueOf(metrics.getMetric("Status.JVM.Memory.Direct.TotalCapacity", "0"));
                        gen.writeNumberField("directCount", directCount);
                        gen.writeNumberField("directUsed", directUsed);
                        gen.writeNumberField("directMax", directMax);
                        long mappedCount = Long.valueOf(metrics.getMetric("Status.JVM.Memory.Mapped.Count", "0"));
                        long mappedUsed = Long.valueOf(metrics.getMetric("Status.JVM.Memory.Mapped.MemoryUsed", "0"));
                        long mappedMax = Long.valueOf(metrics.getMetric("Status.JVM.Memory.Mapped.TotalCapacity", "0"));
                        gen.writeNumberField("mappedCount", mappedCount);
                        gen.writeNumberField("mappedUsed", mappedUsed);
                        gen.writeNumberField("mappedMax", mappedMax);
                        long memorySegmentsAvailable = Long.valueOf(metrics.getMetric("Status.Network.AvailableMemorySegments", "0"));
                        long memorySegmentsTotal = Long.valueOf(metrics.getMetric("Status.Network.TotalMemorySegments", "0"));
                        gen.writeNumberField("memorySegmentsAvailable", memorySegmentsAvailable);
                        gen.writeNumberField("memorySegmentsTotal", memorySegmentsTotal);
                        gen.writeArrayFieldStart("garbageCollectors");
                        for (String gcName : metrics.garbageCollectorNames) {
                            String count = metrics.getMetric("Status.JVM.GarbageCollector." + gcName + ".Count", null);
                            String time = metrics.getMetric("Status.JVM.GarbageCollector." + gcName + ".Time", null);
                            if (count != null && time != null) {
                                gen.writeStartObject();
                                gen.writeStringField("name", gcName);
                                gen.writeNumberField("count", Long.valueOf(count));
                                gen.writeNumberField("time", Long.valueOf(time));
                                gen.writeEndObject();
                            }
                        }
                        gen.writeEndArray();
                        gen.writeEndObject();
                    }
                }
                gen.writeEndObject();
            }
            gen.writeEndArray();
            gen.writeEndObject();
            gen.close();
            return writer.toString();
        } else {
            throw new Exception("No connection to the leading JobManager.");
        }
    } catch (Exception e) {
        throw new RuntimeException("Failed to fetch list of all task managers: " + e.getMessage(), e);
    }
}
Also used : MetricStore(org.apache.flink.runtime.webmonitor.metrics.MetricStore) TaskManagerInstance(org.apache.flink.runtime.messages.JobManagerMessages.TaskManagerInstance) Instance(org.apache.flink.runtime.instance.Instance) InstanceID(org.apache.flink.runtime.instance.InstanceID) ArrayList(java.util.ArrayList) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) RegisteredTaskManagers(org.apache.flink.runtime.messages.JobManagerMessages.RegisteredTaskManagers) StringWriter(java.io.StringWriter) JsonGenerator(com.fasterxml.jackson.core.JsonGenerator)
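
The response is assembled with Jackson's streaming JsonGenerator rather than a tree or data-binding model, which avoids materializing intermediate objects when many task managers are registered. A self-contained sketch of the same pattern with stock Jackson; the field values are placeholders:

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import java.io.IOException;
import java.io.StringWriter;

static String renderTaskManagers() throws IOException {
    StringWriter writer = new StringWriter();
    JsonGenerator gen = new JsonFactory().createGenerator(writer);
    gen.writeStartObject();
    gen.writeArrayFieldStart("taskmanagers");
    gen.writeStartObject();
    gen.writeStringField("id", "tm-1");     // placeholder id
    gen.writeNumberField("slotsNumber", 4); // placeholder slot count
    gen.writeEndObject();
    gen.writeEndArray();
    gen.writeEndObject();
    gen.close();
    // yields {"taskmanagers":[{"id":"tm-1","slotsNumber":4}]}
    return writer.toString();
}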

Aggregations

Instance (org.apache.flink.runtime.instance.Instance)63 Test (org.junit.Test)52 SimpleSlot (org.apache.flink.runtime.instance.SimpleSlot)38 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)33 ActorTaskManagerGateway (org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway)29 IOException (java.io.IOException)19 JobID (org.apache.flink.api.common.JobID)15 ExecutionException (java.util.concurrent.ExecutionException)14 Scheduler (org.apache.flink.runtime.jobmanager.scheduler.Scheduler)14 SchedulerTestUtils.getRandomInstance (org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestUtils.getRandomInstance)14 ExecutionGraphTestUtils.getInstance (org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getInstance)12 TaskManagerLocation (org.apache.flink.runtime.taskmanager.TaskManagerLocation)12 SimpleActorGateway (org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway)11 ExecutionGraphTestUtils.getExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getExecutionVertex)11 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)11 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)10 FiniteDuration (scala.concurrent.duration.FiniteDuration)9 SuppressRestartsException (org.apache.flink.runtime.execution.SuppressRestartsException)8 BaseTestingActorGateway (org.apache.flink.runtime.instance.BaseTestingActorGateway)8 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)8