Search in sources :

Example 6 with InstanceID

Use of org.apache.flink.runtime.instance.InstanceID in project flink by apache.

In class TaskManagerLogHandler, method respondAsLeader.

/**
 * Response when running with leading JobManager.
 *
 * <p>Resolves the TaskManager named by the {@code TASK_MANAGER_ID_KEY} path parameter, requests a
 * blob key for its log or stdout file (depending on {@code serveLogFile}), resolves that key to
 * a local file through the {@link BlobCache} and writes the file to the channel.
 *
 * <p>At most one fetch per TaskManager is in flight: the id is registered in
 * {@code lastRequestPending} before the fetch starts and requests arriving meanwhile only get a
 * "loading..." page. Every terminal path, including the failure paths while opening or sending
 * the file, removes the id again so that later requests can trigger a new fetch.
 *
 * @param ctx channel context the response is written to
 * @param routed routed request providing the path parameters and the HTTP request
 * @param jobManager gateway of the leading JobManager
 */
@Override
protected void respondAsLeader(final ChannelHandlerContext ctx, final Routed routed, final ActorGateway jobManager) {
    if (cache == null) {
        // lazily create the blob cache from the blob manager port reported by the JobManager
        scala.concurrent.Future<Object> portFuture = jobManager.ask(JobManagerMessages.getRequestBlobManagerPort(), timeout);
        scala.concurrent.Future<BlobCache> cacheFuture = portFuture.map(new Mapper<Object, BlobCache>() {

            @Override
            public BlobCache checkedApply(Object result) throws IOException {
                Option<String> hostOption = jobManager.actor().path().address().host();
                String host = hostOption.isDefined() ? hostOption.get() : "localhost";
                int port = (int) result;
                return new BlobCache(new InetSocketAddress(host, port), config);
            }
        }, executor);
        cache = new FlinkFuture<>(cacheFuture);
    }
    final String taskManagerID = routed.pathParams().get(TaskManagersHandler.TASK_MANAGER_ID_KEY);
    final HttpRequest request = routed.request();
    //fetch TaskManager logs if no other process is currently doing it
    if (lastRequestPending.putIfAbsent(taskManagerID, true) == null) {
        try {
            InstanceID instanceID = new InstanceID(StringUtils.hexStringToByte(taskManagerID));
            scala.concurrent.Future<JobManagerMessages.TaskManagerInstance> scalaTaskManagerFuture = jobManager.ask(new JobManagerMessages.RequestTaskManagerInstance(instanceID), timeout).mapTo(ClassTag$.MODULE$.<JobManagerMessages.TaskManagerInstance>apply(JobManagerMessages.TaskManagerInstance.class));
            Future<JobManagerMessages.TaskManagerInstance> taskManagerFuture = new FlinkFuture<>(scalaTaskManagerFuture);
            Future<BlobKey> blobKeyFuture = taskManagerFuture.thenCompose(new ApplyFunction<JobManagerMessages.TaskManagerInstance, Future<BlobKey>>() {

                @Override
                public Future<BlobKey> apply(JobManagerMessages.TaskManagerInstance value) {
                    // fail with a descriptive message instead of a bare "None.get" when the
                    // JobManager does not know the requested TaskManager
                    if (!value.instance().isDefined()) {
                        return FlinkCompletableFuture.completedExceptionally(new Exception("Could not find TaskManager " + taskManagerID + '.'));
                    }
                    Instance taskManager = value.instance().get();
                    if (serveLogFile) {
                        return taskManager.getTaskManagerGateway().requestTaskManagerLog(timeTimeout);
                    } else {
                        return taskManager.getTaskManagerGateway().requestTaskManagerStdout(timeTimeout);
                    }
                }
            });
            // pair the blob key with the (possibly still initializing) blob cache, then resolve the file path
            Future<String> logPathFuture = blobKeyFuture.thenCombine(cache, new BiFunction<BlobKey, BlobCache, Tuple2<BlobKey, BlobCache>>() {

                @Override
                public Tuple2<BlobKey, BlobCache> apply(BlobKey blobKey, BlobCache blobCache) {
                    return Tuple2.of(blobKey, blobCache);
                }
            }).thenComposeAsync(new ApplyFunction<Tuple2<BlobKey, BlobCache>, Future<String>>() {

                @Override
                public Future<String> apply(Tuple2<BlobKey, BlobCache> value) {
                    final BlobKey blobKey = value.f0;
                    final BlobCache blobCache = value.f1;
                    //delete previous log file, if it is different than the current one
                    HashMap<String, BlobKey> lastSubmittedFile = serveLogFile ? lastSubmittedLog : lastSubmittedStdout;
                    if (lastSubmittedFile.containsKey(taskManagerID)) {
                        if (!blobKey.equals(lastSubmittedFile.get(taskManagerID))) {
                            try {
                                blobCache.deleteGlobal(lastSubmittedFile.get(taskManagerID));
                            } catch (IOException e) {
                                return FlinkCompletableFuture.completedExceptionally(new Exception("Could not delete file for " + taskManagerID + '.', e));
                            }
                            lastSubmittedFile.put(taskManagerID, blobKey);
                        }
                    } else {
                        lastSubmittedFile.put(taskManagerID, blobKey);
                    }
                    try {
                        return FlinkCompletableFuture.completed(blobCache.getURL(blobKey).getFile());
                    } catch (IOException e) {
                        return FlinkCompletableFuture.completedExceptionally(new Exception("Could not retrieve blob for " + blobKey + '.', e));
                    }
                }
            }, executor);
            logPathFuture.exceptionally(new ApplyFunction<Throwable, Void>() {

                @Override
                public Void apply(Throwable failure) {
                    display(ctx, request, "Fetching TaskManager log failed.");
                    LOG.error("Fetching TaskManager log failed.", failure);
                    lastRequestPending.remove(taskManagerID);
                    return null;
                }
            });
            logPathFuture.thenAccept(new AcceptFunction<String>() {

                @Override
                public void accept(String filePath) {
                    File file = new File(filePath);
                    final RandomAccessFile raf;
                    try {
                        raf = new RandomAccessFile(file, "r");
                    } catch (FileNotFoundException e) {
                        display(ctx, request, "Displaying TaskManager log failed.");
                        LOG.error("Displaying TaskManager log failed.", e);
                        // release the in-flight marker; otherwise all later requests for this
                        // TaskManager would only ever see "loading..."
                        lastRequestPending.remove(taskManagerID);
                        return;
                    }
                    long fileLength;
                    try {
                        fileLength = raf.length();
                    } catch (IOException ioe) {
                        display(ctx, request, "Displaying TaskManager log failed.");
                        LOG.error("Displaying TaskManager log failed.", ioe);
                        try {
                            raf.close();
                        } catch (IOException e) {
                            LOG.error("Could not close random access file.", e);
                        }
                        lastRequestPending.remove(taskManagerID);
                        return;
                    }
                    final FileChannel fc = raf.getChannel();
                    HttpResponse response = new DefaultHttpResponse(HTTP_1_1, OK);
                    response.headers().set(CONTENT_TYPE, "text/plain");
                    if (HttpHeaders.isKeepAlive(request)) {
                        response.headers().set(CONNECTION, HttpHeaders.Values.KEEP_ALIVE);
                    }
                    HttpHeaders.setContentLength(response, fileLength);
                    // write the initial line and the header.
                    ctx.write(response);
                    // write the content.
                    ChannelFuture lastContentFuture;
                    // invoked when the write of the file content completes: releases the
                    // in-flight marker and closes the file
                    final GenericFutureListener<io.netty.util.concurrent.Future<? super Void>> completionListener = new GenericFutureListener<io.netty.util.concurrent.Future<? super Void>>() {

                        @Override
                        public void operationComplete(io.netty.util.concurrent.Future<? super Void> future) throws Exception {
                            lastRequestPending.remove(taskManagerID);
                            fc.close();
                            raf.close();
                        }
                    };
                    if (ctx.pipeline().get(SslHandler.class) == null) {
                        ctx.write(new DefaultFileRegion(fc, 0, fileLength), ctx.newProgressivePromise()).addListener(completionListener);
                        lastContentFuture = ctx.writeAndFlush(LastHttpContent.EMPTY_LAST_CONTENT);
                    } else {
                        try {
                            lastContentFuture = ctx.writeAndFlush(new HttpChunkedInput(new ChunkedFile(raf, 0, fileLength, 8192)), ctx.newProgressivePromise()).addListener(completionListener);
                        } catch (IOException e) {
                            display(ctx, request, "Displaying TaskManager log failed.");
                            LOG.warn("Could not write http data.", e);
                            // the completion listener was never registered on this path, so the
                            // file and the in-flight marker have to be released here
                            try {
                                raf.close();
                            } catch (IOException closeException) {
                                LOG.error("Could not close random access file.", closeException);
                            }
                            lastRequestPending.remove(taskManagerID);
                            return;
                        }
                    // HttpChunkedInput will write the end marker (LastHttpContent) for us.
                    }
                    // close the connection, if no keep-alive is needed
                    if (!HttpHeaders.isKeepAlive(request)) {
                        lastContentFuture.addListener(ChannelFutureListener.CLOSE);
                    }
                }
            });
        } catch (Exception e) {
            display(ctx, request, "Error: " + e.getMessage());
            LOG.error("Fetching TaskManager log failed.", e);
            lastRequestPending.remove(taskManagerID);
        }
    } else {
        display(ctx, request, "loading...");
    }
}
Also used : InstanceID(org.apache.flink.runtime.instance.InstanceID) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) InetSocketAddress(java.net.InetSocketAddress) FileNotFoundException(java.io.FileNotFoundException) FlinkFuture(org.apache.flink.runtime.concurrent.impl.FlinkFuture) BlobKey(org.apache.flink.runtime.blob.BlobKey) GenericFutureListener(io.netty.util.concurrent.GenericFutureListener) ChannelFuture(io.netty.channel.ChannelFuture) ChunkedFile(io.netty.handler.stream.ChunkedFile) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) DefaultFileRegion(io.netty.channel.DefaultFileRegion) RandomAccessFile(java.io.RandomAccessFile) Option(scala.Option) RandomAccessFile(java.io.RandomAccessFile) ChunkedFile(io.netty.handler.stream.ChunkedFile) File(java.io.File) Instance(org.apache.flink.runtime.instance.Instance) BlobCache(org.apache.flink.runtime.blob.BlobCache) HttpChunkedInput(io.netty.handler.codec.http.HttpChunkedInput) HttpRequest(io.netty.handler.codec.http.HttpRequest) FileChannel(java.nio.channels.FileChannel) DefaultHttpResponse(io.netty.handler.codec.http.DefaultHttpResponse) HttpResponse(io.netty.handler.codec.http.HttpResponse) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) BiFunction(org.apache.flink.runtime.concurrent.BiFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) DefaultHttpResponse(io.netty.handler.codec.http.DefaultHttpResponse) FlinkFuture(org.apache.flink.runtime.concurrent.impl.FlinkFuture) Future(org.apache.flink.runtime.concurrent.Future) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture) ChannelFuture(io.netty.channel.ChannelFuture)

Example 7 with InstanceID

Use of org.apache.flink.runtime.instance.InstanceID in project flink by apache.

In class TaskManagersHandler, method handleJsonRequest.

/**
 * Builds a JSON object with a single {@code "taskmanagers"} array describing either the one
 * TaskManager named by the {@code TASK_MANAGER_ID_KEY} path parameter or, if that parameter is
 * absent, all registered TaskManagers. Metrics are only attached when a single TaskManager was
 * requested.
 *
 * @param pathParams path parameters of the request; may contain {@code TASK_MANAGER_ID_KEY}
 * @param queryParams query parameters of the request (not read by this method)
 * @param jobManager gateway of the leading JobManager, or {@code null} if there is none
 * @return the JSON document as a string
 * @throws RuntimeException wrapping any failure, including a missing JobManager connection
 */
@Override
public String handleJsonRequest(Map<String, String> pathParams, Map<String, String> queryParams, ActorGateway jobManager) throws Exception {
    try {
        if (jobManager == null) {
            throw new Exception("No connection to the leading JobManager.");
        }

        // Regardless of whether one or all task managers are requested, the result is always an
        // array; for a single task manager only that one is fetched.
        final boolean singleTaskManagerRequested = pathParams.containsKey(TASK_MANAGER_ID_KEY);
        final List<Instance> instances = new ArrayList<>();
        if (singleTaskManagerRequested) {
            try {
                final String idString = pathParams.get(TASK_MANAGER_ID_KEY);
                final InstanceID requestedId = new InstanceID(StringUtils.hexStringToByte(idString));
                final Future<Object> instanceFuture = jobManager.ask(new JobManagerMessages.RequestTaskManagerInstance(requestedId), timeout);
                final TaskManagerInstance answer = (TaskManagerInstance) Await.result(instanceFuture, timeout);
                if (answer.instance().nonEmpty()) {
                    instances.add(answer.instance().get());
                }
            } catch (IllegalArgumentException e) {
                // the id string was invalid: keep the list empty.
            }
        } else {
            final Future<Object> allFuture = jobManager.ask(JobManagerMessages.getRequestRegisteredTaskManagers(), timeout);
            final RegisteredTaskManagers registered = (RegisteredTaskManagers) Await.result(allFuture, timeout);
            instances.addAll(registered.asJavaCollection());
        }

        final StringWriter out = new StringWriter();
        final JsonGenerator json = JsonFactory.jacksonFactory.createGenerator(out);
        json.writeStartObject();
        json.writeArrayFieldStart("taskmanagers");
        for (Instance tm : instances) {
            json.writeStartObject();
            json.writeStringField("id", tm.getId().toString());
            json.writeStringField("path", tm.getTaskManagerGateway().getAddress());
            json.writeNumberField("dataPort", tm.getTaskManagerLocation().dataPort());
            json.writeNumberField("timeSinceLastHeartbeat", tm.getLastHeartBeat());
            json.writeNumberField("slotsNumber", tm.getTotalNumberOfSlots());
            json.writeNumberField("freeSlots", tm.getNumberOfAvailableSlots());
            json.writeNumberField("cpuCores", tm.getResources().getNumberOfCPUCores());
            json.writeNumberField("physicalMemory", tm.getResources().getSizeOfPhysicalMemory());
            json.writeNumberField("freeMemory", tm.getResources().getSizeOfJvmHeap());
            json.writeNumberField("managedMemory", tm.getResources().getSizeOfManagedMemory());

            // metrics are only sent when exactly one task manager was requested.
            if (singleTaskManagerRequested) {
                fetcher.update();
                final MetricStore.TaskManagerMetricStore tmMetrics = fetcher.getMetricStore().getTaskManagerMetricStore(tm.getId().toString());
                if (tmMetrics != null) {
                    json.writeObjectFieldStart("metrics");

                    // JVM heap
                    final long heapUsed = Long.parseLong(tmMetrics.getMetric("Status.JVM.Memory.Heap.Used", "0"));
                    final long heapCommitted = Long.parseLong(tmMetrics.getMetric("Status.JVM.Memory.Heap.Committed", "0"));
                    final long heapMax = Long.parseLong(tmMetrics.getMetric("Status.JVM.Memory.Heap.Max", "0"));
                    json.writeNumberField("heapCommitted", heapCommitted);
                    json.writeNumberField("heapUsed", heapUsed);
                    json.writeNumberField("heapMax", heapMax);

                    // JVM non-heap
                    final long nonHeapUsed = Long.parseLong(tmMetrics.getMetric("Status.JVM.Memory.NonHeap.Used", "0"));
                    final long nonHeapCommitted = Long.parseLong(tmMetrics.getMetric("Status.JVM.Memory.NonHeap.Committed", "0"));
                    final long nonHeapMax = Long.parseLong(tmMetrics.getMetric("Status.JVM.Memory.NonHeap.Max", "0"));
                    json.writeNumberField("nonHeapCommitted", nonHeapCommitted);
                    json.writeNumberField("nonHeapUsed", nonHeapUsed);
                    json.writeNumberField("nonHeapMax", nonHeapMax);

                    // heap + non-heap totals
                    json.writeNumberField("totalCommitted", heapCommitted + nonHeapCommitted);
                    json.writeNumberField("totalUsed", heapUsed + nonHeapUsed);
                    json.writeNumberField("totalMax", heapMax + nonHeapMax);

                    // direct buffers
                    final long directCount = Long.parseLong(tmMetrics.getMetric("Status.JVM.Memory.Direct.Count", "0"));
                    final long directUsed = Long.parseLong(tmMetrics.getMetric("Status.JVM.Memory.Direct.MemoryUsed", "0"));
                    final long directMax = Long.parseLong(tmMetrics.getMetric("Status.JVM.Memory.Direct.TotalCapacity", "0"));
                    json.writeNumberField("directCount", directCount);
                    json.writeNumberField("directUsed", directUsed);
                    json.writeNumberField("directMax", directMax);

                    // mapped buffers
                    final long mappedCount = Long.parseLong(tmMetrics.getMetric("Status.JVM.Memory.Mapped.Count", "0"));
                    final long mappedUsed = Long.parseLong(tmMetrics.getMetric("Status.JVM.Memory.Mapped.MemoryUsed", "0"));
                    final long mappedMax = Long.parseLong(tmMetrics.getMetric("Status.JVM.Memory.Mapped.TotalCapacity", "0"));
                    json.writeNumberField("mappedCount", mappedCount);
                    json.writeNumberField("mappedUsed", mappedUsed);
                    json.writeNumberField("mappedMax", mappedMax);

                    // network memory segments
                    final long segmentsAvailable = Long.parseLong(tmMetrics.getMetric("Status.Network.AvailableMemorySegments", "0"));
                    final long segmentsTotal = Long.parseLong(tmMetrics.getMetric("Status.Network.TotalMemorySegments", "0"));
                    json.writeNumberField("memorySegmentsAvailable", segmentsAvailable);
                    json.writeNumberField("memorySegmentsTotal", segmentsTotal);

                    // one entry per garbage collector for which both count and time are known
                    json.writeArrayFieldStart("garbageCollectors");
                    for (String collector : tmMetrics.garbageCollectorNames) {
                        final String gcCount = tmMetrics.getMetric("Status.JVM.GarbageCollector." + collector + ".Count", null);
                        final String gcTime = tmMetrics.getMetric("Status.JVM.GarbageCollector." + collector + ".Time", null);
                        if (gcCount == null || gcTime == null) {
                            continue;
                        }
                        json.writeStartObject();
                        json.writeStringField("name", collector);
                        json.writeNumberField("count", Long.parseLong(gcCount));
                        json.writeNumberField("time", Long.parseLong(gcTime));
                        json.writeEndObject();
                    }
                    json.writeEndArray();
                    json.writeEndObject();
                }
            }
            json.writeEndObject();
        }
        json.writeEndArray();
        json.writeEndObject();
        json.close();
        return out.toString();
    } catch (Exception e) {
        throw new RuntimeException("Failed to fetch list of all task managers: " + e.getMessage(), e);
    }
}
Also used : MetricStore(org.apache.flink.runtime.webmonitor.metrics.MetricStore) TaskManagerInstance(org.apache.flink.runtime.messages.JobManagerMessages.TaskManagerInstance) Instance(org.apache.flink.runtime.instance.Instance) InstanceID(org.apache.flink.runtime.instance.InstanceID) ArrayList(java.util.ArrayList) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) RegisteredTaskManagers(org.apache.flink.runtime.messages.JobManagerMessages.RegisteredTaskManagers) StringWriter(java.io.StringWriter) JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) TaskManagerInstance(org.apache.flink.runtime.messages.JobManagerMessages.TaskManagerInstance)

Example 8 with InstanceID

Use of org.apache.flink.runtime.instance.InstanceID in project flink by apache.

In class TaskManagerLogHandlerTest, method testLogFetchingFailure.

/**
 * Checks that the handler writes "Fetching TaskManager log failed." to the channel when the
 * TaskManager gateway answers the log request with an exceptionally completed future.
 */
@Test
public void testLogFetchingFailure() throws Exception {
    // ---------- TaskManager whose log request fails ----------
    final InstanceID instanceId = new InstanceID();
    final ResourceID resourceId = new ResourceID(instanceId.toString());

    CompletableFuture<BlobKey> failedLogRequest = new FlinkCompletableFuture<>();
    failedLogRequest.completeExceptionally(new IOException("failure"));

    TaskManagerGateway tmGateway = mock(TaskManagerGateway.class);
    when(tmGateway.getAddress()).thenReturn("/tm/address");
    when(tmGateway.requestTaskManagerLog(any(Time.class))).thenReturn(failedLogRequest);

    Instance tmInstance = mock(Instance.class);
    when(tmInstance.getId()).thenReturn(instanceId);
    when(tmInstance.getTaskManagerID()).thenReturn(resourceId);
    when(tmInstance.getTaskManagerGateway()).thenReturn(tmGateway);

    // ---------- JobManager answering the three requests the handler may send ----------
    ActorGateway jmGateway = mock(ActorGateway.class);
    Object registeredAnswer = new JobManagerMessages.RegisteredTaskManagers(JavaConverters.collectionAsScalaIterableConverter(Collections.singletonList(tmInstance)).asScala());
    when(jmGateway.ask(isA(JobManagerMessages.RequestRegisteredTaskManagers$.class), any(FiniteDuration.class))).thenReturn(Future$.MODULE$.successful(registeredAnswer));
    when(jmGateway.ask(isA(JobManagerMessages.getRequestBlobManagerPort().getClass()), any(FiniteDuration.class))).thenReturn(Future$.MODULE$.successful((Object) 5));
    when(jmGateway.ask(isA(JobManagerMessages.RequestTaskManagerInstance.class), any(FiniteDuration.class))).thenReturn(Future$.MODULE$.successful((Object) new JobManagerMessages.TaskManagerInstance(Option.apply(tmInstance))));
    when(jmGateway.path()).thenReturn("/jm/address");

    JobManagerRetriever jmRetriever = mock(JobManagerRetriever.class);
    when(jmRetriever.getJobManagerGatewayAndWebPort()).thenReturn(Option.apply(new scala.Tuple2<ActorGateway, Integer>(jmGateway, 0)));

    // ---------- channel context capturing whatever text the handler writes ----------
    final AtomicReference<String> writtenMessage = new AtomicReference<>();
    ChannelHandlerContext channelContext = mock(ChannelHandlerContext.class);
    when(channelContext.write(isA(ByteBuf.class))).thenAnswer(new Answer<Object>() {

        @Override
        public Object answer(InvocationOnMock invocation) throws Throwable {
            ByteBuf payload = invocation.getArgumentAt(0, ByteBuf.class);
            writtenMessage.set(new String(payload.array(), ConfigConstants.DEFAULT_CHARSET));
            return null;
        }
    });

    // ---------- routed request for the TaskManager's log ----------
    Map<String, String> params = new HashMap<>();
    params.put(TaskManagersHandler.TASK_MANAGER_ID_KEY, instanceId.toString());
    Routed routedRequest = mock(Routed.class);
    when(routedRequest.pathParams()).thenReturn(params);
    when(routedRequest.request()).thenReturn(new DefaultFullHttpRequest(HttpVersion.HTTP_1_1, HttpMethod.GET, "/taskmanagers/" + instanceId + "/log"));

    // ---------- run the handler and verify the failure message ----------
    TaskManagerLogHandler logHandler = new TaskManagerLogHandler(jmRetriever, ExecutionContext$.MODULE$.fromExecutor(Executors.directExecutor()), Future$.MODULE$.successful("/jm/address"), AkkaUtils.getDefaultClientTimeout(), TaskManagerLogHandler.FileMode.LOG, new Configuration(), false);
    logHandler.respondAsLeader(channelContext, routedRequest, jmGateway);
    Assert.assertEquals("Fetching TaskManager log failed.", writtenMessage.get());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) InstanceID(org.apache.flink.runtime.instance.InstanceID) Instance(org.apache.flink.runtime.instance.Instance) HashMap(java.util.HashMap) TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) Time(org.apache.flink.api.common.time.Time) ChannelHandlerContext(io.netty.channel.ChannelHandlerContext) ByteBuf(io.netty.buffer.ByteBuf) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture) BlobKey(org.apache.flink.runtime.blob.BlobKey) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) Routed(io.netty.handler.codec.http.router.Routed) DefaultFullHttpRequest(io.netty.handler.codec.http.DefaultFullHttpRequest) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) InvocationOnMock(org.mockito.invocation.InvocationOnMock) JobManagerRetriever(org.apache.flink.runtime.webmonitor.JobManagerRetriever) Test(org.junit.Test)

Example 9 with InstanceID

Use of org.apache.flink.runtime.instance.InstanceID in project flink by apache.

In class MetricFetcherTest, method testUpdate.

/**
 * Runs a single {@code MetricFetcher.update()} against mocked JobManager, TaskManager and
 * metric query services and checks the values that end up in the metric store.
 */
@Test
public void testUpdate() throws Exception {
    // ---------- identifiers ----------
    final JobID jobId = new JobID();
    final InstanceID instanceId = new InstanceID();
    final ResourceID resourceId = new ResourceID(instanceId.toString());
    final String jobIdString = jobId.toString();
    final String instanceIdString = instanceId.toString();

    // ---------- TaskManager ----------
    TaskManagerGateway tmGateway = mock(TaskManagerGateway.class);
    when(tmGateway.getAddress()).thenReturn("/tm/address");
    Instance tmInstance = mock(Instance.class);
    when(tmInstance.getTaskManagerGateway()).thenReturn(tmGateway);
    when(tmInstance.getId()).thenReturn(instanceId);
    when(tmInstance.getTaskManagerID()).thenReturn(resourceId);

    // ---------- JobManager ----------
    JobDetails jobDetails = mock(JobDetails.class);
    when(jobDetails.getJobId()).thenReturn(jobId);
    ActorGateway jmGateway = mock(ActorGateway.class);
    Object registeredAnswer = new JobManagerMessages.RegisteredTaskManagers(JavaConverters.collectionAsScalaIterableConverter(Collections.singletonList(tmInstance)).asScala());
    when(jmGateway.ask(isA(RequestJobDetails.class), any(FiniteDuration.class))).thenReturn(Future$.MODULE$.successful((Object) new MultipleJobsDetails(new JobDetails[0], new JobDetails[0])));
    when(jmGateway.ask(isA(JobManagerMessages.RequestRegisteredTaskManagers$.class), any(FiniteDuration.class))).thenReturn(Future$.MODULE$.successful(registeredAnswer));
    when(jmGateway.path()).thenReturn("/jm/address");
    JobManagerRetriever jmRetriever = mock(JobManagerRetriever.class);
    when(jmRetriever.getJobManagerGatewayAndWebPort()).thenReturn(Option.apply(new scala.Tuple2<ActorGateway, Integer>(jmGateway, 0)));

    // ---------- metric query services ----------
    Object metricDumpAnswer = createRequestDumpAnswer(instanceId, jobId);
    final ActorRef jmQueryActor = mock(ActorRef.class);
    final ActorRef tmQueryActor = mock(ActorRef.class);
    final String jmQueryPath = "/jm/" + METRIC_QUERY_SERVICE_NAME;
    final String tmQueryPath = "/tm/" + METRIC_QUERY_SERVICE_NAME + "_" + resourceId.getResourceIdString();
    ActorSystem system = mock(ActorSystem.class);
    when(system.actorFor(eq(jmQueryPath))).thenReturn(jmQueryActor);
    when(system.actorFor(eq(tmQueryPath))).thenReturn(tmQueryActor);

    MetricFetcher.BasicGateway jmQueryGateway = mock(MetricFetcher.BasicGateway.class);
    when(jmQueryGateway.ask(any(MetricQueryService.getCreateDump().getClass()), any(FiniteDuration.class))).thenReturn(Future$.MODULE$.successful((Object) new MetricDumpSerialization.MetricSerializationResult(new byte[0], 0, 0, 0, 0)));
    MetricFetcher.BasicGateway tmQueryGateway = mock(MetricFetcher.BasicGateway.class);
    when(tmQueryGateway.ask(any(MetricQueryService.getCreateDump().getClass()), any(FiniteDuration.class))).thenReturn(Future$.MODULE$.successful(metricDumpAnswer));

    // hand out the mocked gateways whenever a BasicGateway is constructed for the matching actor
    whenNew(MetricFetcher.BasicGateway.class).withArguments(eq(new Object() {

        @Override
        public boolean equals(Object other) {
            return other == jmQueryActor;
        }
    })).thenReturn(jmQueryGateway);
    whenNew(MetricFetcher.BasicGateway.class).withArguments(eq(new Object() {

        @Override
        public boolean equals(Object other) {
            return other == tmQueryActor;
        }
    })).thenReturn(tmQueryGateway);

    // ---------- exercise the fetcher ----------
    ExecutionContextExecutor sameThreadContext = ExecutionContext$.MODULE$.fromExecutor(new CurrentThreadExecutor());
    MetricFetcher metricFetcher = new MetricFetcher(system, jmRetriever, sameThreadContext);

    // update() must fetch the metrics and put them into the store
    metricFetcher.update();
    MetricStore metricStore = metricFetcher.getMetricStore();
    synchronized (metricStore) {
        // JobManager histogram metrics
        assertEquals("7", metricStore.jobManager.metrics.get("abc.hist_min"));
        assertEquals("6", metricStore.jobManager.metrics.get("abc.hist_max"));
        assertEquals("4.0", metricStore.jobManager.metrics.get("abc.hist_mean"));
        assertEquals("0.5", metricStore.jobManager.metrics.get("abc.hist_median"));
        assertEquals("5.0", metricStore.jobManager.metrics.get("abc.hist_stddev"));
        assertEquals("0.75", metricStore.jobManager.metrics.get("abc.hist_p75"));
        assertEquals("0.9", metricStore.jobManager.metrics.get("abc.hist_p90"));
        assertEquals("0.95", metricStore.jobManager.metrics.get("abc.hist_p95"));
        assertEquals("0.98", metricStore.jobManager.metrics.get("abc.hist_p98"));
        assertEquals("0.99", metricStore.jobManager.metrics.get("abc.hist_p99"));
        assertEquals("0.999", metricStore.jobManager.metrics.get("abc.hist_p999"));
        // TaskManager, job and task level metrics
        assertEquals("x", metricStore.getTaskManagerMetricStore(instanceIdString).metrics.get("abc.gauge"));
        assertEquals("5.0", metricStore.getJobMetricStore(jobIdString).metrics.get("abc.jc"));
        assertEquals("2", metricStore.getTaskMetricStore(jobIdString, "taskid").metrics.get("2.abc.tc"));
        assertEquals("1", metricStore.getTaskMetricStore(jobIdString, "taskid").metrics.get("2.opname.abc.oc"));
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) InstanceID(org.apache.flink.runtime.instance.InstanceID) Instance(org.apache.flink.runtime.instance.Instance) ActorRef(akka.actor.ActorRef) TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) MultipleJobsDetails(org.apache.flink.runtime.messages.webmonitor.MultipleJobsDetails) RequestJobDetails(org.apache.flink.runtime.messages.webmonitor.RequestJobDetails) JobDetails(org.apache.flink.runtime.messages.webmonitor.JobDetails) MetricDumpSerialization(org.apache.flink.runtime.metrics.dump.MetricDumpSerialization) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) ExecutionContextExecutor(scala.concurrent.ExecutionContextExecutor) FiniteDuration(scala.concurrent.duration.FiniteDuration) RequestJobDetails(org.apache.flink.runtime.messages.webmonitor.RequestJobDetails) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobManagerRetriever(org.apache.flink.runtime.webmonitor.JobManagerRetriever) JobID(org.apache.flink.api.common.JobID) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 10 with InstanceID

Use of org.apache.flink.runtime.instance.InstanceID in project flink by apache.

In class TaskExecutorTest, method testSlotAcceptance.

/**
 * Verifies the slot offering round trip between the task executor and the job master: two
 * slots are allocated for the same job, but the mocked job master only accepts the offer for
 * the first allocation. Afterwards the first slot must be active, while the second slot must be
 * free again and announced as available to the resource manager.
 */
@Test
public void testSlotAcceptance() throws Exception {
    // ----- identities, addresses and leader ids -----
    final JobID jobId = new JobID();
    final ResourceID tmResourceId = new ResourceID("foobar");
    final String rmAddress = "rm";
    final UUID rmLeaderId = UUID.randomUUID();
    final String jmAddress = "jm";
    final UUID jmLeaderId = UUID.randomUUID();
    final ResourceID jmResourceId = new ResourceID(jmAddress);
    final InstanceID registrationId = new InstanceID();
    final int blobPort = 42;
    final AllocationID acceptedAllocation = new AllocationID();
    final AllocationID rejectedAllocation = new AllocationID();

    // ----- task executor side services -----
    final TestingSerialRpcService rpc = new TestingSerialRpcService();
    final Configuration configuration = new Configuration();
    final TaskManagerConfiguration tmConfiguration =
        TaskManagerConfiguration.fromConfiguration(configuration);
    final TaskManagerLocation tmLocation =
        new TaskManagerLocation(tmResourceId, InetAddress.getLoopbackAddress(), 1234);
    final TimerService<AllocationID> timerService = mock(TimerService.class);
    final TaskSlotTable taskSlotTable = new TaskSlotTable(
        Arrays.asList(mock(ResourceProfile.class), mock(ResourceProfile.class)),
        timerService);
    final JobManagerTable jobManagerTable = new JobManagerTable();
    final JobLeaderService jobLeaderService = new JobLeaderService(tmLocation);
    final TestingFatalErrorHandler fatalErrorHandler = new TestingFatalErrorHandler();

    // ----- leader retrieval for the resource manager and the job master -----
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    final LeaderRetrievalService rmLeaderRetrieval =
        new TestingLeaderRetrievalService(rmAddress, rmLeaderId);
    final LeaderRetrievalService jmLeaderRetrieval =
        new TestingLeaderRetrievalService(jmAddress, jmLeaderId);
    haServices.setResourceManagerLeaderRetriever(rmLeaderRetrieval);
    haServices.setJobMasterLeaderRetriever(jobId, jmLeaderRetrieval);

    // ----- resource manager mock: registration of the task executor succeeds -----
    final ResourceManagerGateway rmGateway = mock(ResourceManagerGateway.class);
    final TaskExecutorRegistrationSuccess rmRegistrationSuccess =
        new TaskExecutorRegistrationSuccess(registrationId, 1000L);
    when(rmGateway.registerTaskExecutor(
            eq(rmLeaderId), any(String.class), eq(tmResourceId), any(SlotReport.class), any(Time.class)))
        .thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(rmRegistrationSuccess));

    // ----- job master mock: registration succeeds, only the first slot offer is accepted -----
    final SlotOffer acceptedOffer = new SlotOffer(acceptedAllocation, 0, ResourceProfile.UNKNOWN);
    final Iterable<SlotOffer> acceptedOffers = Collections.singleton(acceptedOffer);
    final JMTMRegistrationSuccess jmRegistrationSuccess =
        new JMTMRegistrationSuccess(jmResourceId, blobPort);
    final JobMasterGateway jmGateway = mock(JobMasterGateway.class);
    when(jmGateway.registerTaskManager(
            any(String.class), eq(tmLocation), eq(jmLeaderId), any(Time.class)))
        .thenReturn(FlinkCompletableFuture.<RegistrationResponse>completed(jmRegistrationSuccess));
    when(jmGateway.getHostname()).thenReturn(jmAddress);
    when(jmGateway.offerSlots(
            any(ResourceID.class), any(Iterable.class), eq(jmLeaderId), any(Time.class)))
        .thenReturn(FlinkCompletableFuture.completed(acceptedOffers));

    rpc.registerGateway(rmAddress, rmGateway);
    rpc.registerGateway(jmAddress, jmGateway);

    try {
        final TaskExecutor taskExecutor = new TaskExecutor(
            tmConfiguration,
            tmLocation,
            rpc,
            mock(MemoryManager.class),
            mock(IOManager.class),
            mock(NetworkEnvironment.class),
            haServices,
            mock(HeartbeatServices.class, RETURNS_MOCKS),
            mock(MetricRegistry.class),
            mock(TaskManagerMetricGroup.class),
            mock(BroadcastVariableManager.class),
            mock(FileCache.class),
            taskSlotTable,
            jobManagerTable,
            jobLeaderService,
            fatalErrorHandler);
        taskExecutor.start();

        // occupy both slots for the job before the job leader becomes known
        final Time slotTimeout = Time.milliseconds(10000L);
        taskSlotTable.allocateSlot(0, jobId, acceptedAllocation, slotTimeout);
        taskSlotTable.allocateSlot(1, jobId, rejectedAllocation, slotTimeout);

        // The job may only be added once the TaskExecutor has been started; before that the
        // job leader service has not been properly started.
        jobLeaderService.addJob(jobId, jmAddress);

        // the slot whose offer was not accepted has to be handed back to the resource manager
        final SlotID rejectedSlot = new SlotID(tmResourceId, 1);
        verify(rmGateway).notifySlotAvailable(eq(rmLeaderId), eq(registrationId), eq(rejectedSlot));

        assertTrue(taskSlotTable.existsActiveSlot(jobId, acceptedAllocation));
        assertFalse(taskSlotTable.existsActiveSlot(jobId, rejectedAllocation));
        assertTrue(taskSlotTable.isSlotFree(1));

        // surface any error that was reported concurrently to the fatal error handler
        fatalErrorHandler.rethrowError();
    } finally {
        rpc.stopService();
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) InstanceID(org.apache.flink.runtime.instance.InstanceID) Time(org.apache.flink.api.common.time.Time) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) ResourceManagerGateway(org.apache.flink.runtime.resourcemanager.ResourceManagerGateway) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) TestingSerialRpcService(org.apache.flink.runtime.rpc.TestingSerialRpcService) UUID(java.util.UUID) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) SlotOffer(org.apache.flink.runtime.taskexecutor.slot.SlotOffer) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) TaskManagerMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerMetricGroup) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) FileCache(org.apache.flink.runtime.filecache.FileCache) SlotID(org.apache.flink.runtime.clusterframework.types.SlotID) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) NetworkEnvironment(org.apache.flink.runtime.io.network.NetworkEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

InstanceID (org.apache.flink.runtime.instance.InstanceID)13 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)8 Test (org.junit.Test)8 JobID (org.apache.flink.api.common.JobID)5 Configuration (org.apache.flink.configuration.Configuration)5 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)5 Instance (org.apache.flink.runtime.instance.Instance)5 TaskManagerLocation (org.apache.flink.runtime.taskmanager.TaskManagerLocation)5 UUID (java.util.UUID)4 Time (org.apache.flink.api.common.time.Time)4 JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages)4 JavaTestKit (akka.testkit.JavaTestKit)3 BlobKey (org.apache.flink.runtime.blob.BlobKey)3 FlinkCompletableFuture (org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture)3 ActorRef (akka.actor.ActorRef)2 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)2 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)2 BroadcastVariableManager (org.apache.flink.runtime.broadcast.BroadcastVariableManager)2