Search in sources :

Example 1 with BlobCache

use of org.apache.flink.runtime.blob.BlobCache in project flink by apache.

the class JobClient method retrieveClassLoader.

/**
	 * Reconstructs the class loader by first requesting information about it at the JobManager
	 * and then downloading missing jar files.
	 * @param jobID id of job
	 * @param jobManager gateway to the JobManager
	 * @param config the flink configuration
	 * @return A classloader that should behave like the original classloader
	 * @throws JobRetrievalException if anything goes wrong
	 */
public static ClassLoader retrieveClassLoader(JobID jobID, ActorGateway jobManager, Configuration config) throws JobRetrievalException {
    final Object jmAnswer;
    try {
        jmAnswer = Await.result(jobManager.ask(new JobManagerMessages.RequestClassloadingProps(jobID), AkkaUtils.getDefaultTimeoutAsFiniteDuration()), AkkaUtils.getDefaultTimeoutAsFiniteDuration());
    } catch (Exception e) {
        throw new JobRetrievalException(jobID, "Couldn't retrieve class loading properties from JobManager.", e);
    }
    if (jmAnswer instanceof JobManagerMessages.ClassloadingProps) {
        JobManagerMessages.ClassloadingProps props = ((JobManagerMessages.ClassloadingProps) jmAnswer);
        Option<String> jmHost = jobManager.actor().path().address().host();
        String jmHostname = jmHost.isDefined() ? jmHost.get() : "localhost";
        InetSocketAddress serverAddress = new InetSocketAddress(jmHostname, props.blobManagerPort());
        final BlobCache blobClient;
        try {
            blobClient = new BlobCache(serverAddress, config);
        } catch (IOException e) {
            throw new JobRetrievalException(jobID, "Failed to setup blob cache", e);
        }
        final Collection<BlobKey> requiredJarFiles = props.requiredJarFiles();
        final Collection<URL> requiredClasspaths = props.requiredClasspaths();
        final URL[] allURLs = new URL[requiredJarFiles.size() + requiredClasspaths.size()];
        int pos = 0;
        for (BlobKey blobKey : props.requiredJarFiles()) {
            try {
                allURLs[pos++] = blobClient.getURL(blobKey);
            } catch (Exception e) {
                blobClient.shutdown();
                throw new JobRetrievalException(jobID, "Failed to download BlobKey " + blobKey, e);
            }
        }
        for (URL url : requiredClasspaths) {
            allURLs[pos++] = url;
        }
        return new FlinkUserCodeClassLoader(allURLs, JobClient.class.getClassLoader());
    } else if (jmAnswer instanceof JobManagerMessages.JobNotFound) {
        throw new JobRetrievalException(jobID, "Couldn't retrieve class loader. Job " + jobID + " not found");
    } else {
        throw new JobRetrievalException(jobID, "Unknown response from JobManager: " + jmAnswer);
    }
}
Also used : InetSocketAddress(java.net.InetSocketAddress) FlinkUserCodeClassLoader(org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoader) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) BlobCache(org.apache.flink.runtime.blob.BlobCache) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) URL(java.net.URL) BlobKey(org.apache.flink.runtime.blob.BlobKey)

Example 2 with BlobCache

use of org.apache.flink.runtime.blob.BlobCache in project flink by apache.

the class TaskManagerLogHandler method respondAsLeader.

/**
	 * Response when running with leading JobManager.
	 */
@Override
protected void respondAsLeader(final ChannelHandlerContext ctx, final Routed routed, final ActorGateway jobManager) {
    if (cache == null) {
        scala.concurrent.Future<Object> portFuture = jobManager.ask(JobManagerMessages.getRequestBlobManagerPort(), timeout);
        scala.concurrent.Future<BlobCache> cacheFuture = portFuture.map(new Mapper<Object, BlobCache>() {

            @Override
            public BlobCache checkedApply(Object result) throws IOException {
                Option<String> hostOption = jobManager.actor().path().address().host();
                String host = hostOption.isDefined() ? hostOption.get() : "localhost";
                int port = (int) result;
                return new BlobCache(new InetSocketAddress(host, port), config);
            }
        }, executor);
        cache = new FlinkFuture<>(cacheFuture);
    }
    final String taskManagerID = routed.pathParams().get(TaskManagersHandler.TASK_MANAGER_ID_KEY);
    final HttpRequest request = routed.request();
    //fetch TaskManager logs if no other process is currently doing it
    if (lastRequestPending.putIfAbsent(taskManagerID, true) == null) {
        try {
            InstanceID instanceID = new InstanceID(StringUtils.hexStringToByte(taskManagerID));
            scala.concurrent.Future<JobManagerMessages.TaskManagerInstance> scalaTaskManagerFuture = jobManager.ask(new JobManagerMessages.RequestTaskManagerInstance(instanceID), timeout).mapTo(ClassTag$.MODULE$.<JobManagerMessages.TaskManagerInstance>apply(JobManagerMessages.TaskManagerInstance.class));
            Future<JobManagerMessages.TaskManagerInstance> taskManagerFuture = new FlinkFuture<>(scalaTaskManagerFuture);
            Future<BlobKey> blobKeyFuture = taskManagerFuture.thenCompose(new ApplyFunction<JobManagerMessages.TaskManagerInstance, Future<BlobKey>>() {

                @Override
                public Future<BlobKey> apply(JobManagerMessages.TaskManagerInstance value) {
                    Instance taskManager = value.instance().get();
                    if (serveLogFile) {
                        return taskManager.getTaskManagerGateway().requestTaskManagerLog(timeTimeout);
                    } else {
                        return taskManager.getTaskManagerGateway().requestTaskManagerStdout(timeTimeout);
                    }
                }
            });
            Future<String> logPathFuture = blobKeyFuture.thenCombine(cache, new BiFunction<BlobKey, BlobCache, Tuple2<BlobKey, BlobCache>>() {

                @Override
                public Tuple2<BlobKey, BlobCache> apply(BlobKey blobKey, BlobCache blobCache) {
                    return Tuple2.of(blobKey, blobCache);
                }
            }).thenComposeAsync(new ApplyFunction<Tuple2<BlobKey, BlobCache>, Future<String>>() {

                @Override
                public Future<String> apply(Tuple2<BlobKey, BlobCache> value) {
                    final BlobKey blobKey = value.f0;
                    final BlobCache blobCache = value.f1;
                    //delete previous log file, if it is different than the current one
                    HashMap<String, BlobKey> lastSubmittedFile = serveLogFile ? lastSubmittedLog : lastSubmittedStdout;
                    if (lastSubmittedFile.containsKey(taskManagerID)) {
                        if (!blobKey.equals(lastSubmittedFile.get(taskManagerID))) {
                            try {
                                blobCache.deleteGlobal(lastSubmittedFile.get(taskManagerID));
                            } catch (IOException e) {
                                return FlinkCompletableFuture.completedExceptionally(new Exception("Could not delete file for " + taskManagerID + '.', e));
                            }
                            lastSubmittedFile.put(taskManagerID, blobKey);
                        }
                    } else {
                        lastSubmittedFile.put(taskManagerID, blobKey);
                    }
                    try {
                        return FlinkCompletableFuture.completed(blobCache.getURL(blobKey).getFile());
                    } catch (IOException e) {
                        return FlinkCompletableFuture.completedExceptionally(new Exception("Could not retrieve blob for " + blobKey + '.', e));
                    }
                }
            }, executor);
            logPathFuture.exceptionally(new ApplyFunction<Throwable, Void>() {

                @Override
                public Void apply(Throwable failure) {
                    display(ctx, request, "Fetching TaskManager log failed.");
                    LOG.error("Fetching TaskManager log failed.", failure);
                    lastRequestPending.remove(taskManagerID);
                    return null;
                }
            });
            logPathFuture.thenAccept(new AcceptFunction<String>() {

                @Override
                public void accept(String filePath) {
                    File file = new File(filePath);
                    final RandomAccessFile raf;
                    try {
                        raf = new RandomAccessFile(file, "r");
                    } catch (FileNotFoundException e) {
                        display(ctx, request, "Displaying TaskManager log failed.");
                        LOG.error("Displaying TaskManager log failed.", e);
                        return;
                    }
                    long fileLength;
                    try {
                        fileLength = raf.length();
                    } catch (IOException ioe) {
                        display(ctx, request, "Displaying TaskManager log failed.");
                        LOG.error("Displaying TaskManager log failed.", ioe);
                        try {
                            raf.close();
                        } catch (IOException e) {
                            LOG.error("Could not close random access file.", e);
                        }
                        return;
                    }
                    final FileChannel fc = raf.getChannel();
                    HttpResponse response = new DefaultHttpResponse(HTTP_1_1, OK);
                    response.headers().set(CONTENT_TYPE, "text/plain");
                    if (HttpHeaders.isKeepAlive(request)) {
                        response.headers().set(CONNECTION, HttpHeaders.Values.KEEP_ALIVE);
                    }
                    HttpHeaders.setContentLength(response, fileLength);
                    // write the initial line and the header.
                    ctx.write(response);
                    // write the content.
                    ChannelFuture lastContentFuture;
                    final GenericFutureListener<io.netty.util.concurrent.Future<? super Void>> completionListener = new GenericFutureListener<io.netty.util.concurrent.Future<? super Void>>() {

                        @Override
                        public void operationComplete(io.netty.util.concurrent.Future<? super Void> future) throws Exception {
                            lastRequestPending.remove(taskManagerID);
                            fc.close();
                            raf.close();
                        }
                    };
                    if (ctx.pipeline().get(SslHandler.class) == null) {
                        ctx.write(new DefaultFileRegion(fc, 0, fileLength), ctx.newProgressivePromise()).addListener(completionListener);
                        lastContentFuture = ctx.writeAndFlush(LastHttpContent.EMPTY_LAST_CONTENT);
                    } else {
                        try {
                            lastContentFuture = ctx.writeAndFlush(new HttpChunkedInput(new ChunkedFile(raf, 0, fileLength, 8192)), ctx.newProgressivePromise()).addListener(completionListener);
                        } catch (IOException e) {
                            display(ctx, request, "Displaying TaskManager log failed.");
                            LOG.warn("Could not write http data.", e);
                            return;
                        }
                    // HttpChunkedInput will write the end marker (LastHttpContent) for us.
                    }
                    // close the connection, if no keep-alive is needed
                    if (!HttpHeaders.isKeepAlive(request)) {
                        lastContentFuture.addListener(ChannelFutureListener.CLOSE);
                    }
                }
            });
        } catch (Exception e) {
            display(ctx, request, "Error: " + e.getMessage());
            LOG.error("Fetching TaskManager log failed.", e);
            lastRequestPending.remove(taskManagerID);
        }
    } else {
        display(ctx, request, "loading...");
    }
}
Also used : InstanceID(org.apache.flink.runtime.instance.InstanceID) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) InetSocketAddress(java.net.InetSocketAddress) FileNotFoundException(java.io.FileNotFoundException) FlinkFuture(org.apache.flink.runtime.concurrent.impl.FlinkFuture) BlobKey(org.apache.flink.runtime.blob.BlobKey) GenericFutureListener(io.netty.util.concurrent.GenericFutureListener) ChannelFuture(io.netty.channel.ChannelFuture) ChunkedFile(io.netty.handler.stream.ChunkedFile) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) DefaultFileRegion(io.netty.channel.DefaultFileRegion) RandomAccessFile(java.io.RandomAccessFile) Option(scala.Option) RandomAccessFile(java.io.RandomAccessFile) ChunkedFile(io.netty.handler.stream.ChunkedFile) File(java.io.File) Instance(org.apache.flink.runtime.instance.Instance) BlobCache(org.apache.flink.runtime.blob.BlobCache) HttpChunkedInput(io.netty.handler.codec.http.HttpChunkedInput) HttpRequest(io.netty.handler.codec.http.HttpRequest) FileChannel(java.nio.channels.FileChannel) DefaultHttpResponse(io.netty.handler.codec.http.DefaultHttpResponse) HttpResponse(io.netty.handler.codec.http.HttpResponse) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) BiFunction(org.apache.flink.runtime.concurrent.BiFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) DefaultHttpResponse(io.netty.handler.codec.http.DefaultHttpResponse) FlinkFuture(org.apache.flink.runtime.concurrent.impl.FlinkFuture) Future(org.apache.flink.runtime.concurrent.Future) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture) ChannelFuture(io.netty.channel.ChannelFuture)

Example 3 with BlobCache

use of org.apache.flink.runtime.blob.BlobCache in project flink by apache.

the class TaskExecutor method associateWithJobManager.

private JobManagerConnection associateWithJobManager(JobID jobID, ResourceID resourceID, JobMasterGateway jobMasterGateway, UUID jobManagerLeaderId, int blobPort) {
    Preconditions.checkNotNull(jobID);
    Preconditions.checkNotNull(resourceID);
    Preconditions.checkNotNull(jobManagerLeaderId);
    Preconditions.checkNotNull(jobMasterGateway);
    Preconditions.checkArgument(blobPort > 0 || blobPort < MAX_BLOB_PORT, "Blob server port is out of range.");
    TaskManagerActions taskManagerActions = new TaskManagerActionsImpl(jobManagerLeaderId, jobMasterGateway);
    CheckpointResponder checkpointResponder = new RpcCheckpointResponder(jobMasterGateway);
    InetSocketAddress blobServerAddress = new InetSocketAddress(jobMasterGateway.getHostname(), blobPort);
    final LibraryCacheManager libraryCacheManager;
    try {
        final BlobCache blobCache = new BlobCache(blobServerAddress, taskManagerConfiguration.getConfiguration(), haServices);
        libraryCacheManager = new BlobLibraryCacheManager(blobCache, taskManagerConfiguration.getCleanupInterval());
    } catch (IOException e) {
        // Can't pass the IOException up - we need a RuntimeException anyway
        // two levels up where this is run asynchronously. Also, we don't
        // know whether this is caught in the thread running this method.
        final String message = "Could not create BLOB cache or library cache.";
        log.error(message, e);
        throw new RuntimeException(message, e);
    }
    ResultPartitionConsumableNotifier resultPartitionConsumableNotifier = new RpcResultPartitionConsumableNotifier(jobManagerLeaderId, jobMasterGateway, getRpcService().getExecutor(), taskManagerConfiguration.getTimeout());
    PartitionProducerStateChecker partitionStateChecker = new RpcPartitionStateChecker(jobManagerLeaderId, jobMasterGateway);
    return new JobManagerConnection(jobID, resourceID, jobMasterGateway, jobManagerLeaderId, taskManagerActions, checkpointResponder, libraryCacheManager, resultPartitionConsumableNotifier, partitionStateChecker);
}
Also used : BlobLibraryCacheManager(org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) RpcCheckpointResponder(org.apache.flink.runtime.taskexecutor.rpc.RpcCheckpointResponder) RpcCheckpointResponder(org.apache.flink.runtime.taskexecutor.rpc.RpcCheckpointResponder) InetSocketAddress(java.net.InetSocketAddress) BlobCache(org.apache.flink.runtime.blob.BlobCache) BlobLibraryCacheManager(org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager) LibraryCacheManager(org.apache.flink.runtime.execution.librarycache.LibraryCacheManager) IOException(java.io.IOException) TaskManagerActions(org.apache.flink.runtime.taskmanager.TaskManagerActions) RpcPartitionStateChecker(org.apache.flink.runtime.taskexecutor.rpc.RpcPartitionStateChecker) RpcResultPartitionConsumableNotifier(org.apache.flink.runtime.taskexecutor.rpc.RpcResultPartitionConsumableNotifier) PartitionProducerStateChecker(org.apache.flink.runtime.io.network.netty.PartitionProducerStateChecker) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) RpcResultPartitionConsumableNotifier(org.apache.flink.runtime.taskexecutor.rpc.RpcResultPartitionConsumableNotifier)

Aggregations

IOException (java.io.IOException)3 InetSocketAddress (java.net.InetSocketAddress)3 BlobCache (org.apache.flink.runtime.blob.BlobCache)3 BlobKey (org.apache.flink.runtime.blob.BlobKey)2 JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages)2 ChannelFuture (io.netty.channel.ChannelFuture)1 DefaultFileRegion (io.netty.channel.DefaultFileRegion)1 DefaultHttpResponse (io.netty.handler.codec.http.DefaultHttpResponse)1 HttpChunkedInput (io.netty.handler.codec.http.HttpChunkedInput)1 HttpRequest (io.netty.handler.codec.http.HttpRequest)1 HttpResponse (io.netty.handler.codec.http.HttpResponse)1 ChunkedFile (io.netty.handler.stream.ChunkedFile)1 GenericFutureListener (io.netty.util.concurrent.GenericFutureListener)1 File (java.io.File)1 FileNotFoundException (java.io.FileNotFoundException)1 RandomAccessFile (java.io.RandomAccessFile)1 URL (java.net.URL)1 FileChannel (java.nio.channels.FileChannel)1 HashMap (java.util.HashMap)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1