Search in sources :

Example 1 with HttpClientResponseHandler

Use of org.apache.hc.core5.http.io.HttpClientResponseHandler in the OpenRefine project (by OpenRefine).

Class ImportingUtilities, method retrieveContentFromPostRequest.

/**
 * Parses a multipart POST request and stores every piece of importable content it carries
 * under {@code rawDataDir}, describing each stored item in {@code retrievalRecord}.
 *
 * <p>Each part of the request is handled according to its kind:
 * <ul>
 *   <li>form field {@code clipboard}: the pasted text is saved to a file allocated as
 *       {@code clipboard.txt};</li>
 *   <li>form field {@code download}: the value is treated as a URL, passed through every
 *       registered {@link UrlRewriter}, and then fetched (HTTP/HTTPS through
 *       {@code HttpClient}, anything else through {@link URLConnection});</li>
 *   <li>any other form field: its value is stored in {@code parameters} under the
 *       lower-cased field name;</li>
 *   <li>file part with a non-empty file name: the content is saved locally and handed to
 *       {@code postProcessRetrievedFile}.</li>
 * </ul>
 *
 * <p>The temporary items created by the upload parser are deleted before this method
 * returns, whether it completes normally or throws.
 *
 * @param request         the multipart POST request to read
 * @param parameters      receives the form fields that are neither {@code clipboard} nor
 *                        {@code download}
 * @param rawDataDir      directory in which retrieved content is stored
 * @param retrievalRecord receives the {@code files} array and the {@code uploadCount},
 *                        {@code downloadCount}, {@code clipboardCount} and
 *                        {@code archiveCount} totals
 * @param progress        progress sink; also polled for cancellation before each part
 * @throws IOException         if reading a part, downloading a URL or writing a file fails
 * @throws FileUploadException if the multipart request cannot be parsed
 */
public static void retrieveContentFromPostRequest(HttpServletRequest request, Properties parameters, File rawDataDir, ObjectNode retrievalRecord, final Progress progress) throws IOException, FileUploadException {
    ArrayNode fileRecords = ParsingUtilities.mapper.createArrayNode();
    JSONUtilities.safePut(retrievalRecord, "files", fileRecords);
    int clipboardCount = 0;
    int uploadCount = 0;
    int downloadCount = 0;
    int archiveCount = 0;
    // This tracks the total progress, which involves uploading data from the client
    // as well as downloading data from URLs.
    final SavingUpdate update = new SavingUpdate() {

        @Override
        public void savedMore() {
            progress.setProgress(null, calculateProgressPercent(totalExpectedSize, totalRetrievedSize));
        }

        @Override
        public boolean isCanceled() {
            return progress.isCanceled();
        }
    };
    DiskFileItemFactory fileItemFactory = new DiskFileItemFactory();
    ServletFileUpload upload = new ServletFileUpload(fileItemFactory);
    upload.setProgressListener(new ProgressListener() {

        boolean setContentLength = false;

        long lastBytesRead = 0;

        @Override
        public void update(long bytesRead, long contentLength, int itemCount) {
            if (!setContentLength) {
                // Only try to set the content length if we really know it.
                if (contentLength >= 0) {
                    update.totalExpectedSize += contentLength;
                    setContentLength = true;
                }
            }
            if (setContentLength) {
                // bytesRead is cumulative, so only the delta since the last callback is added.
                update.totalRetrievedSize += (bytesRead - lastBytesRead);
                lastBytesRead = bytesRead;
                update.savedMore();
            }
        }
    });
    List<FileItem> tempFiles = (List<FileItem>) upload.parseRequest(request);
    try {
        progress.setProgress("Uploading data ...", -1);
        parts: for (FileItem fileItem : tempFiles) {
            if (progress.isCanceled()) {
                break;
            }
            // try-with-resources guarantees the part's stream is closed on every exit path,
            // including "continue parts" and exceptions.
            try (InputStream stream = fileItem.getInputStream()) {
                // Locale.ROOT keeps the field-name matching independent of the JVM default locale.
                String name = fileItem.getFieldName().toLowerCase(java.util.Locale.ROOT);
                if (fileItem.isFormField()) {
                    if (name.equals("clipboard")) {
                        String encoding = request.getCharacterEncoding();
                        if (encoding == null) {
                            encoding = "UTF-8";
                        }
                        File file = allocateFile(rawDataDir, "clipboard.txt");
                        ObjectNode fileRecord = ParsingUtilities.mapper.createObjectNode();
                        JSONUtilities.safePut(fileRecord, "origin", "clipboard");
                        JSONUtilities.safePut(fileRecord, "declaredEncoding", encoding);
                        JSONUtilities.safePut(fileRecord, "declaredMimeType", (String) null);
                        JSONUtilities.safePut(fileRecord, "format", "text");
                        JSONUtilities.safePut(fileRecord, "fileName", "(clipboard)");
                        JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));
                        progress.setProgress("Uploading pasted clipboard text", calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
                        JSONUtilities.safePut(fileRecord, "size", saveStreamToFile(stream, file, null));
                        JSONUtilities.append(fileRecords, fileRecord);
                        clipboardCount++;
                    } else if (name.equals("download")) {
                        String urlString = Streams.asString(stream);
                        URL url = new URL(urlString);
                        ObjectNode fileRecord = ParsingUtilities.mapper.createObjectNode();
                        JSONUtilities.safePut(fileRecord, "origin", "download");
                        JSONUtilities.safePut(fileRecord, "url", urlString);
                        for (UrlRewriter rewriter : ImportingManager.urlRewriters) {
                            Result result = rewriter.rewrite(urlString);
                            if (result != null) {
                                urlString = result.rewrittenUrl;
                                url = new URL(urlString);
                                JSONUtilities.safePut(fileRecord, "url", urlString);
                                JSONUtilities.safePut(fileRecord, "format", result.format);
                                if (!result.download) {
                                    // The rewriter says nothing has to be fetched: record the URL only.
                                    downloadCount++;
                                    JSONUtilities.append(fileRecords, fileRecord);
                                    continue parts;
                                }
                            }
                        }
                        if ("http".equals(url.getProtocol()) || "https".equals(url.getProtocol())) {
                            // url may have been reassigned by a rewriter, so capture a final copy
                            // for use inside the anonymous handler.
                            final URL lastUrl = url;
                            final HttpClientResponseHandler<String> responseHandler = new HttpClientResponseHandler<String>() {

                                @Override
                                public String handleResponse(final ClassicHttpResponse response) throws IOException {
                                    final int status = response.getCode();
                                    if (status >= HttpStatus.SC_SUCCESS && status < HttpStatus.SC_REDIRECTION) {
                                        final HttpEntity entity = response.getEntity();
                                        if (entity == null) {
                                            throw new IOException("No content found in " + lastUrl.toExternalForm());
                                        }
                                        try {
                                            InputStream stream2 = entity.getContent();
                                            String mimeType = null;
                                            String charset = null;
                                            ContentType contentType = ContentType.parse(entity.getContentType());
                                            if (contentType != null) {
                                                mimeType = contentType.getMimeType();
                                                Charset cs = contentType.getCharset();
                                                if (cs != null) {
                                                    charset = cs.toString();
                                                }
                                            }
                                            JSONUtilities.safePut(fileRecord, "declaredMimeType", mimeType);
                                            JSONUtilities.safePut(fileRecord, "declaredEncoding", charset);
                                            if (saveStream(stream2, lastUrl, rawDataDir, progress, update, fileRecord, fileRecords, entity.getContentLength())) {
                                                // signal to increment archive count
                                                return "saved";
                                            }
                                        } catch (final IOException ex) {
                                            throw new ClientProtocolException(ex);
                                        }
                                        return null;
                                    } else {
                                        throw new ClientProtocolException(String.format("HTTP error %d : %s for URL %s", status, response.getReasonPhrase(), lastUrl.toExternalForm()));
                                    }
                                }
                            };
                            HttpClient httpClient = new HttpClient();
                            // A non-null handler result means saveStream reported an archive.
                            if (httpClient.getResponse(urlString, null, responseHandler) != null) {
                                archiveCount++;
                            }
                            downloadCount++;
                        } else {
                            // Fallback handling for non HTTP connections (only FTP?)
                            URLConnection urlConnection = url.openConnection();
                            urlConnection.setConnectTimeout(5000);
                            urlConnection.connect();
                            try (InputStream stream2 = urlConnection.getInputStream()) {
                                JSONUtilities.safePut(fileRecord, "declaredEncoding", urlConnection.getContentEncoding());
                                JSONUtilities.safePut(fileRecord, "declaredMimeType", urlConnection.getContentType());
                                if (saveStream(stream2, url, rawDataDir, progress, update, fileRecord, fileRecords, urlConnection.getContentLength())) {
                                    archiveCount++;
                                }
                                downloadCount++;
                            }
                        }
                    } else {
                        String value = Streams.asString(stream);
                        parameters.put(name, value);
                        // TODO: We really want to store this on the request so it's available for everyone
                        // request.getParameterMap().put(name, value);
                    }
                } else {
                    // is file content
                    String fileName = fileItem.getName();
                    if (fileName.length() > 0) {
                        long fileSize = fileItem.getSize();
                        File file = allocateFile(rawDataDir, fileName);
                        ObjectNode fileRecord = ParsingUtilities.mapper.createObjectNode();
                        JSONUtilities.safePut(fileRecord, "origin", "upload");
                        JSONUtilities.safePut(fileRecord, "declaredEncoding", request.getCharacterEncoding());
                        JSONUtilities.safePut(fileRecord, "declaredMimeType", fileItem.getContentType());
                        JSONUtilities.safePut(fileRecord, "fileName", fileName);
                        JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));
                        progress.setProgress("Saving file " + fileName + " locally (" + formatBytes(fileSize) + " bytes)", calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
                        JSONUtilities.safePut(fileRecord, "size", saveStreamToFile(stream, file, null));
                        // TODO: This needs to be refactored to be able to test import from archives
                        if (postProcessRetrievedFile(rawDataDir, file, fileRecord, fileRecords, progress)) {
                            archiveCount++;
                        }
                        uploadCount++;
                    }
                }
            }
        }
    } finally {
        // Delete all temp files, even when processing a part failed.
        for (FileItem fileItem : tempFiles) {
            fileItem.delete();
        }
    }
    JSONUtilities.safePut(retrievalRecord, "uploadCount", uploadCount);
    JSONUtilities.safePut(retrievalRecord, "downloadCount", downloadCount);
    JSONUtilities.safePut(retrievalRecord, "clipboardCount", clipboardCount);
    JSONUtilities.safePut(retrievalRecord, "archiveCount", archiveCount);
}
Also used : HttpEntity(org.apache.hc.core5.http.HttpEntity) ContentType(org.apache.hc.core5.http.ContentType) URL(java.net.URL) Result(com.google.refine.importing.UrlRewriter.Result) ClientProtocolException(org.apache.hc.client5.http.ClientProtocolException) ServletFileUpload(org.apache.commons.fileupload.servlet.ServletFileUpload) List(java.util.List) ArrayList(java.util.ArrayList) ArrayNode(com.fasterxml.jackson.databind.node.ArrayNode) ClassicHttpResponse(org.apache.hc.core5.http.ClassicHttpResponse) ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) GZIPInputStream(java.util.zip.GZIPInputStream) ZipInputStream(java.util.zip.ZipInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) Charset(java.nio.charset.Charset) IOException(java.io.IOException) DiskFileItemFactory(org.apache.commons.fileupload.disk.DiskFileItemFactory) URLConnection(java.net.URLConnection) FileItem(org.apache.commons.fileupload.FileItem) HttpClientResponseHandler(org.apache.hc.core5.http.io.HttpClientResponseHandler) ProgressListener(org.apache.commons.fileupload.ProgressListener) HttpClient(com.google.refine.util.HttpClient) File(java.io.File)

Aggregations

ArrayNode (com.fasterxml.jackson.databind.node.ArrayNode)1 ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode)1 Result (com.google.refine.importing.UrlRewriter.Result)1 HttpClient (com.google.refine.util.HttpClient)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 URL (java.net.URL)1 URLConnection (java.net.URLConnection)1 Charset (java.nio.charset.Charset)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 ZipInputStream (java.util.zip.ZipInputStream)1 TarArchiveInputStream (org.apache.commons.compress.archivers.tar.TarArchiveInputStream)1 BZip2CompressorInputStream (org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream)1 FileItem (org.apache.commons.fileupload.FileItem)1 ProgressListener (org.apache.commons.fileupload.ProgressListener)1 DiskFileItemFactory (org.apache.commons.fileupload.disk.DiskFileItemFactory)1