Search in sources :

Example 1 with ProgressListener

Use of org.apache.commons.fileupload.ProgressListener in project OpenRefine by OpenRefine.

The method retrieveContentFromPostRequest of the class ImportingUtilities.

/**
 * Parses a multipart POST request and stores the content it carries under {@code rawDataDir}.
 *
 * <p>Each part of the request is handled according to its kind:
 * <ul>
 *   <li>form field {@code clipboard}: its text is saved to a file allocated as {@code clipboard.txt};</li>
 *   <li>form field {@code download}: its value is treated as a URL, passed through the registered
 *       {@link UrlRewriter}s and then fetched (HTTP/HTTPS through HttpClient, anything else through
 *       {@link URLConnection});</li>
 *   <li>any other form field: its value is stored in {@code parameters} under the lower-cased field name;</li>
 *   <li>file part with a non-empty file name: its content is saved and post-processed.</li>
 * </ul>
 *
 * <p>A record for every stored file is appended to the {@code "files"} array of {@code retrievalRecord};
 * the {@code uploadCount}, {@code downloadCount}, {@code clipboardCount} and {@code archiveCount} totals
 * are written to {@code retrievalRecord} when the method completes normally.
 *
 * <p>The stream of every part is closed and the temporary items created while parsing the request are
 * deleted even when processing is canceled or fails with an exception.
 *
 * @param request         the multipart POST request to read
 * @param parameters      receives the plain form fields (name lower-cased, value as string)
 * @param rawDataDir      directory in which retrieved files are stored
 * @param retrievalRecord JSON object that receives the file records and the counters
 * @param progress        progress sink; also polled for cancellation
 * @throws Exception if the request cannot be parsed, a URL is malformed, a download returns no
 *                   content, or any I/O operation fails
 */
public static void retrieveContentFromPostRequest(HttpServletRequest request, Properties parameters, File rawDataDir, JSONObject retrievalRecord, final Progress progress) throws Exception {
    JSONArray fileRecords = new JSONArray();
    JSONUtilities.safePut(retrievalRecord, "files", fileRecords);
    int clipboardCount = 0;
    int uploadCount = 0;
    int downloadCount = 0;
    int archiveCount = 0;
    // This tracks the total progress, which involves uploading data from the client
    // as well as downloading data from URLs.
    final SavingUpdate update = new SavingUpdate() {

        @Override
        public void savedMore() {
            progress.setProgress(null, calculateProgressPercent(totalExpectedSize, totalRetrievedSize));
        }

        @Override
        public boolean isCanceled() {
            return progress.isCanceled();
        }
    };
    DiskFileItemFactory fileItemFactory = new DiskFileItemFactory();
    ServletFileUpload upload = new ServletFileUpload(fileItemFactory);
    upload.setProgressListener(new ProgressListener() {

        boolean setContentLength = false;

        long lastBytesRead = 0;

        @Override
        public void update(long bytesRead, long contentLength, int itemCount) {
            if (!setContentLength) {
                // Only try to set the content length if we really know it.
                if (contentLength >= 0) {
                    update.totalExpectedSize += contentLength;
                    setContentLength = true;
                }
            }
            if (setContentLength) {
                // bytesRead is cumulative, so only the delta since the last callback is added.
                update.totalRetrievedSize += (bytesRead - lastBytesRead);
                lastBytesRead = bytesRead;
                update.savedMore();
            }
        }
    });
    @SuppressWarnings("unchecked") List<FileItem> tempFiles = (List<FileItem>) upload.parseRequest(request);
    try {
        progress.setProgress("Uploading data ...", -1);
        parts: for (FileItem fileItem : tempFiles) {
            if (progress.isCanceled()) {
                break;
            }
            InputStream stream = fileItem.getInputStream();
            try {
                // Locale.ROOT keeps the field-name matching independent of the JVM default locale
                // (e.g. "CLIPBOARD" must not become "clıpboard" under a Turkish locale).
                String name = fileItem.getFieldName().toLowerCase(java.util.Locale.ROOT);
                if (fileItem.isFormField()) {
                    if (name.equals("clipboard")) {
                        String encoding = request.getCharacterEncoding();
                        if (encoding == null) {
                            encoding = "UTF-8";
                        }
                        File file = allocateFile(rawDataDir, "clipboard.txt");
                        JSONObject fileRecord = new JSONObject();
                        JSONUtilities.safePut(fileRecord, "origin", "clipboard");
                        JSONUtilities.safePut(fileRecord, "declaredEncoding", encoding);
                        JSONUtilities.safePut(fileRecord, "declaredMimeType", (String) null);
                        JSONUtilities.safePut(fileRecord, "format", "text");
                        JSONUtilities.safePut(fileRecord, "fileName", "(clipboard)");
                        JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));
                        progress.setProgress("Uploading pasted clipboard text", calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
                        JSONUtilities.safePut(fileRecord, "size", saveStreamToFile(stream, file, null));
                        JSONUtilities.append(fileRecords, fileRecord);
                        clipboardCount++;
                    } else if (name.equals("download")) {
                        String urlString = Streams.asString(stream);
                        URL url = new URL(urlString);
                        JSONObject fileRecord = new JSONObject();
                        JSONUtilities.safePut(fileRecord, "origin", "download");
                        JSONUtilities.safePut(fileRecord, "url", urlString);
                        for (UrlRewriter rewriter : ImportingManager.urlRewriters) {
                            Result result = rewriter.rewrite(urlString);
                            if (result != null) {
                                urlString = result.rewrittenUrl;
                                url = new URL(urlString);
                                JSONUtilities.safePut(fileRecord, "url", urlString);
                                JSONUtilities.safePut(fileRecord, "format", result.format);
                                if (!result.download) {
                                    // The rewriter asked not to download: record the URL only.
                                    // The enclosing finally still closes this part's stream.
                                    downloadCount++;
                                    JSONUtilities.append(fileRecords, fileRecord);
                                    continue parts;
                                }
                            }
                        }
                        if ("http".equals(url.getProtocol()) || "https".equals(url.getProtocol())) {
                            DefaultHttpClient client = new DefaultHttpClient();
                            DecompressingHttpClient httpclient = new DecompressingHttpClient(client);
                            HttpGet httpGet = new HttpGet(url.toURI());
                            httpGet.setHeader("User-Agent", RefineServlet.getUserAgent());
                            if ("https".equals(url.getProtocol())) {
                                // HTTPS only - no sending password in the clear over HTTP
                                String userinfo = url.getUserInfo();
                                if (userinfo != null) {
                                    int s = userinfo.indexOf(':');
                                    if (s > 0) {
                                        String user = userinfo.substring(0, s);
                                        String pw = userinfo.substring(s + 1);
                                        // Scope the credentials to the port actually used by the URL;
                                        // fall back to the protocol default (443) when none is given.
                                        int port = url.getPort() != -1 ? url.getPort() : url.getDefaultPort();
                                        client.getCredentialsProvider().setCredentials(new AuthScope(url.getHost(), port), new UsernamePasswordCredentials(user, pw));
                                    }
                                }
                            }
                            HttpResponse response = httpclient.execute(httpGet);
                            try {
                                // NOTE(review): the status line is fetched but never inspected, so the
                                // body of an error response is stored like regular content - confirm
                                // whether non-success statuses should be rejected here.
                                response.getStatusLine();
                                HttpEntity entity = response.getEntity();
                                if (entity == null) {
                                    throw new Exception("No content found in " + url.toString());
                                }
                                InputStream stream2 = entity.getContent();
                                String encoding = null;
                                if (entity.getContentEncoding() != null) {
                                    encoding = entity.getContentEncoding().getValue();
                                }
                                JSONUtilities.safePut(fileRecord, "declaredEncoding", encoding);
                                String contentType = null;
                                if (entity.getContentType() != null) {
                                    contentType = entity.getContentType().getValue();
                                }
                                JSONUtilities.safePut(fileRecord, "declaredMimeType", contentType);
                                if (saveStream(stream2, url, rawDataDir, progress, update, fileRecord, fileRecords, entity.getContentLength())) {
                                    archiveCount++;
                                }
                                downloadCount++;
                                EntityUtils.consume(entity);
                            } finally {
                                httpGet.releaseConnection();
                            }
                        } else {
                            // Fallback handling for non HTTP connections (only FTP?)
                            URLConnection urlConnection = url.openConnection();
                            urlConnection.setConnectTimeout(5000);
                            urlConnection.connect();
                            InputStream stream2 = urlConnection.getInputStream();
                            // Guard the stream from the moment it is opened so that it is closed
                            // even if recording the metadata below fails.
                            try {
                                JSONUtilities.safePut(fileRecord, "declaredEncoding", urlConnection.getContentEncoding());
                                JSONUtilities.safePut(fileRecord, "declaredMimeType", urlConnection.getContentType());
                                if (saveStream(stream2, url, rawDataDir, progress, update, fileRecord, fileRecords, urlConnection.getContentLength())) {
                                    archiveCount++;
                                }
                                downloadCount++;
                            } finally {
                                stream2.close();
                            }
                        }
                    } else {
                        String value = Streams.asString(stream);
                        parameters.put(name, value);
                        // TODO: We really want to store this on the request so it's available for everyone
                        //                    request.getParameterMap().put(name, value);
                    }
                } else {
                    // is file content
                    String fileName = fileItem.getName();
                    if (fileName.length() > 0) {
                        long fileSize = fileItem.getSize();
                        File file = allocateFile(rawDataDir, fileName);
                        JSONObject fileRecord = new JSONObject();
                        JSONUtilities.safePut(fileRecord, "origin", "upload");
                        JSONUtilities.safePut(fileRecord, "declaredEncoding", request.getCharacterEncoding());
                        JSONUtilities.safePut(fileRecord, "declaredMimeType", fileItem.getContentType());
                        JSONUtilities.safePut(fileRecord, "fileName", fileName);
                        JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));
                        progress.setProgress("Saving file " + fileName + " locally (" + formatBytes(fileSize) + " bytes)", calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
                        JSONUtilities.safePut(fileRecord, "size", saveStreamToFile(stream, file, null));
                        if (postProcessRetrievedFile(rawDataDir, file, fileRecord, fileRecords, progress)) {
                            archiveCount++;
                        }
                        uploadCount++;
                    }
                }
            } finally {
                // Runs on normal completion, on "continue parts" and on exceptions alike.
                stream.close();
            }
        }
    } finally {
        // Delete all temp files, also when processing was canceled or failed.
        for (FileItem fileItem : tempFiles) {
            fileItem.delete();
        }
    }
    JSONUtilities.safePut(retrievalRecord, "uploadCount", uploadCount);
    JSONUtilities.safePut(retrievalRecord, "downloadCount", downloadCount);
    JSONUtilities.safePut(retrievalRecord, "clipboardCount", clipboardCount);
    JSONUtilities.safePut(retrievalRecord, "archiveCount", archiveCount);
}
Also used : HttpEntity(org.apache.http.HttpEntity) HttpGet(org.apache.http.client.methods.HttpGet) URL(java.net.URL) DefaultHttpClient(org.apache.http.impl.client.DefaultHttpClient) Result(com.google.refine.importing.UrlRewriter.Result) ServletFileUpload(org.apache.commons.fileupload.servlet.ServletFileUpload) List(java.util.List) ArrayList(java.util.ArrayList) GZIPInputStream(java.util.zip.GZIPInputStream) ZipInputStream(java.util.zip.ZipInputStream) CBZip2InputStream(org.apache.tools.bzip2.CBZip2InputStream) FileInputStream(java.io.FileInputStream) TarInputStream(org.apache.tools.tar.TarInputStream) InputStream(java.io.InputStream) JSONArray(org.json.JSONArray) HttpResponse(org.apache.http.HttpResponse) DiskFileItemFactory(org.apache.commons.fileupload.disk.DiskFileItemFactory) DecompressingHttpClient(org.apache.http.impl.client.DecompressingHttpClient) ServletException(javax.servlet.ServletException) FileNotFoundException(java.io.FileNotFoundException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) URLConnection(java.net.URLConnection) UsernamePasswordCredentials(org.apache.http.auth.UsernamePasswordCredentials) FileItem(org.apache.commons.fileupload.FileItem) ProgressListener(org.apache.commons.fileupload.ProgressListener) JSONObject(org.json.JSONObject) AuthScope(org.apache.http.auth.AuthScope) File(java.io.File)

Aggregations

Result (com.google.refine.importing.UrlRewriter.Result)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 UnsupportedEncodingException (java.io.UnsupportedEncodingException)1 URL (java.net.URL)1 URLConnection (java.net.URLConnection)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 ZipInputStream (java.util.zip.ZipInputStream)1 ServletException (javax.servlet.ServletException)1 FileItem (org.apache.commons.fileupload.FileItem)1 ProgressListener (org.apache.commons.fileupload.ProgressListener)1 DiskFileItemFactory (org.apache.commons.fileupload.disk.DiskFileItemFactory)1 ServletFileUpload (org.apache.commons.fileupload.servlet.ServletFileUpload)1 HttpEntity (org.apache.http.HttpEntity)1 HttpResponse (org.apache.http.HttpResponse)1