Search in sources :

Example 1 with URLConnection

use of java.net.URLConnection in project lucida by claritylab.

the class HTMLConverter method url2text.

/**
 * Downloads the HTML document behind a URL and returns its textual content.
 * <p>
 * Only connections that are instances of {@link HttpURLConnection} are
 * accepted. The text is extracted without links, with non-breaking spaces
 * replaced and with whitespace sequences collapsed.
 *
 * @param url address of the HTML document
 * @return the extracted plain text, or <code>null</code> if the connection
 *         could not be opened or is not an HTTP connection
 * @throws SocketTimeoutException declared by the signature; presumably raised
 *         when connecting or reading exceeds <code>TIMEOUT</code> - confirm
 */
public static synchronized String url2text(String url) throws SocketTimeoutException {
    // open the connection; any I/O problem means "no text"
    final URLConnection connection;
    try {
        connection = new URL(url).openConnection();
    } catch (IOException e) {
        return null;
    }
    // anything that is not an HTTP connection is rejected
    if (!(connection instanceof HttpURLConnection)) {
        return null;
    }

    // identify as a browser and bound both the connect and the read phase
    connection.setRequestProperty("User-agent", "Mozilla/4.0");
    connection.setConnectTimeout(TIMEOUT);
    connection.setReadTimeout(TIMEOUT);

    // configure the HTML-to-text extractor
    final StringBean extractor = new StringBean();
    extractor.setLinks(false);                    // leave links out of the text
    extractor.setReplaceNonBreakingSpaces(true);  // replace non-breaking spaces
    extractor.setCollapse(true);                  // collapse whitespace sequences
    extractor.setConnection(connection);

    // fetch the document and hand back the extracted text
    return extractor.getStrings();
}
Also used : HttpURLConnection(java.net.HttpURLConnection) StringBean(org.htmlparser.beans.StringBean) IOException(java.io.IOException) HttpURLConnection(java.net.HttpURLConnection) URLConnection(java.net.URLConnection) URL(java.net.URL)

Example 2 with URLConnection

use of java.net.URLConnection in project lucida by claritylab.

the class PageRankParser method getPageRank.

/**
 * Looks up the page rank of a URL by fetching the query URL built by
 * <code>getQueryUrl(url)</code> and scanning the response line by line.
 * <p>
 * Every line is split on <code>':'</code>; when a line has more than two
 * tokens, the third token is parsed as the page rank. If several lines
 * match, the last one wins.
 * <p>
 * The lookup is best effort: any I/O or parsing failure ends the scan and
 * the value obtained so far is returned.
 *
 * @param url the URL whose page rank is requested
 * @return the parsed page rank, or <code>-1</code> if no query URL could be
 *         built or no rank was read before a failure occurred
 */
public static int getPageRank(String url) {
    int pageRank = -1;
    String query = getQueryUrl(url);
    if (query == null) {
        return pageRank;
    }
    try {
        URLConnection conn = new URL(query).openConnection();
        // try-with-resources guarantees the reader (and the underlying
        // connection stream) is closed on every path
        try (BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
            String line;
            while ((line = in.readLine()) != null) {
                // a line without ':' yields a single token and is skipped
                // by the length check below
                String[] tokens = line.split(":");
                if (tokens.length > 2) {
                    pageRank = Integer.parseInt(tokens[2]);
                }
            }
        }
    } catch (IOException | RuntimeException ignored) {
        // deliberately best effort: a malformed URL, a network failure or an
        // unparsable rank must not propagate; fall through with the value
        // gathered so far
    }
    return pageRank;
}
Also used : InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) URL(java.net.URL) URLConnection(java.net.URLConnection) MalformedURLException(java.net.MalformedURLException) UnsupportedEncodingException(java.io.UnsupportedEncodingException)

Example 3 with URLConnection

use of java.net.URLConnection in project elasticsearch by elastic.

the class InstallPluginCommand method downloadZip.

/**
 * Fetches the zip located at {@code urlString} and stores it in a newly
 * created temp file (suffix {@code .zip}) inside {@code tmpDir}.
 *
 * @param terminal  receives the verbose "Retrieving zip" message and is handed
 *                  to the progress-reporting stream wrapper
 * @param urlString location of the zip to download
 * @param tmpDir    directory in which the temp file is created
 * @return path of the temp file holding the downloaded bytes
 * @throws IOException if the URL is malformed, the temp file cannot be
 *                     created, or the transfer fails
 */
@SuppressForbidden(reason = "We use getInputStream to download plugins")
private Path downloadZip(Terminal terminal, String urlString, Path tmpDir) throws IOException {
    terminal.println(VERBOSE, "Retrieving zip from " + urlString);
    final URL source = new URL(urlString);
    final Path target = Files.createTempFile(tmpDir, null, ".zip");

    final URLConnection connection = source.openConnection();
    connection.addRequestProperty("User-Agent", "elasticsearch-plugin-installer");
    final int expectedBytes = connection.getContentLength();

    try (InputStream body = new TerminalProgressInputStream(connection.getInputStream(), expectedBytes, terminal)) {
        // createTempFile already created the (empty) file, so the copy has to replace it
        Files.copy(body, target, StandardCopyOption.REPLACE_EXISTING);
    }
    return target;
}
Also used : Path(java.nio.file.Path) ZipInputStream(java.util.zip.ZipInputStream) InputStream(java.io.InputStream) URL(java.net.URL) URLConnection(java.net.URLConnection) SuppressForbidden(org.elasticsearch.common.SuppressForbidden)

Example 4 with URLConnection

use of java.net.URLConnection in project jetty.project by eclipse.

the class JarResource method copyTo.

/* ------------------------------------------------------------ */
/**
 * Extracts the jar referenced by this resource's URL into the given directory.
 * <p>
 * Does nothing if this resource does not exist. The URL must contain the
 * "!/" separator. Any text after the separator names a sub entry: when one
 * is present, only jar entries whose names start with it are extracted, and
 * if the sub entry is a directory its name is stripped from the extracted
 * paths so that its contents land directly in <code>directory</code>.
 * Without a sub entry every jar entry is extracted.
 * <p>
 * The jar manifest is written to <code>META-INF/MANIFEST.MF</code> under
 * <code>directory</code> when no sub entry is given or when the sub entry is
 * the manifest itself (compared ignoring case).
 *
 * @param directory target directory for the extracted files
 * @throws IOException if the URL does not contain "!/", or if reading the
 *         jar or writing a file fails
 */
@Override
public void copyTo(File directory) throws IOException {
    // nothing to extract from a resource that is not there
    if (!exists())
        return;
    if (LOG.isDebugEnabled())
        LOG.debug("Extract " + this + " to " + directory);
    String urlString = this.getURL().toExternalForm().trim();
    // position of the separator between the jar location and the entry inside it
    int endOfJarUrl = urlString.indexOf("!/");
    // skip the first 4 characters when a separator exists
    // NOTE(review): presumably the "jar:" scheme prefix - confirm
    int startOfJarUrl = (endOfJarUrl >= 0 ? 4 : 0);
    if (endOfJarUrl < 0)
        throw new IOException("Not a valid jar url: " + urlString);
    // URL of the jar file itself, without the prefix and without the "!/..." part
    URL jarFileURL = new URL(urlString.substring(startOfJarUrl, endOfJarUrl));
    // whatever follows "!/", or null when the URL ends right after the separator
    String subEntryName = (endOfJarUrl + 2 < urlString.length() ? urlString.substring(endOfJarUrl + 2) : null);
    // a trailing slash marks the sub entry as a directory; may also be detected later in the loop
    boolean subEntryIsDir = (subEntryName != null && subEntryName.endsWith("/") ? true : false);
    if (LOG.isDebugEnabled())
        LOG.debug("Extracting entry = " + subEntryName + " from jar " + jarFileURL);
    URLConnection c = jarFileURL.openConnection();
    c.setUseCaches(false);
    // both streams are closed automatically when the block is left
    try (InputStream is = c.getInputStream();
        JarInputStream jin = new JarInputStream(is)) {
        JarEntry entry;
        boolean shouldExtract;
        // walk over every entry of the jar and decide whether it is wanted
        while ((entry = jin.getNextJarEntry()) != null) {
            String entryName = entry.getName();
            if ((subEntryName != null) && (entryName.startsWith(subEntryName))) {
                // is the subentry really a dir?
                // (the entry name equals the sub entry name plus a trailing slash)
                if (!subEntryIsDir && subEntryName.length() + 1 == entryName.length() && entryName.endsWith("/"))
                    subEntryIsDir = true;
                //extract it.
                if (subEntryIsDir) {
                    //if it is a subdirectory we are looking for, then we
                    //are looking to extract its contents into the target
                    //directory. Remove the name of the subdirectory so
                    //that we don't wind up creating it too.
                    entryName = entryName.substring(subEntryName.length());
                    if (!entryName.equals("")) {
                        //the entry is something below the subdirectory
                        shouldExtract = true;
                    } else
                        //the entry is the subdirectory itself: nothing to create
                        shouldExtract = false;
                } else
                    shouldExtract = true;
            } else if ((subEntryName != null) && (!entryName.startsWith(subEntryName))) {
                //there is a particular entry we are looking for, and this one
                //isn't it
                shouldExtract = false;
            } else {
                //we are extracting everything
                shouldExtract = true;
            }
            if (!shouldExtract) {
                if (LOG.isDebugEnabled())
                    LOG.debug("Skipping entry: " + entryName);
                continue;
            }
            // normalise separators and canonicalise the name; a null result is
            // treated as an invalid entry and skipped
            // NOTE(review): presumably guards against names escaping the target
            // directory via ".." - confirm against URIUtil.canonicalPath
            String dotCheck = entryName.replace('\\', '/');
            dotCheck = URIUtil.canonicalPath(dotCheck);
            if (dotCheck == null) {
                if (LOG.isDebugEnabled())
                    LOG.debug("Invalid entry: " + entryName);
                continue;
            }
            // note: the target is built from entryName, not from the canonicalised dotCheck
            File file = new File(directory, entryName);
            if (entry.isDirectory()) {
                // Make directory
                if (!file.exists())
                    file.mkdirs();
            } else {
                // make directory (some jars don't list dirs)
                File dir = new File(file.getParent());
                if (!dir.exists())
                    dir.mkdirs();
                // Make file
                // jin is positioned on the current entry, so this copies that entry's bytes
                try (OutputStream fout = new FileOutputStream(file)) {
                    IO.copy(jin, fout);
                }
                // touch the file.
                // (only when the entry carries a non-negative timestamp)
                if (entry.getTime() >= 0)
                    file.setLastModified(entry.getTime());
            }
        }
        // the manifest is written separately after the entry loop
        // NOTE(review): presumably because JarInputStream reads the manifest
        // itself instead of returning it from getNextJarEntry() - confirm
        if ((subEntryName == null) || (subEntryName != null && subEntryName.equalsIgnoreCase("META-INF/MANIFEST.MF"))) {
            Manifest manifest = jin.getManifest();
            if (manifest != null) {
                File metaInf = new File(directory, "META-INF");
                metaInf.mkdir();
                File f = new File(metaInf, "MANIFEST.MF");
                try (OutputStream fout = new FileOutputStream(f)) {
                    manifest.write(fout);
                }
            }
        }
    }
}
Also used : JarInputStream(java.util.jar.JarInputStream) FilterInputStream(java.io.FilterInputStream) JarInputStream(java.util.jar.JarInputStream) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) JarEntry(java.util.jar.JarEntry) Manifest(java.util.jar.Manifest) URL(java.net.URL) URLConnection(java.net.URLConnection) JarURLConnection(java.net.JarURLConnection) FileOutputStream(java.io.FileOutputStream) File(java.io.File)

Example 5 with URLConnection

use of java.net.URLConnection in project zeppelin by apache.

the class PegdownWebSequencelPlugin method createWebsequenceUrl.

/**
 * Posts a sequence-diagram description to the service at
 * <code>WEBSEQ_URL</code> and builds the URL of the rendered image from the
 * response.
 * <p>
 * The request body is <code>style=...&amp;message=...&amp;apiVersion=1</code>,
 * with the message URL-encoded as UTF-8. The response is searched for the
 * first <code>?png=</code> marker; the text from that marker up to the next
 * double quote is appended to <code>WEBSEQ_URL + "/"</code>.
 *
 * @param style   diagram style; <code>"default"</code> is used when null
 * @param content diagram description to send as the message
 * @return the image URL, or an empty string if the response contains no
 *         <code>?png=</code> marker followed by a double quote
 * @throws RuntimeException wrapping the {@link IOException} if the request
 *         or reading the response fails
 */
public static String createWebsequenceUrl(String style, String content) {
    style = StringUtils.defaultString(style, "default");
    String webSeqUrl = "";
    try {
        String query = "style=" + style
                + "&message=" + URLEncoder.encode(content, "UTF-8")
                + "&apiVersion=1";
        URLConnection conn = new URL(WEBSEQ_URL).openConnection();
        // enables sending a request body on this connection
        conn.setDoOutput(true);

        StringBuilder response = new StringBuilder();
        // try-with-resources closes both streams exactly once on every path,
        // replacing the manual close() calls plus closeQuietly() in finally
        try (OutputStreamWriter writer = new OutputStreamWriter(conn.getOutputStream(), StandardCharsets.UTF_8)) {
            writer.write(query);
            // push the request body out before asking for the response
            writer.flush();
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    response.append(line);
                }
            }
        }

        // pick the "?png=..." fragment out of the response, up to the closing quote
        String json = response.toString();
        int start = json.indexOf("?png=");
        if (start != -1) {
            int end = json.indexOf("\"", start);
            if (end != -1) {
                webSeqUrl = WEBSEQ_URL + "/" + json.substring(start, end);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Failed to get proper response from websequencediagrams.com", e);
    }
    return webSeqUrl;
}
Also used : InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) OutputStreamWriter(java.io.OutputStreamWriter) IOException(java.io.IOException) URL(java.net.URL) URLConnection(java.net.URLConnection)

Aggregations

URLConnection (java.net.URLConnection): 1767, URL (java.net.URL): 1228, IOException (java.io.IOException): 773, InputStream (java.io.InputStream): 593, HttpURLConnection (java.net.HttpURLConnection): 481, InputStreamReader (java.io.InputStreamReader): 436, BufferedReader (java.io.BufferedReader): 382, HttpsURLConnection (javax.net.ssl.HttpsURLConnection): 220, Test (org.junit.Test): 214, File (java.io.File): 204, MalformedURLException (java.net.MalformedURLException): 197, BufferedInputStream (java.io.BufferedInputStream): 119, JarURLConnection (java.net.JarURLConnection): 116, OutputStream (java.io.OutputStream): 114, FileOutputStream (java.io.FileOutputStream): 113, FileInputStream (java.io.FileInputStream): 112, ArrayList (java.util.ArrayList): 96, MockResponse (okhttp3.mockwebserver.MockResponse): 76, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 75, URISyntaxException (java.net.URISyntaxException): 65