Use of java.net.URLConnection in project lucida by claritylab — class HTMLConverter, method url2text:
/**
* Fetches an HTML document from a URL and converts it into plain text.
*
* @param url URL of HTML document
* @return plain text or <code>null</code> if the fetching or conversion failed
*/
/**
 * Fetches an HTML document from a URL and converts it into plain text.
 *
 * @param url URL of HTML document
 * @return plain text or <code>null</code> if the fetching or conversion failed
 */
public static synchronized String url2text(String url) throws SocketTimeoutException {
    // open a connection; any malformed URL or I/O problem means we give up
    URLConnection conn;
    try {
        conn = new URL(url).openConnection();
    } catch (IOException e) {
        return null;
    }
    // only HTTP(S) connections are accepted
    if (!(conn instanceof HttpURLConnection)) {
        return null;
    }
    // masquerade as a browser so servers do not reject the request
    conn.setRequestProperty("User-agent", "Mozilla/4.0");
    conn.setConnectTimeout(TIMEOUT);
    conn.setReadTimeout(TIMEOUT);
    // configure the HTML-to-text extractor
    StringBean extractor = new StringBean();
    // drop hyperlink URLs from the output
    extractor.setLinks(false);
    // turn non-breaking spaces into ordinary spaces
    extractor.setReplaceNonBreakingSpaces(true);
    // collapse runs of whitespace into a single space
    extractor.setCollapse(true);
    extractor.setConnection(conn);
    return extractor.getStrings();
}
Use of java.net.URLConnection in project lucida by claritylab — class PageRankParser, method getPageRank:
/**
 * Fetches the PageRank for a URL by querying the toolbar service endpoint
 * built by {@code getQueryUrl}.
 *
 * @param url the URL whose PageRank is requested
 * @return the PageRank value, or <code>-1</code> if it could not be determined
 */
public static int getPageRank(String url) {
    int pageRank = -1;
    String query = getQueryUrl(url);
    if (query == null) {
        return pageRank;
    }
    // try-with-resources guarantees the reader (and underlying stream) is
    // closed; the original leaked the BufferedReader on every code path
    try (BufferedReader in = new BufferedReader(
            new InputStreamReader(new URL(query).openConnection().getInputStream()))) {
        String line;
        while ((line = in.readLine()) != null) {
            // the rank is presumably the third ':'-separated token of the
            // response line — TODO confirm against the service's format
            if (line.contains(":")) {
                String[] tokens = line.split(":");
                if (tokens.length > 2) {
                    pageRank = Integer.parseInt(tokens[2]);
                }
            }
        }
    } catch (Exception e) {
        // best-effort lookup: any network or parse failure yields -1
    }
    return pageRank;
}
Use of java.net.URLConnection in project elasticsearch by elastic — class InstallPluginCommand, method downloadZip:
/** Downloads a zip from the url, into a temp file under the given temp dir. */
@SuppressForbidden(reason = "We use getInputStream to download plugins")
/** Downloads a zip from the url, into a temp file under the given temp dir. */
@SuppressForbidden(reason = "We use getInputStream to download plugins")
private Path downloadZip(Terminal terminal, String urlString, Path tmpDir) throws IOException {
    terminal.println(VERBOSE, "Retrieving zip from " + urlString);
    URL source = new URL(urlString);
    // destination file; created now so we have somewhere to stream into
    Path zip = Files.createTempFile(tmpDir, null, ".zip");
    URLConnection connection = source.openConnection();
    connection.addRequestProperty("User-Agent", "elasticsearch-plugin-installer");
    // content length drives the progress display (may be -1 when the server
    // does not report one)
    int contentLength = connection.getContentLength();
    try (InputStream in = new TerminalProgressInputStream(connection.getInputStream(), contentLength, terminal)) {
        // REPLACE_EXISTING: createTempFile above already created the (empty) file
        Files.copy(in, zip, StandardCopyOption.REPLACE_EXISTING);
    }
    return zip;
}
Use of java.net.URLConnection in project jetty.project by eclipse — class JarResource, method copyTo:
/* ------------------------------------------------------------ */
@Override
/* ------------------------------------------------------------ */
/**
 * Extracts the contents of this jar resource into the given directory.
 * When the resource URL names a sub-entry (the portion after "!/"), only
 * that entry — or, for a subdirectory, its contents — is extracted.
 *
 * @param directory target directory for the extracted entries
 * @throws IOException if the URL is not a valid jar URL or extraction fails
 */
@Override
public void copyTo(File directory) throws IOException {
    if (!exists())
        return;
    if (LOG.isDebugEnabled())
        LOG.debug("Extract " + this + " to " + directory);
    // jar URLs have the form "jar:<file-url>!/<entry>"; split at "!/"
    String urlString = this.getURL().toExternalForm().trim();
    int endOfJarUrl = urlString.indexOf("!/");
    // skip the leading "jar:" scheme (4 chars) when the separator is present
    int startOfJarUrl = (endOfJarUrl >= 0 ? 4 : 0);
    if (endOfJarUrl < 0)
        throw new IOException("Not a valid jar url: " + urlString);
    URL jarFileURL = new URL(urlString.substring(startOfJarUrl, endOfJarUrl));
    // the specific entry requested, or null to extract the whole jar
    String subEntryName = (endOfJarUrl + 2 < urlString.length() ? urlString.substring(endOfJarUrl + 2) : null);
    boolean subEntryIsDir = (subEntryName != null && subEntryName.endsWith("/") ? true : false);
    if (LOG.isDebugEnabled())
        LOG.debug("Extracting entry = " + subEntryName + " from jar " + jarFileURL);
    URLConnection c = jarFileURL.openConnection();
    // bypass the JVM's jar URL cache so we read the file's current contents
    c.setUseCaches(false);
    try (InputStream is = c.getInputStream();
        JarInputStream jin = new JarInputStream(is)) {
        JarEntry entry;
        boolean shouldExtract;
        while ((entry = jin.getNextJarEntry()) != null) {
            String entryName = entry.getName();
            if ((subEntryName != null) && (entryName.startsWith(subEntryName))) {
                // is the subentry really a dir? (the URL may have omitted the
                // trailing slash; detect it from the matching jar entry)
                if (!subEntryIsDir && subEntryName.length() + 1 == entryName.length() && entryName.endsWith("/"))
                    subEntryIsDir = true;
                //extract it.
                if (subEntryIsDir) {
                    //if it is a subdirectory we are looking for, then we
                    //are looking to extract its contents into the target
                    //directory. Remove the name of the subdirectory so
                    //that we don't wind up creating it too.
                    entryName = entryName.substring(subEntryName.length());
                    if (!entryName.equals("")) {
                        //the entry is a real child of the subdirectory
                        shouldExtract = true;
                    } else
                        shouldExtract = false;
                } else
                    shouldExtract = true;
            } else if ((subEntryName != null) && (!entryName.startsWith(subEntryName))) {
                //there is a particular entry we are looking for, and this one
                //isn't it
                shouldExtract = false;
            } else {
                //we are extracting everything
                shouldExtract = true;
            }
            if (!shouldExtract) {
                if (LOG.isDebugEnabled())
                    LOG.debug("Skipping entry: " + entryName);
                continue;
            }
            // path-traversal guard: normalize separators and canonicalize;
            // NOTE(review): relies on URIUtil.canonicalPath returning null for
            // paths that escape the root (e.g. via "..") — confirm its contract
            String dotCheck = entryName.replace('\\', '/');
            dotCheck = URIUtil.canonicalPath(dotCheck);
            if (dotCheck == null) {
                if (LOG.isDebugEnabled())
                    LOG.debug("Invalid entry: " + entryName);
                continue;
            }
            File file = new File(directory, entryName);
            if (entry.isDirectory()) {
                // Make directory
                if (!file.exists())
                    file.mkdirs();
            } else {
                // make directory (some jars don't list dirs)
                File dir = new File(file.getParent());
                if (!dir.exists())
                    dir.mkdirs();
                // Make file
                try (OutputStream fout = new FileOutputStream(file)) {
                    IO.copy(jin, fout);
                }
                // touch the file: preserve the entry's original timestamp
                if (entry.getTime() >= 0)
                    file.setLastModified(entry.getTime());
            }
        }
        // JarInputStream consumes the manifest separately from the entry
        // stream, so write it out explicitly when extracting everything or
        // when the manifest itself was the requested entry
        if ((subEntryName == null) || (subEntryName != null && subEntryName.equalsIgnoreCase("META-INF/MANIFEST.MF"))) {
            Manifest manifest = jin.getManifest();
            if (manifest != null) {
                File metaInf = new File(directory, "META-INF");
                metaInf.mkdir();
                File f = new File(metaInf, "MANIFEST.MF");
                try (OutputStream fout = new FileOutputStream(f)) {
                    manifest.write(fout);
                }
            }
        }
    }
}
Use of java.net.URLConnection in project zeppelin by apache — class PegdownWebSequencelPlugin, method createWebsequenceUrl:
/**
 * Posts a diagram description to websequencediagrams.com and returns the URL
 * of the rendered PNG image.
 *
 * @param style diagram style; falls back to "default" when <code>null</code>
 * @param content the diagram source text
 * @return the image URL, or an empty string if the response held no image link
 * @throws RuntimeException if the HTTP exchange fails
 */
public static String createWebsequenceUrl(String style, String content) {
    style = StringUtils.defaultString(style, "default");
    String webSeqUrl = "";
    try {
        String query = "style=" + style + "&message=" + URLEncoder.encode(content, "UTF-8") + "&apiVersion=1";
        URLConnection conn = new URL(WEBSEQ_URL).openConnection();
        conn.setDoOutput(true);
        StringBuilder response = new StringBuilder();
        // try-with-resources closes both streams on every path; the original
        // mixed manual close() with closeQuietly() in a finally block
        try (OutputStreamWriter writer = new OutputStreamWriter(conn.getOutputStream(), StandardCharsets.UTF_8)) {
            writer.write(query);
            writer.flush();
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    response.append(line);
                }
            }
        }
        // the response embeds the image path as a quoted "...?png=..." token
        String json = response.toString();
        int start = json.indexOf("?png=");
        int end = json.indexOf("\"", start);
        if (start != -1 && end != -1) {
            webSeqUrl = WEBSEQ_URL + "/" + json.substring(start, end);
        }
    } catch (IOException e) {
        throw new RuntimeException("Failed to get proper response from websequencediagrams.com", e);
    }
    return webSeqUrl;
}
Aggregations