Use of java.net.URLConnection in project hive by apache: class ClassNameCompleter, method getClassNames.
public static String[] getClassNames() throws IOException {
    Set urls = new HashSet();
    for (ClassLoader loader = Thread.currentThread().getContextClassLoader(); loader != null; loader = loader.getParent()) {
        if (!(loader instanceof URLClassLoader)) {
            continue;
        }
        urls.addAll(Arrays.asList(((URLClassLoader) loader).getURLs()));
    }
    // Now add the URL that holds java.lang.String. This is because
    // some JVMs do not report the core classes jar in the list of
    // class loaders.
    Class[] systemClasses = new Class[] { String.class, javax.swing.JFrame.class };
    for (int i = 0; i < systemClasses.length; i++) {
        URL classURL = systemClasses[i].getResource("/" + systemClasses[i].getName().replace('.', '/') + clazzFileNameExtension);
        if (classURL != null) {
            URLConnection uc = classURL.openConnection();
            if (uc instanceof JarURLConnection) {
                urls.add(((JarURLConnection) uc).getJarFileURL());
            }
        }
    }
    Set classes = new HashSet();
    for (Iterator i = urls.iterator(); i.hasNext(); ) {
        URL url = (URL) i.next();
        try {
            File file = new File(url.getFile());
            if (file.isDirectory()) {
                Set files = getClassFiles(file.getAbsolutePath(), new HashSet(), file, new int[] { 200 });
                classes.addAll(files);
                continue;
            }
            if (!isJarFile(file)) {
                continue;
            }
            JarFile jf = new JarFile(file);
            for (Enumeration e = jf.entries(); e.hasMoreElements(); ) {
                JarEntry entry = (JarEntry) e.nextElement();
                if (entry == null) {
                    continue;
                }
                String name = entry.getName();
                if (isClazzFile(name)) {
                    /* only use class files */
                    classes.add(name);
                } else if (isJarFile(name)) {
                    classes.addAll(getClassNamesFromJar(name));
                }
            }
        } catch (IOException e) {
            throw new IOException(String.format("Error reading classpath entry: %s", url), e);
        }
    }
    // Now convert the entries to class names by changing "/" to "." and
    // trimming the trailing ".class".
    Set classNames = new TreeSet();
    for (Iterator i = classes.iterator(); i.hasNext(); ) {
        String name = (String) i.next();
        classNames.add(name.replace('/', '.').substring(0, name.length() - 6));
    }
    return (String[]) classNames.toArray(new String[classNames.size()]);
}
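The JarURLConnection check in the system-classes loop above is the core trick: open a connection to a class-file resource URL and, when that connection turns out to be a JarURLConnection, ask it for the URL of the enclosing jar. A minimal standalone sketch of just that step (not part of the Hive source; note that on Java 9+ the core classes live in jrt: modules rather than a jar, so the instanceof test fails there):

import java.net.JarURLConnection;
import java.net.URL;
import java.net.URLConnection;

public class JarUrlSketch {
    public static void main(String[] args) throws Exception {
        // Resource URL of the class file for java.lang.String.
        URL classURL = String.class.getResource("/java/lang/String.class");
        if (classURL != null) {
            URLConnection uc = classURL.openConnection();
            // On JVMs that load core classes from a jar, this is a
            // JarURLConnection and exposes the enclosing jar's URL.
            if (uc instanceof JarURLConnection) {
                System.out.println(((JarURLConnection) uc).getJarFileURL());
            }
        }
    }
}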
Use of java.net.URLConnection in project hive by apache: class BeeLine, method getManifest.
static Manifest getManifest() throws IOException {
    URL base = BeeLine.class.getResource("/META-INF/MANIFEST.MF");
    URLConnection c = base.openConnection();
    if (c instanceof JarURLConnection) {
        return ((JarURLConnection) c).getManifest();
    }
    return null;
}
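A hedged usage sketch of getManifest (not from the BeeLine source; it assumes the caller lives in the same package, since the method is package-private, and that the jar's manifest sets the standard Implementation-Version attribute):

import java.io.IOException;
import java.util.jar.Attributes;
import java.util.jar.Manifest;

// Hypothetical caller in the same package as BeeLine.
static String getImplementationVersion() throws IOException {
    Manifest m = BeeLine.getManifest();
    if (m == null) {
        // BeeLine was not loaded from a jar (e.g. run from a classes directory).
        return null;
    }
    Attributes attrs = m.getMainAttributes();
    // Whether BeeLine's jar actually sets this attribute is an assumption here.
    return attrs.getValue(Attributes.Name.IMPLEMENTATION_VERSION);
}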
Use of java.net.URLConnection in project lucida by claritylab: class WikipediaTermImportanceFilter, method getTermCounters.
/**
 * Fetches the term frequencies on the Wikipedia page for the given target.
 *
 * @param target the target
 * @return a HashMap mapping the terms on the page to their frequency,
 *         or <code>null</code> if the page could not be fetched
 */
public HashMap<String, TermCounter> getTermCounters(String target) {
    HashMap<String, TermCounter> rawTermCounters = null;
    try {
        String url = "http://en.wikipedia.org/wiki/" + target.replaceAll("\\s", "_");
        URLConnection connection = new URL(url).openConnection();
        connection.setDoInput(true);
        connection.setDoOutput(true);
        connection.setUseCaches(false);
        connection.setRequestProperty("User-Agent", "Ephyra");
        connection.connect();
        BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
        rawTermCounters = new HashMap<String, TermCounter>();
        boolean inTag = false;
        int c = 0;
        StringBuffer term = new StringBuffer();
        while ((c = reader.read()) != -1) {
            if (c == '<') {
                inTag = true;
                // a tag starts, so flush the current term
                if (term.length() != 0) {
                    String stemmedTerm = SnowballStemmer.stem(term.toString().toLowerCase());
                    System.out.println(stemmedTerm);
                    if (!rawTermCounters.containsKey(stemmedTerm))
                        rawTermCounters.put(stemmedTerm, new TermCounter());
                    rawTermCounters.get(stemmedTerm).increment(1);
                    term = new StringBuffer();
                }
            } else if (c == '>') {
                inTag = false;
            } else if (!inTag) {
                // whitespace and control characters terminate a term
                if (c < 33) {
                    if (term.length() != 0) {
                        String stemmedTerm = SnowballStemmer.stem(term.toString().toLowerCase());
                        System.out.println(stemmedTerm);
                        if (!rawTermCounters.containsKey(stemmedTerm))
                            rawTermCounters.put(stemmedTerm, new TermCounter());
                        rawTermCounters.get(stemmedTerm).increment(1);
                        term = new StringBuffer();
                    }
                } else {
                    term.append((char) c);
                }
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return rawTermCounters;
}
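The two identical flush blocks in the loop above could be factored into a helper. A sketch under the assumption that SnowballStemmer.stem and TermCounter.increment behave as used in the method (the helper name flushTerm is made up here):

import java.util.Map;

// Hypothetical helper factoring out the duplicated flush logic above.
private static void flushTerm(StringBuffer term, Map<String, TermCounter> rawTermCounters) {
    if (term.length() == 0)
        return;
    String stemmedTerm = SnowballStemmer.stem(term.toString().toLowerCase());
    if (!rawTermCounters.containsKey(stemmedTerm))
        rawTermCounters.put(stemmedTerm, new TermCounter());
    rawTermCounters.get(stemmedTerm).increment(1);
    // Reset the buffer in place so the caller's reference stays valid,
    // instead of reassigning a fresh StringBuffer as the original does.
    term.setLength(0);
}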
Use of java.net.URLConnection in project lucida by claritylab: class WebDocumentFetcher, method apply.
/**
 * Fetches the top <code>MAX_DOCS</code> documents containing the given
 * search engine snippets. The original snippets are dropped.
 *
 * @param results array of <code>Result</code> objects containing snippets
 * @return array of <code>Result</code> objects containing entire documents
 */
public Result[] apply(Result[] results) {
    // documents containing the search engine snippets
    docs = new ArrayList<Result>();
    // start document fetchers
    HashSet<String> urls = new HashSet<String>();
    for (Result result : results) {
        // only apply this filter to results for the semantic parsing approach
        Query query = result.getQuery();
        Predicate[] ps = query.getAnalyzedQuestion().getPredicates();
        if (!query.extractWith(FactoidsFromPredicatesFilter.ID) || ps.length == 0 || result.getScore() > Float.NEGATIVE_INFINITY)
            continue;
        // if the result is not a web document, just make a copy
        if (!result.getDocID().contains(":")) {
            Result newResult = result.getCopy();
            newResult.setScore(0);
            docs.add(newResult);
            continue;
        }
        // fetch at most MAX_DOCS documents
        if (urls.size() >= MAX_DOCS)
            break;
        String url = result.getDocID();
        // no forbidden document types
        if (url.matches("(?i).*?" + FORBIDDEN_DOCS))
            continue;
        // only HTTP connections
        try {
            URLConnection conn = (new URL(url)).openConnection();
            if (!(conn instanceof HttpURLConnection))
                continue;
        } catch (IOException e) {
            continue;
        }
        // no duplicate documents
        if (!urls.add(url))
            continue;
        // if caching is enabled, try to read the document from the cache
        if (CACHING) {
            FileCache cache = new FileCache(CACHE_DIR);
            String[] entries = cache.read(url);
            if (entries != null) {
                StringBuilder sb = new StringBuilder();
                for (String entry : entries) {
                    sb.append(entry);
                    sb.append("\n");
                }
                String docText = sb.toString();
                Result doc = new Result(docText, result.getQuery(), url, result.getHitPos());
                doc.setScore(0);
                docs.add(doc);
                continue;
            }
        }
        (new WebDocumentFetcher()).start(this, result);
    }
    // wait until all fetchers are done
    waitForDocs();
    // append the fetched documents to the old results
    Result[] newResults = docs.toArray(new Result[docs.size()]);
    Result[] allResults = new Result[results.length + newResults.length];
    for (int i = 0; i < results.length; i++)
        allResults[i] = results[i];
    for (int i = 0; i < newResults.length; i++)
        allResults[results.length + i] = newResults[i];
    return allResults;
}
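The two copy loops at the end are a hand-rolled version of System.arraycopy; an equivalent drop-in fragment:

// Equivalent to the two copy loops above.
Result[] allResults = new Result[results.length + newResults.length];
System.arraycopy(results, 0, allResults, 0, results.length);
System.arraycopy(newResults, 0, allResults, results.length, newResults.length);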
Use of java.net.URLConnection in project cogtool by cogtool: class FetchURLUtil, method fetchURL.
/**
 * requestProperties maps String property to String value;
 * if <code>null</code>, then fetch the content without registering
 * any HTTP request properties.
 *
 * For example, to "act as" a browser when making an http/s request,
 * map "User-Agent" to something like:
 * "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
 */
public static BufferedReader fetchURL(String urlStr, List<String> errors, Map<String, String> requestProperties) {
    InputStream urlStream = null;
    InputStreamReader urlReader = null;
    try {
        URL url = new URL(urlStr);
        URLConnection urlConnection = url.openConnection();
        // Register any HTTP request properties provided.
        if (requestProperties != null) {
            for (Map.Entry<String, String> keyValue : requestProperties.entrySet()) {
                urlConnection.setRequestProperty(keyValue.getKey(), keyValue.getValue());
            }
        }
        urlStream = urlConnection.getInputStream();
        urlReader = new InputStreamReader(urlStream);
        return new BufferedReader(urlReader);
    } catch (IOException ex) {
        if (errors != null) {
            errors.add("fetchURL failed for url: " + urlStr + " with exception: " + ex);
        }
        // Close whatever was opened before the failure.
        if (urlReader != null) {
            try {
                urlReader.close();
            } catch (IOException closeEx) {
                // Ignore; irrelevant
            }
        }
        if (urlStream != null) {
            try {
                urlStream.close();
            } catch (IOException closeEx) {
                // Ignore; irrelevant
            }
        }
    }
    return null;
}
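A usage sketch of fetchURL, following the javadoc's suggestion to act as a browser (the URL is illustrative, not taken from the CogTool source):

import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class FetchURLExample {
    public static void main(String[] args) throws Exception {
        List<String> errors = new ArrayList<String>();
        Map<String, String> props = new HashMap<String, String>();
        // Pretend to be a browser, as the javadoc suggests.
        props.put("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)");
        BufferedReader reader = FetchURLUtil.fetchURL("http://example.com/", errors, props);
        if (reader == null) {
            // On failure, fetchURL returns null and reports through the errors list.
            for (String error : errors) {
                System.err.println(error);
            }
            return;
        }
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        } finally {
            reader.close();
        }
    }
}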