Use of org.jabref.logic.net.URLDownload in project jabref by JabRef.
Class ACMPortalFetcher, method downloadEntryBibTeX.
/**
 * Downloads the BibTeX record for one ACM Portal id and, optionally, its abstract.
 * <p>
 * The BibTeX text is cut out of the returned HTML between START_BIBTEX_ENTRY and
 * END_BIBTEX_ENTRY_HTML and handed to {@link BibtexParser}. Only the first parsed entry is used.
 * After each request the method sleeps for WAIT_TIME.
 *
 * @param id               identifier that is inserted into the BibTeX and abstract URLs
 * @param downloadAbstract whether the abstract page should be fetched and stored in the entry
 * @return the first parsed entry, or an empty Optional if the page could not be downloaded,
 *         contained no BibTeX block, could not be parsed, or the thread was interrupted
 */
private static Optional<BibEntry> downloadEntryBibTeX(String id, boolean downloadAbstract) {
    try {
        URL url = new URL(ACMPortalFetcher.START_URL + ACMPortalFetcher.BIBTEX_URL + id + ACMPortalFetcher.BIBTEX_URL_END);
        URLConnection connection = url.openConnection();
        // set user-agent to avoid being blocked as a crawler
        connection.addRequestProperty("User-Agent", URLDownload.USER_AGENT);
        Collection<BibEntry> items = null;
        try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
            String htmlCode = in.lines().filter(s -> !s.isEmpty()).collect(Collectors.joining());
            int bibtexStart = htmlCode.indexOf(START_BIBTEX_ENTRY);
            // Look for the end marker only behind the start marker, so an earlier occurrence cannot
            // produce an inverted range.
            int bibtexEnd = (bibtexStart < 0) ? -1 : htmlCode.indexOf(END_BIBTEX_ENTRY_HTML, bibtexStart);
            if ((bibtexStart < 0) || (bibtexEnd < 0)) {
                // Without this guard substring() would throw an unchecked exception that no catch
                // clause of this method handles.
                LOGGER.info("No BibTeX entry found at: " + ACMPortalFetcher.BIBTEX_URL + id + ACMPortalFetcher.BIBTEX_URL_END);
                return Optional.empty();
            }
            String bibtexString = htmlCode.substring(bibtexStart, bibtexEnd);
            items = new BibtexParser(Globals.prefs.getImportFormatPreferences()).parseEntries(bibtexString);
        } catch (IOException | ParseException e) {
            LOGGER.info("Download of BibTeX information from ACM Portal failed.", e);
        }
        if ((items == null) || items.isEmpty()) {
            return Optional.empty();
        }
        BibEntry entry = items.iterator().next();
        // wait between requests or you will be blocked by ACM
        Thread.sleep(ACMPortalFetcher.WAIT_TIME);
        // get abstract
        if (downloadAbstract) {
            URLDownload dl = new URLDownload(ACMPortalFetcher.START_URL + ACMPortalFetcher.ABSTRACT_URL + id);
            String page = dl.asString(Globals.prefs.getDefaultEncoding());
            Matcher absM = ACMPortalFetcher.ABSTRACT_PATTERN.matcher(page);
            if (absM.find()) {
                entry.setField(FieldName.ABSTRACT, absM.group(1).trim());
            }
            // wait between requests or you will be blocked by ACM
            Thread.sleep(ACMPortalFetcher.WAIT_TIME);
        }
        return Optional.of(entry);
    } catch (NoSuchElementException e) {
        LOGGER.info("Bad BibTeX record read at: " + ACMPortalFetcher.BIBTEX_URL + id + ACMPortalFetcher.BIBTEX_URL_END, e);
    } catch (MalformedURLException e) {
        LOGGER.info("Malformed URL.", e);
    } catch (IOException e) {
        LOGGER.info("Cannot connect.", e);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up the stack can still react to it.
        Thread.currentThread().interrupt();
    }
    return Optional.empty();
}
Use of org.jabref.logic.net.URLDownload in project jabref by JabRef.
Class IEEE, method findFullText.
/**
 * Tries to locate a direct PDF link for the given entry.
 * <p>
 * A stamp string is taken from the entry's URL field if STAMP_PATTERN matches it. Otherwise, if the
 * entry's DOI starts with IEEE_DOI and has an external URI, the page behind that URI is downloaded
 * and searched with the same pattern. With a stamp string at hand, the page at BASE_URL + stamp is
 * downloaded and searched with PDF_PATTERN.
 *
 * @param entry the entry to find a full text for; must not be null
 * @return the URL captured by PDF_PATTERN, or an empty Optional if no stamp string or no PDF link was found
 * @throws IOException if one of the downloads fails or the captured link is not a valid URL
 */
@Override
public Optional<URL> findFullText(BibEntry entry) throws IOException {
    Objects.requireNonNull(entry);

    String stamp = "";

    // First attempt: the URL field may already point to the stamp page
    Optional<String> entryUrl = entry.getField(FieldName.URL);
    if (entryUrl.isPresent()) {
        Matcher urlMatcher = STAMP_PATTERN.matcher(entryUrl.get());
        if (urlMatcher.find()) {
            stamp = urlMatcher.group(1);
        }
    }

    // Second attempt: resolve the DOI and search the resulting page
    if (stamp.isEmpty()) {
        Optional<DOI> parsedDoi = entry.getField(FieldName.DOI).flatMap(DOI::parse);
        boolean resolvable = parsedDoi.isPresent()
                && parsedDoi.get().getDOI().startsWith(IEEE_DOI)
                && parsedDoi.get().getExternalURI().isPresent();
        if (resolvable) {
            URL doiUrl = parsedDoi.get().getExternalURI().get().toURL();
            String doiPage = new URLDownload(doiUrl).asString();
            Matcher pageMatcher = STAMP_PATTERN.matcher(doiPage);
            if (pageMatcher.find()) {
                stamp = pageMatcher.group(1);
            }
        }
    }

    // Neither attempt produced a stamp string
    if (stamp.isEmpty()) {
        return Optional.empty();
    }

    // The stamp page embeds the PDF in a frame; look for the frame's target
    String stampPage = new URLDownload(BASE_URL + stamp).asString();
    Matcher pdfMatcher = PDF_PATTERN.matcher(stampPage);
    if (!pdfMatcher.find()) {
        return Optional.empty();
    }

    LOGGER.debug("Full text document found on IEEE Xplore");
    return Optional.of(new URL(pdfMatcher.group(1)));
}
Use of org.jabref.logic.net.URLDownload in project jabref by JabRef.
Class GoogleScholar, method downloadEntry.
/**
 * Downloads the content behind the given link and parses it as BibTeX.
 *
 * @param link address of the BibTeX content to download
 * @return the single entry contained in the downloaded content
 * @throws IOException      if the download or reading the content fails
 * @throws FetcherException if parsing yields no database or a number of entries other than one
 */
private BibEntry downloadEntry(String link) throws IOException, FetcherException {
    String bibContent = new URLDownload(link).asString();
    ParserResult parsed = new BibtexParser(importFormatPreferences).parse(new StringReader(bibContent));

    if ((parsed == null) || (parsed.getDatabase() == null)) {
        throw new FetcherException("Parsing entries from Google Scholar bib file failed.");
    }

    Collection<BibEntry> found = parsed.getDatabase().getEntries();
    if (found.size() != 1) {
        LOGGER.debug(found.size() + " entries found! (" + link + ")");
        throw new FetcherException("Parsing entries from Google Scholar bib file failed.");
    }

    return found.iterator().next();
}
Use of org.jabref.logic.net.URLDownload in project jabref by JabRef.
Class GoogleScholar, method addHitsFromQuery.
/**
 * Downloads the page at the given query URL, follows every link matched by LINK_TO_BIB_PATTERN and
 * appends the entry downloaded from each link to the given list.
 *
 * @param entryList list that receives the downloaded entries, in the order the links are matched
 * @param queryURL  address of the page to search for links
 * @throws IOException      if the page or one of the linked entries cannot be downloaded
 * @throws FetcherException if one of the linked entries cannot be parsed
 */
private void addHitsFromQuery(List<BibEntry> entryList, String queryURL) throws IOException, FetcherException {
    String content = new URLDownload(queryURL).asString();
    Matcher matcher = LINK_TO_BIB_PATTERN.matcher(content);
    while (matcher.find()) {
        // The link is taken from HTML markup, where ampersands are written as the entity "&amp;".
        // Unescape it so the query parameters are separated correctly in the request.
        String citationsPageURL = matcher.group().replace("&amp;", "&");
        BibEntry newEntry = downloadEntry(citationsPageURL);
        entryList.add(newEntry);
    }
}
Use of org.jabref.logic.net.URLDownload in project jabref by JabRef.
Class BibsonomyScraper, method getEntry.
/**
 * Looks up a single entry for the given URL through the BibSonomy scraper.
 * <p>
 * The URL is escaped, embedded between BIBSONOMY_SCRAPER and BIBSONOMY_SCRAPER_POST, and the content
 * downloaded from the resulting address is parsed as BibTeX. Failures are logged as warnings and
 * are not propagated to the caller.
 *
 * @param entryUrl                address of the page the scraper should process
 * @param importFormatPreferences preferences passed on to the BibTeX parser
 * @return the result of parsing the downloaded BibTeX, or an empty Optional if the download,
 *         the parsing, or any other step failed with a runtime exception
 */
public static Optional<BibEntry> getEntry(String entryUrl, ImportFormatPreferences importFormatPreferences) {
    Optional<BibEntry> result = Optional.empty();
    try {
        // Escape the characters that would otherwise be interpreted as part of the scraper URL.
        // "%" has to be handled first, because every later replacement introduces new "%" signs.
        String escaped = entryUrl
                .replace("%", "%25")
                .replace(":", "%3A")
                .replace("/", "%2F")
                .replace("?", "%3F")
                .replace("&", "%26")
                .replace("=", "%3D");
        URL scraperUrl = new URL(BibsonomyScraper.BIBSONOMY_SCRAPER + escaped + BibsonomyScraper.BIBSONOMY_SCRAPER_POST);
        String downloaded = new URLDownload(scraperUrl).asString();
        result = BibtexParser.singleFromString(downloaded, importFormatPreferences);
    } catch (IOException ex) {
        LOGGER.warn("Could not download entry", ex);
    } catch (ParseException ex) {
        LOGGER.warn("Could not parse entry", ex);
    } catch (RuntimeException ex) {
        LOGGER.warn("Could not get entry", ex);
    }
    return result;
}
Aggregations