use of org.jabref.logic.net.URLDownload in project jabref by JabRef.
the class FulltextFetchers method findFullTextPDF.
/**
 * Queries every fetcher in {@code finders}, in order, for a fulltext link of the given entry
 * and returns the first link that {@link URLDownload#isPdf()} accepts.
 * <p>
 * The lookup runs on a clone of the entry. If the clone has no parsable DOI, a DOI lookup is
 * attempted first and its result is stored on the clone only.
 *
 * @param entry the entry to find a fulltext PDF for; it is cloned, not modified
 * @return the first PDF link found, or an empty Optional if no fetcher produced one
 */
public Optional<URL> findFullTextPDF(BibEntry entry) {
    // Work on a copy so that a freshly looked-up DOI never ends up in the caller's entry.
    BibEntry workingCopy = (BibEntry) entry.clone();

    boolean hasParsableDoi = workingCopy.getField(FieldName.DOI).flatMap(DOI::parse).isPresent();
    if (!hasParsableDoi) {
        try {
            WebFetchers.getIdFetcherForIdentifier(DOI.class)
                    .findIdentifier(workingCopy)
                    .ifPresent(foundDoi -> workingCopy.setField(FieldName.DOI, foundDoi.getDOI()));
        } catch (FetcherException e) {
            // A missing DOI is not fatal; the fetchers below are still tried.
            LOGGER.debug("Failed to find DOI", e);
        }
    }

    for (FulltextFetcher fetcher : finders) {
        try {
            Optional<URL> candidate = fetcher.findFullText(workingCopy);
            if (!candidate.isPresent()) {
                continue;
            }
            URLDownload probe = new URLDownload(candidate.get().toString());
            if (probe.isPdf()) {
                return candidate;
            }
        } catch (IOException | FetcherException e) {
            // A failing fetcher only skips itself; the remaining ones are still asked.
            LOGGER.debug("Failed to find fulltext PDF at given URL", e);
        }
    }

    return Optional.empty();
}
use of org.jabref.logic.net.URLDownload in project jabref by JabRef.
the class DoiFetcher method performSearchById.
/**
 * Parses the identifier as a DOI, requests the DOI's URI with the
 * {@code Accept: application/x-bibtex} header and parses the response as a single BibTeX entry.
 * A parsed entry is passed through {@code doPostCleanup} before it is returned.
 *
 * @param identifier the DOI to look up
 * @return the parsed entry, or an empty Optional if the parser produced none
 * @throws FetcherException if the identifier is not a valid DOI, the download fails,
 *                          or the response cannot be parsed
 */
@Override
public Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
    Optional<DOI> parsedDoi = DOI.parse(identifier);
    if (!parsedDoi.isPresent()) {
        throw new FetcherException(Localization.lang("Invalid_DOI:_'%0'.", identifier));
    }

    try {
        // Ask the DOI resolver for BibTeX data via content negotiation.
        URLDownload bibtexRequest = new URLDownload(new URL(parsedDoi.get().getURIAsASCIIString()));
        bibtexRequest.addHeader("Accept", "application/x-bibtex");

        // Turn the BibTeX text into an entry and clean it up if one was parsed.
        Optional<BibEntry> parsedEntry = BibtexParser.singleFromString(bibtexRequest.asString(), preferences);
        parsedEntry.ifPresent(this::doPostCleanup);
        return parsedEntry;
    } catch (IOException e) {
        throw new FetcherException(Localization.lang("Connection error"), e);
    } catch (ParseException e) {
        throw new FetcherException("Could not parse BibTeX entry", e);
    }
}
use of org.jabref.logic.net.URLDownload in project jabref by JabRef.
the class DoiResolution method findFullText.
/**
 * Follows the entry's DOI to the page it resolves to and scans that page for links to a PDF.
 * <p>
 * A link is a candidate if its absolute URL or its link text contains "pdf"
 * (case-insensitively) and {@link URLDownload#isPdf()} accepts it. A result is only returned
 * if exactly one candidate is found. Any {@link IOException} raised while fetching or
 * scanning is logged and leads to an empty result.
 *
 * @param entry the entry whose DOI field is resolved; must not be null
 * @return the single PDF link found on the page, or an empty Optional otherwise
 * @throws IOException declared by the signature; I/O failures inside the scan are caught and logged
 */
@Override
public Optional<URL> findFullText(BibEntry entry) throws IOException {
    Objects.requireNonNull(entry);

    Optional<DOI> doi = entry.getField(FieldName.DOI).flatMap(DOI::parse);
    if (!doi.isPresent()) {
        return Optional.empty();
    }

    String sciLink = doi.get().getURIAsASCIIString();
    if (sciLink.isEmpty()) {
        return Optional.empty();
    }

    // follow all redirects and scan for a single pdf link
    try {
        Connection connection = Jsoup.connect(sciLink);
        // pretend to be a browser (agent & referrer)
        connection.userAgent(URLDownload.USER_AGENT);
        connection.referrer("http://www.google.com");
        connection.followRedirects(true);
        connection.ignoreHttpErrors(true);
        // some publishers are quite slow (default is 3s)
        connection.timeout(5000);
        Document html = connection.get();

        // scan for PDF
        List<URL> links = new ArrayList<>();
        for (Element element : html.body().select("a[href]")) {
            String href = element.attr("abs:href");
            if (href.isEmpty()) {
                // "abs:href" yields an empty string when no absolute URL can be built;
                // such an anchor cannot be downloaded, so skip it instead of failing the scan.
                continue;
            }
            // Lower-case copies are used for matching only. The URL itself keeps its
            // original case because URL paths are case-sensitive.
            String hrefLower = href.toLowerCase(Locale.ENGLISH);
            String hrefTextLower = element.text().toLowerCase(Locale.ENGLISH);
            // See https://github.com/lehner/LocalCopy for more scrape ideas
            if ((hrefLower.contains("pdf") || hrefTextLower.contains("pdf")) && new URLDownload(href).isPdf()) {
                links.add(new URL(href));
            }
        }

        // return if only one link was found (high accuracy)
        if (links.size() == 1) {
            LOGGER.info("Fulltext PDF found @ " + sciLink);
            return Optional.of(links.get(0));
        }
    } catch (IOException e) {
        LOGGER.warn("DoiResolution fetcher failed: ", e);
    }

    return Optional.empty();
}
use of org.jabref.logic.net.URLDownload in project jabref by JabRef.
the class GoogleScholar method obtainAndModifyCookie.
/**
 * Fetches the cookies for https://scholar.google.com and appends ":CF=4" to the value of
 * each returned cookie.
 *
 * @throws FetcherException if retrieving the cookies fails with an IOException
 */
private void obtainAndModifyCookie() throws FetcherException {
    List<HttpCookie> scholarCookies;
    try {
        scholarCookies = new URLDownload("https://scholar.google.com").getCookieFromUrl();
    } catch (IOException e) {
        throw new FetcherException("Cookie configuration for Google Scholar failed.", e);
    }

    // append "CF=4" which represents "Citation format bibtex"
    scholarCookies.forEach(scholarCookie -> scholarCookie.setValue(scholarCookie.getValue() + ":CF=4"));
}
use of org.jabref.logic.net.URLDownload in project jabref by JabRef.
the class MrDLibFetcher method makeServerRequest.
/**
 * Contacts the server with the title of the selected item.
 *
 * @param queryByTitle the title of the selected entry; it is passed to {@code constructQuery}
 *                     to build the request sent to the MDL server
 * @return the server response, an XML document as a String, in which the escaped sequences
 *         {@code &gt;} and {@code &lt;} have been converted back to angle brackets
 * @throws FetcherException if downloading the response fails with an IOException
 */
private String makeServerRequest(String queryByTitle) throws FetcherException {
    try {
        URLDownload urlDownload = new URLDownload(constructQuery(queryByTitle));
        // NOTE(review): certificate verification is switched off for this request;
        // confirm this is still required for the server.
        urlDownload.bypassSSLVerification();
        String response = urlDownload.asString();
        // Conversion of the escaped sequences "&gt;" and "&lt;" back to ">" and "<".
        // Literal replace() is used because no regular expression is needed.
        return response.replace("&gt;", ">").replace("&lt;", "<");
    } catch (IOException e) {
        throw new FetcherException("Problem downloading", e);
    }
}
Aggregations