use of org.jabref.logic.importer.FetcherException in project jabref by JabRef.
the class GoogleScholar method findFullText.
/**
 * Searches Google Scholar by title and returns the first full-text link found on the result page.
 * <p>
 * The result page is probed for the elements {@code #gs_ggsW0 a} up to
 * {@code #gs_ggsW(NUM_RESULTS - 1) a}; the first one carrying a non-empty {@code href} wins.
 *
 * @param entry the entry to find a full text for; must not be {@code null}
 * @return the URL of the first link found, or an empty Optional if the entry has no title
 *         or no link was present in the inspected results
 * @throws IOException      if fetching the result page fails or the found link is not a valid URL
 * @throws FetcherException if the search URI cannot be built
 */
@Override
public Optional<URL> findFullText(BibEntry entry) throws IOException, FetcherException {
    Objects.requireNonNull(entry);

    // The search is title-based, so an entry without a title cannot be looked up
    if (!entry.hasField(FieldName.TITLE)) {
        return Optional.empty();
    }

    try {
        URIBuilder searchUri = new URIBuilder(SEARCH_IN_TITLE_URL);
        searchUri.addParameter("as_q", "");
        searchUri.addParameter("as_epq", entry.getField(FieldName.TITLE).orElse(null));
        searchUri.addParameter("as_occt", "title");

        Document resultPage = Jsoup.connect(searchUri.toString())
                .userAgent(URLDownload.USER_AGENT)
                .get();

        // TODO: link always on first result or none?
        for (int resultIndex = 0; resultIndex < NUM_RESULTS; resultIndex++) {
            Elements anchors = resultPage.select(String.format("#gs_ggsW%s a", resultIndex));
            if (anchors.first() == null) {
                continue;
            }

            String href = anchors.first().attr("href");
            // link present?
            if ("".equals(href)) {
                continue;
            }

            // TODO: check title inside pdf + length?
            // TODO: report error function needed?! query -> result
            LOGGER.info("Fulltext PDF found @ Google: " + href);
            return Optional.of(new URL(href));
        }
    } catch (URISyntaxException e) {
        throw new FetcherException("Building URI failed.", e);
    }

    return Optional.empty();
}
use of org.jabref.logic.importer.FetcherException in project jabref by JabRef.
the class MrDLibFetcher method performSearch.
/**
 * Requests entries from Mr. DLib using the LaTeX-free title of the given entry.
 * <p>
 * If the server response is not recognized by {@link MrDLibImporter}, a single placeholder
 * entry carrying an error message in its {@code html_representation} field is returned instead.
 *
 * @param entry the entry whose title is sent to the server
 * @return the entries parsed from the response, the placeholder error entry, or an empty list
 *         if the entry has no title
 * @throws FetcherException if reading the server response fails; the underlying
 *                          {@link IOException} is attached as the cause
 */
@Override
public List<BibEntry> performSearch(BibEntry entry) throws FetcherException {
    Optional<String> title = entry.getLatexFreeField(FieldName.TITLE);
    if (!title.isPresent()) {
        // without a title there is no reason to ask MrDLib
        return new ArrayList<>(0);
    }

    String response = makeServerRequest(title.get());
    MrDLibImporter importer = new MrDLibImporter();
    ParserResult parserResult;
    try {
        // Each call gets its own reader because the format check consumes the one it is given
        if (importer.isRecognizedFormat(new BufferedReader(new StringReader(response)))) {
            parserResult = importer.importDatabase(new BufferedReader(new StringReader(response)));
        } else {
            // For displaying An ErrorMessage
            BibEntry errorBibEntry = new BibEntry();
            errorBibEntry.setField("html_representation", Localization.lang("Error_while_fetching_from_%0", "Mr.DLib"));
            BibDatabase errorBibDataBase = new BibDatabase();
            errorBibDataBase.insertEntry(errorBibEntry);
            parserResult = new ParserResult(errorBibDataBase);
        }
    } catch (IOException e) {
        LOGGER.error(e.getMessage(), e);
        // Keep the original exception as the cause so callers can see what actually went wrong
        throw new FetcherException("XML Parser IOException.", e);
    }
    return parserResult.getDatabase().getEntries();
}
use of org.jabref.logic.importer.FetcherException in project jabref by JabRef.
the class MedlineFetcher method fetchMedline.
/**
 * Fetch and parse Medline items from eutils.ncbi.nlm.nih.gov.
 * The E-utilities generate a huge XML file containing all entries for the ids
 *
 * @param ids A list of IDs to search for.
 * @return The parsed entries, each passed through {@code doPostCleanup}.
 * @throws FetcherException if the fetch URL cannot be generated or reading the response fails
 */
private List<BibEntry> fetchMedline(List<String> ids) throws FetcherException {
    try {
        //Separate the IDs with a comma to search multiple entries
        URL fetchURL = getURLForID(String.join(",", ids));
        URLConnection data = fetchURL.openConnection();

        ParserResult result;
        // Close the response stream once parsing is done instead of leaking the connection
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(data.getInputStream(), StandardCharsets.UTF_8))) {
            result = new MedlineImporter().importDatabase(reader);
        }

        if (result.hasWarnings()) {
            LOGGER.warn(result.getErrorMessage());
        }
        List<BibEntry> resultList = result.getDatabase().getEntries();
        resultList.forEach(this::doPostCleanup);
        return resultList;
    } catch (URISyntaxException | MalformedURLException e) {
        throw new FetcherException("Error while generating fetch URL", Localization.lang("Error while generating fetch URL"), e);
    } catch (IOException e) {
        throw new FetcherException("Error while fetching from Medline", Localization.lang("Error while fetching from %0", "Medline"), e);
    }
}
use of org.jabref.logic.importer.FetcherException in project jabref by JabRef.
the class PdfContentImporter method importDatabase.
/**
 * Heuristically extracts bibliographic data from the first page of a PDF file.
 * <p>
 * If a DOI is found in the first-page text, the entry is looked up through {@link DoiFetcher}
 * and that result is returned directly. Otherwise the text is scanned line by line:
 * an optional conference/copyright line, then title, authors, abstract/keywords, and finally
 * the blocks are read from the end of the page backwards to pick up year, Springer
 * "(Eds.)" lines, a DOI and IEEE conference information.
 * <p>
 * The scan works on the instance fields {@code lines}, {@code i}, {@code curString} and
 * {@code year}, which are also read and modified by the helper methods called here.
 *
 * @param filePath        path of the PDF file to import
 * @param defaultEncoding not used by this implementation
 * @return a result holding at most one entry, an empty result if the first page has no
 *         non-empty line, or an error result if the file cannot be read, is encrypted in an
 *         unsupported way, or the DOI lookup fails
 */
@Override
public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
    final ArrayList<BibEntry> result = new ArrayList<>(1);
    try (FileInputStream fileStream = new FileInputStream(filePath.toFile());
         PDDocument document = XMPUtil.loadWithAutomaticDecryption(fileStream)) {
        String firstPageContents = getFirstPageContents(document);

        Optional<DOI> doi = DOI.findInText(firstPageContents);
        if (doi.isPresent()) {
            ParserResult parserResult = new ParserResult(result);
            Optional<BibEntry> entry = new DoiFetcher(importFormatPreferences).performSearchById(doi.get().getDOI());
            entry.ifPresent(parserResult.getDatabase()::insertEntry);
            return parserResult;
        }

        // idea: split[] contains the different lines
        // blocks are separated by empty lines
        // treat each block
        //   or do special treatment at authors (which are not broken)
        //   therefore, we do a line-based and not a block-based splitting
        // i points to the current line
        // curString (mostly) contains the current block
        //   the different lines are joined into one and thereby separated by " "
        lines = firstPageContents.split(System.lineSeparator());

        proceedToNextNonEmptyLine();
        if (i >= lines.length) {
            // return empty list
            return new ParserResult();
        }

        // we start at the current line
        curString = lines[i];
        // i might get incremented later and curString modified, too
        i = i + 1;

        String author;
        String editor = null;
        String abstractT = null;
        String keywords = null;
        String title;
        String conference = null;
        String DOI = null;
        String series = null;
        String volume = null;
        String number = null;
        String pages = null;
        // year is a class variable as the method extractYear() uses it;
        String publisher = null;

        EntryType type = BibtexEntryTypes.INPROCEEDINGS;
        if (curString.length() > 4) {
            // special case: possibly conference as first line on the page
            extractYear();
            if (curString.contains("Conference")) {
                fillCurStringWithNonEmptyLines();
                conference = curString;
                curString = "";
            } else {
                // e.g. Copyright (c) 1998 by the Genetics Society of America
                // future work: get year using RegEx
                String lower = curString.toLowerCase(Locale.ROOT);
                if (lower.contains("copyright")) {
                    fillCurStringWithNonEmptyLines();
                    publisher = curString;
                    curString = "";
                }
            }
        }

        // start: title
        fillCurStringWithNonEmptyLines();
        title = streamlineTitle(curString);
        curString = "";
        //i points to the next non-empty line

        // after title: authors
        author = null;
        while ((i < lines.length) && !"".equals(lines[i])) {
            // author names are unlikely to be lines among different lines
            // treat them line by line
            curString = streamlineNames(lines[i]);
            if (author == null) {
                author = curString;
            } else if (!"".equals(curString)) {
                // if lines[i] is "and" then "" is returned by streamlineNames -> nothing to append
                author = author.concat(" and ").concat(curString);
            }
            i++;
        }
        curString = "";
        i++;

        // then, abstract and keywords follow
        while (i < lines.length) {
            curString = lines[i];
            if ((curString.length() >= "Abstract".length()) && "Abstract".equalsIgnoreCase(curString.substring(0, "Abstract".length()))) {
                if (curString.length() == "Abstract".length()) {
                    // only word "abstract" found -- skip line
                    curString = "";
                } else {
                    curString = curString.substring("Abstract".length() + 1).trim().concat(System.lineSeparator());
                }
                i++;
                // whereas we need linebreak as separator
                while ((i < lines.length) && !"".equals(lines[i])) {
                    curString = curString.concat(lines[i]).concat(System.lineSeparator());
                    i++;
                }
                abstractT = curString.trim();
                i++;
            } else if ((curString.length() >= "Keywords".length()) && "Keywords".equalsIgnoreCase(curString.substring(0, "Keywords".length()))) {
                if (curString.length() == "Keywords".length()) {
                    // only word "Keywords" found -- skip line
                    curString = "";
                } else {
                    curString = curString.substring("Keywords".length() + 1).trim();
                }
                i++;
                fillCurStringWithNonEmptyLines();
                keywords = removeNonLettersAtEnd(curString);
            } else {
                String lower = curString.toLowerCase(Locale.ROOT);

                int pos = lower.indexOf("technical");
                if (pos >= 0) {
                    type = BibtexEntryTypes.TECHREPORT;
                    // Search and cut on the same trimmed string; mixing a trimmed index with the
                    // untrimmed text yields a wrong number when the line has surrounding blanks
                    String trimmedLine = curString.trim();
                    pos = trimmedLine.lastIndexOf(' ');
                    if (pos >= 0) {
                        number = trimmedLine.substring(pos + 1);
                    }
                }

                i++;
                proceedToNextNonEmptyLine();
            }
        }

        i = lines.length - 1;

        while (i >= 0) {
            readLastBlock();
            // i now points to the block before or is -1
            // curString contains the last block, separated by " "

            extractYear();

            int pos = curString.indexOf("(Eds.)");
            if ((pos >= 0) && (publisher == null)) {
                // looks like a Springer last line
                // e.g: A. Persson and J. Stirna (Eds.): PoEM 2009, LNBIP 39, pp. 161-175, 2009.
                publisher = "Springer";
                // No editor names can precede "(Eds.)" when it starts the block
                if (pos > 0) {
                    editor = streamlineNames(curString.substring(0, pos - 1));
                }
                //+2 because of ":" after (Eds.) and the subsequent space
                // (clamped so that a block ending right after "(Eds.)" does not overrun)
                int restStart = Math.min(pos + "(Eds.)".length() + 2, curString.length());
                curString = curString.substring(restStart);
                String[] springerSplit = curString.split(", ");
                if (springerSplit.length >= 4) {
                    conference = springerSplit[0];

                    String seriesData = springerSplit[1];
                    int lastSpace = seriesData.lastIndexOf(' ');
                    // Series and volume can only be separated if there is a space between them
                    if (lastSpace >= 0) {
                        series = seriesData.substring(0, lastSpace);
                        volume = seriesData.substring(lastSpace + 1);
                    }

                    // Skip the leading "pp. " prefix; ignore tokens too short to carry it
                    if (springerSplit[2].length() >= 4) {
                        pages = springerSplit[2].substring(4);
                    }

                    if (springerSplit[3].length() >= 4) {
                        year = springerSplit[3].substring(0, 4);
                    }
                }
            } else {
                if (DOI == null) {
                    pos = curString.indexOf("DOI");
                    if (pos < 0) {
                        pos = curString.indexOf(FieldName.DOI);
                    }
                    if (pos >= 0) {
                        pos += 3;
                        // Nothing follows the marker if it ends the block; avoid reading past the end
                        if (pos < curString.length()) {
                            char delimiter = curString.charAt(pos);
                            if ((delimiter == ':') || (delimiter == ' ')) {
                                pos++;
                            }
                            int nextSpace = curString.indexOf(' ', pos);
                            if (nextSpace > 0) {
                                DOI = curString.substring(pos, nextSpace);
                            } else {
                                DOI = curString.substring(pos);
                            }
                        }
                    }
                }

                if ((publisher == null) && curString.contains("IEEE")) {
                    // IEEE has the conference things at the end
                    publisher = "IEEE";

                    if (conference == null) {
                        pos = curString.indexOf('$');
                        if (pos > 0) {
                            // we found the price
                            // before the price, the ISSN is stated
                            // skip that
                            pos -= 2;
                            while ((pos >= 0) && (curString.charAt(pos) != ' ')) {
                                pos--;
                            }
                            if (pos > 0) {
                                conference = curString.substring(0, pos);
                            }
                        }
                    }
                }
            }
        }

        BibEntry entry = new BibEntry();
        entry.setType(type);

        if (author != null) {
            entry.setField(FieldName.AUTHOR, author);
        }
        if (editor != null) {
            entry.setField(FieldName.EDITOR, editor);
        }
        if (abstractT != null) {
            entry.setField(FieldName.ABSTRACT, abstractT);
        }
        if (!Strings.isNullOrEmpty(keywords)) {
            entry.setField(FieldName.KEYWORDS, keywords);
        }
        if (title != null) {
            entry.setField(FieldName.TITLE, title);
        }
        if (conference != null) {
            entry.setField(FieldName.BOOKTITLE, conference);
        }
        if (DOI != null) {
            entry.setField(FieldName.DOI, DOI);
        }
        if (series != null) {
            entry.setField(FieldName.SERIES, series);
        }
        if (volume != null) {
            entry.setField(FieldName.VOLUME, volume);
        }
        if (number != null) {
            entry.setField(FieldName.NUMBER, number);
        }
        if (pages != null) {
            entry.setField(FieldName.PAGES, pages);
        }
        if (year != null) {
            entry.setField(FieldName.YEAR, year);
        }
        if (publisher != null) {
            entry.setField(FieldName.PUBLISHER, publisher);
        }

        result.add(entry);
    } catch (EncryptedPdfsNotSupportedException e) {
        return ParserResult.fromErrorMessage(Localization.lang("Decryption not supported."));
    } catch (IOException exception) {
        return ParserResult.fromError(exception);
    } catch (FetcherException e) {
        return ParserResult.fromErrorMessage(e.getMessage());
    }
    return new ParserResult(result);
}
use of org.jabref.logic.importer.FetcherException in project jabref by JabRef.
the class CrossrefFetcherEvaluator method main.
/**
 * Evaluates the CrossRef DOI lookup against the entries of a BibTeX file.
 * <p>
 * Every entry is looked up on a fixed pool of five worker threads. For entries that already
 * carry a parseable DOI the looked-up DOI is compared (case-insensitively) with the stored one;
 * for entries without one, newly found DOIs are counted. A summary is printed at the end.
 *
 * @param args {@code args[0]} is the path of the BibTeX file to evaluate
 * @throws IOException          if the file cannot be read or parsed
 * @throws InterruptedException if the main thread is interrupted while waiting for the workers
 */
public static void main(String[] args) throws IOException, InterruptedException {
    Globals.prefs = JabRefPreferences.getInstance();
    try (FileReader reader = new FileReader(args[0])) {
        BibtexParser parser = new BibtexParser(Globals.prefs.getImportFormatPreferences());
        ParserResult result = parser.parse(reader);
        BibDatabase db = result.getDatabase();
        List<BibEntry> entries = db.getEntries();

        AtomicInteger dois = new AtomicInteger();
        AtomicInteger doiFound = new AtomicInteger();
        AtomicInteger doiNew = new AtomicInteger();
        AtomicInteger doiIdentical = new AtomicInteger();

        int total = entries.size();
        CountDownLatch countDownLatch = new CountDownLatch(total);
        ExecutorService executorService = Executors.newFixedThreadPool(5);
        try {
            for (BibEntry entry : entries) {
                executorService.execute(new Runnable() {

                    @Override
                    public void run() {
                        try {
                            Optional<DOI> origDOI = entry.getField(FieldName.DOI).flatMap(DOI::parse);
                            if (origDOI.isPresent()) {
                                dois.incrementAndGet();
                                try {
                                    Optional<DOI> crossrefDOI = new CrossRef().findIdentifier(entry);
                                    if (crossrefDOI.isPresent()) {
                                        doiFound.incrementAndGet();
                                        if (origDOI.get().getDOI().equalsIgnoreCase(crossrefDOI.get().getDOI())) {
                                            doiIdentical.incrementAndGet();
                                        } else {
                                            System.out.println("DOI not identical for : " + entry);
                                        }
                                    } else {
                                        System.out.println("DOI not found for: " + entry);
                                    }
                                } catch (FetcherException e) {
                                    e.printStackTrace();
                                }
                            } else {
                                try {
                                    Optional<DOI> crossrefDOI = new CrossRef().findIdentifier(entry);
                                    if (crossrefDOI.isPresent()) {
                                        System.out.println("New DOI found for: " + entry);
                                        doiNew.incrementAndGet();
                                    }
                                } catch (FetcherException e) {
                                    e.printStackTrace();
                                }
                            }
                        } finally {
                            // Always count down: an unexpected runtime exception in one task
                            // would otherwise leave await() below blocked forever
                            countDownLatch.countDown();
                        }
                    }
                });
            }
            countDownLatch.await();
        } finally {
            // Release the worker threads even if waiting was interrupted
            executorService.shutdown();
        }

        System.out.println("---------------------------------");
        System.out.println("Total DB size: " + total);
        System.out.println("Total DOIs: " + dois);
        System.out.println("DOIs found: " + doiFound);
        System.out.println("DOIs identical: " + doiIdentical);
        System.out.println("New DOIs found: " + doiNew);
    }
}
Aggregations