use of org.jabref.model.entry.BibEntry in project jabref by JabRef.
the class GvkParser, method parseEntries.
private List<BibEntry> parseEntries(Document content) {
    List<BibEntry> result = new LinkedList<>();

    // used for creating test cases
    // XMLUtil.printDocument(content);

    // Namespace srwNamespace = Namespace.getNamespace("srw","http://www.loc.gov/zing/srw/");
    // loop over all partial results
    //Element root = content.getDocumentElement();
    Element root = (Element) content.getElementsByTagName("zs:searchRetrieveResponse").item(0);
    Element srwrecords = getChild("zs:records", root);
    if (srwrecords == null) {
        // no records found -> return empty list
        return result;
    }
    List<Element> records = getChildren("zs:record", srwrecords);
    for (Element record : records) {
        Element e = getChild("zs:recordData", record);
        if (e != null) {
            e = getChild("record", e);
            if (e != null) {
                result.add(parseEntry(e));
            }
        }
    }
    return result;
}
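parseEntries relies on two DOM helpers, getChild and getChildren, which are private to GvkParser and not shown on this page. A minimal sketch of what such helpers could look like, using only the standard org.w3c.dom API (the actual implementations in JabRef may differ):

import java.util.ArrayList;
import java.util.List;

import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

// Hypothetical helpers: return the first / all direct child elements with the given tag name.
final class DomHelper {

    static Element getChild(String name, Element parent) {
        if (parent == null) {
            return null;
        }
        NodeList children = parent.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node node = children.item(i);
            if ((node.getNodeType() == Node.ELEMENT_NODE) && name.equals(((Element) node).getTagName())) {
                return (Element) node;
            }
        }
        return null;
    }

    static List<Element> getChildren(String name, Element parent) {
        List<Element> result = new ArrayList<>();
        if (parent == null) {
            return result;
        }
        NodeList children = parent.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node node = children.item(i);
            if ((node.getNodeType() == Node.ELEMENT_NODE) && name.equals(((Element) node).getTagName())) {
                result.add((Element) node);
            }
        }
        return result;
    }
}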
use of org.jabref.model.entry.BibEntry in project jabref by JabRef.
the class EntryBasedParserFetcher, method performSearch.
@Override
default List<BibEntry> performSearch(BibEntry entry) throws FetcherException {
    Objects.requireNonNull(entry);

    try (InputStream stream = new BufferedInputStream(getURLForEntry(entry).openStream())) {
        List<BibEntry> fetchedEntries = getParser().parseEntries(stream);

        // Post-cleanup
        fetchedEntries.forEach(this::doPostCleanup);

        return fetchedEntries;
    } catch (URISyntaxException e) {
        throw new FetcherException("Search URI is malformed", e);
    } catch (IOException e) {
        // TODO: Catch HTTP Response 401 errors and report that user has no rights to access resource
        throw new FetcherException("An I/O exception occurred", e);
    } catch (ParseException e) {
        throw new FetcherException("An internal parser error occurred", e);
    }
}
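performSearch is a default method, so any fetcher implementing EntryBasedParserFetcher inherits this download-and-parse logic and only has to supply getURLForEntry and getParser. A hypothetical call site (fetcher stands for any such implementation; the title is invented):

// Hypothetical usage; "fetcher" is any EntryBasedParserFetcher implementation.
BibEntry query = new BibEntry();
query.setField(FieldName.TITLE, "An example title");
try {
    List<BibEntry> results = fetcher.performSearch(query);
    results.forEach(result -> System.out.println(result.getField(FieldName.TITLE)));
} catch (FetcherException e) {
    // the default method wraps URI, I/O, and parsing problems into FetcherException
}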
use of org.jabref.model.entry.BibEntry in project jabref by JabRef.
the class IdBasedParserFetcher, method performSearchById.
@Override
default Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
    if (StringUtil.isBlank(identifier)) {
        return Optional.empty();
    }

    try (InputStream stream = new BufferedInputStream(getURLForID(identifier).openStream())) {
        List<BibEntry> fetchedEntries = getParser().parseEntries(stream);

        if (fetchedEntries.isEmpty()) {
            return Optional.empty();
        }

        if (fetchedEntries.size() > 1) {
            LOGGER.info("Fetcher " + getName() + " found more than one result for identifier " + identifier + ". We will use the first entry.");
        }

        BibEntry entry = fetchedEntries.get(0);

        // Post-cleanup
        doPostCleanup(entry);

        return Optional.of(entry);
    } catch (URISyntaxException e) {
        throw new FetcherException("Search URI is malformed", e);
    } catch (FileNotFoundException e) {
        LOGGER.debug("Id not found");
        return Optional.empty();
    } catch (IOException e) {
        // TODO: Catch HTTP Response 401 errors and report that user has no rights to access resource
        throw new FetcherException("An I/O exception occurred", e);
    } catch (ParseException e) {
        throw new FetcherException("An internal parser error occurred", e);
    }
}
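Callers get an Optional back, so a blank identifier and an identifier the service does not know both surface as Optional.empty() rather than as exceptions. A hypothetical call site (fetcher stands for any IdBasedParserFetcher implementation; the DOI is invented):

// Hypothetical usage; "fetcher" is any IdBasedParserFetcher implementation.
try {
    Optional<BibEntry> maybeEntry = fetcher.performSearchById("10.1000/example");
    maybeEntry.ifPresent(entry -> System.out.println(entry.getField(FieldName.TITLE)));
} catch (FetcherException e) {
    // thrown for malformed URIs, I/O problems, or parser errors; a missing id yields Optional.empty()
}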
use of org.jabref.model.entry.BibEntry in project jabref by JabRef.
the class OvidImporter, method importDatabase.
@Override
public ParserResult importDatabase(BufferedReader reader) throws IOException {
    List<BibEntry> bibitems = new ArrayList<>();
    StringBuilder sb = new StringBuilder();
    String line;
    while ((line = reader.readLine()) != null) {
        if (!line.isEmpty() && (line.charAt(0) != ' ')) {
            sb.append("__NEWFIELD__");
        }
        sb.append(line);
        sb.append('\n');
    }
    String[] items = sb.toString().split(OVID_PATTERN_STRING);

    for (int i = 1; i < items.length; i++) {
        Map<String, String> h = new HashMap<>();
        String[] fields = items[i].split("__NEWFIELD__");
        for (String field : fields) {
            int linebreak = field.indexOf('\n');
            String fieldName = field.substring(0, linebreak).trim();
            String content = field.substring(linebreak).trim();

            // Check if this is the author field (due to a minor special treatment for this field):
            boolean isAuthor = (fieldName.indexOf("Author") == 0)
                    && !fieldName.contains("Author Keywords")
                    && !fieldName.contains("Author e-mail");
            // in which case a dot at the end could be significant:
            if (!isAuthor && content.endsWith(".")) {
                content = content.substring(0, content.length() - 1);
            }

            if (isAuthor) {
                h.put(FieldName.AUTHOR, content);
            } else if (fieldName.startsWith("Title")) {
                content = content.replaceAll("\\[.+\\]", "").trim();
                if (content.endsWith(".")) {
                    content = content.substring(0, content.length() - 1);
                }
                h.put(FieldName.TITLE, content);
            } else if (fieldName.startsWith("Chapter Title")) {
                h.put("chaptertitle", content);
            } else if (fieldName.startsWith("Source")) {
                Matcher matcher;
                if ((matcher = OvidImporter.OVID_SOURCE_PATTERN.matcher(content)).find()) {
                    h.put(FieldName.JOURNAL, matcher.group(1));
                    h.put(FieldName.VOLUME, matcher.group(2));
                    h.put(FieldName.ISSUE, matcher.group(3));
                    h.put(FieldName.PAGES, matcher.group(4));
                    h.put(FieldName.YEAR, matcher.group(5));
                } else if ((matcher = OvidImporter.OVID_SOURCE_PATTERN_NO_ISSUE.matcher(content)).find()) {
                    // may be missing the issue
                    h.put(FieldName.JOURNAL, matcher.group(1));
                    h.put(FieldName.VOLUME, matcher.group(2));
                    h.put(FieldName.PAGES, matcher.group(3));
                    h.put(FieldName.YEAR, matcher.group(4));
                } else if ((matcher = OvidImporter.OVID_SOURCE_PATTERN_2.matcher(content)).find()) {
                    h.put(FieldName.JOURNAL, matcher.group(1));
                    h.put(FieldName.VOLUME, matcher.group(2));
                    h.put(FieldName.ISSUE, matcher.group(3));
                    h.put(FieldName.MONTH, matcher.group(4));
                    h.put(FieldName.YEAR, matcher.group(5));
                    h.put(FieldName.PAGES, matcher.group(6));
                } else if ((matcher = OvidImporter.INCOLLECTION_PATTERN.matcher(content)).find()) {
                    h.put(FieldName.EDITOR, matcher.group(1).replace(" (Ed)", ""));
                    h.put(FieldName.YEAR, matcher.group(2));
                    h.put(FieldName.BOOKTITLE, matcher.group(3));
                    h.put(FieldName.PAGES, matcher.group(4));
                    h.put(FieldName.ADDRESS, matcher.group(5));
                    h.put(FieldName.PUBLISHER, matcher.group(6));
                } else if ((matcher = OvidImporter.BOOK_PATTERN.matcher(content)).find()) {
                    h.put(FieldName.YEAR, matcher.group(1));
                    h.put(FieldName.PAGES, matcher.group(2));
                    h.put(FieldName.ADDRESS, matcher.group(3));
                    h.put(FieldName.PUBLISHER, matcher.group(4));
                }
                // Add double hyphens to page ranges:
                if (h.get(FieldName.PAGES) != null) {
                    h.put(FieldName.PAGES, h.get(FieldName.PAGES).replace("-", "--"));
                }
            } else if ("Abstract".equals(fieldName)) {
                h.put(FieldName.ABSTRACT, content);
            } else if ("Publication Type".equals(fieldName)) {
                if (content.contains("Book")) {
                    h.put(BibEntry.TYPE_HEADER, "book");
                } else if (content.contains("Journal")) {
                    h.put(BibEntry.TYPE_HEADER, "article");
                } else if (content.contains("Conference Paper")) {
                    h.put(BibEntry.TYPE_HEADER, "inproceedings");
                }
            } else if (fieldName.startsWith("Language")) {
                h.put(FieldName.LANGUAGE, content);
            } else if (fieldName.startsWith("Author Keywords")) {
                content = content.replace(";", ",").replace("  ", " ");
                h.put(FieldName.KEYWORDS, content);
            } else if (fieldName.startsWith("ISSN")) {
                h.put(FieldName.ISSN, content);
            } else if (fieldName.startsWith("DOI Number")) {
                h.put(FieldName.DOI, content);
            }
        }

        // Now we need to check if a book entry has given editors in the author field;
        // if so, rearrange:
        String auth = h.get(FieldName.AUTHOR);
        if ((auth != null) && auth.contains(" [Ed]")) {
            h.remove(FieldName.AUTHOR);
            h.put(FieldName.EDITOR, auth.replace(" [Ed]", ""));
        }

        // Rearrange names properly:
        auth = h.get(FieldName.AUTHOR);
        if (auth != null) {
            h.put(FieldName.AUTHOR, fixNames(auth));
        }
        auth = h.get(FieldName.EDITOR);
        if (auth != null) {
            h.put(FieldName.EDITOR, fixNames(auth));
        }

        // Set the entrytype properly:
        String entryType = h.containsKey(BibEntry.TYPE_HEADER) ? h.get(BibEntry.TYPE_HEADER) : BibEntry.DEFAULT_TYPE;
        h.remove(BibEntry.TYPE_HEADER);
        if ("book".equals(entryType) && h.containsKey("chaptertitle")) {
            // This means we have an "incollection" entry.
            entryType = "incollection";
            // Move the "chaptertitle" to just "title":
            h.put(FieldName.TITLE, h.remove("chaptertitle"));
        }

        BibEntry b = new BibEntry(entryType);
        b.setField(h);
        bibitems.add(b);
    }
    return new ParserResult(bibitems);
}
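The splitting logic above assumes the Ovid export layout: each record starts with a numbered marker (matched by OVID_PATTERN_STRING, whose exact pattern is not shown on this page), field names start at column 0, and indented lines continue the previous field. A hypothetical fragment illustrating that shape, fed through the importer (the record marker and all bibliographic data are invented):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;

// Hypothetical Ovid export fragment; indented lines continue the field above them.
String ovidSample = "<1>\n"
        + "Author\n"
        + "  Smith JA. Doe R.\n"
        + "Title\n"
        + "  An invented article title.\n"
        + "Source\n"
        + "  Journal of Examples. 12(3):45-67, 1999.\n";

try (BufferedReader reader = new BufferedReader(new StringReader(ovidSample))) {
    ParserResult parsed = new OvidImporter().importDatabase(reader);
    List<BibEntry> entries = parsed.getDatabase().getEntries();
} catch (IOException e) {
    // not expected for an in-memory reader
}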
use of org.jabref.model.entry.BibEntry in project jabref by JabRef.
the class PdfContentImporter, method importDatabase.
@Override
public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
    final ArrayList<BibEntry> result = new ArrayList<>(1);

    try (FileInputStream fileStream = new FileInputStream(filePath.toFile());
            PDDocument document = XMPUtil.loadWithAutomaticDecryption(fileStream)) {
        String firstPageContents = getFirstPageContents(document);

        Optional<DOI> doi = DOI.findInText(firstPageContents);
        if (doi.isPresent()) {
            ParserResult parserResult = new ParserResult(result);
            Optional<BibEntry> entry = new DoiFetcher(importFormatPreferences).performSearchById(doi.get().getDOI());
            entry.ifPresent(parserResult.getDatabase()::insertEntry);
            return parserResult;
        }

        // idea: split[] contains the different lines
        // blocks are separated by empty lines
        // treat each block
        //   or do special treatment at authors (which are not broken)
        //   therefore, we do a line-based and not a block-based splitting
        // i points to the current line
        // curString (mostly) contains the current block
        //   the different lines are joined into one and thereby separated by " "
        lines = firstPageContents.split(System.lineSeparator());

        proceedToNextNonEmptyLine();
        if (i >= lines.length) {
            // return empty list
            return new ParserResult();
        }

        // we start at the current line
        curString = lines[i];
        // i might get incremented later and curString modified, too
        i = i + 1;

        String author;
        String editor = null;
        String abstractT = null;
        String keywords = null;
        String title;
        String conference = null;
        String DOI = null;
        String series = null;
        String volume = null;
        String number = null;
        String pages = null;
        // year is a class variable as the method extractYear() uses it;
        String publisher = null;

        EntryType type = BibtexEntryTypes.INPROCEEDINGS;

        if (curString.length() > 4) {
            // special case: possibly conference as first line on the page
            extractYear();
            if (curString.contains("Conference")) {
                fillCurStringWithNonEmptyLines();
                conference = curString;
                curString = "";
            } else {
                // e.g. Copyright (c) 1998 by the Genetics Society of America
                // future work: get year using RegEx
                String lower = curString.toLowerCase(Locale.ROOT);
                if (lower.contains("copyright")) {
                    fillCurStringWithNonEmptyLines();
                    publisher = curString;
                    curString = "";
                }
            }
        }

        // start: title
        fillCurStringWithNonEmptyLines();
        title = streamlineTitle(curString);
        curString = "";
        // i points to the next non-empty line

        // after title: authors
        author = null;
        while ((i < lines.length) && !"".equals(lines[i])) {
            // author names are unlikely to be split across different lines
            // treat them line by line
            curString = streamlineNames(lines[i]);
            if (author == null) {
                author = curString;
            } else {
                if ("".equals(curString)) {
                    // if lines[i] is "and" then "" is returned by streamlineNames -> do nothing
                } else {
                    author = author.concat(" and ").concat(curString);
                }
            }
            i++;
        }
        curString = "";
        i++;

        // then, abstract and keywords follow
        while (i < lines.length) {
            curString = lines[i];
            if ((curString.length() >= "Abstract".length()) && "Abstract".equalsIgnoreCase(curString.substring(0, "Abstract".length()))) {
                if (curString.length() == "Abstract".length()) {
                    // only word "abstract" found -- skip line
                    curString = "";
                } else {
                    curString = curString.substring("Abstract".length() + 1).trim().concat(System.lineSeparator());
                }
                i++;
                // whereas we need linebreak as separator
                while ((i < lines.length) && !"".equals(lines[i])) {
                    curString = curString.concat(lines[i]).concat(System.lineSeparator());
                    i++;
                }
                abstractT = curString.trim();
                i++;
            } else if ((curString.length() >= "Keywords".length()) && "Keywords".equalsIgnoreCase(curString.substring(0, "Keywords".length()))) {
                if (curString.length() == "Keywords".length()) {
                    // only word "Keywords" found -- skip line
                    curString = "";
                } else {
                    curString = curString.substring("Keywords".length() + 1).trim();
                }
                i++;
                fillCurStringWithNonEmptyLines();
                keywords = removeNonLettersAtEnd(curString);
            } else {
                String lower = curString.toLowerCase(Locale.ROOT);

                int pos = lower.indexOf("technical");
                if (pos >= 0) {
                    type = BibtexEntryTypes.TECHREPORT;
                    pos = curString.trim().lastIndexOf(' ');
                    if (pos >= 0) {
                        // assumption: last character of curString is NOT ' '
                        // otherwise pos+1 leads to an out-of-bounds exception
                        number = curString.substring(pos + 1);
                    }
                }

                i++;
                proceedToNextNonEmptyLine();
            }
        }

        i = lines.length - 1;
        while (i >= 0) {
            readLastBlock();
            // i now points to the block before or is -1
            // curString contains the last block, separated by " "

            extractYear();

            int pos = curString.indexOf("(Eds.)");
            if ((pos >= 0) && (publisher == null)) {
                // looks like a Springer last line
                // e.g: A. Persson and J. Stirna (Eds.): PoEM 2009, LNBIP 39, pp. 161-175, 2009.
                publisher = "Springer";
                editor = streamlineNames(curString.substring(0, pos - 1));
                // +2 because of ":" after (Eds.) and the subsequent space
                curString = curString.substring(pos + "(Eds.)".length() + 2);
                String[] springerSplit = curString.split(", ");
                if (springerSplit.length >= 4) {
                    conference = springerSplit[0];

                    String seriesData = springerSplit[1];
                    int lastSpace = seriesData.lastIndexOf(' ');
                    series = seriesData.substring(0, lastSpace);
                    volume = seriesData.substring(lastSpace + 1);

                    pages = springerSplit[2].substring(4);

                    if (springerSplit[3].length() >= 4) {
                        year = springerSplit[3].substring(0, 4);
                    }
                }
            } else {
                if (DOI == null) {
                    pos = curString.indexOf("DOI");
                    if (pos < 0) {
                        pos = curString.indexOf(FieldName.DOI);
                    }
                    if (pos >= 0) {
                        pos += 3;
                        char delimiter = curString.charAt(pos);
                        if ((delimiter == ':') || (delimiter == ' ')) {
                            pos++;
                        }
                        int nextSpace = curString.indexOf(' ', pos);
                        if (nextSpace > 0) {
                            DOI = curString.substring(pos, nextSpace);
                        } else {
                            DOI = curString.substring(pos);
                        }
                    }
                }

                if ((publisher == null) && curString.contains("IEEE")) {
                    // IEEE has the conference things at the end
                    publisher = "IEEE";

                    if (conference == null) {
                        pos = curString.indexOf('$');
                        if (pos > 0) {
                            // we found the price
                            // before the price, the ISSN is stated
                            // skip that
                            pos -= 2;
                            while ((pos >= 0) && (curString.charAt(pos) != ' ')) {
                                pos--;
                            }
                            if (pos > 0) {
                                conference = curString.substring(0, pos);
                            }
                        }
                    }
                }
            }
        }

        BibEntry entry = new BibEntry();
        entry.setType(type);

        if (author != null) {
            entry.setField(FieldName.AUTHOR, author);
        }
        if (editor != null) {
            entry.setField(FieldName.EDITOR, editor);
        }
        if (abstractT != null) {
            entry.setField(FieldName.ABSTRACT, abstractT);
        }
        if (!Strings.isNullOrEmpty(keywords)) {
            entry.setField(FieldName.KEYWORDS, keywords);
        }
        if (title != null) {
            entry.setField(FieldName.TITLE, title);
        }
        if (conference != null) {
            entry.setField(FieldName.BOOKTITLE, conference);
        }
        if (DOI != null) {
            entry.setField(FieldName.DOI, DOI);
        }
        if (series != null) {
            entry.setField(FieldName.SERIES, series);
        }
        if (volume != null) {
            entry.setField(FieldName.VOLUME, volume);
        }
        if (number != null) {
            entry.setField(FieldName.NUMBER, number);
        }
        if (pages != null) {
            entry.setField(FieldName.PAGES, pages);
        }
        if (year != null) {
            entry.setField(FieldName.YEAR, year);
        }
        if (publisher != null) {
            entry.setField(FieldName.PUBLISHER, publisher);
        }

        result.add(entry);
    } catch (EncryptedPdfsNotSupportedException e) {
        return ParserResult.fromErrorMessage(Localization.lang("Decryption not supported."));
    } catch (IOException exception) {
        return ParserResult.fromError(exception);
    } catch (FetcherException e) {
        return ParserResult.fromErrorMessage(e.getMessage());
    }

    return new ParserResult(result);
}
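A hypothetical invocation of this importer (the file name is invented; importFormatPreferences stands for a previously obtained ImportFormatPreferences instance, which the DoiFetcher lookup above also needs, and the constructor signature is assumed):

import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.List;

// Hypothetical usage: parse the first page of a PDF into a BibEntry.
PdfContentImporter importer = new PdfContentImporter(importFormatPreferences);
ParserResult parsed = importer.importDatabase(Paths.get("some-paper.pdf"), StandardCharsets.UTF_8);
List<BibEntry> entries = parsed.getDatabase().getEntries();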