use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.
the class BibtexParser method initializeParserResult.
private void initializeParserResult() {
database = new BibDatabase();
// To store custom entry types parsed.
entryTypes = new HashMap<>();
parserResult = new ParserResult(database, new MetaData(), entryTypes);
}
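A ParserResult built with this three-argument constructor is typically read back through its accessors. A minimal consumption sketch, relying only on the constructor shown above and the getDatabase()/hasWarnings()/getErrorMessage() calls that appear in the other snippets on this page, plus a getMetaData() accessor that is assumed here:
// Minimal sketch: consuming a freshly constructed ParserResult.
ParserResult parserResult = new ParserResult(new BibDatabase(), new MetaData(), new HashMap<>());
if (parserResult.hasWarnings()) {
    // Warnings collected during parsing, e.g. for display to the user
    System.out.println(parserResult.getErrorMessage());
}
BibDatabase parsedDatabase = parserResult.getDatabase();
MetaData parsedMetaData = parserResult.getMetaData(); // assumed accessor
System.out.println("Parsed entries: " + parsedDatabase.getEntries().size());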
use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.
the class CopacImporter method importDatabase.
@Override
public ParserResult importDatabase(BufferedReader reader) throws IOException {
Objects.requireNonNull(reader);
List<String> entries = new LinkedList<>();
StringBuilder sb = new StringBuilder();
// Preprocess entries
String str;
while ((str = reader.readLine()) != null) {
if (str.length() < 4) {
continue;
}
String code = str.substring(0, 4);
if (" ".equals(code)) {
sb.append(' ').append(str.trim());
} else {
// beginning of a new item
if ("TI- ".equals(code)) {
if (sb.length() > 0) {
entries.add(sb.toString());
}
sb = new StringBuilder();
}
sb.append('\n').append(str);
}
}
if (sb.length() > 0) {
entries.add(sb.toString());
}
List<BibEntry> results = new LinkedList<>();
for (String entry : entries) {
// Copac does not contain enough information on the type of the
// document. A book is assumed.
BibEntry b = new BibEntry("book");
String[] lines = entry.split("\n");
for (String line1 : lines) {
String line = line1.trim();
if (line.length() < 4) {
continue;
}
String code = line.substring(0, 4);
if ("TI- ".equals(code)) {
setOrAppend(b, FieldName.TITLE, line.substring(4).trim(), ", ");
} else if ("AU- ".equals(code)) {
setOrAppend(b, FieldName.AUTHOR, line.substring(4).trim(), " and ");
} else if ("PY- ".equals(code)) {
setOrAppend(b, FieldName.YEAR, line.substring(4).trim(), ", ");
} else if ("PU- ".equals(code)) {
setOrAppend(b, FieldName.PUBLISHER, line.substring(4).trim(), ", ");
} else if ("SE- ".equals(code)) {
setOrAppend(b, FieldName.SERIES, line.substring(4).trim(), ", ");
} else if ("IS- ".equals(code)) {
setOrAppend(b, FieldName.ISBN, line.substring(4).trim(), ", ");
} else if ("KW- ".equals(code)) {
setOrAppend(b, FieldName.KEYWORDS, line.substring(4).trim(), ", ");
} else if ("NT- ".equals(code)) {
setOrAppend(b, FieldName.NOTE, line.substring(4).trim(), ", ");
} else if ("PD- ".equals(code)) {
setOrAppend(b, "physicaldimensions", line.substring(4).trim(), ", ");
} else if ("DT- ".equals(code)) {
setOrAppend(b, "documenttype", line.substring(4).trim(), ", ");
} else {
setOrAppend(b, code.substring(0, 2), line.substring(4).trim(), ", ");
}
}
results.add(b);
}
return new ParserResult(results);
}
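A short usage sketch for the importer above: a small, made-up Copac record is fed through importDatabase via a StringReader, and the entries are read from the resulting ParserResult. Only the importDatabase and getDatabase().getEntries() calls already shown on this page are relied on; the record text is illustrative.
// Illustrative input; real Copac exports carry many more field codes.
// importDatabase declares IOException, so the enclosing method must handle or rethrow it.
String copacRecord = "TI- An example title\n"
        + "AU- Smith, John\n"
        + "PY- 2001\n"
        + "PU- Example Press\n";
ParserResult result = new CopacImporter().importDatabase(new BufferedReader(new StringReader(copacRecord)));
for (BibEntry entry : result.getDatabase().getEntries()) {
    // Each record becomes a "book" entry, since Copac does not state a document type.
    System.out.println(entry);
}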
use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.
the class EndnoteImporter method importDatabase.
@Override
public ParserResult importDatabase(BufferedReader reader) throws IOException {
List<BibEntry> bibitems = new ArrayList<>();
StringBuilder sb = new StringBuilder();
String str;
boolean first = true;
while ((str = reader.readLine()) != null) {
str = str.trim();
if (str.indexOf("%0") == 0) {
if (first) {
first = false;
} else {
sb.append(ENDOFRECORD);
}
sb.append(str);
} else {
sb.append(str);
}
sb.append('\n');
}
String[] entries = sb.toString().split(ENDOFRECORD);
Map<String, String> hm = new HashMap<>();
String author;
String type;
String editor;
String artnum;
for (String entry : entries) {
hm.clear();
author = "";
type = BibEntry.DEFAULT_TYPE;
editor = "";
artnum = "";
boolean isEditedBook = false;
String[] fields = entry.trim().substring(1).split("\n%");
for (String field : fields) {
if (field.length() < 3) {
continue;
}
/*
 * Details of the Refer format for Journal Article and Book:
 *
 * Generic             Ref code   Journal Article     Book
 * Author              %A         Author              Author
 * Year                %D         Year                Year
 * Title               %T         Title               Title
 * Secondary Author    %E                             Series Editor
 * Secondary Title     %B         Journal             Series Title
 * Place Published     %C                             City
 * Publisher           %I                             Publisher
 * Volume              %V         Volume              Volume
 * Number of Volumes   %6                             Number of Volumes
 * Number              %N         Issue
 * Pages               %P         Pages               Number of Pages
 * Edition             %7                             Edition
 * Subsidiary Author   %?                             Translator
 * Alternate Title     %J         Alternate Journal
 * Label               %F         Label               Label
 * Keywords            %K         Keywords            Keywords
 * Abstract            %X         Abstract            Abstract
 * Notes               %O         Notes               Notes
 */
String prefix = field.substring(0, 1);
String val = field.substring(2);
if ("A".equals(prefix)) {
if ("".equals(author)) {
author = val;
} else {
author += " and " + val;
}
} else if ("E".equals(prefix)) {
if ("".equals(editor)) {
editor = val;
} else {
editor += " and " + val;
}
} else if ("T".equals(prefix)) {
hm.put(FieldName.TITLE, val);
} else if ("0".equals(prefix)) {
if (val.indexOf("Journal") == 0) {
type = "article";
} else if (val.indexOf("Book Section") == 0) {
type = "incollection";
} else if (val.indexOf("Book") == 0) {
type = "book";
} else if (val.indexOf("Edited Book") == 0) {
type = "book";
isEditedBook = true;
} else if (val.indexOf("Conference") == 0) {
type = "inproceedings";
} else if (val.indexOf("Report") == 0) {
type = "techreport";
} else if (val.indexOf("Review") == 0) {
type = "article";
} else if (val.indexOf("Thesis") == 0) {
type = "phdthesis";
} else {
// Unknown reference type: fall back to the default entry type
type = BibEntry.DEFAULT_TYPE;
}
} else if ("7".equals(prefix)) {
hm.put(FieldName.EDITION, val);
} else if ("C".equals(prefix)) {
hm.put(FieldName.ADDRESS, val);
} else if ("D".equals(prefix)) {
hm.put(FieldName.YEAR, val);
} else if ("8".equals(prefix)) {
hm.put(FieldName.DATE, val);
} else if ("J".equals(prefix)) {
// "Alternate journal. Let's set it only if no journal
// has been set with %B.
hm.putIfAbsent(FieldName.JOURNAL, val);
} else if ("B".equals(prefix)) {
// "series" in a book entry.
if ("article".equals(type)) {
hm.put(FieldName.JOURNAL, val);
} else if ("book".equals(type) || "inbook".equals(type)) {
hm.put(FieldName.SERIES, val);
} else {
/* type = inproceedings */
hm.put(FieldName.BOOKTITLE, val);
}
} else if ("I".equals(prefix)) {
if ("phdthesis".equals(type)) {
hm.put(FieldName.SCHOOL, val);
} else {
hm.put(FieldName.PUBLISHER, val);
}
} else if ("P".equals(prefix)) {
// Replace single-dash page ranges (23-45) with double dashes (23--45):
hm.put(FieldName.PAGES, val.replaceAll("([0-9]) *- *([0-9])", "$1--$2"));
} else if ("V".equals(prefix)) {
hm.put(FieldName.VOLUME, val);
} else if ("N".equals(prefix)) {
hm.put(FieldName.NUMBER, val);
} else if ("U".equals(prefix)) {
hm.put(FieldName.URL, val);
} else if ("R".equals(prefix)) {
String doi = val;
if (doi.startsWith("doi:")) {
doi = doi.substring(4);
}
hm.put(FieldName.DOI, doi);
} else if ("O".equals(prefix)) {
// Notes may contain Article number
if (val.startsWith("Artn")) {
String[] tokens = val.split("\\s");
artnum = tokens[1];
} else {
hm.put(FieldName.NOTE, val);
}
} else if ("K".equals(prefix)) {
hm.put(FieldName.KEYWORDS, val);
} else if ("X".equals(prefix)) {
hm.put(FieldName.ABSTRACT, val);
} else if ("9".equals(prefix)) {
if (val.indexOf("Ph.D.") == 0) {
type = "phdthesis";
}
if (val.indexOf("Masters") == 0) {
type = "mastersthesis";
}
} else if ("F".equals(prefix)) {
hm.put(BibEntry.KEY_FIELD, BibtexKeyPatternUtil.checkLegalKey(val, preferences.getBibtexKeyPatternPreferences().isEnforceLegalKey()));
}
}
// For an edited book, the editors arrive in the author field;
// we want them in the editor field so that bibtex knows it's an edited book
if (isEditedBook && "".equals(editor)) {
editor = author;
author = "";
}
// Fix author and editor name formatting
if (!"".equals(author)) {
hm.put(FieldName.AUTHOR, fixAuthor(author));
}
if (!"".equals(editor)) {
hm.put(FieldName.EDITOR, fixAuthor(editor));
}
// If pages are missing and an article number is given, use the article number
if (((hm.get(FieldName.PAGES) == null) || "-".equals(hm.get(FieldName.PAGES))) && !"".equals(artnum)) {
hm.put(FieldName.PAGES, artnum);
}
BibEntry b = new BibEntry(type);
b.setField(hm);
if (!b.getFieldNames().isEmpty()) {
bibitems.add(b);
}
}
return new ParserResult(bibitems);
}
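The %P branch above normalizes page ranges with a single regular expression. The same substitution can be checked in isolation; the sample values are made up:
String pageRegex = "([0-9]) *- *([0-9])";
System.out.println("23-45".replaceAll(pageRegex, "$1--$2"));   // 23--45
System.out.println("23 - 45".replaceAll(pageRegex, "$1--$2")); // 23--45
System.out.println("23--45".replaceAll(pageRegex, "$1--$2"));  // already doubled, left unchanged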
use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.
the class MedlineFetcher method fetchMedline.
/**
* Fetch and parse a Medline item from eutils.ncbi.nlm.nih.gov.
* The E-utilities generate a single, potentially huge XML file containing all entries for the requested IDs.
*
* @param ids A list of IDs to search for.
* @return Will return an empty list on error.
*/
private List<BibEntry> fetchMedline(List<String> ids) throws FetcherException {
try {
// Separate the IDs with a comma to search for multiple entries
URL fetchURL = getURLForID(String.join(",", ids));
URLConnection data = fetchURL.openConnection();
ParserResult result = new MedlineImporter().importDatabase(new BufferedReader(new InputStreamReader(data.getInputStream(), StandardCharsets.UTF_8)));
if (result.hasWarnings()) {
LOGGER.warn(result.getErrorMessage());
}
List<BibEntry> resultList = result.getDatabase().getEntries();
resultList.forEach(this::doPostCleanup);
return resultList;
} catch (URISyntaxException | MalformedURLException e) {
throw new FetcherException("Error while generating fetch URL", Localization.lang("Error while generating fetch URL"), e);
} catch (IOException e) {
throw new FetcherException("Error while fetching from Medline", Localization.lang("Error while fetching from %0", "Medline"), e);
}
}
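The same importer can be exercised without network access by pointing it at a previously saved E-utilities XML export. A sketch under that assumption; the file name is hypothetical, and only the importDatabase/ParserResult calls used in the snippet above are relied on:
// The surrounding method must handle or declare IOException.
Path savedXml = Paths.get("pubmed-export.xml"); // hypothetical local copy of an efetch result
try (BufferedReader reader = Files.newBufferedReader(savedXml, StandardCharsets.UTF_8)) {
    ParserResult result = new MedlineImporter().importDatabase(reader);
    if (result.hasWarnings()) {
        System.out.println(result.getErrorMessage());
    }
    List<BibEntry> entries = result.getDatabase().getEntries();
    entries.forEach(System.out::println);
}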
use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.
the class OvidImporter method importDatabase.
@Override
public ParserResult importDatabase(BufferedReader reader) throws IOException {
List<BibEntry> bibitems = new ArrayList<>();
StringBuilder sb = new StringBuilder();
String line;
while ((line = reader.readLine()) != null) {
if (!line.isEmpty() && (line.charAt(0) != ' ')) {
sb.append("__NEWFIELD__");
}
sb.append(line);
sb.append('\n');
}
String[] items = sb.toString().split(OVID_PATTERN_STRING);
for (int i = 1; i < items.length; i++) {
Map<String, String> h = new HashMap<>();
String[] fields = items[i].split("__NEWFIELD__");
for (String field : fields) {
int linebreak = field.indexOf('\n');
String fieldName = field.substring(0, linebreak).trim();
String content = field.substring(linebreak).trim();
// Check if this is the author field (due to a minor special treatment for this field):
boolean isAuthor = (fieldName.indexOf("Author") == 0) && !fieldName.contains("Author Keywords") && !fieldName.contains("Author e-mail");
// Strip a trailing dot, unless this is the author field, in which case a dot at the end could be significant:
if (!isAuthor && content.endsWith(".")) {
content = content.substring(0, content.length() - 1);
}
if (isAuthor) {
h.put(FieldName.AUTHOR, content);
} else if (fieldName.startsWith("Title")) {
content = content.replaceAll("\\[.+\\]", "").trim();
if (content.endsWith(".")) {
content = content.substring(0, content.length() - 1);
}
h.put(FieldName.TITLE, content);
} else if (fieldName.startsWith("Chapter Title")) {
h.put("chaptertitle", content);
} else if (fieldName.startsWith("Source")) {
Matcher matcher;
if ((matcher = OvidImporter.OVID_SOURCE_PATTERN.matcher(content)).find()) {
h.put(FieldName.JOURNAL, matcher.group(1));
h.put(FieldName.VOLUME, matcher.group(2));
h.put(FieldName.ISSUE, matcher.group(3));
h.put(FieldName.PAGES, matcher.group(4));
h.put(FieldName.YEAR, matcher.group(5));
} else if ((matcher = OvidImporter.OVID_SOURCE_PATTERN_NO_ISSUE.matcher(content)).find()) {
// may be missing the issue
h.put(FieldName.JOURNAL, matcher.group(1));
h.put(FieldName.VOLUME, matcher.group(2));
h.put(FieldName.PAGES, matcher.group(3));
h.put(FieldName.YEAR, matcher.group(4));
} else if ((matcher = OvidImporter.OVID_SOURCE_PATTERN_2.matcher(content)).find()) {
h.put(FieldName.JOURNAL, matcher.group(1));
h.put(FieldName.VOLUME, matcher.group(2));
h.put(FieldName.ISSUE, matcher.group(3));
h.put(FieldName.MONTH, matcher.group(4));
h.put(FieldName.YEAR, matcher.group(5));
h.put(FieldName.PAGES, matcher.group(6));
} else if ((matcher = OvidImporter.INCOLLECTION_PATTERN.matcher(content)).find()) {
h.put(FieldName.EDITOR, matcher.group(1).replace(" (Ed)", ""));
h.put(FieldName.YEAR, matcher.group(2));
h.put(FieldName.BOOKTITLE, matcher.group(3));
h.put(FieldName.PAGES, matcher.group(4));
h.put(FieldName.ADDRESS, matcher.group(5));
h.put(FieldName.PUBLISHER, matcher.group(6));
} else if ((matcher = OvidImporter.BOOK_PATTERN.matcher(content)).find()) {
h.put(FieldName.YEAR, matcher.group(1));
h.put(FieldName.PAGES, matcher.group(2));
h.put(FieldName.ADDRESS, matcher.group(3));
h.put(FieldName.PUBLISHER, matcher.group(4));
}
// Add double hyphens to page ranges:
if (h.get(FieldName.PAGES) != null) {
h.put(FieldName.PAGES, h.get(FieldName.PAGES).replace("-", "--"));
}
} else if ("Abstract".equals(fieldName)) {
h.put(FieldName.ABSTRACT, content);
} else if ("Publication Type".equals(fieldName)) {
if (content.contains("Book")) {
h.put(BibEntry.TYPE_HEADER, "book");
} else if (content.contains("Journal")) {
h.put(BibEntry.TYPE_HEADER, "article");
} else if (content.contains("Conference Paper")) {
h.put(BibEntry.TYPE_HEADER, "inproceedings");
}
} else if (fieldName.startsWith("Language")) {
h.put(FieldName.LANGUAGE, content);
} else if (fieldName.startsWith("Author Keywords")) {
content = content.replace(";", ",").replace("  ", " "); // collapse double spaces
h.put(FieldName.KEYWORDS, content);
} else if (fieldName.startsWith("ISSN")) {
h.put(FieldName.ISSN, content);
} else if (fieldName.startsWith("DOI Number")) {
h.put(FieldName.DOI, content);
}
}
// Now we need to check if a book entry has given editors in the author field;
// if so, rearrange:
String auth = h.get(FieldName.AUTHOR);
if ((auth != null) && auth.contains(" [Ed]")) {
h.remove(FieldName.AUTHOR);
h.put(FieldName.EDITOR, auth.replace(" [Ed]", ""));
}
// Rearrange names properly:
auth = h.get(FieldName.AUTHOR);
if (auth != null) {
h.put(FieldName.AUTHOR, fixNames(auth));
}
auth = h.get(FieldName.EDITOR);
if (auth != null) {
h.put(FieldName.EDITOR, fixNames(auth));
}
// Set the entrytype properly:
String entryType = h.containsKey(BibEntry.TYPE_HEADER) ? h.get(BibEntry.TYPE_HEADER) : BibEntry.DEFAULT_TYPE;
h.remove(BibEntry.TYPE_HEADER);
if ("book".equals(entryType) && h.containsKey("chaptertitle")) {
// This means we have an "incollection" entry.
entryType = "incollection";
// Move the "chaptertitle" to just "title":
h.put(FieldName.TITLE, h.remove("chaptertitle"));
}
BibEntry b = new BibEntry(entryType);
b.setField(h);
bibitems.add(b);
}
return new ParserResult(bibitems);
}
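The importer above marks each field boundary with the literal __NEWFIELD__ string and then treats everything before the first line break as the field name. The splitting step can be illustrated on a tiny, made-up record:
// Hypothetical record text, already tagged the way the read loop above tags it.
String item = "__NEWFIELD__Authors\n  Smith J.\n__NEWFIELD__Title\n  An example title.\n";
for (String field : item.split("__NEWFIELD__")) {
    if (field.isEmpty()) {
        continue; // split() yields an empty piece before the first marker
    }
    int linebreak = field.indexOf('\n');
    String fieldName = field.substring(0, linebreak).trim(); // "Authors", then "Title"
    String content = field.substring(linebreak).trim();      // the wrapped field content
    System.out.println(fieldName + " -> " + content);
}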