Use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.
Class PdfContentImporter, method importDatabase:
@Override
public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
    final ArrayList<BibEntry> result = new ArrayList<>(1);
    try (FileInputStream fileStream = new FileInputStream(filePath.toFile());
            PDDocument document = XMPUtil.loadWithAutomaticDecryption(fileStream)) {
        String firstPageContents = getFirstPageContents(document);
        Optional<DOI> doi = DOI.findInText(firstPageContents);
        if (doi.isPresent()) {
            ParserResult parserResult = new ParserResult(result);
            Optional<BibEntry> entry = new DoiFetcher(importFormatPreferences).performSearchById(doi.get().getDOI());
            entry.ifPresent(parserResult.getDatabase()::insertEntry);
            return parserResult;
        }
        // idea: split[] contains the different lines
        // blocks are separated by empty lines
        // treat each block
        // or do special treatment at authors (which are not broken)
        // therefore, we do a line-based and not a block-based splitting
        // i points to the current line
        // curString (mostly) contains the current block
        // the different lines are joined into one and thereby separated by " "
        lines = firstPageContents.split(System.lineSeparator());
        proceedToNextNonEmptyLine();
        if (i >= lines.length) {
            // return empty list
            return new ParserResult();
        }
        // we start at the current line
        curString = lines[i];
        // i might get incremented later and curString modified, too
        i = i + 1;
        String author;
        String editor = null;
        String abstractT = null;
        String keywords = null;
        String title;
        String conference = null;
        String DOI = null;
        String series = null;
        String volume = null;
        String number = null;
        String pages = null;
        // year is a class variable as the method extractYear() uses it
        String publisher = null;
        EntryType type = BibtexEntryTypes.INPROCEEDINGS;
        if (curString.length() > 4) {
            // special case: possibly conference as first line on the page
            extractYear();
            if (curString.contains("Conference")) {
                fillCurStringWithNonEmptyLines();
                conference = curString;
                curString = "";
            } else {
                // e.g. Copyright (c) 1998 by the Genetics Society of America
                // future work: get year using RegEx
                String lower = curString.toLowerCase(Locale.ROOT);
                if (lower.contains("copyright")) {
                    fillCurStringWithNonEmptyLines();
                    publisher = curString;
                    curString = "";
                }
            }
        }
        // start: title
        fillCurStringWithNonEmptyLines();
        title = streamlineTitle(curString);
        curString = "";
        // i points to the next non-empty line
        // after title: authors
        author = null;
        while ((i < lines.length) && !"".equals(lines[i])) {
            // author names are unlikely to be split among different lines,
            // so treat them line by line
            curString = streamlineNames(lines[i]);
            if (author == null) {
                author = curString;
            } else {
                if ("".equals(curString)) {
                    // if lines[i] is "and" then "" is returned by streamlineNames -> do nothing
                } else {
                    author = author.concat(" and ").concat(curString);
                }
            }
            i++;
        }
        curString = "";
        i++;
        // then, abstract and keywords follow
        while (i < lines.length) {
            curString = lines[i];
            if ((curString.length() >= "Abstract".length()) && "Abstract".equalsIgnoreCase(curString.substring(0, "Abstract".length()))) {
                if (curString.length() == "Abstract".length()) {
                    // only word "abstract" found -- skip line
                    curString = "";
                } else {
                    curString = curString.substring("Abstract".length() + 1).trim().concat(System.lineSeparator());
                }
                i++;
                // collect the remaining abstract lines, using a line break as separator
                while ((i < lines.length) && !"".equals(lines[i])) {
                    curString = curString.concat(lines[i]).concat(System.lineSeparator());
                    i++;
                }
                abstractT = curString.trim();
                i++;
            } else if ((curString.length() >= "Keywords".length()) && "Keywords".equalsIgnoreCase(curString.substring(0, "Keywords".length()))) {
                if (curString.length() == "Keywords".length()) {
                    // only word "Keywords" found -- skip line
                    curString = "";
                } else {
                    curString = curString.substring("Keywords".length() + 1).trim();
                }
                i++;
                fillCurStringWithNonEmptyLines();
                keywords = removeNonLettersAtEnd(curString);
            } else {
                String lower = curString.toLowerCase(Locale.ROOT);
                int pos = lower.indexOf("technical");
                if (pos >= 0) {
                    type = BibtexEntryTypes.TECHREPORT;
                    pos = curString.trim().lastIndexOf(' ');
                    if (pos >= 0) {
                        // assumption: last character of curString is NOT ' '
                        // otherwise pos+1 leads to an out-of-bounds exception
                        number = curString.substring(pos + 1);
                    }
                }
                i++;
                proceedToNextNonEmptyLine();
            }
        }
        i = lines.length - 1;
        while (i >= 0) {
            readLastBlock();
            // i now points to the previous block or is -1
            // curString contains the last block, separated by " "
            extractYear();
            int pos = curString.indexOf("(Eds.)");
            if ((pos >= 0) && (publisher == null)) {
                // looks like a Springer last line
                // e.g.: A. Persson and J. Stirna (Eds.): PoEM 2009, LNBIP 39, pp. 161-175, 2009.
                publisher = "Springer";
                editor = streamlineNames(curString.substring(0, pos - 1));
                // +2 because of ":" after (Eds.) and the subsequent space
                curString = curString.substring(pos + "(Eds.)".length() + 2);
                String[] springerSplit = curString.split(", ");
                if (springerSplit.length >= 4) {
                    conference = springerSplit[0];
                    String seriesData = springerSplit[1];
                    int lastSpace = seriesData.lastIndexOf(' ');
                    series = seriesData.substring(0, lastSpace);
                    volume = seriesData.substring(lastSpace + 1);
                    pages = springerSplit[2].substring(4);
                    if (springerSplit[3].length() >= 4) {
                        year = springerSplit[3].substring(0, 4);
                    }
                }
            } else {
                if (DOI == null) {
                    pos = curString.indexOf("DOI");
                    if (pos < 0) {
                        pos = curString.indexOf(FieldName.DOI);
                    }
                    if (pos >= 0) {
                        pos += 3;
                        char delimiter = curString.charAt(pos);
                        if ((delimiter == ':') || (delimiter == ' ')) {
                            pos++;
                        }
                        int nextSpace = curString.indexOf(' ', pos);
                        if (nextSpace > 0) {
                            DOI = curString.substring(pos, nextSpace);
                        } else {
                            DOI = curString.substring(pos);
                        }
                    }
                }
                if ((publisher == null) && curString.contains("IEEE")) {
                    // IEEE has the conference information at the end
                    publisher = "IEEE";
                    if (conference == null) {
                        pos = curString.indexOf('$');
                        if (pos > 0) {
                            // we found the price
                            // before the price, the ISSN is stated
                            // skip that
                            pos -= 2;
                            while ((pos >= 0) && (curString.charAt(pos) != ' ')) {
                                pos--;
                            }
                            if (pos > 0) {
                                conference = curString.substring(0, pos);
                            }
                        }
                    }
                }
            }
        }
        BibEntry entry = new BibEntry();
        entry.setType(type);
        if (author != null) {
            entry.setField(FieldName.AUTHOR, author);
        }
        if (editor != null) {
            entry.setField(FieldName.EDITOR, editor);
        }
        if (abstractT != null) {
            entry.setField(FieldName.ABSTRACT, abstractT);
        }
        if (!Strings.isNullOrEmpty(keywords)) {
            entry.setField(FieldName.KEYWORDS, keywords);
        }
        if (title != null) {
            entry.setField(FieldName.TITLE, title);
        }
        if (conference != null) {
            entry.setField(FieldName.BOOKTITLE, conference);
        }
        if (DOI != null) {
            entry.setField(FieldName.DOI, DOI);
        }
        if (series != null) {
            entry.setField(FieldName.SERIES, series);
        }
        if (volume != null) {
            entry.setField(FieldName.VOLUME, volume);
        }
        if (number != null) {
            entry.setField(FieldName.NUMBER, number);
        }
        if (pages != null) {
            entry.setField(FieldName.PAGES, pages);
        }
        if (year != null) {
            entry.setField(FieldName.YEAR, year);
        }
        if (publisher != null) {
            entry.setField(FieldName.PUBLISHER, publisher);
        }
        result.add(entry);
    } catch (EncryptedPdfsNotSupportedException e) {
        return ParserResult.fromErrorMessage(Localization.lang("Decryption not supported."));
    } catch (IOException exception) {
        return ParserResult.fromError(exception);
    } catch (FetcherException e) {
        return ParserResult.fromErrorMessage(e.getMessage());
    }
    return new ParserResult(result);
}
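For orientation, a minimal caller sketch (not part of the JabRef sources): the file name and class name are placeholders, it assumes the importer is constructed with an ImportFormatPreferences instance (as the importFormatPreferences field above suggests), and it assumes the usual ParserResult accessors (getDatabase, getEntries, hasWarnings, getErrorMessage).

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;

import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.importer.fileformat.PdfContentImporter;
import org.jabref.model.entry.BibEntry;

public class PdfContentImportSketch {

    // Hypothetical helper; "paper.pdf" is a placeholder path.
    public static void importAndPrint(ImportFormatPreferences preferences) throws IOException {
        PdfContentImporter importer = new PdfContentImporter(preferences);
        Path pdf = Paths.get("paper.pdf");
        ParserResult parserResult = importer.importDatabase(pdf, StandardCharsets.UTF_8);
        if (parserResult.hasWarnings()) {
            // getErrorMessage() joins the collected warnings into one string
            System.out.println(parserResult.getErrorMessage());
        }
        for (BibEntry entry : parserResult.getDatabase().getEntries()) {
            System.out.println(entry);
        }
    }
}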
Use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.
Class RepecNepImporter, method importDatabase:
@Override
public ParserResult importDatabase(BufferedReader reader) throws IOException {
    Objects.requireNonNull(reader);
    List<BibEntry> bibitems = new ArrayList<>();
    String paperNoStr = null;
    this.line = 0;
    try {
        // skip header and editor information
        readLine(reader);
        while (this.lastLine != null) {
            if (this.lastLine.startsWith("-----------------------------")) {
                this.inOverviewSection = this.preLine.startsWith("In this issue we have");
            }
            if (isStartOfWorkingPaper()) {
                BibEntry be = new BibEntry();
                be.setType("techreport");
                paperNoStr = this.lastLine.substring(0, this.lastLine.indexOf('.'));
                parseTitleString(be, reader);
                if (startsWithKeyword(RepecNepImporter.RECOGNIZED_FIELDS)) {
                    parseAdditionalFields(be, false, reader);
                } else {
                    // skip empty line
                    readLine(reader);
                    parseAuthors(be, reader);
                    // skip empty line
                    readLine(reader);
                }
                if (!startsWithKeyword(RepecNepImporter.RECOGNIZED_FIELDS)) {
                    parseAbstract(be, reader);
                }
                parseAdditionalFields(be, true, reader);
                bibitems.add(be);
                paperNoStr = null;
            } else {
                this.preLine = this.lastLine;
                readLine(reader);
            }
        }
    } catch (Exception e) {
        String message = "Error in REPEC-NEP import on line " + this.line;
        if (paperNoStr != null) {
            message += ", paper no. " + paperNoStr + ": ";
        }
        message += e.getLocalizedMessage();
        LOGGER.error(message, e);
        return ParserResult.fromErrorMessage(message);
    }
    return new ParserResult(bibitems);
}
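A similar caller sketch for the BufferedReader-based overload (not from the JabRef sources): the NEP digest text is passed in as a plain string and the importer instance is supplied by the caller, so no constructor signature is assumed; ParserResult accessors as in the previous sketch.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;

import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.importer.fileformat.RepecNepImporter;
import org.jabref.model.entry.BibEntry;

public class RepecNepImportSketch {

    // Hypothetical helper; nepDigest holds the plain-text NEP mailing content.
    public static void importAndPrint(String nepDigest, RepecNepImporter importer) throws IOException {
        try (BufferedReader reader = new BufferedReader(new StringReader(nepDigest))) {
            ParserResult parserResult = importer.importDatabase(reader);
            for (BibEntry entry : parserResult.getDatabase().getEntries()) {
                System.out.println(entry);
            }
        }
    }
}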
Use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.
Class SilverPlatterImporter, method importDatabase:
@Override
public ParserResult importDatabase(BufferedReader reader) throws IOException {
    List<BibEntry> bibitems = new ArrayList<>();
    boolean isChapter = false;
    String str;
    StringBuilder sb = new StringBuilder();
    while ((str = reader.readLine()) != null) {
        if (str.length() < 2) {
            sb.append("__::__").append(str);
        } else {
            sb.append("__NEWFIELD__").append(str);
        }
    }
    String[] entries = sb.toString().split("__::__");
    String type = "";
    Map<String, String> h = new HashMap<>();
    for (String entry : entries) {
        if (entry.trim().length() < 6) {
            continue;
        }
        h.clear();
        String[] fields = entry.split("__NEWFIELD__");
        for (String field : fields) {
            if (field.length() < 6) {
                continue;
            }
            String f3 = field.substring(0, 2);
            String frest = field.substring(5);
            if ("TI".equals(f3)) {
                h.put(FieldName.TITLE, frest);
            } else if ("AU".equals(f3)) {
                if (frest.trim().endsWith("(ed)")) {
                    String ed = frest.trim();
                    ed = ed.substring(0, ed.length() - 4);
                    h.put(FieldName.EDITOR, AuthorList.fixAuthorLastNameFirst(ed.replace(",-", ", ").replace(";", " and ")));
                } else {
                    h.put(FieldName.AUTHOR, AuthorList.fixAuthorLastNameFirst(frest.replace(",-", ", ").replace(";", " and ")));
                }
            } else if ("AB".equals(f3)) {
                h.put(FieldName.ABSTRACT, frest);
            } else if ("DE".equals(f3)) {
                String kw = frest.replace("-;", ",").toLowerCase(Locale.ROOT);
                h.put(FieldName.KEYWORDS, kw.substring(0, kw.length() - 1));
            } else if ("SO".equals(f3)) {
                int m = frest.indexOf('.');
                if (m >= 0) {
                    String jr = frest.substring(0, m);
                    h.put(FieldName.JOURNAL, jr.replace("-", " "));
                    frest = frest.substring(m);
                    m = frest.indexOf(';');
                    if (m >= 5) {
                        String yr = frest.substring(m - 5, m).trim();
                        h.put(FieldName.YEAR, yr);
                        frest = frest.substring(m);
                        m = frest.indexOf(':');
                        int issueIndex = frest.indexOf('(');
                        int endIssueIndex = frest.indexOf(')');
                        if (m >= 0) {
                            String pg = frest.substring(m + 1).trim();
                            h.put(FieldName.PAGES, pg);
                            h.put(FieldName.VOLUME, frest.substring(1, issueIndex).trim());
                            h.put(FieldName.ISSUE, frest.substring(issueIndex + 1, endIssueIndex).trim());
                        }
                    }
                }
            } else if ("PB".equals(f3)) {
                int m = frest.indexOf(':');
                if (m >= 0) {
                    String jr = frest.substring(0, m);
                    h.put(FieldName.PUBLISHER, jr.replace("-", " ").trim());
                    frest = frest.substring(m);
                    m = frest.indexOf(", ");
                    if ((m + 2) < frest.length()) {
                        String yr = frest.substring(m + 2).trim();
                        try {
                            Integer.parseInt(yr);
                            h.put(FieldName.YEAR, yr);
                        } catch (NumberFormatException ex) {
                            // Let's assume that this wasn't a number, since it
                            // couldn't be parsed as an integer.
                        }
                    }
                }
            } else if ("AF".equals(f3)) {
                h.put(FieldName.SCHOOL, frest.trim());
            } else if ("DT".equals(f3)) {
                frest = frest.trim();
                if ("Monograph".equals(frest)) {
                    type = "book";
                } else if (frest.startsWith("Dissertation")) {
                    type = "phdthesis";
                } else if (frest.toLowerCase(Locale.ROOT).contains(FieldName.JOURNAL)) {
                    type = "article";
                } else if ("Contribution".equals(frest) || "Chapter".equals(frest)) {
                    type = "incollection";
                    // This entry type contains page numbers and booktitle in the
                    // title field.
                    isChapter = true;
                } else {
                    type = frest.replace(" ", "");
                }
            }
        }
        if (isChapter) {
            String titleO = h.get(FieldName.TITLE);
            if (titleO != null) {
                String title = titleO.trim();
                int inPos = title.indexOf("\" in ");
                if (inPos > 1) {
                    h.put(FieldName.TITLE, title.substring(0, inPos));
                }
            }
        }
        BibEntry b = new BibEntry(type);
        b.setField(h);
        bibitems.add(b);
    }
    return new ParserResult(bibitems);
}
Use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.
Class ModsImporter, method importDatabase:
@Override
public ParserResult importDatabase(BufferedReader input) throws IOException {
    Objects.requireNonNull(input);
    List<BibEntry> bibItems = new ArrayList<>();
    try {
        if (context == null) {
            context = JAXBContext.newInstance("org.jabref.logic.importer.fileformat.mods");
        }
        Unmarshaller unmarshaller = context.createUnmarshaller();
        // The unmarshalled object is a JAXBElement.
        JAXBElement<?> unmarshalledObject = (JAXBElement<?>) unmarshaller.unmarshal(input);
        Optional<ModsCollectionDefinition> collection = getElement(unmarshalledObject.getValue(), ModsCollectionDefinition.class);
        Optional<ModsDefinition> mods = getElement(unmarshalledObject.getValue(), ModsDefinition.class);
        if (collection.isPresent()) {
            List<ModsDefinition> modsDefinitions = collection.get().getMods();
            parseModsCollection(bibItems, modsDefinitions);
        } else if (mods.isPresent()) {
            ModsDefinition modsDefinition = mods.get();
            parseMods(bibItems, modsDefinition);
        } else {
            LOGGER.warn("Not expected root element found");
        }
    } catch (JAXBException e) {
        LOGGER.debug("could not parse document", e);
        return ParserResult.fromError(e);
    }
    return new ParserResult(bibItems);
}
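The two getElement calls above dispatch on the runtime type of the unmarshalled root element. A minimal sketch of what such a helper might look like; this is an assumption for illustration, not the actual JabRef implementation, and the class name is a placeholder.

import java.util.Optional;

// Hedged sketch of a type-dispatch helper in the spirit of ModsImporter's getElement.
final class JaxbElementDispatch {

    private JaxbElementDispatch() {
    }

    static <T> Optional<T> getElement(Object element, Class<T> type) {
        // isInstance guards the cast, so any other root type yields Optional.empty()
        return type.isInstance(element) ? Optional.of(type.cast(element)) : Optional.empty();
    }
}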
Use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.
Class MrDLibImporter, method parse:
/**
 * Parses the input from the server to a ParserResult.
 *
 * @param input A BufferedReader with a reference to a string with the server's response
 * @throws IOException
 */
private void parse(BufferedReader input) throws IOException {
    // The BibDatabase that gets returned in the ParserResult.
    BibDatabase bibDatabase = new BibDatabase();
    // The document to parse
    String recommendations = convertToString(input);
    // The sorted BibEntries get stored here later
    List<BibEntry> bibEntries = new ArrayList<>();
    // Parsing the response with a SAX parser
    try {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser saxParser = factory.newSAXParser();
        MrDlibImporterHandler handler = new MrDlibImporterHandler();
        try (InputStream stream = new ByteArrayInputStream(recommendations.getBytes())) {
            saxParser.parse(stream, handler);
        } catch (SAXException e) {
            LOGGER.error(e.getMessage(), e);
        }
        List<RankedBibEntry> rankedBibEntries = handler.getRankedBibEntries();
        rankedBibEntries.sort((RankedBibEntry rankedBibEntry1, RankedBibEntry rankedBibEntry2) -> rankedBibEntry1.rank.compareTo(rankedBibEntry2.rank));
        bibEntries = rankedBibEntries.stream().map(e -> e.entry).collect(Collectors.toList());
    } catch (ParserConfigurationException | SAXException e) {
        LOGGER.error(e.getMessage(), e);
    }
    for (BibEntry bibentry : bibEntries) {
        bibDatabase.insertEntry(bibentry);
    }
    parserResult = new ParserResult(bibDatabase);
}
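Taken together, the snippets above show two of the ways a ParserResult gets constructed: most importers wrap a list of entries, while MrDLibImporter builds a BibDatabase first. A minimal sketch contrasting the two (hypothetical demo class, not part of JabRef; it assumes the getDatabase()/getEntries() accessors used above):

import java.util.Arrays;

import org.jabref.logic.importer.ParserResult;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.entry.BibEntry;

public class ParserResultConstructionSketch {

    public static void demo() {
        // 1) Wrap a list of entries directly, as the file-format importers above do.
        ParserResult fromEntries = new ParserResult(Arrays.asList(new BibEntry()));

        // 2) Build a BibDatabase first and wrap it, as MrDLibImporter does.
        BibDatabase database = new BibDatabase();
        database.insertEntry(new BibEntry());
        ParserResult fromDatabase = new ParserResult(database);

        // Both variants expose their entries through getDatabase().getEntries().
        System.out.println(fromEntries.getDatabase().getEntries().size());
        System.out.println(fromDatabase.getDatabase().getEntries().size());
    }
}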