Search in sources :

Example 21 with ParserResult

use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.

the class PdfContentImporter method importDatabase.

/**
 * Imports a single entry from the first page of a PDF file.
 * <p>
 * If a DOI is found in the first-page text, the entry is fetched via {@link DoiFetcher} and that
 * result is returned directly. Otherwise the text is split into lines and scanned heuristically:
 * an optional conference/copyright line, the title, the authors, abstract/keywords/technical-report
 * hints, and finally the blocks at the end of the page (Springer "(Eds.)" line, DOI, IEEE footer).
 * <p>
 * The scan works on the class-level variables {@code lines}, {@code i}, {@code curString} and
 * {@code year}; the helper methods called here are expected to advance/modify them as well.
 *
 * @param filePath        the PDF file to read
 * @param defaultEncoding not used by this implementation
 * @return a result holding the extracted entry; an empty result if the first page has no non-empty
 *         line; or an error result if the PDF cannot be decrypted or read, or the DOI lookup fails
 */
@Override
public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
    final ArrayList<BibEntry> result = new ArrayList<>(1);
    try (FileInputStream fileStream = new FileInputStream(filePath.toFile());
        PDDocument document = XMPUtil.loadWithAutomaticDecryption(fileStream)) {
        String firstPageContents = getFirstPageContents(document);
        Optional<DOI> doi = DOI.findInText(firstPageContents);
        if (doi.isPresent()) {
            // A DOI on the first page is the most reliable source: fetch the entry and stop here
            ParserResult parserResult = new ParserResult(result);
            Optional<BibEntry> entry = new DoiFetcher(importFormatPreferences).performSearchById(doi.get().getDOI());
            entry.ifPresent(parserResult.getDatabase()::insertEntry);
            return parserResult;
        }
        // idea: split[] contains the different lines
        // blocks are separated by empty lines
        // treat each block
        //   or do special treatment at authors (which are not broken)
        //   therefore, we do a line-based and not a block-based splitting
        // i points to the current line
        // curString (mostly) contains the current block
        //   the different lines are joined into one and thereby separated by " "
        lines = firstPageContents.split(System.lineSeparator());
        proceedToNextNonEmptyLine();
        if (i >= lines.length) {
            // return empty list
            return new ParserResult();
        }
        // we start at the current line
        curString = lines[i];
        // i might get incremented later and curString modified, too
        i = i + 1;
        String author;
        String editor = null;
        String abstractT = null;
        String keywords = null;
        String title;
        String conference = null;
        // named doiText so that it does not shadow the DOI class used above
        String doiText = null;
        String series = null;
        String volume = null;
        String number = null;
        String pages = null;
        // year is a class variable as the method extractYear() uses it;
        String publisher = null;
        EntryType type = BibtexEntryTypes.INPROCEEDINGS;
        if (curString.length() > 4) {
            // special case: possibly conference as first line on the page
            extractYear();
            if (curString.contains("Conference")) {
                fillCurStringWithNonEmptyLines();
                conference = curString;
                curString = "";
            } else {
                // e.g. Copyright (c) 1998 by the Genetics Society of America
                // future work: get year using RegEx
                String lower = curString.toLowerCase(Locale.ROOT);
                if (lower.contains("copyright")) {
                    fillCurStringWithNonEmptyLines();
                    publisher = curString;
                    curString = "";
                }
            }
        }
        // start: title
        fillCurStringWithNonEmptyLines();
        title = streamlineTitle(curString);
        curString = "";
        //i points to the next non-empty line
        // after title: authors
        author = null;
        while ((i < lines.length) && !"".equals(lines[i])) {
            // author names are unlikely to be lines among different lines
            // treat them line by line
            curString = streamlineNames(lines[i]);
            if (author == null) {
                author = curString;
            } else if (!"".equals(curString)) {
                // if lines[i] is "and" then "" is returned by streamlineNames -> nothing to append
                author = author.concat(" and ").concat(curString);
            }
            i++;
        }
        curString = "";
        i++;
        // then, abstract and keywords follow
        while (i < lines.length) {
            curString = lines[i];
            if ((curString.length() >= "Abstract".length()) && "Abstract".equalsIgnoreCase(curString.substring(0, "Abstract".length()))) {
                if (curString.length() == "Abstract".length()) {
                    // only word "abstract" found -- skip line
                    curString = "";
                } else {
                    // +1 skips the character following the word "Abstract" (e.g. ':' or '.')
                    curString = curString.substring("Abstract".length() + 1).trim().concat(System.lineSeparator());
                }
                i++;
                // whereas we need linebreak as separator
                while ((i < lines.length) && !"".equals(lines[i])) {
                    curString = curString.concat(lines[i]).concat(System.lineSeparator());
                    i++;
                }
                abstractT = curString.trim();
                i++;
            } else if ((curString.length() >= "Keywords".length()) && "Keywords".equalsIgnoreCase(curString.substring(0, "Keywords".length()))) {
                if (curString.length() == "Keywords".length()) {
                    // only word "Keywords" found -- skip line
                    curString = "";
                } else {
                    // +1 skips the character following the word "Keywords"
                    curString = curString.substring("Keywords".length() + 1).trim();
                }
                i++;
                fillCurStringWithNonEmptyLines();
                keywords = removeNonLettersAtEnd(curString);
            } else {
                String lower = curString.toLowerCase(Locale.ROOT);
                int pos = lower.indexOf("technical");
                if (pos >= 0) {
                    type = BibtexEntryTypes.TECHREPORT;
                    // Search and cut on the same (trimmed) string: an index computed on the trimmed
                    // text is not valid for the untrimmed one when the line has leading whitespace
                    String trimmed = curString.trim();
                    pos = trimmed.lastIndexOf(' ');
                    if (pos >= 0) {
                        number = trimmed.substring(pos + 1);
                    }
                }
                i++;
                proceedToNextNonEmptyLine();
            }
        }
        // Second pass: walk the page backwards, block by block
        i = lines.length - 1;
        while (i >= 0) {
            readLastBlock();
            // i now points to the block before or is -1
            // curString contains the last block, separated by " "
            extractYear();
            int pos = curString.indexOf("(Eds.)");
            if ((pos >= 0) && (publisher == null)) {
                // looks like a Springer last line
                // e.g: A. Persson and J. Stirna (Eds.): PoEM 2009, LNBIP 39, pp. 161-175, 2009.
                publisher = "Springer";
                if (pos > 0) {
                    // pos - 1 drops the space in front of "(Eds.)"; nothing to extract if the block starts with it
                    editor = streamlineNames(curString.substring(0, pos - 1));
                }
                //+2 because of ":" after (Eds.) and the subsequent space
                // clamped so that a block ending right after "(Eds.)" does not run out of bounds
                int restStart = Math.min(pos + "(Eds.)".length() + 2, curString.length());
                curString = curString.substring(restStart);
                String[] springerSplit = curString.split(", ");
                if (springerSplit.length >= 4) {
                    conference = springerSplit[0];
                    String seriesData = springerSplit[1];
                    int lastSpace = seriesData.lastIndexOf(' ');
                    if (lastSpace >= 0) {
                        series = seriesData.substring(0, lastSpace);
                        volume = seriesData.substring(lastSpace + 1);
                    } else {
                        // no volume part present; keep the whole token as series
                        series = seriesData;
                    }
                    // skip the leading "pp. " (4 characters) if the segment is long enough
                    if (springerSplit[2].length() >= 4) {
                        pages = springerSplit[2].substring(4);
                    }
                    if (springerSplit[3].length() >= 4) {
                        year = springerSplit[3].substring(0, 4);
                    }
                }
            } else {
                if (doiText == null) {
                    pos = curString.indexOf("DOI");
                    if (pos < 0) {
                        pos = curString.indexOf(FieldName.DOI);
                    }
                    if (pos >= 0) {
                        // skip the three characters of the marker itself
                        pos += 3;
                        // the marker may be the very last token of the block; then there is no value to read
                        if (pos < curString.length()) {
                            char delimiter = curString.charAt(pos);
                            if ((delimiter == ':') || (delimiter == ' ')) {
                                pos++;
                            }
                            int nextSpace = curString.indexOf(' ', pos);
                            if (nextSpace > 0) {
                                doiText = curString.substring(pos, nextSpace);
                            } else {
                                doiText = curString.substring(pos);
                            }
                        }
                    }
                }
                if ((publisher == null) && curString.contains("IEEE")) {
                    // IEEE has the conference things at the end
                    publisher = "IEEE";
                    if (conference == null) {
                        pos = curString.indexOf('$');
                        if (pos > 0) {
                            // we found the price
                            // before the price, the ISSN is stated
                            // skip that
                            pos -= 2;
                            while ((pos >= 0) && (curString.charAt(pos) != ' ')) {
                                pos--;
                            }
                            if (pos > 0) {
                                conference = curString.substring(0, pos);
                            }
                        }
                    }
                }
            }
        }
        // Assemble the entry from whatever could be extracted
        BibEntry entry = new BibEntry();
        entry.setType(type);
        if (author != null) {
            entry.setField(FieldName.AUTHOR, author);
        }
        if (editor != null) {
            entry.setField(FieldName.EDITOR, editor);
        }
        if (abstractT != null) {
            entry.setField(FieldName.ABSTRACT, abstractT);
        }
        if (!Strings.isNullOrEmpty(keywords)) {
            entry.setField(FieldName.KEYWORDS, keywords);
        }
        if (title != null) {
            entry.setField(FieldName.TITLE, title);
        }
        if (conference != null) {
            entry.setField(FieldName.BOOKTITLE, conference);
        }
        if (doiText != null) {
            entry.setField(FieldName.DOI, doiText);
        }
        if (series != null) {
            entry.setField(FieldName.SERIES, series);
        }
        if (volume != null) {
            entry.setField(FieldName.VOLUME, volume);
        }
        if (number != null) {
            entry.setField(FieldName.NUMBER, number);
        }
        if (pages != null) {
            entry.setField(FieldName.PAGES, pages);
        }
        if (year != null) {
            entry.setField(FieldName.YEAR, year);
        }
        if (publisher != null) {
            entry.setField(FieldName.PUBLISHER, publisher);
        }
        result.add(entry);
    } catch (EncryptedPdfsNotSupportedException e) {
        return ParserResult.fromErrorMessage(Localization.lang("Decryption not supported."));
    } catch (IOException exception) {
        return ParserResult.fromError(exception);
    } catch (FetcherException e) {
        return ParserResult.fromErrorMessage(e.getMessage());
    }
    return new ParserResult(result);
}
Also used : EncryptedPdfsNotSupportedException(org.jabref.logic.xmp.EncryptedPdfsNotSupportedException) BibEntry(org.jabref.model.entry.BibEntry) DoiFetcher(org.jabref.logic.importer.fetcher.DoiFetcher) ArrayList(java.util.ArrayList) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) ParserResult(org.jabref.logic.importer.ParserResult) FetcherException(org.jabref.logic.importer.FetcherException) EntryType(org.jabref.model.entry.EntryType) PDDocument(org.apache.pdfbox.pdmodel.PDDocument) DOI(org.jabref.model.entry.identifier.DOI)

Example 22 with ParserResult

use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.

the class RepecNepImporter method importDatabase.

/**
 * Reads a REPEC-NEP document from the given reader and converts every working paper
 * found in it into a {@code techreport} entry.
 * <p>
 * Parsing state is kept in instance fields ({@code line}, {@code lastLine}, {@code preLine},
 * {@code inOverviewSection}) that the helper methods called here read and update.
 *
 * @param reader the source to read from; must not be {@code null}
 * @return the parsed entries, or an error result describing the line (and, if known, the paper
 *         number) at which parsing failed
 * @throws IOException declared by the importer contract; failures raised while parsing are
 *                     caught here and turned into an error result
 */
@Override
public ParserResult importDatabase(BufferedReader reader) throws IOException {
    Objects.requireNonNull(reader);
    List<BibEntry> bibitems = new ArrayList<>();
    // number of the paper currently being parsed; only used for error reporting
    String paperNoStr = null;
    this.line = 0;
    try {
        // skip header and editor information
        readLine(reader);
        while (this.lastLine != null) {
            if (this.lastLine.startsWith("-----------------------------")) {
                // a separator line: the line before it tells whether the overview section starts
                this.inOverviewSection = this.preLine.startsWith("In this issue we have");
            }
            if (isStartOfWorkingPaper()) {
                BibEntry be = new BibEntry();
                be.setType("techreport");
                // the paper number is everything before the first '.'
                paperNoStr = this.lastLine.substring(0, this.lastLine.indexOf('.'));
                parseTitleString(be, reader);
                if (startsWithKeyword(RepecNepImporter.RECOGNIZED_FIELDS)) {
                    parseAdditionalFields(be, false, reader);
                } else {
                    // skip empty line
                    readLine(reader);
                    parseAuthors(be, reader);
                    // skip empty line
                    readLine(reader);
                }
                if (!startsWithKeyword(RepecNepImporter.RECOGNIZED_FIELDS)) {
                    parseAbstract(be, reader);
                }
                parseAdditionalFields(be, true, reader);
                bibitems.add(be);
                paperNoStr = null;
            } else {
                this.preLine = this.lastLine;
                readLine(reader);
            }
        }
    } catch (Exception e) {
        // Import boundary: any failure is reported as an error result instead of propagating
        String message = "Error in REPEC-NEP import on line " + this.line;
        if (paperNoStr != null) {
            message += ", paper no. " + paperNoStr;
        }
        // Always separate the location from the cause, and never print a literal "null"
        String detail = e.getLocalizedMessage();
        if (detail == null) {
            detail = e.getClass().getSimpleName();
        }
        message += ": " + detail;
        LOGGER.error(message, e);
        return ParserResult.fromErrorMessage(message);
    }
    return new ParserResult(bibitems);
}
Also used : BibEntry(org.jabref.model.entry.BibEntry) ParserResult(org.jabref.logic.importer.ParserResult) ArrayList(java.util.ArrayList) IOException(java.io.IOException)

Example 23 with ParserResult

use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.

the class SilverPlatterImporter method importDatabase.

/**
 * Parses SilverPlatter export text into entries.
 * <p>
 * The input is first flattened into one string: lines shorter than two characters act as entry
 * separators, every other line starts a new field. Each field consists of a two-letter tag
 * (TI, AU, AB, DE, SO, PB, AF, DT), followed by the field content starting at offset 5.
 * <p>
 * The entry type is taken from the DT field. An entry without a DT field keeps the type of the
 * preceding entry (or the empty type for the first one).
 *
 * @param reader the source to read from
 * @return a result containing one entry per recognized record
 * @throws IOException if reading from {@code reader} fails
 */
@Override
public ParserResult importDatabase(BufferedReader reader) throws IOException {
    List<BibEntry> bibitems = new ArrayList<>();
    String str;
    StringBuilder sb = new StringBuilder();
    while ((str = reader.readLine()) != null) {
        if (str.length() < 2) {
            sb.append("__::__").append(str);
        } else {
            sb.append("__NEWFIELD__").append(str);
        }
    }
    String[] entries = sb.toString().split("__::__");
    String type = "";
    Map<String, String> h = new HashMap<>();
    for (String entry : entries) {
        if (entry.trim().length() < 6) {
            continue;
        }
        h.clear();
        // Chapter handling applies to the current entry only; it must not leak into later entries
        boolean isChapter = false;
        String[] fields = entry.split("__NEWFIELD__");
        for (String field : fields) {
            if (field.length() < 6) {
                continue;
            }
            // two-letter field tag, content starts at offset 5
            String tag = field.substring(0, 2);
            String frest = field.substring(5);
            if ("TI".equals(tag)) {
                h.put(FieldName.TITLE, frest);
            } else if ("AU".equals(tag)) {
                if (frest.trim().endsWith("(ed)")) {
                    // names marked with "(ed)" are editors; strip the marker
                    String ed = frest.trim();
                    ed = ed.substring(0, ed.length() - 4);
                    h.put(FieldName.EDITOR, AuthorList.fixAuthorLastNameFirst(ed.replace(",-", ", ").replace(";", " and ")));
                } else {
                    h.put(FieldName.AUTHOR, AuthorList.fixAuthorLastNameFirst(frest.replace(",-", ", ").replace(";", " and ")));
                }
            } else if ("AB".equals(tag)) {
                h.put(FieldName.ABSTRACT, frest);
            } else if ("DE".equals(tag)) {
                String kw = frest.replace("-;", ",").toLowerCase(Locale.ROOT);
                // drop the last character of the keyword list
                h.put(FieldName.KEYWORDS, kw.substring(0, kw.length() - 1));
            } else if ("SO".equals(tag)) {
                // source line: journal up to the first '.', then year before ';', then volume(issue):pages
                int m = frest.indexOf('.');
                if (m >= 0) {
                    String jr = frest.substring(0, m);
                    h.put(FieldName.JOURNAL, jr.replace("-", " "));
                    frest = frest.substring(m);
                    m = frest.indexOf(';');
                    if (m >= 5) {
                        String yr = frest.substring(m - 5, m).trim();
                        h.put(FieldName.YEAR, yr);
                        frest = frest.substring(m);
                        m = frest.indexOf(':');
                        int issueIndex = frest.indexOf('(');
                        int endIssueIndex = frest.indexOf(')');
                        if (m >= 0) {
                            String pg = frest.substring(m + 1).trim();
                            h.put(FieldName.PAGES, pg);
                            // volume/issue can only be cut out when a well-formed "(issue)" part exists;
                            // without this check a missing parenthesis makes substring throw
                            if ((issueIndex >= 1) && (endIssueIndex > issueIndex)) {
                                h.put(FieldName.VOLUME, frest.substring(1, issueIndex).trim());
                                h.put(FieldName.ISSUE, frest.substring(issueIndex + 1, endIssueIndex).trim());
                            }
                        }
                    }
                }
            } else if ("PB".equals(tag)) {
                int m = frest.indexOf(':');
                if (m >= 0) {
                    String jr = frest.substring(0, m);
                    h.put(FieldName.PUBLISHER, jr.replace("-", " ").trim());
                    frest = frest.substring(m);
                    m = frest.indexOf(", ");
                    if ((m + 2) < frest.length()) {
                        String yr = frest.substring(m + 2).trim();
                        try {
                            Integer.parseInt(yr);
                            h.put(FieldName.YEAR, yr);
                        } catch (NumberFormatException ignored) {
                            // Let's assume that this wasn't a number, since it
                            // couldn't be parsed as an integer.
                        }
                    }
                }
            } else if ("AF".equals(tag)) {
                h.put(FieldName.SCHOOL, frest.trim());
            } else if ("DT".equals(tag)) {
                frest = frest.trim();
                if ("Monograph".equals(frest)) {
                    type = "book";
                } else if (frest.startsWith("Dissertation")) {
                    type = "phdthesis";
                } else if (frest.toLowerCase(Locale.ROOT).contains(FieldName.JOURNAL)) {
                    type = "article";
                } else if ("Contribution".equals(frest) || "Chapter".equals(frest)) {
                    type = "incollection";
                    // This entry type contains page numbers and booktitle in the
                    // title field.
                    isChapter = true;
                } else {
                    type = frest.replace(" ", "");
                }
            }
        }
        if (isChapter) {
            // keep only the chapter title, i.e. the part in front of the closing quote followed by " in "
            String titleO = h.get(FieldName.TITLE);
            if (titleO != null) {
                String title = titleO.trim();
                int inPos = title.indexOf("\" in ");
                if (inPos > 1) {
                    h.put(FieldName.TITLE, title.substring(0, inPos));
                }
            }
        }
        BibEntry b = new BibEntry(type);
        // create one here
        b.setField(h);
        bibitems.add(b);
    }
    return new ParserResult(bibitems);
}
Also used : BibEntry(org.jabref.model.entry.BibEntry) ParserResult(org.jabref.logic.importer.ParserResult) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList)

Example 24 with ParserResult

use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.

the class ModsImporter method importDatabase.

/**
 * Unmarshals a MODS XML document and converts it into entries.
 * <p>
 * The JAXB context is created on first use and kept in the {@code context} field. The document
 * root may be either a MODS collection or a single MODS record; any other root element is logged
 * and yields an empty result.
 *
 * @param input the XML source; must not be {@code null}
 * @return the converted entries, or an error result if JAXB cannot parse the document
 * @throws IOException declared by the importer contract
 */
@Override
public ParserResult importDatabase(BufferedReader input) throws IOException {
    Objects.requireNonNull(input);
    List<BibEntry> entries = new ArrayList<>();
    try {
        if (context == null) {
            context = JAXBContext.newInstance("org.jabref.logic.importer.fileformat.mods");
        }
        Unmarshaller modsUnmarshaller = context.createUnmarshaller();
        // unmarshal() hands back the document root wrapped in a JAXBElement
        JAXBElement<?> root = (JAXBElement<?>) modsUnmarshaller.unmarshal(input);
        Optional<ModsCollectionDefinition> asCollection = getElement(root.getValue(), ModsCollectionDefinition.class);
        Optional<ModsDefinition> asSingleRecord = getElement(root.getValue(), ModsDefinition.class);
        if (asCollection.isPresent()) {
            // a collection: convert every contained record
            parseModsCollection(entries, asCollection.get().getMods());
        } else if (asSingleRecord.isPresent()) {
            // a single record at the root
            parseMods(entries, asSingleRecord.get());
        } else {
            LOGGER.warn("Not expected root element found");
        }
    } catch (JAXBException e) {
        LOGGER.debug("could not parse document", e);
        return ParserResult.fromError(e);
    }
    return new ParserResult(entries);
}
Also used : BibEntry(org.jabref.model.entry.BibEntry) ModsCollectionDefinition(org.jabref.logic.importer.fileformat.mods.ModsCollectionDefinition) ModsDefinition(org.jabref.logic.importer.fileformat.mods.ModsDefinition) JAXBException(javax.xml.bind.JAXBException) ArrayList(java.util.ArrayList) JAXBElement(javax.xml.bind.JAXBElement) ParserResult(org.jabref.logic.importer.ParserResult) Unmarshaller(javax.xml.bind.Unmarshaller)

Example 25 with ParserResult

use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.

the class MrDLibImporter method parse.

/**
 * Parses the server response into a {@link ParserResult} and stores it in the
 * {@code parserResult} field.
 * <p>
 * The response is read completely, parsed with a SAX parser, and the resulting entries are
 * inserted into a new database ordered by ascending rank. If the XML cannot be parsed, the error
 * is logged and whatever entries the handler collected so far are used.
 *
 * @param input a reader providing the server's response
 * @throws IOException if the response cannot be read
 */
private void parse(BufferedReader input) throws IOException {
    // The Bibdatabase that gets returned in the ParserResult.
    BibDatabase bibDatabase = new BibDatabase();
    // The document to parse
    String recommendations = convertToString(input);
    // The sorted BibEntries gets stored here later
    List<BibEntry> bibEntries = new ArrayList<>();
    //Parsing the response with a SAX parser
    try {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser saxParser = factory.newSAXParser();
        MrDlibImporterHandler handler = new MrDlibImporterHandler();
        try {
            // Feed the parser characters, not bytes: the text is already decoded, so re-encoding it
            // with the platform default charset could garble non-ASCII characters
            saxParser.parse(new org.xml.sax.InputSource(new java.io.StringReader(recommendations)), handler);
        } catch (SAXException e) {
            // keep going with the entries collected before the parse error
            LOGGER.error(e.getMessage(), e);
        }
        List<RankedBibEntry> rankedBibEntries = handler.getRankedBibEntries();
        rankedBibEntries.sort((RankedBibEntry rankedBibEntry1, RankedBibEntry rankedBibEntry2) -> rankedBibEntry1.rank.compareTo(rankedBibEntry2.rank));
        bibEntries = rankedBibEntries.stream().map(e -> e.entry).collect(Collectors.toList());
    } catch (ParserConfigurationException | SAXException e) {
        // the parser itself could not be created; the result stays empty
        LOGGER.error(e.getMessage(), e);
    }
    for (BibEntry bibentry : bibEntries) {
        bibDatabase.insertEntry(bibentry);
    }
    parserResult = new ParserResult(bibDatabase);
}
Also used : BibEntry(org.jabref.model.entry.BibEntry) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) SAXException(org.xml.sax.SAXException) ParserResult(org.jabref.logic.importer.ParserResult) ByteArrayInputStream(java.io.ByteArrayInputStream) SAXParser(javax.xml.parsers.SAXParser) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException) BibDatabase(org.jabref.model.database.BibDatabase) SAXParserFactory(javax.xml.parsers.SAXParserFactory)

Aggregations

ParserResult (org.jabref.logic.importer.ParserResult)196 Test (org.junit.Test)145 BibEntry (org.jabref.model.entry.BibEntry)131 StringReader (java.io.StringReader)130 BibtexParser (org.jabref.logic.importer.fileformat.BibtexParser)38 BibtexString (org.jabref.model.entry.BibtexString)30 ArrayList (java.util.ArrayList)23 BibDatabase (org.jabref.model.database.BibDatabase)20 Path (java.nio.file.Path)14 IOException (java.io.IOException)12 StringWriter (java.io.StringWriter)12 File (java.io.File)10 InputStreamReader (java.io.InputStreamReader)10 HashMap (java.util.HashMap)10 BibDatabaseContext (org.jabref.model.database.BibDatabaseContext)9 InputStream (java.io.InputStream)8 Defaults (org.jabref.model.Defaults)8 Charset (java.nio.charset.Charset)6 Scanner (java.util.Scanner)5 BufferedReader (java.io.BufferedReader)4