Search in sources :

Example 16 with ParserResult

use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.

the class BibtexParser method initializeParserResult.

/**
 * Assigns fresh parsing state: a new {@code BibDatabase}, an empty map for
 * custom entry types, and a {@code ParserResult} built from both together
 * with a new, empty {@code MetaData} instance.
 */
private void initializeParserResult() {
    // Collects the custom entry types encountered while parsing.
    entryTypes = new HashMap<>();
    database = new BibDatabase();
    parserResult = new ParserResult(database, new MetaData(), entryTypes);
}
Also used : ParserResult(org.jabref.logic.importer.ParserResult) MetaData(org.jabref.model.metadata.MetaData) BibDatabase(org.jabref.model.database.BibDatabase)

Example 17 with ParserResult

use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.

the class CopacImporter method importDatabase.

/**
 * Reads Copac-style tagged text from {@code reader} and converts it into
 * entries.
 *
 * <p>The input is processed in two passes. First the raw lines are grouped
 * into items: a line starting with {@code "TI- "} opens a new item, a line
 * starting with four spaces is a continuation that is appended (trimmed and
 * separated by a single space) to the current text, and lines shorter than
 * four characters are ignored. Then every item is split back into lines and
 * each line's four-character tag decides which field receives the value.
 *
 * @param reader source of the text to import; must not be {@code null}
 * @return a {@code ParserResult} wrapping the created entries
 * @throws IOException if reading from {@code reader} fails
 */
@Override
public ParserResult importDatabase(BufferedReader reader) throws IOException {
    Objects.requireNonNull(reader);

    // Pass 1: group the raw lines into one string per item.
    List<String> items = new LinkedList<>();
    StringBuilder current = new StringBuilder();
    for (String raw = reader.readLine(); raw != null; raw = reader.readLine()) {
        if (raw.length() < 4) {
            continue;
        }
        if (raw.startsWith("    ")) {
            // Continuation line: glue it onto the text collected so far.
            current.append(' ').append(raw.trim());
            continue;
        }
        if (raw.startsWith("TI- ")) {
            // Beginning of a new item: flush the previous one, if any.
            if (current.length() > 0) {
                items.add(current.toString());
            }
            current = new StringBuilder();
        }
        current.append('\n').append(raw);
    }
    if (current.length() > 0) {
        items.add(current.toString());
    }

    // Pass 2: turn every item into an entry.
    List<BibEntry> parsed = new LinkedList<>();
    for (String item : items) {
        // Copac does not contain enough information on the type of the
        // document. A book is assumed.
        BibEntry bibEntry = new BibEntry("book");
        for (String rawLine : item.split("\n")) {
            String line = rawLine.trim();
            if (line.length() < 4) {
                continue;
            }
            String code = line.substring(0, 4);
            String value = line.substring(4).trim();
            String separator = ", ";
            String field;
            switch (code) {
                case "TI- ":
                    field = FieldName.TITLE;
                    break;
                case "AU- ":
                    field = FieldName.AUTHOR;
                    separator = " and ";
                    break;
                case "PY- ":
                    field = FieldName.YEAR;
                    break;
                case "PU- ":
                    field = FieldName.PUBLISHER;
                    break;
                case "SE- ":
                    field = FieldName.SERIES;
                    break;
                case "IS- ":
                    field = FieldName.ISBN;
                    break;
                case "KW- ":
                    field = FieldName.KEYWORDS;
                    break;
                case "NT- ":
                    field = FieldName.NOTE;
                    break;
                case "PD- ":
                    field = "physicaldimensions";
                    break;
                case "DT- ":
                    field = "documenttype";
                    break;
                default:
                    // Unknown tag: use its first two characters as the field name.
                    field = code.substring(0, 2);
                    break;
            }
            setOrAppend(bibEntry, field, value, separator);
        }
        parsed.add(bibEntry);
    }
    return new ParserResult(parsed);
}
Also used : BibEntry(org.jabref.model.entry.BibEntry) ParserResult(org.jabref.logic.importer.ParserResult) LinkedList(java.util.LinkedList)

Example 18 with ParserResult

use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.

the class EndnoteImporter method importDatabase.

/**
 * Reads Refer/EndNote-style tagged text from {@code reader} and converts it
 * into entries.
 *
 * <p>All lines are trimmed and concatenated; {@code ENDOFRECORD} is inserted
 * in front of every line starting with {@code "%0"} except the first one, and
 * the text is then split into records on that marker. Within a record each
 * field starts with {@code '%'} followed by a one-character code. Records
 * that are empty after trimming are skipped, and an entry is only added to
 * the result if at least one field was set on it.
 *
 * @param reader source of the text to import
 * @return a {@code ParserResult} wrapping the created entries
 * @throws IOException if reading from {@code reader} fails
 */
@Override
public ParserResult importDatabase(BufferedReader reader) throws IOException {
    List<BibEntry> bibitems = new ArrayList<>();
    StringBuilder sb = new StringBuilder();
    String str;
    boolean first = true;
    while ((str = reader.readLine()) != null) {
        str = str.trim();
        // A "%0" line opens a new record; separate it from the previous one.
        if (str.startsWith("%0")) {
            if (first) {
                first = false;
            } else {
                sb.append(ENDOFRECORD);
            }
        }
        sb.append(str);
        sb.append('\n');
    }
    String[] entries = sb.toString().split(ENDOFRECORD);
    Map<String, String> hm = new HashMap<>();
    String author;
    String type;
    String editor;
    String artnum;
    for (String entry : entries) {
        // Empty or blank input yields an empty record; substring(1) below
        // would throw on it, so skip it.
        String record = entry.trim();
        if (record.isEmpty()) {
            continue;
        }
        hm.clear();
        author = "";
        type = BibEntry.DEFAULT_TYPE;
        editor = "";
        artnum = "";
        boolean isEditedBook = false;
        // Drop the first character (the leading '%') so that every field
        // begins directly with its one-character code.
        String[] fields = record.substring(1).split("\n%");
        for (String field : fields) {
            if (field.length() < 3) {
                continue;
            }
            /*
             * Details of Refer format for Journal Article and Book:
             *
             * Generic Ref Journal Article Book Code Author %A Author Author Year %D
             * Year Year Title %T Title Title Secondary Author %E Series Editor
             * Secondary Title %B Journal Series Title Place Published %C City
             * Publisher %I Publisher Volume %V Volume Volume Number of Volumes %6
             * Number of Volumes Number %N Issue Pages %P Pages Number of Pages
             * Edition %7 Edition Subsidiary Author %? Translator Alternate Title %J
             * Alternate Journal Label %F Label Label Keywords %K Keywords Keywords
             * Abstract %X Abstract Abstract Notes %O Notes Notes
             */
            String prefix = field.substring(0, 1);
            // Skip the code and the character following it.
            String val = field.substring(2);
            if ("A".equals(prefix)) {
                if ("".equals(author)) {
                    author = val;
                } else {
                    author += " and " + val;
                }
            } else if ("E".equals(prefix)) {
                if ("".equals(editor)) {
                    editor = val;
                } else {
                    editor += " and " + val;
                }
            } else if ("T".equals(prefix)) {
                hm.put(FieldName.TITLE, val);
            } else if ("0".equals(prefix)) {
                if (val.startsWith("Journal")) {
                    type = "article";
                } else if (val.startsWith("Book Section")) {
                    type = "incollection";
                } else if (val.startsWith("Book")) {
                    type = "book";
                } else if (val.startsWith("Edited Book")) {
                    type = "book";
                    isEditedBook = true;
                } else if (val.startsWith("Conference")) {
                    type = "inproceedings";
                } else if (val.startsWith("Report")) {
                    type = "techreport";
                } else if (val.startsWith("Review")) {
                    type = "article";
                } else if (val.startsWith("Thesis")) {
                    type = "phdthesis";
                } else {
                    // Unrecognized reference type: fall back to the default.
                    type = BibEntry.DEFAULT_TYPE;
                }
            } else if ("7".equals(prefix)) {
                hm.put(FieldName.EDITION, val);
            } else if ("C".equals(prefix)) {
                hm.put(FieldName.ADDRESS, val);
            } else if ("D".equals(prefix)) {
                hm.put(FieldName.YEAR, val);
            } else if ("8".equals(prefix)) {
                hm.put(FieldName.DATE, val);
            } else if ("J".equals(prefix)) {
                // Alternate journal. Set it only if no journal has been
                // stored yet (e.g. by %B).
                hm.putIfAbsent(FieldName.JOURNAL, val);
            } else if ("B".equals(prefix)) {
                // Stored as the journal of an article, the series of a
                // book/inbook, and as the book title for every other type.
                if ("article".equals(type)) {
                    hm.put(FieldName.JOURNAL, val);
                } else if ("book".equals(type) || "inbook".equals(type)) {
                    hm.put(FieldName.SERIES, val);
                } else {
                    /* type = inproceedings */
                    hm.put(FieldName.BOOKTITLE, val);
                }
            } else if ("I".equals(prefix)) {
                if ("phdthesis".equals(type)) {
                    hm.put(FieldName.SCHOOL, val);
                } else {
                    hm.put(FieldName.PUBLISHER, val);
                }
            } else if ("P".equals(prefix)) {
                // Replace single dash page ranges (23-45) with double dashes (23--45).
                hm.put(FieldName.PAGES, val.replaceAll("([0-9]) *- *([0-9])", "$1--$2"));
            } else if ("V".equals(prefix)) {
                hm.put(FieldName.VOLUME, val);
            } else if ("N".equals(prefix)) {
                hm.put(FieldName.NUMBER, val);
            } else if ("U".equals(prefix)) {
                hm.put(FieldName.URL, val);
            } else if ("R".equals(prefix)) {
                String doi = val;
                if (doi.startsWith("doi:")) {
                    doi = doi.substring(4);
                }
                hm.put(FieldName.DOI, doi);
            } else if ("O".equals(prefix)) {
                // Notes may contain the article number ("Artn <number>").
                if (val.startsWith("Artn")) {
                    String[] tokens = val.split("\\s");
                    // Guard against a bare "Artn" without a number after it.
                    if (tokens.length > 1) {
                        artnum = tokens[1];
                    }
                } else {
                    hm.put(FieldName.NOTE, val);
                }
            } else if ("K".equals(prefix)) {
                hm.put(FieldName.KEYWORDS, val);
            } else if ("X".equals(prefix)) {
                hm.put(FieldName.ABSTRACT, val);
            } else if ("9".equals(prefix)) {
                if (val.startsWith("Ph.D.")) {
                    type = "phdthesis";
                }
                if (val.startsWith("Masters")) {
                    type = "mastersthesis";
                }
            } else if ("F".equals(prefix)) {
                hm.put(BibEntry.KEY_FIELD, BibtexKeyPatternUtil.checkLegalKey(val, preferences.getBibtexKeyPatternPreferences().isEnforceLegalKey()));
            }
        }
        // For an edited book without explicit editors the names collected as
        // authors are moved to the editor field, so that bibtex knows it's
        // an edited book.
        if (isEditedBook && "".equals(editor)) {
            editor = author;
            author = "";
        }
        // Normalize the collected names before storing them.
        if (!"".equals(author)) {
            hm.put(FieldName.AUTHOR, fixAuthor(author));
        }
        if (!"".equals(editor)) {
            hm.put(FieldName.EDITOR, fixAuthor(editor));
        }
        // If pages are missing (or just "-") and an article number was
        // given, use the article number instead.
        if (((hm.get(FieldName.PAGES) == null) || "-".equals(hm.get(FieldName.PAGES))) && !"".equals(artnum)) {
            hm.put(FieldName.PAGES, artnum);
        }
        BibEntry b = new BibEntry(type);
        b.setField(hm);
        if (!b.getFieldNames().isEmpty()) {
            bibitems.add(b);
        }
    }
    return new ParserResult(bibitems);
}
Also used : BibEntry(org.jabref.model.entry.BibEntry) ParserResult(org.jabref.logic.importer.ParserResult) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList)

Example 19 with ParserResult

use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.

the class MedlineFetcher method fetchMedline.

/**
 * Fetches and parses Medline items from eutils.ncbi.nlm.nih.gov.
 * The E-utilities generate a single, potentially huge XML file containing all entries for the IDs.
 *
 * <p>Warnings reported by the importer are logged; every returned entry is passed through
 * {@code doPostCleanup}.
 *
 * @param ids A list of IDs to search for.
 * @return The parsed entries of the importer's result database.
 * @throws FetcherException if the fetch URL cannot be generated or reading the response fails.
 */
private List<BibEntry> fetchMedline(List<String> ids) throws FetcherException {
    try {
        //Separate the IDs with a comma to search multiple entries
        URL fetchURL = getURLForID(String.join(",", ids));
        URLConnection data = fetchURL.openConnection();
        // try-with-resources closes the reader (and with it the connection's input stream)
        // even when parsing fails.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(data.getInputStream(), StandardCharsets.UTF_8))) {
            ParserResult result = new MedlineImporter().importDatabase(reader);
            if (result.hasWarnings()) {
                LOGGER.warn(result.getErrorMessage());
            }
            List<BibEntry> resultList = result.getDatabase().getEntries();
            resultList.forEach(this::doPostCleanup);
            return resultList;
        }
    } catch (URISyntaxException | MalformedURLException e) {
        throw new FetcherException("Error while generating fetch URL", Localization.lang("Error while generating fetch URL"), e);
    } catch (IOException e) {
        throw new FetcherException("Error while fetching from Medline", Localization.lang("Error while fetching from %0", "Medline"), e);
    }
}
Also used : BibEntry(org.jabref.model.entry.BibEntry) MalformedURLException(java.net.MalformedURLException) InputStreamReader(java.io.InputStreamReader) URISyntaxException(java.net.URISyntaxException) IOException(java.io.IOException) URL(java.net.URL) URLConnection(java.net.URLConnection) ParserResult(org.jabref.logic.importer.ParserResult) FetcherException(org.jabref.logic.importer.FetcherException) BufferedReader(java.io.BufferedReader) MedlineImporter(org.jabref.logic.importer.fileformat.MedlineImporter)

Example 20 with ParserResult

use of org.jabref.logic.importer.ParserResult in project jabref by JabRef.

the class OvidImporter method importDatabase.

/**
 * Reads Ovid-style text from {@code reader} and converts it into entries.
 *
 * <p>Every non-empty line that does not start with a space is treated as the
 * start of a new field and is prefixed with the marker {@code "__NEWFIELD__"}.
 * The concatenated text is split into items with {@code OVID_PATTERN_STRING};
 * the chunk before the first separator is ignored. Each item is split into
 * fields on the marker; the first line of a field is its name and the rest is
 * its content.
 *
 * @param reader source of the text to import
 * @return a {@code ParserResult} wrapping the created entries
 * @throws IOException if reading from {@code reader} fails
 */
@Override
public ParserResult importDatabase(BufferedReader reader) throws IOException {
    List<BibEntry> bibitems = new ArrayList<>();
    StringBuilder sb = new StringBuilder();
    String line;
    while ((line = reader.readLine()) != null) {
        if (!line.isEmpty() && (line.charAt(0) != ' ')) {
            sb.append("__NEWFIELD__");
        }
        sb.append(line);
        sb.append('\n');
    }
    String[] items = sb.toString().split(OVID_PATTERN_STRING);
    // items[0] is whatever precedes the first separator, so start at 1.
    for (int i = 1; i < items.length; i++) {
        Map<String, String> h = new HashMap<>();
        String[] fields = items[i].split("__NEWFIELD__");
        for (String field : fields) {
            int linebreak = field.indexOf('\n');
            if (linebreak < 0) {
                // A chunk without a line break (e.g. an empty leading element
                // produced by split) has no name/content pair; substring
                // would throw on it, so skip it.
                continue;
            }
            String fieldName = field.substring(0, linebreak).trim();
            String content = field.substring(linebreak).trim();
            // Check if this is the author field (due to a minor special treatment for this field):
            boolean isAuthor = (fieldName.indexOf("Author") == 0) && !fieldName.contains("Author Keywords") && !fieldName.contains("Author e-mail");
            // Remove a trailing dot, unless this is the author field,
            // in which case a dot at the end could be significant:
            if (!isAuthor && content.endsWith(".")) {
                content = content.substring(0, content.length() - 1);
            }
            if (isAuthor) {
                h.put(FieldName.AUTHOR, content);
            } else if (fieldName.startsWith("Title")) {
                // Drop bracketed text from the title, then a trailing dot.
                content = content.replaceAll("\\[.+\\]", "").trim();
                if (content.endsWith(".")) {
                    content = content.substring(0, content.length() - 1);
                }
                h.put(FieldName.TITLE, content);
            } else if (fieldName.startsWith("Chapter Title")) {
                h.put("chaptertitle", content);
            } else if (fieldName.startsWith("Source")) {
                // Try the known source layouts in order; the first pattern
                // that matches decides which fields are filled.
                Matcher matcher;
                if ((matcher = OvidImporter.OVID_SOURCE_PATTERN.matcher(content)).find()) {
                    h.put(FieldName.JOURNAL, matcher.group(1));
                    h.put(FieldName.VOLUME, matcher.group(2));
                    h.put(FieldName.ISSUE, matcher.group(3));
                    h.put(FieldName.PAGES, matcher.group(4));
                    h.put(FieldName.YEAR, matcher.group(5));
                } else if ((matcher = OvidImporter.OVID_SOURCE_PATTERN_NO_ISSUE.matcher(content)).find()) {
                    // may be missing the issue
                    h.put(FieldName.JOURNAL, matcher.group(1));
                    h.put(FieldName.VOLUME, matcher.group(2));
                    h.put(FieldName.PAGES, matcher.group(3));
                    h.put(FieldName.YEAR, matcher.group(4));
                } else if ((matcher = OvidImporter.OVID_SOURCE_PATTERN_2.matcher(content)).find()) {
                    h.put(FieldName.JOURNAL, matcher.group(1));
                    h.put(FieldName.VOLUME, matcher.group(2));
                    h.put(FieldName.ISSUE, matcher.group(3));
                    h.put(FieldName.MONTH, matcher.group(4));
                    h.put(FieldName.YEAR, matcher.group(5));
                    h.put(FieldName.PAGES, matcher.group(6));
                } else if ((matcher = OvidImporter.INCOLLECTION_PATTERN.matcher(content)).find()) {
                    h.put(FieldName.EDITOR, matcher.group(1).replace(" (Ed)", ""));
                    h.put(FieldName.YEAR, matcher.group(2));
                    h.put(FieldName.BOOKTITLE, matcher.group(3));
                    h.put(FieldName.PAGES, matcher.group(4));
                    h.put(FieldName.ADDRESS, matcher.group(5));
                    h.put(FieldName.PUBLISHER, matcher.group(6));
                } else if ((matcher = OvidImporter.BOOK_PATTERN.matcher(content)).find()) {
                    h.put(FieldName.YEAR, matcher.group(1));
                    h.put(FieldName.PAGES, matcher.group(2));
                    h.put(FieldName.ADDRESS, matcher.group(3));
                    h.put(FieldName.PUBLISHER, matcher.group(4));
                }
                // Add double hyphens to page ranges:
                if (h.get(FieldName.PAGES) != null) {
                    h.put(FieldName.PAGES, h.get(FieldName.PAGES).replace("-", "--"));
                }
            } else if ("Abstract".equals(fieldName)) {
                h.put(FieldName.ABSTRACT, content);
            } else if ("Publication Type".equals(fieldName)) {
                // The type is parked in the map under TYPE_HEADER and taken
                // out again before the entry is created.
                if (content.contains("Book")) {
                    h.put(BibEntry.TYPE_HEADER, "book");
                } else if (content.contains("Journal")) {
                    h.put(BibEntry.TYPE_HEADER, "article");
                } else if (content.contains("Conference Paper")) {
                    h.put(BibEntry.TYPE_HEADER, "inproceedings");
                }
            } else if (fieldName.startsWith("Language")) {
                h.put(FieldName.LANGUAGE, content);
            } else if (fieldName.startsWith("Author Keywords")) {
                content = content.replace(";", ",").replace("  ", " ");
                h.put(FieldName.KEYWORDS, content);
            } else if (fieldName.startsWith("ISSN")) {
                h.put(FieldName.ISSN, content);
            } else if (fieldName.startsWith("DOI Number")) {
                h.put(FieldName.DOI, content);
            }
        }
        // Now we need to check if a book entry has given editors in the author field;
        // if so, rearrange:
        String auth = h.get(FieldName.AUTHOR);
        if ((auth != null) && auth.contains(" [Ed]")) {
            h.remove(FieldName.AUTHOR);
            h.put(FieldName.EDITOR, auth.replace(" [Ed]", ""));
        }
        // Rearrange names properly:
        auth = h.get(FieldName.AUTHOR);
        if (auth != null) {
            h.put(FieldName.AUTHOR, fixNames(auth));
        }
        auth = h.get(FieldName.EDITOR);
        if (auth != null) {
            h.put(FieldName.EDITOR, fixNames(auth));
        }
        // Set the entrytype properly:
        String entryType = h.containsKey(BibEntry.TYPE_HEADER) ? h.get(BibEntry.TYPE_HEADER) : BibEntry.DEFAULT_TYPE;
        h.remove(BibEntry.TYPE_HEADER);
        if ("book".equals(entryType) && h.containsKey("chaptertitle")) {
            // This means we have an "incollection" entry.
            entryType = "incollection";
            // Move the "chaptertitle" to just "title":
            h.put(FieldName.TITLE, h.remove("chaptertitle"));
        }
        BibEntry b = new BibEntry(entryType);
        b.setField(h);
        bibitems.add(b);
    }
    return new ParserResult(bibitems);
}
Also used : BibEntry(org.jabref.model.entry.BibEntry) ParserResult(org.jabref.logic.importer.ParserResult) HashMap(java.util.HashMap) Matcher(java.util.regex.Matcher) ArrayList(java.util.ArrayList)

Aggregations

ParserResult (org.jabref.logic.importer.ParserResult)196 Test (org.junit.Test)145 BibEntry (org.jabref.model.entry.BibEntry)131 StringReader (java.io.StringReader)130 BibtexParser (org.jabref.logic.importer.fileformat.BibtexParser)38 BibtexString (org.jabref.model.entry.BibtexString)30 ArrayList (java.util.ArrayList)23 BibDatabase (org.jabref.model.database.BibDatabase)20 Path (java.nio.file.Path)14 IOException (java.io.IOException)12 StringWriter (java.io.StringWriter)12 File (java.io.File)10 InputStreamReader (java.io.InputStreamReader)10 HashMap (java.util.HashMap)10 BibDatabaseContext (org.jabref.model.database.BibDatabaseContext)9 InputStream (java.io.InputStream)8 Defaults (org.jabref.model.Defaults)8 Charset (java.nio.charset.Charset)6 Scanner (java.util.Scanner)5 BufferedReader (java.io.BufferedReader)4