
Example 76 with BibEntry

Use of org.jabref.model.entry.BibEntry in project jabref by JabRef.

The class GvkParser, method parseEntries:

private List<BibEntry> parseEntries(Document content) {
    List<BibEntry> result = new LinkedList<>();
    // used for creating test cases
    // XMLUtil.printDocument(content);
    // Namespace srwNamespace = Namespace.getNamespace("srw","http://www.loc.gov/zing/srw/");
    // loop over all partial results
    //Element root = content.getDocumentElement();
    Element root = (Element) content.getElementsByTagName("zs:searchRetrieveResponse").item(0);
    Element srwrecords = getChild("zs:records", root);
    if (srwrecords == null) {
        // no records found -> return empty list
        return result;
    }
    List<Element> records = getChildren("zs:record", srwrecords);
    for (Element record : records) {
        Element e = getChild("zs:recordData", record);
        if (e != null) {
            e = getChild("record", e);
            if (e != null) {
                result.add(parseEntry(e));
            }
        }
    }
    return result;
}
Also used : BibEntry(org.jabref.model.entry.BibEntry) Element(org.w3c.dom.Element) LinkedList(java.util.LinkedList)
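
The method above expects an already-parsed org.w3c.dom.Document of the SRU response (parseEntries itself is private, so it is only reachable from within GvkParser). A minimal sketch of the surrounding plumbing, assuming the response body is available as an InputStream; the helper name is illustrative and not part of JabRef:

import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;

// Hypothetical helper (not part of GvkParser): builds the DOM Document that a
// parseEntries-style method consumes from an SRU response stream. The factory stays
// non-namespace-aware, which matches the literal "zs:"-prefixed tag lookups above.
static Document readSruResponse(InputStream stream)
        throws ParserConfigurationException, SAXException, IOException {
    DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    return builder.parse(stream);
}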

Example 77 with BibEntry

Use of org.jabref.model.entry.BibEntry in project jabref by JabRef.

The class EntryBasedParserFetcher, method performSearch:

@Override
default List<BibEntry> performSearch(BibEntry entry) throws FetcherException {
    Objects.requireNonNull(entry);
    try (InputStream stream = new BufferedInputStream(getURLForEntry(entry).openStream())) {
        List<BibEntry> fetchedEntries = getParser().parseEntries(stream);
        // Post-cleanup
        fetchedEntries.forEach(this::doPostCleanup);
        return fetchedEntries;
    } catch (URISyntaxException e) {
        throw new FetcherException("Search URI is malformed", e);
    } catch (IOException e) {
        // TODO: Catch HTTP Response 401 errors and report that user has no rights to access resource
        throw new FetcherException("An I/O exception occurred", e);
    } catch (ParseException e) {
        throw new FetcherException("An internal parser error occurred", e);
    }
}
Also used : BibEntry(org.jabref.model.entry.BibEntry) BufferedInputStream(java.io.BufferedInputStream) InputStream(java.io.InputStream) URISyntaxException(java.net.URISyntaxException) IOException(java.io.IOException)
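
A minimal calling sketch for this default method, assuming some concrete EntryBasedParserFetcher implementation is available; the fetcher parameter and the title value are illustrative only:

import java.util.List;
import org.jabref.logic.importer.EntryBasedParserFetcher;
import org.jabref.logic.importer.FetcherException;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.FieldName;

// Hypothetical caller: look up entries related to an existing entry.
// "fetcher" stands for any concrete EntryBasedParserFetcher implementation.
static List<BibEntry> lookupRelated(EntryBasedParserFetcher fetcher) throws FetcherException {
    BibEntry entry = new BibEntry();
    // illustrative query data; real callers would pass an entry from the open database
    entry.setField(FieldName.TITLE, "Example title");
    return fetcher.performSearch(entry);
}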

Example 78 with BibEntry

Use of org.jabref.model.entry.BibEntry in project jabref by JabRef.

The class IdBasedParserFetcher, method performSearchById:

@Override
default Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
    if (StringUtil.isBlank(identifier)) {
        return Optional.empty();
    }
    try (InputStream stream = new BufferedInputStream(getURLForID(identifier).openStream())) {
        List<BibEntry> fetchedEntries = getParser().parseEntries(stream);
        if (fetchedEntries.isEmpty()) {
            return Optional.empty();
        }
        if (fetchedEntries.size() > 1) {
            LOGGER.info("Fetcher " + getName() + " found more than one result for identifier " + identifier + ". We will use the first entry.");
        }
        BibEntry entry = fetchedEntries.get(0);
        // Post-cleanup
        doPostCleanup(entry);
        return Optional.of(entry);
    } catch (URISyntaxException e) {
        throw new FetcherException("Search URI is malformed", e);
    } catch (FileNotFoundException e) {
        LOGGER.debug("Id not found");
        return Optional.empty();
    } catch (IOException e) {
        // TODO: Catch HTTP Response 401 errors and report that user has no rights to access resource
        throw new FetcherException("An I/O exception occurred", e);
    } catch (ParseException e) {
        throw new FetcherException("An internal parser error occurred", e);
    }
}
Also used : BibEntry(org.jabref.model.entry.BibEntry) BufferedInputStream(java.io.BufferedInputStream) InputStream(java.io.InputStream) FileNotFoundException(java.io.FileNotFoundException) URISyntaxException(java.net.URISyntaxException) IOException(java.io.IOException)
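
A minimal calling sketch, again assuming a concrete IdBasedParserFetcher implementation is at hand; the identifier is only an example DOI:

import java.util.Optional;
import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.IdBasedParserFetcher;
import org.jabref.model.entry.BibEntry;

// Hypothetical caller: resolve a single identifier to at most one entry.
// "fetcher" stands for any concrete IdBasedParserFetcher; the DOI is only an example value.
static Optional<BibEntry> resolve(IdBasedParserFetcher fetcher) throws FetcherException {
    return fetcher.performSearchById("10.1000/182");
}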

Example 79 with BibEntry

Use of org.jabref.model.entry.BibEntry in project jabref by JabRef.

The class OvidImporter, method importDatabase:

@Override
public ParserResult importDatabase(BufferedReader reader) throws IOException {
    List<BibEntry> bibitems = new ArrayList<>();
    StringBuilder sb = new StringBuilder();
    String line;
    while ((line = reader.readLine()) != null) {
        if (!line.isEmpty() && (line.charAt(0) != ' ')) {
            sb.append("__NEWFIELD__");
        }
        sb.append(line);
        sb.append('\n');
    }
    String[] items = sb.toString().split(OVID_PATTERN_STRING);
    for (int i = 1; i < items.length; i++) {
        Map<String, String> h = new HashMap<>();
        String[] fields = items[i].split("__NEWFIELD__");
        for (String field : fields) {
            int linebreak = field.indexOf('\n');
            String fieldName = field.substring(0, linebreak).trim();
            String content = field.substring(linebreak).trim();
            // Check if this is the author field (due to a minor special treatment for this field):
            boolean isAuthor = (fieldName.indexOf("Author") == 0) && !fieldName.contains("Author Keywords") && !fieldName.contains("Author e-mail");
            // Remove a trailing dot, unless this is the author field, in which case a dot at the end could be significant:
            if (!isAuthor && content.endsWith(".")) {
                content = content.substring(0, content.length() - 1);
            }
            if (isAuthor) {
                h.put(FieldName.AUTHOR, content);
            } else if (fieldName.startsWith("Title")) {
                content = content.replaceAll("\\[.+\\]", "").trim();
                if (content.endsWith(".")) {
                    content = content.substring(0, content.length() - 1);
                }
                h.put(FieldName.TITLE, content);
            } else if (fieldName.startsWith("Chapter Title")) {
                h.put("chaptertitle", content);
            } else if (fieldName.startsWith("Source")) {
                Matcher matcher;
                if ((matcher = OvidImporter.OVID_SOURCE_PATTERN.matcher(content)).find()) {
                    h.put(FieldName.JOURNAL, matcher.group(1));
                    h.put(FieldName.VOLUME, matcher.group(2));
                    h.put(FieldName.ISSUE, matcher.group(3));
                    h.put(FieldName.PAGES, matcher.group(4));
                    h.put(FieldName.YEAR, matcher.group(5));
                } else if ((matcher = OvidImporter.OVID_SOURCE_PATTERN_NO_ISSUE.matcher(content)).find()) {
                    // may be missing the issue
                    h.put(FieldName.JOURNAL, matcher.group(1));
                    h.put(FieldName.VOLUME, matcher.group(2));
                    h.put(FieldName.PAGES, matcher.group(3));
                    h.put(FieldName.YEAR, matcher.group(4));
                } else if ((matcher = OvidImporter.OVID_SOURCE_PATTERN_2.matcher(content)).find()) {
                    h.put(FieldName.JOURNAL, matcher.group(1));
                    h.put(FieldName.VOLUME, matcher.group(2));
                    h.put(FieldName.ISSUE, matcher.group(3));
                    h.put(FieldName.MONTH, matcher.group(4));
                    h.put(FieldName.YEAR, matcher.group(5));
                    h.put(FieldName.PAGES, matcher.group(6));
                } else if ((matcher = OvidImporter.INCOLLECTION_PATTERN.matcher(content)).find()) {
                    h.put(FieldName.EDITOR, matcher.group(1).replace(" (Ed)", ""));
                    h.put(FieldName.YEAR, matcher.group(2));
                    h.put(FieldName.BOOKTITLE, matcher.group(3));
                    h.put(FieldName.PAGES, matcher.group(4));
                    h.put(FieldName.ADDRESS, matcher.group(5));
                    h.put(FieldName.PUBLISHER, matcher.group(6));
                } else if ((matcher = OvidImporter.BOOK_PATTERN.matcher(content)).find()) {
                    h.put(FieldName.YEAR, matcher.group(1));
                    h.put(FieldName.PAGES, matcher.group(2));
                    h.put(FieldName.ADDRESS, matcher.group(3));
                    h.put(FieldName.PUBLISHER, matcher.group(4));
                }
                // Add double hyphens to page ranges:
                if (h.get(FieldName.PAGES) != null) {
                    h.put(FieldName.PAGES, h.get(FieldName.PAGES).replace("-", "--"));
                }
            } else if ("Abstract".equals(fieldName)) {
                h.put(FieldName.ABSTRACT, content);
            } else if ("Publication Type".equals(fieldName)) {
                if (content.contains("Book")) {
                    h.put(BibEntry.TYPE_HEADER, "book");
                } else if (content.contains("Journal")) {
                    h.put(BibEntry.TYPE_HEADER, "article");
                } else if (content.contains("Conference Paper")) {
                    h.put(BibEntry.TYPE_HEADER, "inproceedings");
                }
            } else if (fieldName.startsWith("Language")) {
                h.put(FieldName.LANGUAGE, content);
            } else if (fieldName.startsWith("Author Keywords")) {
                content = content.replace(";", ",").replace("  ", " ");
                h.put(FieldName.KEYWORDS, content);
            } else if (fieldName.startsWith("ISSN")) {
                h.put(FieldName.ISSN, content);
            } else if (fieldName.startsWith("DOI Number")) {
                h.put(FieldName.DOI, content);
            }
        }
        // Now we need to check if a book entry has given editors in the author field;
        // if so, rearrange:
        String auth = h.get(FieldName.AUTHOR);
        if ((auth != null) && auth.contains(" [Ed]")) {
            h.remove(FieldName.AUTHOR);
            h.put(FieldName.EDITOR, auth.replace(" [Ed]", ""));
        }
        // Rearrange names properly:
        auth = h.get(FieldName.AUTHOR);
        if (auth != null) {
            h.put(FieldName.AUTHOR, fixNames(auth));
        }
        auth = h.get(FieldName.EDITOR);
        if (auth != null) {
            h.put(FieldName.EDITOR, fixNames(auth));
        }
        // Set the entrytype properly:
        String entryType = h.containsKey(BibEntry.TYPE_HEADER) ? h.get(BibEntry.TYPE_HEADER) : BibEntry.DEFAULT_TYPE;
        h.remove(BibEntry.TYPE_HEADER);
        if ("book".equals(entryType) && h.containsKey("chaptertitle")) {
            // This means we have an "incollection" entry.
            entryType = "incollection";
            // Move the "chaptertitle" to just "title":
            h.put(FieldName.TITLE, h.remove("chaptertitle"));
        }
        BibEntry b = new BibEntry(entryType);
        b.setField(h);
        bibitems.add(b);
    }
    return new ParserResult(bibitems);
}
Also used : BibEntry(org.jabref.model.entry.BibEntry) ParserResult(org.jabref.logic.importer.ParserResult) HashMap(java.util.HashMap) Matcher(java.util.regex.Matcher) ArrayList(java.util.ArrayList)
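
A minimal usage sketch for the importer above, assuming the Ovid export text is already held in a String; the no-argument OvidImporter constructor and the getDatabase().getEntries() accessors are assumptions here, not shown in the snippet:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.importer.fileformat.OvidImporter;
import org.jabref.model.entry.BibEntry;

// Hypothetical caller: run an Ovid export held in a String through the importer and
// collect the parsed entries. The no-argument constructor and getEntries() are assumed.
static List<BibEntry> importOvid(String ovidExport) throws IOException {
    ParserResult result = new OvidImporter().importDatabase(new BufferedReader(new StringReader(ovidExport)));
    return result.getDatabase().getEntries();
}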

Example 80 with BibEntry

Use of org.jabref.model.entry.BibEntry in project jabref by JabRef.

The class PdfContentImporter, method importDatabase:

@Override
public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
    final ArrayList<BibEntry> result = new ArrayList<>(1);
    try (FileInputStream fileStream = new FileInputStream(filePath.toFile());
        PDDocument document = XMPUtil.loadWithAutomaticDecryption(fileStream)) {
        String firstPageContents = getFirstPageContents(document);
        Optional<DOI> doi = DOI.findInText(firstPageContents);
        if (doi.isPresent()) {
            ParserResult parserResult = new ParserResult(result);
            Optional<BibEntry> entry = new DoiFetcher(importFormatPreferences).performSearchById(doi.get().getDOI());
            entry.ifPresent(parserResult.getDatabase()::insertEntry);
            return parserResult;
        }
        // idea: split[] contains the different lines
        // blocks are separated by empty lines
        // treat each block
        //   or do special treatment at authors (which are not broken)
        //   therefore, we do a line-based and not a block-based splitting
        // i points to the current line
        // curString (mostly) contains the current block
        //   the different lines are joined into one and thereby separated by " "
        lines = firstPageContents.split(System.lineSeparator());
        proceedToNextNonEmptyLine();
        if (i >= lines.length) {
            // return empty list
            return new ParserResult();
        }
        // we start at the current line
        curString = lines[i];
        // i might get incremented later and curString modified, too
        i = i + 1;
        String author;
        String editor = null;
        String abstractT = null;
        String keywords = null;
        String title;
        String conference = null;
        String DOI = null;
        String series = null;
        String volume = null;
        String number = null;
        String pages = null;
        // year is a class variable as the method extractYear() uses it;
        String publisher = null;
        EntryType type = BibtexEntryTypes.INPROCEEDINGS;
        if (curString.length() > 4) {
            // special case: possibly conference as first line on the page
            extractYear();
            if (curString.contains("Conference")) {
                fillCurStringWithNonEmptyLines();
                conference = curString;
                curString = "";
            } else {
                // e.g. Copyright (c) 1998 by the Genetics Society of America
                // future work: get year using RegEx
                String lower = curString.toLowerCase(Locale.ROOT);
                if (lower.contains("copyright")) {
                    fillCurStringWithNonEmptyLines();
                    publisher = curString;
                    curString = "";
                }
            }
        }
        // start: title
        fillCurStringWithNonEmptyLines();
        title = streamlineTitle(curString);
        curString = "";
        // i points to the next non-empty line
        // after title: authors
        author = null;
        while ((i < lines.length) && !"".equals(lines[i])) {
            // author names are unlikely to be split among different lines
            // treat them line by line
            curString = streamlineNames(lines[i]);
            if (author == null) {
                author = curString;
            } else if (!"".equals(curString)) {
                // if lines[i] is "and", streamlineNames returns "" -> skip it; otherwise append
                author = author.concat(" and ").concat(curString);
            }
            i++;
        }
        curString = "";
        i++;
        // then, abstract and keywords follow
        while (i < lines.length) {
            curString = lines[i];
            if ((curString.length() >= "Abstract".length()) && "Abstract".equalsIgnoreCase(curString.substring(0, "Abstract".length()))) {
                if (curString.length() == "Abstract".length()) {
                    // only word "abstract" found -- skip line
                    curString = "";
                } else {
                    curString = curString.substring("Abstract".length() + 1).trim().concat(System.lineSeparator());
                }
                i++;
                // fillCurStringWithNonEmptyLines() cannot be used here as it joins lines with " ",
                // whereas we need a linebreak as separator
                while ((i < lines.length) && !"".equals(lines[i])) {
                    curString = curString.concat(lines[i]).concat(System.lineSeparator());
                    i++;
                }
                abstractT = curString.trim();
                i++;
            } else if ((curString.length() >= "Keywords".length()) && "Keywords".equalsIgnoreCase(curString.substring(0, "Keywords".length()))) {
                if (curString.length() == "Keywords".length()) {
                    // only word "Keywords" found -- skip line
                    curString = "";
                } else {
                    curString = curString.substring("Keywords".length() + 1).trim();
                }
                i++;
                fillCurStringWithNonEmptyLines();
                keywords = removeNonLettersAtEnd(curString);
            } else {
                String lower = curString.toLowerCase(Locale.ROOT);
                int pos = lower.indexOf("technical");
                if (pos >= 0) {
                    type = BibtexEntryTypes.TECHREPORT;
                    pos = curString.trim().lastIndexOf(' ');
                    if (pos >= 0) {
                        // assumption: last character of curString is NOT ' '
                        //   otherwise pos+1 leads to an out-of-bounds exception
                        number = curString.substring(pos + 1);
                    }
                }
                i++;
                proceedToNextNonEmptyLine();
            }
        }
        i = lines.length - 1;
        while (i >= 0) {
            readLastBlock();
            // i now points to the block before or is -1
            // curString contains the last block, separated by " "
            extractYear();
            int pos = curString.indexOf("(Eds.)");
            if ((pos >= 0) && (publisher == null)) {
                // looks like a Springer last line
                // e.g: A. Persson and J. Stirna (Eds.): PoEM 2009, LNBIP 39, pp. 161-175, 2009.
                publisher = "Springer";
                editor = streamlineNames(curString.substring(0, pos - 1));
                // +2 because of ":" after (Eds.) and the subsequent space
                curString = curString.substring(pos + "(Eds.)".length() + 2);
                String[] springerSplit = curString.split(", ");
                if (springerSplit.length >= 4) {
                    conference = springerSplit[0];
                    String seriesData = springerSplit[1];
                    int lastSpace = seriesData.lastIndexOf(' ');
                    series = seriesData.substring(0, lastSpace);
                    volume = seriesData.substring(lastSpace + 1);
                    pages = springerSplit[2].substring(4);
                    if (springerSplit[3].length() >= 4) {
                        year = springerSplit[3].substring(0, 4);
                    }
                }
            } else {
                if (DOI == null) {
                    pos = curString.indexOf("DOI");
                    if (pos < 0) {
                        pos = curString.indexOf(FieldName.DOI);
                    }
                    if (pos >= 0) {
                        pos += 3;
                        char delimiter = curString.charAt(pos);
                        if ((delimiter == ':') || (delimiter == ' ')) {
                            pos++;
                        }
                        int nextSpace = curString.indexOf(' ', pos);
                        if (nextSpace > 0) {
                            DOI = curString.substring(pos, nextSpace);
                        } else {
                            DOI = curString.substring(pos);
                        }
                    }
                }
                if ((publisher == null) && curString.contains("IEEE")) {
                    // IEEE has the conference things at the end
                    publisher = "IEEE";
                    if (conference == null) {
                        pos = curString.indexOf('$');
                        if (pos > 0) {
                            // we found the price
                            // before the price, the ISSN is stated
                            // skip that
                            pos -= 2;
                            while ((pos >= 0) && (curString.charAt(pos) != ' ')) {
                                pos--;
                            }
                            if (pos > 0) {
                                conference = curString.substring(0, pos);
                            }
                        }
                    }
                }
            }
        }
        BibEntry entry = new BibEntry();
        entry.setType(type);
        if (author != null) {
            entry.setField(FieldName.AUTHOR, author);
        }
        if (editor != null) {
            entry.setField(FieldName.EDITOR, editor);
        }
        if (abstractT != null) {
            entry.setField(FieldName.ABSTRACT, abstractT);
        }
        if (!Strings.isNullOrEmpty(keywords)) {
            entry.setField(FieldName.KEYWORDS, keywords);
        }
        if (title != null) {
            entry.setField(FieldName.TITLE, title);
        }
        if (conference != null) {
            entry.setField(FieldName.BOOKTITLE, conference);
        }
        if (DOI != null) {
            entry.setField(FieldName.DOI, DOI);
        }
        if (series != null) {
            entry.setField(FieldName.SERIES, series);
        }
        if (volume != null) {
            entry.setField(FieldName.VOLUME, volume);
        }
        if (number != null) {
            entry.setField(FieldName.NUMBER, number);
        }
        if (pages != null) {
            entry.setField(FieldName.PAGES, pages);
        }
        if (year != null) {
            entry.setField(FieldName.YEAR, year);
        }
        if (publisher != null) {
            entry.setField(FieldName.PUBLISHER, publisher);
        }
        result.add(entry);
    } catch (EncryptedPdfsNotSupportedException e) {
        return ParserResult.fromErrorMessage(Localization.lang("Decryption not supported."));
    } catch (IOException exception) {
        return ParserResult.fromError(exception);
    } catch (FetcherException e) {
        return ParserResult.fromErrorMessage(e.getMessage());
    }
    return new ParserResult(result);
}
Also used : EncryptedPdfsNotSupportedException(org.jabref.logic.xmp.EncryptedPdfsNotSupportedException) BibEntry(org.jabref.model.entry.BibEntry) DoiFetcher(org.jabref.logic.importer.fetcher.DoiFetcher) ArrayList(java.util.ArrayList) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) ParserResult(org.jabref.logic.importer.ParserResult) FetcherException(org.jabref.logic.importer.FetcherException) EntryType(org.jabref.model.entry.EntryType) PDDocument(org.apache.pdfbox.pdmodel.PDDocument) DOI(org.jabref.model.entry.identifier.DOI)
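
A minimal usage sketch for the PDF importer, assuming a PdfContentImporter(ImportFormatPreferences) constructor (suggested by the importFormatPreferences field passed to the DoiFetcher above); the file path is a placeholder:

import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.importer.fileformat.PdfContentImporter;

// Hypothetical caller: extract a BibEntry from the first page of a PDF file.
// The constructor argument and the file name are placeholders; the Charset parameter is
// required by the importDatabase signature above even though PDF content is binary.
static ParserResult importPdf(ImportFormatPreferences preferences) {
    Path pdf = Paths.get("paper.pdf");
    return new PdfContentImporter(preferences).importDatabase(pdf, StandardCharsets.UTF_8);
}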

Aggregations

BibEntry (org.jabref.model.entry.BibEntry) 716
Test (org.junit.Test) 466
ParserResult (org.jabref.logic.importer.ParserResult) 131
StringReader (java.io.StringReader) 107
ArrayList (java.util.ArrayList) 75
BibDatabase (org.jabref.model.database.BibDatabase) 63
Path (java.nio.file.Path) 52
IOException (java.io.IOException) 43
HashMap (java.util.HashMap) 37
Before (org.junit.Before) 36
NamedCompound (org.jabref.gui.undo.NamedCompound) 30
BibtexParser (org.jabref.logic.importer.fileformat.BibtexParser) 28
BibtexString (org.jabref.model.entry.BibtexString) 28
List (java.util.List) 23
File (java.io.File) 21
StringWriter (java.io.StringWriter) 19
Optional (java.util.Optional) 19
BasePanel (org.jabref.gui.BasePanel) 19
FieldChange (org.jabref.model.FieldChange) 18
InputStream (java.io.InputStream) 16