Search in sources :

Example 1 with EncryptedPdfsNotSupportedException

Use of org.jabref.logic.xmp.EncryptedPdfsNotSupportedException in project jabref by JabRef.

The class PdfContentImporter, method importDatabase.

/**
 * Builds a single bibliographic entry from the text of the first page of a PDF file.
 *
 * <p>Processing order:
 * <ol>
 *   <li>If a DOI is found in the first-page text, the entry is requested from a {@code DoiFetcher}
 *       and the result (possibly without any entry) is returned immediately.</li>
 *   <li>Otherwise the page is parsed line by line from the top: an optional conference or copyright
 *       line, the title, the author lines, then abstract, keywords and a "technical" report marker.</li>
 *   <li>Finally the page is scanned block by block from the bottom for the year, a Springer
 *       "(Eds.)" line, a DOI and an IEEE footer.</li>
 * </ol>
 *
 * <p>The parsing state ({@code lines}, {@code i}, {@code curString}, {@code year}) is not declared in
 * this method; it is shared with the helper methods called here (e.g. {@code extractYear()},
 * {@code fillCurStringWithNonEmptyLines()}, {@code readLastBlock()}).
 *
 * @param filePath        the PDF file to read
 * @param defaultEncoding not used by this method
 * @return a result containing the extracted entry; an empty result if the first page has no
 *         non-empty line; or an error result if the PDF is encrypted, cannot be read, or the DOI
 *         lookup fails
 */
@Override
public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
    final ArrayList<BibEntry> result = new ArrayList<>(1);
    try (FileInputStream fileStream = new FileInputStream(filePath.toFile());
        PDDocument document = XMPUtil.loadWithAutomaticDecryption(fileStream)) {
        String firstPageContents = getFirstPageContents(document);
        Optional<DOI> doi = DOI.findInText(firstPageContents);
        if (doi.isPresent()) {
            // a DOI on the first page wins over all heuristics: fetch the entry by DOI and stop
            ParserResult parserResult = new ParserResult(result);
            Optional<BibEntry> entry = new DoiFetcher(importFormatPreferences).performSearchById(doi.get().getDOI());
            entry.ifPresent(parserResult.getDatabase()::insertEntry);
            return parserResult;
        }
        // idea: lines[] contains the different lines
        // blocks are separated by empty lines
        // treat each block
        //   or do special treatment at authors (which are not broken)
        //   therefore, we do a line-based and not a block-based splitting
        // i points to the current line
        // curString (mostly) contains the current block
        //   the different lines are joined into one and thereby separated by " "
        lines = firstPageContents.split(System.lineSeparator());
        proceedToNextNonEmptyLine();
        if (i >= lines.length) {
            // nothing but empty lines: return empty result
            return new ParserResult();
        }
        // we start at the current line
        curString = lines[i];
        // i might get incremented later and curString modified, too
        i = i + 1;
        String author;
        String editor = null;
        String abstractT = null;
        String keywords = null;
        String title;
        String conference = null;
        // named doiValue (not DOI) so that the local does not shadow the DOI class used above
        String doiValue = null;
        String series = null;
        String volume = null;
        String number = null;
        String pages = null;
        // year is a class variable as the method extractYear() uses it;
        String publisher = null;
        EntryType type = BibtexEntryTypes.INPROCEEDINGS;
        if (curString.length() > 4) {
            // special case: possibly conference as first line on the page
            extractYear();
            if (curString.contains("Conference")) {
                fillCurStringWithNonEmptyLines();
                conference = curString;
                curString = "";
            } else {
                // e.g. Copyright (c) 1998 by the Genetics Society of America
                // future work: get year using RegEx
                String lower = curString.toLowerCase(Locale.ROOT);
                if (lower.contains("copyright")) {
                    fillCurStringWithNonEmptyLines();
                    publisher = curString;
                    curString = "";
                }
            }
        }
        // start: title
        fillCurStringWithNonEmptyLines();
        title = streamlineTitle(curString);
        curString = "";
        // i points to the next non-empty line
        // after title: authors
        author = null;
        while ((i < lines.length) && !"".equals(lines[i])) {
            // author names are unlikely to be lines among different lines
            // treat them line by line
            curString = streamlineNames(lines[i]);
            if (author == null) {
                author = curString;
            } else if (!"".equals(curString)) {
                // if lines[i] is "and" then "" is returned by streamlineNames -> nothing to append
                author = author.concat(" and ").concat(curString);
            }
            i++;
        }
        curString = "";
        i++;
        // then, abstract and keywords follow
        while (i < lines.length) {
            curString = lines[i];
            if ((curString.length() >= "Abstract".length()) && "Abstract".equalsIgnoreCase(curString.substring(0, "Abstract".length()))) {
                if (curString.length() == "Abstract".length()) {
                    // only word "abstract" found -- skip line
                    curString = "";
                } else {
                    // +1 skips the character following the word "Abstract"
                    curString = curString.substring("Abstract".length() + 1).trim().concat(System.lineSeparator());
                }
                i++;
                // the abstract keeps its line breaks, hence no fillCurStringWithNonEmptyLines() here
                while ((i < lines.length) && !"".equals(lines[i])) {
                    curString = curString.concat(lines[i]).concat(System.lineSeparator());
                    i++;
                }
                abstractT = curString.trim();
                i++;
            } else if ((curString.length() >= "Keywords".length()) && "Keywords".equalsIgnoreCase(curString.substring(0, "Keywords".length()))) {
                if (curString.length() == "Keywords".length()) {
                    // only word "Keywords" found -- skip line
                    curString = "";
                } else {
                    // +1 skips the character following the word "Keywords"
                    curString = curString.substring("Keywords".length() + 1).trim();
                }
                i++;
                fillCurStringWithNonEmptyLines();
                keywords = removeNonLettersAtEnd(curString);
            } else {
                String lower = curString.toLowerCase(Locale.ROOT);
                int pos = lower.indexOf("technical");
                if (pos >= 0) {
                    type = BibtexEntryTypes.TECHREPORT;
                    // the report number is taken to be the last word of the line;
                    // index and substring both work on the trimmed line so that surrounding
                    // whitespace cannot shift the position
                    String trimmedLine = curString.trim();
                    pos = trimmedLine.lastIndexOf(' ');
                    if (pos >= 0) {
                        number = trimmedLine.substring(pos + 1);
                    }
                }
                i++;
                proceedToNextNonEmptyLine();
            }
        }
        // second pass: walk the blocks from the end of the page towards the beginning
        i = lines.length - 1;
        while (i >= 0) {
            readLastBlock();
            // i now points to the block before or is -1
            // curString contains the last block, separated by " "
            extractYear();
            int pos = curString.indexOf("(Eds.)");
            if ((pos >= 0) && (publisher == null)) {
                // looks like a Springer last line
                // e.g: A. Persson and J. Stirna (Eds.): PoEM 2009, LNBIP 39, pp. 161-175, 2009.
                publisher = "Springer";
                // everything before "(Eds.)" are the editor names; trim() removes the separating
                // space and keeps this safe when the block starts directly with "(Eds.)"
                editor = streamlineNames(curString.substring(0, pos).trim());
                // +2 because of ":" after (Eds.) and the subsequent space;
                // clamped so that a block ending right after "(Eds.)" yields an empty remainder
                int restStart = Math.min(curString.length(), pos + "(Eds.)".length() + 2);
                curString = curString.substring(restStart);
                String[] springerSplit = curString.split(", ");
                if (springerSplit.length >= 4) {
                    conference = springerSplit[0];
                    String seriesData = springerSplit[1];
                    int lastSpace = seriesData.lastIndexOf(' ');
                    if (lastSpace >= 0) {
                        // e.g. "LNBIP 39" -> series "LNBIP", volume "39"
                        series = seriesData.substring(0, lastSpace);
                        volume = seriesData.substring(lastSpace + 1);
                    } else {
                        // no volume part present: keep the whole token as series
                        series = seriesData;
                    }
                    String pagesData = springerSplit[2];
                    if (pagesData.length() >= 4) {
                        // drop the four leading characters ("pp. " in the example above)
                        pages = pagesData.substring(4);
                    }
                    if (springerSplit[3].length() >= 4) {
                        year = springerSplit[3].substring(0, 4);
                    }
                }
            } else {
                if (doiValue == null) {
                    pos = curString.indexOf("DOI");
                    if (pos < 0) {
                        pos = curString.indexOf(FieldName.DOI);
                    }
                    if (pos >= 0) {
                        // skip the marker itself
                        // NOTE(review): assumes FieldName.DOI is three characters long, like "DOI" -- confirm
                        pos += 3;
                        // skip the delimiters between marker and value (":", " " or ": ");
                        // the bounds check covers a marker at the very end of the block
                        while ((pos < curString.length()) && ((curString.charAt(pos) == ':') || (curString.charAt(pos) == ' '))) {
                            pos++;
                        }
                        int nextSpace = curString.indexOf(' ', pos);
                        String candidate;
                        if (nextSpace > 0) {
                            candidate = curString.substring(pos, nextSpace);
                        } else {
                            candidate = curString.substring(pos);
                        }
                        if (!candidate.isEmpty()) {
                            // an empty candidate is ignored so that an earlier block may still provide the DOI
                            doiValue = candidate;
                        }
                    }
                }
                if ((publisher == null) && curString.contains("IEEE")) {
                    // IEEE has the conference things at the end
                    publisher = "IEEE";
                    if (conference == null) {
                        pos = curString.indexOf('$');
                        if (pos > 0) {
                            // we found the price
                            // before the price, the ISSN is stated
                            // skip that: step back over the space before '$' and then over the preceding word
                            pos -= 2;
                            while ((pos >= 0) && (curString.charAt(pos) != ' ')) {
                                pos--;
                            }
                            if (pos > 0) {
                                conference = curString.substring(0, pos);
                            }
                        }
                    }
                }
            }
        }
        // assemble the entry from everything that was recognized
        BibEntry entry = new BibEntry();
        entry.setType(type);
        if (author != null) {
            entry.setField(FieldName.AUTHOR, author);
        }
        if (editor != null) {
            entry.setField(FieldName.EDITOR, editor);
        }
        if (abstractT != null) {
            entry.setField(FieldName.ABSTRACT, abstractT);
        }
        if (!Strings.isNullOrEmpty(keywords)) {
            entry.setField(FieldName.KEYWORDS, keywords);
        }
        if (title != null) {
            entry.setField(FieldName.TITLE, title);
        }
        if (conference != null) {
            entry.setField(FieldName.BOOKTITLE, conference);
        }
        if (doiValue != null) {
            entry.setField(FieldName.DOI, doiValue);
        }
        if (series != null) {
            entry.setField(FieldName.SERIES, series);
        }
        if (volume != null) {
            entry.setField(FieldName.VOLUME, volume);
        }
        if (number != null) {
            entry.setField(FieldName.NUMBER, number);
        }
        if (pages != null) {
            entry.setField(FieldName.PAGES, pages);
        }
        if (year != null) {
            entry.setField(FieldName.YEAR, year);
        }
        if (publisher != null) {
            entry.setField(FieldName.PUBLISHER, publisher);
        }
        result.add(entry);
    } catch (EncryptedPdfsNotSupportedException e) {
        return ParserResult.fromErrorMessage(Localization.lang("Decryption not supported."));
    } catch (IOException exception) {
        return ParserResult.fromError(exception);
    } catch (FetcherException e) {
        return ParserResult.fromErrorMessage(e.getMessage());
    }
    return new ParserResult(result);
}
Also used : EncryptedPdfsNotSupportedException(org.jabref.logic.xmp.EncryptedPdfsNotSupportedException) BibEntry(org.jabref.model.entry.BibEntry) DoiFetcher(org.jabref.logic.importer.fetcher.DoiFetcher) ArrayList(java.util.ArrayList) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) ParserResult(org.jabref.logic.importer.ParserResult) FetcherException(org.jabref.logic.importer.FetcherException) EntryType(org.jabref.model.entry.EntryType) PDDocument(org.apache.pdfbox.pdmodel.PDDocument) DOI(org.jabref.model.entry.identifier.DOI)

Aggregations

FileInputStream (java.io.FileInputStream)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 PDDocument (org.apache.pdfbox.pdmodel.PDDocument)1 FetcherException (org.jabref.logic.importer.FetcherException)1 ParserResult (org.jabref.logic.importer.ParserResult)1 DoiFetcher (org.jabref.logic.importer.fetcher.DoiFetcher)1 EncryptedPdfsNotSupportedException (org.jabref.logic.xmp.EncryptedPdfsNotSupportedException)1 BibEntry (org.jabref.model.entry.BibEntry)1 EntryType (org.jabref.model.entry.EntryType)1 DOI (org.jabref.model.entry.identifier.DOI)1