Search in sources :

Example 6 with URLDownload

use of org.jabref.logic.net.URLDownload in project jabref by JabRef.

the class ACMPortalFetcher method downloadEntryBibTeX.

/**
 * Downloads the BibTeX record with the given id from the ACM Portal and, optionally, its abstract.
 * <p>
 * The BibTeX text is cut out of the returned HTML between {@code START_BIBTEX_ENTRY} and
 * {@code END_BIBTEX_ENTRY_HTML} and parsed; only the first parsed entry is used. After each request the
 * method sleeps for {@code WAIT_TIME} to avoid being blocked by ACM.
 *
 * @param id               identifier inserted into the BibTeX and abstract URLs
 * @param downloadAbstract whether the abstract page should be fetched and stored in the abstract field
 * @return the downloaded entry, or an empty Optional if the download or parsing failed, the response
 *         contained no BibTeX entry, or the thread was interrupted while waiting
 */
private static Optional<BibEntry> downloadEntryBibTeX(String id, boolean downloadAbstract) {
    try {
        URL url = new URL(ACMPortalFetcher.START_URL + ACMPortalFetcher.BIBTEX_URL + id + ACMPortalFetcher.BIBTEX_URL_END);
        URLConnection connection = url.openConnection();
        // set user-agent to avoid being blocked as a crawler
        connection.addRequestProperty("User-Agent", URLDownload.USER_AGENT);
        Collection<BibEntry> items = null;
        try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
            String htmlCode = in.lines().filter(s -> !s.isEmpty()).collect(Collectors.joining());
            int bibtexStart = htmlCode.indexOf(START_BIBTEX_ENTRY);
            int bibtexEnd = htmlCode.indexOf(END_BIBTEX_ENTRY_HTML);
            // Without both markers in the expected order substring() would throw an unchecked
            // StringIndexOutOfBoundsException that none of the catch clauses below handles.
            if ((bibtexStart < 0) || (bibtexEnd < bibtexStart)) {
                LOGGER.info("No BibTeX entry found at: " + ACMPortalFetcher.BIBTEX_URL + id + ACMPortalFetcher.BIBTEX_URL_END);
                return Optional.empty();
            }
            String bibtexString = htmlCode.substring(bibtexStart, bibtexEnd);
            items = new BibtexParser(Globals.prefs.getImportFormatPreferences()).parseEntries(bibtexString);
        } catch (IOException | ParseException e) {
            LOGGER.info("Download of BibTeX information from ACM Portal failed.", e);
        }
        if ((items == null) || items.isEmpty()) {
            return Optional.empty();
        }
        BibEntry entry = items.iterator().next();
        //wait between requests or you will be blocked by ACM
        Thread.sleep(ACMPortalFetcher.WAIT_TIME);
        // get abstract
        if (downloadAbstract) {
            URLDownload dl = new URLDownload(ACMPortalFetcher.START_URL + ACMPortalFetcher.ABSTRACT_URL + id);
            String page = dl.asString(Globals.prefs.getDefaultEncoding());
            Matcher absM = ACMPortalFetcher.ABSTRACT_PATTERN.matcher(page);
            if (absM.find()) {
                entry.setField(FieldName.ABSTRACT, absM.group(1).trim());
            }
            //wait between requests or you will be blocked by ACM
            Thread.sleep(ACMPortalFetcher.WAIT_TIME);
        }
        return Optional.of(entry);
    } catch (NoSuchElementException e) {
        LOGGER.info("Bad BibTeX record read at: " + ACMPortalFetcher.BIBTEX_URL + id + ACMPortalFetcher.BIBTEX_URL_END, e);
    } catch (MalformedURLException e) {
        LOGGER.info("Malformed URL.", e);
    } catch (IOException e) {
        LOGGER.info("Cannot connect.", e);
    } catch (InterruptedException ignored) {
        // Restore the interrupt flag so callers further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
    }
    return Optional.empty();
}
Also used : FetcherPreviewDialog(org.jabref.gui.importer.FetcherPreviewDialog) FieldName(org.jabref.model.entry.FieldName) URL(java.net.URL) HtmlToLatexFormatter(org.jabref.logic.formatter.bibtexfields.HtmlToLatexFormatter) OutputPrinter(org.jabref.logic.importer.OutputPrinter) JabRefPreferences(org.jabref.preferences.JabRefPreferences) URLDownload(org.jabref.logic.net.URLDownload) GridLayout(java.awt.GridLayout) LinkedHashMap(java.util.LinkedHashMap) Matcher(java.util.regex.Matcher) URLConnection(java.net.URLConnection) Map(java.util.Map) BibtexParser(org.jabref.logic.importer.fileformat.BibtexParser) Localization(org.jabref.logic.l10n.Localization) NoSuchElementException(java.util.NoSuchElementException) ProtectedTermsLoader(org.jabref.logic.protectedterms.ProtectedTermsLoader) ProtectTermsFormatter(org.jabref.logic.formatter.casechanger.ProtectTermsFormatter) HelpFile(org.jabref.logic.help.HelpFile) MalformedURLException(java.net.MalformedURLException) ButtonGroup(javax.swing.ButtonGroup) Collection(java.util.Collection) BibEntry(org.jabref.model.entry.BibEntry) IOException(java.io.IOException) JOptionPane(javax.swing.JOptionPane) InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) JRadioButton(javax.swing.JRadioButton) Globals(org.jabref.Globals) ParseException(org.jabref.logic.importer.ParseException) Dimension(java.awt.Dimension) JLabel(javax.swing.JLabel) JCheckBox(javax.swing.JCheckBox) Optional(java.util.Optional) Log(org.apache.commons.logging.Log) BufferedReader(java.io.BufferedReader) Pattern(java.util.regex.Pattern) UnitsToLatexFormatter(org.jabref.logic.formatter.bibtexfields.UnitsToLatexFormatter) ImportInspector(org.jabref.logic.importer.ImportInspector) LogFactory(org.apache.commons.logging.LogFactory) JPanel(javax.swing.JPanel) BibEntry(org.jabref.model.entry.BibEntry) MalformedURLException(java.net.MalformedURLException) 
InputStreamReader(java.io.InputStreamReader) Matcher(java.util.regex.Matcher) BibtexParser(org.jabref.logic.importer.fileformat.BibtexParser) IOException(java.io.IOException) URLDownload(org.jabref.logic.net.URLDownload) URL(java.net.URL) URLConnection(java.net.URLConnection) BufferedReader(java.io.BufferedReader) ParseException(org.jabref.logic.importer.ParseException) NoSuchElementException(java.util.NoSuchElementException)

Example 7 with URLDownload

use of org.jabref.logic.net.URLDownload in project jabref by JabRef.

the class IEEE method findFullText.

/**
 * Looks for a direct PDF link on IEEE Xplore for the given entry.
 * <p>
 * A stamp string is first searched for in the entry's URL field and, failing that, in the page the
 * entry's DOI resolves to (only for DOIs starting with {@code IEEE_DOI}). The stamp is then used to
 * download the frame page, which is searched for the PDF link.
 *
 * @param entry the entry to find the full text for; must not be null
 * @return the URL of the PDF, or an empty Optional if no stamp or no PDF link could be found
 * @throws IOException if a page download fails or the extracted PDF link is not a valid URL
 */
@Override
public Optional<URL> findFullText(BibEntry entry) throws IOException {
    Objects.requireNonNull(entry);

    String stamp = "";

    // Try URL first -- will primarily work for entries from the old IEEE search
    Optional<String> entryUrl = entry.getField(FieldName.URL);
    if (entryUrl.isPresent()) {
        // A direct link to IEEE carries the stamp itself
        Matcher urlMatcher = STAMP_PATTERN.matcher(entryUrl.get());
        if (urlMatcher.find()) {
            stamp = urlMatcher.group(1);
        }
    }

    // No stamp from the URL field: fall back to the DOI
    if (stamp.isEmpty()) {
        Optional<DOI> parsedDoi = entry.getField(FieldName.DOI).flatMap(DOI::parse);
        if (parsedDoi.isPresent()) {
            DOI doi = parsedDoi.get();
            if (doi.getDOI().startsWith(IEEE_DOI) && doi.getExternalURI().isPresent()) {
                // Fetch the HTML page the DOI resolves to and search it for the stamp
                String doiPage = new URLDownload(doi.getExternalURI().get().toURL()).asString();
                Matcher pageMatcher = STAMP_PATTERN.matcher(doiPage);
                if (pageMatcher.find()) {
                    stamp = pageMatcher.group(1);
                }
            }
        }
    }

    // Neither source produced a stamp
    if (stamp.isEmpty()) {
        return Optional.empty();
    }

    // Download the HTML page containing a frame with the PDF
    String framePage = new URLDownload(BASE_URL + stamp).asString();
    Matcher pdfMatcher = PDF_PATTERN.matcher(framePage);
    if (!pdfMatcher.find()) {
        return Optional.empty();
    }

    LOGGER.debug("Full text document found on IEEE Xplore");
    return Optional.of(new URL(pdfMatcher.group(1)));
}
Also used : Matcher(java.util.regex.Matcher) URLDownload(org.jabref.logic.net.URLDownload) URL(java.net.URL) DOI(org.jabref.model.entry.identifier.DOI)

Example 8 with URLDownload

use of org.jabref.logic.net.URLDownload in project jabref by JabRef.

the class GoogleScholar method downloadEntry.

/**
 * Downloads the BibTeX content behind the given link and returns the single entry it contains.
 *
 * @param link address of the BibTeX content to download
 * @return the one entry parsed from the downloaded content
 * @throws IOException      if the download or reading the content fails
 * @throws FetcherException if parsing yields no database or not exactly one entry
 */
private BibEntry downloadEntry(String link) throws IOException, FetcherException {
    String bibtexSource = new URLDownload(link).asString();
    ParserResult parsed = new BibtexParser(importFormatPreferences).parse(new StringReader(bibtexSource));

    if ((parsed == null) || (parsed.getDatabase() == null)) {
        throw new FetcherException("Parsing entries from Google Scholar bib file failed.");
    }

    Collection<BibEntry> found = parsed.getDatabase().getEntries();
    if (found.size() == 1) {
        return found.iterator().next();
    }

    // Anything other than exactly one entry is treated as a parse failure
    LOGGER.debug(found.size() + " entries found! (" + link + ")");
    throw new FetcherException("Parsing entries from Google Scholar bib file failed.");
}
Also used : ParserResult(org.jabref.logic.importer.ParserResult) BibEntry(org.jabref.model.entry.BibEntry) FetcherException(org.jabref.logic.importer.FetcherException) BibtexParser(org.jabref.logic.importer.fileformat.BibtexParser) StringReader(java.io.StringReader) URLDownload(org.jabref.logic.net.URLDownload)

Example 9 with URLDownload

use of org.jabref.logic.net.URLDownload in project jabref by JabRef.

the class GoogleScholar method addHitsFromQuery.

/**
 * Downloads the page at the given query URL and appends one entry to the list for every match of
 * {@code LINK_TO_BIB_PATTERN} found in it.
 *
 * @param entryList list the downloaded entries are added to
 * @param queryURL  address of the result page to scan for BibTeX links
 * @throws IOException      if downloading the result page or one of the linked entries fails
 * @throws FetcherException if one of the linked entries cannot be parsed
 */
private void addHitsFromQuery(List<BibEntry> entryList, String queryURL) throws IOException, FetcherException {
    String resultPage = new URLDownload(queryURL).asString();
    for (Matcher bibLinks = LINK_TO_BIB_PATTERN.matcher(resultPage); bibLinks.find(); ) {
        // Links are taken from HTML, so un-escape the ampersands before following them
        entryList.add(downloadEntry(bibLinks.group().replace("&amp;", "&")));
    }
}
Also used : BibEntry(org.jabref.model.entry.BibEntry) Matcher(java.util.regex.Matcher) URLDownload(org.jabref.logic.net.URLDownload)

Example 10 with URLDownload

use of org.jabref.logic.net.URLDownload in project jabref by JabRef.

the class BibsonomyScraper method getEntry.

/**
 * Looks up the given URL through the BibSonomy scraper and parses the response as a single BibTeX entry.
 * <p>
 * Failures are logged as warnings and reported as an empty result rather than thrown.
 *
 * @param entryUrl                the URL to hand to the scraper; its special characters are percent-escaped
 *                                before it is embedded in the scraper request
 * @param importFormatPreferences preferences passed on to the BibTeX parser
 * @return the result of parsing the scraper response, or an empty Optional if downloading, parsing or
 *         any other runtime step failed
 */
public static Optional<BibEntry> getEntry(String entryUrl, ImportFormatPreferences importFormatPreferences) {
    try {
        // "%" has to be escaped first so the escapes introduced below are not escaped again
        String escapedUrl = entryUrl
                .replace("%", "%25")
                .replace(":", "%3A")
                .replace("/", "%2F")
                .replace("?", "%3F")
                .replace("&", "%26")
                .replace("=", "%3D");
        URL scraperUrl = new URL(BibsonomyScraper.BIBSONOMY_SCRAPER + escapedUrl + BibsonomyScraper.BIBSONOMY_SCRAPER_POST);
        return BibtexParser.singleFromString(new URLDownload(scraperUrl).asString(), importFormatPreferences);
    } catch (IOException ex) {
        LOGGER.warn("Could not download entry", ex);
    } catch (ParseException ex) {
        LOGGER.warn("Could not parse entry", ex);
    } catch (RuntimeException ex) {
        LOGGER.warn("Could not get entry", ex);
    }
    return Optional.empty();
}
Also used : IOException(java.io.IOException) ParseException(org.jabref.logic.importer.ParseException) URLDownload(org.jabref.logic.net.URLDownload) URL(java.net.URL)

Aggregations

URLDownload (org.jabref.logic.net.URLDownload)19 IOException (java.io.IOException)11 URL (java.net.URL)7 BibEntry (org.jabref.model.entry.BibEntry)7 Matcher (java.util.regex.Matcher)5 ParseException (org.jabref.logic.importer.ParseException)4 FetcherException (org.jabref.logic.importer.FetcherException)3 BibtexParser (org.jabref.logic.importer.fileformat.BibtexParser)3 DOI (org.jabref.model.entry.identifier.DOI)3 File (java.io.File)2 Path (java.nio.file.Path)2 LinkedHashMap (java.util.LinkedHashMap)2 Map (java.util.Map)2 Optional (java.util.Optional)2 JLabel (javax.swing.JLabel)2 ExternalFileType (org.jabref.gui.externalfiletype.ExternalFileType)2 OutputPrinter (org.jabref.logic.importer.OutputPrinter)2 ParserResult (org.jabref.logic.importer.ParserResult)2 Dimension (java.awt.Dimension)1 GridLayout (java.awt.GridLayout)1