Search in sources :

Example 26 with Elements

use of org.jsoup.select.Elements in project Java-readability by basis-technology-corp.

Method getLinkDensity of the class Readability.

/**
 * Computes the fraction of an element's text that is contained in {@code <a>} elements.
 *
 * @param e element whose text and {@code <a>} elements are measured
 * @return the summed length of the link texts divided by the length of {@code e.text()},
 *         or {@code 0} when the element has no text
 */
private double getLinkDensity(Element e) {
    int textLength = e.text().length();
    if (textLength == 0) {
        // Dividing by a zero text length would produce NaN (0/0), and every
        // comparison against NaN is false; treat "no text" as "no link density".
        return 0;
    }
    Elements links = e.getElementsByTag("a");
    double linkLength = 0;
    for (Element link : links) {
        linkLength += link.text().length();
    }
    return linkLength / textLength;
}
Also used : Element(org.jsoup.nodes.Element) Elements(org.jsoup.select.Elements)

Example 27 with Elements

use of org.jsoup.select.Elements in project Java-readability by basis-technology-corp.

Method clean of the class Readability.

/**
 * Removes every element that {@code e.getElementsByTag(tag)} returns.
 *
 * @param e   element that is searched for elements with the given tag
 * @param tag name of the tag whose elements are removed
 */
private void clean(Element e, String tag) {
    e.getElementsByTag(tag).remove();
}
Also used : Elements(org.jsoup.select.Elements)

Example 28 with Elements

use of org.jsoup.select.Elements in project MusicDNA by harjot-oberai.

Method fromURL of the class Genius.

/**
 * Downloads a lyrics page and converts the content of its {@code .lyrics} element
 * into a {@code Lyrics} result.
 *
 * <p>The lyrics HTML is reduced to plain text plus {@code <br>} tags. Lines that, once
 * whitespace is removed, match {@code \[.+\]} are skipped, as are blank lines that
 * precede the first kept line. Kept lines are joined with {@code <br/>}.
 *
 * @param url    address of the page to download
 * @param artist artist name; when {@code null}, artist and title are read from the
 *               page's {@code text_artist} / {@code text_title} elements if present
 * @param title  song title; replaced by the page's title when {@code artist} is {@code null}
 * @return a {@code Lyrics.NO_RESULT} result when the download fails with an HTTP status
 *         error, a {@code Lyrics.ERROR} result on any other I/O failure or when the page
 *         has no {@code .lyrics} element, otherwise the populated result
 */
public static Lyrics fromURL(String url, String artist, String title) {
    final String lineBreak = "<br/>";
    Document lyricsPage;
    String text;
    try {
        lyricsPage = Jsoup.connect(url).userAgent(Net.USER_AGENT).get();
        Elements lyricsDiv = lyricsPage.select(".lyrics");
        if (lyricsDiv.isEmpty()) {
            // No lyrics container: report the error directly rather than throwing
            // an exception only to reach the catch block below.
            return new Lyrics(Lyrics.ERROR);
        }
        text = Jsoup.clean(lyricsDiv.html(), Whitelist.none().addTags("br")).trim();
    } catch (HttpStatusException e) {
        return new Lyrics(Lyrics.NO_RESULT);
    } catch (IOException | StringIndexOutOfBoundsException e) {
        // StringIndexOutOfBoundsException is kept in the catch list so that any such
        // exception raised while extracting the text is still reported as ERROR.
        e.printStackTrace();
        return new Lyrics(Lyrics.ERROR);
    }
    if (artist == null) {
        // Guard each lookup: get(0) on an empty result would throw an uncaught
        // IndexOutOfBoundsException when the page lacks these elements.
        Elements titleElements = lyricsPage.getElementsByClass("text_title");
        if (!titleElements.isEmpty()) {
            title = titleElements.get(0).text();
        }
        Elements artistElements = lyricsPage.getElementsByClass("text_artist");
        if (!artistElements.isEmpty()) {
            artist = artistElements.get(0).text();
        }
    }
    Lyrics result = new Lyrics(Lyrics.POSITIVE_RESULT);
    if ("[Instrumental]".equals(text)) {
        result = new Lyrics(Lyrics.NEGATIVE_RESULT);
    }
    Pattern pattern = Pattern.compile("\\[.+\\]");
    StringBuilder builder = new StringBuilder();
    for (String line : text.split("<br> ")) {
        String strippedLine = line.replaceAll("\\s", "");
        boolean bracketOnly = pattern.matcher(strippedLine).matches();
        boolean leadingBlank = strippedLine.isEmpty() && builder.length() == 0;
        if (!bracketOnly && !leadingBlank) {
            builder.append(line.replaceAll("\\P{Print}", "")).append(lineBreak);
        }
    }
    // Every append ends with the separator, so a non-empty builder always ends with it.
    // Use >= so the separator is also removed when it is the builder's only content.
    if (builder.length() >= lineBreak.length()) {
        builder.delete(builder.length() - lineBreak.length(), builder.length());
    }
    result.setArtist(artist);
    result.setTitle(title);
    result.setText(Normalizer.normalize(builder.toString(), Normalizer.Form.NFD));
    result.setURL(url);
    result.setSource("Genius");
    return result;
}
Also used : Pattern(java.util.regex.Pattern) HttpStatusException(org.jsoup.HttpStatusException) IOException(java.io.IOException) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements)

Example 29 with Elements

use of org.jsoup.select.Elements in project jstructure by JonStargaryen.

Method mapGroup of the class SiftsMappingAnnotator.

/**
 * Builds the residue mapping for one residue from the element that
 * {@code mapToDescribingElement} returns for it.
 *
 * @param document         document handed to {@code mapToDescribingElement}
 * @param chainId          chain identifier of the residue
 * @param pdbResidueNumber PDB residue number of the residue
 * @return a mapping carrying the {@code dbResNum} attribute of the first element whose
 *         {@code dbSource} attribute is {@code UniProt}; a mapping without that value
 *         (and a logged warning) when no such element exists
 */
private ResidueMapping mapGroup(final Document document, final String chainId, final String pdbResidueNumber) {
    final Elements uniProtCandidates = mapToDescribingElement(document, chainId, pdbResidueNumber)
            .getElementsByAttributeValue("dbSource", "UniProt");
    if (uniProtCandidates.isEmpty()) {
        logger.warn("could not retrieve UniProt mapping for " + chainId + "-" + pdbResidueNumber);
        return new ResidueMapping(this);
    }
    final String uniProtResidueNumber = uniProtCandidates.first().attr("dbResNum");
    return new ResidueMapping(this, uniProtResidueNumber);
}
Also used : Element(org.jsoup.nodes.Element) Elements(org.jsoup.select.Elements)

Example 30 with Elements

use of org.jsoup.select.Elements in project LeMondeRssReader by MBach.

Method extractComments of the class ArticleActivity.

/**
 * Builds the list of views for the comment section found in {@code doc}.
 *
 * <p>Also stores the {@code href} of the first link in the element following
 * {@code div.reactions} in {@code commentsURI}, when such a link exists.
 *
 * @param doc              element searched for the comment markup
 * @param loadMoreComments when {@code false}, a "Commentaires ..." header entry is added
 *                         first (if the interaction-count element is found)
 * @return the header (optional) followed by an author entry and a content entry per comment
 * @throws NumberFormatException if a comment's {@code data-reaction_id} attribute is not an integer
 */
private List<Model> extractComments(Element doc, boolean loadMoreComments) {
    List<Model> commentList = new ArrayList<>();
    // Extract header
    if (!loadMoreComments) {
        Elements header = doc.select("[itemprop='InteractionCount']");
        if (atLeastOneChild(header)) {
            TextView commentHeader = new TextView(getBaseContext());
            commentHeader.setText(String.format("Commentaires %s", header.text()));
            commentHeader.setTypeface(null, Typeface.BOLD);
            commentHeader.setTextColor(Color.WHITE);
            commentHeader.setPadding(0, 0, 0, Constants.PADDING_COMMENT_ANSWER);
            commentList.add(new Model(commentHeader, 0));
        }
    }
    // Extract comments
    Elements comments = doc.select("[itemprop='commentText']");
    for (Element comment : comments) {
        Elements refs = comment.select("p.references");
        if (atLeastOneChild(refs)) {
            // Clear date
            refs.select("span").remove();
            TextView author = new TextView(getBaseContext());
            author.setTypeface(null, Typeface.BOLD);
            author.setText(refs.text());
            author.setTextColor(Color.WHITE);
            // The comment body is taken from the element(s) following p.references
            Elements commentComment = refs.next();
            if (atLeastOneChild(commentComment)) {
                TextView content = new TextView(getBaseContext());
                content.setText(commentComment.first().text());
                content.setTextColor(Color.WHITE);
                // Comments with the "reponse" class get extra left padding
                if (comment.hasClass("reponse")) {
                    author.setPadding(Constants.PADDING_COMMENT_ANSWER, 0, 0, 12);
                    content.setPadding(Constants.PADDING_COMMENT_ANSWER, 0, 0, 16);
                } else {
                    author.setPadding(0, 0, 0, 12);
                    content.setPadding(0, 0, 0, 16);
                }
                // Author and content entries share the comment's id
                Integer commentId = Integer.valueOf(comment.attr("data-reaction_id"));
                commentList.add(new Model(author, commentId));
                commentList.add(new Model(content, commentId));
            }
        }
    }
    // Extract full comments page URI
    Elements div = doc.select("div.reactions");
    if (atLeastOneChild(div)) {
        Element fullComments = div.first().nextElementSibling();
        // nextElementSibling() returns null when div.reactions has no following sibling;
        // without this guard the select() call below would throw a NullPointerException.
        if (fullComments != null) {
            Elements next = fullComments.select("a");
            if (atLeastOneChild(next)) {
                commentsURI = next.first().attr("href");
            }
        }
    }
    return commentList;
}
Also used : Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) TextView(android.widget.TextView) Elements(org.jsoup.select.Elements)

Aggregations

Elements (org.jsoup.select.Elements): 168, Element (org.jsoup.nodes.Element): 96, Document (org.jsoup.nodes.Document): 70, ArrayList (java.util.ArrayList): 38, Test (org.junit.Test): 33, IOException (java.io.IOException): 24, URL (java.net.URL): 13, ParseTest (org.jsoup.integration.ParseTest): 11, File (java.io.File): 9, List (java.util.List): 8, HashMap (java.util.HashMap): 7, SSPHandler (org.asqatasun.processor.SSPHandler): 7, TestSolutionHandler (org.asqatasun.ruleimplementation.TestSolutionHandler): 7, ProcessRemarkService (org.asqatasun.service.ProcessRemarkService): 7, MalformedURLException (java.net.MalformedURLException): 6, Logger (org.slf4j.Logger): 5, LoggerFactory (org.slf4j.LoggerFactory): 5, TextView (android.widget.TextView): 4, RequestUtil (com.kyj.fx.voeditor.visual.util.RequestUtil): 4, ResponseHandler (com.kyj.fx.voeditor.visual.util.ResponseHandler): 4