use of org.jsoup.select.Elements in project Java-readability by basis-technology-corp.
the class Readability method getLinkDensity.
/**
 * Computes how much of an element's text is link text.
 *
 * @param e element to measure
 * @return the summed text length of every element returned by
 *     {@code e.getElementsByTag("a")} divided by the length of {@code e.text()},
 *     or {@code 0} when {@code e.text()} is empty
 */
private double getLinkDensity(Element e) {
    int textLength = e.text().length();
    if (textLength == 0) {
        // Without this guard the division below is 0.0 / 0.0, i.e. NaN, and NaN
        // compares false against every threshold a caller might test it with.
        return 0;
    }
    double linkLength = 0;
    for (Element link : e.getElementsByTag("a")) {
        linkLength += link.text().length();
    }
    return linkLength / textLength;
}
use of org.jsoup.select.Elements in project Java-readability by basis-technology-corp.
the class Readability method clean.
/**
 * Removes from the document every element that {@code getElementsByTag(tag)}
 * finds for the given node.
 *
 * @param e node whose matching elements are removed
 * @param tag name of the tag to remove
 */
private void clean(Element e, String tag) {
    e.getElementsByTag(tag).remove();
}
use of org.jsoup.select.Elements in project MusicDNA by harjot-oberai.
the class Genius method fromURL.
/**
 * Downloads the page at {@code url} and builds a {@code Lyrics} object from the
 * content of its {@code .lyrics} element(s).
 *
 * <p>The lyrics markup is reduced to text plus {@code <br>} tags, split into lines,
 * and re-joined with {@code <br/>}. Lines that consist solely of a bracketed tag
 * (matching {@code \[.+\]} once whitespace is removed) are dropped, as are blank
 * lines before the first kept line. Non-printable characters are stripped and the
 * final text is NFD-normalized.
 *
 * @param url address of the lyrics page
 * @param artist artist name; when {@code null}, artist and title are read from the
 *     page's {@code text_artist} / {@code text_title} elements if those exist
 * @param title song title; replaced by the page's title only when {@code artist} is
 *     {@code null}
 * @return a {@code Lyrics} flagged {@code NO_RESULT} on an HTTP status error,
 *     {@code ERROR} on any other I/O failure or when the page has no {@code .lyrics}
 *     element, {@code NEGATIVE_RESULT} when the text is exactly
 *     {@code [Instrumental]}, and {@code POSITIVE_RESULT} otherwise
 */
public static Lyrics fromURL(String url, String artist, String title) {
    Document lyricsPage;
    String text;
    try {
        lyricsPage = Jsoup.connect(url).userAgent(Net.USER_AGENT).get();
        Elements lyricsDiv = lyricsPage.select(".lyrics");
        if (lyricsDiv.isEmpty()) {
            // A page without a lyrics block is an expected outcome, so report it
            // directly instead of throwing an exception only to catch it below.
            return new Lyrics(Lyrics.ERROR);
        }
        text = Jsoup.clean(lyricsDiv.html(), Whitelist.none().addTags("br")).trim();
    } catch (HttpStatusException e) {
        return new Lyrics(Lyrics.NO_RESULT);
    } catch (IOException e) {
        e.printStackTrace();
        return new Lyrics(Lyrics.ERROR);
    }
    if (artist == null) {
        // get(0) on an empty result throws IndexOutOfBoundsException, so only
        // overwrite a value when the page actually provides the element.
        Elements titleElements = lyricsPage.getElementsByClass("text_title");
        if (!titleElements.isEmpty()) {
            title = titleElements.get(0).text();
        }
        Elements artistElements = lyricsPage.getElementsByClass("text_artist");
        if (!artistElements.isEmpty()) {
            artist = artistElements.get(0).text();
        }
    }
    Lyrics result = new Lyrics(Lyrics.POSITIVE_RESULT);
    if ("[Instrumental]".equals(text)) {
        result = new Lyrics(Lyrics.NEGATIVE_RESULT);
    }
    Pattern pattern = Pattern.compile("\\[.+\\]");
    StringBuilder builder = new StringBuilder();
    for (String line : text.split("<br> ")) {
        String strippedLine = line.replaceAll("\\s", "");
        // Skip bracket-only lines, and blank lines that would start the output.
        if (!pattern.matcher(strippedLine).matches() && !(strippedLine.isEmpty() && builder.length() == 0)) {
            builder.append(line.replaceAll("\\P{Print}", "")).append("<br/>");
        }
    }
    // Drop the trailing "<br/>" (5 characters) appended after the last kept line.
    if (builder.length() > 5) {
        builder.delete(builder.length() - 5, builder.length());
    }
    result.setArtist(artist);
    result.setTitle(title);
    result.setText(Normalizer.normalize(builder.toString(), Normalizer.Form.NFD));
    result.setURL(url);
    result.setSource("Genius");
    return result;
}
use of org.jsoup.select.Elements in project jstructure by JonStargaryen.
the class SiftsMappingAnnotator method mapGroup.
/**
 * Builds the residue mapping for one residue of a chain from the given document.
 *
 * <p>The element returned by {@code mapToDescribingElement} is searched for elements
 * whose {@code dbSource} attribute equals {@code UniProt}. The {@code dbResNum}
 * attribute of the first match is passed to the mapping. When nothing matches, a
 * warning is logged and a mapping without a residue number is returned.
 *
 * @param document document handed to {@code mapToDescribingElement}
 * @param chainId chain identifier of the residue
 * @param pdbResidueNumber PDB residue number of the residue
 * @return the mapping for the residue, with or without a UniProt residue number
 */
private ResidueMapping mapGroup(final Document document, final String chainId, final String pdbResidueNumber) {
    final Elements uniProtMatches = mapToDescribingElement(document, chainId, pdbResidueNumber)
            .getElementsByAttributeValue("dbSource", "UniProt");
    if (uniProtMatches.isEmpty()) {
        logger.warn("could not retrieve UniProt mapping for " + chainId + "-" + pdbResidueNumber);
        return new ResidueMapping(this);
    }
    return new ResidueMapping(this, uniProtMatches.first().attr("dbResNum"));
}
use of org.jsoup.select.Elements in project LeMondeRssReader by MBach.
the class ArticleActivity method extractComments.
/**
 * Converts the comment markup found under {@code doc} into a list of view models.
 *
 * <p>As a side effect, {@code commentsURI} is set to the {@code href} of the first
 * link inside the element that follows {@code div.reactions}, when such a link
 * exists.
 *
 * @param doc root element to search for comments
 * @param loadMoreComments when {@code false}, a "Commentaires" header built from the
 *     {@code InteractionCount} element is added first; when {@code true} the header
 *     is skipped
 * @return the header (if any) followed by an author model and a content model for
 *     each comment that has both a {@code p.references} element and a following
 *     sibling
 */
private List<Model> extractComments(Element doc, boolean loadMoreComments) {
    List<Model> commentList = new ArrayList<>();
    // Extract header
    if (!loadMoreComments) {
        Elements header = doc.select("[itemprop='InteractionCount']");
        if (atLeastOneChild(header)) {
            TextView commentHeader = new TextView(getBaseContext());
            commentHeader.setText(String.format("Commentaires %s", header.text()));
            commentHeader.setTypeface(null, Typeface.BOLD);
            commentHeader.setTextColor(Color.WHITE);
            commentHeader.setPadding(0, 0, 0, Constants.PADDING_COMMENT_ANSWER);
            commentList.add(new Model(commentHeader, 0));
        }
    }
    // Extract comments
    Elements comments = doc.select("[itemprop='commentText']");
    for (Element comment : comments) {
        Elements refs = comment.select("p.references");
        if (atLeastOneChild(refs)) {
            // Clear date
            refs.select("span").remove();
            TextView author = new TextView(getBaseContext());
            author.setTypeface(null, Typeface.BOLD);
            author.setText(refs.text());
            author.setTextColor(Color.WHITE);
            // The comment body is the element right after the references paragraph.
            Elements commentComment = refs.next();
            if (atLeastOneChild(commentComment)) {
                TextView content = new TextView(getBaseContext());
                content.setText(commentComment.first().text());
                content.setTextColor(Color.WHITE);
                // Replies ("reponse") are indented relative to top-level comments.
                if (comment.hasClass("reponse")) {
                    author.setPadding(Constants.PADDING_COMMENT_ANSWER, 0, 0, 12);
                    content.setPadding(Constants.PADDING_COMMENT_ANSWER, 0, 0, 16);
                } else {
                    author.setPadding(0, 0, 0, 12);
                    content.setPadding(0, 0, 0, 16);
                }
                // NOTE(review): Integer.valueOf throws NumberFormatException when the
                // attribute is missing or not numeric — confirm the markup always has it.
                Integer commentId = Integer.valueOf(comment.attr("data-reaction_id"));
                commentList.add(new Model(author, commentId));
                commentList.add(new Model(content, commentId));
            }
        }
    }
    // Extract full comments page URI
    Elements div = doc.select("div.reactions");
    if (atLeastOneChild(div)) {
        Element fullComments = div.first().nextElementSibling();
        // nextElementSibling() returns null when div.reactions has no following
        // element sibling; without this guard select() would throw a
        // NullPointerException.
        if (fullComments != null) {
            Elements next = fullComments.select("a");
            if (atLeastOneChild(next)) {
                commentsURI = next.first().attr("href");
            }
        }
    }
    return commentList;
}
Aggregations