Use of org.jsoup.nodes.Element in project jsoup by jhy.
From the class ParseTest, method testNewsHomepage.
/**
 * Parses the saved news.com.au homepage fixture and checks the page title, one
 * headline, and how {@code href} compares with {@code abs:href} for a
 * site-relative link and for a link that is already absolute.
 */
@Test
public void testNewsHomepage() throws IOException {
    final String baseUri = "http://www.news.com.au/";
    File fixture = getFile("/htmltests/news-com-au-home.html");
    Document page = Jsoup.parse(fixture, "UTF-8", baseUri);

    String title = page.title();
    assertEquals("News.com.au | News from Australia and around the world online | NewsComAu", title);

    String headline = page.select(".id1225817868581 h4").text().trim();
    assertEquals("Brace yourself for Metro meltdown", headline);

    // Site-relative link: the raw attribute stays relative, the abs: form is
    // expected to carry the base URI given to the parser.
    Element horoscopes = page.select("a[href=/entertainment/horoscopes]").first();
    String relativeHref = horoscopes.attr("href");
    assertEquals("/entertainment/horoscopes", relativeHref);
    String resolvedHref = horoscopes.attr("abs:href");
    assertEquals("http://www.news.com.au/entertainment/horoscopes", resolvedHref);

    // Link that is already absolute (different host): abs: must return it unchanged.
    Element heraldSun = page.select("a[href*=naughty-corners-are-a-bad-idea]").first();
    String externalHref = heraldSun.attr("href");
    assertEquals("http://www.heraldsun.com.au/news/naughty-corners-are-a-bad-idea-for-kids/story-e6frf7jo-1225817899003", externalHref);
    assertEquals(heraldSun.attr("href"), heraldSun.attr("abs:href"));
}
Use of org.jsoup.nodes.Element in project jsoup by jhy.
From the class ParseTest, method testNytArticle.
/**
 * Parses the saved NYT article fixture, which contains non-standard tags such
 * as {@code <nyt_text>}, and checks that a custom {@code nyt_headline} element
 * can be selected by tag name plus attribute and yields the expected text.
 */
@Test
public void testNytArticle() throws IOException {
    final String articleUri = "http://www.nytimes.com/2010/07/26/business/global/26bp.html?hp";
    File fixture = getFile("/htmltests/nyt-article-1.html");
    // The charset argument is deliberately null here.
    Document article = Jsoup.parse(fixture, null, articleUri);

    Element headlineEl = article.select("nyt_headline[version=1.0]").first();
    String headlineText = headlineEl.text();
    assertEquals("As BP Lays Out Future, It Will Not Include Hayward", headlineText);
}
Use of org.jsoup.nodes.Element in project jsoup by jhy.
From the class Cleaner, method createSafeElement.
/**
 * Builds a new element with the same tag name and base URI as {@code sourceEl},
 * copying across only the attributes the whitelist accepts and then adding the
 * whitelist's enforced attributes for that tag.
 *
 * @param sourceEl element to copy from
 * @return the new element paired with the count of source attributes that were dropped
 */
private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes keptAttrs = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int dropped = 0;
    for (Attribute candidate : sourceEl.attributes()) {
        if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
            dropped++;
            continue;
        }
        keptAttrs.put(candidate);
    }

    // Enforced attributes are added after the copied ones.
    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeEl, dropped);
}
Use of org.jsoup.nodes.Element in project jsoup by jhy.
From the class Elements, method siblings.
/**
 * Collects sibling elements of every element in this collection.
 *
 * @param query optional CSS query; when non-null only siblings matching it are kept
 * @param next  {@code true} to walk towards following siblings, {@code false} for preceding ones
 * @param all   {@code true} to keep walking until there are no more siblings,
 *              {@code false} to look at the immediate sibling only
 * @return the collected siblings, in visit order; empty if none
 */
private Elements siblings(String query, boolean next, boolean all) {
    final Elements collected = new Elements();
    final Evaluator filter = (query == null) ? null : QueryParser.parse(query);

    for (Element start : this) {
        Element cursor = start;
        while (true) {
            Element neighbour = next
                    ? cursor.nextElementSibling()
                    : cursor.previousElementSibling();
            if (neighbour == null) {
                break;
            }
            if (filter == null || neighbour.is(filter)) {
                collected.add(neighbour);
            }
            if (!all) {
                break; // only the immediate sibling was requested
            }
            // Keep walking from the sibling just visited, whether or not it matched.
            cursor = neighbour;
        }
    }
    return collected;
}
Use of org.jsoup.nodes.Element in project jsoup by jhy.
From the class Selector, method select.
/**
 * Finds the elements matching a CSS selector under any of the given roots.
 * An element reached from more than one root is returned only once; duplicates
 * are detected by object identity, and first-seen order is kept.
 *
 * @param query CSS selector; must not be empty
 * @param roots root elements to descend into; must not be null
 * @return matching elements, empty if none
 */
public static Elements select(String query, Iterable<Element> roots) {
    Validate.notEmpty(query);
    Validate.notNull(roots);

    final Evaluator evaluator = QueryParser.parse(query);
    final IdentityHashMap<Element, Boolean> alreadySeen = new IdentityHashMap<Element, Boolean>();
    final ArrayList<Element> matches = new ArrayList<Element>();

    for (Element root : roots) {
        for (Element candidate : select(evaluator, root)) {
            // put() returns null only the first time this exact instance is recorded.
            if (alreadySeen.put(candidate, Boolean.TRUE) == null) {
                matches.add(candidate);
            }
        }
    }
    return new Elements(matches);
}
Aggregations