use of org.jsoup.nodes.Document in project jsoup by jhy.
the class SelectorTest method testRelaxedTags.
/**
 * Checks that a plain tag selector matches custom tag names containing an
 * underscore ({@code abc_def}) or a hyphen ({@code abc-def}).
 */
@Test
public void testRelaxedTags() {
    String html = "<abc_def id=1>Hello</abc_def> <abc-def id=2>There</abc-def>";
    Document parsed = Jsoup.parse(html);

    // Underscore-named tag: exactly one hit, and it is the element with id 1.
    Elements underscoreHits = parsed.select("abc_def");
    assertEquals(1, underscoreHits.size());
    assertEquals("1", underscoreHits.first().id());

    // Hyphen-named tag: exactly one hit, and it is the element with id 2.
    Elements hyphenHits = parsed.select("abc-def");
    assertEquals(1, hyphenHits.size());
    assertEquals("2", hyphenHits.first().id());
}
use of org.jsoup.nodes.Document in project ZhihuDailyPurify by izzyleung.
the class NewsListFromZhihuObservable method convertToDailyNews.
/**
 * Builds a {@link DailyNews} from a story and its parsed document.
 *
 * @param pair the story ({@code first}) together with its document ({@code second})
 * @return an Optional holding the populated news item when every extracted
 *         question passes {@code Question::isValidZhihuQuestion}; an empty
 *         Optional otherwise
 */
private static Optional<DailyNews> convertToDailyNews(Pair<Story, Document> pair) {
    final Story story = pair.first;
    final Document document = pair.second;

    final String dailyTitle = story.getDailyTitle();
    final List<Question> questions = getQuestions(document, dailyTitle);

    // A single invalid question disqualifies the whole story.
    final boolean everyQuestionValid =
            Stream.of(questions).allMatch(Question::isValidZhihuQuestion);
    if (!everyQuestionValid) {
        return Optional.empty();
    }

    final DailyNews news = new DailyNews();
    news.setDailyTitle(dailyTitle);
    news.setThumbnailUrl(story.getThumbnailUrl());
    news.setQuestions(questions);
    return Optional.of(news);
}
use of org.jsoup.nodes.Document in project jsoup by jhy.
the class ParseTest method testYahooJp.
/**
 * Parses the saved Yahoo! JAPAN page as UTF-8 and checks the document title,
 * plus the absolute URL and text of one specific link.
 */
@Test
public void testYahooJp() throws IOException {
    File yahooPage = getFile("/htmltests/yahoo-jp.html");

    // The http charset for this page is utf-8.
    Document parsed = Jsoup.parse(yahooPage, "UTF-8", "http://www.yahoo.co.jp/index.html");
    assertEquals("Yahoo! JAPAN", parsed.title());

    Element link = parsed.select("a[href=t/2322m2]").first();

    // The session token appears in the absolute URL because it is put into <base>.
    String expectedAbsHref =
            "http://www.yahoo.co.jp/_ylh=X3oDMTB0NWxnaGxsBF9TAzIwNzcyOTYyNjUEdGlkAzEyBHRtcGwDZ2Ex/t/2322m2";
    assertEquals(expectedAbsHref, link.attr("abs:href"));
    assertEquals("全国、人気の駅ランキング", link.text());
}
use of org.jsoup.nodes.Document in project jsoup by jhy.
the class ParseTest method testHtml5Charset.
/**
 * Exercises charset handling when {@code Jsoup.parse} is given a {@code null}
 * charset name, across three fixture files:
 * <ol>
 *   <li>gb2312 content with an html5 {@code <meta charset>};</li>
 *   <li>gb2312 content with no charset declared;</li>
 *   <li>utf8 content with no charset declared.</li>
 * </ol>
 */
@Test
public void testHtml5Charset() throws IOException {
    final String expectedText = "新";

    // Fixture 1: gb2312 with an html5 <meta charset="gb2312">, which should be honoured.
    File gbWithMeta = getFile("/htmltests/meta-charset-1.html");
    Document gbDeclared = Jsoup.parse(gbWithMeta, null, "http://example.com/");
    assertEquals(expectedText, gbDeclared.text());
    assertEquals("GB2312", gbDeclared.outputSettings().charset().displayName());

    // Fixture 2: gb2312 with no charset; the parser falls back to utf8, which is
    // incorrect for this content, so the text must not come out right.
    File gbWithoutMeta = getFile("/htmltests/meta-charset-2.html");
    Document gbUndeclared = Jsoup.parse(gbWithoutMeta, null, "http://example.com");
    assertEquals("UTF-8", gbUndeclared.outputSettings().charset().displayName());
    assertFalse(expectedText.equals(gbUndeclared.text()));

    // Fixture 3: utf8 with no charset; confirms the utf8 fallback decodes it correctly.
    File utfWithoutMeta = getFile("/htmltests/meta-charset-3.html");
    Document utfUndeclared = Jsoup.parse(utfWithoutMeta, null, "http://example.com/");
    assertEquals("UTF-8", utfUndeclared.outputSettings().charset().displayName());
    assertEquals(expectedText, utfUndeclared.text());
}
use of org.jsoup.nodes.Document in project jsoup by jhy.
the class ParseTest method testNewsHomepage.
@Test
public void testNewsHomepage() throws IOException {
File in = getFile("/htmltests/news-com-au-home.html");
Document doc = Jsoup.parse(in, "UTF-8", "http://www.news.com.au/");
assertEquals("News.com.au | News from Australia and around the world online | NewsComAu", doc.title());
assertEquals("Brace yourself for Metro meltdown", doc.select(".id1225817868581 h4").text().trim());
Element a = doc.select("a[href=/entertainment/horoscopes]").first();
assertEquals("/entertainment/horoscopes", a.attr("href"));
assertEquals("http://www.news.com.au/entertainment/horoscopes", a.attr("abs:href"));
Element hs = doc.select("a[href*=naughty-corners-are-a-bad-idea]").first();
assertEquals("http://www.heraldsun.com.au/news/naughty-corners-are-a-bad-idea-for-kids/story-e6frf7jo-1225817899003", hs.attr("href"));
assertEquals(hs.attr("href"), hs.attr("abs:href"));
}
Aggregations