use of org.jsoup.nodes.Document in project jsoup by jhy.
the class ParseTest method testBinary.
/**
 * Runs the parser over the {@code thumb.jpg} fixture, which is not HTML, and checks that
 * parsing completes and still yields the expected marker text.
 */
@Test
public void testBinary() throws IOException {
    final File binaryFixture = getFile("/htmltests/thumb.jpg");
    final Document parsed = Jsoup.parse(binaryFixture, "UTF-8");

    // The parsed result carries nothing meaningful; the point is that parsing did not fail
    // and the "gd-jpeg" marker shows up in the extracted text.
    final String extractedText = parsed.text();
    assertTrue(extractedText.contains("gd-jpeg"));
}
use of org.jsoup.nodes.Document in project jsoup by jhy.
the class ParseTest method testNytArticle.
/**
 * Parses a saved NYT article, which contains non-standard tags such as {@code <nyt_text>},
 * and checks that a custom {@code nyt_headline} element can be selected by attribute.
 */
@Test
public void testNytArticle() throws IOException {
    final String baseUri = "http://www.nytimes.com/2010/07/26/business/global/26bp.html?hp";
    final String expectedHeadline = "As BP Lays Out Future, It Will Not Include Hayward";

    final File article = getFile("/htmltests/nyt-article-1.html");
    // No charset name is supplied (null) for this parse.
    final Document parsed = Jsoup.parse(article, null, baseUri);

    final Element headline = parsed.select("nyt_headline[version=1.0]").first();
    assertEquals(expectedHeadline, headline.text());
}
use of org.jsoup.nodes.Document in project jsoup by jhy.
the class ParseTest method testBrokenHtml5CharsetWithASingleDoubleQuote.
/**
 * Parses markup whose {@code <meta charset>} value carries a stray trailing double quote
 * and checks that the document's output charset is still reported as UTF-8.
 */
@Test
public void testBrokenHtml5CharsetWithASingleDoubleQuote() throws IOException {
    // Note the unbalanced quote after UTF-8 in the meta tag.
    final String brokenHtml = "<html>\n"
            + "<head><meta charset=UTF-8\"></head>\n"
            + "<body></body>\n"
            + "</html>";

    final InputStream stream = inputStreamFrom(brokenHtml);
    final Document parsed = Jsoup.parse(stream, null, "http://example.com/");

    final String charsetName = parsed.outputSettings().charset().displayName();
    assertEquals("UTF-8", charsetName);
}
use of org.jsoup.nodes.Document in project jsoup by jhy.
the class ParseTest method testGoogleSearchIpod.
/**
 * Parses a saved Google search results page for "ipod" and checks the page title, the number
 * of result links matched by {@code h3.r > a}, and the href of the first two results.
 */
@Test
public void testGoogleSearchIpod() throws IOException {
    final String baseUri = "http://www.google.com/search?hl=en&q=ipod&aq=f&oq=&aqi=g10";
    final File searchPage = getFile("/htmltests/google-ipod.html");
    final Document parsed = Jsoup.parse(searchPage, "UTF-8", baseUri);

    assertEquals("ipod - Google Search", parsed.title());

    final Elements resultLinks = parsed.select("h3.r > a");
    assertEquals(12, resultLinks.size());

    final String firstHref = resultLinks.get(0).attr("href");
    assertEquals("http://news.google.com/news?hl=en&q=ipod&um=1&ie=UTF-8&ei=uYlKS4SbBoGg6gPf-5XXCw&sa=X&oi=news_group&ct=title&resnum=1&ved=0CCIQsQQwAA", firstHref);

    final String secondHref = resultLinks.get(1).attr("href");
    assertEquals("http://www.apple.com/itunes/", secondHref);
}
use of org.jsoup.nodes.Document in project jsoup by jhy.
the class ParseTest method testBaiduVariant.
/**
 * Exercises {@code <meta charset>} handling when that tag is preceded by another
 * {@code <meta>}, using a saved Baidu page variant.
 */
@Test
public void testBaiduVariant() throws IOException {
    final File baiduPage = getFile("/htmltests/baidu-variant.html");

    // The HTTP charset for this page is gb2312, but it is deliberately NOT passed here
    // (null) so that the http-equiv / meta parsing path is what gets tested.
    final Document parsed = Jsoup.parse(baiduPage, null, "http://www.baidu.com/");

    // The charset should have been auto-detected from the meta tag.
    final String detectedCharset = parsed.outputSettings().charset().displayName();
    assertEquals("GB2312", detectedCharset);

    final String titleHtml = parsed.select("title").outerHtml();
    assertEquals("<title>百度一下,你就知道</title>", titleHtml);
}
Aggregations