use of us.codecraft.webmagic.selector.Html in project webmagic by code4craft.
the class HtmlTest method testDisableJsoupHtmlEntityEscape.
/**
 * Checks that, with {@code Html.DISABLE_HTML_ENTITY_ESCAPE} switched on, a regex
 * run against the document still yields the raw text {@code aaaaaaa&b}.
 *
 * <p>Currently skipped: per the {@code @Ignore} reason it does not work with jsoup 1.8.x.
 *
 * @throws Exception declared for parity with the other tests; nothing here throws it explicitly
 */
@Ignore("not work in jsoup 1.8.x")
@Test
public void testDisableJsoupHtmlEntityEscape() throws Exception {
    // The flag is a mutable static field: remember its current value so the
    // setting made here cannot leak into tests that run afterwards.
    final boolean previous = Html.DISABLE_HTML_ENTITY_ESCAPE;
    Html.DISABLE_HTML_ENTITY_ESCAPE = true;
    try {
        Html html = new Html("aaaaaaa&b");
        assertThat(html.regex("(aaaaaaa&b)").toString()).isEqualTo("aaaaaaa&b");
    } finally {
        // Restore even when the assertion fails.
        Html.DISABLE_HTML_ENTITY_ESCAPE = previous;
    }
}
use of us.codecraft.webmagic.selector.Html in project webmagic by code4craft.
the class HttpClientDownloaderTest method testDownloader.
/**
 * Fetches {@code https://www.baidu.com/} through a fresh {@code HttpClientDownloader}
 * and checks that the first source text of the returned page is not empty.
 *
 * <p>NOTE(review): presumably needs outbound network access to pass — confirm.
 */
@Test
public void testDownloader() {
    HttpClientDownloader downloader = new HttpClientDownloader();
    Html page = downloader.download("https://www.baidu.com/");
    boolean sourceTextIsEmpty = page.getFirstSourceText().isEmpty();
    assertTrue(!sourceTextIsEmpty);
}
use of us.codecraft.webmagic.selector.Html in project webmagic by code4craft.
the class HtmlTest method testEnableJsoupHtmlEntityEscape.
/**
 * Builds an {@code Html} from the text {@code aaaaaaa&b} and checks that a
 * capturing regex over it returns exactly that text.
 *
 * @throws Exception declared by the signature; nothing here throws it explicitly
 */
@Test
public void testEnableJsoupHtmlEntityEscape() throws Exception {
    Html document = new Html("aaaaaaa&b");
    String matched = document.regex("(aaaaaaa&b)").toString();
    assertThat(matched).isEqualTo("aaaaaaa&b");
}
use of us.codecraft.webmagic.selector.Html in project webmagic by code4craft.
the class HtmlTest method testGetHrefsByJsoup.
/**
 * Checks that the {@code abs:href} and {@code abs:src} XPath attributes produce
 * absolute URLs in two set-ups: when the base URL is passed to the {@code Html}
 * constructor, and when it comes from a {@code <base href>} element in the markup.
 */
@Test
public void testGetHrefsByJsoup() {
    String expectedLink = "https://github.com/code4craft/webmagic/issues";
    String expectedImage = "https://github.com/code4craft/webmagic/webmagic.jpg";

    // Case 1: base URL handed to the constructor.
    Html withExplicitBase = new Html(
            "<html><a href='issues'>issues</a><img src='webmagic.jpg'/></html>",
            "https://github.com/code4craft/webmagic/");
    assertThat(withExplicitBase.xpath("//a[1]/@abs:href").get()).isEqualTo(expectedLink);
    assertThat(withExplicitBase.xpath("//img/@abs:src").get()).isEqualTo(expectedImage);

    // Case 2: base URL declared inside the document itself.
    Html withBaseElement = new Html(
            "<html><base href='https://github.com/code4craft/webmagic/'><a href='issues'>issues</a><img src='webmagic.jpg'/></base></html>");
    assertThat(withBaseElement.xpath("//a[1]/@abs:href").get()).isEqualTo(expectedLink);
    assertThat(withBaseElement.xpath("//img/@abs:src").get()).isEqualTo(expectedImage);
}
use of us.codecraft.webmagic.selector.Html in project webmagic by code4craft.
the class HtmlTest method testNthNodesGet.
/**
 * Checks that the {@code href} of the first anchor can be read both directly from
 * the document and from the first node returned by {@code xpath("//a[1]").nodes()}.
 */
@Test
public void testNthNodesGet() {
    Html page = new Html("<a data-tip=\"p$t$xxx\" href=\"/xx/xx\">xx</a>");
    String expectedHref = "/xx/xx";

    // Straight from the document.
    String hrefFromDocument = page.xpath("//a[1]/@href").get();
    assertThat(hrefFromDocument).isEqualTo(expectedHref);

    // Via the first selected node, queried again with its own XPath.
    Selectable firstAnchor = page.xpath("//a[1]").nodes().get(0);
    String hrefFromNode = firstAnchor.xpath("/a/@href").get();
    assertThat(hrefFromNode).isEqualTo(expectedHref);
}
Aggregations