Use of us.codecraft.webmagic.Request in the webmagic project by code4craft.
From the class HttpClientDownloaderTest, method test_set_site_cookie.
/**
 * Checks that a cookie configured on the {@code Site} accompanies the download request.
 *
 * The stub server is set up to answer "ok" to GET requests whose "cookie" cookie equals
 * "cookie-webmagic"; the test expects that body back when downloading with a site
 * carrying exactly that cookie for domain 127.0.0.1.
 */
@Test
public void test_set_site_cookie() throws Exception {
    HttpServer stubServer = httpServer(13423);
    stubServer.get(eq(cookie("cookie"), "cookie-webmagic")).response("ok");
    Runner.running(stubServer, new Runnable() {
        @Override
        public void run() throws Exception {
            // Plain request: the cookie is supplied through the site, not the request.
            Request cookieRequest = new Request();
            cookieRequest.setUrl("http://127.0.0.1:13423");
            Site cookieSite = Site.me()
                    .addCookie("cookie", "cookie-webmagic")
                    .setDomain("127.0.0.1");
            HttpClientDownloader downloader = new HttpClientDownloader();
            Page downloaded = downloader.download(cookieRequest, cookieSite.toTask());
            assertThat(downloaded.getRawText()).isEqualTo("ok");
        }
    });
}
Use of us.codecraft.webmagic.Request in the webmagic project by code4craft.
From the class HttpClientDownloaderTest, method test_disableCookieManagement.
/**
 * Downloads with cookie management disabled on the site while the request itself
 * carries a cookie.
 *
 * The stub server is set up to answer "ok" to GET requests whose "cookie" cookie is
 * NOT equal to "cookie-webmagic"; the test expects that body back even though the
 * request had that very cookie added to it.
 */
@Test
public void test_disableCookieManagement() throws Exception {
    HttpServer stubServer = httpServer(13423);
    stubServer.get(not(eq(cookie("cookie"), "cookie-webmagic"))).response("ok");
    Runner.running(stubServer, new Runnable() {
        @Override
        public void run() throws Exception {
            // The cookie is attached to the request, not to the site.
            Request cookieRequest = new Request();
            cookieRequest.setUrl("http://127.0.0.1:13423");
            cookieRequest.addCookie("cookie", "cookie-webmagic");
            HttpClientDownloader downloader = new HttpClientDownloader();
            Page downloaded = downloader.download(
                    cookieRequest,
                    Site.me().setDisableCookieManagement(true).toTask());
            assertThat(downloaded.getRawText()).isEqualTo("ok");
        }
    });
}
Use of us.codecraft.webmagic.Request in the webmagic project by code4craft.
From the class HttpClientDownloaderTest, method test_download_binary_content.
/**
 * Downloads a page with the request flagged as binary content.
 *
 * The stub server answers every request with the body "binary". The test expects
 * the resulting page to have no raw text and to expose the body through
 * {@code getBytes()} instead.
 */
@Test
public void test_download_binary_content() throws Exception {
    HttpServer stubServer = httpServer(13423);
    stubServer.response("binary");
    Runner.running(stubServer, new Runnable() {
        @Override
        public void run() throws Exception {
            Request binaryRequest = new Request();
            binaryRequest.setUrl("http://127.0.0.1:13423/");
            binaryRequest.setBinaryContent(true);
            HttpClientDownloader downloader = new HttpClientDownloader();
            Page downloaded = downloader.download(binaryRequest, Site.me().toTask());
            // Binary downloads are expected to populate the bytes only, never the text.
            assertThat(downloaded.getRawText()).isNull();
            assertThat(downloaded.getBytes()).isEqualTo("binary".getBytes());
        }
    });
}
Use of us.codecraft.webmagic.Request in the webmagic project by code4craft.
From the class HttpClientDownloaderTest, method test_download_fail.
/**
 * Expects a download of {@code PAGE_ALWAYS_NOT_EXISTS} to be reported as unsuccessful.
 *
 * NOTE(review): the constant is defined outside this snippet; judging by its name it
 * points at a URL that can never be fetched. Confirm against its definition.
 */
@Test
public void test_download_fail() {
    Request unreachable = new Request(PAGE_ALWAYS_NOT_EXISTS);
    HttpClientDownloader downloader = new HttpClientDownloader();
    Page result = downloader.download(
            unreachable,
            Site.me().setDomain("localhost").setCycleRetryTimes(5).toTask());
    assertThat(result.isDownloadSuccess()).isFalse();
}
Use of us.codecraft.webmagic.Request in the webmagic project by code4craft.
From the class ZipCodePageProcessor, method processProvince.
/**
 * Scans a province page for district entries and queues one follow-up request per entry.
 *
 * Each queued request has priority 1, carries over the "province" extra from the
 * current page's request, and records the matched cell text under the "district" extra.
 *
 * @param page the downloaded province page whose table rows are inspected
 */
private void processProvince(Page page) {
    // XPath alone cannot pinpoint the wanted rows precisely, so a regex is used as an
    // additional filter; anything that does not match the regex is dropped.
    Pattern rowPattern = Pattern.compile("<td>([^<>]+)</td>.*?href=\"(.*?)\"", Pattern.DOTALL);
    List<String> rows = page.getHtml().xpath("//body/table/tbody/tr[@bgcolor=\"#ffffff\"]").all();
    for (String row : rows) {
        for (Matcher hit = rowPattern.matcher(row); hit.find(); ) {
            String districtName = hit.group(1);
            String href = hit.group(2);
            Request districtRequest = new Request(href)
                    .setPriority(1)
                    .putExtra("province", page.getRequest().getExtra("province"))
                    .putExtra("district", districtName);
            page.addTargetRequest(districtRequest);
        }
    }
}
Aggregations