Use of us.codecraft.webmagic.Request in project webmagic by code4craft.
The class HttpClientDownloaderTest, method test_selectRequestMethod.
/**
 * Checks that {@code HttpClientDownloader#selectRequestMethod} builds a request of the
 * HTTP method configured on the {@code Request}: a stub server answers each method
 * (GET, POST, PUT, DELETE, HEAD, TRACE) with a distinct marker, and the marker returned
 * for each configured method is asserted.
 */
@Test
public void test_selectRequestMethod() throws Exception {
    HttpServer server = httpserver(12306);
    server.get(eq(query("q"), "webmagic")).response("get");
    server.post(eq(form("q"), "webmagic")).response("post");
    server.put(eq(form("q"), "webmagic")).response("put");
    server.delete(eq(query("q"), "webmagic")).response("delete");
    // A HEAD response carries no body, so its marker travels in a response header instead.
    server.request(and(by(method("HEAD")), eq(query("q"), "webmagic"))).response(header("method", "head"));
    server.request(and(by(method("TRACE")), eq(query("q"), "webmagic"))).response("trace");
    Runner.running(server, new Runnable() {
        @Override
        public void run() throws Exception {
            HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
            Request request = new Request();
            request.setUrl("http://127.0.0.1:12306/search");
            request.putParams("q", "webmagic");

            // One client for all requests, closed in finally; previously a new client was
            // built per request and none of them was ever closed.
            CloseableHttpClient httpClient = HttpClients.custom().build();
            try {
                request.setMethod(HttpConstant.Method.GET);
                assertThat(fetchBody(httpClient, httpClientDownloader, request)).isEqualTo("get");

                request.setMethod(HttpConstant.Method.POST);
                assertThat(fetchBody(httpClient, httpClientDownloader, request)).isEqualTo("post");

                request.setMethod(HttpConstant.Method.PUT);
                assertThat(fetchBody(httpClient, httpClientDownloader, request)).isEqualTo("put");

                request.setMethod(HttpConstant.Method.DELETE);
                assertThat(fetchBody(httpClient, httpClientDownloader, request)).isEqualTo("delete");

                request.setMethod(HttpConstant.Method.HEAD);
                RequestBuilder headBuilder = httpClientDownloader.selectRequestMethod(request).setUri(request.getUrl());
                assertThat(httpClient.execute(headBuilder.build()).getFirstHeader("method").getValue()).isEqualTo("head");

                request.setMethod(HttpConstant.Method.TRACE);
                assertThat(fetchBody(httpClient, httpClientDownloader, request)).isEqualTo("trace");
            } finally {
                httpClient.close();
            }
        }

        /**
         * Builds the HTTP request for {@code request} via {@code selectRequestMethod},
         * executes it with {@code client} and returns the response body as a string.
         * Reading the body through {@code EntityUtils.toString} consumes the entity.
         */
        private String fetchBody(CloseableHttpClient client, HttpClientDownloader downloader, Request request) throws Exception {
            RequestBuilder requestBuilder = downloader.selectRequestMethod(request).setUri(request.getUrl());
            return EntityUtils.toString(client.execute(requestBuilder.build()).getEntity());
        }
    });
}
Use of us.codecraft.webmagic.Request in project webmagic by code4craft.
The class HttpClientDownloaderTest, method testGetHtmlCharset.
/**
 * Checks that {@code HttpClientDownloader#getHtmlCharset} reports "gbk" for three stubbed pages:
 * one declaring the charset in the Content-Type response header, one in a
 * {@code <meta charset>} tag, and one in a {@code <meta http-equiv="Content-Type">} tag.
 */
@Test
public void testGetHtmlCharset() throws Exception {
    HttpServer server = httpserver(12306);
    server.get(by(uri("/header"))).response(header("Content-Type", "text/html; charset=gbk"));
    server.get(by(uri("/meta4"))).response(with(text("<html>\n" + " <head>\n" + " <meta charset='gbk'/>\n" + " </head>\n" + " <body></body>\n" + "</html>")), header("Content-Type", ""));
    server.get(by(uri("/meta5"))).response(with(text("<html>\n" + " <head>\n" + " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=gbk\" />\n" + " </head>\n" + " <body></body>\n" + "</html>")), header("Content-Type", ""));
    Runner.running(server, new Runnable() {
        @Override
        public void run() {
            // Expected value first, so a failure message reads "expected gbk but was ...".
            assertEquals("gbk", getCharsetByUrl("http://127.0.0.1:12306/header"));
            assertEquals("gbk", getCharsetByUrl("http://127.0.0.1:12306/meta4"));
            assertEquals("gbk", getCharsetByUrl("http://127.0.0.1:12306/meta5"));
        }

        /**
         * Fetches {@code url} and returns the charset that
         * {@code HttpClientDownloader#getHtmlCharset} detects for the response.
         *
         * @param url address of the stubbed page to download
         * @return the detected charset as returned by {@code getHtmlCharset}
         * @throws IllegalStateException if the request or reading the response fails
         */
        private String getCharsetByUrl(String url) {
            HttpClientDownloader downloader = new HttpClientDownloader();
            Site site = Site.me();
            CloseableHttpClient httpClient = new HttpClientGenerator().getClient(site, null);
            CloseableHttpResponse httpResponse = null;
            try {
                httpResponse = httpClient.execute(downloader.getHttpUriRequest(new Request(url), site, null, null));
                byte[] contentBytes = IOUtils.toByteArray(httpResponse.getEntity().getContent());
                return downloader.getHtmlCharset(httpResponse, contentBytes);
            } catch (IOException e) {
                // Fail with the real cause instead of printing it and continuing into an
                // NPE on a null response or a misleading null-charset assertion failure.
                throw new IllegalStateException("Failed to detect charset for " + url, e);
            } finally {
                // closeQuietly tolerates null, which covers a failed execute().
                IOUtils.closeQuietly(httpResponse);
                IOUtils.closeQuietly(httpClient);
            }
        }
    });
}
Use of us.codecraft.webmagic.Request in project webmagic by code4craft.
The class PriorityScheduler, method poll.
/**
 * Returns the next request to process, consulting the queues in a fixed order:
 * {@code priorityQueuePlus} first, then {@code noPriorityQueue}, and finally
 * {@code priorityQueueMinus}.
 *
 * @param task the task polling for work (not used to select the queue)
 * @return the first request found in that order, or whatever
 *         {@code priorityQueueMinus.poll()} yields when the first two queues are empty
 */
@Override
public synchronized Request poll(Task task) {
    Request next = priorityQueuePlus.poll();
    if (next == null) {
        next = noPriorityQueue.poll();
    }
    if (next == null) {
        // Last resort: the result of this queue is returned as-is, even if it is null.
        next = priorityQueueMinus.poll();
    }
    return next;
}
Use of us.codecraft.webmagic.Request in project webmagic by code4craft.
The class MockGithubDownloader, method download.
/**
 * Returns a canned page instead of performing a real download: the page body is built
 * from the {@code html} value held by this downloader, and both the page's request and
 * its URL point at the fixed webmagic GitHub address. The {@code request} and
 * {@code task} arguments are ignored.
 *
 * @param request ignored
 * @param task ignored
 * @return a newly created page populated with the canned content
 */
@Override
public Page download(Request request, Task task) {
    final String projectUrl = "https://github.com/code4craft/webmagic";
    Page mockedPage = new Page();
    mockedPage.setHtml(new Html(html));
    mockedPage.setRequest(new Request(projectUrl));
    mockedPage.setUrl(new PlainText(projectUrl));
    return mockedPage;
}
Use of us.codecraft.webmagic.Request in project webmagic by code4craft.
The class SeleniumDownloaderTest, method test.
/**
 * Manual timing run (ignored by default because it needs a chrome driver): downloads
 * http://huaban.com/ 100 times through a {@code SeleniumDownloader}, prints the links
 * under {@code #waterfall} matching {@code .*pins.*} for each page, and finally prints
 * the total elapsed time in milliseconds.
 */
@Ignore("need chrome driver")
@Test
public void test() {
    SeleniumDownloader seleniumDownloader = new SeleniumDownloader(chromeDriverPath);
    long startMillis = System.currentTimeMillis();
    int remaining = 100;
    while (remaining-- > 0) {
        // A fresh task is created for every download, as in the original loop.
        Task huabanTask = new Task() {
            @Override
            public String getUUID() {
                return "huaban.com";
            }

            @Override
            public Site getSite() {
                return Site.me();
            }
        };
        Page page = seleniumDownloader.download(new Request("http://huaban.com/"), huabanTask);
        System.out.println(page.getHtml().$("#waterfall").links().regex(".*pins.*").all());
    }
    long elapsedMillis = System.currentTimeMillis() - startMillis;
    System.out.println(elapsedMillis);
}
Aggregations