Use of us.codecraft.webmagic.Page in project webmagic by code4craft.
Class HttpClientDownloaderTest, method test_download_binary_content.
/**
 * Downloads from a local stub server with the request flagged as binary content and
 * checks that the page carries the raw bytes and no raw text.
 */
@Test
public void test_download_binary_content() throws Exception {
    HttpServer stubServer = httpServer(13423);
    stubServer.response("binary");
    Runner.running(stubServer, new Runnable() {
        @Override
        public void run() throws Exception {
            // Expected payload is the same string the stub server was configured with.
            byte[] expectedBytes = "binary".getBytes();

            Request binaryRequest = new Request();
            binaryRequest.setUrl("http://127.0.0.1:13423/");
            binaryRequest.setBinaryContent(true);

            HttpClientDownloader downloader = new HttpClientDownloader();
            Page downloaded = downloader.download(binaryRequest, Site.me().toTask());

            assertThat(downloaded.getRawText()).isNull();
            assertThat(downloaded.getBytes()).isEqualTo(expectedBytes);
        }
    });
}
Use of us.codecraft.webmagic.Page in project webmagic by code4craft.
Class HttpClientDownloaderTest, method test_download_fail.
/**
 * Downloads PAGE_ALWAYS_NOT_EXISTS and checks that the resulting page is not
 * reported as a successful download.
 */
@Test
public void test_download_fail() {
    HttpClientDownloader downloader = new HttpClientDownloader();

    // Site configured with 5 cycle retries before it is turned into a task.
    Site localSite = Site.me().setDomain("localhost").setCycleRetryTimes(5);
    Task localTask = localSite.toTask();

    Request missingPageRequest = new Request(PAGE_ALWAYS_NOT_EXISTS);
    Page result = downloader.download(missingPageRequest, localTask);

    assertThat(result.isDownloadSuccess()).isFalse();
}
Use of us.codecraft.webmagic.Page in project webmagic by code4craft.
Class PhantomJSDownloader, method download.
/**
 * Fetches the page content for the request via getPage, retrying while the content
 * contains the "HTTP request failed" marker.
 *
 * @param request the request whose URL is fetched
 * @param task    not used by this method
 * @return a page with raw text, URL, request and status code 200 on success; a page
 *         with only the request set when every attempt still contains the marker
 */
@Override
public Page download(Request request, Task task) {
    if (logger.isInfoEnabled()) {
        logger.info("downloading page: " + request.getUrl());
    }

    final String failureMarker = "HTTP request failed";
    String body = getPage(request);
    boolean failed = body.contains(failureMarker);

    // Retry up to getRetryNum() more times, stopping at the first attempt without the marker.
    for (int attempt = 1; failed && attempt <= getRetryNum(); attempt++) {
        body = getPage(request);
        failed = body.contains(failureMarker);
    }

    Page result = new Page();
    if (failed) {
        // All attempts failed: hand back a page that only knows its request.
        result.setRequest(request);
        return result;
    }

    result.setRawText(body);
    result.setUrl(new PlainText(request.getUrl()));
    result.setRequest(request);
    result.setStatusCode(200);
    return result;
}
Use of us.codecraft.webmagic.Page in project webmagic by code4craft.
Class SeleniumDownloader, method download.
/**
 * Loads the request URL in a WebDriver borrowed from the pool and wraps the
 * document's outer HTML in a Page.
 *
 * <p>After the URL is opened the method sleeps for {@code sleepTime}, adds any cookies
 * configured on the task's site to the driver, and then reads the {@code /html}
 * element. The driver is always handed back to the pool, even when one of these
 * steps throws.
 *
 * @param request the request whose URL is opened
 * @param task    the task whose site supplies the cookies
 * @return the populated page, or {@code null} if the thread was interrupted while
 *         waiting for a driver from the pool
 */
@Override
public Page download(Request request, Task task) {
    checkInit();
    WebDriver webDriver;
    try {
        webDriver = webDriverPool.get();
    } catch (InterruptedException e) {
        logger.warn("interrupted", e);
        // Nothing was borrowed, so only the interrupt status needs to be preserved.
        Thread.currentThread().interrupt();
        return null;
    }

    boolean interruptedDuringSleep = false;
    try {
        logger.info("downloading page " + request.getUrl());
        webDriver.get(request.getUrl());
        try {
            Thread.sleep(sleepTime);
        } catch (InterruptedException e) {
            // Keep going with the already-loaded page; the interrupt status is restored
            // in the finally block so the remaining driver calls are not disturbed.
            logger.warn("interrupted while sleeping after page load", e);
            interruptedDuringSleep = true;
        }
        WebDriver.Options manage = webDriver.manage();
        Site site = task.getSite();
        if (site.getCookies() != null) {
            for (Map.Entry<String, String> cookieEntry : site.getCookies().entrySet()) {
                Cookie cookie = new Cookie(cookieEntry.getKey(), cookieEntry.getValue());
                manage.addCookie(cookie);
            }
        }
        /*
         * TODO You can add mouse event or other processes
         *
         * @author: bob.li.0718@gmail.com
         */
        WebElement webElement = webDriver.findElement(By.xpath("/html"));
        String content = webElement.getAttribute("outerHTML");
        Page page = new Page();
        page.setRawText(content);
        page.setHtml(new Html(content, request.getUrl()));
        page.setUrl(new PlainText(request.getUrl()));
        page.setRequest(request);
        return page;
    } finally {
        // Previously a failure in any step above leaked the borrowed driver.
        webDriverPool.returnToPool(webDriver);
        if (interruptedDuringSleep) {
            Thread.currentThread().interrupt();
        }
    }
}
Use of us.codecraft.webmagic.Page in project webmagic by code4craft.
Class ProcessorBenchmark, method test.
/**
 * Ignored benchmark: runs the model page processor over the same page in two
 * rounds of 1000 iterations and prints the elapsed milliseconds of each round.
 */
@Ignore
@Test
public void test() {
    ModelPageProcessor processor = ModelPageProcessor.create(Site.me(), OschinaBlog.class);

    final String blogUrl = "http://my.oschina.net/flashsword/blog";
    Page samplePage = new Page();
    samplePage.setRequest(new Request(blogUrl));
    samplePage.setUrl(new PlainText(blogUrl));
    samplePage.setHtml(new Html(html));

    // Two identical timed rounds; each prints its own elapsed time.
    for (int round = 0; round < 2; round++) {
        long startedAt = System.currentTimeMillis();
        int iteration = 0;
        while (iteration < 1000) {
            processor.process(samplePage);
            iteration++;
        }
        System.out.println(System.currentTimeMillis() - startedAt);
    }
}
Aggregations