use of com.cv4j.netdiscovery.core.domain.Response in project NetDiscovery by fengzhizi715.
the class OkHttpDownloader method download.
/**
 * Executes the request with OkHttp and adapts the result to the project's {@code Response}.
 *
 * <p>Only GET and POST are supported. A POST without an {@code HttpRequestBody} is sent with an
 * empty body; any other HTTP method yields a {@code Maybe} that signals an
 * {@link IllegalArgumentException} instead of failing with a NullPointerException while the
 * request is being assembled.
 *
 * <p>The network call is performed when the returned {@code Maybe} is subscribed to, on the
 * subscribing thread.
 *
 * @param request the request to execute (URL, method, optional headers and body)
 * @return a {@code Maybe} emitting the response content, status code and Content-Type header
 */
@Override
public Maybe<Response> download(Request request) {
    okhttp3.Request.Builder requestBuilder = new okhttp3.Request.Builder().url(request.getUrl());
    if (request.getHttpMethod() == HttpMethod.POST) {
        HttpRequestBody httpRequestBody = request.getHttpRequestBody();
        if (httpRequestBody != null) {
            String contentType = httpRequestBody.getContentType();
            // A missing content type is passed on as "no media type" rather than parsed.
            MediaType mediaType = contentType == null ? null : MediaType.parse(contentType);
            // Wrap the raw body bytes in a RequestBody carrying the declared media type.
            requestBuilder.post(RequestBody.create(mediaType, httpRequestBody.getBody()));
        } else {
            // OkHttp requires a body for POST; send an empty one when none was supplied.
            requestBuilder.post(RequestBody.create(null, new byte[0]));
        }
    } else if (request.getHttpMethod() != HttpMethod.GET) {
        return Maybe.error(new IllegalArgumentException("Unsupported HTTP method: " + request.getHttpMethod()));
    }
    if (request.getHeader() != null) {
        for (Map.Entry<String, String> entry : request.getHeader().entrySet()) {
            requestBuilder.addHeader(entry.getKey(), entry.getValue());
        }
    }
    // For POST requests, additionally advertise the body's content type as a header.
    if (request.getHttpMethod() == HttpMethod.POST) {
        if (Preconditions.isNotBlank(request.getHttpRequestBody()) && Preconditions.isNotBlank(request.getHttpRequestBody().getContentType())) {
            requestBuilder.addHeader("Content-type", request.getHttpRequestBody().getContentType());
        }
    }
    final okhttp3.Request okrequest = requestBuilder.build();
    return Maybe.create(new MaybeOnSubscribe<okhttp3.Response>() {
        @Override
        public void subscribe(MaybeEmitter<okhttp3.Response> emitter) throws Exception {
            emitter.onSuccess(client.newCall(okrequest).execute());
        }
    }).map(new Function<okhttp3.Response, Response>() {
        @Override
        public Response apply(okhttp3.Response resp) throws Exception {
            Response response = new Response();
            // Reads the whole body into memory.
            response.setContent(resp.body().bytes());
            response.setStatusCode(resp.code());
            response.setContentType(resp.header("Content-Type"));
            return response;
        }
    });
}
use of com.cv4j.netdiscovery.core.domain.Response in project NetDiscovery by fengzhizi715.
the class Spider method run.
/**
 * Main crawl loop: polls requests for this spider from the queue and pushes each one through
 * download, page construction, parsing and the pipelines until the spider is stopped or the
 * queue returns no request. {@code stopSpider(downloader)} is always invoked on exit.
 */
public void run() {
    checkRunningStat();
    initialDelay();
    try {
        while (getSpiderStatus() != SPIDER_STATUS_STOPPED) {
            // Crawling is paused: wait on pauseCountDown, then apply the initial delay again.
            if (pause) {
                try {
                    this.pauseCountDown.await();
                } catch (InterruptedException e) {
                    // NOTE(review): the interrupt is logged but not restored; confirm whether the loop should stop here.
                    log.error("can't pause : ", e);
                }
                initialDelay();
            }
            final Request request = queue.poll(name);
            if (request == null) {
                // Nothing left to crawl.
                break;
            }
            if (request.getSleepTime() > 0) {
                try {
                    Thread.sleep(request.getSleepTime());
                } catch (InterruptedException e) {
                    // Use the logger (as the pause branch does) instead of printStackTrace().
                    log.error("sleep interrupted : ", e);
                }
            }
            // Attach a proxy from the pool when auto-proxy is on and the request has none.
            if (autoProxy && request.getProxy() == null) {
                Proxy proxy = ProxyPool.getProxy();
                if (proxy != null && Utils.checkProxy(proxy)) {
                    request.proxy(proxy);
                }
            }
            if (request.getBeforeRequest() != null) {
                request.getBeforeRequest().process(request);
            }
            downloader.download(request).map(response -> {
                Page page = new Page();
                page.setRequest(request);
                page.setUrl(request.getUrl());
                page.setStatusCode(response.getStatusCode());
                if (Utils.isTextType(response.getContentType())) {
                    // text/html
                    page.setHtml(new Html(response.getContent()));
                } else if (Utils.isApplicationJSONType(response.getContentType())) {
                    // application/json
                    // The JSON string is wrapped in a Json object and stored under RESPONSE_JSON;
                    // per the original author, Json offers toObject() to convert it to a concrete class.
                    page.putField(Constant.RESPONSE_JSON, new Json(new String(response.getContent())));
                } else {
                    // Anything else: keep the raw InputStream.
                    page.putField(Constant.RESPONSE_RAW, response.getIs());
                }
                return page;
            }).map(page -> {
                if (parser != null) {
                    parser.process(page);
                }
                return page;
            }).map(page -> {
                if (Preconditions.isNotBlank(pipelines)) {
                    pipelines.stream().forEach(pipeline -> pipeline.process(page.getResultItems()));
                }
                return page;
            }).observeOn(Schedulers.io()).subscribe(page -> {
                log.info(page.getUrl());
                if (request.getAfterRequest() != null) {
                    request.getAfterRequest().process(page);
                }
            }, throwable -> {
                // Pass the throwable itself so the stack trace is not lost.
                log.error(throwable.getMessage(), throwable);
            });
        }
    } finally {
        // The spider is stopping.
        stopSpider(downloader);
    }
}
use of com.cv4j.netdiscovery.core.domain.Response in project NetDiscovery by fengzhizi715.
the class FileDownloader method download.
/**
 * Opens an {@link HttpURLConnection} to the request URL and exposes the response body as an
 * in-memory stream on the project's {@code Response}.
 *
 * <p>The whole body is buffered in memory, so this is only suitable for small files.
 *
 * @param request the request whose URL is fetched
 * @return a {@code Maybe} emitting the buffered response; if the URL is malformed or the
 *         connection cannot be opened, a {@code Maybe} that signals the {@link IOException}
 *         (never {@code null})
 */
@Override
public Maybe<Response> download(final Request request) {
    try {
        url = new URL(request.getUrl());
        // openConnection() only creates the connection object; it is not connected yet.
        httpUrlConnection = (HttpURLConnection) url.openConnection();
        // Whether to read data from the URL connection (true is already the default).
        httpUrlConnection.setDoInput(true);
        // Establish the connection; all connection parameters must be set before this point.
        httpUrlConnection.connect();
        return Maybe.create(new MaybeOnSubscribe<InputStream>() {
            @Override
            public void subscribe(MaybeEmitter<InputStream> emitter) throws Exception {
                emitter.onSuccess(httpUrlConnection.getInputStream());
            }
        }).map(new Function<InputStream, Response>() {
            @Override
            public Response apply(InputStream inputStream) throws Exception {
                ByteArrayOutputStream baos;
                // Close the connection stream once it has been copied into memory.
                try (InputStream in = inputStream) {
                    baos = Utils.cloneInputStream(in);
                }
                // Only intended for small files; large files should not be buffered like this.
                InputStream is = new ByteArrayInputStream(baos.toByteArray());
                Response response = new Response();
                response.setIs(is);
                response.setStatusCode(httpUrlConnection.getResponseCode());
                response.setContentType(httpUrlConnection.getContentType());
                return response;
            }
        });
    } catch (IOException e) {
        // MalformedURLException is an IOException too; report the failure through the Maybe
        // instead of returning null, which callers would dereference.
        return Maybe.error(e);
    }
}
use of com.cv4j.netdiscovery.core.domain.Response in project NetDiscovery by fengzhizi715.
the class UrlConnectionDownloader method download.
/**
 * Executes the request over {@link HttpURLConnection} (optionally through the request's proxy)
 * and adapts the result to the project's {@code Response}.
 *
 * <p>All request properties (Content-Type, custom headers, Accept-Charset) are applied before
 * the POST body is written: obtaining the output stream connects the connection, and request
 * properties can no longer be changed once it is connected.
 *
 * <p>When {@code request.isSaveCookie()} is set and the response carries the
 * {@code Constant.SET_COOKIES_HEADER} header, its {@code name=value} segments are stored in the
 * {@code CookieGroup} of the request's host (the group is created and registered if absent).
 *
 * @param request the request to execute
 * @return a {@code Maybe} emitting the response; if the URL is malformed or connecting/writing
 *         fails, a {@code Maybe} that signals the {@link IOException} (never {@code null})
 */
@Override
public Maybe<Response> download(Request request) {
    try {
        url = new URL(request.getUrl());
        // Route through the proxy when the request carries one.
        if (request.getProxy() != null) {
            httpUrlConnection = (HttpURLConnection) url.openConnection(request.getProxy().toJavaNetProxy());
        } else {
            httpUrlConnection = (HttpURLConnection) url.openConnection();
        }
        final boolean isPost = request.getHttpMethod() == HttpMethod.POST;
        final boolean hasBody = isPost && request.getHttpRequestBody() != null;
        if (isPost) {
            httpUrlConnection.setDoOutput(true);
            httpUrlConnection.setDoInput(true);
            httpUrlConnection.setRequestMethod("POST");
            // POST requests must not be served from cache.
            httpUrlConnection.setUseCaches(false);
            if (hasBody) {
                httpUrlConnection.setRequestProperty("Content-Type", request.getHttpRequestBody().getContentType());
            }
        }
        // Custom request headers (applied after Content-Type, so they may override it).
        if (Preconditions.isNotBlank(request.getHeader())) {
            for (Map.Entry<String, String> entry : request.getHeader().entrySet()) {
                httpUrlConnection.setRequestProperty(entry.getKey(), entry.getValue());
            }
        }
        // Charset.
        if (Preconditions.isNotBlank(request.getCharset())) {
            httpUrlConnection.setRequestProperty("Accept-Charset", request.getCharset());
        }
        // Write the POST body last: getOutputStream() connects, after which headers are frozen.
        if (hasBody) {
            try (OutputStream os = httpUrlConnection.getOutputStream()) {
                os.write(request.getHttpRequestBody().getBody());
                os.flush();
            }
        }
        httpUrlConnection.connect();
        return Maybe.create(new MaybeOnSubscribe<InputStream>() {
            @Override
            public void subscribe(MaybeEmitter<InputStream> emitter) throws Exception {
                emitter.onSuccess(httpUrlConnection.getInputStream());
            }
        }).map(new Function<InputStream, Response>() {
            @Override
            public Response apply(InputStream inputStream) throws Exception {
                Response response = new Response();
                response.setContent(IOUtils.readInputStream(inputStream));
                response.setStatusCode(httpUrlConnection.getResponseCode());
                response.setContentType(httpUrlConnection.getContentType());
                if (request.isSaveCookie()) {
                    // Save cookies.
                    String cookieStr = httpUrlConnection.getHeaderField(Constant.SET_COOKIES_HEADER);
                    if (Preconditions.isNotBlank(cookieStr)) {
                        CookieGroup cookieGroup = CookieManager.getInsatance().getCookieGroup(request.getUrlParser().getHost());
                        final boolean isNewGroup = cookieGroup == null;
                        if (isNewGroup) {
                            cookieGroup = new CookieGroup(request.getUrlParser().getHost());
                        }
                        String[] segs = cookieStr.split(";");
                        if (Preconditions.isNotBlank(segs)) {
                            for (String seg : segs) {
                                // Only segments of the exact form name=value are kept.
                                String[] pairs = seg.trim().split("\\=");
                                if (pairs.length == 2) {
                                    // NOTE(review): cookieSet is declared outside this method; confirm whether it
                                    // is meant to accumulate across responses.
                                    cookieSet.add(new Cookie(pairs[0], pairs[1]));
                                }
                            }
                        }
                        cookieGroup.putAllCookies(cookieSet);
                        if (isNewGroup) {
                            CookieManager.getInsatance().addCookieGroup(cookieGroup);
                        }
                    }
                }
                return response;
            }
        });
    } catch (IOException e) {
        // MalformedURLException is an IOException too; report the failure through the Maybe
        // instead of returning null, which callers would dereference.
        return Maybe.error(e);
    }
}
use of com.cv4j.netdiscovery.core.domain.Response in project NetDiscovery by fengzhizi715.
the class SeleniumDownloader method download.
/**
 * Loads the request URL in the Selenium {@code webDriver}, runs the optional {@code action}
 * against it, and emits the resulting page source as a {@code Response}.
 *
 * <p>If no {@code webDriver} is available the returned {@code Maybe} completes empty, so
 * subscribers are not left waiting forever for a signal.
 *
 * @param request the request whose URL is opened in the browser
 * @return a {@code Maybe} emitting the page source with {@code Constant.OK_STATUS_CODE}, or
 *         completing empty when there is no {@code webDriver}
 */
@Override
public Maybe<Response> download(Request request) {
    return Maybe.create(new MaybeOnSubscribe<String>() {
        @Override
        public void subscribe(MaybeEmitter<String> emitter) throws Exception {
            if (webDriver == null) {
                // Nothing can be fetched; terminate the Maybe instead of never signalling.
                emitter.onComplete();
                return;
            }
            webDriver.get(request.getUrl());
            if (action != null) {
                action.perform(webDriver);
            }
            emitter.onSuccess(webDriver.getPageSource());
        }
    }).map(new Function<String, Response>() {
        @Override
        public Response apply(String html) throws Exception {
            Response response = new Response();
            // NOTE(review): getBytes() uses the platform default charset; confirm that is intended.
            response.setContent(html.getBytes());
            // The status code is always reported as OK here.
            response.setStatusCode(Constant.OK_STATUS_CODE);
            response.setContentType(getContentType(webDriver));
            return response;
        }
    });
}
Aggregations