use of com.cv4j.netdiscovery.core.domain.Request in project NetDiscovery by fengzhizi715.
the class OkHttpDownloader method download.
/**
 * Builds an OkHttp request from the given {@code Request} and returns a {@code Maybe} that
 * executes it and adapts the result to the project's {@code Response}.
 *
 * <p>The OkHttp request is assembled immediately; the network call itself only happens when the
 * returned {@code Maybe} is subscribed to.
 *
 * @param request the request to execute; its URL, HTTP method, headers and (for POST) body are read
 * @return a {@code Maybe} emitting a response carrying the body bytes, the status code and the
 *         {@code Content-Type} response header; a {@code Maybe} that signals
 *         {@link IllegalArgumentException} when the HTTP method is neither GET nor POST
 */
@Override
public Maybe<Response> download(Request request) {
    // Always start from a URL-only builder (OkHttp defaults to GET) so the builder can never
    // be null further down, whatever the method/body combination is.
    final okhttp3.Request.Builder requestBuilder = new okhttp3.Request.Builder().url(request.getUrl());
    final HttpMethod method = request.getHttpMethod();
    String postContentType = null;

    if (method == HttpMethod.POST) {
        HttpRequestBody httpRequestBody = request.getHttpRequestBody();
        RequestBody requestBody;
        if (httpRequestBody != null) {
            postContentType = httpRequestBody.getContentType();
            // MediaType.parse does not accept null; a null media type simply means
            // "no content type" for the body.
            MediaType mediaType = postContentType != null ? MediaType.parse(postContentType) : null;
            // Wrap the payload in a RequestBody using the declared media type.
            requestBody = RequestBody.create(mediaType, httpRequestBody.getBody());
        } else {
            // A POST without a payload is sent with an empty body instead of failing.
            requestBody = RequestBody.create(null, new byte[0]);
        }
        requestBuilder.post(requestBody);
    } else if (method != HttpMethod.GET) {
        // Only GET and POST are implemented; report anything else through the Maybe
        // rather than issuing a request with the wrong verb.
        return Maybe.error(new IllegalArgumentException("Unsupported HTTP method: " + method));
    }

    if (request.getHeader() != null) {
        for (Map.Entry<String, String> entry : request.getHeader().entrySet()) {
            requestBuilder.addHeader(entry.getKey(), entry.getValue());
        }
    }

    // For POST requests, additionally advertise the body's content type as a header.
    if (Preconditions.isNotBlank(postContentType)) {
        requestBuilder.addHeader("Content-type", postContentType);
    }

    final okhttp3.Request okrequest = requestBuilder.build();
    return Maybe.create(new MaybeOnSubscribe<okhttp3.Response>() {
        @Override
        public void subscribe(MaybeEmitter<okhttp3.Response> emitter) throws Exception {
            emitter.onSuccess(client.newCall(okrequest).execute());
        }
    }).map(new Function<okhttp3.Response, Response>() {
        @Override
        public Response apply(okhttp3.Response resp) throws Exception {
            Response response = new Response();
            response.setContent(resp.body().bytes());
            response.setStatusCode(resp.code());
            response.setContentType(resp.header("Content-Type"));
            return response;
        }
    });
}
use of com.cv4j.netdiscovery.core.domain.Request in project NetDiscovery by fengzhizi715.
the class Spider method run.
/**
 * Main crawl loop of the spider.
 *
 * <p>After the running-state check and the initial delay, requests are polled from the queue
 * until the spider status becomes {@code SPIDER_STATUS_STOPPED} or the queue returns
 * {@code null}. Each request is downloaded, converted into a {@code Page}, handed to the parser
 * and the pipelines, and finally passed to the request's after-request hook. The spider is
 * stopped in all cases when the loop ends.
 */
public void run() {
    checkRunningStat();
    initialDelay();
    try {
        while (getSpiderStatus() != SPIDER_STATUS_STOPPED) {
            // Crawling is paused: wait on the pause latch, then apply the initial delay again.
            if (pause) {
                try {
                    this.pauseCountDown.await();
                } catch (InterruptedException e) {
                    log.error("can't pause : ", e);
                }
                initialDelay();
            }

            final Request request = queue.poll(name);
            if (request == null) {
                // Nothing left to poll: leave the loop so the spider gets stopped below.
                break;
            }

            if (request.getSleepTime() > 0) {
                try {
                    Thread.sleep(request.getSleepTime());
                } catch (InterruptedException e) {
                    // Reported through the logger like the pause case above; the request is
                    // still processed afterwards.
                    log.error("sleep interrupted : ", e);
                }
            }

            // Attach a proxy from the pool only when none was set explicitly and it passes the check.
            if (autoProxy && request.getProxy() == null) {
                Proxy proxy = ProxyPool.getProxy();
                if (proxy != null && Utils.checkProxy(proxy)) {
                    request.proxy(proxy);
                }
            }

            if (request.getBeforeRequest() != null) {
                request.getBeforeRequest().process(request);
            }

            downloader.download(request)
                    .map(response -> toPage(request, response))
                    .map(page -> {
                        if (parser != null) {
                            parser.process(page);
                        }
                        return page;
                    })
                    .map(page -> {
                        if (Preconditions.isNotBlank(pipelines)) {
                            pipelines.forEach(pipeline -> pipeline.process(page.getResultItems()));
                        }
                        return page;
                    })
                    .observeOn(Schedulers.io())
                    .subscribe(page -> {
                        log.info(page.getUrl());
                        if (request.getAfterRequest() != null) {
                            request.getAfterRequest().process(page);
                        }
                    }, throwable -> {
                        // Pass the throwable itself so the stack trace is not lost.
                        log.error(throwable.getMessage(), throwable);
                    });
        }
    } finally {
        // The crawl is over: stop the spider.
        stopSpider(downloader);
    }
}

/**
 * Wraps a downloaded response in a {@code Page}, storing the payload according to its content type.
 *
 * @param request  the request that produced the response
 * @param response the downloaded response
 * @return the populated page
 * @throws Exception if building the page content fails
 */
private Page toPage(Request request, Response response) throws Exception {
    Page page = new Page();
    page.setRequest(request);
    page.setUrl(request.getUrl());
    page.setStatusCode(response.getStatusCode());

    if (Utils.isTextType(response.getContentType())) {
        // text/html
        page.setHtml(new Html(response.getContent()));
    } else if (Utils.isApplicationJSONType(response.getContentType())) {
        // application/json: the payload is stored as a Json object under RESPONSE_JSON so that
        // it can later be converted to a concrete class. Decode explicitly as UTF-8 rather
        // than with the platform default charset.
        String json = new String(response.getContent(), java.nio.charset.StandardCharsets.UTF_8);
        page.putField(Constant.RESPONSE_JSON, new Json(json));
    } else {
        // Anything else: keep the raw InputStream.
        page.putField(Constant.RESPONSE_RAW, response.getIs());
    }
    return page;
}
use of com.cv4j.netdiscovery.core.domain.Request in project NetDiscovery by fengzhizi715.
the class RequestDeserializer method deserialize.
/** Shared mapper: a configured ObjectMapper is thread-safe, so it need not be rebuilt per call. */
private static final ObjectMapper REQUEST_MAPPER = new ObjectMapper();

/**
 * Deserializes a JSON payload into a {@code Request}.
 *
 * @param topic the topic the payload came from (not used)
 * @param data  the JSON bytes; may be {@code null}
 * @return the parsed request, or {@code null} when {@code data} is {@code null} or cannot be parsed
 */
@Override
public Request deserialize(String topic, byte[] data) {
    if (data == null) {
        // Nothing to parse; return null directly instead of going through the exception path.
        return null;
    }
    try {
        return REQUEST_MAPPER.readValue(data, Request.class);
    } catch (Exception e) {
        // Best effort, as before: report the failure and let the caller see null.
        e.printStackTrace();
        return null;
    }
}
use of com.cv4j.netdiscovery.core.domain.Request in project NetDiscovery by fengzhizi715.
the class RedisQueue method poll.
/**
 * Pops the next URL from the spider's Redis list and returns the matching request.
 *
 * <p>If a serialized request is stored in the hash for that URL it is parsed and returned;
 * otherwise a new request is created from the URL alone.
 *
 * @param spiderName name of the spider whose queue is polled
 * @return the next request, or {@code null} when the list yields no URL
 */
@Override
public synchronized Request poll(String spiderName) {
    final Jedis connection = pool.getResource();
    try {
        final String nextUrl = connection.lpop(getQueueKey(spiderName));
        if (nextUrl == null) {
            return null;
        }

        // Hash key is the item prefix plus the URL; the hash field is the SHA hex digest of the URL.
        final String itemKey = ITEM_PREFIX + nextUrl;
        final String itemField = DigestUtils.shaHex(nextUrl);
        final byte[] stored = connection.hget(itemKey.getBytes(), itemField.getBytes());

        if (stored == null) {
            return new Request(nextUrl);
        }
        return JSON.parseObject(new String(stored), Request.class);
    } finally {
        pool.returnResource(connection);
    }
}
use of com.cv4j.netdiscovery.core.domain.Request in project NetDiscovery by fengzhizi715.
the class Spider method repeatRequest.
/**
 * Submits a request for the same URL over and over, which can be used to implement a
 * scheduled task. This method is meant to be used together with initialDelay.
 *
 * @param period interval, in milliseconds, between two submissions of the request
 * @param url    the URL to request on every tick
 * @return this spider, for chaining
 */
public Spider repeatRequest(long period, String url) {
    checkIfRunning();
    compositeDisposable.add(
            Flowable.interval(period, TimeUnit.MILLISECONDS)
                    .onBackpressureBuffer()
                    .subscribe(tick -> {
                        // While the spider is paused, ticks are dropped without queueing anything.
                        if (pause) {
                            return;
                        }
                        Request repeated = new Request(url);
                        repeated.checkDuplicate(false);
                        repeated.spiderName(name);
                        repeated.sleep(period);
                        queue.push(repeated);
                    }));
    return this;
}
Aggregations