Use of com.virjar.vscrawler.core.net.proxy.Proxy in project vscrawler by virjar.
Class EverySessionPlanner, method determineProxy.
/**
 * Resolves the proxy bound to the given crawler session.
 *
 * <p>If the session already carries a {@link Proxy} under {@code VSCRAWLER_AVPROXY_KEY}, that
 * instance is returned as-is (the enabled check below is not consulted in that case). Otherwise a
 * proxy is requested from {@code ipPool} for the target host name and request URL, stored on the
 * session under the same key, and returned.
 *
 * @param host target host; its host name is passed to the pool
 * @param request outgoing request; its URI is passed to the pool only when the request is an
 *     {@code HttpRequestWrapper} or {@code HttpGet}, otherwise {@code null} is passed
 * @param context request context, adapted to {@code HttpClientContext} for the enabled check
 * @param ipPool pool that supplies proxies
 * @param crawlerSession session on which the chosen proxy is stored
 * @return the session's proxy, or {@code null} when {@code PoolUtil.isDungProxyEnabled} is false
 *     for the context or the pool yields no proxy
 */
@Override
public Proxy determineProxy(HttpHost host, HttpRequest request, HttpContext context, IPPool ipPool, CrawlerSession crawlerSession) {
    HttpClientContext clientContext = HttpClientContext.adapt(context);

    // Fast path: the session already has a proxy bound to it.
    Proxy sessionProxy = (Proxy) crawlerSession.getExtInfo(VSCRAWLER_AVPROXY_KEY);
    if (sessionProxy != null) {
        return sessionProxy;
    }

    String targetUrl = (request instanceof HttpRequestWrapper || request instanceof HttpGet)
            ? ((HttpUriRequest) request).getURI().toString()
            : null;

    if (!PoolUtil.isDungProxyEnabled(clientContext)) {
        // Log message reads "<url> will not be proxied".
        log.info("{}不会被代理", targetUrl);
        return null;
    }

    Proxy allocated = ipPool.getIP(host.getHostName(), targetUrl);
    if (allocated != null) {
        // Remember the proxy on the session so later calls take the fast path above.
        crawlerSession.setExtInfo(VSCRAWLER_AVPROXY_KEY, allocated);
    }
    return allocated;
}
Use of com.virjar.vscrawler.core.net.proxy.Proxy in project vscrawler by virjar.
Class EveryRequestPlanner, method determineProxy.
/**
 * Resolves the proxy to use for a single request.
 *
 * <p>The {@link Proxy} found in the context attribute
 * {@code VSCrawlerConstant.VSCRAWLER_AVPROXY_KEY} is reused when it is present and
 * {@code isDisable()} is false; otherwise a proxy is requested from {@code ipPool} for the target
 * host name and request URL.
 *
 * @param host target host; its host name is passed to the pool and logged
 * @param request outgoing request; its URI is used only when the request is an
 *     {@code HttpRequestWrapper} or {@code HttpGet}, otherwise {@code null} is used
 * @param context request context holding the optional pre-bound proxy attribute
 * @param ipPool pool that supplies proxies
 * @param crawlerSession not read by this implementation
 * @return the chosen proxy, or {@code null} when {@code PoolUtil.isDungProxyEnabled} is false for
 *     the context or no proxy is available
 */
@Override
public Proxy determineProxy(HttpHost host, HttpRequest request, HttpContext context, IPPool ipPool, CrawlerSession crawlerSession) {
    HttpClientContext clientContext = HttpClientContext.adapt(context);
    Proxy preBound = (Proxy) context.getAttribute(VSCrawlerConstant.VSCRAWLER_AVPROXY_KEY);

    String targetUrl = (request instanceof HttpRequestWrapper || request instanceof HttpGet)
            ? ((HttpUriRequest) request).getURI().toString()
            : null;

    if (!PoolUtil.isDungProxyEnabled(clientContext)) {
        // Log message reads "<url> will not be proxied".
        log.info("{}不会被代理", targetUrl);
        return null;
    }

    // Keep the pre-bound proxy only while it is usable; otherwise ask the pool for one.
    Proxy chosen = (preBound != null && !preBound.isDisable())
            ? preBound
            : ipPool.getIP(host.getHostName(), targetUrl);

    if (chosen != null) {
        // Log message reads "<host> currently using IP: <ip>:<port>".
        log.info("{} 当前使用IP为:{}:{}", host.getHostName(), chosen.getIp(), chosen.getPort());
    }
    // Original note here: the bound IP is placed into the context so a post-interceptor can track
    // usage of this IP. NOTE(review): this method performs no such write — confirm where it happens.
    return chosen;
}
Use of com.virjar.vscrawler.core.net.proxy.Proxy in project vscrawler by virjar.
Class EveryUserPlanner, method determineProxy.
/**
 * Resolves the proxy bound to the user that is logged in on the given session.
 *
 * <p>When {@code UserUtil.getUser} yields no user, a warning is logged and {@code null} is
 * returned. If the user's ext-info already holds a {@link Proxy} under
 * {@code VSCrawlerConstant.VSCRAWLER_AVPROXY_KEY}, that instance is returned as-is (the enabled
 * check below is not consulted in that case). Otherwise a proxy is requested from {@code ipPool}
 * for the target host name and request URL, stored in the user's ext-info, and returned.
 *
 * @param host target host; its host name is passed to the pool
 * @param request outgoing request; its URI is passed to the pool only when the request is an
 *     {@code HttpRequestWrapper} or {@code HttpGet}, otherwise {@code null} is passed
 * @param context request context, adapted to {@code HttpClientContext} for the enabled check
 * @param ipPool pool that supplies proxies
 * @param crawlerSession session from which the user is looked up
 * @return the user's proxy, or {@code null} when there is no user, when
 *     {@code PoolUtil.isDungProxyEnabled} is false for the context, or when the pool yields no proxy
 */
@Override
public Proxy determineProxy(HttpHost host, HttpRequest request, HttpContext context, IPPool ipPool, CrawlerSession crawlerSession) {
    HttpClientContext clientContext = HttpClientContext.adapt(context);

    User user = UserUtil.getUser(crawlerSession);
    if (user == null) {
        log.warn("you config proxy strategy by user,but this session has not login with a user,proxy bind will be ignore");
        return null;
    }

    // Fast path: this user already has a proxy bound.
    Proxy userProxy = (Proxy) user.getExtInfo().get(VSCrawlerConstant.VSCRAWLER_AVPROXY_KEY);
    if (userProxy != null) {
        return userProxy;
    }

    String targetUrl = (request instanceof HttpRequestWrapper || request instanceof HttpGet)
            ? ((HttpUriRequest) request).getURI().toString()
            : null;

    if (!PoolUtil.isDungProxyEnabled(clientContext)) {
        // Log message reads "<url> will not be proxied".
        log.info("{}不会被代理", targetUrl);
        return null;
    }

    Proxy allocated = ipPool.getIP(host.getHostName(), targetUrl);
    if (allocated != null) {
        // Remember the proxy on the user so later calls take the fast path above.
        user.getExtInfo().put(VSCrawlerConstant.VSCRAWLER_AVPROXY_KEY, allocated);
    }
    return allocated;
}
Use of com.virjar.vscrawler.core.net.proxy.Proxy in project vscrawler by virjar.
Class CrawlerSession, method invalidProxyIp.
/**
 * Marks the proxy attached to this session as failed and takes it offline.
 *
 * <p>Looks up the ext-info value stored under {@code VSCrawlerConstant.VSCRAWLER_AVPROXY_KEY};
 * when that value is a {@link Proxy}, calls {@code recordFailed()} and then {@code offline()} on
 * it. Does nothing when no proxy is stored. The ext-info entry itself is not removed here.
 */
public void invalidProxyIp() {
    Object stored = getExtInfo(VSCrawlerConstant.VSCRAWLER_AVPROXY_KEY);
    if (stored instanceof Proxy) {
        Proxy boundProxy = (Proxy) stored;
        boundProxy.recordFailed();
        boundProxy.offline();
    }
}
Aggregations