use of com.virjar.vscrawler.core.event.support.AutoEventRegistry in project vscrawler by virjar.
the class VSCrawlerBuilder method build.
/**
 * Assembles a {@link VSCrawler} from the options collected on this builder.
 *
 * <p>Every option that was left unset is replaced by a default, and the default is written
 * back to the corresponding builder field. The configuration is validated before any
 * collaborator (context, session pool, seed manager, crawler) is created, so an invalid
 * builder fails without leaving half-constructed components behind.
 *
 * <p>NOTE(review): because the resolved {@code RouteProcessor} is stored back into
 * {@code processor}, a second {@code build()} call on a builder that has seed routers will
 * hit the processor/router conflict check. Confirm whether builders are meant to be reused.
 *
 * @return the crawler wired with the session pool, seed manager, processor, pipelines and
 *         event observers configured on this builder
 * @throws IllegalStateException if the proxy strategy is {@code CUSTOM} without a proxy
 *         planner, if both a seed processor and seed routers were configured, or if login
 *         is enabled (or a user resource facade is set) without a login handler
 */
public VSCrawler build() {
    // ---- Validation: depends only on builder fields, so run it before allocating anything.
    if (proxyStrategy == null) {
        proxyStrategy = ProxyStrategy.NONE;
    }
    if (proxyStrategy == ProxyStrategy.CUSTOM && proxyPlanner == null) {
        throw new IllegalStateException("proxyPlanner must exist if proxyStrategy is custom");
    }
    if (processor != null && !seedRouters.isEmpty()) {
        throw new IllegalStateException(" seedProcessor and routeProcessor conflict");
    }
    // A user resource facade is in effect either when set explicitly or when the login
    // switch defaults one in further down; both cases need a login handler.
    if ((loginOnSessionCreate || userResourceFacade != null) && loginHandler == null) {
        throw new IllegalStateException("login handler is null ,but open login switch");
    }

    // ---- Context and session pool.
    final VSCrawlerContext vsCrawlerContext = VSCrawlerContext.create(crawlerName);
    if (crawlerHttpClientGenerator == null) {
        crawlerHttpClientGenerator = new DefaultHttpClientGenerator();
    }
    CrawlerSessionPool crawlerSessionPool = new CrawlerSessionPool(vsCrawlerContext, crawlerHttpClientGenerator,
            proxyStrategy, ipPool, proxyPlanner, sessionPoolMaxSize, sessionPoolCoreSize, sessionPoolInitialSize,
            sessionPoolReuseDuration, sessionPoolMaxOnlineDuration, autoCreateSession);

    // ---- Seed manager.
    if (initSeedSource == null) {
        initSeedSource = new LocalFileSeedSource();
    }
    if (seedKeyResolver == null) {
        seedKeyResolver = new DefaultSeedKeyResolver();
    }
    if (segmentResolver == null) {
        segmentResolver = new DefaultSegmentResolver();
    }
    BerkeleyDBSeedManager berkeleyDBSeedManager = new BerkeleyDBSeedManager(vsCrawlerContext, initSeedSource,
            seedKeyResolver, segmentResolver, seedManagerCacheSize);

    // ---- Processor: routers win when present (validation guarantees no explicit processor
    // was set alongside them); otherwise fall back to the page-download processor.
    if (!seedRouters.isEmpty()) {
        RouteProcessor routeProcessor = new RouteProcessor();
        routeProcessor.addRouters(seedRouters);
        processor = routeProcessor;
    } else if (processor == null) {
        processor = new PageDownLoadProcessor();
    }

    if (pipelineList.isEmpty()) {
        pipelineList.add(ConsolePipeline.instance);
    }

    final VSCrawler vsCrawler = new VSCrawler(vsCrawlerContext, crawlerSessionPool, berkeleyDBSeedManager, processor,
            pipelineList, workerThreadNumber, slowStart, slowStartDuration);

    // ---- Resource management defaults, published on the context.
    if (loginOnSessionCreate && userResourceFacade == null) {
        userResourceFacade = new DefaultUserResource();
    }
    if (resourceManager == null) {
        resourceManager = ResourceManagerFactory.create().build();
    }
    vsCrawlerContext.setResourceManager(resourceManager);
    if (queueStorePlanner == null) {
        queueStorePlanner = new RamQueueStorePlanner();
    }
    vsCrawlerContext.setQueueStorePlanner(queueStorePlanner);
    if (defaultResourceSetting == null) {
        defaultResourceSetting = ResourceSetting.create().setLock(true);
    }
    vsCrawlerContext.setResourceSetting(defaultResourceSetting);

    // ---- Login support: attach the user loader to the user resource queue (creating the
    // queue if the resource manager does not have one yet) and register the auto-login plugin.
    if (userResourceFacade != null) {
        ResourceQueue resourceQueue = resourceManager.getResourceQueue(vsCrawlerContext.makeUserResourceTag());
        if (resourceQueue != null) {
            resourceQueue.addResourceLoader(new UserManager2ResourceLoader(userResourceFacade));
        } else {
            resourceManager.registry(new ResourceQueue(vsCrawlerContext.makeUserResourceTag(), queueStorePlanner,
                    defaultResourceSetting, new UserManager2ResourceLoader(userResourceFacade)));
        }
        addEventObserver(new AutoLoginPlugin(loginHandler, new UserManager2(resourceManager, vsCrawlerContext)));
    }

    // ---- Auto shutdown: when seeds run out, schedule a delayed shutdown check.
    if (stopWhileTaskEmptyDuration > 0) {
        addEventObserver(new ShutDownChecker() {
            @Override
            public void checkShutDown(VSCrawlerContext vsCrawlerContext1) {
                // Stop only if no worker is active and the last activity is more than 10s old.
                // NOTE(review): the idle threshold is a fixed 10000 ms while the log message
                // reports stopWhileTaskEmptyDuration; confirm whether they should be tied together.
                if (vsCrawler.activeWorker() == 0
                        && (System.currentTimeMillis() - vsCrawler.getLastActiveTime()) > 10000) {
                    log.info((stopWhileTaskEmptyDuration / 1000) + "秒没收到爬虫任务,自动爬虫关闭器,尝试停止爬虫");
                    vsCrawler.stopCrawler();
                }
            }
        });
        addEventObserver(new SeedEmptyEvent() {
            @Override
            public void onSeedEmpty(VSCrawlerContext vsCrawlerContext1) {
                // Send a ShutDownChecker event through the delay sender configured with
                // stopWhileTaskEmptyDuration.
                vsCrawler.getVsCrawlerContext().getAutoEventRegistry()
                        .createDelayEventSender(ShutDownChecker.class, stopWhileTaskEmptyDuration).delegate()
                        .checkShutDown(vsCrawlerContext);
            }
        });
    }

    // ---- Observers are registered with the event registry when the crawler starts;
    // observers that are themselves start callbacks are additionally hooked in directly.
    if (!eventObservers.isEmpty()) {
        vsCrawler.addCrawlerStartCallBack(new VSCrawler.CrawlerStartCallBack() {
            @Override
            public void onCrawlerStart(VSCrawler vsCrawler) {
                AutoEventRegistry autoEventRegistry = vsCrawler.getVsCrawlerContext().getAutoEventRegistry();
                for (Object eventObserver : eventObservers) {
                    autoEventRegistry.registerObserver(eventObserver);
                }
            }
        });
        for (Object observer : eventObservers) {
            if (observer instanceof VSCrawler.CrawlerStartCallBack) {
                vsCrawler.addCrawlerStartCallBack((VSCrawler.CrawlerStartCallBack) observer);
            }
        }
    }
    return vsCrawler;
}
use of com.virjar.vscrawler.core.event.support.AutoEventRegistry in project vscrawler by virjar.
the class VSCrawlerContext method create.
/**
 * Returns the context registered under {@code crawlerName}, creating and registering a new
 * one if none exists yet.
 *
 * <p>The registry is consulted once without holding a lock and, on a miss, once more while
 * holding the {@code VSCrawlerContext.class} monitor, so that only one context is created per
 * name by callers going through this method. Each lookup is a single {@code get}; the
 * previous {@code containsKey}-then-{@code get} pair performed two lookups and could hand
 * back {@code null} if the entry disappeared between the two calls.
 *
 * <p>NOTE(review): the lock-free first lookup is only safe if {@code allContext} is a
 * thread-safe map; its declaration is not visible here, so confirm.
 *
 * @param crawlerName name identifying the crawler whose context is requested
 * @return the existing context for {@code crawlerName}, or a newly created one that has its
 *         event registry set, its work path resolved and the config file watcher registered
 */
public static VSCrawlerContext create(String crawlerName) {
    VSCrawlerContext existing = allContext.get(crawlerName);
    if (existing != null) {
        return existing;
    }
    synchronized (VSCrawlerContext.class) {
        // Re-check under the lock: another thread may have created the context meanwhile.
        existing = allContext.get(crawlerName);
        if (existing != null) {
            return existing;
        }
        VSCrawlerContext vsCrawlerContext = new VSCrawlerContext(crawlerName, new EventLoop());
        AutoEventRegistry autoEventRegistry = new AutoEventRegistry(vsCrawlerContext);
        vsCrawlerContext.setAutoEventRegistry(autoEventRegistry);
        vsCrawlerContext.resolveWorkPath();
        vsCrawlerContext.getAutoEventRegistry().registerObserver(vsCrawlerConfigFileWatcher);
        // Publish only after the context is fully initialised.
        allContext.put(crawlerName, vsCrawlerContext);
        return vsCrawlerContext;
    }
}
use of com.virjar.vscrawler.core.event.support.AutoEventRegistry in project vscrawler by virjar.
the class EventTest method main.
/**
 * Manual demo of the event registry: creates the "testCrawler" context, calls
 * {@code loop()} on its event loop, registers a {@link UserLoginEvent} observer that prints
 * the current thread and the login outcome, then fires {@code afterUserLogin} ten times
 * through the object returned by {@code findEventDeclaring}.
 *
 * @param args unused
 */
public static void main(String[] args) {
    VSCrawlerContext context = VSCrawlerContext.create("testCrawler");
    context.getEventLoop().loop();
    AutoEventRegistry registry = context.getAutoEventRegistry();

    // Observer that reports each login notification on stdout.
    UserLoginEvent printingObserver = new UserLoginEvent() {
        @Override
        public void afterUserLogin(VSCrawlerContext eventContext, User user, boolean loginSucceeded) {
            String outcome;
            if (loginSucceeded) {
                outcome = "成功";
            } else {
                outcome = "失败";
            }
            System.out.println(Thread.currentThread() + "用户登录:" + outcome);
        }
    };
    registry.registerObserver(printingObserver);

    // Fire the event through the registry-provided UserLoginEvent rather than the observer itself.
    UserLoginEvent dispatcher = registry.findEventDeclaring(UserLoginEvent.class);
    int remaining = 10;
    while (remaining > 0) {
        dispatcher.afterUserLogin(context, null, false);
        remaining--;
    }

    // Presumably keeps the JVM alive long enough for the events to be delivered and
    // printed — confirm against the EventLoop implementation.
    CommonUtil.sleep(20000);
}
Aggregations