Search in sources :

Example 1 with AutoEventRegistry

use of com.virjar.vscrawler.core.event.support.AutoEventRegistry in project vscrawler by virjar.

The method build of the class VSCrawlerBuilder.

/**
 * Assembles a {@link VSCrawler} from the values configured on this builder.
 *
 * <p>Every optional collaborator that is still {@code null} is replaced by a default
 * (see the inline comments). The defaults are assigned to the builder's own variables,
 * not to locals of this method. The statement order matters: observers added through
 * {@code addEventObserver} further down must be in place before the
 * {@code eventObservers} check at the end of the method.
 *
 * @return the newly constructed crawler
 * @throws IllegalStateException if {@code proxyStrategy} is {@code ProxyStrategy.CUSTOM} but no
 *         {@code proxyPlanner} is set; if both a {@code processor} and seed routers are
 *         configured; or if a {@code userResourceFacade} is present but {@code loginHandler}
 *         is {@code null}
 */
public VSCrawler build() {
    final VSCrawlerContext vsCrawlerContext = VSCrawlerContext.create(crawlerName);
    // Networking defaults: default HTTP client generator and no proxy.
    if (crawlerHttpClientGenerator == null) {
        crawlerHttpClientGenerator = new DefaultHttpClientGenerator();
    }
    if (proxyStrategy == null) {
        proxyStrategy = ProxyStrategy.NONE;
    }
    // A custom proxy strategy cannot work without a planner, so fail early.
    if (proxyStrategy == ProxyStrategy.CUSTOM && proxyPlanner == null) {
        throw new IllegalStateException("proxyPlanner must exist if proxyStrategy is custom");
    }
    CrawlerSessionPool crawlerSessionPool = new CrawlerSessionPool(vsCrawlerContext, crawlerHttpClientGenerator, proxyStrategy, ipPool, proxyPlanner, sessionPoolMaxSize, sessionPoolCoreSize, sessionPoolInitialSize, sessionPoolReuseDuration, sessionPoolMaxOnlineDuration, autoCreateSession);
    // Seed handling defaults, then the seed manager that uses them.
    if (initSeedSource == null) {
        initSeedSource = new LocalFileSeedSource();
    }
    if (seedKeyResolver == null) {
        seedKeyResolver = new DefaultSeedKeyResolver();
    }
    if (segmentResolver == null) {
        segmentResolver = new DefaultSegmentResolver();
    }
    BerkeleyDBSeedManager berkeleyDBSeedManager = new BerkeleyDBSeedManager(vsCrawlerContext, initSeedSource, seedKeyResolver, segmentResolver, seedManagerCacheSize);
    // Processor selection: an explicit processor and seed routers are mutually exclusive.
    // With neither configured, fall back to PageDownLoadProcessor; with routers only,
    // wrap them in a RouteProcessor.
    if (processor == null && seedRouters.isEmpty()) {
        processor = new PageDownLoadProcessor();
    }
    if (processor != null && !seedRouters.isEmpty()) {
        throw new IllegalStateException(" seedProcessor and routeProcessor conflict");
    }
    if (!seedRouters.isEmpty()) {
        RouteProcessor routeProcessor = new RouteProcessor();
        routeProcessor.addRouters(seedRouters);
        processor = routeProcessor;
    }
    // With no pipeline configured, use the shared ConsolePipeline instance.
    if (pipelineList.isEmpty()) {
        pipelineList.add(ConsolePipeline.instance);
    }
    VSCrawler vsCrawler = new VSCrawler(vsCrawlerContext, crawlerSessionPool, berkeleyDBSeedManager, processor, pipelineList, workerThreadNumber, slowStart, slowStartDuration);
    // loginOnSessionCreate only supplies a default user resource here; the login wiring
    // itself is keyed on userResourceFacade being non-null (see below).
    if (loginOnSessionCreate) {
        if (userResourceFacade == null) {
            userResourceFacade = new DefaultUserResource();
        }
    }
    // Resource-management defaults; each one is published to the crawler context.
    if (resourceManager == null) {
        resourceManager = ResourceManagerFactory.create().build();
    }
    vsCrawlerContext.setResourceManager(resourceManager);
    if (queueStorePlanner == null) {
        queueStorePlanner = new RamQueueStorePlanner();
    }
    vsCrawlerContext.setQueueStorePlanner(queueStorePlanner);
    if (defaultResourceSetting == null) {
        defaultResourceSetting = ResourceSetting.create().setLock(true);
    }
    vsCrawlerContext.setResourceSetting(defaultResourceSetting);
    // Login wiring: runs whenever a user resource facade exists, whether it was set
    // explicitly or defaulted above.
    if (userResourceFacade != null) {
        if (loginHandler == null) {
            throw new IllegalStateException("login handler is null ,but open login switch");
        }
        // Reuse the queue already registered under the user resource tag if there is one;
        // otherwise register a new queue backed by the user resource loader.
        ResourceQueue resourceQueue = resourceManager.getResourceQueue(vsCrawlerContext.makeUserResourceTag());
        if (resourceQueue != null) {
            resourceQueue.addResourceLoader(new UserManager2ResourceLoader(userResourceFacade));
        } else {
            resourceManager.registry(new ResourceQueue(vsCrawlerContext.makeUserResourceTag(), queueStorePlanner, defaultResourceSetting, new UserManager2ResourceLoader(userResourceFacade)));
        }
        addEventObserver(new AutoLoginPlugin(loginHandler, new UserManager2(resourceManager, vsCrawlerContext)));
    }
    // Optional auto-shutdown: two observers are added only when a positive duration is configured.
    if (stopWhileTaskEmptyDuration > 0) {
        // Final alias so the anonymous classes below can capture the crawler.
        final VSCrawler finalVSCrawler = vsCrawler;
        addEventObserver(new ShutDownChecker() {

            @Override
            public void checkShutDown(VSCrawlerContext vsCrawlerContext1) {
                // Check the active worker count after 15s; if it is 0, no task has run for 10 consecutive seconds.
                // NOTE(review): the idle threshold is hard-coded to 10000 ms, while the log message
                // reports stopWhileTaskEmptyDuration / 1000 seconds - confirm which value is intended.
                if (finalVSCrawler.activeWorker() == 0 && (System.currentTimeMillis() - finalVSCrawler.getLastActiveTime()) > 10000) {
                    log.info((stopWhileTaskEmptyDuration / 1000) + "秒没收到爬虫任务,自动爬虫关闭器,尝试停止爬虫");
                    finalVSCrawler.stopCrawler();
                }
            }
        });
        addEventObserver(new SeedEmptyEvent() {

            @Override
            public void onSeedEmpty(VSCrawlerContext vsCrawlerContext1) {
                // Sends a ShutDownChecker event through a delay sender configured with
                // stopWhileTaskEmptyDuration (presumably a delay in milliseconds - TODO confirm).
                // The outer vsCrawlerContext is passed on, not the callback parameter.
                finalVSCrawler.getVsCrawlerContext().getAutoEventRegistry().createDelayEventSender(ShutDownChecker.class, stopWhileTaskEmptyDuration).delegate().checkShutDown(vsCrawlerContext);
            }
        });
    }
    // Observer registration is deferred to crawler start. This check must stay after the
    // addEventObserver calls above (presumably they append to eventObservers - verify).
    if (eventObservers.size() > 0) {
        vsCrawler.addCrawlerStartCallBack(new VSCrawler.CrawlerStartCallBack() {

            @Override
            public void onCrawlerStart(VSCrawler vsCrawler) {
                AutoEventRegistry autoEventRegistry = vsCrawler.getVsCrawlerContext().getAutoEventRegistry();
                for (Object eventObserver : eventObservers) {
                    autoEventRegistry.registerObserver(eventObserver);
                }
            }
        });
        // Observers that are themselves start callbacks are also added as start callbacks.
        for (Object observer : eventObservers) {
            if (observer instanceof VSCrawler.CrawlerStartCallBack) {
                vsCrawler.addCrawlerStartCallBack((VSCrawler.CrawlerStartCallBack) observer);
            }
        }
    }
    return vsCrawler;
}
Also used : ShutDownChecker(com.virjar.vscrawler.core.event.systemevent.ShutDownChecker) PageDownLoadProcessor(com.virjar.vscrawler.core.processor.PageDownLoadProcessor) RamQueueStorePlanner(com.virjar.vscrawler.core.resourcemanager.storage.ram.RamQueueStorePlanner) SeedEmptyEvent(com.virjar.vscrawler.core.event.systemevent.SeedEmptyEvent) DefaultHttpClientGenerator(com.virjar.vscrawler.core.net.DefaultHttpClientGenerator) CrawlerSessionPool(com.virjar.vscrawler.core.net.session.CrawlerSessionPool) RouteProcessor(com.virjar.vscrawler.core.processor.RouteProcessor) BindRouteProcessor(com.virjar.vscrawler.core.processor.BindRouteProcessor) ResourceQueue(com.virjar.vscrawler.core.resourcemanager.ResourceQueue) AutoEventRegistry(com.virjar.vscrawler.core.event.support.AutoEventRegistry)

Example 2 with AutoEventRegistry

use of com.virjar.vscrawler.core.event.support.AutoEventRegistry in project vscrawler by virjar.

The method create of the class VSCrawlerContext.

/**
 * Returns the context stored in {@code allContext} under {@code crawlerName}, creating
 * and storing a new one when no entry exists yet.
 *
 * <p>A new context is built with a fresh {@link EventLoop}, receives its own
 * {@link AutoEventRegistry}, resolves its work path, and has the config file watcher
 * registered as an observer before it is put into {@code allContext}.
 *
 * @param crawlerName key identifying the crawler
 * @return the existing or newly created context for {@code crawlerName}
 */
public static VSCrawlerContext create(String crawlerName) {
    // First look-up happens without the lock.
    if (!allContext.containsKey(crawlerName)) {
        synchronized (VSCrawlerContext.class) {
            // Look again while holding the class lock before creating anything.
            if (!allContext.containsKey(crawlerName)) {
                VSCrawlerContext created = new VSCrawlerContext(crawlerName, new EventLoop());
                AutoEventRegistry registry = new AutoEventRegistry(created);
                created.setAutoEventRegistry(registry);
                created.resolveWorkPath();
                created.getAutoEventRegistry().registerObserver(vsCrawlerConfigFileWatcher);
                allContext.put(crawlerName, created);
                return created;
            }
            return allContext.get(crawlerName);
        }
    }
    return allContext.get(crawlerName);
}
Also used : EventLoop(com.virjar.vscrawler.core.event.EventLoop) AutoEventRegistry(com.virjar.vscrawler.core.event.support.AutoEventRegistry)

Example 3 with AutoEventRegistry

use of com.virjar.vscrawler.core.event.support.AutoEventRegistry in project vscrawler by virjar.

The method main of the class EventTest.

/**
 * Manual demo of the event registry: registers a {@link UserLoginEvent} observer that
 * prints the login outcome together with the current thread, fires the event ten times
 * through the object returned by {@code findEventDeclaring}, then sleeps via
 * {@code CommonUtil.sleep(20000)} before returning.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final VSCrawlerContext context = VSCrawlerContext.create("testCrawler");
    context.getEventLoop().loop();
    final AutoEventRegistry registry = context.getAutoEventRegistry();

    // Observer that reports each login outcome on standard output.
    UserLoginEvent printingObserver = new UserLoginEvent() {

        @Override
        public void afterUserLogin(VSCrawlerContext eventContext, User user, boolean loginSucces) {
            String outcome;
            if (loginSucces) {
                outcome = "成功";
            } else {
                outcome = "失败";
            }
            System.out.println(Thread.currentThread() + "用户登录:" + outcome);
        }
    };
    registry.registerObserver(printingObserver);

    // Fire ten failed-login events through the object the registry returns for the event type.
    UserLoginEvent dispatcher = registry.findEventDeclaring(UserLoginEvent.class);
    int remaining = 10;
    while (remaining > 0) {
        dispatcher.afterUserLogin(context, null, false);
        remaining--;
    }
    CommonUtil.sleep(20000);
}
Also used : VSCrawlerContext(com.virjar.vscrawler.core.VSCrawlerContext) User(com.virjar.vscrawler.core.net.user.User) UserLoginEvent(com.virjar.vscrawler.core.event.systemevent.UserLoginEvent) AutoEventRegistry(com.virjar.vscrawler.core.event.support.AutoEventRegistry)

Aggregations

AutoEventRegistry (com.virjar.vscrawler.core.event.support.AutoEventRegistry): 3, VSCrawlerContext (com.virjar.vscrawler.core.VSCrawlerContext): 1, EventLoop (com.virjar.vscrawler.core.event.EventLoop): 1, SeedEmptyEvent (com.virjar.vscrawler.core.event.systemevent.SeedEmptyEvent): 1, ShutDownChecker (com.virjar.vscrawler.core.event.systemevent.ShutDownChecker): 1, UserLoginEvent (com.virjar.vscrawler.core.event.systemevent.UserLoginEvent): 1, DefaultHttpClientGenerator (com.virjar.vscrawler.core.net.DefaultHttpClientGenerator): 1, CrawlerSessionPool (com.virjar.vscrawler.core.net.session.CrawlerSessionPool): 1, User (com.virjar.vscrawler.core.net.user.User): 1, BindRouteProcessor (com.virjar.vscrawler.core.processor.BindRouteProcessor): 1, PageDownLoadProcessor (com.virjar.vscrawler.core.processor.PageDownLoadProcessor): 1, RouteProcessor (com.virjar.vscrawler.core.processor.RouteProcessor): 1, ResourceQueue (com.virjar.vscrawler.core.resourcemanager.ResourceQueue): 1, RamQueueStorePlanner (com.virjar.vscrawler.core.resourcemanager.storage.ram.RamQueueStorePlanner): 1