Search in sources :

Example 6 with Spider

use of us.codecraft.webmagic.Spider in project webmagic by code4craft.

the class ZipCodePageProcessor method main.

/**
 * Entry point: builds a {@link Spider} around a {@code ZipCodePageProcessor},
 * gives it a {@link PriorityScheduler}, seeds it with the ip138 URL and runs it.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {
    // Configure and launch in one fluent chain; each builder call hands back the spider.
    Spider.create(new ZipCodePageProcessor())
            .scheduler(new PriorityScheduler())
            .addUrl("http://www.ip138.com/post/")
            .run();
}
Also used : Spider(us.codecraft.webmagic.Spider) PriorityScheduler(us.codecraft.webmagic.scheduler.PriorityScheduler)

Example 7 with Spider

use of us.codecraft.webmagic.Spider in project webmagic by code4craft.

the class OschinaBlogPageProcesser method main.

/**
 * Entry point: builds a {@link Spider} around an {@code OschinaBlogPageProcesser}
 * whose {@link QueueScheduler} uses a {@link BloomFilterDuplicateRemover},
 * registers the spider with the {@code SpiderMonitor} and then runs it.
 *
 * @param args command-line arguments (not read)
 * @throws JMException propagated from {@code SpiderMonitor.register}
 */
public static void main(String[] args) throws JMException {
    // NOTE(review): 2000 is presumably the expected number of URLs for the
    // Bloom filter -- confirm against BloomFilterDuplicateRemover.
    BloomFilterDuplicateRemover duplicateRemover = new BloomFilterDuplicateRemover(2000);

    Spider spider = Spider.create(new OschinaBlogPageProcesser())
            .setScheduler(new QueueScheduler().setDuplicateRemover(duplicateRemover));

    // Register with the monitor before the crawl begins.
    SpiderMonitor.instance().register(spider);
    spider.run();
}
Also used : BloomFilterDuplicateRemover(us.codecraft.webmagic.scheduler.BloomFilterDuplicateRemover) Spider(us.codecraft.webmagic.Spider) QueueScheduler(us.codecraft.webmagic.scheduler.QueueScheduler)

Example 8 with Spider

use of us.codecraft.webmagic.Spider in project webmagic by code4craft.

the class BaiduBaikePageProcessor method main.

/**
 * Entry point: uses a two-thread {@link Spider} to fetch one Baidu Baike
 * search page synchronously, then a batch of pages, printing the extracted
 * {@link ResultItems} for each.
 *
 * <p>The spider is closed in a {@code finally} block so it is released even
 * when a download or the printing loop throws.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {
    Spider spider = Spider.create(new BaiduBaikePageProcessor()).thread(2);
    try {
        String urlTemplate = "http://baike.baidu.com/search/word?word=%s&pic=1&sug=1&enc=utf8";

        // Single download: fetch one URL and print its result.
        ResultItems resultItems = spider.<ResultItems>get(String.format(urlTemplate, "水力发电"));
        System.out.println(resultItems);

        // Multi download: fetch several URLs in one call.
        List<String> list = new ArrayList<String>();
        list.add(String.format(urlTemplate, "风力发电"));
        list.add(String.format(urlTemplate, "太阳能"));
        list.add(String.format(urlTemplate, "地热发电"));
        // NOTE(review): the same URL is added a second time -- confirm whether
        // this is intentional (e.g. to exercise duplicate handling) or a copy/paste slip.
        list.add(String.format(urlTemplate, "地热发电"));
        List<ResultItems> resultItemses = spider.<ResultItems>getAll(list);
        for (ResultItems resultItemse : resultItemses) {
            System.out.println(resultItemse.getAll());
        }
    } finally {
        // Always close the spider; previously an exception above skipped this call.
        spider.close();
    }
}
Also used : ResultItems(us.codecraft.webmagic.ResultItems) Spider(us.codecraft.webmagic.Spider) ArrayList(java.util.ArrayList)

Example 9 with Spider

use of us.codecraft.webmagic.Spider in project webmagic by code4craft.

the class MonitorExample method main.

/**
 * Entry point: creates two spiders, registers both with the
 * {@code SpiderMonitor}, then starts them.
 *
 * @param args command-line arguments (not read)
 * @throws Exception propagated from spider creation, registration or start-up
 */
public static void main(String[] args) throws Exception {
    // NOTE(review): the Zhihu processor is seeded with an oschina.net URL --
    // confirm this pairing is intended.
    Spider zhihu = Spider.create(new ZhihuPageProcessor())
            .addUrl("http://my.oschina.net/flashsword/blog");
    Spider github = Spider.create(new GithubRepoPageProcessor())
            .addUrl("https://github.com/code4craft");

    // Register both spiders before either one is started.
    SpiderMonitor monitor = SpiderMonitor.instance();
    monitor.register(zhihu);
    monitor.register(github);

    zhihu.start();
    github.start();
}
Also used : GithubRepoPageProcessor(us.codecraft.webmagic.processor.example.GithubRepoPageProcessor) Spider(us.codecraft.webmagic.Spider) ZhihuPageProcessor(us.codecraft.webmagic.processor.example.ZhihuPageProcessor)

Example 10 with Spider

use of us.codecraft.webmagic.Spider in project webmagic by code4craft.

the class SpiderMonitor method register.

/**
 * Registers the given spiders with this monitor.
 *
 * <p>For each spider, a new {@code MonitorSpiderListener} is attached
 * (creating the spider's listener list when it has none), a status MBean is
 * built for it via {@code getSpiderStatusMBean}, and that MBean is passed to
 * {@code registerMBean} and recorded in {@code spiderStatuses}.
 *
 * @param spiders the spiders to monitor
 * @return this monitor, so calls can be chained
 * @throws JMException declared by the signature; presumably raised when
 *                     registering an MBean fails
 */
public synchronized SpiderMonitor register(Spider... spiders) throws JMException {
    for (Spider spider : spiders) {
        MonitorSpiderListener listener = new MonitorSpiderListener();

        List<SpiderListener> attached = spider.getSpiderListeners();
        if (attached != null) {
            // The spider already has a listener list: append to it in place.
            attached.add(listener);
        } else {
            // No list yet: install a fresh one holding only the monitor listener.
            List<SpiderListener> fresh = new ArrayList<SpiderListener>();
            fresh.add(listener);
            spider.setSpiderListeners(fresh);
        }

        SpiderStatusMXBean statusBean = getSpiderStatusMBean(spider, listener);
        registerMBean(statusBean);
        spiderStatuses.add(statusBean);
    }
    return this;
}
Also used : SpiderListener(us.codecraft.webmagic.SpiderListener) Spider(us.codecraft.webmagic.Spider) ArrayList(java.util.ArrayList)

Aggregations

Spider (us.codecraft.webmagic.Spider): 10; ArrayList (java.util.ArrayList): 3; ResultItems (us.codecraft.webmagic.ResultItems): 3; Test (org.junit.Test): 2; Task (us.codecraft.webmagic.Task): 2; GithubRepoPageProcessor (us.codecraft.webmagic.processor.example.GithubRepoPageProcessor): 2; ZhihuPageProcessor (us.codecraft.webmagic.processor.example.ZhihuPageProcessor): 2; Page (us.codecraft.webmagic.Page): 1; Site (us.codecraft.webmagic.Site): 1; SpiderListener (us.codecraft.webmagic.SpiderListener): 1; OOSpider (us.codecraft.webmagic.model.OOSpider): 1; SpiderMonitor (us.codecraft.webmagic.monitor.SpiderMonitor): 1; PageModelPipeline (us.codecraft.webmagic.pipeline.PageModelPipeline): 1; Pipeline (us.codecraft.webmagic.pipeline.Pipeline): 1; PageProcessor (us.codecraft.webmagic.processor.PageProcessor): 1; BaiduBaikePageProcessor (us.codecraft.webmagic.processor.example.BaiduBaikePageProcessor): 1; BloomFilterDuplicateRemover (us.codecraft.webmagic.scheduler.BloomFilterDuplicateRemover): 1; PriorityScheduler (us.codecraft.webmagic.scheduler.PriorityScheduler): 1; QueueScheduler (us.codecraft.webmagic.scheduler.QueueScheduler): 1