Use of us.codecraft.webmagic.processor.SimplePageProcessor in the webmagic project by code4craft.
From the class SpiderTest, method testGlobalSpider.
/**
 * Manual, ignored-by-default crawl: builds a {@code SimplePageProcessor} for a
 * thread URL pattern, prints the charset reported by its site, sets the site's
 * sleep time to 500, and then runs a {@code Spider} seeded with a single start
 * URL, a {@code FilePipeline}, and a {@code FileCacheQueueScheduler} pointed at
 * a fixed cache directory.
 */
@Ignore
@Test
public void testGlobalSpider() {
    // Earlier variant of this test, kept disabled:
    // PageProcessor pageProcessor = new MeicanProcessor();
    // Spider.me().pipeline(new FilePipeline()).scheduler(new FileCacheQueueScheduler(pageProcessor.getSite(),"/data/temp/webmagic/cache/")).
    // processor(pageProcessor).run();
    final String threadUrlPattern = "http://www.diaoyuweng.com/thread-*-1-1.html";
    final String startUrl = "http://www.diaoyuweng.com/home.php?mod=space&uid=88304&do=thread&view=me&type=thread&from=space";
    final String cacheDirectory = "/data/temp/webmagic/cache/";

    SimplePageProcessor threadProcessor = new SimplePageProcessor(threadUrlPattern);
    System.out.println(threadProcessor.getSite().getCharset());
    threadProcessor.getSite().setSleepTime(500);

    // Same fluent chain as before, one call per line; run() is the last call in the chain.
    Spider.create(threadProcessor)
            .addUrl(startUrl)
            .addPipeline(new FilePipeline())
            .scheduler(new FileCacheQueueScheduler(cacheDirectory))
            .run();
}
Use of us.codecraft.webmagic.processor.SimplePageProcessor in the webmagic project by code4craft.
From the class SpiderTest, method testStartAndStop.
/**
 * Ignored-by-default (long-running) check that a {@code Spider} can be started,
 * stopped, and started again. The spider uses one thread and a pipeline that
 * only prints {@code 1} for each result it receives; the test pauses for ten
 * seconds after each start/stop call.
 *
 * @throws InterruptedException if the test thread is interrupted while sleeping
 */
@Ignore("long time")
@Test
public void testStartAndStop() throws InterruptedException {
    final long pauseMillis = 10000;
    final String urlPattern = "http://www.oschina.net/*";
    final String seedUrl = "http://www.oschina.net/";

    // Pipeline that ignores its arguments and just prints a marker per result.
    Pipeline printingPipeline = new Pipeline() {
        @Override
        public void process(ResultItems resultItems, Task task) {
            System.out.println(1);
        }
    };

    Spider spider = Spider.create(new SimplePageProcessor(urlPattern))
            .addPipeline(printingPipeline)
            .thread(1)
            .addUrl(seedUrl);

    spider.start();
    Thread.sleep(pauseMillis);

    spider.stop();
    Thread.sleep(pauseMillis);

    // Start again after the stop; NOTE(review): the spider is not stopped again before the test returns.
    spider.start();
    Thread.sleep(pauseMillis);
}
Aggregations