Search in sources :

Example 1 with CrawlerBean

use of com.virjar.vscrawler.web.model.CrawlerBean in project vscrawler by virjar.

the class VSCrawlerClassLoader method loadCrawler.

/**
 * Instantiates the named crawler entry class and wraps the crawler it builds.
 *
 * @param crawlerEntryName      crawler entry class name; it should be an implementation of
 *                              com.virjar.vscrawler.web.api.CrawlerBuilder
 * @param webApplicationContext context handed to the builder when it is a
 *                              {@code SpringContextAware}, and used for dependency injection
 * @return a crawler bean built from the entry class, or {@code null} when a
 *         {@code ClassNotFoundException} is raised while loading or building
 * @see CrawlerBuilder
 */
public CrawlerBean loadCrawler(String crawlerEntryName, WebApplicationContext webApplicationContext) throws InstantiationException, IllegalAccessException {
    CrawlerBean loaded = null;
    try {
        CrawlerBuilder builder = (CrawlerBuilder) loadClass(crawlerEntryName).newInstance();
        // give context-aware builders access to the web application context first
        if (builder instanceof SpringContextAware) {
            ((SpringContextAware) builder).init4SpringContext(webApplicationContext);
        }
        // for spring bean auto injection
        injectDependency(builder, true, webApplicationContext);
        VSCrawler builtCrawler = builder.build();
        loaded = new CrawlerBean(builtCrawler, true, this);
    } catch (ClassNotFoundException ignored) {
        // the original author expects this never to happen; the method then yields null
    }
    return loaded;
}
Also used : VSCrawler(com.virjar.vscrawler.core.VSCrawler) SpringContextAware(com.virjar.vscrawler.web.api.SpringContextAware) CrawlerBuilder(com.virjar.vscrawler.web.api.CrawlerBuilder) CrawlerBean(com.virjar.vscrawler.web.model.CrawlerBean)

Example 2 with CrawlerBean

use of com.virjar.vscrawler.web.model.CrawlerBean in project vscrawler by virjar.

the class VSCrawlerManager method loadHotJar.

/**
 * Loads every {@code .jar} file found directly inside {@code dir} and registers the
 * crawler each one yields in {@code allCrawler}, keyed by crawler name.
 * <p>
 * A jar that fails to load, or that defines a crawler name which is already
 * registered, is logged and skipped; the remaining jars are still processed.
 *
 * @param dir directory to scan; nothing happens when it does not exist or cannot be listed
 */
private void loadHotJar(File dir) {
    if (!dir.exists()) {
        return;
    }
    FilenameFilter jarOnly = new FilenameFilter() {

        @Override
        public boolean accept(File parent, String name) {
            return StringUtils.endsWith(name, ".jar");
        }
    };
    File[] candidates = dir.listFiles(jarOnly);
    if (candidates == null) {
        return;
    }
    for (File candidate : candidates) {
        // a directory may also be named "*.jar"; only regular entries are loaded
        if (!candidate.isDirectory()) {
            try {
                CrawlerBean loaded = loadJarFile(candidate);
                if (loaded != null) {
                    String name = loaded.getCrawler().getVsCrawlerContext().getCrawlerName();
                    if (allCrawler.containsKey(name)) {
                        throw new IllegalStateException("duplicate crawler defined :" + name);
                    }
                    allCrawler.put(name, loaded);
                }
            } catch (Exception e) {
                log.error("error when load jar file,this crawler will be ignore", e);
            }
        }
    }
}
Also used : JarFile(java.util.jar.JarFile) ZipFile(java.util.zip.ZipFile) MultipartFile(org.springframework.web.multipart.MultipartFile) CrawlerBean(com.virjar.vscrawler.web.model.CrawlerBean)

Example 3 with CrawlerBean

use of com.virjar.vscrawler.web.model.CrawlerBean in project vscrawler by virjar.

the class VSCrawlerManager method reloadJar.

/**
 * Stores an uploaded jar in the hot-jar directory, loads the crawler it defines and
 * registers it in {@code allCrawler}, replacing a previously registered crawler of the
 * same name when that crawler is reloadable.
 * <p>
 * If the uploaded file's signature matches a jar already present in the directory,
 * the upload is handed to {@code deleteJarIfJarIllegal} and the method returns without
 * loading anything.
 *
 * @param multipartFile the uploaded jar
 * @throws IllegalStateException when the jar defines no crawler, or when the crawler
 *                               it replaces is not reloadable
 * @throws Exception             any other failure raised while transferring or loading
 *                               the jar; the uploaded file is handed to
 *                               {@code deleteJarIfJarIllegal} before rethrowing
 */
public void reloadJar(MultipartFile multipartFile) throws Exception {
    // NOTE(review): in Spring, getName() is the multipart form parameter name rather than
    // the uploaded file's name, so getOriginalFilename() is probably reached only when the
    // parameter name is blank - confirm this precedence is intended.
    String fileName = multipartFile.getName();
    if (StringUtils.isBlank(fileName)) {
        fileName = multipartFile.getOriginalFilename();
    }
    if (StringUtils.isBlank(fileName)) {
        fileName = String.valueOf(System.currentTimeMillis()) + ".jar";
    }
    File hotJarDir = new File(calcHotJarDir());
    Set<String> existFileSign = Sets.newHashSet();
    Set<String> existFileNames = Sets.newHashSet();
    // load all existing crawler jars, to avoid a duplicate move
    File[] existingJars = hotJarDir.listFiles(new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            return StringUtils.endsWith(name, ".jar");
        }
    });
    // listFiles yields null when the directory is missing or unreadable; treat that as
    // "no existing jars" instead of failing with a NullPointerException
    if (existingJars != null) {
        for (File jarFile : existingJars) {
            existFileSign.add(getFileSign(jarFile));
            existFileNames.add(jarFile.getName());
        }
    }
    fileName = PathResolver.getFileName(fileName);
    File targetFile = judgeCopyTargetFile(fileName, existFileNames, hotJarDir);
    multipartFile.transferTo(targetFile);
    if (existFileSign.contains(getFileSign(targetFile))) {
        // same content as a jar that is already present: discard the upload
        deleteJarIfJarIllegal(targetFile);
        return;
    }
    try {
        // scan and load crawler
        CrawlerBean crawlerBean = loadJarFile(targetFile);
        if (crawlerBean == null) {
            throw new IllegalStateException("not crawler defined in this jar file");
        }
        // stop old crawler if necessary
        String crawlerName = crawlerBean.getCrawler().getVsCrawlerContext().getCrawlerName();
        CrawlerBean oldVSCrawler = allCrawler.get(crawlerName);
        if (oldVSCrawler != null) {
            if (!oldVSCrawler.isReloadable()) {
                throw new IllegalStateException("can not reload crawler " + crawlerName + " ,this crawler defined in servlet context,not defined in vscrawler context ");
            }
            // this may take a while
            oldVSCrawler.getCrawler().stopCrawler();
            deleteJarIfJarIllegal(oldVSCrawler.relatedJarFile());
        }
        // register new crawler
        allCrawler.put(crawlerName, crawlerBean);
    } catch (Exception e) {
        // loading failed: remove the uploaded jar, then surface the failure to the caller
        deleteJarIfJarIllegal(targetFile);
        throw e;
    }
}
Also used : JarFile(java.util.jar.JarFile) ZipFile(java.util.zip.ZipFile) MultipartFile(org.springframework.web.multipart.MultipartFile) CrawlerBean(com.virjar.vscrawler.web.model.CrawlerBean)

Example 4 with CrawlerBean

use of com.virjar.vscrawler.web.model.CrawlerBean in project vscrawler by virjar.

the class VSCrawlerManager method init.

/**
 * One-time initialisation: registers a crawler for every {@code CrawlerBuilder} bean in
 * the web application context, then loads crawlers from the hot-jar directory.
 * Later calls return immediately once {@code hasInit} has been set.
 */
private synchronized void init() {
    if (!hasInit) {
        // cannot be auto injected by spring: with no implementations an exception would be thrown
        Map<String, CrawlerBuilder> builderBeans = webApplicationContext.getBeansOfType(CrawlerBuilder.class);
        crawlerBuilderList.addAll(builderBeans.values());

        // load system crawlers
        for (CrawlerBuilder builder : crawlerBuilderList) {
            VSCrawler systemCrawler = builder.build();
            String name = systemCrawler.getVsCrawlerContext().getCrawlerName();
            allCrawler.put(name, new CrawlerBean(systemCrawler));
        }

        // locate the hot-jar root directory, then load the jars found there
        File hotJarRoot = new File(calcHotJarDir());
        moveEmbedCrawler(hotJarRoot);
        loadHotJar(hotJarRoot);

        hasInit = true;
    }
}
Also used : VSCrawler(com.virjar.vscrawler.core.VSCrawler) CrawlerBuilder(com.virjar.vscrawler.web.api.CrawlerBuilder) CrawlerBean(com.virjar.vscrawler.web.model.CrawlerBean) JarFile(java.util.jar.JarFile) ZipFile(java.util.zip.ZipFile) MultipartFile(org.springframework.web.multipart.MultipartFile)

Aggregations

CrawlerBean (com.virjar.vscrawler.web.model.CrawlerBean)4 JarFile (java.util.jar.JarFile)3 ZipFile (java.util.zip.ZipFile)3 MultipartFile (org.springframework.web.multipart.MultipartFile)3 VSCrawler (com.virjar.vscrawler.core.VSCrawler)2 CrawlerBuilder (com.virjar.vscrawler.web.api.CrawlerBuilder)2 SpringContextAware (com.virjar.vscrawler.web.api.SpringContextAware)1