use of com.virjar.vscrawler.web.model.CrawlerBean in project vscrawler by virjar.
the class VSCrawlerClassLoader method loadCrawler.
/**
 * Loads the crawler entry class through this class loader, instantiates it and builds a
 * crawler from it.
 *
 * @param crawlerEntryName      name of the crawler entry class; the instance is cast to
 *                              {@link CrawlerBuilder}, so it should be an implementation of
 *                              com.virjar.vscrawler.web.crawler.CrawlerBuilder
 * @param webApplicationContext Spring context handed to builders that implement
 *                              {@link SpringContextAware} and to the dependency injection step
 * @return a {@link CrawlerBean} wrapping the crawler built by the entry class, or
 *         {@code null} when the entry class cannot be found
 * @throws InstantiationException if the entry class cannot be instantiated
 * @throws IllegalAccessException if the entry class or its no-arg constructor is not accessible
 * @see CrawlerBuilder
 */
public CrawlerBean loadCrawler(String crawlerEntryName, WebApplicationContext webApplicationContext) throws InstantiationException, IllegalAccessException {
    try {
        Object entryInstance = loadClass(crawlerEntryName).newInstance();
        CrawlerBuilder builder = (CrawlerBuilder) entryInstance;

        // Builders that want the Spring context get it before anything else happens.
        if (builder instanceof SpringContextAware) {
            ((SpringContextAware) builder).init4SpringContext(webApplicationContext);
        }

        // Spring bean auto injection must run before the crawler is built.
        injectDependency(builder, true, webApplicationContext);

        return new CrawlerBean(builder.build(), true, this);
    } catch (ClassNotFoundException e) {
        // The original author expects this never to happen; callers receive null if it does.
        return null;
    }
}
use of com.virjar.vscrawler.web.model.CrawlerBean in project vscrawler by virjar.
the class VSCrawlerManager method loadHotJar.
/**
 * Loads every {@code .jar} file found directly inside {@code dir} and registers the crawler
 * each one defines in {@code allCrawler}, keyed by crawler name.
 *
 * A jar that fails to load, or whose crawler name is already registered, is logged and
 * skipped; it never aborts the scan of the remaining files.
 *
 * @param dir directory to scan; nothing happens when it does not exist or cannot be listed
 */
private void loadHotJar(File dir) {
    FilenameFilter jarNameFilter = new FilenameFilter() {
        @Override
        public boolean accept(File parent, String candidateName) {
            return StringUtils.endsWith(candidateName, ".jar");
        }
    };

    File[] jarCandidates = dir.exists() ? dir.listFiles(jarNameFilter) : null;
    if (jarCandidates != null) {
        for (File candidate : jarCandidates) {
            // A directory whose name happens to end with ".jar" is not a jar file.
            if (candidate.isDirectory()) {
                continue;
            }
            try {
                CrawlerBean loadedBean = loadJarFile(candidate);
                if (loadedBean != null) {
                    String name = loadedBean.getCrawler().getVsCrawlerContext().getCrawlerName();
                    if (allCrawler.containsKey(name)) {
                        // Caught just below: the duplicate is reported and this jar is ignored.
                        throw new IllegalStateException("duplicate crawler defined :" + name);
                    }
                    allCrawler.put(name, loadedBean);
                }
            } catch (Exception e) {
                log.error("error when load jar file,this crawler will be ignore", e);
            }
        }
    }
}
use of com.virjar.vscrawler.web.model.CrawlerBean in project vscrawler by virjar.
the class VSCrawlerManager method reloadJar.
/**
 * Stores an uploaded crawler jar in the hot jar directory, loads the crawler it defines and
 * registers it in {@code allCrawler}, replacing an already registered crawler of the same name.
 *
 * If the uploaded file has the same signature as a jar already present in the hot jar
 * directory, the copy is handed to {@code deleteJarIfJarIllegal} and the method returns
 * without loading anything.
 *
 * @param multipartFile the uploaded jar
 * @throws IllegalStateException if the jar defines no crawler, or if the crawler it would
 *                               replace is not reloadable
 * @throws Exception             any other failure while storing or loading the jar; the stored
 *                               copy is passed to {@code deleteJarIfJarIllegal} before the
 *                               exception is rethrown
 */
public void reloadJar(MultipartFile multipartFile) throws Exception {
    // NOTE(review): assuming this is Spring's MultipartFile, getName() is the form parameter
    // name (never empty), not the file name, so the uploaded file's own name must come from
    // getOriginalFilename(). Fall back to a timestamp-based name when the client sent none.
    String fileName = multipartFile.getOriginalFilename();
    if (StringUtils.isBlank(fileName)) {
        fileName = System.currentTimeMillis() + ".jar";
    }

    File hotJarDir = new File(calcHotJarDir());
    Set<String> existFileSign = Sets.newHashSet();
    Set<String> existFileNames = Sets.newHashSet();

    // Record all existing crawler jars, to avoid storing a duplicate.
    // listFiles returns null when the directory is missing or unreadable; treat that as "no
    // existing jars" instead of failing with a NullPointerException.
    File[] existingJars = hotJarDir.listFiles(new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            return StringUtils.endsWith(name, ".jar");
        }
    });
    if (existingJars != null) {
        for (File jarFile : existingJars) {
            existFileSign.add(getFileSign(jarFile));
            existFileNames.add(jarFile.getName());
        }
    }

    fileName = PathResolver.getFileName(fileName);
    File targetFile = judgeCopyTargetFile(fileName, existFileNames, hotJarDir);
    multipartFile.transferTo(targetFile);

    // Same signature as a jar that is already deployed: nothing new to load.
    if (existFileSign.contains(getFileSign(targetFile))) {
        deleteJarIfJarIllegal(targetFile);
        return;
    }

    try {
        // scan and load crawler
        CrawlerBean crawlerBean = loadJarFile(targetFile);
        if (crawlerBean == null) {
            throw new IllegalStateException("not crawler defined in this jar file");
        }

        // stop old crawler if necessary
        String crawlerName = crawlerBean.getCrawler().getVsCrawlerContext().getCrawlerName();
        CrawlerBean oldVSCrawler = allCrawler.get(crawlerName);
        if (oldVSCrawler != null) {
            if (!oldVSCrawler.isReloadable()) {
                throw new IllegalStateException("can not reload crawler " + crawlerName + " ,this crawler defined in servlet context,not defined in vscrawler context ");
            }
            // Stopping the old crawler may take a while.
            oldVSCrawler.getCrawler().stopCrawler();
            deleteJarIfJarIllegal(oldVSCrawler.relatedJarFile());
        }

        // register new crawler
        allCrawler.put(crawlerName, crawlerBean);
    } catch (Exception e) {
        // Loading or replacing failed: hand the just-stored jar to the cleanup helper.
        deleteJarIfJarIllegal(targetFile);
        throw e;
    }
}
use of com.virjar.vscrawler.web.model.CrawlerBean in project vscrawler by virjar.
the class VSCrawlerManager method init.
/**
 * Performs the one-time crawler setup: builds the crawlers contributed by Spring-managed
 * {@link CrawlerBuilder} beans, then loads the crawlers shipped as jars in the hot jar
 * directory. Subsequent calls return immediately once {@code hasInit} is set.
 */
private synchronized void init() {
    if (!hasInit) {
        // Builders are looked up by hand instead of being autowired: per the original
        // author, automatic injection throws when no implementation exists.
        crawlerBuilderList.addAll(webApplicationContext.getBeansOfType(CrawlerBuilder.class).values());

        // System crawlers: one per known builder, registered under the crawler's own name.
        for (CrawlerBuilder builder : crawlerBuilderList) {
            VSCrawler builtCrawler = builder.build();
            String crawlerName = builtCrawler.getVsCrawlerContext().getCrawlerName();
            allCrawler.put(crawlerName, new CrawlerBean(builtCrawler));
        }

        // Jar-packaged crawlers: resolve the hot jar root directory, then load from it.
        File hotJarRoot = new File(calcHotJarDir());
        moveEmbedCrawler(hotJarRoot);
        loadHotJar(hotJarRoot);

        hasInit = true;
    }
}
Aggregations