Example usage of org.codelibs.fess.helper.DuplicateHostHelper in the project fess by codelibs.
Source: the doCrawl method of the Crawler class.
/**
 * Runs a full crawl session: registers path mappings for the session,
 * initializes duplicate-host handling (best-effort), deletes expired crawl
 * sessions, then launches the web/file-system crawler and the data-store
 * crawler on separate threads and waits for both to finish.
 *
 * @param options crawl options carrying the session id, name, and optional
 *                web/file/data config-id lists (a {@code null} list together
 *                with the others being {@code null} means "crawl everything")
 * @return {@code Constants.EXIT_OK} on success, {@code Constants.EXIT_FAIL}
 *         if any error occurs during the crawl
 */
public int doCrawl(final Options options) {
    if (logger.isInfoEnabled()) {
        logger.info("Starting Crawler..");
    }
    final PathMappingHelper pathMappingHelper = ComponentUtil.getPathMappingHelper();
    final long totalTime = System.currentTimeMillis();
    final CrawlingInfoHelper crawlingInfoHelper = ComponentUtil.getCrawlingInfoHelper();
    boolean completed = false;
    try {
        writeTimeToSessionInfo(crawlingInfoHelper, Constants.CRAWLER_START_TIME);
        // setup path mapping: only mappings tagged for crawling (or both) apply here
        final List<String> ptList = new ArrayList<>();
        ptList.add(Constants.PROCESS_TYPE_CRAWLING);
        ptList.add(Constants.PROCESS_TYPE_BOTH);
        pathMappingHelper.setPathMappingList(options.sessionId, pathMappingService.getPathMappingList(ptList));
        // duplicate host setup is best-effort: a failure here must not abort the crawl
        try {
            final DuplicateHostHelper duplicateHostHelper = ComponentUtil.getDuplicateHostHelper();
            duplicateHostHelper.init();
        } catch (final Exception e) {
            logger.warn("Could not initialize duplicateHostHelper.", e);
        }
        // delete expired sessions (recorded before the current time)
        crawlingInfoService.deleteSessionIdsBefore(options.sessionId, options.name, ComponentUtil.getSystemHelper().getCurrentTimeAsLong());
        final List<String> webConfigIdList = options.getWebConfigIdList();
        final List<String> fileConfigIdList = options.getFileConfigIdList();
        final List<String> dataConfigIdList = options.getDataConfigIdList();
        // no explicit config lists at all means every crawler runs
        final boolean runAll = webConfigIdList == null && fileConfigIdList == null && dataConfigIdList == null;
        Thread webFsCrawlerThread = null;
        Thread dataCrawlerThread = null;
        if (runAll || webConfigIdList != null || fileConfigIdList != null) {
            // Thread(Runnable, String) is unambiguous; no (Runnable) cast needed on the lambda.
            webFsCrawlerThread = new Thread(() -> {
                writeTimeToSessionInfo(crawlingInfoHelper, Constants.WEB_FS_CRAWLER_START_TIME);
                webFsIndexHelper.crawl(options.sessionId, webConfigIdList, fileConfigIdList);
                writeTimeToSessionInfo(crawlingInfoHelper, Constants.WEB_FS_CRAWLER_END_TIME);
            }, WEB_FS_CRAWLING_PROCESS);
            webFsCrawlerThread.start();
        }
        if (runAll || dataConfigIdList != null) {
            dataCrawlerThread = new Thread(() -> {
                writeTimeToSessionInfo(crawlingInfoHelper, Constants.DATA_CRAWLER_START_TIME);
                dataIndexHelper.crawl(options.sessionId, dataConfigIdList);
                writeTimeToSessionInfo(crawlingInfoHelper, Constants.DATA_CRAWLER_END_TIME);
            }, DATA_CRAWLING_PROCESS);
            dataCrawlerThread.start();
        }
        // Wait for both crawlers; a thread may be null when its branch was skipped
        // (joinCrawlerThread presumably tolerates null — confirm in its definition).
        joinCrawlerThread(webFsCrawlerThread);
        joinCrawlerThread(dataCrawlerThread);
        if (logger.isInfoEnabled()) {
            logger.info("Finished Crawler");
        }
        completed = true;
        return Constants.EXIT_OK;
    } catch (final Throwable t) { // Throwable: top-level process boundary, convert everything to an exit code
        logger.warn("An exception occurs on the crawl task.", t);
        return Constants.EXIT_FAIL;
    } finally {
        // Always release the session's path mappings and record status/elapsed time,
        // regardless of success or failure.
        pathMappingHelper.removePathMappingList(options.sessionId);
        crawlingInfoHelper.putToInfoMap(Constants.CRAWLER_STATUS, completed ? Constants.T.toString() : Constants.F.toString());
        writeTimeToSessionInfo(crawlingInfoHelper, Constants.CRAWLER_END_TIME);
        crawlingInfoHelper.putToInfoMap(Constants.CRAWLER_EXEC_TIME, Long.toString(System.currentTimeMillis() - totalTime));
    }
}
Aggregations