Search in sources:

Example 1 with DuplicateHostHelper

Use of org.codelibs.fess.helper.DuplicateHostHelper in the project fess by codelibs.

The method doCrawl of the class Crawler.

/**
 * Runs a single crawl session described by {@code options}.
 *
 * <p>The method registers the path mappings for the session, tries to initialize the
 * duplicate-host helper (a failure there is only logged), asks the crawling-info service to
 * delete older session ids, and then starts up to two named threads: one for web/file-system
 * crawling and one for data crawling. Which threads are started depends on the config id lists
 * in {@code options}: when all three lists are {@code null} both crawlers run; otherwise only
 * the crawlers whose list(s) are non-null run. The calling thread then hands both thread
 * references (either may be {@code null}) to {@code joinCrawlerThread}.
 *
 * <p>Regardless of the outcome, the {@code finally} section removes the session's path
 * mappings and records the crawler status, end time and elapsed time in the info map.
 *
 * @param options the crawl options; {@code sessionId}, {@code name} and the three config id
 *                lists are read from it
 * @return {@code Constants.EXIT_OK} when the body finished without throwing, otherwise
 *         {@code Constants.EXIT_FAIL}
 */
public int doCrawl(final Options options) {
    if (logger.isInfoEnabled()) {
        logger.info("Starting Crawler..");
    }
    final PathMappingHelper mappingHelper = ComponentUtil.getPathMappingHelper();
    final long startedAtMillis = System.currentTimeMillis();
    final CrawlingInfoHelper infoHelper = ComponentUtil.getCrawlingInfoHelper();
    boolean completed = false;
    int exitCode;
    try {
        writeTimeToSessionInfo(infoHelper, Constants.CRAWLER_START_TIME);

        // Register the path mappings that apply while crawling.
        final List<String> processTypes = new ArrayList<>();
        processTypes.add(Constants.PROCESS_TYPE_CRAWLING);
        processTypes.add(Constants.PROCESS_TYPE_BOTH);
        pathMappingHelper(mappingHelper, options, processTypes);

        // Duplicate-host setup is best effort: a failure is logged and crawling continues.
        try {
            ComponentUtil.getDuplicateHostHelper().init();
        } catch (final Exception e) {
            logger.warn("Could not initialize duplicateHostHelper.", e);
        }

        // Drop expired sessions.
        crawlingInfoService.deleteSessionIdsBefore(
                options.sessionId,
                options.name,
                ComponentUtil.getSystemHelper().getCurrentTimeAsLong());

        final List<String> webConfigIdList = options.getWebConfigIdList();
        final List<String> fileConfigIdList = options.getFileConfigIdList();
        final List<String> dataConfigIdList = options.getDataConfigIdList();

        // No explicit selection at all means "crawl everything".
        final boolean webFsSelected = webConfigIdList != null || fileConfigIdList != null;
        final boolean dataSelected = dataConfigIdList != null;
        final boolean crawlEverything = !webFsSelected && !dataSelected;

        Thread webFsCrawlerThread = null;
        if (crawlEverything || webFsSelected) {
            final Runnable webFsTask = () -> {
                writeTimeToSessionInfo(infoHelper, Constants.WEB_FS_CRAWLER_START_TIME);
                webFsIndexHelper.crawl(options.sessionId, webConfigIdList, fileConfigIdList);
                writeTimeToSessionInfo(infoHelper, Constants.WEB_FS_CRAWLER_END_TIME);
            };
            webFsCrawlerThread = new Thread(webFsTask, WEB_FS_CRAWLING_PROCESS);
            webFsCrawlerThread.start();
        }

        Thread dataCrawlerThread = null;
        if (crawlEverything || dataSelected) {
            final Runnable dataTask = () -> {
                writeTimeToSessionInfo(infoHelper, Constants.DATA_CRAWLER_START_TIME);
                dataIndexHelper.crawl(options.sessionId, dataConfigIdList);
                writeTimeToSessionInfo(infoHelper, Constants.DATA_CRAWLER_END_TIME);
            };
            dataCrawlerThread = new Thread(dataTask, DATA_CRAWLING_PROCESS);
            dataCrawlerThread.start();
        }

        // Either reference may still be null if that crawler was not started.
        joinCrawlerThread(webFsCrawlerThread);
        joinCrawlerThread(dataCrawlerThread);

        if (logger.isInfoEnabled()) {
            logger.info("Finished Crawler");
        }
        completed = true;
        exitCode = Constants.EXIT_OK;
    } catch (final Throwable t) {
        logger.warn("An exception occurs on the crawl task.", t);
        exitCode = Constants.EXIT_FAIL;
    } finally {
        mappingHelper.removePathMappingList(options.sessionId);
        final String status;
        if (completed) {
            status = Constants.T.toString();
        } else {
            status = Constants.F.toString();
        }
        infoHelper.putToInfoMap(Constants.CRAWLER_STATUS, status);
        writeTimeToSessionInfo(infoHelper, Constants.CRAWLER_END_TIME);
        final long elapsedMillis = System.currentTimeMillis() - startedAtMillis;
        infoHelper.putToInfoMap(Constants.CRAWLER_EXEC_TIME, Long.toString(elapsedMillis));
    }
    return exitCode;
}

/** Stores the path mappings matching {@code processTypes} for the session in {@code options}. */
private void pathMappingHelper(final PathMappingHelper mappingHelper, final Options options, final List<String> processTypes) {
    mappingHelper.setPathMappingList(options.sessionId, pathMappingService.getPathMappingList(processTypes));
}
Also used : CrawlingInfoHelper(org.codelibs.fess.helper.CrawlingInfoHelper) ArrayList(java.util.ArrayList) DuplicateHostHelper(org.codelibs.fess.helper.DuplicateHostHelper) PathMappingHelper(org.codelibs.fess.helper.PathMappingHelper) ContainerNotAvailableException(org.codelibs.fess.exception.ContainerNotAvailableException) IOException(java.io.IOException) CmdLineException(org.kohsuke.args4j.CmdLineException)

Aggregations

IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 ContainerNotAvailableException (org.codelibs.fess.exception.ContainerNotAvailableException)1 CrawlingInfoHelper (org.codelibs.fess.helper.CrawlingInfoHelper)1 DuplicateHostHelper (org.codelibs.fess.helper.DuplicateHostHelper)1 PathMappingHelper (org.codelibs.fess.helper.PathMappingHelper)1 CmdLineException (org.kohsuke.args4j.CmdLineException)1