Search in sources :

Example 6 with WebConfig

use of org.codelibs.fess.es.config.exentity.WebConfig in project fess by codelibs.

the class AdminWebconfigAction method getWebConfig.

/**
 * Builds the {@link WebConfig} entity that corresponds to the given form.
 * <p>
 * The entity is resolved through {@code getEntity}; when one is present it is stamped with the
 * current user name and time, populated from the form (skipping the common conversion-rule
 * properties and the permissions property), and finally given the permissions parsed from
 * {@code form.permissions}.
 *
 * @param form the submitted form whose values are copied into the entity
 * @return the populated entity, or an empty optional when {@code getEntity} yields none
 */
public static OptionalEntity<WebConfig> getWebConfig(final CreateForm form) {
    final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
    final String username = systemHelper.getUsername();
    final long currentTime = systemHelper.getCurrentTimeAsLong();
    return getEntity(form, username, currentTime).map(webConfig -> {
        webConfig.setUpdatedBy(username);
        webConfig.setUpdatedTime(currentTime);
        // Permissions are excluded from the bean copy because they are set explicitly below.
        copyBeanToBean(form, webConfig, option -> {
            final String[] excludedProperties =
                    Stream.concat(Stream.of(Constants.COMMON_CONVERSION_RULE), Stream.of(Constants.PERMISSIONS)).toArray(
                            size -> new String[size]);
            return option.exclude(excludedProperties);
        });
        final PermissionHelper permissionHelper = ComponentUtil.getPermissionHelper();
        // One permission per line: encode each, drop blank results, and remove duplicates.
        final String[] encodedPermissions =
                split(form.permissions, "\n").get(
                        lines -> lines.map(line -> permissionHelper.encode(line)).filter(StringUtil::isNotBlank).distinct()
                                .toArray(size -> new String[size]));
        webConfig.setPermissions(encodedPermissions);
        return webConfig;
    });
}
Also used : Constants(org.codelibs.fess.Constants) WebConfigService(org.codelibs.fess.app.service.WebConfigService) OptionalThing(org.dbflute.optional.OptionalThing) PermissionHelper(org.codelibs.fess.helper.PermissionHelper) ActionRuntime(org.lastaflute.web.ruts.process.ActionRuntime) RenderDataUtil(org.codelibs.fess.util.RenderDataUtil) StreamUtil.split(org.codelibs.core.stream.StreamUtil.split) CrudMode(org.codelibs.fess.app.web.CrudMode) FessAdminAction(org.codelibs.fess.app.web.base.FessAdminAction) RenderData(org.lastaflute.web.response.render.RenderData) RoleTypeService(org.codelibs.fess.app.service.RoleTypeService) WebConfig(org.codelibs.fess.es.config.exentity.WebConfig) HtmlResponse(org.lastaflute.web.response.HtmlResponse) StreamUtil.stream(org.codelibs.core.stream.StreamUtil.stream) OptionalEntity(org.dbflute.optional.OptionalEntity) Resource(javax.annotation.Resource) StringUtil(org.codelibs.core.lang.StringUtil) Collectors(java.util.stream.Collectors) LabelTypeService(org.codelibs.fess.app.service.LabelTypeService) Stream(java.util.stream.Stream) ComponentUtil(org.codelibs.fess.util.ComponentUtil) ScheduledJobService(org.codelibs.fess.app.service.ScheduledJobService) SystemHelper(org.codelibs.fess.helper.SystemHelper) WebConfigPager(org.codelibs.fess.app.pager.WebConfigPager) Execute(org.lastaflute.web.Execute) SystemHelper(org.codelibs.fess.helper.SystemHelper) PermissionHelper(org.codelibs.fess.helper.PermissionHelper) StringUtil(org.codelibs.core.lang.StringUtil)

Example 7 with WebConfig

use of org.codelibs.fess.es.config.exentity.WebConfig in project fess by codelibs.

the class AdminReqheaderAction method registerWebConfigItems.

/**
 * Registers the selectable web-config entries under the {@code "webConfigItems"} key.
 * <p>
 * Each entry is produced by {@code createItem} from a web config's name and its id.
 *
 * @param data the render data that receives the item list
 */
protected void registerWebConfigItems(final RenderData data) {
    final List<WebConfig> configs = webConfigService.getAllWebConfigList(false, false, false, null);
    final List<Map<String, String>> items = new ArrayList<>();
    configs.forEach(config -> items.add(createItem(config.getName(), config.getId().toString())));
    RenderDataUtil.register(data, "webConfigItems", items);
}
Also used : ArrayList(java.util.ArrayList) WebConfig(org.codelibs.fess.es.config.exentity.WebConfig) HashMap(java.util.HashMap) Map(java.util.Map)

Example 8 with WebConfig

use of org.codelibs.fess.es.config.exentity.WebConfig in project fess by codelibs.

the class FessXpathTransformerTest method test_transform.

/**
 * Memory-usage check for {@code FessXpathTransformer#transform}.
 * <p>
 * Transforms the same small HTML document 10000 times, logging memory usage and requesting a GC
 * every 1000 iterations, then asserts that {@code MemoryUtil.getUsedMemory()} is below
 * {@code 100000000L} after a final GC request and a one-second pause.
 *
 * @throws Exception if the transformer or the final sleep fails
 */
public void test_transform() throws Exception {
    final String html = "<html><head><title>Test</title></head><body><h1>Header1</h1><p>This is a pen.</p></body></html>";
    final FessXpathTransformer transformer = new FessXpathTransformer();
    transformer.init();

    // Components registered in the container before the transformer is exercised.
    SingletonLaContainerFactory.getContainer().register(CrawlingInfoHelper.class, "crawlingInfoHelper");
    SingletonLaContainerFactory.getContainer().register(PathMappingHelper.class, "pathMappingHelper");
    SingletonLaContainerFactory.getContainer().register(CrawlingConfigHelper.class, "crawlingConfigHelper");
    SingletonLaContainerFactory.getContainer().register(SystemHelper.class, "systemHelper");
    SingletonLaContainerFactory.getContainer().register(FileTypeHelper.class, "fileTypeHelper");
    SingletonLaContainerFactory.getContainer().register(DocumentHelper.class, "documentHelper");
    SingletonLaContainerFactory.getContainer().register(LabelTypeHelper.class, "labelTypeHelper");

    // A config with no label types, stored under the "test" session.
    final WebConfig config = new WebConfig();
    setValueToObject(config, "labelTypeList", new ArrayList<LabelType>());
    ComponentUtil.getCrawlingConfigHelper().store("test", config);
    setValueToObject(ComponentUtil.getLabelTypeHelper(), "labelTypePatternList", new ArrayList<LabelTypePattern>());

    int iteration = 0;
    while (iteration < 10000) {
        if (iteration % 1000 == 0) {
            logger.info(MemoryUtil.getMemoryUsageLog() + ":" + iteration);
            System.gc();
        }
        // A fresh response is built on every pass.
        final ResponseData response = new ResponseData();
        response.setCharSet("UTF-8");
        response.setContentLength(html.length());
        response.setExecutionTime(1000L);
        response.setHttpStatusCode(200);
        response.setLastModified(new Date());
        response.setMethod("GET");
        response.setMimeType("text/html");
        response.setParentUrl("http://fess.codelibs.org/");
        response.setResponseBody(html.getBytes());
        response.setSessionId("test-1");
        response.setStatus(0);
        response.setUrl("http://fess.codelibs.org/test.html");
        // The result itself is not inspected; only memory consumption matters here.
        transformer.transform(response);
        iteration++;
    }

    System.gc();
    Thread.sleep(1000L);
    logger.info(MemoryUtil.getMemoryUsageLog());
    assertTrue(MemoryUtil.getUsedMemory() < 100000000L);
}
Also used : ResultData(org.codelibs.fess.crawler.entity.ResultData) LabelType(org.codelibs.fess.es.config.exentity.LabelType) LabelTypePattern(org.codelibs.fess.helper.LabelTypeHelper.LabelTypePattern) ResponseData(org.codelibs.fess.crawler.entity.ResponseData) WebConfig(org.codelibs.fess.es.config.exentity.WebConfig) Date(java.util.Date)

Example 9 with WebConfig

use of org.codelibs.fess.es.config.exentity.WebConfig in project fess by codelibs.

the class WebFsIndexHelper method doCrawl.

/**
 * Crawls the given web and file configurations and hands the results to the index updater.
 * <p>
 * For every configuration a crawler is created and configured (interval, thread count, depth,
 * max access count, target/included/excluded URLs, and URLs excluded because of earlier
 * failures). The index updater is then started, crawlers are launched so that at most
 * {@code getCrawlingThreadCount()} run at a time, and the method waits until every crawler is
 * done and the index updater thread has finished. Execution time and index statistics are put
 * into the crawling info map. Unless a force stop was requested, the stored configuration and
 * the crawl data of every session id are removed at the end.
 * <p>
 * A configuration without target URLs/paths is skipped with a warning; the remaining
 * configurations are still processed.
 *
 * @param sessionId the base session id passed to {@code crawlingConfigHelper.store}
 * @param webConfigList the web crawling configurations to run
 * @param fileConfigList the file crawling configurations to run
 */
protected void doCrawl(final String sessionId, final List<WebConfig> webConfigList, final List<FileConfig> fileConfigList) {
    final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
    final FessConfig fessConfig = ComponentUtil.getFessConfig();
    final int multiprocessCrawlingCount = fessConfig.getCrawlingThreadCount();
    final long startTime = System.currentTimeMillis();
    final List<String> sessionIdList = new ArrayList<>();
    crawlerList.clear();
    // Parallel to crawlerList: crawlerStatusList.get(i) is the status of crawlerList.get(i).
    final List<String> crawlerStatusList = new ArrayList<>();
    // Web
    for (final WebConfig webConfig : webConfigList) {
        final String sid = crawlingConfigHelper.store(sessionId, webConfig);
        // create crawler
        final Crawler crawler = ComponentUtil.getComponent(Crawler.class);
        crawler.setSessionId(sid);
        sessionIdList.add(sid);
        final String urlsStr = webConfig.getUrls();
        if (StringUtil.isBlank(urlsStr)) {
            logger.warn("No target urls. Skipped");
            // Skip only this config; the following configs must still be crawled.
            continue;
        }
        // interval time
        final int intervalTime = webConfig.getIntervalTime() != null ? webConfig.getIntervalTime() : Constants.DEFAULT_INTERVAL_TIME_FOR_WEB;
        ((FessIntervalController) crawler.getIntervalController()).setDelayMillisForWaitingNewUrl(intervalTime);
        final String includedUrlsStr = webConfig.getIncludedUrls() != null ? webConfig.getIncludedUrls() : StringUtil.EMPTY;
        final String excludedUrlsStr = webConfig.getExcludedUrls() != null ? webConfig.getExcludedUrls() : StringUtil.EMPTY;
        // num of threads
        final CrawlerContext crawlerContext = crawler.getCrawlerContext();
        final int numOfThread = webConfig.getNumOfThread() != null ? webConfig.getNumOfThread() : Constants.DEFAULT_NUM_OF_THREAD_FOR_WEB;
        crawlerContext.setNumOfThread(numOfThread);
        // depth
        final int depth = webConfig.getDepth() != null ? webConfig.getDepth() : -1;
        crawlerContext.setMaxDepth(depth);
        // max count
        final long maxCount = webConfig.getMaxAccessCount() != null ? webConfig.getMaxAccessCount() : maxAccessCount;
        crawlerContext.setMaxAccessCount(maxCount);
        webConfig.initializeClientFactory(crawler.getClientFactory());
        // optional cleanup requested through the config parameters
        final Map<String, String> configParamMap = webConfig.getConfigParameterMap(ConfigName.CONFIG);
        if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_ALL))) {
            deleteCrawlData(sid);
        } else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_FILTERS))) {
            final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
            try {
                urlFilterService.delete(sid);
            } catch (final Exception e) {
                // Best effort: crawling continues, but keep the cause in the log.
                logger.warn("Failed to delete url filters for " + sid, e);
            }
        }
        // set urls (one per line; lines starting with '#' are comments)
        final String[] urls = urlsStr.split("[\r\n]");
        for (final String u : urls) {
            if (StringUtil.isNotBlank(u)) {
                final String urlValue = u.trim();
                // Validate the trimmed value so surrounding whitespace does not reject a valid url.
                if (!urlValue.startsWith("#") && fessConfig.isValidCrawlerWebProtocol(urlValue)) {
                    crawler.addUrl(urlValue);
                    if (logger.isInfoEnabled()) {
                        logger.info("Target URL: " + urlValue);
                    }
                }
            }
        }
        // set included urls
        final String[] includedUrls = includedUrlsStr.split("[\r\n]");
        for (final String u : includedUrls) {
            if (StringUtil.isNotBlank(u)) {
                final String urlValue = u.trim();
                if (!urlValue.startsWith("#")) {
                    crawler.addIncludeFilter(urlValue);
                    if (logger.isInfoEnabled()) {
                        logger.info("Included URL: " + urlValue);
                    }
                }
            }
        }
        // set excluded urls
        final String[] excludedUrls = excludedUrlsStr.split("[\r\n]");
        for (final String u : excludedUrls) {
            if (StringUtil.isNotBlank(u)) {
                final String urlValue = u.trim();
                if (!urlValue.startsWith("#")) {
                    crawler.addExcludeFilter(urlValue);
                    if (logger.isInfoEnabled()) {
                        logger.info("Excluded URL: " + urlValue);
                    }
                }
            }
        }
        // failure url (null-guarded, same as the file branch below)
        final List<String> excludedUrlList = failureUrlService.getExcludedUrlList(webConfig.getConfigId());
        if (excludedUrlList != null) {
            for (final String u : excludedUrlList) {
                if (StringUtil.isNotBlank(u)) {
                    final String urlValue = u.trim();
                    crawler.addExcludeFilter(urlValue);
                    if (logger.isInfoEnabled()) {
                        logger.info("Excluded URL from failures: " + urlValue);
                    }
                }
            }
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Crawling " + urlsStr);
        }
        crawler.setBackground(true);
        crawler.setThreadPriority(crawlerPriority);
        crawlerList.add(crawler);
        crawlerStatusList.add(Constants.READY);
    }
    // File
    for (final FileConfig fileConfig : fileConfigList) {
        final String sid = crawlingConfigHelper.store(sessionId, fileConfig);
        // create crawler
        final Crawler crawler = ComponentUtil.getComponent(Crawler.class);
        crawler.setSessionId(sid);
        sessionIdList.add(sid);
        final String pathsStr = fileConfig.getPaths();
        if (StringUtil.isBlank(pathsStr)) {
            logger.warn("No target uris. Skipped");
            // Skip only this config; the following configs must still be crawled.
            continue;
        }
        // interval time
        final int intervalTime = fileConfig.getIntervalTime() != null ? fileConfig.getIntervalTime() : Constants.DEFAULT_INTERVAL_TIME_FOR_FS;
        ((FessIntervalController) crawler.getIntervalController()).setDelayMillisForWaitingNewUrl(intervalTime);
        final String includedPathsStr = fileConfig.getIncludedPaths() != null ? fileConfig.getIncludedPaths() : StringUtil.EMPTY;
        final String excludedPathsStr = fileConfig.getExcludedPaths() != null ? fileConfig.getExcludedPaths() : StringUtil.EMPTY;
        // num of threads
        final CrawlerContext crawlerContext = crawler.getCrawlerContext();
        final int numOfThread = fileConfig.getNumOfThread() != null ? fileConfig.getNumOfThread() : Constants.DEFAULT_NUM_OF_THREAD_FOR_FS;
        crawlerContext.setNumOfThread(numOfThread);
        // depth
        final int depth = fileConfig.getDepth() != null ? fileConfig.getDepth() : -1;
        crawlerContext.setMaxDepth(depth);
        // max count
        final long maxCount = fileConfig.getMaxAccessCount() != null ? fileConfig.getMaxAccessCount() : maxAccessCount;
        crawlerContext.setMaxAccessCount(maxCount);
        fileConfig.initializeClientFactory(crawler.getClientFactory());
        // optional cleanup requested through the config parameters
        final Map<String, String> configParamMap = fileConfig.getConfigParameterMap(ConfigName.CONFIG);
        if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_ALL))) {
            deleteCrawlData(sid);
        } else if (Constants.TRUE.equalsIgnoreCase(configParamMap.get(Constants.CONFIG_CLEANUP_FILTERS))) {
            final EsUrlFilterService urlFilterService = ComponentUtil.getComponent(EsUrlFilterService.class);
            try {
                urlFilterService.delete(sid);
            } catch (final Exception e) {
                // Best effort: crawling continues, but keep the cause in the log.
                logger.warn("Failed to delete url filters for " + sid, e);
            }
        }
        // set paths (one per line; lines starting with '#' are comments)
        final String[] paths = pathsStr.split("[\r\n]");
        for (String u : paths) {
            if (StringUtil.isNotBlank(u)) {
                u = u.trim();
                if (!u.startsWith("#")) {
                    // A path without a valid file protocol is turned into a "file:" url.
                    if (!fessConfig.isValidCrawlerFileProtocol(u)) {
                        if (u.startsWith("/")) {
                            u = "file:" + u;
                        } else {
                            u = "file:/" + u;
                        }
                    }
                    crawler.addUrl(u);
                    if (logger.isInfoEnabled()) {
                        logger.info("Target Path: " + u);
                    }
                }
            }
        }
        // set included paths
        // A "#DISABLE_URL_ENCODE" line makes the next non-comment line bypass encodeUrlFilter.
        boolean urlEncodeDisabled = false;
        final String[] includedPaths = includedPathsStr.split("[\r\n]");
        for (final String u : includedPaths) {
            if (StringUtil.isNotBlank(u)) {
                final String line = u.trim();
                if (!line.startsWith("#")) {
                    final String urlValue;
                    if (urlEncodeDisabled) {
                        urlValue = line;
                        urlEncodeDisabled = false;
                    } else {
                        urlValue = systemHelper.encodeUrlFilter(line);
                    }
                    crawler.addIncludeFilter(urlValue);
                    if (logger.isInfoEnabled()) {
                        logger.info("Included Path: " + urlValue);
                    }
                } else if (line.startsWith("#DISABLE_URL_ENCODE")) {
                    urlEncodeDisabled = true;
                }
            }
        }
        // set excluded paths (same "#DISABLE_URL_ENCODE" handling as above)
        urlEncodeDisabled = false;
        final String[] excludedPaths = excludedPathsStr.split("[\r\n]");
        for (final String u : excludedPaths) {
            if (StringUtil.isNotBlank(u)) {
                final String line = u.trim();
                if (!line.startsWith("#")) {
                    final String urlValue;
                    if (urlEncodeDisabled) {
                        urlValue = line;
                        urlEncodeDisabled = false;
                    } else {
                        urlValue = systemHelper.encodeUrlFilter(line);
                    }
                    crawler.addExcludeFilter(urlValue);
                    if (logger.isInfoEnabled()) {
                        logger.info("Excluded Path: " + urlValue);
                    }
                } else if (line.startsWith("#DISABLE_URL_ENCODE")) {
                    urlEncodeDisabled = true;
                }
            }
        }
        // failure url
        final List<String> excludedUrlList = failureUrlService.getExcludedUrlList(fileConfig.getConfigId());
        if (excludedUrlList != null) {
            for (final String u : excludedUrlList) {
                if (StringUtil.isNotBlank(u)) {
                    final String urlValue = u.trim();
                    crawler.addExcludeFilter(urlValue);
                    if (logger.isInfoEnabled()) {
                        logger.info("Excluded Path from failures: " + urlValue);
                    }
                }
            }
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Crawling " + pathsStr);
        }
        crawler.setBackground(true);
        crawler.setThreadPriority(crawlerPriority);
        crawlerList.add(crawler);
        crawlerStatusList.add(Constants.READY);
    }
    // run index update
    final IndexUpdater indexUpdater = ComponentUtil.getIndexUpdater();
    indexUpdater.setName("IndexUpdater");
    indexUpdater.setPriority(indexUpdaterPriority);
    indexUpdater.setSessionIdList(sessionIdList);
    indexUpdater.setDaemon(true);
    indexUpdater.setCrawlerList(crawlerList);
    boostDocumentRuleService.getAvailableBoostDocumentRuleList().forEach(rule -> {
        indexUpdater.addDocBoostMatcher(new org.codelibs.fess.indexer.DocBoostMatcher(rule));
    });
    indexUpdater.start();
    // Launch crawlers in list order, keeping at most multiprocessCrawlingCount active at once.
    int startedCrawlerNum = 0;
    int activeCrawlerNum = 0;
    while (startedCrawlerNum < crawlerList.size()) {
        // Force to stop crawl
        if (systemHelper.isForceStop()) {
            for (final Crawler crawler : crawlerList) {
                crawler.stop();
            }
            break;
        }
        if (activeCrawlerNum < multiprocessCrawlingCount) {
            // start crawling
            crawlerList.get(startedCrawlerNum).execute();
            crawlerStatusList.set(startedCrawlerNum, Constants.RUNNING);
            startedCrawlerNum++;
            activeCrawlerNum++;
            try {
                Thread.sleep(crawlingExecutionInterval);
            } catch (final InterruptedException e) {
                // The interruption is only logged; the launch loop keeps going.
                if (logger.isDebugEnabled()) {
                    logger.debug("Interrupted.", e);
                }
            }
            continue;
        }
        // check status: free a slot for every running crawler that has finished
        for (int i = 0; i < startedCrawlerNum; i++) {
            if (crawlerList.get(i).getCrawlerContext().getStatus() == CrawlerStatus.DONE && crawlerStatusList.get(i).equals(Constants.RUNNING)) {
                crawlerList.get(i).awaitTermination();
                crawlerStatusList.set(i, Constants.DONE);
                final String sid = crawlerList.get(i).getCrawlerContext().getSessionId();
                indexUpdater.addFinishedSessionId(sid);
                activeCrawlerNum--;
            }
        }
        try {
            Thread.sleep(crawlingExecutionInterval);
        } catch (final InterruptedException e) {
            // The interruption is only logged; the launch loop keeps going.
            if (logger.isDebugEnabled()) {
                logger.debug("Interrupted.", e);
            }
        }
    }
    // Wait until every crawler in the list has been marked DONE.
    boolean finishedAll = false;
    while (!finishedAll) {
        finishedAll = true;
        for (int i = 0; i < crawlerList.size(); i++) {
            crawlerList.get(i).awaitTermination(crawlingExecutionInterval);
            if (crawlerList.get(i).getCrawlerContext().getStatus() == CrawlerStatus.DONE && !crawlerStatusList.get(i).equals(Constants.DONE)) {
                crawlerStatusList.set(i, Constants.DONE);
                final String sid = crawlerList.get(i).getCrawlerContext().getSessionId();
                indexUpdater.addFinishedSessionId(sid);
            }
            if (!crawlerStatusList.get(i).equals(Constants.DONE)) {
                finishedAll = false;
            }
        }
    }
    crawlerList.clear();
    crawlerStatusList.clear();
    // put crawling info
    final CrawlingInfoHelper crawlingInfoHelper = ComponentUtil.getCrawlingInfoHelper();
    final long execTime = System.currentTimeMillis() - startTime;
    crawlingInfoHelper.putToInfoMap(Constants.WEB_FS_CRAWLING_EXEC_TIME, Long.toString(execTime));
    if (logger.isInfoEnabled()) {
        logger.info("[EXEC TIME] crawling time: " + execTime + "ms");
    }
    // Tell the index updater that crawling is over, then wait for its thread to end.
    indexUpdater.setFinishCrawling(true);
    try {
        indexUpdater.join();
    } catch (final InterruptedException e) {
        logger.warn("Interrupted index update.", e);
    }
    crawlingInfoHelper.putToInfoMap(Constants.WEB_FS_INDEX_EXEC_TIME, Long.toString(indexUpdater.getExecuteTime()));
    crawlingInfoHelper.putToInfoMap(Constants.WEB_FS_INDEX_SIZE, Long.toString(indexUpdater.getDocumentSize()));
    // On a force stop the stored configs and crawl data are left in place.
    if (systemHelper.isForceStop()) {
        return;
    }
    for (final String sid : sessionIdList) {
        // remove config
        crawlingConfigHelper.remove(sid);
        deleteCrawlData(sid);
    }
}
Also used : FileConfig(org.codelibs.fess.es.config.exentity.FileConfig) ArrayList(java.util.ArrayList) WebConfig(org.codelibs.fess.es.config.exentity.WebConfig) FessIntervalController(org.codelibs.fess.crawler.interval.FessIntervalController) Crawler(org.codelibs.fess.crawler.Crawler) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) CrawlerContext(org.codelibs.fess.crawler.CrawlerContext) EsUrlFilterService(org.codelibs.fess.crawler.service.impl.EsUrlFilterService) IndexUpdater(org.codelibs.fess.indexer.IndexUpdater)

Aggregations

WebConfig (org.codelibs.fess.es.config.exentity.WebConfig)9 Execute (org.lastaflute.web.Execute)5 Resource (javax.annotation.Resource)4 WebConfigPager (org.codelibs.fess.app.pager.WebConfigPager)4 WebConfigService (org.codelibs.fess.app.service.WebConfigService)4 ArrayList (java.util.ArrayList)3 List (java.util.List)3 Collectors (java.util.stream.Collectors)3 CrudMode (org.codelibs.fess.app.web.CrudMode)3 AdminWebconfigAction.getWebConfig (org.codelibs.fess.app.web.admin.webconfig.AdminWebconfigAction.getWebConfig)3 ApiResult (org.codelibs.fess.app.web.api.ApiResult)3 HashMap (java.util.HashMap)2 Map (java.util.Map)2 StringUtil (org.codelibs.core.lang.StringUtil)2 Constants (org.codelibs.fess.Constants)2 ScheduledJobService (org.codelibs.fess.app.service.ScheduledJobService)2 ApiConfigResponse (org.codelibs.fess.app.web.api.ApiResult.ApiConfigResponse)2 ApiResponse (org.codelibs.fess.app.web.api.ApiResult.ApiResponse)2 ApiUpdateResponse (org.codelibs.fess.app.web.api.ApiResult.ApiUpdateResponse)2 Status (org.codelibs.fess.app.web.api.ApiResult.Status)2