Search in sources :

Example 1 with IntervalControlHelper

use of org.codelibs.fess.helper.IntervalControlHelper in project fess by codelibs.

In class IndexUpdater, the method getAccessResultList:

/**
 * Fetches the next batch of access results from the indexing queue and
 * drops the ones that are still inside the commit margin window.
 *
 * <p>After fetching, the method logs a summary line (fetch time, the
 * previous cleanup time when it is non-negative, and memory usage) and, if
 * the total number of hits exceeds the configured unprocessed-document
 * limit while the crawler is flagged as running, it flags the crawler as
 * not running via {@link IntervalControlHelper#setCrawlerRunning(boolean)}.
 *
 * @param cb callback that configures the search request used to read the queue
 * @param cleanupTime duration in milliseconds of the previous cleanup, or a
 *        negative value when it should not be reported in the log line
 * @return the fetched list with too-recent entries removed; the returned
 *         object is the same list instance the data service produced, so
 *         callers can still read its total hit count
 */
private List<EsAccessResult> getAccessResultList(final Consumer<SearchRequestBuilder> cb, final long cleanupTime) {
    if (logger.isDebugEnabled()) {
        logger.debug("Getting documents in IndexUpdater queue.");
    }
    final long execTime = System.currentTimeMillis();
    final List<EsAccessResult> arList = ((EsDataService) dataService).getAccessResultList(cb);
    final FessConfig fessConfig = ComponentUtil.getFessConfig();
    if (!arList.isEmpty()) {
        final long commitMarginTime = fessConfig.getIndexerWebfsCommitMarginTimeAsInteger().longValue();
        // Entries created after this instant are skipped for now; they are
        // picked up by a later call once they are older than the margin.
        final long newestAllowedCreateTime = execTime - commitMarginTime;
        // Single in-place pass instead of copying to an array and calling
        // remove(Object) per element (quadratic and equals-based).
        arList.removeIf(ar -> ar.getCreateTime().longValue() > newestAllowedCreateTime);
    }
    // The total hit count is carried by the result list itself and is not
    // affected by the removals above.
    final long totalHits = ((EsResultList<EsAccessResult>) arList).getTotalHits();
    if (logger.isInfoEnabled()) {
        final StringBuilder buf = new StringBuilder(100);
        buf.append("Processing ");
        if (totalHits > 0) {
            buf.append(arList.size()).append('/').append(totalHits).append(" docs (Doc:{access ");
        } else {
            buf.append("no docs in indexing queue (Doc:{access ");
        }
        buf.append(System.currentTimeMillis() - execTime).append("ms");
        if (cleanupTime >= 0) {
            buf.append(", cleanup ").append(cleanupTime).append("ms");
        }
        buf.append("}, ");
        buf.append(MemoryUtil.getMemoryUsageLog());
        buf.append(')');
        logger.info(buf.toString());
    }
    final long unprocessedDocumentSize = fessConfig.getIndexerUnprocessedDocumentSizeAsInteger().longValue();
    final IntervalControlHelper intervalControlHelper = ComponentUtil.getIntervalControlHelper();
    // Back-pressure: too many documents are waiting, so mark the crawler as
    // not running until the caller re-enables it.
    if (totalHits > unprocessedDocumentSize && intervalControlHelper.isCrawlerRunning()) {
        if (logger.isInfoEnabled()) {
            logger.info("Stopped all crawler threads. You have {} (>{}) unprocessed docs.", totalHits, unprocessedDocumentSize);
        }
        intervalControlHelper.setCrawlerRunning(false);
    }
    return arList;
}
Also used : EsAccessResult(org.codelibs.fess.crawler.entity.EsAccessResult) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) EsDataService(org.codelibs.fess.crawler.service.impl.EsDataService) EsResultList(org.codelibs.fess.crawler.util.EsResultList) IntervalControlHelper(org.codelibs.fess.helper.IntervalControlHelper)

Example 2 with IntervalControlHelper

use of org.codelibs.fess.helper.IntervalControlHelper in project fess by codelibs.

In class IndexUpdater, the method run:

/**
 * Main loop of the index updater: repeatedly reads access results from the
 * queue, processes and cleans them up, and sends the collected documents to
 * the search engine until crawling is finished.
 *
 * <p>The loop ends when {@code finishCrawling} is set and no access results
 * remain, when too many consecutive empty reads occur, when the component
 * container is no longer available, or when an error escapes the retry
 * handling. On exit the crawler-running flag is always set back to
 * {@code true} and the accumulated execution time is logged.
 *
 * @throws FessSystemException if {@code dataService} has not been set
 */
@Override
public void run() {
    if (dataService == null) {
        throw new FessSystemException("DataService is null.");
    }
    if (logger.isDebugEnabled()) {
        logger.debug("Starting indexUpdater.");
    }
    // Reset the per-run statistics.
    executeTime = 0;
    documentSize = 0;
    final FessConfig fessConfig = ComponentUtil.getFessConfig();
    final long updateInterval = fessConfig.getIndexerWebfsUpdateIntervalAsInteger().longValue();
    final int maxEmptyListCount = fessConfig.getIndexerWebfsMaxEmptyListCountAsInteger();
    final IntervalControlHelper intervalControlHelper = ComponentUtil.getIntervalControlHelper();
    try {
        // Search request setup: results whose session id is in sessionIdList and
        // whose status is OK, starting at offset 0, oldest first, at most
        // maxDocumentCacheSize entries per read (at least 1).
        final Consumer<SearchRequestBuilder> cb = builder -> {
            final QueryBuilder queryBuilder = QueryBuilders.boolQuery().filter(QueryBuilders.termsQuery(EsAccessResult.SESSION_ID, sessionIdList)).filter(QueryBuilders.termQuery(EsAccessResult.STATUS, org.codelibs.fess.crawler.Constants.OK_STATUS));
            builder.setQuery(queryBuilder);
            builder.setFrom(0);
            final int maxDocumentCacheSize = fessConfig.getIndexerWebfsMaxDocumentCacheSizeAsInteger();
            builder.setSize(maxDocumentCacheSize <= 0 ? 1 : maxDocumentCacheSize);
            builder.addSort(EsAccessResult.CREATE_TIME, SortOrder.ASC);
        };
        final DocList docList = new DocList();
        final List<EsAccessResult> accessResultList = new ArrayList<>();
        // updateTime is reused: it holds a start timestamp, then (briefly) the
        // elapsed time of the previous pass, then a start timestamp again.
        long updateTime = System.currentTimeMillis();
        int errorCount = 0;
        int emptyListCount = 0;
        // Negative means "no cleanup duration to report" to getAccessResultList.
        long cleanupTime = -1;
        while (!finishCrawling || !accessResultList.isEmpty()) {
            try {
                // Snapshot used below to detect whether the finished-session list
                // changed while this pass was running.
                final int sessionIdListSize = finishedSessionIdList.size();
                // Re-enable the crawler flag each pass; getAccessResultList may clear
                // it again when too many documents are waiting.
                intervalControlHelper.setCrawlerRunning(true);
                // Convert the start timestamp into the time elapsed since it was taken.
                updateTime = System.currentTimeMillis() - updateTime;
                final long interval = updateInterval - updateTime;
                if (interval > 0) {
                    // sleep
                    // 10 sec (default)
                    ThreadUtil.sleep(interval);
                }
                systemHelper.calibrateCpuLoad();
                docList.clear();
                accessResultList.clear();
                intervalControlHelper.delayByRules();
                if (logger.isDebugEnabled()) {
                    logger.debug("Processing documents in IndexUpdater queue.");
                }
                // From here on updateTime is the start timestamp of this pass.
                updateTime = System.currentTimeMillis();
                List<EsAccessResult> arList = getAccessResultList(cb, cleanupTime);
                if (arList.isEmpty()) {
                    emptyListCount++;
                } else {
                    // reset
                    emptyListCount = 0;
                }
                long hitCount = ((EsResultList<EsAccessResult>) arList).getTotalHits();
                // Keep draining while the queue still reports hits.
                while (hitCount > 0) {
                    if (arList.isEmpty()) {
                        // Hits exist but none were returned for processing; wait for
                        // the commit margin before reading again.
                        ThreadUtil.sleep(fessConfig.getIndexerWebfsCommitMarginTimeAsInteger().longValue());
                        cleanupTime = -1;
                    } else {
                        processAccessResults(docList, accessResultList, arList);
                        cleanupTime = cleanupAccessResults(accessResultList);
                    }
                    arList = getAccessResultList(cb, cleanupTime);
                    hitCount = ((EsResultList<EsAccessResult>) arList).getTotalHits();
                }
                // Send whatever is still held in docList after the queue is drained.
                if (!docList.isEmpty()) {
                    indexingHelper.sendDocuments(searchEngineClient, docList);
                }
                synchronized (finishedSessionIdList) {
                    // Clean up only when there are finished sessions and the list size
                    // did not change during this pass.
                    if (sessionIdListSize != 0 && sessionIdListSize == finishedSessionIdList.size()) {
                        cleanupFinishedSessionData();
                    }
                }
                executeTime += System.currentTimeMillis() - updateTime;
                if (logger.isDebugEnabled()) {
                    logger.debug("Processed documents in IndexUpdater queue.");
                }
                // reset count
                errorCount = 0;
            } catch (final Exception e) {
                // Retry on failure; give up and rethrow once the consecutive error
                // count has exceeded maxErrorCount.
                if (errorCount > maxErrorCount) {
                    throw e;
                }
                errorCount++;
                logger.warn("Failed to access data. Retry to access it {} times.", errorCount, e);
            } finally {
                // A force-stop request ends the loop once accessResultList is empty.
                if (systemHelper.isForceStop()) {
                    finishCrawling = true;
                    if (logger.isDebugEnabled()) {
                        logger.debug("Stopped indexUpdater.");
                    }
                }
            }
            // Too many consecutive empty reads: stop and record a "QueueTimeout" error.
            if (emptyListCount >= maxEmptyListCount) {
                if (logger.isInfoEnabled()) {
                    logger.info("Terminating indexUpdater. emptyListCount is over {}.", maxEmptyListCount);
                }
                // terminate crawling
                finishCrawling = true;
                forceStop();
                if (fessConfig.getIndexerThreadDumpEnabledAsBoolean()) {
                    ThreadDumpUtil.printThreadDump();
                }
                org.codelibs.fess.exec.Crawler.addError("QueueTimeout");
            }
            // The component container is gone; stop immediately.
            if (!ComponentUtil.available()) {
                logger.info("IndexUpdater is terminated.");
                forceStop();
                break;
            }
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Finished indexUpdater.");
        }
    } catch (final ContainerNotAvailableException e) {
        // Include the stack trace only when debug logging is enabled.
        if (logger.isDebugEnabled()) {
            logger.error("IndexUpdater is terminated.", e);
        } else if (logger.isInfoEnabled()) {
            logger.info("IndexUpdater is terminated.");
        }
        forceStop();
    } catch (final Throwable t) {
        // While the container is available the failure is logged with its stack
        // trace; otherwise it is logged by level and also recorded as a crawler
        // error named after the throwable's simple class name.
        if (ComponentUtil.available()) {
            logger.error("IndexUpdater is terminated.", t);
        } else if (logger.isDebugEnabled()) {
            logger.error("IndexUpdater is terminated.", t);
            org.codelibs.fess.exec.Crawler.addError(t.getClass().getSimpleName());
        } else if (logger.isInfoEnabled()) {
            logger.info("IndexUpdater is terminated.");
            org.codelibs.fess.exec.Crawler.addError(t.getClass().getSimpleName());
        }
        forceStop();
    } finally {
        // Never leave the crawler flagged as stopped when this updater exits.
        intervalControlHelper.setCrawlerRunning(true);
    }
    if (logger.isInfoEnabled()) {
        logger.info("[EXEC TIME] index update time: {}ms", executeTime);
    }
}
Also used : ThreadUtil(org.codelibs.core.lang.ThreadUtil) Constants(org.codelibs.fess.Constants) MemoryUtil(org.codelibs.fess.util.MemoryUtil) IndexingHelper(org.codelibs.fess.helper.IndexingHelper) FessSystemException(org.codelibs.fess.exception.FessSystemException) DataService(org.codelibs.fess.crawler.service.DataService) EsDataService(org.codelibs.fess.crawler.service.impl.EsDataService) SearchEngineClient(org.codelibs.fess.es.client.SearchEngineClient) Transformer(org.codelibs.fess.crawler.transformer.Transformer) ArrayList(java.util.ArrayList) PreDestroy(javax.annotation.PreDestroy) IngestFactory(org.codelibs.fess.ingest.IngestFactory) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) SortOrder(org.opensearch.search.sort.SortOrder) EsAccessResult(org.codelibs.fess.crawler.entity.EsAccessResult) EsUrlQueue(org.codelibs.fess.crawler.entity.EsUrlQueue) Map(java.util.Map) AccessResultData(org.codelibs.fess.crawler.entity.AccessResultData) FavoriteLogBhv(org.codelibs.fess.es.log.exbhv.FavoriteLogBhv) IntervalControlHelper(org.codelibs.fess.helper.IntervalControlHelper) SearchRequestBuilder(org.opensearch.action.search.SearchRequestBuilder) UrlFilterService(org.codelibs.fess.crawler.service.UrlFilterService) Crawler(org.codelibs.fess.crawler.Crawler) QueryBuilders(org.opensearch.index.query.QueryBuilders) ClickLogBhv(org.codelibs.fess.es.log.exbhv.ClickLogBhv) ContainerNotAvailableException(org.codelibs.fess.exception.ContainerNotAvailableException) Resource(javax.annotation.Resource) StringUtil(org.codelibs.core.lang.StringUtil) Consumer(java.util.function.Consumer) UrlQueueService(org.codelibs.fess.crawler.service.UrlQueueService) List(java.util.List) Logger(org.apache.logging.log4j.Logger) QueryBuilder(org.opensearch.index.query.QueryBuilder) SearchLogHelper(org.codelibs.fess.helper.SearchLogHelper) ComponentUtil(org.codelibs.fess.util.ComponentUtil) SystemHelper(org.codelibs.fess.helper.SystemHelper) 
ThreadDumpUtil(org.codelibs.fess.util.ThreadDumpUtil) PostConstruct(javax.annotation.PostConstruct) AccessResult(org.codelibs.fess.crawler.entity.AccessResult) DocList(org.codelibs.fess.util.DocList) LogManager(org.apache.logging.log4j.LogManager) Ingester(org.codelibs.fess.ingest.Ingester) EsResultList(org.codelibs.fess.crawler.util.EsResultList) ContainerNotAvailableException(org.codelibs.fess.exception.ContainerNotAvailableException) SearchRequestBuilder(org.opensearch.action.search.SearchRequestBuilder) ArrayList(java.util.ArrayList) QueryBuilder(org.opensearch.index.query.QueryBuilder) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) FessSystemException(org.codelibs.fess.exception.FessSystemException) ContainerNotAvailableException(org.codelibs.fess.exception.ContainerNotAvailableException) FessSystemException(org.codelibs.fess.exception.FessSystemException) EsAccessResult(org.codelibs.fess.crawler.entity.EsAccessResult) DocList(org.codelibs.fess.util.DocList) IntervalControlHelper(org.codelibs.fess.helper.IntervalControlHelper) EsResultList(org.codelibs.fess.crawler.util.EsResultList)

Example 3 with IntervalControlHelper

use of org.codelibs.fess.helper.IntervalControlHelper in project fess by codelibs.

In class FessIntervalController, the method delayForWaitingNewUrl:

/**
 * Applies Fess-specific throttling before delegating to the superclass delay
 * that waits for a new URL.
 *
 * <p>CPU load calibration runs first. The crawler status check and the
 * rule-based delay are then attempted; any {@link Exception} they raise is
 * discarded so that the superclass delay is still performed.
 */
@Override
protected void delayForWaitingNewUrl() {
    ComponentUtil.getSystemHelper().calibrateCpuLoad();
    try {
        final IntervalControlHelper helper = ComponentUtil.getIntervalControlHelper();
        helper.checkCrawlerStatus();
        helper.delayByRules();
    } catch (final Exception ignored) {
        // Intentionally ignored: a failure in the optional throttling steps
        // must not prevent the regular wait below.
    }
    super.delayForWaitingNewUrl();
}
Also used : IntervalControlHelper(org.codelibs.fess.helper.IntervalControlHelper)

Aggregations

IntervalControlHelper (org.codelibs.fess.helper.IntervalControlHelper)3 EsAccessResult (org.codelibs.fess.crawler.entity.EsAccessResult)2 EsDataService (org.codelibs.fess.crawler.service.impl.EsDataService)2 EsResultList (org.codelibs.fess.crawler.util.EsResultList)2 FessConfig (org.codelibs.fess.mylasta.direction.FessConfig)2 ArrayList (java.util.ArrayList)1 List (java.util.List)1 Map (java.util.Map)1 Consumer (java.util.function.Consumer)1 PostConstruct (javax.annotation.PostConstruct)1 PreDestroy (javax.annotation.PreDestroy)1 Resource (javax.annotation.Resource)1 LogManager (org.apache.logging.log4j.LogManager)1 Logger (org.apache.logging.log4j.Logger)1 StringUtil (org.codelibs.core.lang.StringUtil)1 ThreadUtil (org.codelibs.core.lang.ThreadUtil)1 Constants (org.codelibs.fess.Constants)1 Crawler (org.codelibs.fess.crawler.Crawler)1 AccessResult (org.codelibs.fess.crawler.entity.AccessResult)1 AccessResultData (org.codelibs.fess.crawler.entity.AccessResultData)1