Search in sources :

Example 1 with DocList

Use of org.codelibs.fess.util.DocList in project fess by codelibs.

The run method of the class IndexUpdater.

@Override
public void run() {
    if (dataService == null) {
        throw new FessSystemException("DataService is null.");
    }
    if (logger.isDebugEnabled()) {
        logger.debug("Starting indexUpdater.");
    }
    executeTime = 0;
    documentSize = 0;
    final FessConfig fessConfig = ComponentUtil.getFessConfig();
    final long updateInterval = fessConfig.getIndexerWebfsUpdateIntervalAsInteger().longValue();
    final int maxEmptyListCount = fessConfig.getIndexerWebfsMaxEmptyListCountAsInteger().intValue();
    final IntervalControlHelper intervalControlHelper = ComponentUtil.getIntervalControlHelper();
    try {
        final Consumer<SearchRequestBuilder> cb = builder -> {
            final QueryBuilder queryBuilder = QueryBuilders.boolQuery().filter(QueryBuilders.termsQuery(EsAccessResult.SESSION_ID, sessionIdList)).filter(QueryBuilders.termQuery(EsAccessResult.STATUS, org.codelibs.fess.crawler.Constants.OK_STATUS));
            builder.setQuery(queryBuilder);
            builder.setFrom(0);
            final int maxDocumentCacheSize = fessConfig.getIndexerWebfsMaxDocumentCacheSizeAsInteger().intValue();
            builder.setSize(maxDocumentCacheSize <= 0 ? 1 : maxDocumentCacheSize);
            builder.addSort(EsAccessResult.CREATE_TIME, SortOrder.ASC);
        };
        final DocList docList = new DocList();
        final List<EsAccessResult> accessResultList = new ArrayList<>();
        long updateTime = System.currentTimeMillis();
        int errorCount = 0;
        int emptyListCount = 0;
        long cleanupTime = -1;
        while (!finishCrawling || !accessResultList.isEmpty()) {
            try {
                final int sessionIdListSize = finishedSessionIdList.size();
                intervalControlHelper.setCrawlerRunning(true);
                updateTime = System.currentTimeMillis() - updateTime;
                final long interval = updateInterval - updateTime;
                if (interval > 0) {
                    // sleep
                    try {
                        // 10 sec (default)
                        Thread.sleep(interval);
                    } catch (final InterruptedException e) {
                        logger.warn("Interrupted index update.", e);
                    }
                }
                docList.clear();
                accessResultList.clear();
                intervalControlHelper.delayByRules();
                if (logger.isDebugEnabled()) {
                    logger.debug("Processing documents in IndexUpdater queue.");
                }
                updateTime = System.currentTimeMillis();
                List<EsAccessResult> arList = getAccessResultList(cb, cleanupTime);
                if (arList.isEmpty()) {
                    emptyListCount++;
                } else {
                    // reset
                    emptyListCount = 0;
                }
                long hitCount = ((EsResultList<EsAccessResult>) arList).getTotalHits();
                while (hitCount > 0) {
                    if (arList.isEmpty()) {
                        try {
                            Thread.sleep(fessConfig.getIndexerWebfsCommitMarginTimeAsInteger().longValue());
                        } catch (final Exception e) {
                        // ignore
                        }
                        cleanupTime = -1;
                    } else {
                        processAccessResults(docList, accessResultList, arList);
                        cleanupTime = cleanupAccessResults(accessResultList);
                    }
                    arList = getAccessResultList(cb, cleanupTime);
                    hitCount = ((EsResultList<EsAccessResult>) arList).getTotalHits();
                }
                if (!docList.isEmpty()) {
                    indexingHelper.sendDocuments(fessEsClient, docList);
                }
                synchronized (finishedSessionIdList) {
                    if (sessionIdListSize != 0 && sessionIdListSize == finishedSessionIdList.size()) {
                        cleanupFinishedSessionData();
                    }
                }
                executeTime += System.currentTimeMillis() - updateTime;
                if (logger.isDebugEnabled()) {
                    logger.debug("Processed documents in IndexUpdater queue.");
                }
                // reset count
                errorCount = 0;
            } catch (final Exception e) {
                if (errorCount > maxErrorCount) {
                    throw e;
                }
                errorCount++;
                logger.warn("Failed to access data. Retry to access.. " + errorCount, e);
            } finally {
                if (systemHelper.isForceStop()) {
                    finishCrawling = true;
                    if (logger.isDebugEnabled()) {
                        logger.debug("Stopped indexUpdater.");
                    }
                }
            }
            if (emptyListCount >= maxEmptyListCount) {
                if (logger.isInfoEnabled()) {
                    logger.info("Terminating indexUpdater. " + "emptyListCount is over " + maxEmptyListCount + ".");
                }
                // terminate crawling
                finishCrawling = true;
                forceStop();
                if (fessConfig.getIndexerThreadDumpEnabledAsBoolean()) {
                    printThreadDump();
                }
            }
            if (!ComponentUtil.available()) {
                logger.info("IndexUpdater is terminated.");
                forceStop();
                break;
            }
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Finished indexUpdater.");
        }
    } catch (final ContainerNotAvailableException e) {
        if (logger.isDebugEnabled()) {
            logger.error("IndexUpdater is terminated.", e);
        } else if (logger.isInfoEnabled()) {
            logger.info("IndexUpdater is terminated.");
        }
        forceStop();
    } catch (final Throwable t) {
        if (ComponentUtil.available()) {
            logger.error("IndexUpdater is terminated.", t);
        } else if (logger.isDebugEnabled()) {
            logger.error("IndexUpdater is terminated.", t);
        } else if (logger.isInfoEnabled()) {
            logger.info("IndexUpdater is terminated.");
        }
        forceStop();
    } finally {
        intervalControlHelper.setCrawlerRunning(true);
    }
    if (logger.isInfoEnabled()) {
        logger.info("[EXEC TIME] index update time: " + executeTime + "ms");
    }
}
Also used : Constants(org.codelibs.fess.Constants) MemoryUtil(org.codelibs.fess.util.MemoryUtil) IndexingHelper(org.codelibs.fess.helper.IndexingHelper) FessSystemException(org.codelibs.fess.exception.FessSystemException) LoggerFactory(org.slf4j.LoggerFactory) DataService(org.codelibs.fess.crawler.service.DataService) EsDataService(org.codelibs.fess.crawler.service.impl.EsDataService) Transformer(org.codelibs.fess.crawler.transformer.Transformer) QueryBuilders(org.elasticsearch.index.query.QueryBuilders) ArrayList(java.util.ArrayList) PreDestroy(javax.annotation.PreDestroy) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) EsAccessResult(org.codelibs.fess.crawler.entity.EsAccessResult) EsUrlQueue(org.codelibs.fess.crawler.entity.EsUrlQueue) Map(java.util.Map) AccessResultData(org.codelibs.fess.crawler.entity.AccessResultData) FavoriteLogBhv(org.codelibs.fess.es.log.exbhv.FavoriteLogBhv) IntervalControlHelper(org.codelibs.fess.helper.IntervalControlHelper) UrlFilterService(org.codelibs.fess.crawler.service.UrlFilterService) Crawler(org.codelibs.fess.crawler.Crawler) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) Logger(org.slf4j.Logger) FessEsClient(org.codelibs.fess.es.client.FessEsClient) ClickLogBhv(org.codelibs.fess.es.log.exbhv.ClickLogBhv) ContainerNotAvailableException(org.codelibs.fess.exception.ContainerNotAvailableException) Resource(javax.annotation.Resource) StringUtil(org.codelibs.core.lang.StringUtil) Consumer(java.util.function.Consumer) UrlQueueService(org.codelibs.fess.crawler.service.UrlQueueService) List(java.util.List) SearchLogHelper(org.codelibs.fess.helper.SearchLogHelper) ComponentUtil(org.codelibs.fess.util.ComponentUtil) SystemHelper(org.codelibs.fess.helper.SystemHelper) SearchRequestBuilder(org.elasticsearch.action.search.SearchRequestBuilder) SortOrder(org.elasticsearch.search.sort.SortOrder) AccessResult(org.codelibs.fess.crawler.entity.AccessResult) DocList(org.codelibs.fess.util.DocList) 
EsResultList(org.codelibs.fess.crawler.util.EsResultList) ContainerNotAvailableException(org.codelibs.fess.exception.ContainerNotAvailableException) SearchRequestBuilder(org.elasticsearch.action.search.SearchRequestBuilder) ArrayList(java.util.ArrayList) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) FessSystemException(org.codelibs.fess.exception.FessSystemException) ContainerNotAvailableException(org.codelibs.fess.exception.ContainerNotAvailableException) FessSystemException(org.codelibs.fess.exception.FessSystemException) EsAccessResult(org.codelibs.fess.crawler.entity.EsAccessResult) DocList(org.codelibs.fess.util.DocList) IntervalControlHelper(org.codelibs.fess.helper.IntervalControlHelper) EsResultList(org.codelibs.fess.crawler.util.EsResultList)

Aggregations

ArrayList (java.util.ArrayList)1 List (java.util.List)1 Map (java.util.Map)1 Consumer (java.util.function.Consumer)1 PreDestroy (javax.annotation.PreDestroy)1 Resource (javax.annotation.Resource)1 StringUtil (org.codelibs.core.lang.StringUtil)1 Constants (org.codelibs.fess.Constants)1 Crawler (org.codelibs.fess.crawler.Crawler)1 AccessResult (org.codelibs.fess.crawler.entity.AccessResult)1 AccessResultData (org.codelibs.fess.crawler.entity.AccessResultData)1 EsAccessResult (org.codelibs.fess.crawler.entity.EsAccessResult)1 EsUrlQueue (org.codelibs.fess.crawler.entity.EsUrlQueue)1 DataService (org.codelibs.fess.crawler.service.DataService)1 UrlFilterService (org.codelibs.fess.crawler.service.UrlFilterService)1 UrlQueueService (org.codelibs.fess.crawler.service.UrlQueueService)1 EsDataService (org.codelibs.fess.crawler.service.impl.EsDataService)1 Transformer (org.codelibs.fess.crawler.transformer.Transformer)1 EsResultList (org.codelibs.fess.crawler.util.EsResultList)1 FessEsClient (org.codelibs.fess.es.client.FessEsClient)1