Search in sources :

Example 21 with QueryBuilder

use of org.opensearch.index.query.QueryBuilder in project fess by codelibs.

The method deleteOldDocuments of the class IndexingHelper.

/**
 * Deletes already-indexed documents that are superseded by the incoming documents.
 *
 * <p>For each incoming document that carries both an ID and a config ID, the index is
 * queried for documents with the same URL value and the same config ID. Every hit whose
 * ID is non-null and differs from the incoming ID contributes its doc ID (when present)
 * to a list, and all collected doc IDs are removed with a single delete-by-query against
 * the update index.
 *
 * @param searchEngineClient client used to look up and delete documents
 * @param docList incoming documents to check against the index
 */
private void deleteOldDocuments(final SearchEngineClient searchEngineClient, final DocList docList) {
    final FessConfig fessConfig = ComponentUtil.getFessConfig();
    final List<String> staleDocIds = new ArrayList<>();
    for (final Map<String, Object> inputDoc : docList) {
        // Documents lacking an ID or a config ID cannot be matched; skip them.
        final Object newId = inputDoc.get(fessConfig.getIndexFieldId());
        if (newId == null) {
            continue;
        }
        final Object configId = inputDoc.get(fessConfig.getIndexFieldConfigId());
        if (configId == null) {
            continue;
        }

        // Same URL (scored clause) under the same config (filter clause).
        final QueryBuilder queryBuilder = QueryBuilders.boolQuery()
                .must(QueryBuilders.termQuery(fessConfig.getIndexFieldUrl(), inputDoc.get(fessConfig.getIndexFieldUrl())))
                .filter(QueryBuilders.termQuery(fessConfig.getIndexFieldConfigId(), configId));
        final String[] fetchedFields = { fessConfig.getIndexFieldId(), fessConfig.getIndexFieldDocId() };
        final List<Map<String, Object>> docs = getDocumentListByQuery(searchEngineClient, queryBuilder, fetchedFields);

        for (final Map<String, Object> existing : docs) {
            final Object existingId = existing.get(fessConfig.getIndexFieldId());
            // Only hits stored under a different, non-null ID are considered stale.
            if (existingId == null || newId.equals(existingId)) {
                continue;
            }
            final Object existingDocId = existing.get(fessConfig.getIndexFieldDocId());
            if (existingDocId == null) {
                continue;
            }
            staleDocIds.add(existingDocId.toString());
        }

        if (logger.isDebugEnabled()) {
            logger.debug("{} => {}", queryBuilder, docs);
        }
    }

    if (staleDocIds.isEmpty()) {
        return;
    }
    // One bulk delete for everything collected above.
    final String[] ids = staleDocIds.toArray(new String[0]);
    searchEngineClient.deleteByQuery(fessConfig.getIndexDocumentUpdateIndex(), QueryBuilders.idsQuery().addIds(ids));
}
Also used : ArrayList(java.util.ArrayList) QueryBuilder(org.opensearch.index.query.QueryBuilder) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) Map(java.util.Map)

Example 22 with QueryBuilder

use of org.opensearch.index.query.QueryBuilder in project fess by codelibs.

The method getBoostedDocumentList of the class KeyMatchHelper.

/**
 * Returns the documents matched by the boost query registered for the given key match.
 *
 * <p>The boost entries are looked up by the key match's virtual host (blank is treated as
 * the empty string) and its lower-cased term. The first entry whose ID equals the key
 * match's ID supplies the query, which is executed against the search index with the
 * key match's max size as the result size.
 *
 * @param keyMatch key match whose boosted documents are requested
 * @return the documents returned by the search, or an empty list when no boost entry
 *         exists for the term or none matches the key match's ID
 */
public List<Map<String, Object>> getBoostedDocumentList(final KeyMatch keyMatch) {
    final SearchEngineClient searchEngineClient = ComponentUtil.getSearchEngineClient();
    final String rawVirtualHost = keyMatch.getVirtualHost();
    final String virtualHost = StringUtil.isBlank(rawVirtualHost) ? StringUtil.EMPTY : rawVirtualHost;
    final List<Tuple3<String, QueryBuilder, ScoreFunctionBuilder<?>>> boostList =
            getQueryMap(virtualHost).get(toLowerCase(keyMatch.getTerm()));
    if (boostList != null) {
        for (final Tuple3<String, QueryBuilder, ScoreFunctionBuilder<?>> entry : boostList) {
            if (keyMatch.getId().equals(entry.getValue1())) {
                // First entry registered for this key match wins.
                final FessConfig fessConfig = ComponentUtil.getFessConfig();
                return searchEngineClient.getDocumentList(fessConfig.getIndexDocumentSearchIndex(), searchRequestBuilder -> {
                    searchRequestBuilder.setPreference(Constants.SEARCH_PREFERENCE_LOCAL)
                            .setQuery(entry.getValue2())
                            .setSize(keyMatch.getMaxSize());
                    return true;
                });
            }
        }
    }
    return Collections.emptyList();
}
Also used : ScoreFunctionBuilder(org.opensearch.index.query.functionscore.ScoreFunctionBuilder) Tuple3(org.codelibs.core.misc.Tuple3) SearchEngineClient(org.codelibs.fess.es.client.SearchEngineClient) QueryBuilder(org.opensearch.index.query.QueryBuilder) BoolQueryBuilder(org.opensearch.index.query.BoolQueryBuilder) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig)

Example 23 with QueryBuilder

use of org.opensearch.index.query.QueryBuilder in project fess by codelibs.

The method run of the class IndexUpdater.

/**
 * Main loop of the index updater.
 *
 * <p>Repeatedly fetches access results (restricted to the sessions in
 * {@code sessionIdList} with an OK status), processes them into {@code docList},
 * sends the accumulated documents through {@code indexingHelper}, and cleans up
 * processed results. The loop runs until crawling is flagged as finished and the
 * last fetched batch is empty, until too many consecutive empty fetches occur,
 * until the component container is no longer available, or until an error
 * escapes the retry logic.
 *
 * <p>Failures inside one iteration are retried; once {@code errorCount} exceeds
 * {@code maxErrorCount} the exception is rethrown and handled by the outer
 * catch blocks, which log it and call {@code forceStop()}.
 *
 * @throws FessSystemException if {@code dataService} is null
 */
@Override
public void run() {
    if (dataService == null) {
        throw new FessSystemException("DataService is null.");
    }
    if (logger.isDebugEnabled()) {
        logger.debug("Starting indexUpdater.");
    }
    // Reset the per-run statistics.
    executeTime = 0;
    documentSize = 0;
    final FessConfig fessConfig = ComponentUtil.getFessConfig();
    final long updateInterval = fessConfig.getIndexerWebfsUpdateIntervalAsInteger().longValue();
    final int maxEmptyListCount = fessConfig.getIndexerWebfsMaxEmptyListCountAsInteger();
    final IntervalControlHelper intervalControlHelper = ComponentUtil.getIntervalControlHelper();
    try {
        // Search condition reused for every fetch: OK-status results of the tracked
        // sessions, oldest first, starting at offset 0, with at least one hit per page.
        final Consumer<SearchRequestBuilder> cb = builder -> {
            final QueryBuilder queryBuilder = QueryBuilders.boolQuery().filter(QueryBuilders.termsQuery(EsAccessResult.SESSION_ID, sessionIdList)).filter(QueryBuilders.termQuery(EsAccessResult.STATUS, org.codelibs.fess.crawler.Constants.OK_STATUS));
            builder.setQuery(queryBuilder);
            builder.setFrom(0);
            final int maxDocumentCacheSize = fessConfig.getIndexerWebfsMaxDocumentCacheSizeAsInteger();
            builder.setSize(maxDocumentCacheSize <= 0 ? 1 : maxDocumentCacheSize);
            builder.addSort(EsAccessResult.CREATE_TIME, SortOrder.ASC);
        };
        final DocList docList = new DocList();
        final List<EsAccessResult> accessResultList = new ArrayList<>();
        long updateTime = System.currentTimeMillis();
        int errorCount = 0;
        int emptyListCount = 0;
        // -1 until a cleanup has produced a value; passed to every fetch.
        long cleanupTime = -1;
        // Keep going while crawling is active or the previous pass still had results.
        while (!finishCrawling || !accessResultList.isEmpty()) {
            try {
                // Snapshot used below to detect whether more sessions finished during this pass.
                final int sessionIdListSize = finishedSessionIdList.size();
                intervalControlHelper.setCrawlerRunning(true);
                // updateTime held the start timestamp of the previous pass; turn it into the
                // elapsed time and sleep only for the remainder of the update interval.
                updateTime = System.currentTimeMillis() - updateTime;
                final long interval = updateInterval - updateTime;
                if (interval > 0) {
                    // sleep
                    // 10 sec (default)
                    ThreadUtil.sleep(interval);
                }
                systemHelper.calibrateCpuLoad();
                docList.clear();
                accessResultList.clear();
                intervalControlHelper.delayByRules();
                if (logger.isDebugEnabled()) {
                    logger.debug("Processing documents in IndexUpdater queue.");
                }
                // From here on updateTime is the start timestamp of this pass.
                updateTime = System.currentTimeMillis();
                List<EsAccessResult> arList = getAccessResultList(cb, cleanupTime);
                // Track consecutive empty fetches; any non-empty fetch resets the counter.
                if (arList.isEmpty()) {
                    emptyListCount++;
                } else {
                    // reset
                    emptyListCount = 0;
                }
                // The returned list is cast to EsResultList to read the total hit count.
                long hitCount = ((EsResultList<EsAccessResult>) arList).getTotalHits();
                // Drain the queue: keep fetching until no hits are reported.
                while (hitCount > 0) {
                    if (arList.isEmpty()) {
                        // Hits are reported but the page is empty: wait for the configured
                        // commit margin and reset the cleanup time before fetching again.
                        ThreadUtil.sleep(fessConfig.getIndexerWebfsCommitMarginTimeAsInteger().longValue());
                        cleanupTime = -1;
                    } else {
                        processAccessResults(docList, accessResultList, arList);
                        cleanupTime = cleanupAccessResults(accessResultList);
                    }
                    arList = getAccessResultList(cb, cleanupTime);
                    hitCount = ((EsResultList<EsAccessResult>) arList).getTotalHits();
                }
                // Send whatever documents are still held in docList.
                if (!docList.isEmpty()) {
                    indexingHelper.sendDocuments(searchEngineClient, docList);
                }
                synchronized (finishedSessionIdList) {
                    // Clean up only when there are finished sessions and their number did not
                    // change while this pass was running.
                    if (sessionIdListSize != 0 && sessionIdListSize == finishedSessionIdList.size()) {
                        cleanupFinishedSessionData();
                    }
                }
                executeTime += System.currentTimeMillis() - updateTime;
                if (logger.isDebugEnabled()) {
                    logger.debug("Processed documents in IndexUpdater queue.");
                }
                // reset count
                errorCount = 0;
            } catch (final Exception e) {
                // Tolerate failures until the error count exceeds maxErrorCount, then give up
                // and let the outer handlers deal with the exception.
                if (errorCount > maxErrorCount) {
                    throw e;
                }
                errorCount++;
                logger.warn("Failed to access data. Retry to access it {} times.", errorCount, e);
            } finally {
                // A force-stop request ends crawling regardless of how the pass went.
                if (systemHelper.isForceStop()) {
                    finishCrawling = true;
                    if (logger.isDebugEnabled()) {
                        logger.debug("Stopped indexUpdater.");
                    }
                }
            }
            // Too many consecutive empty fetches: stop, optionally dump threads, and
            // record a "QueueTimeout" error.
            if (emptyListCount >= maxEmptyListCount) {
                if (logger.isInfoEnabled()) {
                    logger.info("Terminating indexUpdater. emptyListCount is over {}.", maxEmptyListCount);
                }
                // terminate crawling
                finishCrawling = true;
                forceStop();
                if (fessConfig.getIndexerThreadDumpEnabledAsBoolean()) {
                    ThreadDumpUtil.printThreadDump();
                }
                org.codelibs.fess.exec.Crawler.addError("QueueTimeout");
            }
            // Leave the loop once ComponentUtil reports it is no longer available.
            if (!ComponentUtil.available()) {
                logger.info("IndexUpdater is terminated.");
                forceStop();
                break;
            }
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Finished indexUpdater.");
        }
    } catch (final ContainerNotAvailableException e) {
        // The stack trace is logged only when debug logging is enabled; otherwise a plain
        // info message is written.
        if (logger.isDebugEnabled()) {
            logger.error("IndexUpdater is terminated.", e);
        } else if (logger.isInfoEnabled()) {
            logger.info("IndexUpdater is terminated.");
        }
        forceStop();
    } catch (final Throwable t) {
        // While ComponentUtil is still available the failure is logged as an error only;
        // otherwise the throwable's simple class name is also recorded as a crawler error,
        // with the stack trace logged only at debug level.
        if (ComponentUtil.available()) {
            logger.error("IndexUpdater is terminated.", t);
        } else if (logger.isDebugEnabled()) {
            logger.error("IndexUpdater is terminated.", t);
            org.codelibs.fess.exec.Crawler.addError(t.getClass().getSimpleName());
        } else if (logger.isInfoEnabled()) {
            logger.info("IndexUpdater is terminated.");
            org.codelibs.fess.exec.Crawler.addError(t.getClass().getSimpleName());
        }
        forceStop();
    } finally {
        // NOTE(review): the flag is set to true (not false) on exit as well; presumably this
        // releases anything waiting on the crawler-running flag — confirm against
        // IntervalControlHelper.
        intervalControlHelper.setCrawlerRunning(true);
    }
    if (logger.isInfoEnabled()) {
        logger.info("[EXEC TIME] index update time: {}ms", executeTime);
    }
}
Also used : ThreadUtil(org.codelibs.core.lang.ThreadUtil) Constants(org.codelibs.fess.Constants) MemoryUtil(org.codelibs.fess.util.MemoryUtil) IndexingHelper(org.codelibs.fess.helper.IndexingHelper) FessSystemException(org.codelibs.fess.exception.FessSystemException) DataService(org.codelibs.fess.crawler.service.DataService) EsDataService(org.codelibs.fess.crawler.service.impl.EsDataService) SearchEngineClient(org.codelibs.fess.es.client.SearchEngineClient) Transformer(org.codelibs.fess.crawler.transformer.Transformer) ArrayList(java.util.ArrayList) PreDestroy(javax.annotation.PreDestroy) IngestFactory(org.codelibs.fess.ingest.IngestFactory) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) SortOrder(org.opensearch.search.sort.SortOrder) EsAccessResult(org.codelibs.fess.crawler.entity.EsAccessResult) EsUrlQueue(org.codelibs.fess.crawler.entity.EsUrlQueue) Map(java.util.Map) AccessResultData(org.codelibs.fess.crawler.entity.AccessResultData) FavoriteLogBhv(org.codelibs.fess.es.log.exbhv.FavoriteLogBhv) IntervalControlHelper(org.codelibs.fess.helper.IntervalControlHelper) SearchRequestBuilder(org.opensearch.action.search.SearchRequestBuilder) UrlFilterService(org.codelibs.fess.crawler.service.UrlFilterService) Crawler(org.codelibs.fess.crawler.Crawler) QueryBuilders(org.opensearch.index.query.QueryBuilders) ClickLogBhv(org.codelibs.fess.es.log.exbhv.ClickLogBhv) ContainerNotAvailableException(org.codelibs.fess.exception.ContainerNotAvailableException) Resource(javax.annotation.Resource) StringUtil(org.codelibs.core.lang.StringUtil) Consumer(java.util.function.Consumer) UrlQueueService(org.codelibs.fess.crawler.service.UrlQueueService) List(java.util.List) Logger(org.apache.logging.log4j.Logger) QueryBuilder(org.opensearch.index.query.QueryBuilder) SearchLogHelper(org.codelibs.fess.helper.SearchLogHelper) ComponentUtil(org.codelibs.fess.util.ComponentUtil) SystemHelper(org.codelibs.fess.helper.SystemHelper) 
ThreadDumpUtil(org.codelibs.fess.util.ThreadDumpUtil) PostConstruct(javax.annotation.PostConstruct) AccessResult(org.codelibs.fess.crawler.entity.AccessResult) DocList(org.codelibs.fess.util.DocList) LogManager(org.apache.logging.log4j.LogManager) Ingester(org.codelibs.fess.ingest.Ingester) EsResultList(org.codelibs.fess.crawler.util.EsResultList) ContainerNotAvailableException(org.codelibs.fess.exception.ContainerNotAvailableException) SearchRequestBuilder(org.opensearch.action.search.SearchRequestBuilder) ArrayList(java.util.ArrayList) QueryBuilder(org.opensearch.index.query.QueryBuilder) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) FessSystemException(org.codelibs.fess.exception.FessSystemException) ContainerNotAvailableException(org.codelibs.fess.exception.ContainerNotAvailableException) FessSystemException(org.codelibs.fess.exception.FessSystemException) EsAccessResult(org.codelibs.fess.crawler.entity.EsAccessResult) DocList(org.codelibs.fess.util.DocList) IntervalControlHelper(org.codelibs.fess.helper.IntervalControlHelper) EsResultList(org.codelibs.fess.crawler.util.EsResultList)

Example 24 with QueryBuilder

use of org.opensearch.index.query.QueryBuilder in project fess by codelibs.

The method load of the class KeyMatchHelper.

/**
 * Rebuilds the key-match boost query map and replaces the current one.
 *
 * <p>For every available key match, the doc IDs of its documents are combined into a
 * bool query of {@code should} term clauses. When at least one clause exists, the query
 * is registered, together with the key match's ID and a weight function built from its
 * boost, under the key match's virtual host (blank is treated as the empty string) and
 * lower-cased term. A failure while loading one key match is logged as a warning and
 * does not stop the remaining key matches from loading.
 *
 * @return the number of virtual-host entries in the newly built map
 */
@Override
public int load() {
    final FessConfig fessConfig = ComponentUtil.getFessConfig();
    final Map<String, Map<String, List<Tuple3<String, QueryBuilder, ScoreFunctionBuilder<?>>>>> loadedMap = new HashMap<>();
    getAvailableKeyMatchList().stream().forEach(keyMatch -> {
        try {
            final BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
            if (logger.isDebugEnabled()) {
                logger.debug("Loading KeyMatch Query: {}, Size: {}", keyMatch.getQuery(), keyMatch.getMaxSize());
            }
            // One "should" clause per document matched by this key match.
            getDocumentList(keyMatch).stream().forEach(doc -> {
                if (logger.isDebugEnabled()) {
                    logger.debug("Loaded KeyMatch doc: {}", doc);
                }
                final String docId = DocumentUtil.getValue(doc, fessConfig.getIndexFieldDocId(), String.class);
                boolQuery.should(QueryBuilders.termQuery(fessConfig.getIndexFieldDocId(), docId));
            });
            if (!boolQuery.hasClauses()) {
                if (logger.isDebugEnabled()) {
                    logger.debug("No KeyMatch boost docs");
                }
            } else {
                if (logger.isDebugEnabled()) {
                    logger.debug("Loaded KeyMatch Boost Query: {}", boolQuery);
                }
                final String rawVirtualHost = keyMatch.getVirtualHost();
                final String virtualHost = StringUtil.isBlank(rawVirtualHost) ? StringUtil.EMPTY : rawVirtualHost;
                // Create the per-host and per-term containers on first use.
                final Map<String, List<Tuple3<String, QueryBuilder, ScoreFunctionBuilder<?>>>> queryMap =
                        loadedMap.computeIfAbsent(virtualHost, host -> new HashMap<>());
                final List<Tuple3<String, QueryBuilder, ScoreFunctionBuilder<?>>> boostList =
                        queryMap.computeIfAbsent(toLowerCase(keyMatch.getTerm()), term -> new ArrayList<>());
                boostList.add(new Tuple3<>(keyMatch.getId(), boolQuery, ScoreFunctionBuilders.weightFactorFunction(keyMatch.getBoost())));
            }
            waitForNext();
        } catch (final Exception e) {
            logger.warn("Cannot load {}", keyMatch, e);
        }
    });
    // Swap in the fully built map in a single assignment.
    this.keyMatchQueryMap = loadedMap;
    return loadedMap.size();
}
Also used : Constants(org.codelibs.fess.Constants) DocumentUtil(org.codelibs.fess.util.DocumentUtil) ScoreFunctionBuilder(org.opensearch.index.query.functionscore.ScoreFunctionBuilder) HashMap(java.util.HashMap) SearchEngineClient(org.codelibs.fess.es.client.SearchEngineClient) SearchConditionBuilder(org.codelibs.fess.es.client.SearchEngineClient.SearchConditionBuilder) ArrayList(java.util.ArrayList) SearchRequestType(org.codelibs.fess.entity.SearchRequestParams.SearchRequestType) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) KeyMatchBhv(org.codelibs.fess.es.config.exbhv.KeyMatchBhv) Locale(java.util.Locale) Map(java.util.Map) KeyMatch(org.codelibs.fess.es.config.exentity.KeyMatch) QueryBuilders(org.opensearch.index.query.QueryBuilders) StringUtil(org.codelibs.core.lang.StringUtil) ScoreFunctionBuilders(org.opensearch.index.query.functionscore.ScoreFunctionBuilders) Tuple3(org.codelibs.core.misc.Tuple3) List(java.util.List) Logger(org.apache.logging.log4j.Logger) QueryBuilder(org.opensearch.index.query.QueryBuilder) ComponentUtil(org.codelibs.fess.util.ComponentUtil) PostConstruct(javax.annotation.PostConstruct) FilterFunctionBuilder(org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder.FilterFunctionBuilder) Collections(java.util.Collections) LogManager(org.apache.logging.log4j.LogManager) BoolQueryBuilder(org.opensearch.index.query.BoolQueryBuilder) ScoreFunctionBuilder(org.opensearch.index.query.functionscore.ScoreFunctionBuilder) HashMap(java.util.HashMap) QueryBuilder(org.opensearch.index.query.QueryBuilder) BoolQueryBuilder(org.opensearch.index.query.BoolQueryBuilder) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) BoolQueryBuilder(org.opensearch.index.query.BoolQueryBuilder) Tuple3(org.codelibs.core.misc.Tuple3) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 25 with QueryBuilder

use of org.opensearch.index.query.QueryBuilder in project fess by codelibs.

The method deleteByConfigId of the class IndexingHelper.

/**
 * Deletes, from the update index, every document whose config-ID field equals the
 * given value.
 *
 * @param configId config ID to match
 * @return the value returned by {@code deleteByQueryBuilder} for this query
 */
public long deleteByConfigId(final String configId) {
    final FessConfig fessConfig = ComponentUtil.getFessConfig();
    return deleteByQueryBuilder(fessConfig.getIndexDocumentUpdateIndex(),
            QueryBuilders.termQuery(fessConfig.getIndexFieldConfigId(), configId));
}
Also used : QueryBuilder(org.opensearch.index.query.QueryBuilder) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig)

Aggregations

QueryBuilder (org.opensearch.index.query.QueryBuilder)47 FessConfig (org.codelibs.fess.mylasta.direction.FessConfig)11 SearchEngineClient (org.codelibs.fess.es.client.SearchEngineClient)6 BoolQueryBuilder (org.opensearch.index.query.BoolQueryBuilder)6 Map (java.util.Map)5 LogManager (org.apache.logging.log4j.LogManager)4 Logger (org.apache.logging.log4j.Logger)4 StringUtil (org.codelibs.core.lang.StringUtil)4 Constants (org.codelibs.fess.Constants)4 MatchPhraseQueryBuilder (org.opensearch.index.query.MatchPhraseQueryBuilder)4 PrefixQueryBuilder (org.opensearch.index.query.PrefixQueryBuilder)4 ArrayList (java.util.ArrayList)3 List (java.util.List)3 Resource (javax.annotation.Resource)3 ComponentUtil (org.codelibs.fess.util.ComponentUtil)3 QueryBuilders (org.opensearch.index.query.QueryBuilders)3 HashMap (java.util.HashMap)2 Consumer (java.util.function.Consumer)2 PostConstruct (javax.annotation.PostConstruct)2 Tuple3 (org.codelibs.core.misc.Tuple3)2