use of org.opensearch.index.query.QueryBuilder in project fess by codelibs.
the class IndexingHelper method deleteOldDocuments.
/**
 * Deletes previously indexed documents that are superseded by the incoming ones.
 *
 * For every incoming document that has an id, a config id and a URL, the index is
 * searched for documents with the same URL and config id. Hits whose id differs
 * from the incoming id contribute their doc-id field value to a list, and that
 * list is finally removed from the update index with a single ids query.
 *
 * @param searchEngineClient client used for the lookups and the delete request
 * @param docList incoming documents about to be indexed
 */
private void deleteOldDocuments(final SearchEngineClient searchEngineClient, final DocList docList) {
    final FessConfig fessConfig = ComponentUtil.getFessConfig();
    final List<String> docIdList = new ArrayList<>();
    for (final Map<String, Object> inputDoc : docList) {
        final Object idValue = inputDoc.get(fessConfig.getIndexFieldId());
        if (idValue == null) {
            continue;
        }
        final Object configIdValue = inputDoc.get(fessConfig.getIndexFieldConfigId());
        if (configIdValue == null) {
            continue;
        }
        final Object urlValue = inputDoc.get(fessConfig.getIndexFieldUrl());
        if (urlValue == null) {
            // A term query cannot be built from a null value (the builder throws),
            // and without a URL there is nothing to match old documents against.
            // Skip this document instead of aborting the cleanup of the whole batch.
            continue;
        }
        final QueryBuilder queryBuilder = QueryBuilders.boolQuery()
                .must(QueryBuilders.termQuery(fessConfig.getIndexFieldUrl(), urlValue))
                .filter(QueryBuilders.termQuery(fessConfig.getIndexFieldConfigId(), configIdValue));
        final List<Map<String, Object>> docs = getDocumentListByQuery(searchEngineClient, queryBuilder,
                new String[] { fessConfig.getIndexFieldId(), fessConfig.getIndexFieldDocId() });
        for (final Map<String, Object> doc : docs) {
            final Object oldIdValue = doc.get(fessConfig.getIndexFieldId());
            // Only documents stored under a different id are stale; the one with the
            // same id is presumably overwritten by the upcoming index request.
            if (oldIdValue != null && !idValue.equals(oldIdValue)) {
                final Object oldDocIdValue = doc.get(fessConfig.getIndexFieldDocId());
                if (oldDocIdValue != null) {
                    docIdList.add(oldDocIdValue.toString());
                }
            }
        }
        if (logger.isDebugEnabled()) {
            logger.debug("{} => {}", queryBuilder, docs);
        }
    }
    if (!docIdList.isEmpty()) {
        searchEngineClient.deleteByQuery(fessConfig.getIndexDocumentUpdateIndex(),
                QueryBuilders.idsQuery().addIds(docIdList.toArray(new String[0])));
    }
}
use of org.opensearch.index.query.QueryBuilder in project fess by codelibs.
the class KeyMatchHelper method getBoostedDocumentList.
/**
 * Returns the documents boosted by the given key match.
 *
 * The boost entries registered for the key match's virtual host (blank hosts are
 * treated as the empty string) and lower-cased term are scanned for the entry whose
 * id equals the key match id; that entry's query is then executed against the
 * document search index, limited to the key match's max size.
 *
 * @param keyMatch the key match whose boosted documents are requested
 * @return the matching documents, or an empty list when no entry is registered
 */
public List<Map<String, Object>> getBoostedDocumentList(final KeyMatch keyMatch) {
    final SearchEngineClient client = ComponentUtil.getSearchEngineClient();
    final String rawHost = keyMatch.getVirtualHost();
    final String hostKey = StringUtil.isBlank(rawHost) ? StringUtil.EMPTY : rawHost;
    final List<Tuple3<String, QueryBuilder, ScoreFunctionBuilder<?>>> candidates =
            getQueryMap(hostKey).get(toLowerCase(keyMatch.getTerm()));
    if (candidates != null) {
        for (final Tuple3<String, QueryBuilder, ScoreFunctionBuilder<?>> candidate : candidates) {
            if (keyMatch.getId().equals(candidate.getValue1())) {
                final FessConfig config = ComponentUtil.getFessConfig();
                final String searchIndex = config.getIndexDocumentSearchIndex();
                return client.getDocumentList(searchIndex, requestBuilder -> {
                    requestBuilder.setPreference(Constants.SEARCH_PREFERENCE_LOCAL)
                            .setQuery(candidate.getValue2())
                            .setSize(keyMatch.getMaxSize());
                    return true;
                });
            }
        }
    }
    return Collections.emptyList();
}
use of org.opensearch.index.query.QueryBuilder in project fess by codelibs.
the class IndexUpdater method run.
/**
 * Main loop of the index updater.
 *
 * While crawling is not finished (or access results are still pending), each
 * iteration waits for the remainder of the configured update interval, fetches
 * access results with OK status for the known session ids, passes them to
 * {@code processAccessResults}/{@code cleanupAccessResults}, and sends the
 * collected documents to the search engine. The loop ends when crawling is
 * flagged as finished, when too many consecutive empty fetches occurred, when
 * the component container becomes unavailable, or when an error escapes the
 * retry handling.
 *
 * @throws FessSystemException if no data service has been set
 */
@Override
public void run() {
    if (dataService == null) {
        throw new FessSystemException("DataService is null.");
    }
    if (logger.isDebugEnabled()) {
        logger.debug("Starting indexUpdater.");
    }
    executeTime = 0;
    documentSize = 0;
    final FessConfig fessConfig = ComponentUtil.getFessConfig();
    final long updateInterval = fessConfig.getIndexerWebfsUpdateIntervalAsInteger().longValue();
    final int maxEmptyListCount = fessConfig.getIndexerWebfsMaxEmptyListCountAsInteger();
    final IntervalControlHelper intervalControlHelper = ComponentUtil.getIntervalControlHelper();
    try {
        // Search request customizer: OK-status results of the tracked sessions,
        // oldest first, at most one cache-size page (never fewer than 1).
        final Consumer<SearchRequestBuilder> cb = builder -> {
            final QueryBuilder queryBuilder = QueryBuilders.boolQuery()
                    .filter(QueryBuilders.termsQuery(EsAccessResult.SESSION_ID, sessionIdList))
                    .filter(QueryBuilders.termQuery(EsAccessResult.STATUS, org.codelibs.fess.crawler.Constants.OK_STATUS));
            builder.setQuery(queryBuilder);
            builder.setFrom(0);
            final int maxDocumentCacheSize = fessConfig.getIndexerWebfsMaxDocumentCacheSizeAsInteger();
            builder.setSize(maxDocumentCacheSize <= 0 ? 1 : maxDocumentCacheSize);
            builder.addSort(EsAccessResult.CREATE_TIME, SortOrder.ASC);
        };
        final DocList docList = new DocList();
        final List<EsAccessResult> accessResultList = new ArrayList<>();
        long updateTime = System.currentTimeMillis();
        int errorCount = 0;
        int emptyListCount = 0;
        long cleanupTime = -1;
        while (!finishCrawling || !accessResultList.isEmpty()) {
            try {
                // Snapshot taken before processing; compared again below to detect
                // sessions that finished while this iteration was running.
                final int sessionIdListSize = finishedSessionIdList.size();
                intervalControlHelper.setCrawlerRunning(true);
                // updateTime held the start timestamp of the previous iteration;
                // it now becomes the elapsed time since then.
                updateTime = System.currentTimeMillis() - updateTime;
                final long interval = updateInterval - updateTime;
                if (interval > 0) {
                    // sleep
                    // 10 sec (default)
                    ThreadUtil.sleep(interval);
                }
                systemHelper.calibrateCpuLoad();
                docList.clear();
                accessResultList.clear();
                intervalControlHelper.delayByRules();
                if (logger.isDebugEnabled()) {
                    logger.debug("Processing documents in IndexUpdater queue.");
                }
                updateTime = System.currentTimeMillis();
                List<EsAccessResult> arList = getAccessResultList(cb, cleanupTime);
                if (arList.isEmpty()) {
                    emptyListCount++;
                } else {
                    // reset
                    emptyListCount = 0;
                }
                long hitCount = ((EsResultList<EsAccessResult>) arList).getTotalHits();
                while (hitCount > 0) {
                    if (arList.isEmpty()) {
                        // Hits are reported but none were returned: wait for the
                        // commit margin before querying again.
                        ThreadUtil.sleep(fessConfig.getIndexerWebfsCommitMarginTimeAsInteger().longValue());
                        cleanupTime = -1;
                    } else {
                        processAccessResults(docList, accessResultList, arList);
                        cleanupTime = cleanupAccessResults(accessResultList);
                    }
                    arList = getAccessResultList(cb, cleanupTime);
                    hitCount = ((EsResultList<EsAccessResult>) arList).getTotalHits();
                }
                if (!docList.isEmpty()) {
                    indexingHelper.sendDocuments(searchEngineClient, docList);
                }
                synchronized (finishedSessionIdList) {
                    // Clean up only when the finished-session list is non-empty and
                    // did not change during this iteration.
                    if (sessionIdListSize != 0 && sessionIdListSize == finishedSessionIdList.size()) {
                        cleanupFinishedSessionData();
                    }
                }
                executeTime += System.currentTimeMillis() - updateTime;
                if (logger.isDebugEnabled()) {
                    logger.debug("Processed documents in IndexUpdater queue.");
                }
                // reset count
                errorCount = 0;
            } catch (final Exception e) {
                // Tolerate consecutive failures up to maxErrorCount, then give up.
                if (errorCount > maxErrorCount) {
                    throw e;
                }
                errorCount++;
                logger.warn("Failed to access data. Retry to access it {} times.", errorCount, e);
            } finally {
                if (systemHelper.isForceStop()) {
                    finishCrawling = true;
                    if (logger.isDebugEnabled()) {
                        logger.debug("Stopped indexUpdater.");
                    }
                }
            }
            if (emptyListCount >= maxEmptyListCount) {
                if (logger.isInfoEnabled()) {
                    logger.info("Terminating indexUpdater. emptyListCount is over {}.", maxEmptyListCount);
                }
                // terminate crawling
                finishCrawling = true;
                forceStop();
                if (fessConfig.getIndexerThreadDumpEnabledAsBoolean()) {
                    ThreadDumpUtil.printThreadDump();
                }
                org.codelibs.fess.exec.Crawler.addError("QueueTimeout");
            }
            if (!ComponentUtil.available()) {
                logger.info("IndexUpdater is terminated.");
                forceStop();
                break;
            }
        }
        if (logger.isDebugEnabled()) {
            logger.debug("Finished indexUpdater.");
        }
    } catch (final ContainerNotAvailableException e) {
        if (logger.isDebugEnabled()) {
            logger.error("IndexUpdater is terminated.", e);
        } else if (logger.isInfoEnabled()) {
            logger.info("IndexUpdater is terminated.");
        }
        forceStop();
    } catch (final Throwable t) {
        if (ComponentUtil.available()) {
            logger.error("IndexUpdater is terminated.", t);
        } else if (logger.isDebugEnabled()) {
            logger.error("IndexUpdater is terminated.", t);
            org.codelibs.fess.exec.Crawler.addError(t.getClass().getSimpleName());
        } else if (logger.isInfoEnabled()) {
            logger.info("IndexUpdater is terminated.");
            org.codelibs.fess.exec.Crawler.addError(t.getClass().getSimpleName());
        }
        forceStop();
    } finally {
        // The updater is leaving its loop, so clear the running flag that every
        // iteration sets to true; leaving it set would keep reporting a running
        // crawler after this thread has stopped.
        intervalControlHelper.setCrawlerRunning(false);
    }
    if (logger.isInfoEnabled()) {
        logger.info("[EXEC TIME] index update time: {}ms", executeTime);
    }
}
use of org.opensearch.index.query.QueryBuilder in project fess by codelibs.
the class KeyMatchHelper method load.
/**
 * Rebuilds the key match boost map and installs it as the current one.
 *
 * For each available key match, the doc ids of its documents are combined into a
 * bool query of "should" term clauses. When at least one clause exists, a tuple of
 * (key match id, bool query, weight function built from the key match boost) is
 * appended to the list stored under virtual host (blank becomes the empty string)
 * and lower-cased term. A failure on one key match is logged and does not stop
 * the others.
 *
 * @return the number of virtual host entries in the rebuilt map
 */
@Override
public int load() {
    final FessConfig fessConfig = ComponentUtil.getFessConfig();
    final Map<String, Map<String, List<Tuple3<String, QueryBuilder, ScoreFunctionBuilder<?>>>>> loadedMap = new HashMap<>();
    getAvailableKeyMatchList().stream().forEach(keyMatch -> {
        try {
            final BoolQueryBuilder docIdQuery = QueryBuilders.boolQuery();
            if (logger.isDebugEnabled()) {
                logger.debug("Loading KeyMatch Query: {}, Size: {}", keyMatch.getQuery(), keyMatch.getMaxSize());
            }
            getDocumentList(keyMatch).stream().map(doc -> {
                if (logger.isDebugEnabled()) {
                    logger.debug("Loaded KeyMatch doc: {}", doc);
                }
                return DocumentUtil.getValue(doc, fessConfig.getIndexFieldDocId(), String.class);
            }).forEach(docId -> docIdQuery.should(QueryBuilders.termQuery(fessConfig.getIndexFieldDocId(), docId)));
            if (!docIdQuery.hasClauses()) {
                if (logger.isDebugEnabled()) {
                    logger.debug("No KeyMatch boost docs");
                }
            } else {
                if (logger.isDebugEnabled()) {
                    logger.debug("Loaded KeyMatch Boost Query: {}", docIdQuery);
                }
                final String rawHost = keyMatch.getVirtualHost();
                final String hostKey = StringUtil.isBlank(rawHost) ? StringUtil.EMPTY : rawHost;
                // Create the per-host and per-term containers on first use.
                final Map<String, List<Tuple3<String, QueryBuilder, ScoreFunctionBuilder<?>>>> termMap =
                        loadedMap.computeIfAbsent(hostKey, key -> new HashMap<>());
                final String termKey = toLowerCase(keyMatch.getTerm());
                final List<Tuple3<String, QueryBuilder, ScoreFunctionBuilder<?>>> entries =
                        termMap.computeIfAbsent(termKey, key -> new ArrayList<>());
                entries.add(new Tuple3<>(keyMatch.getId(), docIdQuery,
                        ScoreFunctionBuilders.weightFactorFunction(keyMatch.getBoost())));
            }
            waitForNext();
        } catch (final Exception e) {
            logger.warn("Cannot load {}", keyMatch, e);
        }
    });
    this.keyMatchQueryMap = loadedMap;
    return loadedMap.size();
}
use of org.opensearch.index.query.QueryBuilder in project fess by codelibs.
the class IndexingHelper method deleteByConfigId.
/**
 * Deletes the documents in the update index whose config id field equals the
 * given value.
 *
 * @param configId the config id to match with a term query
 * @return the value returned by {@code deleteByQueryBuilder} for that query
 */
public long deleteByConfigId(final String configId) {
    final FessConfig config = ComponentUtil.getFessConfig();
    return deleteByQueryBuilder(config.getIndexDocumentUpdateIndex(),
            QueryBuilders.termQuery(config.getIndexFieldConfigId(), configId));
}
Aggregations