Use of io.openk9.search.api.query.SearchRequest in project openk9 by smclab.
The class BaseAggregatorAnnotator, method annotate_:
@Override
public List<CategorySemantics> annotate_(long tenantId, String... tokens) {
    // Fall back to the default (-1) keyword list when the tenant has none.
    List<String> normalizedKeywords =
        tenantKeywordsMap.getOrDefault(tenantId, tenantKeywordsMap.get(-1L));

    if (normalizedKeywords == null) {
        return List.of();
    }

    RestHighLevelClient restHighLevelClient = restHighLevelClientProvider.get();

    String token;

    if (tokens.length == 1) {
        token = tokens[0];
    }
    else {
        token = String.join(" ", tokens);
    }

    BoolQueryBuilder builder = QueryBuilders.boolQuery();
    BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();

    // The token must match at least one of the configured keyword fields.
    for (String keyword : normalizedKeywords) {
        boolQueryBuilder.should(query(keyword, token));
    }

    builder.must(boolQueryBuilder);

    SearchRequest searchRequest;

    if (tenantId == -1) {
        searchRequest = new SearchRequest("*-*-data");
    }
    else {
        searchRequest = new SearchRequest(tenantId + "-*-data");
    }

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

    // Only the aggregations are needed, so no hits are returned.
    searchSourceBuilder.size(0);
    searchSourceBuilder.query(builder);

    for (String keyword : normalizedKeywords) {
        searchSourceBuilder.aggregation(
            AggregationBuilders.terms(keyword).field(keyword).size(10));
    }

    searchRequest.source(searchSourceBuilder);

    if (_log.isDebugEnabled()) {
        _log.debug(builder.toString());
    }

    List<Tuple> scoreKeys = new ArrayList<>();

    try {
        SearchResponse search =
            restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        for (Aggregation aggregation : search.getAggregations()) {
            Terms terms = (Terms) aggregation;

            for (Terms.Bucket bucket : terms.getBuckets()) {
                String keyAsString = bucket.getKeyAsString();

                // An exact (case-insensitive) match wins immediately.
                if (token.equalsIgnoreCase(keyAsString)) {
                    return List.of(
                        _createCategorySemantics(terms.getName(), keyAsString));
                }

                // Otherwise keep the candidate and score it lazily.
                scoreKeys.add(
                    Tuple.of(
                        (Supplier<Double>)
                            () -> _levenshteinDistance(token, keyAsString),
                        keyAsString, terms.getName()));
            }
        }
    }
    catch (IOException e) {
        _log.error(e.getMessage(), e);
    }

    if (scoreKeys.isEmpty()) {
        return List.of();
    }

    // Highest score first; the best candidate is taken.
    scoreKeys.sort(
        Collections.reverseOrder(
            Comparator.comparingDouble(t -> ((Supplier<Double>) t.get(0)).get())));

    String key = (String) scoreKeys.get(0).get(1);
    String name = (String) scoreKeys.get(0).get(2);

    return List.of(_createCategorySemantics(name, key));
}
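The candidates collected above are ranked by _levenshteinDistance, a helper that is not shown on this page. Because the list is sorted with Collections.reverseOrder and only the first element is used, the helper presumably returns a similarity score where higher means closer. The sketch below is a hypothetical implementation under that assumption, not the actual openk9 code.

// Hypothetical sketch: normalized Levenshtein similarity in [0, 1].
// Higher values mean closer strings, which matches the reverse-order sort above.
private static double _levenshteinDistance(String a, String b) {
    int[][] d = new int[a.length() + 1][b.length() + 1];

    for (int i = 0; i <= a.length(); i++) d[i][0] = i;
    for (int j = 0; j <= b.length(); j++) d[0][j] = j;

    for (int i = 1; i <= a.length(); i++) {
        for (int j = 1; j <= b.length(); j++) {
            int cost = (a.charAt(i - 1) == b.charAt(j - 1)) ? 0 : 1;
            d[i][j] = Math.min(
                Math.min(d[i - 1][j] + 1, d[i][j - 1] + 1), d[i - 1][j - 1] + cost);
        }
    }

    int maxLength = Math.max(a.length(), b.length());

    // Identical strings score 1.0; completely different strings approach 0.0.
    return maxLength == 0 ? 1.0 : 1.0 - ((double) d[a.length()][b.length()] / maxLength);
}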
Use of io.openk9.search.api.query.SearchRequest in project openk9 by smclab.
The class BaseNerAnnotator, method annotate_:
@Override
public List<CategorySemantics> annotate_(long tenantId, String... tokens) {
    _log.debug(Arrays.toString(tokens));

    if (_containsStopword(tokens)) {
        return List.of();
    }

    RestHighLevelClient restHighLevelClient = restHighLevelClientProvider.get();

    BoolQueryBuilder builder = QueryBuilders.boolQuery();

    // Restrict the search to entities of the configured category.
    builder.must(QueryBuilders.matchQuery("type.keyword", category));

    for (String token : tokens) {
        if (!stopWords.contains(token)) {
            builder.must(query("name", token));
        }
    }

    SearchRequest searchRequest;

    if (tenantId == -1) {
        searchRequest = new SearchRequest("*-entity");
    }
    else {
        searchRequest = new SearchRequest(tenantId + "-entity");
    }

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

    searchSourceBuilder.size(_annotatorConfig.nerSize());
    searchSourceBuilder.query(builder);

    searchRequest.source(searchSourceBuilder);

    List<CategorySemantics> list = new ArrayList<>();

    if (_log.isDebugEnabled()) {
        _log.debug(builder.toString());
    }

    try {
        SearchResponse search =
            restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        // Map every entity hit to a CategorySemantics entry.
        for (SearchHit hit : search.getHits()) {
            Map<String, Object> semantics = hit.getSourceAsMap();

            list.add(
                CategorySemantics.of(
                    "$" + semantics.get("type"),
                    Map.of(
                        "tokenType", "ENTITY",
                        "entityType", semantics.get("type"),
                        "entityName", semantics.get("name"),
                        "tenantId", semantics.get("tenantId"),
                        "value", semantics.get("id"),
                        "score", hit.getScore())));
        }

        if (_log.isDebugEnabled()) {
            _log.debug(list.toString());
        }
    }
    catch (IOException e) {
        _log.error(e.getMessage(), e);
    }

    return list;
}
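The query("name", token) helper and the entity index mapping are not shown here. For context, the loop above reads the fields type, name, tenantId and id from each hit, so an entity document is expected to carry at least those fields. A minimal, purely illustrative indexing sketch (index name and values are made up):

// Illustrative only: index an entity document with the fields that annotate_
// reads back ("type", "name", "tenantId", "id"). Index name and values are
// assumptions, not taken from openk9.
IndexRequest indexRequest = new IndexRequest("1-entity");

indexRequest.source(
    Map.of(
        "id", 42L,
        "name", "Rome",
        "type", "loc",
        "tenantId", 1L),
    XContentType.JSON);

restHighLevelClient.index(indexRequest, RequestOptions.DEFAULT);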
Use of io.openk9.search.api.query.SearchRequest in project openk9 by smclab.
The class IndexWriterEndpoins, method _cleanOrphanEntitiesConsumer:
private void _cleanOrphanEntitiesConsumer(Message<Long> message) {
    Long tenantId = message.body();

    RestHighLevelClient client = _restHighLevelClientProvider.get();

    try {
        final Scroll scroll = new Scroll(TimeValue.timeValueSeconds(20));

        String entityIndexName = tenantId + "-entity";

        // Scroll over every entity of the tenant.
        SearchRequest searchRequest = new SearchRequest(entityIndexName);
        searchRequest.scroll(scroll);

        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchRequest.source(searchSourceBuilder);

        SearchResponse searchResponse =
            client.search(searchRequest, RequestOptions.DEFAULT);

        String scrollId = searchResponse.getScrollId();
        SearchHit[] searchHits = searchResponse.getHits().getHits();

        Collection<String> entitiesToDelete = new HashSet<>();
        Collection<String> entityNames = new ArrayList<>();

        while (searchHits != null && searchHits.length > 0) {
            for (SearchHit searchHit : searchHits) {
                Map<String, Object> source = searchHit.getSourceAsMap();

                Object id = source.get("id");
                String name = String.valueOf(source.get("name"));

                String nestEntityPath = "entities";
                String nestIdPath = nestEntityPath + ".id";

                // An entity is an orphan when no data document references it.
                CountRequest countRequest = new CountRequest(tenantId + "-*-data");
                countRequest.query(matchQuery(nestIdPath, id));

                CountResponse countResponse =
                    client.count(countRequest, RequestOptions.DEFAULT);

                if (countResponse.getCount() == 0) {
                    entitiesToDelete.add(searchHit.getId());
                    entityNames.add(name);
                }
            }

            // Fetch the next page of the scroll.
            SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId);
            scrollRequest.scroll(scroll);

            searchResponse = client.scroll(scrollRequest, RequestOptions.DEFAULT);

            scrollId = searchResponse.getScrollId();
            searchHits = searchResponse.getHits().getHits();
        }

        ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
        clearScrollRequest.addScrollId(scrollId);

        ClearScrollResponse clearScrollResponse =
            client.clearScroll(clearScrollRequest, RequestOptions.DEFAULT);

        boolean succeeded = clearScrollResponse.isSucceeded();

        if (!entitiesToDelete.isEmpty()) {
            // Remove the orphan entities in a single bulk request.
            BulkRequest bulkRequest = new BulkRequest();

            bulkRequest.add(
                entitiesToDelete.stream()
                    .map(id -> new DeleteRequest(entityIndexName, id))
                    .collect(Collectors.toList()))
                .setRefreshPolicy(WriteRequest.RefreshPolicy.WAIT_UNTIL);

            BulkResponse bulkResponse =
                client.bulk(bulkRequest, RequestOptions.DEFAULT);
        }

        message.reply("Entities deleted " + entityNames);
    }
    catch (Exception e) {
        message.reply(e.getMessage());
    }
}
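The consumer receives a Message<Long> and answers with message.reply, which is the shape of a Vert.x event-bus handler. Below is a hypothetical invocation sketch; the event-bus address is an assumption for illustration only, since the address that IndexWriterEndpoins actually registers is not shown on this page.

// Hypothetical: ask the consumer to clean the orphan entities of tenant 1.
// The address string is made up for this example.
vertx.eventBus().<String>request(
    "index-writer.clean-orphan-entities", 1L,
    asyncResult -> {
        if (asyncResult.succeeded()) {
            // e.g. "Entities deleted [ ... ]"
            System.out.println(asyncResult.result().body());
        }
        else {
            asyncResult.cause().printStackTrace();
        }
    });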
Use of io.openk9.search.api.query.SearchRequest in project openk9 by smclab.
The class InsertIndexWriter, method _createDocWriterRequest:
private Mono<DocWriteRequest> _createDocWriterRequest(String indexName, ObjectNode enrichProcessorContext) {

    return Mono.defer(() -> {
        ObjectNode objectNode = enrichProcessorContext.get("payload").toObjectNode();
        String contentId = objectNode.get("contentId").asText();

        return _search.search(factory -> {
                SearchRequest searchRequest = new SearchRequest(indexName);
                MatchQueryBuilder matchQueryBuilder =
                    QueryBuilders.matchQuery("contentId", contentId);
                SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
                searchSourceBuilder.query(matchQueryBuilder);
                return searchRequest.source(searchSourceBuilder);
            })
            .onErrorReturn(SearchUtil.EMPTY_SEARCH_RESPONSE)
            .filter(e -> e.getHits().getHits().length > 0)
            .flatMapIterable(SearchResponse::getHits)
            .next()
            .map(e -> new UpdateRequest(indexName, e.getId())
                .doc(objectNode.toString(), XContentType.JSON))
            .cast(DocWriteRequest.class)
            .switchIfEmpty(Mono.fromSupplier(() ->
                new IndexRequest(indexName)
                    .source(objectNode.toString(), XContentType.JSON)));
    });
}
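The method emits a single DocWriteRequest: an UpdateRequest when a document with the same contentId already exists, otherwise an IndexRequest. How InsertIndexWriter actually dispatches that request is not shown here; the following is only a sketch of one way to execute it, assuming a RestHighLevelClient named client is available.

// Sketch (assumption): wrap the emitted DocWriteRequest in a BulkRequest and
// execute it with a blocking call wrapped in Mono.fromCallable.
_createDocWriterRequest(indexName, enrichProcessorContext)
    .map(docWriteRequest -> new BulkRequest().add(docWriteRequest))
    .flatMap(bulkRequest -> Mono.fromCallable(
        () -> client.bulk(bulkRequest, RequestOptions.DEFAULT)))
    .subscribe(
        bulkResponse -> _log.info("bulk failures: " + bulkResponse.hasFailures()),
        throwable -> _log.error(throwable.getMessage(), throwable));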
Use of io.openk9.search.api.query.SearchRequest in project openk9 by smclab.
The class BaseSearchHTTPHandler, method _toQuerySearchRequest:
private Mono<SearchResponse> _toQuerySearchRequest(
    Tenant tenant, List<Datasource> datasources, PluginDriverDTOList pdDTOList,
    SearchRequest searchRequest, HttpServerRequest httpRequest) {

    return Mono.defer(() -> {
        List<PluginDriverDTO> pluginDriverDTOList = pdDTOList.getPluginDriverDTOList();

        // Group the incoming search tokens by token type.
        Map<String, List<SearchToken>> tokenTypeGroup = searchRequest.getSearchQuery()
            .stream()
            .collect(Collectors.groupingBy(SearchToken::getTokenType));

        List<SearchToken> datasource = tokenTypeGroup.get("DATASOURCE");

        Stream<PluginDriverDTO> documentTypeStream = pluginDriverDTOList.stream();

        // DATASOURCE tokens restrict the plugin drivers to the requested names.
        if (datasource != null) {
            List<String> datasourceValues = datasource.stream()
                .map(SearchToken::getValues)
                .flatMap(Arrays::stream)
                .distinct()
                .collect(Collectors.toList());

            documentTypeStream = documentTypeStream.filter(
                entry -> datasourceValues.contains(entry.getName()));
        }

        List<PluginDriverDTO> documentTypeList =
            documentTypeStream.collect(Collectors.toList());

        // Compose all registered query parsers into a single parser.
        QueryParser queryParser =
            _queryParsers.stream().reduce(QueryParser.NOTHING, QueryParser::andThen);

        return queryParser.apply(
            createQueryParserContext(
                tenant, datasources, httpRequest, tokenTypeGroup, documentTypeList))
            .flatMap(boolQueryBuilderConsumer -> _search.flatMapSearch(factory -> {
                long tenantId = tenant.getTenantId();

                if (documentTypeList.isEmpty()) {
                    return Mono.just(SearchUtil.EMPTY_SEARCH_REQUEST);
                }

                BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
                boolQueryBuilderConsumer.accept(boolQuery);

                org.elasticsearch.action.search.SearchRequest elasticSearchQuery;

                if (datasource != null) {
                    String[] indexNames = documentTypeList.stream()
                        .map(PluginDriverDTO::getName)
                        .distinct()
                        .toArray(String[]::new);

                    elasticSearchQuery =
                        factory.createSearchRequestData(tenantId, indexNames);
                }
                else {
                    elasticSearchQuery = factory.createSearchRequestData(tenantId, "*");
                }

                SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
                searchSourceBuilder.fetchSource(includeFields(), excludeFields());
                searchSourceBuilder.query(boolQuery);
                searchSourceBuilder.trackTotalHits(true);

                return customizeSearchSourceBuilderMono(
                    tenant, datasources, searchRequest, documentTypeList,
                    searchSourceBuilder, elasticSearchQuery);
            }));
    });
}
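createSearchRequestData is a factory method whose implementation is not included on this page. Judging from the data index names used in the snippets above ("*-*-data" and tenantId + "-*-data"), it plausibly expands each plugin driver name into a "{tenantId}-{name}-data" index pattern and builds an Elasticsearch SearchRequest over those indices. The sketch below is that guess, not the actual openk9 factory.

// Hypothetical sketch of the factory, assuming the "{tenantId}-{name}-data"
// index naming convention seen elsewhere in this page.
public org.elasticsearch.action.search.SearchRequest createSearchRequestData(
    long tenantId, String... names) {

    String[] indexNames = Arrays.stream(names)
        .map(name -> tenantId + "-" + name + "-data")
        .toArray(String[]::new);

    return new org.elasticsearch.action.search.SearchRequest(indexNames);
}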