Search in sources:

Example 6 with SearchRequest

use of io.openk9.search.api.query.SearchRequest in project openk9 by smclab.

the class BaseAggregatorAnnotator method annotate_.

/**
 * Maps the given tokens to the aggregated keyword value that best matches them.
 *
 * <p>The tokens are joined with single spaces and matched against every keyword field configured
 * for the tenant (falling back to the entry registered under {@code -1}). One terms aggregation
 * per keyword field, limited to 10 buckets, supplies the candidate values. A bucket key equal to
 * the joined token (ignoring case) is returned immediately; otherwise the candidate with the
 * highest {@code _levenshteinDistance} result is returned, the first one seen winning ties.
 *
 * @param tenantId tenant whose data indices are searched; {@code -1} searches the indices of all tenants
 * @param tokens the tokens to annotate
 * @return a single-element list holding the best match, or an empty list when no keyword is
 *     configured, no bucket was returned, or the search failed with an {@link IOException}
 */
@Override
public List<CategorySemantics> annotate_(long tenantId, String... tokens) {
    List<String> normalizedKeywords = tenantKeywordsMap.getOrDefault(tenantId, tenantKeywordsMap.get(-1L));
    // Without keywords no aggregation would be requested, and a response without aggregations
    // may expose a null getAggregations(); there is nothing to annotate in that case anyway.
    if (normalizedKeywords == null || normalizedKeywords.isEmpty()) {
        return List.of();
    }
    RestHighLevelClient restHighLevelClient = restHighLevelClientProvider.get();
    String token = String.join(" ", tokens);
    BoolQueryBuilder keywordQuery = QueryBuilders.boolQuery();
    for (String keyword : normalizedKeywords) {
        keywordQuery.should(query(keyword, token));
    }
    BoolQueryBuilder builder = QueryBuilders.boolQuery();
    builder.must(keywordQuery);
    SearchRequest searchRequest;
    if (tenantId == -1) {
        searchRequest = new SearchRequest("*-*-data");
    } else {
        searchRequest = new SearchRequest(tenantId + "-*-data");
    }
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    // Only the aggregation buckets are needed, not the matching documents.
    searchSourceBuilder.size(0);
    searchSourceBuilder.query(builder);
    for (String keyword : normalizedKeywords) {
        searchSourceBuilder.aggregation(AggregationBuilders.terms(keyword).field(keyword).size(10));
    }
    searchRequest.source(searchSourceBuilder);
    if (_log.isDebugEnabled()) {
        _log.debug(builder.toString());
    }
    // Each candidate is a {bucket key, aggregation name} pair, kept in encounter order.
    List<String[]> candidates = new ArrayList<>();
    try {
        SearchResponse search = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        for (Aggregation aggregation : search.getAggregations()) {
            Terms terms = (Terms) aggregation;
            for (Terms.Bucket bucket : terms.getBuckets()) {
                String keyAsString = bucket.getKeyAsString();
                if (token.equalsIgnoreCase(keyAsString)) {
                    // Exact match: no scoring needed.
                    return List.of(_createCategorySemantics(terms.getName(), keyAsString));
                }
                candidates.add(new String[] {keyAsString, terms.getName()});
            }
        }
    } catch (IOException e) {
        _log.error(e.getMessage(), e);
    }
    if (candidates.isEmpty()) {
        return List.of();
    }
    // Score every candidate exactly once and keep the first one with the highest score.
    // NOTE(review): the highest value wins, as in the previous descending sort; if
    // _levenshteinDistance is an edit distance rather than a similarity, this selects the
    // least similar candidate - confirm against the helper.
    String[] best = null;
    double bestScore = 0;
    for (String[] candidate : candidates) {
        double score = _levenshteinDistance(token, candidate[0]);
        if (best == null || Double.compare(score, bestScore) > 0) {
            best = candidate;
            bestScore = score;
        }
    }
    return List.of(_createCategorySemantics(best[1], best[0]));
}
Also used : SearchRequest(org.elasticsearch.action.search.SearchRequest) ArrayList(java.util.ArrayList) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) RestHighLevelClient(org.elasticsearch.client.RestHighLevelClient) IOException(java.io.IOException) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) SearchResponse(org.elasticsearch.action.search.SearchResponse) Aggregation(org.elasticsearch.search.aggregations.Aggregation) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder) Supplier(java.util.function.Supplier) Tuple(io.openk9.search.api.query.parser.Tuple)

Example 7 with SearchRequest

use of io.openk9.search.api.query.SearchRequest in project openk9 by smclab.

the class BaseNerAnnotator method annotate_.

/**
 * Looks up entities of this annotator's {@code category} whose name matches the given tokens.
 *
 * <p>Returns an empty list right away when {@code _containsStopword(tokens)} is true. Otherwise
 * every token that is not in {@code stopWords} becomes a mandatory clause on the {@code name}
 * field, alongside a mandatory match of {@code type.keyword} against {@code category}. At most
 * {@code _annotatorConfig.nerSize()} hits are requested.
 *
 * @param tenantId tenant whose entity index is searched; {@code -1} searches the entity indices of all tenants
 * @param tokens the tokens to annotate
 * @return one {@code CategorySemantics} per hit; empty when nothing matched or when the search
 *     failed with an {@link IOException} (the error is logged)
 */
@Override
public List<CategorySemantics> annotate_(long tenantId, String... tokens) {
    // Guarded like the other debug statements below so the array is not rendered needlessly.
    if (_log.isDebugEnabled()) {
        _log.debug(Arrays.toString(tokens));
    }
    if (_containsStopword(tokens)) {
        return List.of();
    }
    RestHighLevelClient restHighLevelClient = restHighLevelClientProvider.get();
    BoolQueryBuilder builder = QueryBuilders.boolQuery();
    builder.must(QueryBuilders.matchQuery("type.keyword", category));
    for (String token : tokens) {
        if (!stopWords.contains(token)) {
            builder.must(query("name", token));
        }
    }
    SearchRequest searchRequest;
    if (tenantId == -1) {
        searchRequest = new SearchRequest("*-entity");
    } else {
        searchRequest = new SearchRequest(tenantId + "-entity");
    }
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.size(_annotatorConfig.nerSize());
    searchSourceBuilder.query(builder);
    searchRequest.source(searchSourceBuilder);
    List<CategorySemantics> list = new ArrayList<>();
    if (_log.isDebugEnabled()) {
        _log.debug(builder.toString());
    }
    try {
        SearchResponse search = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        for (SearchHit hit : search.getHits()) {
            Map<String, Object> semantics = hit.getSourceAsMap();
            // NOTE(review): Map.of rejects null values, so a hit lacking any of these source
            // fields would raise a NullPointerException - confirm the entity index always has them.
            list.add(CategorySemantics.of("$" + semantics.get("type"), Map.of("tokenType", "ENTITY", "entityType", semantics.get("type"), "entityName", semantics.get("name"), "tenantId", semantics.get("tenantId"), "value", semantics.get("id"), "score", hit.getScore())));
        }
        if (_log.isDebugEnabled()) {
            _log.debug(list.toString());
        }
    } catch (IOException e) {
        _log.error(e.getMessage(), e);
    }
    return list;
}
Also used : SearchRequest(org.elasticsearch.action.search.SearchRequest) CategorySemantics(io.openk9.search.api.query.parser.CategorySemantics) SearchHit(org.elasticsearch.search.SearchHit) ArrayList(java.util.ArrayList) RestHighLevelClient(org.elasticsearch.client.RestHighLevelClient) IOException(java.io.IOException) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) SearchResponse(org.elasticsearch.action.search.SearchResponse) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder)

Example 8 with SearchRequest

use of io.openk9.search.api.query.SearchRequest in project openk9 by smclab.

the class IndexWriterEndpoins method _cleanOrphanEntitiesConsumer.

/**
 * Deletes the entities of a tenant that are no longer referenced by any of its data documents.
 *
 * <p>The tenant id is read from the message body. The {@code <tenantId>-entity} index is walked
 * with a scroll (20 second keep-alive); for each entity a count query on {@code entities.id}
 * against {@code <tenantId>-*-data} decides whether it is still referenced. Entities with a
 * count of zero are removed with a single bulk request.
 *
 * <p>The message is always answered: with {@code "Entities deleted " + names} on success, with
 * the bulk failure message when the bulk request reports failures, or with the exception message
 * when any step throws.
 *
 * @param message event-bus message carrying the tenant id; it receives the reply
 */
private void _cleanOrphanEntitiesConsumer(Message<Long> message) {
    Long tenantId = message.body();
    RestHighLevelClient client = _restHighLevelClientProvider.get();
    // Id of the scroll context that is still open; reset to null once it has been cleared so
    // the finally block only steps in when the regular path did not get that far.
    String openScrollId = null;
    try {
        final Scroll scroll = new Scroll(TimeValue.timeValueSeconds(20));
        String entityIndexName = tenantId + "-entity";
        String nestIdPath = "entities.id";
        SearchRequest searchRequest = new SearchRequest(entityIndexName);
        searchRequest.scroll(scroll);
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchRequest.source(searchSourceBuilder);
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        openScrollId = searchResponse.getScrollId();
        SearchHit[] searchHits = searchResponse.getHits().getHits();
        Collection<String> entitiesToDelete = new HashSet<>();
        Collection<String> entityNames = new ArrayList<>();
        while (searchHits != null && searchHits.length > 0) {
            for (SearchHit searchHit : searchHits) {
                Map<String, Object> source = searchHit.getSourceAsMap();
                Object id = source.get("id");
                String name = String.valueOf(source.get("name"));
                CountRequest countRequest = new CountRequest(tenantId + "-*-data");
                countRequest.query(matchQuery(nestIdPath, id));
                CountResponse countResponse = client.count(countRequest, RequestOptions.DEFAULT);
                if (countResponse.getCount() == 0) {
                    entitiesToDelete.add(searchHit.getId());
                    entityNames.add(name);
                }
            }
            SearchScrollRequest scrollRequest = new SearchScrollRequest(openScrollId);
            scrollRequest.scroll(scroll);
            searchResponse = client.scroll(scrollRequest, RequestOptions.DEFAULT);
            openScrollId = searchResponse.getScrollId();
            searchHits = searchResponse.getHits().getHits();
        }
        ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
        clearScrollRequest.addScrollId(openScrollId);
        client.clearScroll(clearScrollRequest, RequestOptions.DEFAULT);
        openScrollId = null;
        if (!entitiesToDelete.isEmpty()) {
            BulkRequest bulkRequest = new BulkRequest();
            bulkRequest.add(entitiesToDelete.stream().map(id -> new DeleteRequest(entityIndexName, id)).collect(Collectors.toList())).setRefreshPolicy(WriteRequest.RefreshPolicy.WAIT_UNTIL);
            BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
            if (bulkResponse.hasFailures()) {
                // Do not claim success when some deletions were rejected.
                message.reply(bulkResponse.buildFailureMessage());
                return;
            }
        }
        message.reply("Entities deleted " + entityNames);
    } catch (Exception e) {
        message.reply(e.getMessage());
    } finally {
        if (openScrollId != null) {
            try {
                ClearScrollRequest cleanupRequest = new ClearScrollRequest();
                cleanupRequest.addScrollId(openScrollId);
                client.clearScroll(cleanupRequest, RequestOptions.DEFAULT);
            } catch (Exception ignored) {
                // Best effort: the reply has already been sent and the scroll context
                // expires on its own once the keep-alive elapses.
            }
        }
    }
}
Also used : LoggerFactory(org.slf4j.LoggerFactory) QueryBuilders(org.elasticsearch.index.query.QueryBuilders) JsonFactory(io.openk9.json.api.JsonFactory) DocumentEntityRequest(io.openk9.index.writer.entity.model.DocumentEntityRequest) DeleteRequest(org.elasticsearch.action.delete.DeleteRequest) ReactorNettyUtils(io.openk9.reactor.netty.util.ReactorNettyUtils) IndexRequest(org.elasticsearch.action.index.IndexRequest) EventBus(io.vertx.core.eventbus.EventBus) Map(java.util.Map) SearchResponse(org.elasticsearch.action.search.SearchResponse) RequestOptions(org.elasticsearch.client.RequestOptions) PluginDriverManagerClient(io.openk9.plugin.driver.manager.client.api.PluginDriverManagerClient) DeleteByQueryRequest(org.elasticsearch.index.reindex.DeleteByQueryRequest) CountRequest(org.elasticsearch.client.core.CountRequest) TimeValue(org.elasticsearch.core.TimeValue) SearchHit(org.elasticsearch.search.SearchHit) ReactorActionListener(io.openk9.search.client.api.ReactorActionListener) Collection(java.util.Collection) BulkResponse(org.elasticsearch.action.bulk.BulkResponse) Message(io.vertx.core.eventbus.Message) DatasourceClient(io.openk9.datasource.client.api.DatasourceClient) Scroll(org.elasticsearch.search.Scroll) Collectors(java.util.stream.Collectors) BundleContext(org.osgi.framework.BundleContext) HttpServerRequest(reactor.netty.http.server.HttpServerRequest) List(java.util.List) QueryBuilders.matchQuery(org.elasticsearch.index.query.QueryBuilders.matchQuery) Builder(lombok.Builder) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder) RestHighLevelClientProvider(io.openk9.search.client.api.RestHighLevelClientProvider) HttpServerRoutes(reactor.netty.http.server.HttpServerRoutes) HttpServerResponse(reactor.netty.http.server.HttpServerResponse) MatchQueryBuilder(org.elasticsearch.index.query.MatchQueryBuilder) XContentType(org.elasticsearch.common.xcontent.XContentType) ClearScrollRequest(org.elasticsearch.action.search.ClearScrollRequest) 
HashMap(java.util.HashMap) SearchRequest(org.elasticsearch.action.search.SearchRequest) HttpResponseWriter(io.openk9.http.util.HttpResponseWriter) Function(java.util.function.Function) ClearScrollResponse(org.elasticsearch.action.search.ClearScrollResponse) CountResponse(org.elasticsearch.client.core.CountResponse) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) WriteRequest(org.elasticsearch.action.support.WriteRequest) Component(org.osgi.service.component.annotations.Component) Operator(org.elasticsearch.index.query.Operator) Cancellable(org.elasticsearch.client.Cancellable) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) Activate(org.osgi.service.component.annotations.Activate) BulkByScrollResponse(org.elasticsearch.index.reindex.BulkByScrollResponse) RouterHandler(io.openk9.http.web.RouterHandler) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) Logger(org.slf4j.Logger) Datasource(io.openk9.model.Datasource) Publisher(org.reactivestreams.Publisher) Mono(reactor.core.publisher.Mono) RestHighLevelClient(org.elasticsearch.client.RestHighLevelClient) Data(lombok.Data) SearchScrollRequest(org.elasticsearch.action.search.SearchScrollRequest) AllArgsConstructor(lombok.AllArgsConstructor) Reference(org.osgi.service.component.annotations.Reference) BulkRequest(org.elasticsearch.action.bulk.BulkRequest) NoArgsConstructor(lombok.NoArgsConstructor) SearchRequest(org.elasticsearch.action.search.SearchRequest) SearchHit(org.elasticsearch.search.SearchHit) ArrayList(java.util.ArrayList) SearchScrollRequest(org.elasticsearch.action.search.SearchScrollRequest) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) CountRequest(org.elasticsearch.client.core.CountRequest) ClearScrollRequest(org.elasticsearch.action.search.ClearScrollRequest) ClearScrollResponse(org.elasticsearch.action.search.ClearScrollResponse) HashSet(java.util.HashSet) Scroll(org.elasticsearch.search.Scroll) 
CountResponse(org.elasticsearch.client.core.CountResponse) BulkResponse(org.elasticsearch.action.bulk.BulkResponse) RestHighLevelClient(org.elasticsearch.client.RestHighLevelClient) SearchResponse(org.elasticsearch.action.search.SearchResponse) BulkRequest(org.elasticsearch.action.bulk.BulkRequest) DeleteRequest(org.elasticsearch.action.delete.DeleteRequest)

Example 9 with SearchRequest

use of io.openk9.search.api.query.SearchRequest in project openk9 by smclab.

the class InsertIndexWriter method _createDocWriterRequest.

/**
 * Builds the write request for the payload carried by the given context.
 *
 * <p>The index is searched for documents whose {@code contentId} matches the payload's. The
 * first hit, if any, yields an {@code UpdateRequest} for that document id; when there is no hit,
 * or the search fails (the error is replaced by {@code SearchUtil.EMPTY_SEARCH_RESPONSE}), an
 * {@code IndexRequest} is produced instead. In both cases the request body is the payload
 * rendered with {@code toString()} as JSON.
 *
 * @param indexName index to search and to write to
 * @param enrichProcessorContext context whose {@code payload} node is the document to store
 * @return a lazily evaluated Mono emitting the update or index request
 */
private Mono<DocWriteRequest> _createDocWriterRequest(String indexName, ObjectNode enrichProcessorContext) {
    return Mono.defer(() -> {
        ObjectNode payload = enrichProcessorContext.get("payload").toObjectNode();
        String contentId = payload.get("contentId").asText();
        return _search
            .search(factory -> new SearchRequest(indexName).source(new SearchSourceBuilder().query(QueryBuilders.matchQuery("contentId", contentId))))
            // A failed lookup is treated like "no existing document".
            .onErrorReturn(SearchUtil.EMPTY_SEARCH_RESPONSE)
            .filter(response -> response.getHits().getHits().length > 0)
            .flatMapIterable(SearchResponse::getHits)
            // Only the first matching document is updated.
            .next()
            .map(hit -> new UpdateRequest(indexName, hit.getId()).doc(payload.toString(), XContentType.JSON))
            .cast(DocWriteRequest.class)
            .switchIfEmpty(Mono.fromSupplier(() -> new IndexRequest(indexName).source(payload.toString(), XContentType.JSON)));
    });
}
Also used : MatchQueryBuilder(org.elasticsearch.index.query.MatchQueryBuilder) Logger(org.slf4j.Logger) Disposable(reactor.core.Disposable) SearchUtil(io.openk9.search.client.api.util.SearchUtil) XContentType(org.elasticsearch.common.xcontent.XContentType) IndexBus(io.openk9.search.client.api.IndexBus) Deactivate(org.osgi.service.component.annotations.Deactivate) LoggerFactory(org.slf4j.LoggerFactory) Search(io.openk9.search.client.api.Search) UpdateRequest(org.elasticsearch.action.update.UpdateRequest) Mono(reactor.core.publisher.Mono) SearchRequest(org.elasticsearch.action.search.SearchRequest) DocWriteRequest(org.elasticsearch.action.DocWriteRequest) QueryBuilders(org.elasticsearch.index.query.QueryBuilders) JsonFactory(io.openk9.json.api.JsonFactory) IndexRequest(org.elasticsearch.action.index.IndexRequest) Component(org.osgi.service.component.annotations.Component) ReceiverReactor(io.openk9.ingestion.api.ReceiverReactor) SearchResponse(org.elasticsearch.action.search.SearchResponse) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) ObjectNode(io.openk9.json.api.ObjectNode) Activate(org.osgi.service.component.annotations.Activate) Reference(org.osgi.service.component.annotations.Reference) Binding(io.openk9.ingestion.api.Binding) SearchRequest(org.elasticsearch.action.search.SearchRequest) ObjectNode(io.openk9.json.api.ObjectNode) UpdateRequest(org.elasticsearch.action.update.UpdateRequest) MatchQueryBuilder(org.elasticsearch.index.query.MatchQueryBuilder) DocWriteRequest(org.elasticsearch.action.DocWriteRequest) IndexRequest(org.elasticsearch.action.index.IndexRequest) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) SearchResponse(org.elasticsearch.action.search.SearchResponse)

Example 10 with SearchRequest

use of io.openk9.search.api.query.SearchRequest in project openk9 by smclab.

the class BaseSearchHTTPHandler method _toQuerySearchRequest.

/**
 * Turns an OpenK9 search request into an Elasticsearch query and executes it through {@code _search}.
 *
 * <p>The search tokens are grouped by token type. When tokens of type {@code DATASOURCE} are
 * present, only the plugin drivers whose name appears among their values are kept and the query
 * targets the indices named after those drivers; otherwise all drivers are kept and the index
 * pattern {@code "*"} is used. All registered query parsers are chained and applied to the parser
 * context to populate the bool query. If no plugin driver remains,
 * {@code SearchUtil.EMPTY_SEARCH_REQUEST} is used instead of building a query.
 *
 * @param tenant tenant issuing the search
 * @param datasources datasources forwarded to the parser context and the customization hook
 * @param pdDTOList available plugin drivers
 * @param searchRequest the incoming search request with its tokens
 * @param httpRequest originating HTTP request, forwarded to the parser context
 * @return a lazily evaluated Mono with the search response
 */
private Mono<SearchResponse> _toQuerySearchRequest(Tenant tenant, List<Datasource> datasources, PluginDriverDTOList pdDTOList, SearchRequest searchRequest, HttpServerRequest httpRequest) {
    return Mono.defer(() -> {
        List<PluginDriverDTO> availableDrivers = pdDTOList.getPluginDriverDTOList();
        Map<String, List<SearchToken>> tokensByType = searchRequest.getSearchQuery().stream().collect(Collectors.groupingBy(SearchToken::getTokenType));
        List<SearchToken> datasourceTokens = tokensByType.get("DATASOURCE");
        boolean datasourceFilterRequested = datasourceTokens != null;
        List<PluginDriverDTO> selectedDrivers;
        if (datasourceFilterRequested) {
            List<String> requestedNames = datasourceTokens.stream().map(SearchToken::getValues).flatMap(Arrays::stream).distinct().collect(Collectors.toList());
            selectedDrivers = availableDrivers.stream().filter(driver -> requestedNames.contains(driver.getName())).collect(Collectors.toList());
        } else {
            selectedDrivers = availableDrivers.stream().collect(Collectors.toList());
        }
        // Fold every registered parser into a single one, starting from the no-op parser.
        QueryParser composedParser = _queryParsers.stream().reduce(QueryParser.NOTHING, QueryParser::andThen);
        return composedParser.apply(createQueryParserContext(tenant, datasources, httpRequest, tokensByType, selectedDrivers)).flatMap(queryCustomizer -> _search.flatMapSearch(factory -> {
            long tenantId = tenant.getTenantId();
            if (selectedDrivers.isEmpty()) {
                return Mono.just(SearchUtil.EMPTY_SEARCH_REQUEST);
            }
            BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
            queryCustomizer.accept(boolQuery);
            org.elasticsearch.action.search.SearchRequest elasticSearchQuery = datasourceFilterRequested
                ? factory.createSearchRequestData(tenantId, selectedDrivers.stream().map(PluginDriverDTO::getName).distinct().toArray(String[]::new))
                : factory.createSearchRequestData(tenantId, "*");
            SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
            sourceBuilder.fetchSource(includeFields(), excludeFields());
            sourceBuilder.query(boolQuery);
            sourceBuilder.trackTotalHits(true);
            return customizeSearchSourceBuilderMono(tenant, datasources, searchRequest, selectedDrivers, sourceBuilder, elasticSearchQuery);
        }));
    });
}
Also used : HttpServerResponse(reactor.netty.http.server.HttpServerResponse) PluginDriverDTO(io.openk9.plugin.driver.manager.model.PluginDriverDTO) Arrays(java.util.Arrays) SearchUtil(io.openk9.search.client.api.util.SearchUtil) SearchHits(org.elasticsearch.search.SearchHits) PluginDriverDTOList(io.openk9.plugin.driver.manager.model.PluginDriverDTOList) LoggerFactory(org.slf4j.LoggerFactory) Tuples(reactor.util.function.Tuples) Search(io.openk9.search.client.api.Search) Tuple2(reactor.util.function.Tuple2) HashMap(java.util.HashMap) QueryParser(io.openk9.search.api.query.QueryParser) HttpResponseWriter(io.openk9.http.util.HttpResponseWriter) QueryBuilders(org.elasticsearch.index.query.QueryBuilders) SearchToken(io.openk9.search.api.query.SearchToken) JsonFactory(io.openk9.json.api.JsonFactory) ArrayList(java.util.ArrayList) HighlightBuilder(org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder) ReactorNettyUtils(io.openk9.reactor.netty.util.ReactorNettyUtils) Text(org.elasticsearch.common.text.Text) Map(java.util.Map) SearchResponse(org.elasticsearch.action.search.SearchResponse) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) HighlightField(org.elasticsearch.search.fetch.subphase.highlight.HighlightField) PluginDriverManagerClient(io.openk9.plugin.driver.manager.client.api.PluginDriverManagerClient) RouterHandler(io.openk9.http.web.RouterHandler) SearchHit(org.elasticsearch.search.SearchHit) Tenant(io.openk9.model.Tenant) Logger(org.slf4j.Logger) Datasource(io.openk9.model.Datasource) Publisher(org.reactivestreams.Publisher) Collection(java.util.Collection) DocumentTypeDTO(io.openk9.plugin.driver.manager.model.DocumentTypeDTO) DatasourceClient(io.openk9.datasource.client.api.DatasourceClient) Mono(reactor.core.publisher.Mono) SearchKeywordDTO(io.openk9.plugin.driver.manager.model.SearchKeywordDTO) Collectors(java.util.stream.Collectors) TotalHits(org.apache.lucene.search.TotalHits) 
SearchRequest(io.openk9.search.api.query.SearchRequest) HttpServerRequest(reactor.netty.http.server.HttpServerRequest) List(java.util.List) HttpHandler(io.openk9.http.web.HttpHandler) Stream(java.util.stream.Stream) SearchTokenizer(io.openk9.search.api.query.SearchTokenizer) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder) Response(io.openk9.search.query.internal.response.Response) SearchRequest(io.openk9.search.api.query.SearchRequest) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) QueryParser(io.openk9.search.api.query.QueryParser) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder) SearchToken(io.openk9.search.api.query.SearchToken) PluginDriverDTOList(io.openk9.plugin.driver.manager.model.PluginDriverDTOList) ArrayList(java.util.ArrayList) List(java.util.List) Arrays(java.util.Arrays) PluginDriverDTO(io.openk9.plugin.driver.manager.model.PluginDriverDTO)

Aggregations

SearchSourceBuilder (org.elasticsearch.search.builder.SearchSourceBuilder)11 SearchResponse (org.elasticsearch.action.search.SearchResponse)9 BoolQueryBuilder (org.elasticsearch.index.query.BoolQueryBuilder)9 SearchHit (org.elasticsearch.search.SearchHit)8 JsonFactory (io.openk9.json.api.JsonFactory)7 SearchRequest (org.elasticsearch.action.search.SearchRequest)7 QueryBuilders (org.elasticsearch.index.query.QueryBuilders)7 DatasourceClient (io.openk9.datasource.client.api.DatasourceClient)6 RouterHandler (io.openk9.http.web.RouterHandler)6 Datasource (io.openk9.model.Datasource)6 PluginDriverManagerClient (io.openk9.plugin.driver.manager.client.api.PluginDriverManagerClient)6 Search (io.openk9.search.client.api.Search)6 List (java.util.List)6 Map (java.util.Map)6 Logger (org.slf4j.Logger)6 LoggerFactory (org.slf4j.LoggerFactory)6 Mono (reactor.core.publisher.Mono)6 HttpResponseWriter (io.openk9.http.util.HttpResponseWriter)5 Tenant (io.openk9.model.Tenant)5 PluginDriverDTO (io.openk9.plugin.driver.manager.model.PluginDriverDTO)5