Search in sources :

Example 1 with CategorySemantics

use of io.openk9.search.api.query.parser.CategorySemantics in project openk9 by smclab.

In the class Grammar, method applyAnnotators:

/**
 * Runs every registered annotator over the token span {@code [i, j)} and
 * records one parse per returned annotation in the chart cell keyed by
 * {@code (i, j)}.
 *
 * @param chart    chart to add the parses to; the cell list is created on demand
 * @param tokens   all tokens; only the sub-range {@code [i, j)} is annotated
 * @param i        start index of the span (inclusive)
 * @param j        end index of the span (exclusive)
 * @param tenantId tenant identifier handed to each annotator
 * @param context  context handed to each annotator
 */
private void applyAnnotators(Map<Tuple, List<Parse>> chart, String[] tokens, int i, int j, long tenantId, Set<String> context) {
    final String[] span = Arrays.stream(tokens, i, j).toArray(String[]::new);
    final Tuple<Integer> cellKey = Tuple.of(i, j);
    for (Annotator annotator : annotators) {
        for (CategorySemantics annotation : annotator.annotate(tenantId, context, span)) {
            // Each annotation becomes a lexical rule covering exactly this span.
            Rule lexicalRule = new Rule(
                annotation.getCategory(),
                span,
                Semantic.of(cellKey, annotation.getSemantics()));
            List<Parse> cell = chart.computeIfAbsent(cellKey, unused -> new ArrayList<>());
            cell.add(Parse.of(lexicalRule, cellKey, span));
        }
    }
}
Also used : IntStream(java.util.stream.IntStream) ReactorStopWatch(io.openk9.common.api.reactor.util.ReactorStopWatch) Arrays(java.util.Arrays) Logger(org.slf4j.Logger) Utils(io.openk9.search.query.internal.query.parser.util.Utils) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) Mono(reactor.core.publisher.Mono) HashMap(java.util.HashMap) Annotator(io.openk9.search.api.query.parser.Annotator) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) CategorySemantics(io.openk9.search.api.query.parser.CategorySemantics) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Flux(reactor.core.publisher.Flux) List(java.util.List) Stream(java.util.stream.Stream) Itertools(io.openk9.search.query.internal.query.parser.util.Itertools) Map(java.util.Map) Schedulers(reactor.core.scheduler.Schedulers) Tuple(io.openk9.search.api.query.parser.Tuple) CategorySemantics(io.openk9.search.api.query.parser.CategorySemantics) Annotator(io.openk9.search.api.query.parser.Annotator) ArrayList(java.util.ArrayList)

Example 2 with CategorySemantics

use of io.openk9.search.api.query.parser.CategorySemantics in project openk9 by smclab.

In the class BaseAggregatorAnnotator, method annotate_:

/**
 * Annotates the given tokens by matching them against bucket keys of terms
 * aggregations over the tenant's keyword fields.
 *
 * <p>The tokens are joined with single spaces into one search term. A size-0
 * search is issued with one {@code should} clause and one terms aggregation
 * (at most 10 buckets) per keyword field. A bucket whose key equals the term,
 * ignoring case, is returned immediately. Otherwise the bucket with the
 * highest {@code _levenshteinDistance} value is returned; on ties the bucket
 * encountered first wins.
 *
 * <p>NOTE(review): the highest {@code _levenshteinDistance} value is treated
 * as the best match, so that helper presumably returns a similarity score
 * rather than a raw edit distance. Confirm against its implementation.
 *
 * @param tenantId tenant whose keyword list is used (falling back to the entry
 *                 for {@code -1}); {@code -1} searches the index pattern
 *                 {@code *-*-data}, any other value {@code <tenantId>-*-data}
 * @param tokens   tokens to annotate
 * @return a single-element list holding the best match, or an empty list when
 *         no keyword fields are configured, the search fails with an
 *         {@link IOException}, or no bucket is returned
 */
@Override
public List<CategorySemantics> annotate_(long tenantId, String... tokens) {
    List<String> normalizedKeywords = tenantKeywordsMap.getOrDefault(tenantId, tenantKeywordsMap.get(-1L));
    // With no keyword fields there is nothing to query or aggregate on; an
    // empty bool query would match everything and yield no aggregations.
    if (normalizedKeywords == null || normalizedKeywords.isEmpty()) {
        return List.of();
    }
    RestHighLevelClient restHighLevelClient = restHighLevelClientProvider.get();
    String token;
    if (tokens.length == 1) {
        token = tokens[0];
    } else {
        token = String.join(" ", tokens);
    }
    // At least one keyword field has to match the search term.
    BoolQueryBuilder anyKeywordMatches = QueryBuilders.boolQuery();
    for (String keyword : normalizedKeywords) {
        anyKeywordMatches.should(query(keyword, token));
    }
    BoolQueryBuilder builder = QueryBuilders.boolQuery();
    builder.must(anyKeywordMatches);
    SearchRequest searchRequest;
    if (tenantId == -1) {
        searchRequest = new SearchRequest("*-*-data");
    } else {
        searchRequest = new SearchRequest(tenantId + "-*-data");
    }
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    // Only the aggregation buckets are needed, not the hits themselves.
    searchSourceBuilder.size(0);
    searchSourceBuilder.query(builder);
    for (String keyword : normalizedKeywords) {
        searchSourceBuilder.aggregation(AggregationBuilders.terms(keyword).field(keyword).size(10));
    }
    searchRequest.source(searchSourceBuilder);
    if (_log.isDebugEnabled()) {
        _log.debug(builder.toString());
    }
    // Each candidate is a {bucketKey, aggregationName} pair, in response order.
    List<String[]> candidates = new ArrayList<>();
    try {
        SearchResponse search = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        // getAggregations() is null when the response carries no aggregations.
        Iterable<Aggregation> aggregations = search.getAggregations();
        if (aggregations != null) {
            for (Aggregation aggregation : aggregations) {
                Terms terms = (Terms) aggregation;
                for (Terms.Bucket bucket : terms.getBuckets()) {
                    String keyAsString = bucket.getKeyAsString();
                    // An exact (case-insensitive) hit short-circuits the scoring.
                    if (token.equalsIgnoreCase(keyAsString)) {
                        return List.of(_createCategorySemantics(terms.getName(), keyAsString));
                    }
                    candidates.add(new String[] {keyAsString, terms.getName()});
                }
            }
        }
    } catch (IOException e) {
        _log.error(e.getMessage(), e);
    }
    if (candidates.isEmpty()) {
        return List.of();
    }
    // Score every candidate exactly once and keep the first one with the
    // highest score. Sorting with a lazily evaluated score would recompute it
    // on every comparison.
    String[] best = null;
    double bestScore = 0;
    for (String[] candidate : candidates) {
        double score = _levenshteinDistance(token, candidate[0]);
        if (best == null || Double.compare(score, bestScore) > 0) {
            best = candidate;
            bestScore = score;
        }
    }
    return List.of(_createCategorySemantics(best[1], best[0]));
}
Also used : SearchRequest(org.elasticsearch.action.search.SearchRequest) ArrayList(java.util.ArrayList) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) RestHighLevelClient(org.elasticsearch.client.RestHighLevelClient) IOException(java.io.IOException) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) SearchResponse(org.elasticsearch.action.search.SearchResponse) Aggregation(org.elasticsearch.search.aggregations.Aggregation) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder) Supplier(java.util.function.Supplier) Tuple(io.openk9.search.api.query.parser.Tuple)

Example 3 with CategorySemantics

use of io.openk9.search.api.query.parser.CategorySemantics in project openk9 by smclab.

In the class BaseNerAnnotator, method annotate_:

/**
 * Annotates the given tokens with entities of this annotator's
 * {@code category} found in the tenant's entity index.
 *
 * <p>Returns an empty list when {@code _containsStopword(tokens)} is true.
 * Otherwise it searches for documents whose {@code type.keyword} matches
 * {@code category} and whose {@code name} matches every token not listed in
 * {@code stopWords}, limited to {@code _annotatorConfig.nerSize()} hits. Each
 * hit becomes a {@link CategorySemantics} whose category is {@code "$"}
 * followed by the hit's {@code type}.
 *
 * <p>Hits that have no source, or that lack any of {@code type},
 * {@code name}, {@code tenantId} or {@code id}, are skipped with a warning,
 * because {@link Map#of} rejects null values and would otherwise abort the
 * whole annotation with a {@link NullPointerException}.
 *
 * @param tenantId tenant to search; {@code -1} searches the index pattern
 *                 {@code *-entity}, any other value {@code <tenantId>-entity}
 * @param tokens   tokens to annotate
 * @return the annotations built from the search hits; empty when a stopword
 *         is present, nothing matches, or the search fails with an
 *         {@link IOException}
 */
@Override
public List<CategorySemantics> annotate_(long tenantId, String... tokens) {
    // Guarded so the array is only rendered when debug logging is on.
    if (_log.isDebugEnabled()) {
        _log.debug(Arrays.toString(tokens));
    }
    if (_containsStopword(tokens)) {
        return List.of();
    }
    RestHighLevelClient restHighLevelClient = restHighLevelClientProvider.get();
    BoolQueryBuilder builder = QueryBuilders.boolQuery();
    builder.must(QueryBuilders.matchQuery("type.keyword", category));
    for (String token : tokens) {
        if (!stopWords.contains(token)) {
            builder.must(query("name", token));
        }
    }
    SearchRequest searchRequest;
    if (tenantId == -1) {
        searchRequest = new SearchRequest("*-entity");
    } else {
        searchRequest = new SearchRequest(tenantId + "-entity");
    }
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.size(_annotatorConfig.nerSize());
    searchSourceBuilder.query(builder);
    searchRequest.source(searchSourceBuilder);
    List<CategorySemantics> list = new ArrayList<>();
    if (_log.isDebugEnabled()) {
        _log.debug(builder.toString());
    }
    try {
        SearchResponse search = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        for (SearchHit hit : search.getHits()) {
            Map<String, Object> source = hit.getSourceAsMap();
            if (source == null) {
                _log.warn("Skipping entity hit " + hit.getId() + ": no source");
                continue;
            }
            Object entityType = source.get("type");
            Object entityName = source.get("name");
            Object entityTenantId = source.get("tenantId");
            Object entityId = source.get("id");
            // Map.of throws NullPointerException on null values, so an
            // incomplete document must not reach it.
            if (entityType == null || entityName == null || entityTenantId == null || entityId == null) {
                _log.warn("Skipping entity hit " + hit.getId() + ": missing type, name, tenantId or id");
                continue;
            }
            list.add(CategorySemantics.of("$" + entityType, Map.of("tokenType", "ENTITY", "entityType", entityType, "entityName", entityName, "tenantId", entityTenantId, "value", entityId, "score", hit.getScore())));
        }
        if (_log.isDebugEnabled()) {
            _log.debug(list.toString());
        }
    } catch (IOException e) {
        _log.error(e.getMessage(), e);
    }
    return list;
}
Also used : SearchRequest(org.elasticsearch.action.search.SearchRequest) CategorySemantics(io.openk9.search.api.query.parser.CategorySemantics) SearchHit(org.elasticsearch.search.SearchHit) ArrayList(java.util.ArrayList) RestHighLevelClient(org.elasticsearch.client.RestHighLevelClient) IOException(java.io.IOException) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) SearchResponse(org.elasticsearch.action.search.SearchResponse) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder)

Aggregations

ArrayList (java.util.ArrayList)3 CategorySemantics (io.openk9.search.api.query.parser.CategorySemantics)2 Tuple (io.openk9.search.api.query.parser.Tuple)2 IOException (java.io.IOException)2 SearchRequest (org.elasticsearch.action.search.SearchRequest)2 SearchResponse (org.elasticsearch.action.search.SearchResponse)2 RestHighLevelClient (org.elasticsearch.client.RestHighLevelClient)2 BoolQueryBuilder (org.elasticsearch.index.query.BoolQueryBuilder)2 SearchSourceBuilder (org.elasticsearch.search.builder.SearchSourceBuilder)2 ReactorStopWatch (io.openk9.common.api.reactor.util.ReactorStopWatch)1 Annotator (io.openk9.search.api.query.parser.Annotator)1 Itertools (io.openk9.search.query.internal.query.parser.util.Itertools)1 Utils (io.openk9.search.query.internal.query.parser.util.Utils)1 Arrays (java.util.Arrays)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 Function (java.util.function.Function)1