Use of io.openk9.search.api.query.parser.CategorySemantics in project openk9 by smclab.
Class Grammar, method applyAnnotators.
/**
 * Runs every registered annotator over the token span {@code [i, j)} and stores one
 * parse per returned annotation in the chart, under the key for that span.
 *
 * @param chart parse chart; the list stored under the span key is created on first use
 *     and each new parse is appended to it
 * @param tokens the full token array; only the slice from {@code i} (inclusive) to
 *     {@code j} (exclusive) is handed to the annotators
 * @param i start index of the span, inclusive
 * @param j end index of the span, exclusive
 * @param tenantId tenant identifier forwarded unchanged to each annotator
 * @param context context set forwarded unchanged to each annotator
 */
private void applyAnnotators(Map<Tuple, List<Parse>> chart, String[] tokens, int i, int j, long tenantId, Set<String> context) {
    // Work on a copy of the requested slice; the caller's array is left untouched.
    final String[] span = Arrays.stream(tokens, i, j).toArray(String[]::new);
    final Tuple<Integer> spanKey = Tuple.of(i, j);

    for (Annotator annotator : annotators) {
        for (CategorySemantics annotation : annotator.annotate(tenantId, context, span)) {
            String label = annotation.getCategory();
            Map<String, Object> meaning = annotation.getSemantics();
            Rule lexicalRule = new Rule(label, span, Semantic.of(spanKey, meaning));

            // The bucket is only created once an annotator actually produced something.
            List<Parse> parses = chart.computeIfAbsent(spanKey, ignored -> new ArrayList<>());
            parses.add(Parse.of(lexicalRule, spanKey, span));
        }
    }
}
Use of io.openk9.search.api.query.parser.CategorySemantics in project openk9 by smclab.
Class BaseAggregatorAnnotator, method annotate_.
/**
 * Annotates the given tokens by matching them against terms aggregations computed
 * over the tenant's data indices.
 *
 * <p>The tokens are joined with single spaces into one search string. For every
 * configured keyword field a {@code should} clause is added to the query and a terms
 * aggregation (top 10 buckets) is requested. If a bucket key equals the search string
 * ignoring case, that bucket is returned immediately. Otherwise the bucket whose
 * {@code _levenshteinDistance} score against the search string is highest is returned;
 * ties go to the bucket encountered first.
 *
 * <p>NOTE(review): picking the <em>highest</em> score is only right if
 * {@code _levenshteinDistance} returns a similarity (larger = closer) rather than an
 * edit distance. The original ordering is preserved here; confirm against the helper.
 *
 * @param tenantId tenant whose keyword list and indices are used; {@code -1} searches
 *     the indices of all tenants, and the keyword list registered under {@code -1} is
 *     the fallback when the tenant has none of its own
 * @param tokens tokens to annotate
 * @return a single-element list with the best match, or an empty list when no keywords
 *     are configured, the search yields no aggregation buckets, or the search fails
 *     with an {@link IOException} (which is logged)
 */
@Override
public List<CategorySemantics> annotate_(long tenantId, String... tokens) {
    List<String> normalizedKeywords = tenantKeywordsMap.getOrDefault(tenantId, tenantKeywordsMap.get(-1L));
    // With no keywords there is nothing to query or aggregate on; skip the round trip
    // (the response would carry no aggregations at all).
    if (normalizedKeywords == null || normalizedKeywords.isEmpty()) {
        return List.of();
    }
    RestHighLevelClient restHighLevelClient = restHighLevelClientProvider.get();
    String token = tokens.length == 1 ? tokens[0] : String.join(" ", tokens);

    BoolQueryBuilder keywordClauses = QueryBuilders.boolQuery();
    for (String keyword : normalizedKeywords) {
        keywordClauses.should(query(keyword, token));
    }
    BoolQueryBuilder builder = QueryBuilders.boolQuery();
    builder.must(keywordClauses);

    SearchRequest searchRequest;
    if (tenantId == -1) {
        searchRequest = new SearchRequest("*-*-data");
    } else {
        searchRequest = new SearchRequest(tenantId + "-*-data");
    }

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    // Only the aggregations are of interest, not the hits themselves.
    searchSourceBuilder.size(0);
    searchSourceBuilder.query(builder);
    for (String keyword : normalizedKeywords) {
        searchSourceBuilder.aggregation(AggregationBuilders.terms(keyword).field(keyword).size(10));
    }
    searchRequest.source(searchSourceBuilder);

    if (_log.isDebugEnabled()) {
        _log.debug(builder.toString());
    }

    // Each candidate is a {aggregationName, bucketKey} pair, in encounter order.
    List<String[]> candidates = new ArrayList<>();
    try {
        SearchResponse search = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        // The response has no aggregations section when, for example, the index
        // pattern matched no index; iterating it unchecked would throw an NPE.
        Iterable<? extends Aggregation> aggregations = search.getAggregations();
        if (aggregations == null) {
            return List.of();
        }
        for (Aggregation aggregation : aggregations) {
            Terms terms = (Terms) aggregation;
            for (Terms.Bucket bucket : terms.getBuckets()) {
                String keyAsString = bucket.getKeyAsString();
                // An exact (case-insensitive) hit wins outright; no scoring needed.
                if (token.equalsIgnoreCase(keyAsString)) {
                    return List.of(_createCategorySemantics(terms.getName(), keyAsString));
                }
                candidates.add(new String[] {terms.getName(), keyAsString});
            }
        }
    } catch (IOException e) {
        _log.error(e.getMessage(), e);
    }

    if (candidates.isEmpty()) {
        return List.of();
    }

    // Single pass for the highest score: every candidate is scored exactly once, and
    // the strict comparison keeps the first candidate among equal scores.
    String[] best = candidates.get(0);
    if (candidates.size() > 1) {
        double bestScore = _levenshteinDistance(token, best[1]);
        for (String[] candidate : candidates.subList(1, candidates.size())) {
            double score = _levenshteinDistance(token, candidate[1]);
            if (Double.compare(score, bestScore) > 0) {
                best = candidate;
                bestScore = score;
            }
        }
    }
    return List.of(_createCategorySemantics(best[0], best[1]));
}
Use of io.openk9.search.api.query.parser.CategorySemantics in project openk9 by smclab.
Class BaseNerAnnotator, method annotate_.
/**
 * Annotates the given tokens with entities of this annotator's {@code category} found
 * in the tenant's entity index.
 *
 * <p>Nothing is searched when {@code _containsStopword(tokens)} is true. Otherwise the
 * query requires {@code type.keyword} to match {@code category} and adds one
 * {@code must} clause on {@code name} for every token that is not in
 * {@code stopWords}. At most {@code _annotatorConfig.nerSize()} hits are requested and
 * each usable hit becomes one {@link CategorySemantics} whose category is
 * {@code "$" + type}.
 *
 * <p>Hits without a source document, or lacking any of {@code type}, {@code name},
 * {@code tenantId} or {@code id}, are skipped with a warning: {@code Map.of} rejects
 * null values, so a single incomplete document would otherwise abort the whole
 * annotation with a {@link NullPointerException}.
 *
 * @param tenantId tenant whose entity index is searched; {@code -1} searches the entity
 *     indices of all tenants
 * @param tokens tokens to annotate
 * @return the entity annotations found, possibly empty; also empty when the search
 *     fails with an {@link IOException} (which is logged)
 */
@Override
public List<CategorySemantics> annotate_(long tenantId, String... tokens) {
    // Guarded like the other debug statements so the array is not rendered needlessly.
    if (_log.isDebugEnabled()) {
        _log.debug(Arrays.toString(tokens));
    }
    if (_containsStopword(tokens)) {
        return List.of();
    }
    RestHighLevelClient restHighLevelClient = restHighLevelClientProvider.get();

    BoolQueryBuilder builder = QueryBuilders.boolQuery();
    builder.must(QueryBuilders.matchQuery("type.keyword", category));
    for (String token : tokens) {
        if (!stopWords.contains(token)) {
            builder.must(query("name", token));
        }
    }

    SearchRequest searchRequest;
    if (tenantId == -1) {
        searchRequest = new SearchRequest("*-entity");
    } else {
        searchRequest = new SearchRequest(tenantId + "-entity");
    }

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.size(_annotatorConfig.nerSize());
    searchSourceBuilder.query(builder);
    searchRequest.source(searchSourceBuilder);

    List<CategorySemantics> list = new ArrayList<>();
    if (_log.isDebugEnabled()) {
        _log.debug(builder.toString());
    }
    try {
        SearchResponse search = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        for (SearchHit hit : search.getHits()) {
            Map<String, Object> source = hit.getSourceAsMap();
            Object entityType = source == null ? null : source.get("type");
            Object entityName = source == null ? null : source.get("name");
            Object entityTenantId = source == null ? null : source.get("tenantId");
            Object entityId = source == null ? null : source.get("id");
            if (entityType == null || entityName == null || entityTenantId == null || entityId == null) {
                // Map.of is null-hostile; drop the incomplete hit instead of failing the call.
                _log.warn("Skipping entity hit " + hit.getId() + ": missing type, name, tenantId or id");
                continue;
            }
            list.add(CategorySemantics.of("$" + entityType, Map.of("tokenType", "ENTITY", "entityType", entityType, "entityName", entityName, "tenantId", entityTenantId, "value", entityId, "score", hit.getScore())));
        }
        if (_log.isDebugEnabled()) {
            _log.debug(list.toString());
        }
    } catch (IOException e) {
        _log.error(e.getMessage(), e);
    }
    return list;
}
Aggregations