Use of org.graylog.shaded.elasticsearch7.org.apache.lucene.search.TotalHits in project snow-owl by b2ihealthcare.
The class EsDocumentSearcher, method search.
@Override
public <T> Hits<T> search(Query<T> query) throws IOException {
    Stopwatch w = Stopwatch.createStarted();
    admin.log().trace("Executing query '{}'", query);
    final EsClient client = admin.client();
    final List<DocumentMapping> mappings = admin.mappings().getDocumentMapping(query);
    final DocumentMapping primaryMapping = Iterables.getFirst(mappings, null);
    // Restrict variables to the theoretical maximum
    final int limit = query.getLimit();
    final int toRead = Ints.min(limit, resultWindow);
    // TODO support multiple document mappings during query building
    final EsQueryBuilder esQueryBuilder = new EsQueryBuilder(primaryMapping, admin.settings(), admin.log());
    final QueryBuilder esQuery = esQueryBuilder.build(query.getWhere());
    final SearchRequest req = new SearchRequest(admin.getTypeIndexes(mappings).toArray(length -> new String[length]));
    // configure caching
    req.requestCache(query.isCached());
    final SearchSourceBuilder reqSource = req.source()
        .size(toRead)
        .query(esQuery)
        .trackScores(esQueryBuilder.needsScoring())
        .trackTotalHitsUpTo(Integer.MAX_VALUE);
    // field selection
    final boolean fetchSource = applySourceFiltering(query.getFields(), primaryMapping, reqSource);
    // ES internals require loading the _id field when we require the _source
    if (fetchSource) {
        reqSource.storedFields(STORED_FIELDS_ID_ONLY);
    } else {
        reqSource.storedFields(STORED_FIELDS_NONE);
    }
    // paging config
    final boolean isLocalStreaming = limit > resultWindow;
    final boolean isLiveStreaming = !Strings.isNullOrEmpty(query.getSearchAfter());
    if (isLocalStreaming) {
        checkArgument(!isLiveStreaming, "Cannot use searchAfter when requesting more items (%s) than the configured result window (%s).", limit, resultWindow);
    } else if (isLiveStreaming) {
        reqSource.searchAfter(fromSearchAfterToken(query.getSearchAfter()));
    }
    // sorting config with a default sort field based on scroll config
    addSort(primaryMapping, reqSource, query.getSortBy());
    // disable explain explicitly, just in case
    reqSource.explain(false);
    // disable version field explicitly, just in case
    reqSource.version(false);
    // perform search
    SearchResponse response = null;
    try {
        response = client.search(req);
    } catch (Exception e) {
        if (e instanceof ElasticsearchStatusException && ((ElasticsearchStatusException) e).status() == RestStatus.BAD_REQUEST) {
            throw new IllegalArgumentException(e.getMessage(), e);
        }
        admin.log().error("Couldn't execute query", e);
        throw new IndexException("Couldn't execute query: " + e.getMessage(), null);
    }
    SearchHits responseHits = response.getHits();
    final TotalHits total = responseHits.getTotalHits();
    checkState(total.relation == Relation.EQUAL_TO, "Searches should always track total hits accurately");
    final int totalHitCount = (int) total.value;
    final SearchHit[] firstHits = responseHits.getHits();
    final int firstCount = firstHits.length;
    final int remainingCount = Math.min(limit, totalHitCount) - firstCount;
    // Add the first set of results
    final ImmutableList.Builder<SearchHit> allHits = ImmutableList.builder();
    allHits.addAll(responseHits);
    // If the client requested all data at once and there are more hits to retrieve, collect them all as part of the request
    if (isLocalStreaming && remainingCount > 0) {
        admin.log().warn("Returning all matches (totalHits: '{}') larger than the currently configured result_window ('{}') might not be the most "
            + "efficient way of getting the data. Consider using the index pagination API (searchAfter) instead.", totalHitCount, resultWindow);
        while (true) {
            // Extract searchAfter values for the next set of results
            final SearchHit lastHit = Iterables.getLast(responseHits, null);
            if (lastHit == null) {
                break;
            }
            reqSource.searchAfter(lastHit.getSortValues());
            // Request more search results, adding them to the list builder
            response = client.search(req);
            responseHits = response.getHits();
            allHits.addAll(responseHits);
        }
    }
    final Class<T> select = query.getSelection().getSelect();
    final List<Class<?>> from = query.getSelection().getFrom();
    final Hits<T> hits = toHits(select, from, query.getFields(), fetchSource, limit, totalHitCount, query.getSortBy(), allHits.build());
    admin.log().trace("Executed query '{}' in '{}'", query, w);
    return hits;
}
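The checkState call above only accepts searches whose reported total is exact. A minimal sketch, not taken from the snow-owl codebase, of why that Relation check matters (it uses the plain org.apache.lucene.search.TotalHits class, which the Graylog-shaded variant mirrors):

// Sketch: TotalHits.value is an exact count only when relation == EQUAL_TO;
// otherwise (GREATER_THAN_OR_EQUAL_TO) it is merely a lower bound, which happens when
// total-hit tracking is capped (e.g. Elasticsearch's default track_total_hits threshold).
// Setting trackTotalHitsUpTo(Integer.MAX_VALUE), as the method above does, avoids that cap.
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.TotalHits.Relation;

class ExactTotalSketch {
    static long exactCountOrFail(TotalHits total) {
        if (total.relation != Relation.EQUAL_TO) {
            throw new IllegalStateException("Total hit count is only a lower bound: " + total.value);
        }
        return total.value;
    }
}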
Use of org.graylog.shaded.elasticsearch7.org.apache.lucene.search.TotalHits in project sonarqube by SonarSource.
The class RuleIndexDefinitionTest, method support_long_html_description.
@Test
public void support_long_html_description() {
    String longText = StringUtils.repeat("The quick brown fox jumps over the lazy dog ", 700);
    List<AnalyzeResponse.AnalyzeToken> tokens = analyzeIndexedTokens(longText);
    assertThat(tokens).extracting(AnalyzeResponse.AnalyzeToken::getTerm).containsOnly("quick", "brown", "fox", "jump", "over", "lazi", "dog");
    // the following method fails if PUT fails
    tester.putDocuments(TYPE_RULE, new RuleDoc(ImmutableMap.of(
        FIELD_RULE_UUID, "123",
        FIELD_RULE_HTML_DESCRIPTION, longText,
        FIELD_RULE_REPOSITORY, "squid",
        FIELD_RULE_KEY, "squid:S001")));
    assertThat(tester.countDocuments(TYPE_RULE)).isOne();
    assertThat(tester.client()
        .search(EsClient.prepareSearch(TYPE_RULE)
            .source(new SearchSourceBuilder().query(matchQuery(ENGLISH_HTML_ANALYZER.subField(FIELD_RULE_HTML_DESCRIPTION), "brown fox jumps lazy"))))
        .getHits()
        .getTotalHits())
        .isEqualTo(new TotalHits(1, TotalHits.Relation.EQUAL_TO));
}
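The final assertion works because TotalHits is a value object with its own equals implementation, so a freshly constructed instance with the same value and relation compares equal to the one returned by the client. A minimal standalone sketch, assuming the plain Lucene TotalHits class (the shaded copy behaves the same):

import org.apache.lucene.search.TotalHits;

class TotalHitsEqualitySketch {
    public static void main(String[] args) {
        TotalHits a = new TotalHits(1, TotalHits.Relation.EQUAL_TO);
        TotalHits b = new TotalHits(1, TotalHits.Relation.EQUAL_TO);
        System.out.println(a.equals(b)); // true: equality is based on value and relation
        System.out.println(a);           // "1 hits"; a lower-bound relation would add a "+" suffix
    }
}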
Use of org.graylog.shaded.elasticsearch7.org.apache.lucene.search.TotalHits in project sonarqube by SonarSource.
The class EsUtilsTest, method convertToDocs.
@Test
public void convertToDocs() {
    SearchHits hits = new SearchHits(new SearchHit[] { new SearchHit(16) }, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1);
    List<BaseDoc> docs = EsUtils.convertToDocs(hits, IssueDoc::new);
    assertThat(docs).hasSize(1);
}
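For context, a minimal sketch of what a convertToDocs-style helper does: each hit's source map is passed to a constructor reference such as IssueDoc::new. This is a hypothetical generic version, not SonarQube's actual EsUtils implementation, and the imports use the plain Elasticsearch packages rather than the shaded ones:

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;

class DocConversionSketch {
    // Hypothetical helper: converts every hit's _source map into a typed document.
    static <D> List<D> convert(SearchHits hits, Function<Map<String, Object>, D> converter) {
        List<D> docs = new ArrayList<>(hits.getHits().length);
        for (SearchHit hit : hits) {
            docs.add(converter.apply(hit.getSourceAsMap()));
        }
        return docs;
    }
}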
Use of org.graylog.shaded.elasticsearch7.org.apache.lucene.search.TotalHits in project Anserini by castorini.
The class SearchCollection, method searchTweets.
public <K> ScoredDocuments searchTweets(IndexSearcher searcher, K qid, String queryString, long t, RerankerCascade cascade, ScoredDocuments queryQrels, boolean hasRelDocs) throws IOException {
    Query keywordQuery;
    if (args.sdm) {
        keywordQuery = new SdmQueryGenerator(args.sdm_tw, args.sdm_ow, args.sdm_uw).buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
    } else {
        try {
            QueryGenerator generator = (QueryGenerator) Class.forName("io.anserini.search.query." + args.queryGenerator).getConstructor().newInstance();
            keywordQuery = generator.buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
        } catch (Exception e) {
            e.printStackTrace();
            throw new IllegalArgumentException("Unable to load QueryGenerator: " + args.topicReader);
        }
    }
    List<String> queryTokens = AnalyzerUtils.analyze(analyzer, queryString);
    // Do not consider the tweets with tweet ids that are beyond the queryTweetTime
    // <querytweettime> tag contains the timestamp of the query in terms of the
    // chronologically nearest tweet id within the corpus
    Query filter = LongPoint.newRangeQuery(TweetGenerator.TweetField.ID_LONG.name, 0L, t);
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(filter, BooleanClause.Occur.FILTER);
    builder.add(keywordQuery, BooleanClause.Occur.MUST);
    Query compositeQuery = builder.build();
    TopDocs rs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {});
    if (!isRerank || (args.rerankcutoff > 0 && args.rf_qrels == null) || (args.rf_qrels != null && !hasRelDocs)) {
        if (args.arbitraryScoreTieBreak) {
            // Figure out how to break the scoring ties.
            rs = searcher.search(compositeQuery, (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits);
        } else {
            rs = searcher.search(compositeQuery, (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits, BREAK_SCORE_TIES_BY_TWEETID, true);
        }
    }
    RerankerContext context = new RerankerContext<>(searcher, qid, keywordQuery, null, queryString, queryTokens, filter, args);
    ScoredDocuments scoredFbDocs;
    if (isRerank && args.rf_qrels != null) {
        if (hasRelDocs) {
            scoredFbDocs = queryQrels;
        } else {
            // if no relevant documents, only perform score based tie breaking next
            scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
            cascade = new RerankerCascade();
            cascade.add(new ScoreTiesAdjusterReranker());
        }
    } else {
        scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
    }
    return cascade.run(scoredFbDocs, context);
}
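Two patterns in the method above are worth isolating: the empty-but-exact TopDocs placeholder built from a zero TotalHits, and the FILTER clause that enforces the tweet-time cutoff without affecting scoring. A self-contained sketch of both, using the plain Lucene API; the field name "id_long" is an assumption for illustration:

import org.apache.lucene.document.LongPoint;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHits;

class TweetFilterSketch {
    // Placeholder result set: zero hits, exact relation, no documents.
    static TopDocs emptyResults() {
        return new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]);
    }

    // Wrap the keyword query so only tweets with ids up to maxTweetId can match;
    // the FILTER clause restricts matches without contributing to the score.
    static Query restrictToTweetsUpTo(Query keywordQuery, long maxTweetId) {
        return new BooleanQuery.Builder()
            .add(LongPoint.newRangeQuery("id_long", 0L, maxTweetId), BooleanClause.Occur.FILTER)
            .add(keywordQuery, BooleanClause.Occur.MUST)
            .build();
    }
}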
Use of org.graylog.shaded.elasticsearch7.org.apache.lucene.search.TotalHits in project Anserini by castorini.
The class SearchCollection, method search.
public <K> ScoredDocuments search(IndexSearcher searcher, K qid, String queryString, RerankerCascade cascade, ScoredDocuments queryQrels, boolean hasRelDocs) throws IOException {
    Query query = null;
    if (args.sdm) {
        query = new SdmQueryGenerator(args.sdm_tw, args.sdm_ow, args.sdm_uw).buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
    } else {
        QueryGenerator generator;
        try {
            generator = (QueryGenerator) Class.forName("io.anserini.search.query." + args.queryGenerator).getConstructor().newInstance();
        } catch (Exception e) {
            e.printStackTrace();
            throw new IllegalArgumentException("Unable to load QueryGenerator: " + args.topicReader);
        }
        query = generator.buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
    }
    TopDocs rs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {});
    if (!isRerank || (args.rerankcutoff > 0 && args.rf_qrels == null) || (args.rf_qrels != null && !hasRelDocs)) {
        if (args.arbitraryScoreTieBreak) {
            // Figure out how to break the scoring ties.
            rs = searcher.search(query, (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits);
        } else {
            rs = searcher.search(query, (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits, BREAK_SCORE_TIES_BY_DOCID, true);
        }
    }
    List<String> queryTokens = AnalyzerUtils.analyze(analyzer, queryString);
    queries.put(qid.toString(), queryTokens);
    RerankerContext context = new RerankerContext<>(searcher, qid, query, null, queryString, queryTokens, null, args);
    ScoredDocuments scoredFbDocs;
    if (isRerank && args.rf_qrels != null) {
        if (hasRelDocs) {
            scoredFbDocs = queryQrels;
        } else {
            // if no relevant documents, only perform score based tie breaking next
            LOG.info("No relevant documents for " + qid.toString());
            scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
            cascade = new RerankerCascade();
            cascade.add(new ScoreTiesAdjusterReranker());
        }
    } else {
        scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
    }
    return cascade.run(scoredFbDocs, context);
}
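The empty TopDocs fallback above is built with an EQUAL_TO relation so downstream code can treat its count as exact. When results do come from IndexSearcher, the relation should be checked, because Lucene stops counting hits after a threshold (1,000 by default) unless told otherwise. A minimal standalone sketch of reading TotalHits from a search result; the plain Lucene 8.x API is assumed and the index path is a placeholder:

import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.store.FSDirectory;

class TotalHitsRelationSketch {
    public static void main(String[] args) throws IOException {
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(args[0])))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs rs = searcher.search(new MatchAllDocsQuery(), 10);
            TotalHits total = rs.totalHits;
            // relation == EQUAL_TO means the count is exact; otherwise it is only a lower bound.
            String suffix = total.relation == TotalHits.Relation.EQUAL_TO ? "" : "+";
            System.out.println("Total hits: " + total.value + suffix);
        }
    }
}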