Search in sources :

Example 6 with TotalHits

use of org.graylog.shaded.elasticsearch7.org.apache.lucene.search.TotalHits in project snow-owl by b2ihealthcare.

the class EsDocumentSearcher method search.

@Override
public <T> Hits<T> search(Query<T> query) throws IOException {
    Stopwatch w = Stopwatch.createStarted();
    admin.log().trace("Executing query '{}'", query);
    final EsClient client = admin.client();
    final List<DocumentMapping> mappings = admin.mappings().getDocumentMapping(query);
    final DocumentMapping primaryMapping = Iterables.getFirst(mappings, null);
    // Restrict variables to the theoretical maximum
    final int limit = query.getLimit();
    final int toRead = Ints.min(limit, resultWindow);
    // TODO support multiple document mappings during query building
    final EsQueryBuilder esQueryBuilder = new EsQueryBuilder(primaryMapping, admin.settings(), admin.log());
    final QueryBuilder esQuery = esQueryBuilder.build(query.getWhere());
    final SearchRequest req = new SearchRequest(admin.getTypeIndexes(mappings).toArray(length -> new String[length]));
    // configure caching
    req.requestCache(query.isCached());
    final SearchSourceBuilder reqSource = req.source().size(toRead).query(esQuery).trackScores(esQueryBuilder.needsScoring()).trackTotalHitsUpTo(Integer.MAX_VALUE);
    // field selection
    final boolean fetchSource = applySourceFiltering(query.getFields(), primaryMapping, reqSource);
    // ES internals require loading the _id field when we require the _source
    if (fetchSource) {
        reqSource.storedFields(STORED_FIELDS_ID_ONLY);
    } else {
        reqSource.storedFields(STORED_FIELDS_NONE);
    }
    // paging config
    final boolean isLocalStreaming = limit > resultWindow;
    final boolean isLiveStreaming = !Strings.isNullOrEmpty(query.getSearchAfter());
    if (isLocalStreaming) {
        checkArgument(!isLiveStreaming, "Cannot use searchAfter when requesting more items (%s) than the configured result window (%s).", limit, resultWindow);
    } else if (isLiveStreaming) {
        reqSource.searchAfter(fromSearchAfterToken(query.getSearchAfter()));
    }
    // sorting config with a default sort field based on scroll config
    addSort(primaryMapping, reqSource, query.getSortBy());
    // disable explain explicitly, just in case
    reqSource.explain(false);
    // disable version field explicitly, just in case
    reqSource.version(false);
    // perform search
    SearchResponse response = null;
    try {
        response = client.search(req);
    } catch (Exception e) {
        if (e instanceof ElasticsearchStatusException && ((ElasticsearchStatusException) e).status() == RestStatus.BAD_REQUEST) {
            throw new IllegalArgumentException(e.getMessage(), e);
        }
        admin.log().error("Couldn't execute query", e);
        throw new IndexException("Couldn't execute query: " + e.getMessage(), null);
    }
    SearchHits responseHits = response.getHits();
    final TotalHits total = responseHits.getTotalHits();
    checkState(total.relation == Relation.EQUAL_TO, "Searches should always track total hits accurately");
    final int totalHitCount = (int) total.value;
    final SearchHit[] firstHits = responseHits.getHits();
    final int firstCount = firstHits.length;
    final int remainingCount = Math.min(limit, totalHitCount) - firstCount;
    // Add the first set of results
    final ImmutableList.Builder<SearchHit> allHits = ImmutableList.builder();
    allHits.addAll(responseHits);
    // If the client requested all data at once and there are more hits to retrieve, collect them all as part of the request
    if (isLocalStreaming && remainingCount > 0) {
        admin.log().warn("Returning all matches (totalHits: '{}') larger than the currently configured result_window ('{}') might not be the most " + "efficient way of getting the data. Consider using the index pagination API (searchAfter) instead.", totalHitCount, resultWindow);
        while (true) {
            // Extract searchAfter values for the next set of results
            final SearchHit lastHit = Iterables.getLast(responseHits, null);
            if (lastHit == null) {
                break;
            }
            reqSource.searchAfter(lastHit.getSortValues());
            // Request more search results, adding them to the list builder
            response = client.search(req);
            responseHits = response.getHits();
            allHits.addAll(responseHits);
        }
    }
    final Class<T> select = query.getSelection().getSelect();
    final List<Class<?>> from = query.getSelection().getFrom();
    final Hits<T> hits = toHits(select, from, query.getFields(), fetchSource, limit, totalHitCount, query.getSortBy(), allHits.build());
    admin.log().trace("Executed query '{}' in '{}'", query, w);
    return hits;
}
Also used : GetResponse(org.elasticsearch.action.get.GetResponse) SortBuilders(org.elasticsearch.search.sort.SortBuilders) Query(com.b2international.index.query.Query) SearchHits(org.elasticsearch.search.SearchHits) TopHitsAggregationBuilder(org.elasticsearch.search.aggregations.metrics.TopHitsAggregationBuilder) FormattedRuntimeException(com.b2international.commons.exceptions.FormattedRuntimeException) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ByteArrayInputStream(java.io.ByteArrayInputStream) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchStatusException(org.elasticsearch.ElasticsearchStatusException) CompareUtils(com.b2international.commons.CompareUtils) com.google.common.collect(com.google.common.collect) SearchHit(org.elasticsearch.search.SearchHit) GetRequest(org.elasticsearch.action.get.GetRequest) Aggregations(org.elasticsearch.search.aggregations.Aggregations) SortBy(com.b2international.index.query.SortBy) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) EsClient(com.b2international.index.es.client.EsClient) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) Expressions(com.b2international.index.query.Expressions) RestStatus(org.elasticsearch.rest.RestStatus) AggregationBuilder(com.b2international.index.aggregations.AggregationBuilder) SortOrder(org.elasticsearch.search.sort.SortOrder) ReverseNested(org.elasticsearch.search.aggregations.bucket.nested.ReverseNested) EsQueryBuilder(com.b2international.index.es.query.EsQueryBuilder) JavaBinCodec(org.apache.solr.common.util.JavaBinCodec) TopHits(org.elasticsearch.search.aggregations.metrics.TopHits) FetchSourceContext(org.elasticsearch.search.fetch.subphase.FetchSourceContext) SortByScript(com.b2international.index.query.SortBy.SortByScript) DataInputStream(java.io.DataInputStream) SortByField(com.b2international.index.query.SortBy.SortByField) java.util(java.util) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Stopwatch(com.google.common.base.Stopwatch) SearchRequest(org.elasticsearch.action.search.SearchRequest) Aggregation(com.b2international.index.aggregations.Aggregation) EsIndexAdmin(com.b2international.index.es.admin.EsIndexAdmin) Strings(com.google.common.base.Strings) com.b2international.index(com.b2international.index) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) MultiSortBy(com.b2international.index.query.SortBy.MultiSortBy) Nested(org.elasticsearch.search.aggregations.bucket.nested.Nested) TermsAggregationBuilder(org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder) BadRequestException(com.b2international.commons.exceptions.BadRequestException) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) IOException(java.io.IOException) AggregationBuilders(org.elasticsearch.search.aggregations.AggregationBuilders) Relation(org.apache.lucene.search.TotalHits.Relation) Bucket(com.b2international.index.aggregations.Bucket) Ints(com.google.common.primitives.Ints) DocumentMapping(com.b2international.index.mapping.DocumentMapping) TotalHits(org.apache.lucene.search.TotalHits) TotalHits(org.apache.lucene.search.TotalHits) SearchRequest(org.elasticsearch.action.search.SearchRequest) SearchHit(org.elasticsearch.search.SearchHit) Stopwatch(com.google.common.base.Stopwatch) EsQueryBuilder(com.b2international.index.es.query.EsQueryBuilder) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) ElasticsearchStatusException(org.elasticsearch.ElasticsearchStatusException) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) SearchHits(org.elasticsearch.search.SearchHits) EsQueryBuilder(com.b2international.index.es.query.EsQueryBuilder) EsClient(com.b2international.index.es.client.EsClient) DocumentMapping(com.b2international.index.mapping.DocumentMapping) FormattedRuntimeException(com.b2international.commons.exceptions.FormattedRuntimeException) ElasticsearchStatusException(org.elasticsearch.ElasticsearchStatusException) BadRequestException(com.b2international.commons.exceptions.BadRequestException) IOException(java.io.IOException) SearchResponse(org.elasticsearch.action.search.SearchResponse)

Example 7 with TotalHits

use of org.graylog.shaded.elasticsearch7.org.apache.lucene.search.TotalHits in project sonarqube by SonarSource.

the class RuleIndexDefinitionTest method support_long_html_description.

@Test
public void support_long_html_description() {
    String longText = StringUtils.repeat("The quick brown fox jumps over the lazy dog ", 700);
    List<AnalyzeResponse.AnalyzeToken> tokens = analyzeIndexedTokens(longText);
    assertThat(tokens).extracting(AnalyzeResponse.AnalyzeToken::getTerm).containsOnly("quick", "brown", "fox", "jump", "over", "lazi", "dog");
    // the following method fails if PUT fails
    tester.putDocuments(TYPE_RULE, new RuleDoc(ImmutableMap.of(FIELD_RULE_UUID, "123", FIELD_RULE_HTML_DESCRIPTION, longText, FIELD_RULE_REPOSITORY, "squid", FIELD_RULE_KEY, "squid:S001")));
    assertThat(tester.countDocuments(TYPE_RULE)).isOne();
    assertThat(tester.client().search(EsClient.prepareSearch(TYPE_RULE).source(new SearchSourceBuilder().query(matchQuery(ENGLISH_HTML_ANALYZER.subField(FIELD_RULE_HTML_DESCRIPTION), "brown fox jumps lazy")))).getHits().getTotalHits()).isEqualTo(new TotalHits(1, TotalHits.Relation.EQUAL_TO));
}
Also used : TotalHits(org.apache.lucene.search.TotalHits) AnalyzeResponse(org.elasticsearch.client.indices.AnalyzeResponse) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) Test(org.junit.Test)

Example 8 with TotalHits

use of org.graylog.shaded.elasticsearch7.org.apache.lucene.search.TotalHits in project sonarqube by SonarSource.

the class EsUtilsTest method convertToDocs.

@Test
public void convertToDocs() {
    SearchHits hits = new SearchHits(new SearchHit[] { new SearchHit(16) }, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1);
    List<BaseDoc> docs = EsUtils.convertToDocs(hits, IssueDoc::new);
    assertThat(docs).hasSize(1);
}
Also used : TotalHits(org.apache.lucene.search.TotalHits) IssueDoc(org.sonar.server.issue.index.IssueDoc) SearchHit(org.elasticsearch.search.SearchHit) SearchHits(org.elasticsearch.search.SearchHits) Test(org.junit.Test)

Example 9 with TotalHits

use of org.graylog.shaded.elasticsearch7.org.apache.lucene.search.TotalHits in project Anserini by castorini.

the class SearchCollection method searchTweets.

public <K> ScoredDocuments searchTweets(IndexSearcher searcher, K qid, String queryString, long t, RerankerCascade cascade, ScoredDocuments queryQrels, boolean hasRelDocs) throws IOException {
    Query keywordQuery;
    if (args.sdm) {
        keywordQuery = new SdmQueryGenerator(args.sdm_tw, args.sdm_ow, args.sdm_uw).buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
    } else {
        try {
            QueryGenerator generator = (QueryGenerator) Class.forName("io.anserini.search.query." + args.queryGenerator).getConstructor().newInstance();
            keywordQuery = generator.buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
        } catch (Exception e) {
            e.printStackTrace();
            throw new IllegalArgumentException("Unable to load QueryGenerator: " + args.topicReader);
        }
    }
    List<String> queryTokens = AnalyzerUtils.analyze(analyzer, queryString);
    // Do not consider the tweets with tweet ids that are beyond the queryTweetTime
    // <querytweettime> tag contains the timestamp of the query in terms of the
    // chronologically nearest tweet id within the corpus
    Query filter = LongPoint.newRangeQuery(TweetGenerator.TweetField.ID_LONG.name, 0L, t);
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(filter, BooleanClause.Occur.FILTER);
    builder.add(keywordQuery, BooleanClause.Occur.MUST);
    Query compositeQuery = builder.build();
    TopDocs rs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {});
    if (!isRerank || (args.rerankcutoff > 0 && args.rf_qrels == null) || (args.rf_qrels != null && !hasRelDocs)) {
        if (args.arbitraryScoreTieBreak) {
            // Figure out how to break the scoring ties.
            rs = searcher.search(compositeQuery, (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits);
        } else {
            rs = searcher.search(compositeQuery, (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits, BREAK_SCORE_TIES_BY_TWEETID, true);
        }
    }
    RerankerContext context = new RerankerContext<>(searcher, qid, keywordQuery, null, queryString, queryTokens, filter, args);
    ScoredDocuments scoredFbDocs;
    if (isRerank && args.rf_qrels != null) {
        if (hasRelDocs) {
            scoredFbDocs = queryQrels;
        } else {
            // if no relevant documents, only perform score based tie breaking next
            scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
            cascade = new RerankerCascade();
            cascade.add(new ScoreTiesAdjusterReranker());
        }
    } else {
        scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
    }
    return cascade.run(scoredFbDocs, context);
}
Also used : TotalHits(org.apache.lucene.search.TotalHits) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) TermInSetQuery(org.apache.lucene.search.TermInSetQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ScoredDocuments(io.anserini.rerank.ScoredDocuments) QueryNodeException(org.apache.lucene.queryparser.flexible.core.QueryNodeException) IOException(java.io.IOException) CompletionException(java.util.concurrent.CompletionException) CmdLineException(org.kohsuke.args4j.CmdLineException) AtomicMoveNotSupportedException(java.nio.file.AtomicMoveNotSupportedException) TopDocs(org.apache.lucene.search.TopDocs) RerankerCascade(io.anserini.rerank.RerankerCascade) QueryGenerator(io.anserini.search.query.QueryGenerator) SdmQueryGenerator(io.anserini.search.query.SdmQueryGenerator) ScoreTiesAdjusterReranker(io.anserini.rerank.lib.ScoreTiesAdjusterReranker) SdmQueryGenerator(io.anserini.search.query.SdmQueryGenerator) RerankerContext(io.anserini.rerank.RerankerContext)

Example 10 with TotalHits

use of org.graylog.shaded.elasticsearch7.org.apache.lucene.search.TotalHits in project Anserini by castorini.

the class SearchCollection method search.

public <K> ScoredDocuments search(IndexSearcher searcher, K qid, String queryString, RerankerCascade cascade, ScoredDocuments queryQrels, boolean hasRelDocs) throws IOException {
    Query query = null;
    if (args.sdm) {
        query = new SdmQueryGenerator(args.sdm_tw, args.sdm_ow, args.sdm_uw).buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
    } else {
        QueryGenerator generator;
        try {
            generator = (QueryGenerator) Class.forName("io.anserini.search.query." + args.queryGenerator).getConstructor().newInstance();
        } catch (Exception e) {
            e.printStackTrace();
            throw new IllegalArgumentException("Unable to load QueryGenerator: " + args.topicReader);
        }
        query = generator.buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
    }
    TopDocs rs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {});
    if (!isRerank || (args.rerankcutoff > 0 && args.rf_qrels == null) || (args.rf_qrels != null && !hasRelDocs)) {
        if (args.arbitraryScoreTieBreak) {
            // Figure out how to break the scoring ties.
            rs = searcher.search(query, (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits);
        } else {
            rs = searcher.search(query, (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits, BREAK_SCORE_TIES_BY_DOCID, true);
        }
    }
    List<String> queryTokens = AnalyzerUtils.analyze(analyzer, queryString);
    queries.put(qid.toString(), queryTokens);
    RerankerContext context = new RerankerContext<>(searcher, qid, query, null, queryString, queryTokens, null, args);
    ScoredDocuments scoredFbDocs;
    if (isRerank && args.rf_qrels != null) {
        if (hasRelDocs) {
            scoredFbDocs = queryQrels;
        } else {
            // if no relevant documents, only perform score based tie breaking next
            LOG.info("No relevant documents for " + qid.toString());
            scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
            cascade = new RerankerCascade();
            cascade.add(new ScoreTiesAdjusterReranker());
        }
    } else {
        scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
    }
    return cascade.run(scoredFbDocs, context);
}
Also used : TotalHits(org.apache.lucene.search.TotalHits) Query(org.apache.lucene.search.Query) TermInSetQuery(org.apache.lucene.search.TermInSetQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ScoredDocuments(io.anserini.rerank.ScoredDocuments) QueryNodeException(org.apache.lucene.queryparser.flexible.core.QueryNodeException) IOException(java.io.IOException) CompletionException(java.util.concurrent.CompletionException) CmdLineException(org.kohsuke.args4j.CmdLineException) AtomicMoveNotSupportedException(java.nio.file.AtomicMoveNotSupportedException) TopDocs(org.apache.lucene.search.TopDocs) RerankerCascade(io.anserini.rerank.RerankerCascade) QueryGenerator(io.anserini.search.query.QueryGenerator) SdmQueryGenerator(io.anserini.search.query.SdmQueryGenerator) ScoreTiesAdjusterReranker(io.anserini.rerank.lib.ScoreTiesAdjusterReranker) SdmQueryGenerator(io.anserini.search.query.SdmQueryGenerator) RerankerContext(io.anserini.rerank.RerankerContext)

Aggregations

TotalHits (org.apache.lucene.search.TotalHits)18 Test (org.junit.Test)14 SearchHits (org.elasticsearch.search.SearchHits)13 SearchHit (org.elasticsearch.search.SearchHit)12 TestSupport (com.hazelcast.jet.core.test.TestSupport)9 ParallelJVMTest (com.hazelcast.test.annotation.ParallelJVMTest)9 QuickTest (com.hazelcast.test.annotation.QuickTest)9 SearchRequest (org.elasticsearch.action.search.SearchRequest)8 SearchResponse (org.elasticsearch.action.search.SearchResponse)7 BytesArray (org.elasticsearch.common.bytes.BytesArray)3 Text (org.elasticsearch.common.text.Text)3 SliceBuilder (org.elasticsearch.search.slice.SliceBuilder)3 FunctionEx (com.hazelcast.function.FunctionEx)2 TestOutbox (com.hazelcast.jet.core.test.TestOutbox)2 Prirep (com.hazelcast.jet.elastic.impl.Shard.Prirep)2 HazelcastParallelClassRunner (com.hazelcast.test.HazelcastParallelClassRunner)2 RerankerCascade (io.anserini.rerank.RerankerCascade)2 RerankerContext (io.anserini.rerank.RerankerContext)2 ScoredDocuments (io.anserini.rerank.ScoredDocuments)2 ScoreTiesAdjusterReranker (io.anserini.rerank.lib.ScoreTiesAdjusterReranker)2