Search in sources :

Example 1 with PopularityCounter

Use of org.opengrok.suggest.popular.PopularityCounter in the project OpenGrok by OpenGrok.

From the class SuggesterSearcher, method suggest:

/**
 * Walks the candidate terms that {@code suggesterQuery} exposes for its field in one index
 * segment, scores each of them and returns the content of a {@code LookupPriorityQueue}
 * created with capacity {@code resultSize}.
 *
 * <p>How a candidate is scored depends on {@code query}:
 * <ul>
 *   <li>{@code null} or a {@link MatchAllDocsQuery}: the term's document frequency is passed
 *       through {@code normalizeDocumentFrequency} together with {@code numDocs};</li>
 *   <li>{@code needPositionsAndFrequencies(query)} is true: {@code getPhraseScore} is used;</li>
 *   <li>otherwise: {@code getDocumentFrequency} over the document ids held by the
 *       {@code ComplexQueryData} computed for {@code query}.</li>
 * </ul>
 * Candidates whose score is not positive are dropped. The remaining ones get
 * {@code searchCounts.get(term) * TERM_ALREADY_SEARCHED_MULTIPLIER} added to their score.
 *
 * <p>The current thread's interrupt status is polled on entry and before every candidate;
 * when it is set, the {@code interrupted} field is raised and processing stops.
 *
 * @param query the rest of the user's query; may be {@code null}
 * @param leafReaderContext the index segment to read terms and postings from
 * @param project value stored in every produced {@code LookupResultItem}
 * @param suggesterQuery supplies the field name and the terms enumeration to suggest from
 * @param searchCounts per-term counter used to boost the score
 * @return the queue's result; an empty list if interruption was detected before the term
 *         walk started
 * @throws IOException propagated from the underlying index access
 */
private List<LookupResultItem> suggest(final Query query, final LeafReaderContext leafReaderContext, final String project, final SuggesterQuery suggesterQuery, final PopularityCounter searchCounts) throws IOException {
    if (Thread.currentThread().isInterrupted()) {
        interrupted = true;
        return Collections.emptyList();
    }

    // Bytes of the terms in the query that belong to the suggester's field; only collected
    // when such terms are to be excluded from the suggestions.
    final boolean skipQueryTerms = shouldLeaveOutSameTerms(query, suggesterQuery);
    final Set<BytesRef> queryTermBytes;
    if (skipQueryTerms) {
        queryTermBytes = SuggesterUtils.intoTermsExceptPhraseQuery(query)
                .stream()
                .filter(t -> t.field().equals(suggesterQuery.getField()))
                .map(Term::bytes)
                .collect(Collectors.toSet());
    } else {
        queryTermBytes = null;
    }

    // A real (non match-all) query means candidates are scored against that query's data.
    final boolean restrictToQuery = !(query == null || query instanceof MatchAllDocsQuery);
    ComplexQueryData queryData = null;
    if (restrictToQuery) {
        queryData = getComplexQueryData(query, leafReaderContext);
        if (interrupted) {
            return Collections.emptyList();
        }
    }

    final Terms fieldTerms = leafReaderContext.reader().terms(suggesterQuery.getField());
    final TermsEnum candidates = suggesterQuery.getTermsEnumForSuggestions(fieldTerms);
    final LookupPriorityQueue best = new LookupPriorityQueue(resultSize);

    // The postings detail level is the same for every candidate, so pick it once.
    final boolean phraseScoring = needPositionsAndFrequencies(query);
    final int postingsFlags = phraseScoring
            ? PostingsEnum.POSITIONS | PostingsEnum.FREQS
            : PostingsEnum.NONE;

    PostingsEnum postings = null;
    for (BytesRef candidate = candidates.next(); candidate != null; candidate = candidates.next()) {
        if (Thread.currentThread().isInterrupted()) {
            interrupted = true;
            break;
        }

        // The previous PostingsEnum is handed back in so that it can be reused.
        postings = candidates.postings(postings, postingsFlags);

        int score = 0;
        if (!restrictToQuery) {
            score = normalizeDocumentFrequency(candidates.docFreq(), numDocs);
        } else if (phraseScoring) {
            score = getPhraseScore(queryData, leafReaderContext.docBase, postings);
        } else if (queryData != null) {
            score = getDocumentFrequency(queryData.documentIds, leafReaderContext.docBase, postings);
        }

        if (score <= 0) {
            continue;
        }
        if (skipQueryTerms && queryTermBytes.contains(candidate)) {
            continue;
        }

        // Compound assignment kept deliberately: it preserves whatever implicit conversion
        // the original expression performed.
        score += searchCounts.get(candidate) * TERM_ALREADY_SEARCHED_MULTIPLIER;
        if (queue_canInsert(best, score)) {
            best.insertWithOverflow(new LookupResultItem(candidate.utf8ToString(), project, score));
        }
    }
    return best.getResult();
}

/** Thin wrapper that asks {@code queue} whether an item with {@code score} can be inserted. */
private static boolean queue_canInsert(final LookupPriorityQueue queue, final int score) {
    return queue.canInsert(score);
}
Also used : Query(org.apache.lucene.search.Query) LeafCollector(org.apache.lucene.search.LeafCollector) Term(org.apache.lucene.index.Term) IntsHolder(org.opengrok.suggest.query.data.IntsHolder) Scorable(org.apache.lucene.search.Scorable) CustomPhraseQuery(org.opengrok.suggest.query.customized.CustomPhraseQuery) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) PopularityCounter(org.opengrok.suggest.popular.PopularityCounter) TermsEnum(org.apache.lucene.index.TermsEnum) SuggesterRangeQuery(org.opengrok.suggest.query.SuggesterRangeQuery) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) BitIntsHolder(org.opengrok.suggest.query.data.BitIntsHolder) PostingsEnum(org.apache.lucene.index.PostingsEnum) Terms(org.apache.lucene.index.Terms) Scorer(org.apache.lucene.search.Scorer) BytesRef(org.apache.lucene.util.BytesRef) Set(java.util.Set) IOException(java.io.IOException) Collector(org.apache.lucene.search.Collector) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) Logger(java.util.logging.Logger) SuggesterQuery(org.opengrok.suggest.query.SuggesterQuery) Collectors(java.util.stream.Collectors) BooleanClause(org.apache.lucene.search.BooleanClause) ScoreMode(org.apache.lucene.search.ScoreMode) List(java.util.List) BooleanQuery(org.apache.lucene.search.BooleanQuery) Collections(java.util.Collections) IndexReader(org.apache.lucene.index.IndexReader) PhraseScorer(org.opengrok.suggest.query.PhraseScorer) IndexSearcher(org.apache.lucene.search.IndexSearcher) Terms(org.apache.lucene.index.Terms) Term(org.apache.lucene.index.Term) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermsEnum(org.apache.lucene.index.TermsEnum) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 List (java.util.List)1 Set (java.util.Set)1 Level (java.util.logging.Level)1 Logger (java.util.logging.Logger)1 Collectors (java.util.stream.Collectors)1 IndexReader (org.apache.lucene.index.IndexReader)1 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)1 PostingsEnum (org.apache.lucene.index.PostingsEnum)1 Term (org.apache.lucene.index.Term)1 Terms (org.apache.lucene.index.Terms)1 TermsEnum (org.apache.lucene.index.TermsEnum)1 BooleanClause (org.apache.lucene.search.BooleanClause)1 BooleanQuery (org.apache.lucene.search.BooleanQuery)1 Collector (org.apache.lucene.search.Collector)1 DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator)1 IndexSearcher (org.apache.lucene.search.IndexSearcher)1 LeafCollector (org.apache.lucene.search.LeafCollector)1