Search in sources :

Example 1 with SuggestWordQueue

Use of org.apache.lucene.search.spell.SuggestWordQueue in the lucene-solr project by Apache.

From the class SolrSpellChecker, method mergeSuggestions.

/**
 * Integrate spelling suggestions from the various shards in a distributed environment.
 *
 * <p>For every original term in {@code mergeData.origVsSuggested}, each candidate suggestion is
 * re-scored against the original with this checker's {@link StringDistance} (falling back to
 * {@link LevensteinDistance}), candidates scoring below the minimum accuracy are dropped, and
 * the remaining ones are collected into a bounded queue from which the best {@code count} are
 * reported, best first.
 *
 * @param mergeData       per-shard suggestion data accumulated by the caller
 * @param numSug          capacity of the per-term candidate queue
 * @param count           maximum number of suggestions reported per term
 * @param extendedResults when {@code true}, frequencies are reported for the original term
 *                        (if known) and for each suggestion; otherwise only the suggested
 *                        strings are added
 * @return the merged result holding one token per retained original term
 */
public SpellingResult mergeSuggestions(SpellCheckMergeData mergeData, int numSug, int count, boolean extendedResults) {
    float minAccuracy = 0.5f;
    try {
        minAccuracy = getAccuracy();
    } catch (UnsupportedOperationException ignored) {
        // this checker exposes no accuracy setting; just use .5 as a default
    }
    StringDistance sd;
    try {
        StringDistance configured = getStringDistance();
        sd = configured == null ? new LevensteinDistance() : configured;
    } catch (UnsupportedOperationException uoe) {
        sd = new LevensteinDistance();
    }
    SpellingResult result = new SpellingResult();
    for (Map.Entry<String, HashSet<String>> entry : mergeData.origVsSuggested.entrySet()) {
        String original = entry.getKey();
        // Only use this suggestion if all shards reported it as misspelled,
        // unless it was not a term original to the user's query
        // (WordBreakSolrSpellChecker can add new terms to the response, and we want to keep these)
        Integer shardCount = mergeData.origVsShards.get(original);
        // A missing entry is treated as "reported by no shard" instead of failing on unboxing.
        int numShards = shardCount == null ? 0 : shardCount;
        if (numShards < mergeData.totalNumberShardResponses && mergeData.isOriginalToQuery(original)) {
            continue;
        }
        // The threshold is tracked per original term: once a term's queue fills up it rises to
        // that queue's lowest score, and that raised value must not leak into the next term.
        float min = minAccuracy;
        SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
        for (String candidate : entry.getValue()) {
            SuggestWord sug = mergeData.suggestedVsWord.get(candidate);
            if (sug == null) {
                // no word data was recorded for this candidate; nothing to score
                continue;
            }
            sug.score = sd.getDistance(original, sug.string);
            if (sug.score < min) {
                continue;
            }
            sugQueue.insertWithOverflow(sug);
            if (sugQueue.size() == numSug) {
                // if queue full, maintain the minScore score
                min = sugQueue.top().score;
            }
        }
        // create token
        SpellCheckResponse.Suggestion suggestion = mergeData.origVsSuggestion.get(original);
        Token token = new Token(original, suggestion.getStartOffset(), suggestion.getEndOffset());
        // get top 'count' suggestions out of 'sugQueue.size()' candidates
        int keep = Math.min(count, sugQueue.size());
        SuggestWord[] suggestions = new SuggestWord[keep];
        // Discard the surplus low-ranked entries. The surplus is computed once up front:
        // re-reading sugQueue.size() while popping would stop after only half of them.
        for (int surplus = sugQueue.size() - keep; surplus > 0; surplus--) {
            sugQueue.pop();
        }
        // now collect the top 'count' responses; pop() yields the lowest-ranked entry first,
        // so the array is filled from the back to end up best-first
        for (int k = keep - 1; k >= 0; k--) {
            suggestions[k] = sugQueue.pop();
        }
        if (extendedResults) {
            Integer originalFreq = mergeData.origVsFreq.get(original);
            if (originalFreq != null) {
                result.addFrequency(token, originalFreq);
            }
            for (SuggestWord word : suggestions) {
                result.add(token, word.string, word.freq);
            }
        } else {
            // presize from the collected array; the queue has already been drained here
            List<String> words = new ArrayList<>(suggestions.length);
            for (SuggestWord word : suggestions) {
                words.add(word.string);
            }
            result.add(token, words);
        }
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) Token(org.apache.lucene.analysis.Token) LevensteinDistance(org.apache.lucene.search.spell.LevensteinDistance) SpellCheckResponse(org.apache.solr.client.solrj.response.SpellCheckResponse) StringDistance(org.apache.lucene.search.spell.StringDistance) SuggestWordQueue(org.apache.lucene.search.spell.SuggestWordQueue) SuggestWord(org.apache.lucene.search.spell.SuggestWord) Map(java.util.Map) HashSet(java.util.HashSet)

Aggregations

ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 Map (java.util.Map)1 Token (org.apache.lucene.analysis.Token)1 LevensteinDistance (org.apache.lucene.search.spell.LevensteinDistance)1 StringDistance (org.apache.lucene.search.spell.StringDistance)1 SuggestWord (org.apache.lucene.search.spell.SuggestWord)1 SuggestWordQueue (org.apache.lucene.search.spell.SuggestWordQueue)1 SpellCheckResponse (org.apache.solr.client.solrj.response.SpellCheckResponse)1