Use of org.apache.lucene.search.spell.SuggestWordQueue in the Apache lucene-solr project.
From the class SolrSpellChecker, method mergeSuggestions.
/**
 * Integrate spelling suggestions from the various shards in a distributed environment.
 *
 * <p>For every original term in {@code mergeData.origVsSuggested}, each suggested string is
 * re-scored against the original using this checker's {@code StringDistance} (a
 * {@code LevensteinDistance} is used when none is configured or the accessor is unsupported).
 * Candidates scoring below the accuracy threshold are discarded; the rest go through a bounded
 * queue of capacity {@code numSug}, and the best {@code count} of them are added to the result
 * in best-first order.
 *
 * @param mergeData       the per-shard suggestion data collected for this request
 * @param numSug          capacity of the per-term candidate queue
 * @param count           maximum number of suggestions reported per original term
 * @param extendedResults when {@code true}, the original term's frequency (if known) and each
 *                        suggestion's frequency are recorded; otherwise only the suggested
 *                        strings are recorded
 * @return the merged result, one token per original term that was kept
 */
public SpellingResult mergeSuggestions(SpellCheckMergeData mergeData, int numSug, int count, boolean extendedResults) {
    float accuracy = 0.5f;
    try {
        accuracy = getAccuracy();
    } catch (UnsupportedOperationException uoe) {
        // just use .5 as a default
    }

    StringDistance sd;
    try {
        StringDistance configured = getStringDistance();
        sd = configured == null ? new LevensteinDistance() : configured;
    } catch (UnsupportedOperationException uoe) {
        sd = new LevensteinDistance();
    }

    SpellingResult result = new SpellingResult();
    for (Map.Entry<String, HashSet<String>> entry : mergeData.origVsSuggested.entrySet()) {
        String original = entry.getKey();

        // Only use this suggestion if all shards reported it as misspelled,
        // unless it was not a term original to the user's query
        // (WordBreakSolrSpellChecker can add new terms to the response, and we want to keep these)
        Integer numShards = mergeData.origVsShards.get(original);
        if (numShards < mergeData.totalNumberShardResponses && mergeData.isOriginalToQuery(original)) {
            continue;
        }

        // The threshold restarts at the configured accuracy for every original term. It is
        // raised below once this term's queue fills, and that raised value must not leak
        // into the next term's filtering.
        float min = accuracy;
        HashSet<String> suggested = entry.getValue();
        SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
        for (String suggestion : suggested) {
            SuggestWord sug = mergeData.suggestedVsWord.get(suggestion);
            sug.score = sd.getDistance(original, sug.string);
            if (sug.score < min) {
                continue;
            }
            sugQueue.insertWithOverflow(sug);
            if (sugQueue.size() == numSug) {
                // if queue full, maintain the minScore score
                min = sugQueue.top().score;
            }
        }

        // create token
        SpellCheckResponse.Suggestion suggestion = mergeData.origVsSuggestion.get(original);
        Token token = new Token(original, suggestion.getStartOffset(), suggestion.getEndOffset());

        // get top 'count' suggestions out of 'sugQueue.size()' candidates
        int keep = Math.min(count, sugQueue.size());
        SuggestWord[] suggestions = new SuggestWord[keep];

        // Drop the surplus weakest candidates. The number to drop is fixed BEFORE popping:
        // re-reading sugQueue.size() in the loop condition would shrink the bound on every
        // pop and stop early, leaving weaker candidates to be reported.
        int excess = sugQueue.size() - keep;
        for (int k = 0; k < excess; k++) {
            sugQueue.pop();
        }

        // pop() yields the weakest remaining candidate first, so fill the array from the
        // back to end up with the best suggestion at index 0.
        for (int k = keep - 1; k >= 0; k--) {
            suggestions[k] = sugQueue.pop();
        }

        if (extendedResults) {
            Integer o = mergeData.origVsFreq.get(original);
            if (o != null) {
                result.addFrequency(token, o);
            }
            for (SuggestWord word : suggestions) {
                result.add(token, word.string, word.freq);
            }
        } else {
            // The queue is drained at this point, so size the list from the array.
            List<String> words = new ArrayList<>(suggestions.length);
            for (SuggestWord word : suggestions) {
                words.add(word.string);
            }
            result.add(token, words);
        }
    }
    return result;
}
Aggregations