use of org.apache.lucene.search.spell.SuggestWord in project elasticsearch by elastic.
the class TermSuggester method innerExecute.
@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
DirectSpellChecker directSpellChecker = suggestion.getDirectSpellCheckerSettings().createDirectSpellChecker();
final IndexReader indexReader = searcher.getIndexReader();
TermSuggestion response = new TermSuggestion(name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort());
List<Token> tokens = queryTerms(suggestion, spare);
for (Token token : tokens) {
// TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
SuggestWord[] suggestedWords = directSpellChecker.suggestSimilar(token.term, suggestion.getShardSize(), indexReader, suggestion.getDirectSpellCheckerSettings().suggestMode());
Text key = new Text(new BytesArray(token.term.bytes()));
TermSuggestion.Entry resultEntry = new TermSuggestion.Entry(key, token.startOffset, token.endOffset - token.startOffset);
for (SuggestWord suggestWord : suggestedWords) {
Text word = new Text(suggestWord.string);
resultEntry.addOption(new TermSuggestion.Entry.Option(word, suggestWord.freq, suggestWord.score));
}
response.addTerm(resultEntry);
}
return response;
}
use of org.apache.lucene.search.spell.SuggestWord in project elasticsearch by elastic.
the class DirectSpellcheckerSettings method createDirectSpellChecker.
public DirectSpellChecker createDirectSpellChecker() {
DirectSpellChecker directSpellChecker = new DirectSpellChecker();
directSpellChecker.setAccuracy(accuracy());
Comparator<SuggestWord> comparator;
switch(sort()) {
case SCORE:
comparator = SCORE_COMPARATOR;
break;
case FREQUENCY:
comparator = LUCENE_FREQUENCY;
break;
default:
throw new IllegalArgumentException("Illegal suggest sort: " + sort());
}
directSpellChecker.setComparator(comparator);
directSpellChecker.setDistance(stringDistance());
directSpellChecker.setMaxEdits(maxEdits());
directSpellChecker.setMaxInspections(maxInspections());
directSpellChecker.setMaxQueryFrequency(maxTermFreq());
directSpellChecker.setMinPrefix(prefixLength());
directSpellChecker.setMinQueryLength(minWordLength());
directSpellChecker.setThresholdFrequency(minDocFreq());
directSpellChecker.setLowerCaseTerms(false);
return directSpellChecker;
}
use of org.apache.lucene.search.spell.SuggestWord in project elasticsearch by elastic.
the class DirectCandidateGenerator method drawCandidates.
@Override
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
Candidate original = set.originalTerm;
BytesRef term = preFilter(original.term, spare, byteSpare);
final long frequency = original.frequency;
spellchecker.setThresholdFrequency(this.suggestMode == SuggestMode.SUGGEST_ALWAYS ? 0 : thresholdFrequency(frequency, dictSize));
SuggestWord[] suggestSimilar = spellchecker.suggestSimilar(new Term(field, term), numCandidates, reader, this.suggestMode);
List<Candidate> candidates = new ArrayList<>(suggestSimilar.length);
for (int i = 0; i < suggestSimilar.length; i++) {
SuggestWord suggestWord = suggestSimilar[i];
BytesRef candidate = new BytesRef(suggestWord.string);
postFilter(new Candidate(candidate, internalFrequency(candidate), suggestWord.score, score(suggestWord.freq, suggestWord.score, dictSize), false), spare, byteSpare, candidates);
}
set.addCandidates(candidates);
return set;
}
use of org.apache.lucene.search.spell.SuggestWord in project lucene-solr by apache.
the class SpellCheckComponent method collectShardSuggestions.
@SuppressWarnings("unchecked")
private void collectShardSuggestions(NamedList nl, SpellCheckMergeData mergeData) {
SpellCheckResponse spellCheckResp = new SpellCheckResponse(nl);
Iterable<Object> originalTermStrings = (Iterable<Object>) nl.get("originalTerms");
if (originalTermStrings != null) {
mergeData.originalTerms = new HashSet<>();
for (Object originalTermObj : originalTermStrings) {
mergeData.originalTerms.add(originalTermObj.toString());
}
}
for (SpellCheckResponse.Suggestion suggestion : spellCheckResp.getSuggestions()) {
mergeData.origVsSuggestion.put(suggestion.getToken(), suggestion);
HashSet<String> suggested = mergeData.origVsSuggested.get(suggestion.getToken());
if (suggested == null) {
suggested = new HashSet<>();
mergeData.origVsSuggested.put(suggestion.getToken(), suggested);
}
// sum up original frequency
int origFreq = 0;
Integer o = mergeData.origVsFreq.get(suggestion.getToken());
if (o != null)
origFreq += o;
origFreq += suggestion.getOriginalFrequency();
mergeData.origVsFreq.put(suggestion.getToken(), origFreq);
//# shards reporting
Integer origShards = mergeData.origVsShards.get(suggestion.getToken());
if (origShards == null) {
mergeData.origVsShards.put(suggestion.getToken(), 1);
} else {
mergeData.origVsShards.put(suggestion.getToken(), ++origShards);
}
// find best suggestions
for (int i = 0; i < suggestion.getNumFound(); i++) {
String alternative = suggestion.getAlternatives().get(i);
suggested.add(alternative);
SuggestWord sug = mergeData.suggestedVsWord.get(alternative);
if (sug == null) {
sug = new SuggestWord();
mergeData.suggestedVsWord.put(alternative, sug);
}
sug.string = alternative;
// alternative frequency is present only for extendedResults=true
if (suggestion.getAlternativeFrequencies() != null && suggestion.getAlternativeFrequencies().size() > 0) {
Integer freq = suggestion.getAlternativeFrequencies().get(i);
if (freq != null)
sug.freq += freq;
}
}
}
}
use of org.apache.lucene.search.spell.SuggestWord in project lucene-solr by apache.
the class AbstractLuceneSpellChecker method init.
@Override
public String init(NamedList config, SolrCore core) {
super.init(config, core);
indexDir = (String) config.get(INDEX_DIR);
String accuracy = (String) config.get(ACCURACY);
//If indexDir is relative then create index inside core.getDataDir()
if (indexDir != null) {
if (!new File(indexDir).isAbsolute()) {
indexDir = core.getDataDir() + File.separator + indexDir;
}
}
sourceLocation = (String) config.get(LOCATION);
String compClass = (String) config.get(COMPARATOR_CLASS);
Comparator<SuggestWord> comp = null;
if (compClass != null) {
if (compClass.equalsIgnoreCase(SCORE_COMP)) {
comp = SuggestWordQueue.DEFAULT_COMPARATOR;
} else if (compClass.equalsIgnoreCase(FREQ_COMP)) {
comp = new SuggestWordFrequencyComparator();
} else {
//must be a FQCN
comp = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(compClass, Comparator.class);
}
} else {
comp = SuggestWordQueue.DEFAULT_COMPARATOR;
}
String strDistanceName = (String) config.get(STRING_DISTANCE);
if (strDistanceName != null) {
sd = core.getResourceLoader().newInstance(strDistanceName, StringDistance.class);
//TODO: Figure out how to configure options. Where's Spring when you need it? Or at least BeanUtils...
} else {
sd = new LevensteinDistance();
}
try {
initIndex();
spellChecker = new SpellChecker(index, sd, comp);
} catch (IOException e) {
throw new RuntimeException(e);
}
if (accuracy != null) {
try {
this.accuracy = Float.parseFloat(accuracy);
spellChecker.setAccuracy(this.accuracy);
} catch (NumberFormatException e) {
throw new RuntimeException("Unparseable accuracy given for dictionary: " + name, e);
}
}
return name;
}
Aggregations