Use of org.apache.lucene.analysis.Token in project lucene-solr by apache.
From the class DirectSolrSpellChecker, method getSuggestions.
/**
 * Collects spelling suggestions for every token in {@code options.tokens}.
 *
 * <p>For each token the document frequency of its text in {@code field} is recorded on the
 * result. When {@code options.alternativeTermCount} is positive and the token occurs in the
 * index, {@code alternativeTermCount} suggestions are requested instead of
 * {@code options.count}, and the token itself is placed first among the suggestions unless the
 * checker already returned it.
 *
 * @param options the tokens to check plus the reader, counts, suggest mode and accuracy to use
 * @return the frequencies and suggestions gathered for the tokens
 * @throws IOException propagated from the index reader or the checker
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
  LOG.debug("getSuggestions: " + options.tokens);

  final SpellingResult result = new SpellingResult();
  final float accuracy;
  if (options.accuracy == Float.MIN_VALUE) {
    // No accuracy supplied with the request: fall back to the checker's own setting.
    accuracy = checker.getAccuracy();
  } else {
    accuracy = options.accuracy;
  }

  for (Token token : options.tokens) {
    final String text = token.toString();
    final Term term = new Term(field, text);
    final int docFreq = options.reader.docFreq(term);

    // Alternatives are only considered for terms that actually occur in the index.
    final boolean wantAlternatives = options.alternativeTermCount > 0 && docFreq > 0;
    final int howMany = wantAlternatives ? options.alternativeTermCount : options.count;

    SuggestWord[] found =
        checker.suggestSimilar(term, howMany, options.reader, options.suggestMode, accuracy);
    result.addFrequency(token, docFreq);

    // If considering alternatives to "correctly-spelled" terms, then add the
    // original as a viable suggestion.
    if (wantAlternatives) {
      boolean alreadyPresent = false;
      for (SuggestWord candidate : found) {
        if (candidate.string.equals(text)) {
          alreadyPresent = true;
          break;
        }
      }
      if (!alreadyPresent) {
        final SuggestWord self = new SuggestWord();
        self.freq = docFreq;
        self.string = text;
        final SuggestWord[] withSelf = new SuggestWord[found.length + 1];
        withSelf[0] = self;
        System.arraycopy(found, 0, withSelf, 1, found.length);
        found = withSelf;
      }
    }

    if (found.length != 0 || docFreq != 0) {
      for (SuggestWord word : found) {
        result.add(token, word.string, word.freq);
      }
    } else {
      // Unknown term with nothing to offer: register the token with an empty list.
      final List<String> none = Collections.emptyList();
      result.add(token, none);
    }
  }
  return result;
}
Use of org.apache.lucene.analysis.Token in project lucene-solr by apache.
From the class SolrSpellChecker, method mergeSuggestions.
/**
 * Integrate spelling suggestions from the various shards in a distributed environment.
 *
 * <p>For each original term in {@code mergeData.origVsSuggested}, every suggested word is
 * re-scored against the original with this checker's string distance (a
 * {@link LevensteinDistance} when none is available), words scoring below the accuracy
 * threshold are dropped, the survivors are ranked in a {@code SuggestWordQueue} created with
 * size {@code numSug}, and the best {@code count} of them are added to the result.
 *
 * @param mergeData the suggestions, frequencies and shard counts gathered from the shards
 * @param numSug size passed to the per-term {@code SuggestWordQueue}
 * @param count maximum number of suggestions reported per original term
 * @param extendedResults when true, the original's frequency (if known) and each suggestion's
 *     frequency are reported; otherwise only the suggestion strings are
 * @return the merged suggestions
 */
public SpellingResult mergeSuggestions(SpellCheckMergeData mergeData, int numSug, int count, boolean extendedResults) {
  float accuracy = 0.5f;
  try {
    accuracy = getAccuracy();
  } catch (UnsupportedOperationException uoe) {
    // just use .5 as a default
  }

  StringDistance sd;
  try {
    StringDistance configured = getStringDistance();
    sd = configured != null ? configured : new LevensteinDistance();
  } catch (UnsupportedOperationException uoe) {
    sd = new LevensteinDistance();
  }

  SpellingResult result = new SpellingResult();
  for (Map.Entry<String, HashSet<String>> entry : mergeData.origVsSuggested.entrySet()) {
    String original = entry.getKey();

    // Only use this suggestion if all shards reported it as misspelled,
    // unless it was not a term original to the user's query
    // (WordBreakSolrSpellChecker can add new terms to the response, and we want to keep these)
    Integer numShards = mergeData.origVsShards.get(original);
    if (numShards < mergeData.totalNumberShardResponses && mergeData.isOriginalToQuery(original)) {
      continue;
    }

    // Every term starts from the configured accuracy: the threshold raised while one term's
    // queue fills up must not filter out the candidates of the next term.
    float min = accuracy;
    SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
    for (String suggested : entry.getValue()) {
      SuggestWord sug = mergeData.suggestedVsWord.get(suggested);
      sug.score = sd.getDistance(original, sug.string);
      if (sug.score < min) {
        continue;
      }
      sugQueue.insertWithOverflow(sug);
      if (sugQueue.size() == numSug) {
        // if queue full, maintain the minScore score
        min = sugQueue.top().score;
      }
    }

    // create token
    SpellCheckResponse.Suggestion shardSuggestion = mergeData.origVsSuggestion.get(original);
    Token token = new Token(original, shardSuggestion.getStartOffset(), shardSuggestion.getEndOffset());

    // get top 'count' suggestions out of 'sugQueue.size()' candidates
    int kept = Math.min(count, sugQueue.size());
    SuggestWord[] suggestions = new SuggestWord[kept];

    // Skip the first sugQueue.size() - count elements. The surplus is computed once up front:
    // pop() shrinks the queue, so re-reading size() in the loop condition would stop early and
    // leave lower-ranked entries to be returned in place of the best ones.
    int surplus = sugQueue.size() - count;
    for (int k = 0; k < surplus; k++) {
      sugQueue.pop();
    }

    // now collect the top 'count' responses; the array is filled back to front because the
    // queue hands out its entries starting from top(), which the code above treats as the minimum
    for (int k = kept - 1; k >= 0; k--) {
      suggestions[k] = sugQueue.pop();
    }

    if (extendedResults) {
      Integer originalFreq = mergeData.origVsFreq.get(original);
      if (originalFreq != null) {
        result.addFrequency(token, originalFreq);
      }
      for (SuggestWord word : suggestions) {
        result.add(token, word.string, word.freq);
      }
    } else {
      // Size by the collected suggestions; the queue has already been drained at this point.
      List<String> words = new ArrayList<>(suggestions.length);
      for (SuggestWord word : suggestions) {
        words.add(word.string);
      }
      result.add(token, words);
    }
  }
  return result;
}
Use of org.apache.lucene.analysis.Token in project lucene-solr by apache.
From the class AbstractLuceneSpellChecker, method getSuggestions.
/**
 * Collects spelling suggestions for every token in {@code options.tokens} from the underlying
 * {@code spellChecker}.
 *
 * <p>A token is skipped when the checker returns exactly the token itself as its only
 * suggestion and {@code options.alternativeTermCount} is 0. When
 * {@code options.alternativeTermCount} is positive and the token occurs in the index, the
 * token itself is placed first among the suggestions unless the checker already returned it.
 * With {@code options.extendedResults} set (and both a reader and a field available) the
 * token's document frequency and each suggestion's document frequency are reported; otherwise
 * only the suggestion strings, limited to {@code options.count}, are.
 *
 * @param options the tokens to check plus the reader, counts, suggest mode and accuracy to use
 * @return the suggestions gathered for the tokens
 * @throws IOException propagated from the index reader or the spell checker
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
  SpellingResult result = new SpellingResult(options.tokens);
  IndexReader reader = determineReader(options.reader);
  float theAccuracy = (options.accuracy == Float.MIN_VALUE) ? spellChecker.getAccuracy() : options.accuracy;
  // Frequencies can only be looked up when both an index and a field are available. The rest
  // of this method already tolerates a null field, so the frequency lookup has to as well.
  boolean canLookUpFreq = reader != null && field != null;

  int count = Math.max(options.count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
  for (Token token : options.tokens) {
    String tokenText = new String(token.buffer(), 0, token.length());
    int docFreq = canLookUpFreq ? reader.docFreq(new Term(field, tokenText)) : 0;

    int numToRequest = (options.alternativeTermCount == 0 || docFreq == 0) ? count : options.alternativeTermCount;
    String[] suggestions = spellChecker.suggestSimilar(tokenText, numToRequest,
        field != null ? reader : null, // workaround LUCENE-1295
        field, options.suggestMode, theAccuracy);
    if (suggestions.length == 1 && suggestions[0].equals(tokenText) && options.alternativeTermCount == 0) {
      // These are spelled the same, continue on
      continue;
    }

    // If considering alternatives to "correctly-spelled" terms, then add the
    // original as a viable suggestion.
    if (options.alternativeTermCount > 0 && docFreq > 0) {
      boolean foundOriginal = false;
      for (String candidate : suggestions) {
        if (candidate.equals(tokenText)) {
          foundOriginal = true;
          break;
        }
      }
      if (!foundOriginal) {
        String[] suggestionsWithOrig = new String[suggestions.length + 1];
        suggestionsWithOrig[0] = tokenText;
        System.arraycopy(suggestions, 0, suggestionsWithOrig, 1, suggestions.length);
        suggestions = suggestionsWithOrig;
      }
    }

    if (options.extendedResults && canLookUpFreq) {
      result.addFrequency(token, docFreq);
      int countLimit = Math.min(options.count, suggestions.length);
      if (countLimit > 0) {
        for (int i = 0; i < countLimit; i++) {
          result.add(token, suggestions[i], reader.docFreq(new Term(field, suggestions[i])));
        }
      } else {
        List<String> suggList = Collections.emptyList();
        result.add(token, suggList);
      }
    } else {
      List<String> suggList;
      if (suggestions.length > 0) {
        suggList = Arrays.asList(suggestions);
        if (suggestions.length > options.count) {
          suggList = suggList.subList(0, options.count);
        }
      } else {
        suggList = Collections.emptyList();
      }
      result.add(token, suggList);
    }
  }
  return result;
}
Use of org.apache.lucene.analysis.Token in project lucene-solr by apache.
From the class DummyCustomParamSpellChecker, method getSuggestions.
/**
 * Echoes the custom request parameters back as spelling suggestions.
 *
 * <p>Each parameter name in {@code options.customParams} becomes a token whose single
 * suggestion is that parameter's value. Names are processed in sorted order so the output is
 * predictable; the n-th token (counting from 0) gets offsets {@code 2n} and {@code 2n + 1}.
 *
 * @param options the spelling options whose {@code customParams} are echoed
 * @return one token/suggestion pair per custom parameter
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
  final SpellingResult echoed = new SpellingResult();

  // Gather the parameter names and sort them to make ordering predictable.
  final List<String> names = new ArrayList<>();
  for (Iterator<String> it = options.customParams.getParameterNamesIterator(); it.hasNext(); ) {
    names.add(it.next());
  }
  Collections.sort(names);

  // Just spit the parameters back out, one token per name.
  for (int idx = 0; idx < names.size(); idx++) {
    final String name = names.get(idx);
    final String value = options.customParams.get(name);
    final int start = 2 * idx;
    echoed.add(new Token(name, start, start + 1), Collections.singletonList(value));
  }
  return echoed;
}
Use of org.apache.lucene.analysis.Token in project lucene-solr by apache.
From the class TestTermAutomatonQuery, method token.
/**
 * Builds a {@link Token} for {@code term} with start offset 0 and end offset
 * {@code term.length()}, then applies the given position increment and position length.
 *
 * @param term the token text
 * @param posInc value passed to {@code setPositionIncrement}
 * @param posLength value passed to {@code setPositionLength}
 * @return the configured token
 */
private static Token token(String term, int posInc, int posLength) {
  final int endOffset = term.length();
  final Token built = new Token(term, 0, endOffset);
  built.setPositionIncrement(posInc);
  built.setPositionLength(posLength);
  return built;
}
Aggregations