Use of org.apache.lucene.analysis.Token in project lucene-solr by apache.
From the class AbstractLuceneSpellChecker, method getSuggestions:
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
  SpellingResult result = new SpellingResult(options.tokens);
  IndexReader reader = determineReader(options.reader);
  Term term = field != null ? new Term(field, "") : null;
  float theAccuracy = (options.accuracy == Float.MIN_VALUE) ? spellChecker.getAccuracy() : options.accuracy;
  int count = Math.max(options.count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
  for (Token token : options.tokens) {
    String tokenText = new String(token.buffer(), 0, token.length());
    term = new Term(field, tokenText);
    int docFreq = 0;
    if (reader != null) {
      docFreq = reader.docFreq(term);
    }
    String[] suggestions = spellChecker.suggestSimilar(tokenText,
        ((options.alternativeTermCount == 0 || docFreq == 0) ? count : options.alternativeTermCount), // workaround LUCENE-1295
        field != null ? reader : null, field, options.suggestMode, theAccuracy);
    if (suggestions.length == 1 && suggestions[0].equals(tokenText) && options.alternativeTermCount == 0) {
      // These are spelled the same, continue on
      continue;
    }
    // If considering alternatives to "correctly-spelled" terms, then add the
    // original as a viable suggestion.
    if (options.alternativeTermCount > 0 && docFreq > 0) {
      boolean foundOriginal = false;
      String[] suggestionsWithOrig = new String[suggestions.length + 1];
      for (int i = 0; i < suggestions.length; i++) {
        if (suggestions[i].equals(tokenText)) {
          foundOriginal = true;
          break;
        }
        suggestionsWithOrig[i + 1] = suggestions[i];
      }
      if (!foundOriginal) {
        suggestionsWithOrig[0] = tokenText;
        suggestions = suggestionsWithOrig;
      }
    }
    if (options.extendedResults == true && reader != null && field != null) {
      result.addFrequency(token, docFreq);
      int countLimit = Math.min(options.count, suggestions.length);
      if (countLimit > 0) {
        for (int i = 0; i < countLimit; i++) {
          term = new Term(field, suggestions[i]);
          result.add(token, suggestions[i], reader.docFreq(term));
        }
      } else {
        List<String> suggList = Collections.emptyList();
        result.add(token, suggList);
      }
    } else {
      if (suggestions.length > 0) {
        List<String> suggList = Arrays.asList(suggestions);
        if (suggestions.length > options.count) {
          suggList = suggList.subList(0, options.count);
        }
        result.add(token, suggList);
      } else {
        List<String> suggList = Collections.emptyList();
        result.add(token, suggList);
      }
    }
  }
  return result;
}
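The per-token handling above relies on the org.apache.lucene.analysis.Token attribute API. A minimal sketch of that API follows, assuming a Lucene version where the Token class is available (in recent releases it ships with the test framework); the term value and offsets are illustrative only:

Token token = new Token("network", 0, 7);  // term text plus start/end character offsets
token.setPositionIncrement(1);             // one position after the previous token
token.setPositionLength(1);                // occupies a single position in the token graph
// Same pattern getSuggestions() uses to recover the term text:
String tokenText = new String(token.buffer(), 0, token.length());  // "network"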
Use of org.apache.lucene.analysis.Token in project lucene-solr by apache.
From the class TestGraphTokenStreamFiniteStrings, method token:
private static Token token(String term, int posInc, int posLength) {
  final Token t = new Token(term, 0, term.length());
  t.setPositionIncrement(posInc);
  t.setPositionLength(posLength);
  return t;
}
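The tests below use this kind of helper to build canned token graphs. A small usage sketch, assuming it sits in the same class as the token(...) helper and that the test-framework class CannedTokenStream is on the classpath; the terms are illustrative:

// Two tokens stacked at the same position (posInc = 0) form a side path in the graph;
// posLength = 2 lets "wifi" span the two positions occupied by "wi" + "fi".
TokenStream graph = new CannedTokenStream(new Token[] {
    token("wi", 1, 1),
    token("wifi", 0, 2),  // starts where "wi" starts, ends where "fi" ends
    token("fi", 1, 1) });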
Use of org.apache.lucene.analysis.Token in project lucene-solr by apache.
From the class AnalyzingSuggesterTest, method testTooManyExpansions:
public void testTooManyExpansions() throws Exception {
  final Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
      return new TokenStreamComponents(tokenizer) {
        @Override
        public TokenStream getTokenStream() {
          Token a = new Token("a", 0, 1);
          a.setPositionIncrement(1);
          Token b = new Token("b", 0, 1);
          b.setPositionIncrement(0);
          return new CannedTokenStream(new Token[] { a, b });
        }

        @Override
        protected void setReader(final Reader reader) {
        }
      };
    }
  };
  Directory tempDir = getDirectory();
  AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, 1, true);
  suggester.build(new InputArrayIterator(new Input[] { new Input("a", 1) }));
  assertEquals("[a/1]", suggester.lookup("a", false, 1).toString());
  IOUtils.close(a, tempDir);
}
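For readability, the eight positional arguments in the AnalyzingSuggester call above can be named. This is a sketch of what the call appears to mean in this snapshot of lucene-solr; the parameter names and meanings are an assumption, so verify them against the constructor in your version:

// Assumed meaning of new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, 1, true):
int options = 0;                           // neither EXACT_FIRST nor PRESERVE_SEP
int maxSurfaceFormsPerAnalyzedForm = 256;  // surface forms kept per analyzed form
int maxGraphExpansions = 1;                // the point of this test: only one graph expansion allowed
boolean preservePositionIncrements = true;
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a,
    options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements);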
Use of org.apache.lucene.analysis.Token in project lucene-solr by apache.
From the class AnalyzingSuggesterTest, method testInputPathRequired:
public void testInputPathRequired() throws Exception {
  //  SynonymMap.Builder b = new SynonymMap.Builder(false);
  //  b.add(new CharsRef("ab"), new CharsRef("ba"), true);
  //  final SynonymMap map = b.build();

  //  The Analyzer below mimics the functionality of the SynonymAnalyzer
  //  using the above map, so that the suggest module does not need a dependency on the
  //  synonym module
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
      return new TokenStreamComponents(tokenizer) {
        int tokenStreamCounter = 0;
        final TokenStream[] tokenStreams = new TokenStream[] {
            new CannedTokenStream(new Token[] { token("ab", 1, 1), token("ba", 0, 1), token("xc", 1, 1) }),
            new CannedTokenStream(new Token[] { token("ba", 1, 1), token("xd", 1, 1) }),
            new CannedTokenStream(new Token[] { token("ab", 1, 1), token("ba", 0, 1), token("x", 1, 1) })
        };

        @Override
        public TokenStream getTokenStream() {
          TokenStream result = tokenStreams[tokenStreamCounter];
          tokenStreamCounter++;
          return result;
        }

        @Override
        protected void setReader(final Reader reader) {
        }
      };
    }
  };

  Input[] keys = new Input[] { new Input("ab xc", 50), new Input("ba xd", 50) };
  Directory tempDir = getDirectory();
  AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
  suggester.build(new InputArrayIterator(keys));
  List<LookupResult> results = suggester.lookup("ab x", false, 1);
  assertTrue(results.size() == 1);
  IOUtils.close(analyzer, tempDir);
}
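The commented-out lines at the top of the test hint at the synonym setup the canned streams emulate. A hedged sketch of that setup follows, assuming the synonym module is on the classpath and using SynonymGraphFilter as one possible filter (the original comment only mentions "the SynonymAnalyzer", so the exact filter is an assumption):

// Map "ab" => "ba" and keep the original term, as in the commented-out builder above.
SynonymMap.Builder builder = new SynonymMap.Builder(false);
builder.add(new CharsRef("ab"), new CharsRef("ba"), true);
SynonymMap map = builder.build();

Analyzer synonymAnalyzer = new Analyzer() {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
    // Assumption: SynonymGraphFilter (org.apache.lucene.analysis.synonym) stands in for
    // whatever filter "the SynonymAnalyzer" used.
    TokenStream graph = new SynonymGraphFilter(tokenizer, map, true);
    return new TokenStreamComponents(tokenizer, graph);
  }
};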
Use of org.apache.lucene.analysis.Token in project lucene-solr by apache.
From the class AnalyzingSuggesterTest, method testGraphDups:
public void testGraphDups() throws Exception {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
      return new TokenStreamComponents(tokenizer) {
        int tokenStreamCounter = 0;
        final TokenStream[] tokenStreams = new TokenStream[] {
            new CannedTokenStream(new Token[] { token("wifi", 1, 1), token("hotspot", 0, 2), token("network", 1, 1), token("is", 1, 1), token("slow", 1, 1) }),
            new CannedTokenStream(new Token[] { token("wi", 1, 1), token("hotspot", 0, 3), token("fi", 1, 1), token("network", 1, 1), token("is", 1, 1), token("fast", 1, 1) }),
            new CannedTokenStream(new Token[] { token("wifi", 1, 1), token("hotspot", 0, 2), token("network", 1, 1) })
        };

        @Override
        public TokenStream getTokenStream() {
          TokenStream result = tokenStreams[tokenStreamCounter];
          tokenStreamCounter++;
          return result;
        }

        @Override
        protected void setReader(final Reader reader) {
        }
      };
    }
  };

  Input[] keys = new Input[] { new Input("wifi network is slow", 50), new Input("wi fi network is fast", 10) };
  //AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, AnalyzingSuggester.EXACT_FIRST, 256, -1);
  Directory tempDir = getDirectory();
  AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
  suggester.build(new InputArrayIterator(keys));
  List<LookupResult> results = suggester.lookup("wifi network", false, 10);
  if (VERBOSE) {
    System.out.println("Results: " + results);
  }
  assertEquals(2, results.size());
  assertEquals("wifi network is slow", results.get(0).key);
  assertEquals(50, results.get(0).value);
  assertEquals("wi fi network is fast", results.get(1).key);
  assertEquals(10, results.get(1).value);
  IOUtils.close(analyzer, tempDir);
}
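To see why the lookup for "wifi network" matches both stored inputs, it helps to draw the token graph encoded by the first canned stream above; this is a sketch, with positions numbered from 0:

// pos 0      pos 1         pos 2    pos 3
// [wifi] --> [network] --> [is] --> [slow]
// [hotspot]  posInc = 0, posLength = 2: an alternative path spanning "wifi network"
//
// Paths through this graph: "wifi network is slow" and "hotspot is slow".
// The second input analyzes similarly ("hotspot" stacked over "wi fi network" with
// posLength = 3), and the query stream stacks "hotspot" over "wifi network", so the
// shared "hotspot ..." path lets the query prefix match both suggestions.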