use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.
the class FuzzySuggesterTest method testRandomEdits.
/**
 * Indexes a batch of random "boo"-prefixed keys plus the single key
 * "foo bar boo far" (weight 12), then repeatedly applies one random edit to
 * the query "foo bar boo" and checks that the lookup still returns exactly
 * that one key with its weight.
 */
public void testRandomEdits() throws IOException {
  final String expectedKey = "foo bar boo far";
  final List<Input> inputs = new ArrayList<>();

  // Noise entries: none of them starts with "foo", so they must never be suggested.
  int remaining = atLeast(100);
  while (remaining-- > 0) {
    // The term is generated before the weight so the random sequence is consumed in a fixed order.
    String noiseTerm = "boo" + TestUtil.randomSimpleString(random());
    int noiseWeight = 1 + random().nextInt(100);
    inputs.add(new Input(noiseTerm, noiseWeight));
  }
  inputs.add(new Input(expectedKey, 12));

  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
  Directory tempDir = getDirectory();
  final int options = FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP;
  FuzzySuggester suggester = new FuzzySuggester(
      tempDir,
      "fuzzy",
      analyzer,
      analyzer,
      options,
      256,
      -1,
      true,
      FuzzySuggester.DEFAULT_MAX_EDITS,
      FuzzySuggester.DEFAULT_TRANSPOSITIONS,
      0,
      FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH,
      FuzzySuggester.DEFAULT_UNICODE_AWARE);
  suggester.build(new InputArrayIterator(inputs));

  final int iterations = atLeast(10);
  for (int iter = 0; iter < iterations; iter++) {
    String query = addRandomEdit("foo bar boo", FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX);
    CharSequence querySeq = TestUtil.stringToCharSequence(query, random());
    List<LookupResult> hits = suggester.lookup(querySeq, false, 2);

    // The edited query is used as the failure message so a bad edit is easy to identify.
    assertEquals(query, 1, hits.size());
    LookupResult top = hits.get(0);
    assertEquals(expectedKey, top.key.toString());
    assertEquals(12, top.value, 0.01F);
  }

  IOUtils.close(analyzer, tempDir);
}
use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.
the class FuzzySuggesterTest method slowFuzzyMatch.
/**
 * Brute-force reference matcher: scans every entry of {@code answers} and
 * collects those whose term is within {@code maxEdits} of {@code frag},
 * treating {@code frag} as a (possibly incomplete) prefix of the term.
 *
 * <p>The first {@code prefixLen} positions are compared exactly (byte of the
 * term against the low byte of the corresponding {@code frag} char); only the
 * remainder is compared through {@code getDistance}.
 *
 * @param prefixLen number of leading positions that must match exactly
 * @param maxEdits largest distance (as reported by {@code getDistance}) still accepted
 * @param allowTransposition forwarded unchanged to {@code getDistance}
 * @param answers candidate entries to test
 * @param frag the query fragment typed so far
 * @return the accepted entries as {@link LookupResult}s, sorted with
 *         {@code CompareByCostThenAlpha}
 */
private List<LookupResult> slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List<Input> answers, String frag) {
  final List<LookupResult> results = new ArrayList<>();
  final int fragLen = frag.length();
  for (Input tf : answers) {
    boolean prefixMatches = true;
    for (int i = 0; i < prefixLen; i++) {
      if (i == fragLen) {
        // Prefix still matches:
        break;
      }
      // Index relative to the term's offset: the valid bytes of the term start
      // at term.offset, not necessarily at 0.
      if (i == tf.term.length || tf.term.bytes[tf.term.offset + i] != (byte) frag.charAt(i)) {
        prefixMatches = false;
        break;
      }
    }
    if (!prefixMatches) {
      continue;
    }

    final int len = tf.term.length;
    if (len < fragLen - maxEdits) {
      // Too short: even maxEdits insertions could not reach the fragment length.
      continue;
    }

    // OK it's possible:
    final String s = tf.term.utf8ToString();
    int d;
    if (fragLen == prefixLen) {
      // The whole fragment lies inside the exact-match prefix, which already matched.
      d = 0;
    } else {
      // Try every prefix of the term whose length is within maxEdits of the
      // fragment length and keep the first one that is close enough.
      d = maxEdits + 1;
      for (int ed = -maxEdits; ed <= maxEdits; ed++) {
        if (s.length() < fragLen - ed) {
          continue;
        }
        String check = s.substring(0, fragLen - ed);
        d = getDistance(frag, check, allowTransposition);
        if (d <= maxEdits) {
          break;
        }
      }
    }
    if (d <= maxEdits) {
      results.add(new LookupResult(s, tf.v));
    }
  }

  // Sort once, after all candidates are collected. Collections.sort is stable,
  // so this yields the same order as re-sorting after every candidate did.
  Collections.sort(results, new CompareByCostThenAlpha());
  return results;
}
use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.
the class FuzzySuggesterTest method testNoSeps.
/**
 * Builds a suggester without any option flags over the keys "ab cd"
 * (weight 0) and "abcd" (weight 1) and checks that the query "ab c" returns
 * both, with "abcd" first.
 */
public void testNoSeps() throws Exception {
  final Input[] inputs = {
    new Input("ab cd", 0),
    new Input("abcd", 1)
  };

  // No flags set: in particular PRESERVE_SEP is off.
  final int noOptions = 0;

  Analyzer analyzer = new MockAnalyzer(random());
  Directory dir = getDirectory();
  FuzzySuggester suggester = new FuzzySuggester(
      dir, "fuzzy", analyzer, analyzer, noOptions, 256, -1, true, 1, true, 1, 3, false);
  suggester.build(new InputArrayIterator(inputs));

  // TODO: would be nice if "ab " would allow the test to
  // pass, and more generally if the analyzer can know
  // that the user's current query has ended at a word,
  // but, analyzers don't produce SEP tokens!
  CharSequence query = TestUtil.stringToCharSequence("ab c", random());
  List<LookupResult> hits = suggester.lookup(query, false, 2);
  assertEquals(2, hits.size());

  // With no PRESERVE_SEPS specified, "ab c" should also
  // complete to "abcd", which has higher weight so should
  // appear first:
  LookupResult best = hits.get(0);
  assertEquals("abcd", best.key.toString());

  IOUtils.close(analyzer, dir);
}
use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.
the class FuzzySuggesterTest method testStandard.
/**
 * basic "standardanalyzer" test with stopword removal
 */
public void testStandard() throws Exception {
  final Input[] keys = { new Input("the ghost of christmas past", 50) };

  Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
  Directory tempDir = getDirectory();
  final int options = AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP;
  FuzzySuggester suggester = new FuzzySuggester(
      tempDir,
      "fuzzy",
      standard,
      standard,
      options,
      256,
      -1,
      false,
      FuzzySuggester.DEFAULT_MAX_EDITS,
      FuzzySuggester.DEFAULT_TRANSPOSITIONS,
      FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX,
      FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH,
      FuzzySuggester.DEFAULT_UNICODE_AWARE);
  suggester.build(new InputArrayIterator(keys));

  // full query, stopwords included
  assertSuggestsGhostOfChristmasPast(suggester, "the ghost of chris");

  // omit the 'the' since it's a stopword, it's suggested anyway
  assertSuggestsGhostOfChristmasPast(suggester, "ghost of chris");

  // omit the 'the' and 'of' since they are stopwords, it's suggested anyway
  assertSuggestsGhostOfChristmasPast(suggester, "ghost chris");

  IOUtils.close(standard, tempDir);
}

/**
 * Looks up {@code query} (asking for a single result) and asserts that the
 * only suggestion is "the ghost of christmas past" with weight 50.
 */
private void assertSuggestsGhostOfChristmasPast(FuzzySuggester suggester, String query) throws Exception {
  CharSequence querySeq = TestUtil.stringToCharSequence(query, random());
  List<LookupResult> hits = suggester.lookup(querySeq, false, 1);
  assertEquals(1, hits.size());
  LookupResult only = hits.get(0);
  assertEquals("the ghost of christmas past", only.key.toString());
  assertEquals(50, only.value, 0.01F);
}
use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.
the class FuzzySuggesterTest method testNonLatinRandomEdits.
/**
 * Cyrillic counterpart of the random-edit check: indexes a batch of random
 * "буу"-prefixed keys plus the single key "фуу бар буу фар" (weight 12),
 * then repeatedly applies one random edit to the query "фуу бар буу" and
 * checks that the lookup still returns exactly that one key with its weight.
 */
public void testNonLatinRandomEdits() throws IOException {
  final String expectedKey = "фуу бар буу фар";
  final List<Input> inputs = new ArrayList<>();

  // Noise entries: none of them starts with "фуу", so they must never be suggested.
  int remaining = atLeast(100);
  while (remaining-- > 0) {
    // The term is generated before the weight so the random sequence is consumed in a fixed order.
    String noiseTerm = "буу" + TestUtil.randomSimpleString(random());
    int noiseWeight = 1 + random().nextInt(100);
    inputs.add(new Input(noiseTerm, noiseWeight));
  }
  inputs.add(new Input(expectedKey, 12));

  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
  Directory tempDir = getDirectory();
  final int options = FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP;
  FuzzySuggester suggester = new FuzzySuggester(
      tempDir,
      "fuzzy",
      analyzer,
      analyzer,
      options,
      256,
      -1,
      true,
      FuzzySuggester.DEFAULT_MAX_EDITS,
      FuzzySuggester.DEFAULT_TRANSPOSITIONS,
      0,
      FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH,
      true);
  suggester.build(new InputArrayIterator(inputs));

  final int iterations = atLeast(10);
  for (int iter = 0; iter < iterations; iter++) {
    String query = addRandomEdit("фуу бар буу", 0);
    CharSequence querySeq = TestUtil.stringToCharSequence(query, random());
    List<LookupResult> hits = suggester.lookup(querySeq, false, 2);

    // The edited query is used as the failure message so a bad edit is easy to identify.
    assertEquals(query, 1, hits.size());
    LookupResult top = hits.get(0);
    assertEquals(expectedKey, top.key.toString());
    assertEquals(12, top.value, 0.01F);
  }

  IOUtils.close(analyzer, tempDir);
}
Aggregations