use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.
the class BlendedInfixSuggesterTest method testBlendingType.
/**
 * Checks the blended weight returned for one suggestion under each blender type:
 * the default one, POSITION_RECIPROCAL and POSITION_EXPONENTIAL_RECIPROCAL.
 */
public void testBlendingType() throws IOException {
  final long weight = 20;
  final BytesRef payload = new BytesRef("lake");
  final Input[] inputs = {new Input("top of the lake", weight, payload)};

  final Path indexPath = createTempDir("BlendedInfixSuggesterTest");
  final Analyzer analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET);

  // Default blender (BlenderType.LINEAR): 10% of the weight is removed per position.
  final int linearThe = (int) (weight * (1 - 0.10 * 2));
  final int linearLake = (int) (weight * (1 - 0.10 * 3));
  BlendedInfixSuggester linear = new BlendedInfixSuggester(newFSDirectory(indexPath), analyzer);
  linear.build(new InputArrayIterator(inputs));
  assertEquals(weight, getInResults(linear, "top", payload, 1));
  assertEquals(linearThe, getInResults(linear, "the", payload, 1));
  assertEquals(linearLake, getInResults(linear, "lake", payload, 1));
  linear.close();

  // Reciprocal blender: weight * 1/(1+p), with p the position of the matched word.
  final int reciprocalThe = (int) (weight * 1 / (1 + 2));
  final int reciprocalLake = (int) (weight * 1 / (1 + 3));
  BlendedInfixSuggester reciprocal =
      new BlendedInfixSuggester(
          newFSDirectory(indexPath),
          analyzer,
          analyzer,
          AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
          BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL,
          1,
          false);
  reciprocal.build(new InputArrayIterator(inputs));
  assertEquals(weight, getInResults(reciprocal, "top", payload, 1));
  assertEquals(reciprocalThe, getInResults(reciprocal, "the", payload, 1));
  assertEquals(reciprocalLake, getInResults(reciprocal, "lake", payload, 1));
  reciprocal.close();

  // Exponential reciprocal blender: weight * 1/pow(1+p, exponent), here with exponent 4.0.
  final int exponentialThe = (int) (weight * 1 / (Math.pow(1 + 2, 4.0)));
  final int exponentialLake = (int) (weight * 1 / (Math.pow(1 + 3, 4.0)));
  BlendedInfixSuggester exponential =
      new BlendedInfixSuggester(
          newFSDirectory(indexPath),
          analyzer,
          analyzer,
          AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
          BlendedInfixSuggester.BlenderType.POSITION_EXPONENTIAL_RECIPROCAL,
          1,
          4.0,
          false,
          true,
          false);
  exponential.build(new InputArrayIterator(inputs));
  assertEquals(weight, getInResults(exponential, "top", payload, 1));
  assertEquals(exponentialThe, getInResults(exponential, "the", payload, 1));
  assertEquals(exponentialLake, getInResults(exponential, "lake", payload, 1));
  exponential.close();
}
use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.
the class BlendedInfixSuggesterTest method testRequiresMore.
/**
 * Shows that the factor passed to the suggester decides whether an entry with a lower
 * raw weight, which blending would rank higher, makes it into the returned results.
 */
public void testRequiresMore() throws IOException {
  final BytesRef lakePayload = new BytesRef("lake");
  final BytesRef starPayload = new BytesRef("star");
  final BytesRef retPayload = new BytesRef("ret");

  final Input[] inputs = {
    new Input("top of the lake", 18, lakePayload),
    new Input("star wars: episode v - the empire strikes back", 12, starPayload),
    new Input("the returned", 10, retPayload)
  };

  final Path indexPath = createTempDir("BlendedInfixSuggesterTest");
  final Analyzer analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET);

  // With a factor of 1 the "ret" entry is not among the top two results.
  BlendedInfixSuggester smallFactor =
      new BlendedInfixSuggester(
          newFSDirectory(indexPath),
          analyzer,
          analyzer,
          AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
          BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL,
          1,
          false);
  smallFactor.build(new InputArrayIterator(inputs));

  // Asking for two results: two come back, but "ret" is missing.
  assertEquals(2, smallFactor.lookup("the", 2, true, false).size());
  assertTrue(getInResults(smallFactor, "the", retPayload, 2) < 0);

  // Asking for three results: "ret" shows up.
  assertEquals(3, smallFactor.lookup("the", 3, true, false).size());
  assertTrue(getInResults(smallFactor, "the", retPayload, 3) > 0);
  smallFactor.close();

  // With a factor of 2 the "ret" entry is found within the top two results...
  BlendedInfixSuggester largerFactor =
      new BlendedInfixSuggester(
          newFSDirectory(indexPath),
          analyzer,
          analyzer,
          AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
          BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL,
          2,
          false);
  largerFactor.build(new InputArrayIterator(inputs));
  assertTrue(getInResults(largerFactor, "the", retPayload, 2) > 0);

  // ...and the "star" entry is the one left out.
  assertTrue(getInResults(largerFactor, "the", starPayload, 2) < 0);
  largerFactor.close();
}
use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.
the class FuzzySuggesterTest method testNoSeps.
/**
 * With no option flags set (so no PRESERVE_SEP), the query "ab c" must return both
 * entries and rank "abcd" first.
 */
public void testNoSeps() throws Exception {
  final Input[] inputs = {new Input("ab cd", 0), new Input("abcd", 1)};
  final int noFlags = 0;

  final Analyzer analyzer = new MockAnalyzer(random());
  final Directory dir = getDirectory();
  final FuzzySuggester fuzzy =
      new FuzzySuggester(
          dir, "fuzzy", analyzer, analyzer, noFlags, 256, -1, true, 1, true, 1, 3, false);
  fuzzy.build(new InputArrayIterator(inputs));

  // TODO: ideally the query "ab " would make this test pass too, and more generally
  // the analyzer would know that the user's query ended on a word boundary; however,
  // analyzers do not emit SEP tokens.
  final CharSequence query = TestUtil.stringToCharSequence("ab c", random());
  final List<LookupResult> hits = fuzzy.lookup(query, false, 2);
  assertEquals(2, hits.size());

  // Separators are not preserved, so "ab c" also completes to "abcd"; it carries the
  // higher weight and therefore has to come first.
  assertEquals("abcd", hits.get(0).key.toString());

  IOUtils.close(analyzer, dir);
}
use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.
the class FuzzySuggesterTest method testStandard.
/**
 * Basic test using a whitespace tokenizer with English stopword removal: the single
 * entry must be suggested whether or not the query contains the stopwords.
 */
public void testStandard() throws Exception {
  final String title = "the ghost of christmas past";
  final Input[] inputs = {new Input(title, 50)};

  final Analyzer stopwordAnalyzer =
      new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
  final Directory dir = getDirectory();
  final FuzzySuggester fuzzy =
      new FuzzySuggester(
          dir,
          "fuzzy",
          stopwordAnalyzer,
          stopwordAnalyzer,
          AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP,
          256,
          -1,
          false,
          FuzzySuggester.DEFAULT_MAX_EDITS,
          FuzzySuggester.DEFAULT_TRANSPOSITIONS,
          FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX,
          FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH,
          FuzzySuggester.DEFAULT_UNICODE_AWARE);
  fuzzy.build(new InputArrayIterator(inputs));

  // Queries, in order: the full prefix; the prefix without the stopword 'the';
  // the prefix without both stopwords 'the' and 'of'. Each must yield the same hit.
  final String[] queries = {"the ghost of chris", "ghost of chris", "ghost chris"};
  for (String query : queries) {
    List<LookupResult> hits =
        fuzzy.lookup(TestUtil.stringToCharSequence(query, random()), false, 1);
    assertEquals(1, hits.size());
    assertEquals("the ghost of christmas past", hits.get(0).key.toString());
    assertEquals(50, hits.get(0).value, 0.01F);
  }

  IOUtils.close(stopwordAnalyzer, dir);
}
use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.
the class FuzzySuggesterTest method testNonLatinRandomEdits.
/**
 * Builds a suggester over many random Cyrillic-prefixed entries plus one known
 * entry, then checks that randomly edited versions of the known entry's prefix
 * still return exactly that entry.
 */
public void testNonLatinRandomEdits() throws IOException {
  final List<Input> inputs = new ArrayList<>();

  // Random filler entries; the random string is drawn before the random weight.
  final int fillerCount = atLeast(100);
  for (int n = 0; n < fillerCount; n++) {
    String fillerKey = "буу" + TestUtil.randomSimpleString(random());
    int fillerWeight = 1 + random().nextInt(100);
    inputs.add(new Input(fillerKey, fillerWeight));
  }
  // The one entry every lookup below is expected to return.
  inputs.add(new Input("фуу бар буу фар", 12));

  final MockAnalyzer keywordAnalyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
  final Directory dir = getDirectory();
  final FuzzySuggester fuzzy =
      new FuzzySuggester(
          dir,
          "fuzzy",
          keywordAnalyzer,
          keywordAnalyzer,
          FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP,
          256,
          -1,
          true,
          FuzzySuggester.DEFAULT_MAX_EDITS,
          FuzzySuggester.DEFAULT_TRANSPOSITIONS,
          0,
          FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH,
          true);
  fuzzy.build(new InputArrayIterator(inputs));

  final int rounds = atLeast(10);
  for (int round = 0; round < rounds; round++) {
    String editedQuery = addRandomEdit("фуу бар буу", 0);
    CharSequence query = TestUtil.stringToCharSequence(editedQuery, random());
    List<LookupResult> hits = fuzzy.lookup(query, false, 2);

    // The edited query is used as the failure message to ease reproduction.
    assertEquals(editedQuery, 1, hits.size());
    LookupResult best = hits.get(0);
    assertEquals("фуу бар буу фар", best.key.toString());
    assertEquals(12, best.value, 0.01F);
  }

  IOUtils.close(keywordAnalyzer, dir);
}
Aggregations