Use of org.apache.lucene.search.suggest.Lookup.LookupResult in the lucene-solr project by apache.
From the class WFSTCompletionTest, method testRandom.
/**
 * Randomized comparison of {@code WFSTCompletionLookup} against a brute-force model.
 *
 * <p>Generates {@code atLeast(1000)} distinct random strings with random weights in
 * {@code [0, 1 << 24)}, builds the suggester from them, and then, for every non-empty proper
 * prefix of every generated string, requests a random top-N (1..10) and checks that the returned
 * keys and weights match, in order, what a full scan of the {@code TreeMap} model produces.
 *
 * @throws Exception if building or querying the suggester fails
 */
public void testRandom() throws Exception {
  int numWords = atLeast(1000);
  // Brute-force model: every input string mapped to its weight.
  final TreeMap<String, Long> slowCompletor = new TreeMap<>();
  final TreeSet<String> allPrefixes = new TreeSet<>();
  Input[] keys = new Input[numWords];
  for (int i = 0; i < numWords; i++) {
    String s;
    while (true) {
      // TODO: would be nice to fix this slowCompletor/comparator to
      // use full range, but we might lose some coverage too...
      s = TestUtil.randomSimpleString(random());
      // Retry until the string has not been generated before.
      if (!slowCompletor.containsKey(s)) {
        break;
      }
    }
    // Record every non-empty proper prefix (the full string itself is excluded).
    for (int j = 1; j < s.length(); j++) {
      allPrefixes.add(s.substring(0, j));
    }
    // we can probably do Integer.MAX_VALUE here, but why worry.
    int weight = random().nextInt(1 << 24);
    slowCompletor.put(s, (long) weight);
    keys[i] = new Input(s, weight);
  }
  // try-with-resources so the directory is closed even when an assertion below fails.
  try (Directory tempDir = getDirectory()) {
    WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false);
    suggester.build(new InputArrayIterator(keys));
    assertEquals(numWords, suggester.getCount());
    Random random = new Random(random().nextLong());
    for (String prefix : allPrefixes) {
      final int topN = TestUtil.nextInt(random, 1, 10);
      // 1. ask the suggester for the top N completions of this prefix
      List<LookupResult> r = suggester.lookup(TestUtil.stringToCharSequence(prefix, random), false, topN);
      // 2. go thru whole treemap (slowCompletor) and check it's actually the best suggestion
      final List<LookupResult> matches = new ArrayList<>();
      // TODO: could be faster... but it's slowCompletor for a reason
      for (Map.Entry<String, Long> e : slowCompletor.entrySet()) {
        if (e.getKey().startsWith(prefix)) {
          matches.add(new LookupResult(e.getKey(), e.getValue().longValue()));
        }
      }
      assertTrue(matches.size() > 0);
      // Highest weight first; equal weights fall back to LookupResult's natural ordering.
      Collections.sort(matches, new Comparator<LookupResult>() {
        @Override
        public int compare(LookupResult left, LookupResult right) {
          // Compare the long weights directly instead of through a lossy float conversion.
          int cmp = Long.compare(right.value, left.value);
          if (cmp == 0) {
            return left.compareTo(right);
          } else {
            return cmp;
          }
        }
      });
      // Keep only the expected top N.
      if (matches.size() > topN) {
        matches.subList(topN, matches.size()).clear();
      }
      assertEquals(matches.size(), r.size());
      for (int hit = 0; hit < r.size(); hit++) {
        //System.out.println(" check hit " + hit);
        assertEquals(matches.get(hit).key.toString(), r.get(hit).key.toString());
        // Weights are longs: require exact equality.
        assertEquals(matches.get(hit).value, r.get(hit).value);
      }
    }
  }
}
Use of org.apache.lucene.search.suggest.Lookup.LookupResult in the lucene-solr project by apache.
From the class FSTCompletionTest, method testRandom.
/**
 * Randomized prefix check for {@code FSTCompletionLookup}.
 *
 * <p>Builds the lookup from between 2500 and 4999 inputs whose terms are the decimal form of
 * random longs (weights in {@code [0, 100)}), then verifies that for every non-empty proper
 * prefix of every term, each of the up-to-10 returned suggestions starts with that prefix.
 *
 * @throws Exception if building or querying the lookup fails
 */
public void testRandom() throws Exception {
  List<Input> freqs = new ArrayList<>();
  Random rnd = random();
  // Draw the input count once; re-drawing it in the loop condition would change the bound on
  // every iteration.
  final int numInputs = 2500 + rnd.nextInt(2500);
  for (int i = 0; i < numInputs; i++) {
    int weight = rnd.nextInt(100);
    freqs.add(new Input("" + rnd.nextLong(), weight));
  }
  // try-with-resources so the directory is closed even when an assertion below fails.
  try (Directory tempDir = getDirectory()) {
    FSTCompletionLookup lookup = new FSTCompletionLookup(tempDir, "fst");
    lookup.build(new InputArrayIterator(freqs.toArray(new Input[freqs.size()])));
    for (Input tf : freqs) {
      final String term = tf.term.utf8ToString();
      // Every non-empty proper prefix of the term (the full term itself is excluded).
      for (int i = 1; i < term.length(); i++) {
        String prefix = term.substring(0, i);
        for (LookupResult lr : lookup.lookup(stringToCharSequence(prefix), true, 10)) {
          assertTrue(lr.key.toString().startsWith(prefix));
        }
      }
    }
  }
}
Use of org.apache.lucene.search.suggest.Lookup.LookupResult in the lucene-solr project by apache.
From the class WFSTCompletionTest, method testEmpty.
/**
 * Builds a {@code WFSTCompletionLookup} from zero inputs and checks that it reports a count of
 * 0 and returns no suggestions for the query {@code "a"}.
 */
public void testEmpty() throws Exception {
  final Directory dir = getDirectory();
  final WFSTCompletionLookup emptyLookup = new WFSTCompletionLookup(dir, "wfst", false);

  // Feed the suggester an iterator over an empty input array.
  final Input[] noInputs = new Input[0];
  emptyLookup.build(new InputArrayIterator(noInputs));
  assertEquals(0, emptyLookup.getCount());

  // Nothing was indexed, so any lookup must come back empty.
  final List<LookupResult> hits = emptyLookup.lookup("a", false, 20);
  assertTrue(hits.isEmpty());

  dir.close();
}
Use of org.apache.lucene.search.suggest.Lookup.LookupResult in the lucene-solr project by apache.
From the class AnalyzingSuggesterTest, method testDupSurfaceFormsMissingResults2.
/**
 * Builds an {@code AnalyzingSuggester} over the inputs {@code "a"} (weight 6) and {@code "b"}
 * (weight 5) using an analyzer that returns canned token streams, and checks that looking up
 * {@code "a"} with a limit of 2 returns both inputs, {@code "a"} first, both directly after the
 * build and again after a store/load round trip through a temporary file.
 *
 * @throws Exception if building, storing, loading or querying the suggester fails
 */
public void testDupSurfaceFormsMissingResults2() throws Exception {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
      return new TokenStreamComponents(tokenizer) {
        // Number of canned multi-token streams handed out so far (0 or 1).
        int count;

        @Override
        public TokenStream getTokenStream() {
          if (count == 0) {
            // First request on this components instance: the four-token stream p, q, r, s.
            count++;
            return new CannedTokenStream(new Token[] { token("p", 1, 1), token("q", 1, 1), token("r", 0, 1), token("s", 0, 1) });
          } else {
            // Every later request: just the single token p.
            return new CannedTokenStream(new Token[] { token("p", 1, 1) });
          }
        }

        @Override
        protected void setReader(final Reader reader) {
          // Intentionally empty: the canned streams ignore the reader.
        }
      };
    }
  };
  Directory tempDir = getDirectory();
  AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, -1, true);
  suggester.build(new InputArrayIterator(new Input[] { new Input("a", 6), new Input("b", 5) }));
  List<LookupResult> results = suggester.lookup("a", false, 2);
  assertEquals(2, results.size());
  assertEquals("a", results.get(0).key);
  assertEquals(6, results.get(0).value);
  assertEquals("b", results.get(1).key);
  assertEquals(5, results.get(1).value);
  // Try again after save/load:
  Path tmpDir = createTempDir("AnalyzingSuggesterTest");
  Path path = tmpDir.resolve("suggester");
  // try-with-resources so the streams are closed even if store/load throws.
  try (OutputStream os = Files.newOutputStream(path)) {
    suggester.store(os);
  }
  try (InputStream is = Files.newInputStream(path)) {
    suggester.load(is);
  }
  results = suggester.lookup("a", false, 2);
  assertEquals(2, results.size());
  assertEquals("a", results.get(0).key);
  assertEquals(6, results.get(0).value);
  assertEquals("b", results.get(1).key);
  assertEquals(5, results.get(1).value);
  IOUtils.close(a, tempDir);
}
Use of org.apache.lucene.search.suggest.Lookup.LookupResult in the lucene-solr project by apache.
From the class AnalyzingSuggesterTest, method testKeyword.
/**
 * The WFST suggester test, ported to a keyword-tokenizing analyzer so that it behaves the same
 * way: each lookup below pins the expected keys and weights for one prefix.
 */
public void testKeyword() throws Exception {
  final Iterable<Input> inputs = shuffle(
      new Input("foo", 50),
      new Input("bar", 10),
      new Input("barbar", 10),
      new Input("barbar", 12),
      new Input("barbara", 6),
      new Input("bar", 5),
      new Input("barbara", 1));
  final Directory dir = getDirectory();
  final Analyzer keywordAnalyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
  final AnalyzingSuggester lookup = new AnalyzingSuggester(dir, "suggest", keywordAnalyzer);
  lookup.build(new InputArrayIterator(inputs));

  // Prefix "f", limit 2: only "foo" matches, so a single hit comes back.
  final List<LookupResult> fHits = lookup.lookup(TestUtil.stringToCharSequence("f", random()), false, 2);
  assertEquals(1, fHits.size());
  assertEquals("foo", fHits.get(0).key.toString());
  assertEquals(50, fHits.get(0).value, 0.01F);

  // Prefix "bar", limit 1: "bar" is returned even though "barbar" has the
  // higher weight, because exactFirst is enabled.
  final List<LookupResult> barHits = lookup.lookup(TestUtil.stringToCharSequence("bar", random()), false, 1);
  assertEquals(1, barHits.size());
  assertEquals("bar", barHits.get(0).key.toString());
  assertEquals(10, barHits.get(0).value, 0.01F);

  // Prefix "b", limit 2.
  final List<LookupResult> bHits = lookup.lookup(TestUtil.stringToCharSequence("b", random()), false, 2);
  assertEquals(2, bHits.size());
  assertEquals("barbar", bHits.get(0).key.toString());
  assertEquals(12, bHits.get(0).value, 0.01F);
  assertEquals("bar", bHits.get(1).key.toString());
  assertEquals(10, bHits.get(1).value, 0.01F);

  // Prefix "ba", limit 3.
  final List<LookupResult> baHits = lookup.lookup(TestUtil.stringToCharSequence("ba", random()), false, 3);
  assertEquals(3, baHits.size());
  assertEquals("barbar", baHits.get(0).key.toString());
  assertEquals(12, baHits.get(0).value, 0.01F);
  assertEquals("bar", baHits.get(1).key.toString());
  assertEquals(10, baHits.get(1).value, 0.01F);
  assertEquals("barbara", baHits.get(2).key.toString());
  assertEquals(6, baHits.get(2).value, 0.01F);

  IOUtils.close(keywordAnalyzer, dir);
}
Aggregations