use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.
the class WFSTCompletionTest method testBasic.
public void testBasic() throws Exception {
Input[] keys = new Input[] { new Input("foo", 50), new Input("bar", 10), new Input("barbar", 12), new Input("barbara", 6) };
Random random = new Random(random().nextLong());
Directory tempDir = getDirectory();
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst");
suggester.build(new InputArrayIterator(keys));
// top N of 2, but only foo is available
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("f", random), false, 2);
assertEquals(1, results.size());
assertEquals("foo", results.get(0).key.toString());
assertEquals(50, results.get(0).value, 0.01F);
// make sure we don't get a dup exact suggestion:
results = suggester.lookup(TestUtil.stringToCharSequence("foo", random), false, 2);
assertEquals(1, results.size());
assertEquals("foo", results.get(0).key.toString());
assertEquals(50, results.get(0).value, 0.01F);
// top N of 1 for 'bar': we return this even though barbar is higher
results = suggester.lookup(TestUtil.stringToCharSequence("bar", random), false, 1);
assertEquals(1, results.size());
assertEquals("bar", results.get(0).key.toString());
assertEquals(10, results.get(0).value, 0.01F);
// top N Of 2 for 'b'
results = suggester.lookup(TestUtil.stringToCharSequence("b", random), false, 2);
assertEquals(2, results.size());
assertEquals("barbar", results.get(0).key.toString());
assertEquals(12, results.get(0).value, 0.01F);
assertEquals("bar", results.get(1).key.toString());
assertEquals(10, results.get(1).value, 0.01F);
// top N of 3 for 'ba'
results = suggester.lookup(TestUtil.stringToCharSequence("ba", random), false, 3);
assertEquals(3, results.size());
assertEquals("barbar", results.get(0).key.toString());
assertEquals(12, results.get(0).value, 0.01F);
assertEquals("bar", results.get(1).key.toString());
assertEquals(10, results.get(1).value, 0.01F);
assertEquals("barbara", results.get(2).key.toString());
assertEquals(6, results.get(2).value, 0.01F);
tempDir.close();
}
use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.
the class AnalyzingInfixSuggesterTest method testBasicContext.
// LUCENE-5528 and LUCENE-6464
public void testBasicContext() throws Exception {
Input[] keys = new Input[] { new Input("lend me your ear", 8, new BytesRef("foobar"), asSet("foo", "bar")), new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz"), asSet("foo", "baz")) };
Path tempDir = createTempDir("analyzingInfixContext");
for (int iter = 0; iter < 2; iter++) {
AnalyzingInfixSuggester suggester;
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
if (iter == 0) {
suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
suggester.build(new InputArrayIterator(keys));
} else {
// Test again, after close/reopen:
suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
}
// No context provided, all results returned
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
assertEquals(2, results.size());
LookupResult result = results.get(0);
assertEquals("a penny saved is a penny earned", result.key);
assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
assertEquals(10, result.value);
assertEquals(new BytesRef("foobaz"), result.payload);
assertNotNull(result.contexts);
assertEquals(2, result.contexts.size());
assertTrue(result.contexts.contains(new BytesRef("foo")));
assertTrue(result.contexts.contains(new BytesRef("baz")));
result = results.get(1);
assertEquals("lend me your ear", result.key);
assertEquals("lend me your <b>ear</b>", result.highlightKey);
assertEquals(8, result.value);
assertEquals(new BytesRef("foobar"), result.payload);
assertNotNull(result.contexts);
assertEquals(2, result.contexts.size());
assertTrue(result.contexts.contains(new BytesRef("foo")));
assertTrue(result.contexts.contains(new BytesRef("bar")));
// Both have "foo" context:
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("foo"), 10, true, true);
assertEquals(2, results.size());
result = results.get(0);
assertEquals("a penny saved is a penny earned", result.key);
assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
assertEquals(10, result.value);
assertEquals(new BytesRef("foobaz"), result.payload);
assertNotNull(result.contexts);
assertEquals(2, result.contexts.size());
assertTrue(result.contexts.contains(new BytesRef("foo")));
assertTrue(result.contexts.contains(new BytesRef("baz")));
result = results.get(1);
assertEquals("lend me your ear", result.key);
assertEquals("lend me your <b>ear</b>", result.highlightKey);
assertEquals(8, result.value);
assertEquals(new BytesRef("foobar"), result.payload);
assertNotNull(result.contexts);
assertEquals(2, result.contexts.size());
assertTrue(result.contexts.contains(new BytesRef("foo")));
assertTrue(result.contexts.contains(new BytesRef("bar")));
// Only one has "bar" context:
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("bar"), 10, true, true);
assertEquals(1, results.size());
result = results.get(0);
assertEquals("lend me your ear", result.key);
assertEquals("lend me your <b>ear</b>", result.highlightKey);
assertEquals(8, result.value);
assertEquals(new BytesRef("foobar"), result.payload);
assertNotNull(result.contexts);
assertEquals(2, result.contexts.size());
assertTrue(result.contexts.contains(new BytesRef("foo")));
assertTrue(result.contexts.contains(new BytesRef("bar")));
// None do not have "foo" context:
Map<BytesRef, BooleanClause.Occur> contextInfo = new HashMap<>();
contextInfo.put(new BytesRef("foo"), BooleanClause.Occur.MUST_NOT);
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
assertEquals(0, results.size());
// Only one does not have "bar" context:
contextInfo.clear();
contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
assertEquals(1, results.size());
result = results.get(0);
assertEquals("a penny saved is a penny earned", result.key);
assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
assertEquals(10, result.value);
assertEquals(new BytesRef("foobaz"), result.payload);
assertNotNull(result.contexts);
assertEquals(2, result.contexts.size());
assertTrue(result.contexts.contains(new BytesRef("foo")));
assertTrue(result.contexts.contains(new BytesRef("baz")));
// Both have "foo" or "bar" context:
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("foo", "bar"), 10, true, true);
assertEquals(2, results.size());
result = results.get(0);
assertEquals("a penny saved is a penny earned", result.key);
assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
assertEquals(10, result.value);
assertEquals(new BytesRef("foobaz"), result.payload);
assertNotNull(result.contexts);
assertEquals(2, result.contexts.size());
assertTrue(result.contexts.contains(new BytesRef("foo")));
assertTrue(result.contexts.contains(new BytesRef("baz")));
result = results.get(1);
assertEquals("lend me your ear", result.key);
assertEquals("lend me your <b>ear</b>", result.highlightKey);
assertEquals(8, result.value);
assertEquals(new BytesRef("foobar"), result.payload);
assertNotNull(result.contexts);
assertEquals(2, result.contexts.size());
assertTrue(result.contexts.contains(new BytesRef("foo")));
assertTrue(result.contexts.contains(new BytesRef("bar")));
// Both have "bar" or "baz" context:
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("bar", "baz"), 10, true, true);
assertEquals(2, results.size());
result = results.get(0);
assertEquals("a penny saved is a penny earned", result.key);
assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
assertEquals(10, result.value);
assertEquals(new BytesRef("foobaz"), result.payload);
assertNotNull(result.contexts);
assertEquals(2, result.contexts.size());
assertTrue(result.contexts.contains(new BytesRef("foo")));
assertTrue(result.contexts.contains(new BytesRef("baz")));
result = results.get(1);
assertEquals("lend me your ear", result.key);
assertEquals("lend me your <b>ear</b>", result.highlightKey);
assertEquals(8, result.value);
assertEquals(new BytesRef("foobar"), result.payload);
assertNotNull(result.contexts);
assertEquals(2, result.contexts.size());
assertTrue(result.contexts.contains(new BytesRef("foo")));
assertTrue(result.contexts.contains(new BytesRef("bar")));
// Only one has "foo" and "bar" context:
contextInfo.clear();
contextInfo.put(new BytesRef("foo"), BooleanClause.Occur.MUST);
contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST);
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
assertEquals(1, results.size());
result = results.get(0);
assertEquals("lend me your ear", result.key);
assertEquals("lend me your <b>ear</b>", result.highlightKey);
assertEquals(8, result.value);
assertEquals(new BytesRef("foobar"), result.payload);
assertNotNull(result.contexts);
assertEquals(2, result.contexts.size());
assertTrue(result.contexts.contains(new BytesRef("foo")));
assertTrue(result.contexts.contains(new BytesRef("bar")));
// None have "bar" and "baz" context:
contextInfo.clear();
contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST);
contextInfo.put(new BytesRef("baz"), BooleanClause.Occur.MUST);
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
assertEquals(0, results.size());
// None do not have "foo" and do not have "bar" context:
contextInfo.clear();
contextInfo.put(new BytesRef("foo"), BooleanClause.Occur.MUST_NOT);
contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
assertEquals(0, results.size());
// Both do not have "bar" and do not have "baz" context:
contextInfo.clear();
contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
contextInfo.put(new BytesRef("baz"), BooleanClause.Occur.MUST_NOT);
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("bar", "baz"), 10, true, true);
assertEquals(2, results.size());
result = results.get(0);
assertEquals("a penny saved is a penny earned", result.key);
assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
assertEquals(10, result.value);
assertEquals(new BytesRef("foobaz"), result.payload);
assertNotNull(result.contexts);
assertEquals(2, result.contexts.size());
assertTrue(result.contexts.contains(new BytesRef("foo")));
assertTrue(result.contexts.contains(new BytesRef("baz")));
result = results.get(1);
assertEquals("lend me your ear", result.key);
assertEquals("lend me your <b>ear</b>", result.highlightKey);
assertEquals(8, result.value);
assertEquals(new BytesRef("foobar"), result.payload);
assertNotNull(result.contexts);
assertEquals(2, result.contexts.size());
assertTrue(result.contexts.contains(new BytesRef("foo")));
assertTrue(result.contexts.contains(new BytesRef("bar")));
// Only one has "foo" and does not have "bar" context:
contextInfo.clear();
contextInfo.put(new BytesRef("foo"), BooleanClause.Occur.MUST);
contextInfo.put(new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), contextInfo, 10, true, true);
assertEquals(1, results.size());
result = results.get(0);
assertEquals("a penny saved is a penny earned", result.key);
assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
assertEquals(10, result.value);
assertEquals(new BytesRef("foobaz"), result.payload);
assertNotNull(result.contexts);
assertEquals(2, result.contexts.size());
assertTrue(result.contexts.contains(new BytesRef("foo")));
assertTrue(result.contexts.contains(new BytesRef("baz")));
//LUCENE-6464 Using the advanced context filtering by query.
//Note that this is just a sanity test as all the above tests run through the filter by query method
BooleanQuery.Builder query = new BooleanQuery.Builder();
suggester.addContextToQuery(query, new BytesRef("foo"), BooleanClause.Occur.MUST);
suggester.addContextToQuery(query, new BytesRef("bar"), BooleanClause.Occur.MUST_NOT);
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), query.build(), 10, true, true);
assertEquals(1, results.size());
suggester.close();
a.close();
}
}
use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.
the class AnalyzingInfixSuggesterTest method testBasicNRT.
public void testBasicNRT() throws Exception {
Input[] keys = new Input[] { new Input("lend me your ear", 8, new BytesRef("foobar")) };
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false);
suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("lend me your ear", results.get(0).key);
assertEquals("lend me your <b>ear</b>", results.get(0).highlightKey);
assertEquals(8, results.get(0).value);
assertEquals(new BytesRef("foobar"), results.get(0).payload);
// Add a new suggestion:
suggester.add(new BytesRef("a penny saved is a penny earned"), null, 10, new BytesRef("foobaz"));
// Must refresh to see any newly added suggestions:
suggester.refresh();
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
assertEquals(2, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).highlightKey);
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
assertEquals("lend me your ear", results.get(1).key);
assertEquals("lend me your <b>ear</b>", results.get(1).highlightKey);
assertEquals(8, results.get(1).value);
assertEquals(new BytesRef("foobar"), results.get(1).payload);
results = suggester.lookup(TestUtil.stringToCharSequence("ear ", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("lend me your ear", results.get(0).key);
assertEquals("lend me your <b>ear</b>", results.get(0).highlightKey);
assertEquals(8, results.get(0).value);
assertEquals(new BytesRef("foobar"), results.get(0).payload);
results = suggester.lookup(TestUtil.stringToCharSequence("pen", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", results.get(0).highlightKey);
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
results = suggester.lookup(TestUtil.stringToCharSequence("p", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", results.get(0).highlightKey);
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
// Change the weight:
suggester.update(new BytesRef("lend me your ear"), null, 12, new BytesRef("foobox"));
// Must refresh to see any newly added suggestions:
suggester.refresh();
results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
assertEquals(2, results.size());
assertEquals("lend me your ear", results.get(0).key);
assertEquals("lend me your <b>ear</b>", results.get(0).highlightKey);
assertEquals(12, results.get(0).value);
assertEquals(new BytesRef("foobox"), results.get(0).payload);
assertEquals("a penny saved is a penny earned", results.get(1).key);
assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(1).highlightKey);
assertEquals(10, results.get(1).value);
assertEquals(new BytesRef("foobaz"), results.get(1).payload);
suggester.close();
a.close();
}
use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.
the class AnalyzingInfixSuggesterTest method testHighlightAsObject.
@SuppressWarnings("unchecked")
public void testHighlightAsObject() throws Exception {
Input[] keys = new Input[] { new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")) };
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false) {
@Override
protected Object highlight(String text, Set<String> matchedTokens, String prefixToken) throws IOException {
try (TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text))) {
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
ts.reset();
List<LookupHighlightFragment> fragments = new ArrayList<>();
int upto = 0;
while (ts.incrementToken()) {
String token = termAtt.toString();
int startOffset = offsetAtt.startOffset();
int endOffset = offsetAtt.endOffset();
if (upto < startOffset) {
fragments.add(new LookupHighlightFragment(text.substring(upto, startOffset), false));
upto = startOffset;
} else if (upto > startOffset) {
continue;
}
if (matchedTokens.contains(token)) {
// Token matches.
fragments.add(new LookupHighlightFragment(text.substring(startOffset, endOffset), true));
upto = endOffset;
} else if (prefixToken != null && token.startsWith(prefixToken)) {
fragments.add(new LookupHighlightFragment(text.substring(startOffset, startOffset + prefixToken.length()), true));
if (prefixToken.length() < token.length()) {
fragments.add(new LookupHighlightFragment(text.substring(startOffset + prefixToken.length(), startOffset + token.length()), false));
}
upto = endOffset;
}
}
ts.end();
int endOffset = offsetAtt.endOffset();
if (upto < endOffset) {
fragments.add(new LookupHighlightFragment(text.substring(upto), false));
}
return fragments;
}
}
};
suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("a penny saved is a penny <b>ear</b>ned", toString((List<LookupHighlightFragment>) results.get(0).highlightKey));
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
suggester.close();
a.close();
}
use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.
the class AnalyzingInfixSuggesterTest method testAfterLoad.
public void testAfterLoad() throws Exception {
Input[] keys = new Input[] { new Input("lend me your ear", 8, new BytesRef("foobar")), new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")) };
Path tempDir = createTempDir("AnalyzingInfixSuggesterTest");
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
suggester.build(new InputArrayIterator(keys));
assertEquals(2, suggester.getCount());
suggester.close();
suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
assertEquals(2, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).highlightKey);
assertEquals(10, results.get(0).value);
assertEquals(new BytesRef("foobaz"), results.get(0).payload);
assertEquals(2, suggester.getCount());
suggester.close();
a.close();
}
Aggregations