use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.
the class TestFreeTextSuggester method testEmptyString.
// Lookup of just the empty string is rejected with an IllegalArgumentException:
public void testEmptyString() throws Exception {
  // Verifies that looking up the empty string throws IllegalArgumentException.
  Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(new Input("foo bar bar bar bar", 50));
  // try-with-resources: the analyzer is closed even if build() or the
  // assertion below fails, instead of only on the success path.
  try (Analyzer a = new MockAnalyzer(random())) {
    FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20);
    sug.build(new InputArrayIterator(keys));
    expectThrows(IllegalArgumentException.class, () -> {
      sug.lookup("", 10);
    });
  }
}
use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.
the class TestFreeTextSuggester method testNoDupsAcrossGrams.
// Make sure the last token is not duplicated
public void testNoDupsAcrossGrams() throws Exception {
  // Verifies that the lookup for "foo b" yields exactly one suggestion,
  // "foo bar", even though "bar" occurs several times in the input.
  Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(new Input("foo bar bar bar bar", 50));
  // try-with-resources: the analyzer is closed even if the assertion fails.
  try (Analyzer a = new MockAnalyzer(random())) {
    FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20);
    sug.build(new InputArrayIterator(keys));
    assertEquals("foo bar/1.00", toString(sug.lookup("foo b", 10)));
  }
}
use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.
the class TestFreeTextSuggester method testEndingHole.
// With one ending hole, ShingleFilter produces "of _" and
// we should properly predict from that:
public void testEndingHole() throws Exception {
  // Verifies predictions when the query ends in a removed stop word ("of").
  // try-with-resources: the analyzer is closed even if an assertion fails.
  try (Analyzer a = new Analyzer() {
    // Just deletes "of"
    @Override
    public TokenStreamComponents createComponents(String field) {
      Tokenizer tokenizer = new MockTokenizer();
      CharArraySet stopSet = StopFilter.makeStopSet("of");
      return new TokenStreamComponents(tokenizer, new StopFilter(tokenizer, stopSet));
    }
  }) {
    Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(new Input("wizard of oz", 50));
    FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
    sug.build(new InputArrayIterator(keys));

    // The removed "of" shows up as the hole marker "_" in the suggestion.
    assertEquals("wizard _ oz/1.00", toString(sug.lookup("wizard of", 10)));

    // Falls back to the unigram model: backoff 0.4 times
    // probability 0.5 gives the expected score 0.20.
    assertEquals("oz/0.20", toString(sug.lookup("wizard o", 10)));
  }
}
use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.
the class TestFreeTextSuggester method testTwoEndingHoles.
// If the number of ending holes exceeds the ngrams window
// then there are no predictions, because ShingleFilter
// does not produce e.g. a hole only "_ _" token:
public void testTwoEndingHoles() throws Exception {
  // Verifies that a query ending in two removed stop words yields no suggestions.
  // try-with-resources: the analyzer is closed even if the assertion fails.
  try (Analyzer a = new Analyzer() {
    // Just deletes "of"
    @Override
    public TokenStreamComponents createComponents(String field) {
      Tokenizer tokenizer = new MockTokenizer();
      CharArraySet stopSet = StopFilter.makeStopSet("of");
      return new TokenStreamComponents(tokenizer, new StopFilter(tokenizer, stopSet));
    }
  }) {
    Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(new Input("wizard of of oz", 50));
    FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
    sug.build(new InputArrayIterator(keys));
    // Empty rendering == no predictions were returned.
    assertEquals("", toString(sug.lookup("wizard of of", 10)));
  }
}
use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.
the class TestFreeTextSuggester method testIllegalByteDuringQuery.
public void testIllegalByteDuringQuery() throws Exception {
  // Verifies that a query containing the separator byte is rejected.
  // Default separator is INFORMATION SEPARATOR TWO
  // (0x1e), so no query token is allowed to contain it
  Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(new Input("foo bar baz", 50));
  // try-with-resources: the analyzer is closed even if the assertion fails.
  try (Analyzer analyzer = new MockAnalyzer(random())) {
    FreeTextSuggester sug = new FreeTextSuggester(analyzer);
    // The indexed input is clean, so build() itself must succeed.
    sug.build(new InputArrayIterator(keys));
    expectThrows(IllegalArgumentException.class, () -> {
      // The query must actually contain the 0x1e byte; it is written as an
      // explicit escape so the otherwise invisible control character cannot
      // be lost (a plain "foob" would never trigger the exception).
      sug.lookup("foo\u001eb", 10);
    });
  }
}
Aggregations