Search in sources :

Example 16 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class TestFreeTextSuggester method testEmptyString.

// Lookup of just empty string produces unicode only matches:
public void testEmptyString() throws Exception {
    Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(new Input("foo bar bar bar bar", 50));
    Analyzer a = new MockAnalyzer(random());
    FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20);
    sug.build(new InputArrayIterator(keys));
    expectThrows(IllegalArgumentException.class, () -> {
        sug.lookup("", 10);
    });
    a.close();
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer)

Example 17 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class TestFreeTextSuggester method testNoDupsAcrossGrams.

// Make sure the last token is not duplicated
public void testNoDupsAcrossGrams() throws Exception {
    Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(new Input("foo bar bar bar bar", 50));
    Analyzer a = new MockAnalyzer(random());
    FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20);
    sug.build(new InputArrayIterator(keys));
    assertEquals("foo bar/1.00", toString(sug.lookup("foo b", 10)));
    a.close();
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer)

Example 18 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class TestFreeTextSuggester method testEndingHole.

// With one ending hole, ShingleFilter produces "of _" and
// we should properly predict from that:
public void testEndingHole() throws Exception {
    // Just deletes "of"
    Analyzer a = new Analyzer() {

        @Override
        public TokenStreamComponents createComponents(String field) {
            Tokenizer tokenizer = new MockTokenizer();
            CharArraySet stopSet = StopFilter.makeStopSet("of");
            return new TokenStreamComponents(tokenizer, new StopFilter(tokenizer, stopSet));
        }
    };
    Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(new Input("wizard of oz", 50));
    FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
    sug.build(new InputArrayIterator(keys));
    assertEquals("wizard _ oz/1.00", toString(sug.lookup("wizard of", 10)));
    // Falls back to unigram model, with backoff 0.4 times
    // prop 0.5:
    assertEquals("oz/0.20", toString(sug.lookup("wizard o", 10)));
    a.close();
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) CharArraySet(org.apache.lucene.analysis.CharArraySet) Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) StopFilter(org.apache.lucene.analysis.StopFilter) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer)

Example 19 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class TestFreeTextSuggester method testTwoEndingHoles.

// If the number of ending holes exceeds the ngrams window
// then there are no predictions, because ShingleFilter
// does not produce e.g. a hole only "_ _" token:
public void testTwoEndingHoles() throws Exception {
    // Just deletes "of"
    Analyzer a = new Analyzer() {

        @Override
        public TokenStreamComponents createComponents(String field) {
            Tokenizer tokenizer = new MockTokenizer();
            CharArraySet stopSet = StopFilter.makeStopSet("of");
            return new TokenStreamComponents(tokenizer, new StopFilter(tokenizer, stopSet));
        }
    };
    Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(new Input("wizard of of oz", 50));
    FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
    sug.build(new InputArrayIterator(keys));
    assertEquals("", toString(sug.lookup("wizard of of", 10)));
    a.close();
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) CharArraySet(org.apache.lucene.analysis.CharArraySet) Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) StopFilter(org.apache.lucene.analysis.StopFilter) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer)

Example 20 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class TestFreeTextSuggester method testIllegalByteDuringQuery.

public void testIllegalByteDuringQuery() throws Exception {
    // Default separator is INFORMATION SEPARATOR TWO
    // (0x1e), so no input token is allowed to contain it
    Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(new Input("foo bar baz", 50));
    Analyzer analyzer = new MockAnalyzer(random());
    FreeTextSuggester sug = new FreeTextSuggester(analyzer);
    sug.build(new InputArrayIterator(keys));
    expectThrows(IllegalArgumentException.class, () -> {
        sug.lookup("foob", 10);
    });
    analyzer.close();
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer)

Aggregations

InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator)76 Input (org.apache.lucene.search.suggest.Input)71 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)67 Analyzer (org.apache.lucene.analysis.Analyzer)65 LookupResult (org.apache.lucene.search.suggest.Lookup.LookupResult)48 Directory (org.apache.lucene.store.Directory)43 BytesRef (org.apache.lucene.util.BytesRef)26 Path (java.nio.file.Path)17 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)11 Tokenizer (org.apache.lucene.analysis.Tokenizer)10 Reader (java.io.Reader)8 ArrayList (java.util.ArrayList)8 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)8 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)7 TokenStream (org.apache.lucene.analysis.TokenStream)6 HashSet (java.util.HashSet)5 CharArraySet (org.apache.lucene.analysis.CharArraySet)5 Token (org.apache.lucene.analysis.Token)5 InputStream (java.io.InputStream)4 OutputStream (java.io.OutputStream)4