Search in sources :

Example 46 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class TestFreeTextSuggester method testIllegalByteDuringBuild.

/**
 * Checks that building a {@code FreeTextSuggester} fails with an
 * {@link IllegalArgumentException} when an input token contains the separator byte.
 */
public void testIllegalByteDuringBuild() throws Exception {
    // Default separator is INFORMATION SEPARATOR TWO
    // (0x1e), so no input token is allowed to contain it.
    // The byte is spelled as an explicit unicode escape between "foo" and "bar":
    // an invisible raw control character is easily lost when the file is copied
    // or re-encoded, and without it the build has nothing to reject.
    Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(new Input("foo\u001ebar baz", 50));
    Analyzer analyzer = new MockAnalyzer(random());
    // Single-argument constructor, so the suggester uses its default separator.
    FreeTextSuggester sug = new FreeTextSuggester(analyzer);
    expectThrows(IllegalArgumentException.class, () -> {
        sug.build(new InputArrayIterator(keys));
    });
    analyzer.close();
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) Analyzer(org.apache.lucene.analysis.Analyzer)

Example 47 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class TestFreeTextSuggester method testBasic.

/**
 * Builds a {@code FreeTextSuggester} (constructed with 2 as its third argument and a
 * space, 0x20, as separator) from two inputs and checks the lookup results, first on
 * the freshly built suggester and then on one restored through store/load.
 */
public void testBasic() throws Exception {
    Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(new Input("foo bar baz blah", 50), new Input("boo foo bar foo bee", 20));
    Analyzer a = new MockAnalyzer(random());
    FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20);
    sug.build(new InputArrayIterator(keys));
    assertEquals(2, sug.getCount());
    // Pass 0 checks the built suggester; pass 1 checks the instance that was
    // re-loaded at the end of pass 0.
    for (int i = 0; i < 2; i++) {
        // Uses bigram model and unigram backoff:
        assertEquals("foo bar/0.67 foo bee/0.33 baz/0.04 blah/0.04 boo/0.04", toString(sug.lookup("foo b", 10)));
        // Uses only bigram model:
        assertEquals("foo bar/0.67 foo bee/0.33", toString(sug.lookup("foo ", 10)));
        // Uses only unigram model:
        assertEquals("foo/0.33", toString(sug.lookup("foo", 10)));
        // Uses only unigram model:
        assertEquals("bar/0.22 baz/0.11 bee/0.11 blah/0.11 boo/0.11", toString(sug.lookup("b", 10)));
        // Try again after save/load:
        Path tmpDir = createTempDir("FreeTextSuggesterTest");
        Path path = tmpDir.resolve("suggester");
        // try-with-resources closes the stream even if store() throws.
        try (OutputStream os = Files.newOutputStream(path)) {
            sug.store(os);
        }
        // Likewise for the input side: the stream is closed even if load() throws.
        try (InputStream is = Files.newInputStream(path)) {
            sug = new FreeTextSuggester(a, a, 2, (byte) 0x20);
            sug.load(is);
        }
        assertEquals(2, sug.getCount());
    }
    a.close();
}
Also used : Path(java.nio.file.Path) Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) Analyzer(org.apache.lucene.analysis.Analyzer)

Example 48 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class TestFreeTextSuggester method testUnigrams.

// Checks that suggestions can be produced from the unigram model alone:
public void testUnigrams() throws Exception {
    final Iterable<Input> inputs = AnalyzingSuggesterTest.shuffle(new Input("foo bar baz blah boo foo bar foo bee", 50));
    final Analyzer analyzer = new MockAnalyzer(random());
    // Same analyzer passed for both analyzer arguments; third argument is 1, separator is a space (0x20).
    final FreeTextSuggester suggester = new FreeTextSuggester(analyzer, analyzer, 1, (byte) 0x20);
    suggester.build(new InputArrayIterator(inputs));
    // Expected order: higher count first, ties broken by term in ascending order.
    final String expected = "bar/0.22 baz/0.11 bee/0.11 blah/0.11 boo/0.11";
    assertEquals(expected, toString(suggester.lookup("b", 10)));
    analyzer.close();
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) Analyzer(org.apache.lucene.analysis.Analyzer)

Example 49 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class WFSTCompletionTest method testExactFirst.

/**
 * With the lookup constructed with {@code true} as its third argument (presumably the
 * exact-first flag, going by the test name), the exact match "x" must be returned first
 * even though its weight (2) is lower than that of "x y" (20).
 */
public void testExactFirst() throws Exception {
    final Directory dir = getDirectory();
    final WFSTCompletionLookup lookup = new WFSTCompletionLookup(dir, "wfst", true);
    final Input[] entries = { new Input("x y", 20), new Input("x", 2) };
    lookup.build(new InputArrayIterator(entries));
    for (int limit = 1; limit < 4; limit++) {
        final List<LookupResult> hits = lookup.lookup("x", false, limit);
        // Only two entries exist, so at most two results can come back.
        final int expectedSize = Math.min(limit, 2);
        assertEquals(expectedSize, hits.size());
        final LookupResult first = hits.get(0);
        assertEquals("x", first.key);
        assertEquals(2, first.value);
        if (limit == 1) {
            // A single requested result leaves no second entry to check.
            continue;
        }
        final LookupResult second = hits.get(1);
        assertEquals("x y", second.key);
        assertEquals(20, second.value);
    }
    dir.close();
}
Also used : Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Directory(org.apache.lucene.store.Directory)

Example 50 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class WFSTCompletionTest method testNonExactFirst.

/**
 * With the lookup constructed with {@code false} as its third argument (presumably the
 * exact-first flag, going by the test name), results come back by weight: "x y" (20)
 * precedes the exact match "x" (2).
 */
public void testNonExactFirst() throws Exception {
    final Directory dir = getDirectory();
    final WFSTCompletionLookup lookup = new WFSTCompletionLookup(dir, "wfst", false);
    final Input[] entries = { new Input("x y", 20), new Input("x", 2) };
    lookup.build(new InputArrayIterator(entries));
    for (int limit = 1; limit < 4; limit++) {
        final List<LookupResult> hits = lookup.lookup("x", false, limit);
        // Only two entries exist, so at most two results can come back.
        final int expectedSize = Math.min(limit, 2);
        assertEquals(expectedSize, hits.size());
        final LookupResult first = hits.get(0);
        assertEquals("x y", first.key);
        assertEquals(20, first.value);
        if (limit == 1) {
            // A single requested result leaves no second entry to check.
            continue;
        }
        final LookupResult second = hits.get(1);
        assertEquals("x", second.key);
        assertEquals(2, second.value);
    }
    dir.close();
}
Also used : Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Directory(org.apache.lucene.store.Directory)

Aggregations

InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator)76 Input (org.apache.lucene.search.suggest.Input)71 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)67 Analyzer (org.apache.lucene.analysis.Analyzer)65 LookupResult (org.apache.lucene.search.suggest.Lookup.LookupResult)48 Directory (org.apache.lucene.store.Directory)43 BytesRef (org.apache.lucene.util.BytesRef)26 Path (java.nio.file.Path)17 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)11 Tokenizer (org.apache.lucene.analysis.Tokenizer)10 Reader (java.io.Reader)8 ArrayList (java.util.ArrayList)8 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)8 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)7 TokenStream (org.apache.lucene.analysis.TokenStream)6 HashSet (java.util.HashSet)5 CharArraySet (org.apache.lucene.analysis.CharArraySet)5 Token (org.apache.lucene.analysis.Token)5 InputStream (java.io.InputStream)4 OutputStream (java.io.OutputStream)4