Search in sources :

Example 56 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingInfixSuggesterTest method testRefreshAfterBuild.

/**
 * Hands {@code performOperationWithAllOptionCombinations} an operation that
 * builds the given suggester from {@code sharedInputs} and then calls
 * {@code refresh()} on it.
 */
public void testRefreshAfterBuild() throws Exception {
    performOperationWithAllOptionCombinations(infixSuggester -> {
        // Build first, then refresh: the test is about refreshing an already-built suggester.
        final InputArrayIterator inputs = new InputArrayIterator(sharedInputs);
        infixSuggester.build(inputs);
        infixSuggester.refresh();
    });
}
Also used : InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator)

Example 57 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingSuggesterTest method test0ByteKeys.

/**
 * Builds an {@code AnalyzingSuggester} with an analyzer whose token streams
 * consist solely of canned binary tokens made of zero bytes.
 *
 * <p>The method contains no assertions; it succeeds when the build and the
 * final close complete without throwing.
 */
public void test0ByteKeys() throws Exception {
    final Analyzer analyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            return new TokenStreamComponents(new MockTokenizer(MockTokenizer.SIMPLE, true)) {

                // Position of the canned stream that the next getTokenStream() call hands out.
                int nextStream = 0;

                // Single-token streams alternating between a 3-byte and a 2-byte all-zero term
                // (a freshly allocated byte[] is zero-filled).
                final TokenStream[] cannedStreams = new TokenStream[] {
                    new CannedBinaryTokenStream(new BinaryToken[] { token(new BytesRef(new byte[3])) }),
                    new CannedBinaryTokenStream(new BinaryToken[] { token(new BytesRef(new byte[2])) }),
                    new CannedBinaryTokenStream(new BinaryToken[] { token(new BytesRef(new byte[3])) }),
                    new CannedBinaryTokenStream(new BinaryToken[] { token(new BytesRef(new byte[2])) })
                };

                @Override
                public TokenStream getTokenStream() {
                    // Read the slot before advancing so a failed lookup leaves the index untouched.
                    final int current = nextStream;
                    final TokenStream stream = cannedStreams[current];
                    nextStream = current + 1;
                    return stream;
                }

                @Override
                protected void setReader(final Reader reader) {
                    // Intentionally empty: the canned streams never read from the supplied reader.
                }
            };
        }
    };
    Directory dir = getDirectory();
    AnalyzingSuggester zeroKeySuggester = new AnalyzingSuggester(dir, "suggest", analyzer, analyzer, 0, 256, -1, true);
    Input[] keys = new Input[] { new Input("a a", 50), new Input("a b", 50) };
    zeroKeySuggester.build(new InputArrayIterator(keys));
    IOUtils.close(analyzer, dir);
}
Also used : CannedBinaryTokenStream(org.apache.lucene.analysis.CannedBinaryTokenStream) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) CannedBinaryTokenStream(org.apache.lucene.analysis.CannedBinaryTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Reader(java.io.Reader) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) BinaryToken(org.apache.lucene.analysis.CannedBinaryTokenStream.BinaryToken) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 58 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testKeywordWithPayloads.

/**
 * Builds an {@code AnalyzingSuggester} over shuffled keyword inputs that carry
 * payloads (including three entries for the key "bar") and checks key, weight
 * and payload of the results for the prefixes "f", "bar", "b" and "ba".
 *
 * <p>The whole set of lookups is executed twice against the same suggester.
 */
public void testKeywordWithPayloads() throws Exception {
    Iterable<Input> keys = shuffle(
        new Input("foo", 50, new BytesRef("hello")),
        new Input("bar", 10, new BytesRef("goodbye")),
        new Input("barbar", 12, new BytesRef("thank you")),
        new Input("bar", 9, new BytesRef("should be deduplicated")),
        new Input("bar", 8, new BytesRef("should also be deduplicated")),
        new Input("barbara", 6, new BytesRef("for all the fish")));
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
    Directory tempDir = getDirectory();
    AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
    suggester.build(new InputArrayIterator(keys));
    for (int round = 0; round < 2; round++) {
        // Prefix "f", top 2 requested: only "foo" can match.
        List<LookupResult> hits = suggester.lookup(TestUtil.stringToCharSequence("f", random()), false, 2);
        assertEquals(1, hits.size());
        LookupResult first = hits.get(0);
        assertEquals("foo", first.key.toString());
        assertEquals(50, first.value, 0.01F);
        assertEquals(new BytesRef("hello"), first.payload);

        // Prefix "bar", top 1 requested: "bar" is returned even though
        // "barbar" has the higher weight, because exactFirst is enabled.
        hits = suggester.lookup(TestUtil.stringToCharSequence("bar", random()), false, 1);
        assertEquals(1, hits.size());
        first = hits.get(0);
        assertEquals("bar", first.key.toString());
        assertEquals(10, first.value, 0.01F);
        assertEquals(new BytesRef("goodbye"), first.payload);

        // Prefix "b", top 2 requested.
        hits = suggester.lookup(TestUtil.stringToCharSequence("b", random()), false, 2);
        assertEquals(2, hits.size());
        first = hits.get(0);
        assertEquals("barbar", first.key.toString());
        assertEquals(12, first.value, 0.01F);
        assertEquals(new BytesRef("thank you"), first.payload);
        LookupResult second = hits.get(1);
        assertEquals("bar", second.key.toString());
        assertEquals(10, second.value, 0.01F);
        assertEquals(new BytesRef("goodbye"), second.payload);

        // Prefix "ba", top 3 requested.
        hits = suggester.lookup(TestUtil.stringToCharSequence("ba", random()), false, 3);
        assertEquals(3, hits.size());
        first = hits.get(0);
        assertEquals("barbar", first.key.toString());
        assertEquals(12, first.value, 0.01F);
        assertEquals(new BytesRef("thank you"), first.payload);
        second = hits.get(1);
        assertEquals("bar", second.key.toString());
        assertEquals(10, second.value, 0.01F);
        assertEquals(new BytesRef("goodbye"), second.payload);
        LookupResult third = hits.get(2);
        assertEquals("barbara", third.key.toString());
        assertEquals(6, third.value, 0.01F);
        assertEquals(new BytesRef("for all the fish"), third.payload);
    }
    IOUtils.close(analyzer, tempDir);
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 59 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testTooManyExpansions.

/**
 * Builds an {@code AnalyzingSuggester} whose analyzer always emits the tokens
 * "a" and "b" at the same position, then checks that looking up "a" yields
 * exactly {@code [a/1]}.
 *
 * <p>NOTE(review): the {@code 1} passed as the seventh constructor argument is
 * presumably the graph-expansion limit this test is named after; confirm
 * against the {@code AnalyzingSuggester} constructor.
 */
public void testTooManyExpansions() throws Exception {
    final Analyzer analyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            return new TokenStreamComponents(new MockTokenizer(MockTokenizer.SIMPLE, true)) {

                @Override
                public TokenStream getTokenStream() {
                    Token tokenA = new Token("a", 0, 1);
                    tokenA.setPositionIncrement(1);
                    // Position increment 0 stacks "b" on the same position as "a".
                    Token tokenB = new Token("b", 0, 1);
                    tokenB.setPositionIncrement(0);
                    return new CannedTokenStream(new Token[] { tokenA, tokenB });
                }

                @Override
                protected void setReader(final Reader reader) {
                    // Intentionally empty: the canned stream never reads from the supplied reader.
                }
            };
        }
    };
    Directory dir = getDirectory();
    AnalyzingSuggester limitedSuggester = new AnalyzingSuggester(dir, "suggest", analyzer, analyzer, 0, 256, 1, true);
    Input[] keys = new Input[] { new Input("a", 1) };
    limitedSuggester.build(new InputArrayIterator(keys));
    String actual = limitedSuggester.lookup("a", false, 1).toString();
    assertEquals("[a/1]", actual);
    IOUtils.close(analyzer, dir);
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) Reader(java.io.Reader) Token(org.apache.lucene.analysis.Token) BinaryToken(org.apache.lucene.analysis.CannedBinaryTokenStream.BinaryToken) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Directory(org.apache.lucene.store.Directory)

Example 60 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testInputPathRequired.

/**
 * Builds an {@code AnalyzingSuggester} from the keys "ab xc" and "ba xd" using
 * an analyzer that serves pre-canned token streams, then checks that looking
 * up "ab x" with one result requested returns exactly one result.
 */
public void testInputPathRequired() throws Exception {
    // The analyzer below mimics a SynonymAnalyzer backed by this map:
    //
    //   SynonymMap.Builder b = new SynonymMap.Builder(false);
    //   b.add(new CharsRef("ab"), new CharsRef("ba"), true);
    //   final SynonymMap map = b.build();
    //
    // It is hand-rolled so that the suggest module does not need a dependency
    // on the synonym module.
    final Analyzer analyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            return new TokenStreamComponents(tokenizer) {

                // Index of the canned stream returned by the next getTokenStream() call.
                int tokenStreamCounter = 0;

                // Streams are handed out strictly in this order, one per getTokenStream() call.
                // Presumably the first two serve the two build keys and the third serves
                // the "ab x" lookup; confirm against how the suggester calls the analyzer.
                final TokenStream[] tokenStreams = new TokenStream[] {
                    new CannedTokenStream(new Token[] { token("ab", 1, 1), token("ba", 0, 1), token("xc", 1, 1) }),
                    new CannedTokenStream(new Token[] { token("ba", 1, 1), token("xd", 1, 1) }),
                    new CannedTokenStream(new Token[] { token("ab", 1, 1), token("ba", 0, 1), token("x", 1, 1) })
                };

                @Override
                public TokenStream getTokenStream() {
                    TokenStream result = tokenStreams[tokenStreamCounter];
                    tokenStreamCounter++;
                    return result;
                }

                @Override
                protected void setReader(final Reader reader) {
                    // Intentionally empty: the canned streams never read from the supplied reader.
                }
            };
        }
    };
    Input[] keys = new Input[] { new Input("ab xc", 50), new Input("ba xd", 50) };
    Directory tempDir = getDirectory();
    AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
    suggester.build(new InputArrayIterator(keys));
    List<LookupResult> results = suggester.lookup("ab x", false, 1);
    // assertEquals reports the actual size on failure, unlike assertTrue(size == 1).
    assertEquals(1, results.size());
    IOUtils.close(analyzer, tempDir);
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) CannedBinaryTokenStream(org.apache.lucene.analysis.CannedBinaryTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Reader(java.io.Reader) Token(org.apache.lucene.analysis.Token) BinaryToken(org.apache.lucene.analysis.CannedBinaryTokenStream.BinaryToken) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Directory(org.apache.lucene.store.Directory)

Aggregations

InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator): 76; Input (org.apache.lucene.search.suggest.Input): 71; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 67; Analyzer (org.apache.lucene.analysis.Analyzer): 65; LookupResult (org.apache.lucene.search.suggest.Lookup.LookupResult): 48; Directory (org.apache.lucene.store.Directory): 43; BytesRef (org.apache.lucene.util.BytesRef): 26; Path (java.nio.file.Path): 17; MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 11; Tokenizer (org.apache.lucene.analysis.Tokenizer): 10; Reader (java.io.Reader): 8; ArrayList (java.util.ArrayList): 8; CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 8; StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer): 7; TokenStream (org.apache.lucene.analysis.TokenStream): 6; HashSet (java.util.HashSet): 5; CharArraySet (org.apache.lucene.analysis.CharArraySet): 5; Token (org.apache.lucene.analysis.Token): 5; InputStream (java.io.InputStream): 4; OutputStream (java.io.OutputStream): 4