Search in sources :

Example 76 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

From class AnalyzingSuggesterTest, method testInputPathRequired.

/**
 * Exercises lookup through an analyzer that emits graph tokens (a zero
 * position-increment "ba" alongside "ab"), mimicking a synonym analyzer via
 * canned token streams so the suggest module needs no synonym dependency.
 * Streams 0 and 1 are consumed while building the two inputs; stream 2 is
 * consumed by the lookup of "ab x", which should match "ab xc".
 */
public void testInputPathRequired() throws Exception {
    //  SynonymMap.Builder b = new SynonymMap.Builder(false);
    //  b.add(new CharsRef("ab"), new CharsRef("ba"), true);
    //  final SynonymMap map = b.build();
    //  The Analyzer below mimics the functionality of the SynonymAnalyzer
    //  using the above map, so that the suggest module does not need a dependency on the 
    //  synonym module 
    final Analyzer analyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            return new TokenStreamComponents(tokenizer) {

                // Index of the next canned stream to hand out; each analysis
                // request consumes exactly one stream, in order.
                int tokenStreamCounter = 0;

                // Streams 0/1 analyze the two build inputs; stream 2 analyzes the lookup query.
                final TokenStream[] tokenStreams = new TokenStream[] {
                    new CannedTokenStream(new Token[] { token("ab", 1, 1), token("ba", 0, 1), token("xc", 1, 1) }),
                    new CannedTokenStream(new Token[] { token("ba", 1, 1), token("xd", 1, 1) }),
                    new CannedTokenStream(new Token[] { token("ab", 1, 1), token("ba", 0, 1), token("x", 1, 1) })
                };

                @Override
                public TokenStream getTokenStream() {
                    TokenStream result = tokenStreams[tokenStreamCounter];
                    tokenStreamCounter++;
                    return result;
                }

                @Override
                protected void setReader(final Reader reader) {
                }
            };
        }
    };
    Input[] keys = new Input[] { new Input("ab xc", 50), new Input("ba xd", 50) };
    Directory tempDir = getDirectory();
    AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
    suggester.build(new InputArrayIterator(keys));
    List<LookupResult> results = suggester.lookup("ab x", false, 1);
    // assertEquals reports the actual size on failure, unlike assertTrue(size == 1);
    // also consistent with the other tests in this class.
    assertEquals(1, results.size());
    IOUtils.close(analyzer, tempDir);
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) CannedBinaryTokenStream(org.apache.lucene.analysis.CannedBinaryTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Reader(java.io.Reader) Token(org.apache.lucene.analysis.Token) BinaryToken(org.apache.lucene.analysis.CannedBinaryTokenStream.BinaryToken) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Directory(org.apache.lucene.store.Directory)

Example 77 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

From class AnalyzingSuggesterTest, method testGraphDups.

/**
 * Two surface forms ("wifi network is slow" and "wi fi network is fast")
 * whose analyzed forms overlap via a multi-position "hotspot" graph token
 * must both survive indexing and come back ordered by weight (50 before 10).
 */
public void testGraphDups() throws Exception {
    final Analyzer analyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            return new TokenStreamComponents(tokenizer) {

                // Position of the next canned stream; one stream is consumed
                // per analysis request, in order.
                int nextStream = 0;

                final TokenStream[] cannedStreams = new TokenStream[] {
                    // build input 1: "hotspot" (posInc 0, length 2) overlays "wifi network"
                    new CannedTokenStream(new Token[] { token("wifi", 1, 1), token("hotspot", 0, 2), token("network", 1, 1), token("is", 1, 1), token("slow", 1, 1) }),
                    // build input 2: "hotspot" (posInc 0, length 3) overlays "wi fi network"
                    new CannedTokenStream(new Token[] { token("wi", 1, 1), token("hotspot", 0, 3), token("fi", 1, 1), token("network", 1, 1), token("is", 1, 1), token("fast", 1, 1) }),
                    // lookup query: "wifi network" with the same overlay
                    new CannedTokenStream(new Token[] { token("wifi", 1, 1), token("hotspot", 0, 2), token("network", 1, 1) })
                };

                @Override
                public TokenStream getTokenStream() {
                    return cannedStreams[nextStream++];
                }

                @Override
                protected void setReader(final Reader reader) {
                }
            };
        }
    };
    Input[] keys = new Input[] { new Input("wifi network is slow", 50), new Input("wi fi network is fast", 10) };
    //AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, AnalyzingSuggester.EXACT_FIRST, 256, -1);
    Directory tempDir = getDirectory();
    AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
    suggester.build(new InputArrayIterator(keys));
    List<LookupResult> results = suggester.lookup("wifi network", false, 10);
    if (VERBOSE) {
        System.out.println("Results: " + results);
    }
    assertEquals(2, results.size());
    assertEquals("wifi network is slow", results.get(0).key);
    assertEquals(50, results.get(0).value);
    assertEquals("wi fi network is fast", results.get(1).key);
    assertEquals(10, results.get(1).value);
    IOUtils.close(analyzer, tempDir);
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) CannedBinaryTokenStream(org.apache.lucene.analysis.CannedBinaryTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Reader(java.io.Reader) Token(org.apache.lucene.analysis.Token) BinaryToken(org.apache.lucene.analysis.CannedBinaryTokenStream.BinaryToken) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Directory(org.apache.lucene.store.Directory)

Example 78 with CannedTokenStream

use of org.apache.lucene.analysis.CannedTokenStream in project lucene-solr by apache.

From class AnalyzingSuggesterTest, method testDupSurfaceFormsMissingResults.

/**
 * Every input analyzes to the same token graph ("hairy"/"smelly" + "dog"),
 * so both surface forms share one analyzed form. With preserveSep off and
 * dedup of surface forms, both results must still be returned, ordered by
 * weight — and the same must hold after a store/load round trip.
 */
public void testDupSurfaceFormsMissingResults() throws Exception {
    Analyzer a = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            return new TokenStreamComponents(tokenizer) {

                @Override
                public TokenStream getTokenStream() {
                    // Same canned graph for every input and for the query.
                    return new CannedTokenStream(new Token[] { token("hairy", 1, 1), token("smelly", 0, 1), token("dog", 1, 1) });
                }

                @Override
                protected void setReader(final Reader reader) {
                }
            };
        }
    };
    Directory tempDir = getDirectory();
    AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, -1, true);
    suggester.build(new InputArrayIterator(shuffle(new Input("hambone", 6), new Input("nellie", 5))));
    List<LookupResult> results = suggester.lookup("nellie", false, 2);
    assertEquals(2, results.size());
    assertEquals("hambone", results.get(0).key);
    assertEquals(6, results.get(0).value);
    assertEquals("nellie", results.get(1).key);
    assertEquals(5, results.get(1).value);
    // Try again after save/load:
    Path tmpDir = createTempDir("AnalyzingSuggesterTest");
    Path path = tmpDir.resolve("suggester");
    // try-with-resources: the original manual close() leaked the stream if
    // store()/load() threw.
    try (OutputStream os = Files.newOutputStream(path)) {
        suggester.store(os);
    }
    try (InputStream is = Files.newInputStream(path)) {
        suggester.load(is);
    }
    results = suggester.lookup("nellie", false, 2);
    assertEquals(2, results.size());
    assertEquals("hambone", results.get(0).key);
    assertEquals(6, results.get(0).value);
    assertEquals("nellie", results.get(1).key);
    assertEquals(5, results.get(1).value);
    IOUtils.close(a, tempDir);
}
Also used : Path(java.nio.file.Path) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) Reader(java.io.Reader) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Directory(org.apache.lucene.store.Directory)

Aggregations

CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)78 TokenStream (org.apache.lucene.analysis.TokenStream)43 Token (org.apache.lucene.analysis.Token)37 Directory (org.apache.lucene.store.Directory)33 Document (org.apache.lucene.document.Document)26 TextField (org.apache.lucene.document.TextField)22 Field (org.apache.lucene.document.Field)15 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)14 BytesRef (org.apache.lucene.util.BytesRef)14 FieldType (org.apache.lucene.document.FieldType)13 Term (org.apache.lucene.index.Term)13 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)11 Tokenizer (org.apache.lucene.analysis.Tokenizer)11 IndexReader (org.apache.lucene.index.IndexReader)10 Analyzer (org.apache.lucene.analysis.Analyzer)9 StringField (org.apache.lucene.document.StringField)8 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)8 Reader (java.io.Reader)7 StringReader (java.io.StringReader)7 Input (org.apache.lucene.search.suggest.Input)7