Search in sources :

Example 21 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class FuzzySuggesterTest method testGraphDups.

public void testGraphDups() throws Exception {
    final Analyzer analyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            return new TokenStreamComponents(tokenizer) {

                int tokenStreamCounter = 0;

                final TokenStream[] tokenStreams = new TokenStream[] { new CannedTokenStream(new Token[] { token("wifi", 1, 1), token("hotspot", 0, 2), token("network", 1, 1), token("is", 1, 1), token("slow", 1, 1) }), new CannedTokenStream(new Token[] { token("wi", 1, 1), token("hotspot", 0, 3), token("fi", 1, 1), token("network", 1, 1), token("is", 1, 1), token("fast", 1, 1) }), new CannedTokenStream(new Token[] { token("wifi", 1, 1), token("hotspot", 0, 2), token("network", 1, 1) }) };

                @Override
                public TokenStream getTokenStream() {
                    TokenStream result = tokenStreams[tokenStreamCounter];
                    tokenStreamCounter++;
                    return result;
                }

                @Override
                protected void setReader(final Reader reader) {
                }
            };
        }
    };
    Input[] keys = new Input[] { new Input("wifi network is slow", 50), new Input("wi fi network is fast", 10) };
    Directory tempDir = getDirectory();
    FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", analyzer);
    suggester.build(new InputArrayIterator(keys));
    List<LookupResult> results = suggester.lookup("wifi network", false, 10);
    if (VERBOSE) {
        System.out.println("Results: " + results);
    }
    assertEquals(2, results.size());
    assertEquals("wifi network is slow", results.get(0).key);
    assertEquals(50, results.get(0).value);
    assertEquals("wi fi network is fast", results.get(1).key);
    assertEquals(10, results.get(1).value);
    IOUtils.close(tempDir, analyzer);
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Reader(java.io.Reader) Token(org.apache.lucene.analysis.Token) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Directory(org.apache.lucene.store.Directory)

Example 22 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingInfixSuggesterTest method testHighlight.

public void testHighlight() throws Exception {
    Input[] keys = new Input[] { new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")) };
    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false);
    suggester.build(new InputArrayIterator(keys));
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("a penny saved is a penny earned", results.get(0).key);
    assertEquals("a <b>penn</b>y saved is a <b>penn</b>y earned", results.get(0).highlightKey);
    suggester.close();
    a.close();
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BytesRef(org.apache.lucene.util.BytesRef)

Example 23 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingInfixSuggesterTest method testSuggestStopFilter.

public void testSuggestStopFilter() throws Exception {
    final CharArraySet stopWords = StopFilter.makeStopSet("a");
    Analyzer indexAnalyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            MockTokenizer tokens = new MockTokenizer();
            return new TokenStreamComponents(tokens, new StopFilter(tokens, stopWords));
        }
    };
    Analyzer queryAnalyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            MockTokenizer tokens = new MockTokenizer();
            return new TokenStreamComponents(tokens, new SuggestStopFilter(tokens, stopWords));
        }
    };
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), indexAnalyzer, queryAnalyzer, 3, false);
    Input[] keys = new Input[] { new Input("a bob for apples", 10, new BytesRef("foobaz")) };
    suggester.build(new InputArrayIterator(keys));
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("a", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("a bob for apples", results.get(0).key);
    assertEquals("a bob for <b>a</b>pples", results.get(0).highlightKey);
    suggester.close();
    IOUtils.close(suggester, indexAnalyzer, queryAnalyzer);
}
Also used : CharArraySet(org.apache.lucene.analysis.CharArraySet) StopFilter(org.apache.lucene.analysis.StopFilter) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) BytesRef(org.apache.lucene.util.BytesRef)

Example 24 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingInfixSuggesterTest method testContextNotAllTermsRequired.

public void testContextNotAllTermsRequired() throws Exception {
    Input[] keys = new Input[] { new Input("lend me your ear", 8, new BytesRef("foobar"), asSet("foo", "bar")), new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz"), asSet("foo", "baz")) };
    Path tempDir = createTempDir("analyzingInfixContext");
    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
    suggester.build(new InputArrayIterator(keys));
    // No context provided, all results returned
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, false, true);
    assertEquals(2, results.size());
    LookupResult result = results.get(0);
    assertEquals("a penny saved is a penny earned", result.key);
    assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
    assertEquals(10, result.value);
    assertEquals(new BytesRef("foobaz"), result.payload);
    assertNotNull(result.contexts);
    assertEquals(2, result.contexts.size());
    assertTrue(result.contexts.contains(new BytesRef("foo")));
    assertTrue(result.contexts.contains(new BytesRef("baz")));
    result = results.get(1);
    assertEquals("lend me your ear", result.key);
    assertEquals("lend me your <b>ear</b>", result.highlightKey);
    assertEquals(8, result.value);
    assertEquals(new BytesRef("foobar"), result.payload);
    assertNotNull(result.contexts);
    assertEquals(2, result.contexts.size());
    assertTrue(result.contexts.contains(new BytesRef("foo")));
    assertTrue(result.contexts.contains(new BytesRef("bar")));
    // Both have "foo" context:
    results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("foo"), 10, false, true);
    assertEquals(2, results.size());
    result = results.get(0);
    assertEquals("a penny saved is a penny earned", result.key);
    assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
    assertEquals(10, result.value);
    assertEquals(new BytesRef("foobaz"), result.payload);
    assertNotNull(result.contexts);
    assertEquals(2, result.contexts.size());
    assertTrue(result.contexts.contains(new BytesRef("foo")));
    assertTrue(result.contexts.contains(new BytesRef("baz")));
    result = results.get(1);
    assertEquals("lend me your ear", result.key);
    assertEquals("lend me your <b>ear</b>", result.highlightKey);
    assertEquals(8, result.value);
    assertEquals(new BytesRef("foobar"), result.payload);
    assertNotNull(result.contexts);
    assertEquals(2, result.contexts.size());
    assertTrue(result.contexts.contains(new BytesRef("foo")));
    assertTrue(result.contexts.contains(new BytesRef("bar")));
    // Only one has "foo" context and len
    results = suggester.lookup(TestUtil.stringToCharSequence("len", random()), asSet("foo"), 10, false, true);
    assertEquals(1, results.size());
    result = results.get(0);
    assertEquals("lend me your ear", result.key);
    assertEquals("<b>len</b>d me your ear", result.highlightKey);
    assertEquals(8, result.value);
    assertEquals(new BytesRef("foobar"), result.payload);
    assertNotNull(result.contexts);
    assertEquals(2, result.contexts.size());
    assertTrue(result.contexts.contains(new BytesRef("foo")));
    assertTrue(result.contexts.contains(new BytesRef("bar")));
    suggester.close();
}
Also used : Path(java.nio.file.Path) Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BytesRef(org.apache.lucene.util.BytesRef)

Example 25 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingInfixSuggesterTest method testHighlightCaseChange.

public void testHighlightCaseChange() throws Exception {
    Input[] keys = new Input[] { new Input("a Penny saved is a penny earned", 10, new BytesRef("foobaz")) };
    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false);
    suggester.build(new InputArrayIterator(keys));
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("a Penny saved is a penny earned", results.get(0).key);
    assertEquals("a <b>Penn</b>y saved is a <b>penn</b>y earned", results.get(0).highlightKey);
    suggester.close();
    // Try again, but overriding addPrefixMatch to highlight
    // the entire hit:
    suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false) {

        @Override
        protected void addPrefixMatch(StringBuilder sb, String surface, String analyzed, String prefixToken) {
            sb.append("<b>");
            sb.append(surface);
            sb.append("</b>");
        }
    };
    suggester.build(new InputArrayIterator(keys));
    results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("a Penny saved is a penny earned", results.get(0).key);
    assertEquals("a <b>Penny</b> saved is a <b>penny</b> earned", results.get(0).highlightKey);
    suggester.close();
    a.close();
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator)76 Input (org.apache.lucene.search.suggest.Input)71 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)67 Analyzer (org.apache.lucene.analysis.Analyzer)65 LookupResult (org.apache.lucene.search.suggest.Lookup.LookupResult)48 Directory (org.apache.lucene.store.Directory)43 BytesRef (org.apache.lucene.util.BytesRef)26 Path (java.nio.file.Path)17 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)11 Tokenizer (org.apache.lucene.analysis.Tokenizer)10 Reader (java.io.Reader)8 ArrayList (java.util.ArrayList)8 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)8 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)7 TokenStream (org.apache.lucene.analysis.TokenStream)6 HashSet (java.util.HashSet)5 CharArraySet (org.apache.lucene.analysis.CharArraySet)5 Token (org.apache.lucene.analysis.Token)5 InputStream (java.io.InputStream)4 OutputStream (java.io.OutputStream)4