Search in sources :

Example 46 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class AnalyzingInfixSuggesterTest method testHighlightAsObject.

/**
 * Checks that an {@code AnalyzingInfixSuggester} subclass whose {@code highlight} override
 * returns a {@code List<LookupHighlightFragment>} exposes that list via
 * {@code LookupResult.highlightKey}.
 *
 * <p>The list is rendered with a {@code toString} helper that is not visible in this block;
 * the expected string implies fragments flagged {@code true} are wrapped in {@code <b>...</b>}
 * (NOTE(review): confirm against the helper).
 */
// The unchecked warning comes from casting highlightKey to List<LookupHighlightFragment> below.
@SuppressWarnings("unchecked")
public void testHighlightAsObject() throws Exception {
    Input[] keys = new Input[] { new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")) };
    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false) {

        // Re-analyzes the suggestion text and splits it into fragments, each carrying a
        // boolean flag; returns the fragment list instead of a pre-rendered value.
        @Override
        protected Object highlight(String text, Set<String> matchedTokens, String prefixToken) throws IOException {
            try (TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text))) {
                CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
                OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
                ts.reset();
                List<LookupHighlightFragment> fragments = new ArrayList<>();
                // Offset into text up to which fragments have already been emitted.
                int upto = 0;
                while (ts.incrementToken()) {
                    String token = termAtt.toString();
                    int startOffset = offsetAtt.startOffset();
                    int endOffset = offsetAtt.endOffset();
                    if (upto < startOffset) {
                        // Emit the not-yet-covered text before this token, flagged false.
                        fragments.add(new LookupHighlightFragment(text.substring(upto, startOffset), false));
                        upto = startOffset;
                    } else if (upto > startOffset) {
                        // Token starts inside text that was already emitted; skip it.
                        continue;
                    }
                    // A token that matches neither branch below leaves upto unchanged, so its
                    // text is picked up by the next gap fragment or by the trailing fragment.
                    if (matchedTokens.contains(token)) {
                        // Token matches.
                        fragments.add(new LookupHighlightFragment(text.substring(startOffset, endOffset), true));
                        upto = endOffset;
                    } else if (prefixToken != null && token.startsWith(prefixToken)) {
                        // Prefix match: the prefix-length part is flagged true, any remainder false.
                        fragments.add(new LookupHighlightFragment(text.substring(startOffset, startOffset + prefixToken.length()), true));
                        if (prefixToken.length() < token.length()) {
                            fragments.add(new LookupHighlightFragment(text.substring(startOffset + prefixToken.length(), startOffset + token.length()), false));
                        }
                        upto = endOffset;
                    }
                }
                ts.end();
                // Offset reported after end(); presumably the final offset of the input - TODO confirm.
                int endOffset = offsetAtt.endOffset();
                if (upto < endOffset) {
                    // Emit whatever text remains after the last emitted fragment, flagged false.
                    fragments.add(new LookupHighlightFragment(text.substring(upto), false));
                }
                return fragments;
            }
        }
    };
    suggester.build(new InputArrayIterator(keys));
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
    assertEquals(1, results.size());
    // highlightKey holds the object returned by the highlight override above.
    assertEquals("a penny saved is a penny <b>ear</b>ned", toString((List<LookupHighlightFragment>) results.get(0).highlightKey));
    assertEquals(10, results.get(0).value);
    assertEquals(new BytesRef("foobaz"), results.get(0).payload);
    suggester.close();
    a.close();
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) CharArraySet(org.apache.lucene.analysis.CharArraySet) HashSet(java.util.HashSet) Set(java.util.Set) ArrayList(java.util.ArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Input(org.apache.lucene.search.suggest.Input) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) StringReader(java.io.StringReader) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) ArrayList(java.util.ArrayList) List(java.util.List) BytesRef(org.apache.lucene.util.BytesRef)

Example 47 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class AnalyzingInfixSuggesterTest method testAfterLoad.

/**
 * Builds an {@code AnalyzingInfixSuggester} in a filesystem directory, closes it, opens a
 * second suggester over the same path and checks that count and lookup results are still
 * available without rebuilding.
 */
public void testAfterLoad() throws Exception {
    Input[] inputs = new Input[] {
        new Input("lend me your ear", 8, new BytesRef("foobar")),
        new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz"))
    };
    Path indexPath = createTempDir("AnalyzingInfixSuggesterTest");
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);

    // First instance: build from the inputs, verify the count, then close.
    AnalyzingInfixSuggester builder = new AnalyzingInfixSuggester(newFSDirectory(indexPath), analyzer, analyzer, 3, false);
    builder.build(new InputArrayIterator(inputs));
    assertEquals(2, builder.getCount());
    builder.close();

    // Second instance over the same path: no build() call before looking up.
    AnalyzingInfixSuggester reopened = new AnalyzingInfixSuggester(newFSDirectory(indexPath), analyzer, analyzer, 3, false);
    CharSequence query = TestUtil.stringToCharSequence("ear", random());
    List<LookupResult> hits = reopened.lookup(query, 10, true, true);
    assertEquals(2, hits.size());

    LookupResult top = hits.get(0);
    assertEquals("a penny saved is a penny earned", top.key);
    assertEquals("a penny saved is a penny <b>ear</b>ned", top.highlightKey);
    assertEquals(10, top.value);
    assertEquals(new BytesRef("foobaz"), top.payload);
    assertEquals(2, reopened.getCount());

    reopened.close();
    analyzer.close();
}
Also used : Path(java.nio.file.Path) Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BytesRef(org.apache.lucene.util.BytesRef)

Example 48 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testKeywordWithPayloads.

/**
 * Builds an {@code AnalyzingSuggester} over keyword-tokenized inputs that carry payloads
 * (including several entries sharing the key "bar") and checks key, weight and payload of
 * the suggestions returned for the prefixes "f", "bar", "b" and "ba".
 */
public void testKeywordWithPayloads() throws Exception {
    Iterable<Input> inputs = shuffle(
        new Input("foo", 50, new BytesRef("hello")),
        new Input("bar", 10, new BytesRef("goodbye")),
        new Input("barbar", 12, new BytesRef("thank you")),
        new Input("bar", 9, new BytesRef("should be deduplicated")),
        new Input("bar", 8, new BytesRef("should also be deduplicated")),
        new Input("barbara", 6, new BytesRef("for all the fish")));
    Analyzer keywordAnalyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
    Directory dir = getDirectory();
    AnalyzingSuggester suggester = new AnalyzingSuggester(dir, "suggest", keywordAnalyzer);
    suggester.build(new InputArrayIterator(inputs));

    // The same sequence of checks is executed twice against the built suggester.
    for (int pass = 0; pass < 2; pass++) {
        // Prefix "f", up to 2 requested: "foo" is the only candidate.
        List<LookupResult> hits = suggester.lookup(TestUtil.stringToCharSequence("f", random()), false, 2);
        assertEquals(1, hits.size());
        LookupResult first = hits.get(0);
        assertEquals("foo", first.key.toString());
        assertEquals(50, first.value, 0.01F);
        assertEquals(new BytesRef("hello"), first.payload);

        // Prefix "bar", only 1 requested: the exact match "bar" is returned even though
        // "barbar" has a higher weight, because exactFirst is enabled.
        hits = suggester.lookup(TestUtil.stringToCharSequence("bar", random()), false, 1);
        assertEquals(1, hits.size());
        first = hits.get(0);
        assertEquals("bar", first.key.toString());
        assertEquals(10, first.value, 0.01F);
        assertEquals(new BytesRef("goodbye"), first.payload);

        // Prefix "b", up to 2 requested.
        hits = suggester.lookup(TestUtil.stringToCharSequence("b", random()), false, 2);
        assertEquals(2, hits.size());
        first = hits.get(0);
        LookupResult second = hits.get(1);
        assertEquals("barbar", first.key.toString());
        assertEquals(12, first.value, 0.01F);
        assertEquals(new BytesRef("thank you"), first.payload);
        assertEquals("bar", second.key.toString());
        assertEquals(10, second.value, 0.01F);
        assertEquals(new BytesRef("goodbye"), second.payload);

        // Prefix "ba", up to 3 requested.
        hits = suggester.lookup(TestUtil.stringToCharSequence("ba", random()), false, 3);
        assertEquals(3, hits.size());
        first = hits.get(0);
        second = hits.get(1);
        LookupResult third = hits.get(2);
        assertEquals("barbar", first.key.toString());
        assertEquals(12, first.value, 0.01F);
        assertEquals(new BytesRef("thank you"), first.payload);
        assertEquals("bar", second.key.toString());
        assertEquals(10, second.value, 0.01F);
        assertEquals(new BytesRef("goodbye"), second.payload);
        assertEquals("barbara", third.key.toString());
        assertEquals(6, third.value, 0.01F);
        assertEquals(new BytesRef("for all the fish"), third.payload);
    }
    IOUtils.close(keywordAnalyzer, dir);
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 49 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testInputPathRequired.

/**
 * Builds an {@code AnalyzingSuggester} with an analyzer that replays canned token streams
 * and checks that looking up "ab x" yields exactly one suggestion.
 *
 * <p>The analyzer hands out its canned streams strictly in order, one per
 * {@code getTokenStream()} call, and ignores the reader it is given. Presumably the three
 * streams correspond to the two inputs and then the lookup query - TODO confirm.
 */
public void testInputPathRequired() throws Exception {
    //  SynonymMap.Builder b = new SynonymMap.Builder(false);
    //  b.add(new CharsRef("ab"), new CharsRef("ba"), true);
    //  final SynonymMap map = b.build();
    //  The Analyzer below mimics the functionality of the SynonymAnalyzer
    //  using the above map, so that the suggest module does not need a dependency on the
    //  synonym module
    final Analyzer analyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            return new TokenStreamComponents(tokenizer) {

                // Index of the next canned stream to hand out.
                int tokenStreamCounter = 0;

                // Canned analysis results, returned in order. The token(...) helper is defined
                // elsewhere; its numeric arguments are presumably position increment and
                // position length - TODO confirm.
                final TokenStream[] tokenStreams = new TokenStream[] { new CannedTokenStream(new Token[] { token("ab", 1, 1), token("ba", 0, 1), token("xc", 1, 1) }), new CannedTokenStream(new Token[] { token("ba", 1, 1), token("xd", 1, 1) }), new CannedTokenStream(new Token[] { token("ab", 1, 1), token("ba", 0, 1), token("x", 1, 1) }) };

                @Override
                public TokenStream getTokenStream() {
                    TokenStream result = tokenStreams[tokenStreamCounter];
                    tokenStreamCounter++;
                    return result;
                }

                // Intentionally empty: the canned streams do not read from the supplied reader.
                @Override
                protected void setReader(final Reader reader) {
                }
            };
        }
    };
    Input[] keys = new Input[] { new Input("ab xc", 50), new Input("ba xd", 50) };
    Directory tempDir = getDirectory();
    AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
    suggester.build(new InputArrayIterator(keys));
    List<LookupResult> results = suggester.lookup("ab x", false, 1);
    // assertEquals (rather than assertTrue on a comparison) reports the actual size on failure.
    assertEquals(1, results.size());
    IOUtils.close(analyzer, tempDir);
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) CannedBinaryTokenStream(org.apache.lucene.analysis.CannedBinaryTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Reader(java.io.Reader) Token(org.apache.lucene.analysis.Token) BinaryToken(org.apache.lucene.analysis.CannedBinaryTokenStream.BinaryToken) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Directory(org.apache.lucene.store.Directory)

Example 50 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testGraphDups.

/**
 * Builds an {@code AnalyzingSuggester} with an analyzer that replays canned token streams in
 * which "hotspot" is stacked on "wifi" / "wi fi", then checks that looking up
 * "wifi network" returns both inputs, in the asserted order, with their weights.
 */
public void testGraphDups() throws Exception {
    final Analyzer cannedAnalyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new MockTokenizer(MockTokenizer.SIMPLE, true);
            return new TokenStreamComponents(source) {

                // Position of the next canned stream to return.
                int nextStream = 0;

                // Canned analysis results, handed out one per getTokenStream() call.
                final TokenStream[] cannedStreams = new TokenStream[] {
                    new CannedTokenStream(new Token[] {
                        token("wifi", 1, 1), token("hotspot", 0, 2), token("network", 1, 1), token("is", 1, 1), token("slow", 1, 1)
                    }),
                    new CannedTokenStream(new Token[] {
                        token("wi", 1, 1), token("hotspot", 0, 3), token("fi", 1, 1), token("network", 1, 1), token("is", 1, 1), token("fast", 1, 1)
                    }),
                    new CannedTokenStream(new Token[] {
                        token("wifi", 1, 1), token("hotspot", 0, 2), token("network", 1, 1)
                    })
                };

                @Override
                public TokenStream getTokenStream() {
                    return cannedStreams[nextStream++];
                }

                @Override
                protected void setReader(final Reader reader) {
                    // Nothing to do: the canned streams never read from the reader.
                }
            };
        }
    };

    Input[] inputs = new Input[] {
        new Input("wifi network is slow", 50),
        new Input("wi fi network is fast", 10)
    };
    // Alternative construction left disabled in the original test:
    //AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, AnalyzingSuggester.EXACT_FIRST, 256, -1);
    Directory dir = getDirectory();
    AnalyzingSuggester suggester = new AnalyzingSuggester(dir, "suggest", cannedAnalyzer);
    suggester.build(new InputArrayIterator(inputs));

    List<LookupResult> hits = suggester.lookup("wifi network", false, 10);
    if (VERBOSE) {
        System.out.println("Results: " + hits);
    }
    assertEquals(2, hits.size());

    LookupResult first = hits.get(0);
    LookupResult second = hits.get(1);
    assertEquals("wifi network is slow", first.key);
    assertEquals(50, first.value);
    assertEquals("wi fi network is fast", second.key);
    assertEquals(10, second.value);

    IOUtils.close(cannedAnalyzer, dir);
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) CannedBinaryTokenStream(org.apache.lucene.analysis.CannedBinaryTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Reader(java.io.Reader) Token(org.apache.lucene.analysis.Token) BinaryToken(org.apache.lucene.analysis.CannedBinaryTokenStream.BinaryToken) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Directory(org.apache.lucene.store.Directory)

Aggregations

LookupResult (org.apache.lucene.search.suggest.Lookup.LookupResult): 62, Input (org.apache.lucene.search.suggest.Input): 48, InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator): 48, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 45, Analyzer (org.apache.lucene.analysis.Analyzer): 43, Directory (org.apache.lucene.store.Directory): 36, BytesRef (org.apache.lucene.util.BytesRef): 21, ArrayList (java.util.ArrayList): 12, Path (java.nio.file.Path): 11, HashSet (java.util.HashSet): 8, MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 7, Reader (java.io.Reader): 6, CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 6, Tokenizer (org.apache.lucene.analysis.Tokenizer): 6, Token (org.apache.lucene.analysis.Token): 5, TokenStream (org.apache.lucene.analysis.TokenStream): 5, HashMap (java.util.HashMap): 4, CharArraySet (org.apache.lucene.analysis.CharArraySet): 4, InputStream (java.io.InputStream): 3, OutputStream (java.io.OutputStream): 3