Search in sources :

Example 11 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

In class FuzzySuggesterTest, method testExactFirst.

/**
 * Builds a {@link FuzzySuggester} with the EXACT_FIRST and PRESERVE_SEP options over four
 * weighted inputs and verifies, for topN from 1 through 5, that looking up "x y" returns the
 * exact match "x y" (weight 1) first, followed by "z z z", "x y z" and "x" in that order.
 */
public void testExactFirst() throws Exception {
    // Expected suggestions for the query "x y", in rank order, with their weights.
    final String[] expectedKeys = { "x y", "z z z", "x y z", "x" };
    final long[] expectedWeights = { 1, 20, 3, 2 };

    Analyzer analyzer = getUnusualAnalyzer();
    Directory dir = getDirectory();
    int options = AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP;
    FuzzySuggester suggester = new FuzzySuggester(dir, "fuzzy", analyzer, analyzer, options, 256, -1, true, 1, true, 1, 3, false);

    Input[] inputs = new Input[] {
        new Input("x y", 1),
        new Input("x y z", 3),
        new Input("x", 2),
        new Input("z z z", 20)
    };
    suggester.build(new InputArrayIterator(inputs));

    for (int topN = 1; topN <= 5; topN++) {
        List<LookupResult> results = suggester.lookup("x y", false, topN);
        //System.out.println("topN=" + topN + " " + results);

        // Only four inputs exist, so the result count is capped at four.
        int expectedCount = Math.min(topN, 4);
        assertEquals(expectedCount, results.size());

        for (int rank = 0; rank < expectedCount; rank++) {
            assertEquals(expectedKeys[rank], results.get(rank).key);
            assertEquals(expectedWeights[rank], results.get(rank).value);
        }
    }
    IOUtils.close(analyzer, dir);
}
Also used : Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Directory(org.apache.lucene.store.Directory)

Example 12 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

In class FuzzySuggesterTest, method testInputPathRequired.

/**
 * Builds a {@link FuzzySuggester} over two inputs using an analyzer that returns canned token
 * streams, then checks that looking up "ab x" with topN = 1 yields exactly one result.
 */
public void testInputPathRequired() throws Exception {
    // The analyzer below mimics a SynonymAnalyzer driven by this map:
    //
    //   SynonymMap.Builder b = new SynonymMap.Builder(false);
    //   b.add(new CharsRef("ab"), new CharsRef("ba"), true);
    //   final SynonymMap map = b.build();
    //
    // Canned streams are used instead so that the suggest module does not need a
    // dependency on the synonym module.
    final Analyzer analyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new MockTokenizer(MockTokenizer.SIMPLE, true);
            return new TokenStreamComponents(source) {

                // Index of the next canned stream to hand out.
                int served = 0;

                // Streams are returned in this order, one per getTokenStream() call.
                // NOTE(review): presumably the first two serve the two build inputs and the
                // third serves the lookup query - confirm against the suggester's call order.
                final TokenStream[] cannedStreams = new TokenStream[] {
                    new CannedTokenStream(new Token[] {
                        token("ab", 1, 1),
                        token("ba", 0, 1),
                        token("xc", 1, 1)
                    }),
                    new CannedTokenStream(new Token[] {
                        token("ba", 1, 1),
                        token("xd", 1, 1)
                    }),
                    new CannedTokenStream(new Token[] {
                        token("ab", 1, 1),
                        token("ba", 0, 1),
                        token("x", 1, 1)
                    })
                };

                @Override
                public TokenStream getTokenStream() {
                    TokenStream current = cannedStreams[served];
                    served += 1;
                    return current;
                }

                @Override
                protected void setReader(final Reader reader) {
                    // Intentionally empty: the canned streams never read from the reader.
                }
            };
        }
    };

    Directory dir = getDirectory();
    FuzzySuggester suggester = new FuzzySuggester(dir, "fuzzy", analyzer);

    Input[] keys = new Input[] {
        new Input("ab xc", 50),
        new Input("ba xd", 50)
    };
    suggester.build(new InputArrayIterator(keys));

    List<LookupResult> hits = suggester.lookup("ab x", false, 1);
    assertTrue(hits.size() == 1);

    IOUtils.close(analyzer, dir);
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Reader(java.io.Reader) Token(org.apache.lucene.analysis.Token) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Directory(org.apache.lucene.store.Directory)

Example 13 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

In class TestFreeTextSuggester, method toString.

/**
 * Renders lookup results as a single line of space-separated {@code key/score} entries.
 *
 * <p>The score is the result's value divided by {@link Long#MAX_VALUE}, formatted with two
 * decimal places using {@link Locale#ROOT}. Leading and trailing whitespace of the assembled
 * string is trimmed, so an empty list produces an empty string.
 *
 * @param results the lookup results to render
 * @return the rendered, trimmed string
 */
private static String toString(List<LookupResult> results) {
    final StringBuilder rendered = new StringBuilder();
    for (LookupResult hit : results) {
        final double score = ((double) hit.value) / Long.MAX_VALUE;
        rendered.append(' ')
                .append(hit.key)
                .append('/')
                .append(String.format(Locale.ROOT, "%.2f", score));
    }
    // Every entry is prefixed with a space; trim() drops the one before the first entry.
    return rendered.toString().trim();
}
Also used : LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult)

Example 14 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

In class TestFreeTextSuggester, method testWiki.

/**
 * Ignored test that builds a {@link FreeTextSuggester} from the "body" field of documents read
 * from a local enwiki line file (at most 10000 documents, each with weight 1, no payloads and
 * no contexts). When VERBOSE is set it prints the suggester's RAM usage and the results of
 * looking up "general r".
 */
@Ignore
public void testWiki() throws Exception {
    final LineFileDocs docs = new LineFileDocs(null, "/lucenedata/enwiki/enwiki-20120502-lines-1k.txt");
    // Skip header:
    docs.nextDoc();

    Analyzer analyzer = new MockAnalyzer(random());
    FreeTextSuggester suggester = new FreeTextSuggester(analyzer);

    InputIterator bodies = new InputIterator() {

        // Number of documents handed to the suggester so far.
        private int served;

        @Override
        public BytesRef next() {
            final Document doc;
            try {
                doc = docs.nextDoc();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            // Stop at end of file, or once 10000 documents have been served. The counter is
            // only touched when a document was actually read (short-circuit evaluation).
            if (doc == null || served++ == 10000) {
                return null;
            }
            return new BytesRef(doc.get("body"));
        }

        @Override
        public long weight() {
            return 1;
        }

        @Override
        public boolean hasPayloads() {
            return false;
        }

        @Override
        public BytesRef payload() {
            return null;
        }

        @Override
        public boolean hasContexts() {
            return false;
        }

        @Override
        public Set<BytesRef> contexts() {
            return null;
        }
    };
    suggester.build(bodies);

    if (VERBOSE) {
        System.out.println(suggester.ramBytesUsed() + " bytes");
        List<LookupResult> hits = suggester.lookup("general r", 10);
        System.out.println("results:");
        for (LookupResult hit : hits) {
            System.out.println("  " + hit);
        }
    }
    analyzer.close();
}
Also used : CharArraySet(org.apache.lucene.analysis.CharArraySet) HashSet(java.util.HashSet) Set(java.util.Set) IOException(java.io.IOException) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) InputIterator(org.apache.lucene.search.suggest.InputIterator) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) BytesRef(org.apache.lucene.util.BytesRef) LineFileDocs(org.apache.lucene.util.LineFileDocs) Ignore(org.junit.Ignore)

Example 15 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

In class FuzzySuggesterTest, method testRandom2.

/**
 * Randomized test: builds a {@link FuzzySuggester} over a set of unique random strings with
 * random weights, using randomly chosen maxEdits, prefix length and transpositions settings.
 * For a number of random query fragments it then checks that the suggester's results (sorted
 * with CompareByCostThenAlpha) agree in key, value and count with those of slowFuzzyMatch.
 */
public void testRandom2() throws Throwable {
    final int NUM = atLeast(200);

    // Collect unique random terms, each paired with a random weight.
    final List<Input> answers = new ArrayList<>();
    final Set<String> seen = new HashSet<>();
    for (int i = 0; i < NUM; i++) {
        final String term = randomSimpleString(8);
        // Set.add returns true only for a term that has not been generated before.
        if (seen.add(term)) {
            answers.add(new Input(term, random().nextInt(1000)));
        }
    }

    Collections.sort(answers, new Comparator<Input>() {

        @Override
        public int compare(Input left, Input right) {
            return left.term.compareTo(right.term);
        }
    });

    if (VERBOSE) {
        System.out.println("\nTEST: targets");
        for (Input target : answers) {
            System.out.println("  " + target.term.utf8ToString() + " freq=" + target.v);
        }
    }

    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);

    final int maxEdits;
    if (random().nextBoolean()) {
        maxEdits = 1;
    } else {
        maxEdits = 2;
    }
    final int prefixLen = random().nextInt(4);
    final boolean transpositions = random().nextBoolean();

    // TODO: test graph analyzers
    // TODO: test exactFirst / preserveSep permutations
    Directory dir = getDirectory();
    FuzzySuggester suggest = new FuzzySuggester(dir, "fuzzy", analyzer, analyzer, 0, 256, -1, true, maxEdits, transpositions, prefixLen, prefixLen, false);
    if (VERBOSE) {
        System.out.println("TEST: maxEdits=" + maxEdits + " prefixLen=" + prefixLen + " transpositions=" + transpositions + " num=" + NUM);
    }

    Collections.shuffle(answers, random());
    final Input[] shuffled = answers.toArray(new Input[answers.size()]);
    suggest.build(new InputArrayIterator(shuffled));

    final int ITERS = atLeast(100);
    for (int iter = 0; iter < ITERS; iter++) {
        final String frag = randomSimpleString(6);
        if (VERBOSE) {
            System.out.println("\nTEST: iter frag=" + frag);
        }

        final List<LookupResult> expected = slowFuzzyMatch(prefixLen, maxEdits, transpositions, answers, frag);
        if (VERBOSE) {
            System.out.println("  expected: " + expected.size());
            for (LookupResult hit : expected) {
                System.out.println("    " + hit);
            }
        }

        final List<LookupResult> actual = suggest.lookup(frag, false, NUM);
        if (VERBOSE) {
            System.out.println("  actual: " + actual.size());
            for (LookupResult hit : actual) {
                System.out.println("    " + hit);
            }
        }

        Collections.sort(actual, new CompareByCostThenAlpha());

        // Compare the shared prefix entry by entry before checking the sizes, so a content
        // mismatch is reported ahead of a plain count mismatch.
        final int shared = Math.min(expected.size(), actual.size());
        for (int pos = 0; pos < shared; pos++) {
            final LookupResult want = expected.get(pos);
            final LookupResult got = actual.get(pos);
            assertEquals("expected " + want.key + " but got " + got.key, 0, CHARSEQUENCE_COMPARATOR.compare(want.key, got.key));
            assertEquals(want.value, got.value);
        }
        assertEquals(expected.size(), actual.size());
    }
    IOUtils.close(analyzer, dir);
}
Also used : ArrayList(java.util.ArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) HashSet(java.util.HashSet) Directory(org.apache.lucene.store.Directory)

Aggregations

LookupResult (org.apache.lucene.search.suggest.Lookup.LookupResult): 65; Input (org.apache.lucene.search.suggest.Input): 48; InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator): 48; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 45; Analyzer (org.apache.lucene.analysis.Analyzer): 43; Directory (org.apache.lucene.store.Directory): 36; BytesRef (org.apache.lucene.util.BytesRef): 22; ArrayList (java.util.ArrayList): 14; Path (java.nio.file.Path): 11; HashSet (java.util.HashSet): 9; MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 7; Reader (java.io.Reader): 6; CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 6; Tokenizer (org.apache.lucene.analysis.Tokenizer): 6; HashMap (java.util.HashMap): 5; Token (org.apache.lucene.analysis.Token): 5; TokenStream (org.apache.lucene.analysis.TokenStream): 5; LinkedList (java.util.LinkedList): 4; CharArraySet (org.apache.lucene.analysis.CharArraySet): 4; SuggesterResult (org.apache.solr.spelling.suggest.SuggesterResult): 4