Search in sources :

Example 31 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class FuzzySuggesterTest method testRandomEdits.

public void testRandomEdits() throws IOException {
    List<Input> keys = new ArrayList<>();
    int numTerms = atLeast(100);
    for (int i = 0; i < numTerms; i++) {
        keys.add(new Input("boo" + TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
    }
    keys.add(new Input("foo bar boo far", 12));
    MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
    Directory tempDir = getDirectory();
    FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, true, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS, 0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE);
    suggester.build(new InputArrayIterator(keys));
    int numIters = atLeast(10);
    for (int i = 0; i < numIters; i++) {
        String addRandomEdit = addRandomEdit("foo bar boo", FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX);
        List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence(addRandomEdit, random()), false, 2);
        assertEquals(addRandomEdit, 1, results.size());
        assertEquals("foo bar boo far", results.get(0).key.toString());
        assertEquals(12, results.get(0).value, 0.01F);
    }
    IOUtils.close(analyzer, tempDir);
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) ArrayList(java.util.ArrayList) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Directory(org.apache.lucene.store.Directory)

Example 32 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class FuzzySuggesterTest method slowFuzzyMatch.

private List<LookupResult> slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List<Input> answers, String frag) {
    final List<LookupResult> results = new ArrayList<>();
    final int fragLen = frag.length();
    for (Input tf : answers) {
        //System.out.println("  check s=" + tf.term.utf8ToString());
        boolean prefixMatches = true;
        for (int i = 0; i < prefixLen; i++) {
            if (i == fragLen) {
                // Prefix still matches:
                break;
            }
            if (i == tf.term.length || tf.term.bytes[i] != (byte) frag.charAt(i)) {
                prefixMatches = false;
                break;
            }
        }
        if (prefixMatches) {
            final int len = tf.term.length;
            if (len >= fragLen - maxEdits) {
                // OK it's possible:
                //System.out.println("    possible");
                int d;
                final String s = tf.term.utf8ToString();
                if (fragLen == prefixLen) {
                    d = 0;
                } else if (false && len < fragLen) {
                    d = getDistance(frag, s, allowTransposition);
                } else {
                    //System.out.println("    try loop");
                    d = maxEdits + 1;
                    //for(int ed=-maxEdits;ed<=maxEdits;ed++) {
                    for (int ed = -maxEdits; ed <= maxEdits; ed++) {
                        if (s.length() < fragLen - ed) {
                            continue;
                        }
                        String check = s.substring(0, fragLen - ed);
                        d = getDistance(frag, check, allowTransposition);
                        //System.out.println("    sub check s=" + check + " d=" + d);
                        if (d <= maxEdits) {
                            break;
                        }
                    }
                }
                if (d <= maxEdits) {
                    results.add(new LookupResult(tf.term.utf8ToString(), tf.v));
                }
            }
        }
        Collections.sort(results, new CompareByCostThenAlpha());
    }
    return results;
}
Also used : Input(org.apache.lucene.search.suggest.Input) ArrayList(java.util.ArrayList) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult)

Example 33 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class FuzzySuggesterTest method testNoSeps.

public void testNoSeps() throws Exception {
    Input[] keys = new Input[] { new Input("ab cd", 0), new Input("abcd", 1) };
    int options = 0;
    Analyzer a = new MockAnalyzer(random());
    Directory tempDir = getDirectory();
    FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", a, a, options, 256, -1, true, 1, true, 1, 3, false);
    suggester.build(new InputArrayIterator(keys));
    // TODO: would be nice if "ab " would allow the test to
    // pass, and more generally if the analyzer can know
    // that the user's current query has ended at a word, 
    // but, analyzers don't produce SEP tokens!
    List<LookupResult> r = suggester.lookup(TestUtil.stringToCharSequence("ab c", random()), false, 2);
    assertEquals(2, r.size());
    // With no PRESERVE_SEPS specified, "ab c" should also
    // complete to "abcd", which has higher weight so should
    // appear first:
    assertEquals("abcd", r.get(0).key.toString());
    IOUtils.close(a, tempDir);
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Directory(org.apache.lucene.store.Directory)

Example 34 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class FuzzySuggesterTest method testStandard.

/**
   * basic "standardanalyzer" test with stopword removal
   */
public void testStandard() throws Exception {
    Input[] keys = new Input[] { new Input("the ghost of christmas past", 50) };
    Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
    Directory tempDir = getDirectory();
    FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", standard, standard, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, false, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS, FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE);
    suggester.build(new InputArrayIterator(keys));
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
    assertEquals(1, results.size());
    assertEquals("the ghost of christmas past", results.get(0).key.toString());
    assertEquals(50, results.get(0).value, 0.01F);
    // omit the 'the' since it's a stopword, it's suggested anyway
    results = suggester.lookup(TestUtil.stringToCharSequence("ghost of chris", random()), false, 1);
    assertEquals(1, results.size());
    assertEquals("the ghost of christmas past", results.get(0).key.toString());
    assertEquals(50, results.get(0).value, 0.01F);
    // omit the 'the' and 'of' since they are stopwords, it's suggested anyway
    results = suggester.lookup(TestUtil.stringToCharSequence("ghost chris", random()), false, 1);
    assertEquals(1, results.size());
    assertEquals("the ghost of christmas past", results.get(0).key.toString());
    assertEquals(50, results.get(0).value, 0.01F);
    IOUtils.close(standard, tempDir);
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Directory(org.apache.lucene.store.Directory)

Example 35 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class FuzzySuggesterTest method testNonLatinRandomEdits.

public void testNonLatinRandomEdits() throws IOException {
    List<Input> keys = new ArrayList<>();
    int numTerms = atLeast(100);
    for (int i = 0; i < numTerms; i++) {
        keys.add(new Input("буу" + TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
    }
    keys.add(new Input("фуу бар буу фар", 12));
    MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
    Directory tempDir = getDirectory();
    FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, true, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS, 0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, true);
    suggester.build(new InputArrayIterator(keys));
    int numIters = atLeast(10);
    for (int i = 0; i < numIters; i++) {
        String addRandomEdit = addRandomEdit("фуу бар буу", 0);
        List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence(addRandomEdit, random()), false, 2);
        assertEquals(addRandomEdit, 1, results.size());
        assertEquals("фуу бар буу фар", results.get(0).key.toString());
        assertEquals(12, results.get(0).value, 0.01F);
    }
    IOUtils.close(analyzer, tempDir);
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) ArrayList(java.util.ArrayList) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Directory(org.apache.lucene.store.Directory)

Aggregations

LookupResult (org.apache.lucene.search.suggest.Lookup.LookupResult)65 Input (org.apache.lucene.search.suggest.Input)48 InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator)48 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)45 Analyzer (org.apache.lucene.analysis.Analyzer)43 Directory (org.apache.lucene.store.Directory)36 BytesRef (org.apache.lucene.util.BytesRef)22 ArrayList (java.util.ArrayList)14 Path (java.nio.file.Path)11 HashSet (java.util.HashSet)9 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)7 Reader (java.io.Reader)6 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)6 Tokenizer (org.apache.lucene.analysis.Tokenizer)6 HashMap (java.util.HashMap)5 Token (org.apache.lucene.analysis.Token)5 TokenStream (org.apache.lucene.analysis.TokenStream)5 LinkedList (java.util.LinkedList)4 CharArraySet (org.apache.lucene.analysis.CharArraySet)4 SuggesterResult (org.apache.solr.spelling.suggest.SuggesterResult)4