Search in sources :

Example 1 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class WFSTCompletionTest method testRandom.

public void testRandom() throws Exception {
    int numWords = atLeast(1000);
    final TreeMap<String, Long> slowCompletor = new TreeMap<>();
    final TreeSet<String> allPrefixes = new TreeSet<>();
    Input[] keys = new Input[numWords];
    for (int i = 0; i < numWords; i++) {
        String s;
        while (true) {
            // TODO: would be nice to fix this slowCompletor/comparator to
            // use full range, but we might lose some coverage too...
            s = TestUtil.randomSimpleString(random());
            if (!slowCompletor.containsKey(s)) {
                break;
            }
        }
        for (int j = 1; j < s.length(); j++) {
            allPrefixes.add(s.substring(0, j));
        }
        // we can probably do Integer.MAX_VALUE here, but why worry.
        int weight = random().nextInt(1 << 24);
        slowCompletor.put(s, (long) weight);
        keys[i] = new Input(s, weight);
    }
    Directory tempDir = getDirectory();
    WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false);
    suggester.build(new InputArrayIterator(keys));
    assertEquals(numWords, suggester.getCount());
    Random random = new Random(random().nextLong());
    for (String prefix : allPrefixes) {
        final int topN = TestUtil.nextInt(random, 1, 10);
        List<LookupResult> r = suggester.lookup(TestUtil.stringToCharSequence(prefix, random), false, topN);
        // 2. go thru whole treemap (slowCompletor) and check it's actually the best suggestion
        final List<LookupResult> matches = new ArrayList<>();
        // TODO: could be faster... but it's slowCompletor for a reason
        for (Map.Entry<String, Long> e : slowCompletor.entrySet()) {
            if (e.getKey().startsWith(prefix)) {
                matches.add(new LookupResult(e.getKey(), e.getValue().longValue()));
            }
        }
        assertTrue(matches.size() > 0);
        Collections.sort(matches, new Comparator<LookupResult>() {

            @Override
            public int compare(LookupResult left, LookupResult right) {
                int cmp = Float.compare(right.value, left.value);
                if (cmp == 0) {
                    return left.compareTo(right);
                } else {
                    return cmp;
                }
            }
        });
        if (matches.size() > topN) {
            matches.subList(topN, matches.size()).clear();
        }
        assertEquals(matches.size(), r.size());
        for (int hit = 0; hit < r.size(); hit++) {
            //System.out.println("  check hit " + hit);
            assertEquals(matches.get(hit).key.toString(), r.get(hit).key.toString());
            assertEquals(matches.get(hit).value, r.get(hit).value, 0f);
        }
    }
    tempDir.close();
}
Also used : Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Directory(org.apache.lucene.store.Directory)

Example 2 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class FSTCompletionTest method testRandom.

public void testRandom() throws Exception {
    List<Input> freqs = new ArrayList<>();
    Random rnd = random();
    for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) {
        int weight = rnd.nextInt(100);
        freqs.add(new Input("" + rnd.nextLong(), weight));
    }
    Directory tempDir = getDirectory();
    FSTCompletionLookup lookup = new FSTCompletionLookup(tempDir, "fst");
    lookup.build(new InputArrayIterator(freqs.toArray(new Input[freqs.size()])));
    for (Input tf : freqs) {
        final String term = tf.term.utf8ToString();
        for (int i = 1; i < term.length(); i++) {
            String prefix = term.substring(0, i);
            for (LookupResult lr : lookup.lookup(stringToCharSequence(prefix), true, 10)) {
                assertTrue(lr.key.toString().startsWith(prefix));
            }
        }
    }
    tempDir.close();
}
Also used : LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Directory(org.apache.lucene.store.Directory)

Example 3 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class WFSTCompletionTest method testEmpty.

public void testEmpty() throws Exception {
    Directory tempDir = getDirectory();
    WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false);
    suggester.build(new InputArrayIterator(new Input[0]));
    assertEquals(0, suggester.getCount());
    List<LookupResult> result = suggester.lookup("a", false, 20);
    assertTrue(result.isEmpty());
    tempDir.close();
}
Also used : Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Directory(org.apache.lucene.store.Directory)

Example 4 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testDupSurfaceFormsMissingResults2.

public void testDupSurfaceFormsMissingResults2() throws Exception {
    Analyzer a = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            return new TokenStreamComponents(tokenizer) {

                int count;

                @Override
                public TokenStream getTokenStream() {
                    if (count == 0) {
                        count++;
                        return new CannedTokenStream(new Token[] { token("p", 1, 1), token("q", 1, 1), token("r", 0, 1), token("s", 0, 1) });
                    } else {
                        return new CannedTokenStream(new Token[] { token("p", 1, 1) });
                    }
                }

                @Override
                protected void setReader(final Reader reader) {
                }
            };
        }
    };
    Directory tempDir = getDirectory();
    AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, -1, true);
    suggester.build(new InputArrayIterator(new Input[] { new Input("a", 6), new Input("b", 5) }));
    List<LookupResult> results = suggester.lookup("a", false, 2);
    assertEquals(2, results.size());
    assertEquals("a", results.get(0).key);
    assertEquals(6, results.get(0).value);
    assertEquals("b", results.get(1).key);
    assertEquals(5, results.get(1).value);
    // Try again after save/load:
    Path tmpDir = createTempDir("AnalyzingSuggesterTest");
    Path path = tmpDir.resolve("suggester");
    OutputStream os = Files.newOutputStream(path);
    suggester.store(os);
    os.close();
    InputStream is = Files.newInputStream(path);
    suggester.load(is);
    is.close();
    results = suggester.lookup("a", false, 2);
    assertEquals(2, results.size());
    assertEquals("a", results.get(0).key);
    assertEquals(6, results.get(0).value);
    assertEquals("b", results.get(1).key);
    assertEquals(5, results.get(1).value);
    IOUtils.close(a, tempDir);
}
Also used : Path(java.nio.file.Path) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) Reader(java.io.Reader) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Directory(org.apache.lucene.store.Directory)

Example 5 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testKeyword.

/** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */
public void testKeyword() throws Exception {
    Iterable<Input> keys = shuffle(new Input("foo", 50), new Input("bar", 10), new Input("barbar", 10), new Input("barbar", 12), new Input("barbara", 6), new Input("bar", 5), new Input("barbara", 1));
    Directory tempDir = getDirectory();
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
    AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
    suggester.build(new InputArrayIterator(keys));
    // top N of 2, but only foo is available
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("f", random()), false, 2);
    assertEquals(1, results.size());
    assertEquals("foo", results.get(0).key.toString());
    assertEquals(50, results.get(0).value, 0.01F);
    // top N of 1 for 'bar': we return this even though
    // barbar is higher because exactFirst is enabled:
    results = suggester.lookup(TestUtil.stringToCharSequence("bar", random()), false, 1);
    assertEquals(1, results.size());
    assertEquals("bar", results.get(0).key.toString());
    assertEquals(10, results.get(0).value, 0.01F);
    // top N Of 2 for 'b'
    results = suggester.lookup(TestUtil.stringToCharSequence("b", random()), false, 2);
    assertEquals(2, results.size());
    assertEquals("barbar", results.get(0).key.toString());
    assertEquals(12, results.get(0).value, 0.01F);
    assertEquals("bar", results.get(1).key.toString());
    assertEquals(10, results.get(1).value, 0.01F);
    // top N of 3 for 'ba'
    results = suggester.lookup(TestUtil.stringToCharSequence("ba", random()), false, 3);
    assertEquals(3, results.size());
    assertEquals("barbar", results.get(0).key.toString());
    assertEquals(12, results.get(0).value, 0.01F);
    assertEquals("bar", results.get(1).key.toString());
    assertEquals(10, results.get(1).value, 0.01F);
    assertEquals("barbara", results.get(2).key.toString());
    assertEquals(6, results.get(2).value, 0.01F);
    IOUtils.close(analyzer, tempDir);
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Directory(org.apache.lucene.store.Directory)

Aggregations

LookupResult (org.apache.lucene.search.suggest.Lookup.LookupResult)65 Input (org.apache.lucene.search.suggest.Input)48 InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator)48 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)45 Analyzer (org.apache.lucene.analysis.Analyzer)43 Directory (org.apache.lucene.store.Directory)36 BytesRef (org.apache.lucene.util.BytesRef)22 ArrayList (java.util.ArrayList)14 Path (java.nio.file.Path)11 HashSet (java.util.HashSet)9 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)7 Reader (java.io.Reader)6 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)6 Tokenizer (org.apache.lucene.analysis.Tokenizer)6 HashMap (java.util.HashMap)5 Token (org.apache.lucene.analysis.Token)5 TokenStream (org.apache.lucene.analysis.TokenStream)5 LinkedList (java.util.LinkedList)4 CharArraySet (org.apache.lucene.analysis.CharArraySet)4 SuggesterResult (org.apache.solr.spelling.suggest.SuggesterResult)4