Search in sources :

Example 1 with Input

use of org.apache.lucene.search.suggest.Input in project lucene-solr by apache.

the class WFSTCompletionTest method testRandom.

/**
 * Randomized comparison of {@link WFSTCompletionLookup} against a brute-force model.
 *
 * <p>Generates at least 1000 distinct random keys with random weights, builds the
 * suggester from them, and then, for every proper non-empty prefix of every key,
 * checks that the suggester's top-N results equal the top-N entries obtained by
 * scanning and sorting the in-memory model (weight descending, then natural
 * {@code LookupResult} order).
 */
public void testRandom() throws Exception {
    final int wordCount = atLeast(1000);
    // Brute-force model: every generated key mapped to the weight it was given.
    final TreeMap<String, Long> expectedWeights = new TreeMap<>();
    final TreeSet<String> prefixes = new TreeSet<>();
    final Input[] inputs = new Input[wordCount];
    for (int i = 0; i < wordCount; i++) {
        // TODO: would be nice to fix this brute-force model/comparator to
        // use full range, but we might lose some coverage too...
        String word;
        do {
            // Re-draw until the key is one we have not produced before.
            word = TestUtil.randomSimpleString(random());
        } while (expectedWeights.containsKey(word));
        // Record each proper, non-empty prefix (the full word itself is excluded).
        for (int end = 1; end < word.length(); end++) {
            allPrefixesAdd(prefixes, word, end);
        }
        // we can probably do Integer.MAX_VALUE here, but why worry.
        final int weight = random().nextInt(1 << 24);
        expectedWeights.put(word, (long) weight);
        inputs[i] = new Input(word, weight);
    }

    final Directory dir = getDirectory();
    final WFSTCompletionLookup lookup = new WFSTCompletionLookup(dir, "wfst", false);
    lookup.build(new InputArrayIterator(inputs));
    assertEquals(wordCount, lookup.getCount());

    // Highest weight first; ties are broken by LookupResult's own ordering.
    final Comparator<LookupResult> bestFirst = new Comparator<LookupResult>() {

        @Override
        public int compare(LookupResult left, LookupResult right) {
            final int byWeight = Float.compare(right.value, left.value);
            return byWeight != 0 ? byWeight : left.compareTo(right);
        }
    };

    final Random rnd = new Random(random().nextLong());
    for (String prefix : prefixes) {
        final int topN = TestUtil.nextInt(rnd, 1, 10);
        final List<LookupResult> actual = lookup.lookup(TestUtil.stringToCharSequence(prefix, rnd), false, topN);

        // Walk the whole model and collect everything that starts with the prefix.
        // TODO: could be faster... but it's a brute-force model for a reason
        final List<LookupResult> expected = new ArrayList<>();
        for (Map.Entry<String, Long> entry : expectedWeights.entrySet()) {
            final String key = entry.getKey();
            if (key.startsWith(prefix)) {
                expected.add(new LookupResult(key, entry.getValue().longValue()));
            }
        }
        assertTrue(expected.size() > 0);

        Collections.sort(expected, bestFirst);
        // Keep only the topN best entries of the model.
        if (topN < expected.size()) {
            expected.subList(topN, expected.size()).clear();
        }

        assertEquals(expected.size(), actual.size());
        for (int hit = 0; hit < actual.size(); hit++) {
            final LookupResult want = expected.get(hit);
            final LookupResult got = actual.get(hit);
            assertEquals(want.key.toString(), got.key.toString());
            assertEquals(want.value, got.value, 0f);
        }
    }
    dir.close();
}

/** Adds the prefix {@code word[0, end)} to {@code sink}. */
private static void allPrefixesAdd(TreeSet<String> sink, String word, int end) {
    sink.add(word.substring(0, end));
}
Also used : Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Directory(org.apache.lucene.store.Directory)

Example 2 with Input

use of org.apache.lucene.search.suggest.Input in project lucene-solr by apache.

the class WFSTCompletionTest method test0ByteKeys.

/**
 * Checks that {@link WFSTCompletionLookup} can be built from keys made up entirely
 * of zero bytes.
 *
 * <p>Two keys are used, one of four zero bytes and one of three zero bytes, both with
 * weight 50. The test passes if {@code build} completes without throwing.
 */
public void test0ByteKeys() throws Exception {
    // new BytesRef(capacity) allocates the backing array but leaves length at 0,
    // so the length of each key has to be set explicitly.
    BytesRef key1 = new BytesRef(4);
    key1.length = 4;
    BytesRef key2 = new BytesRef(3);
    // Previously this line assigned key1.length again, which left key2 empty and
    // shortened key1 to three bytes.
    key2.length = 3;
    Directory tempDir = getDirectory();
    WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false);
    suggester.build(new InputArrayIterator(new Input[] { new Input(key1, 50), new Input(key2, 50) }));
    tempDir.close();
}
Also used : Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 3 with Input

use of org.apache.lucene.search.suggest.Input in project lucene-solr by apache.

the class WFSTCompletionTest method testEmpty.

/**
 * Builds a {@link WFSTCompletionLookup} from an empty input array and checks that
 * it reports a count of zero and returns no results for a lookup of {@code "a"}.
 */
public void testEmpty() throws Exception {
    final Directory dir = getDirectory();
    final WFSTCompletionLookup emptySuggester = new WFSTCompletionLookup(dir, "wfst", false);

    final Input[] noInputs = new Input[0];
    emptySuggester.build(new InputArrayIterator(noInputs));
    assertEquals(0, emptySuggester.getCount());

    final List<LookupResult> hits = emptySuggester.lookup("a", false, 20);
    assertTrue(hits.isEmpty());

    dir.close();
}
Also used : Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Directory(org.apache.lucene.store.Directory)

Example 4 with Input

use of org.apache.lucene.search.suggest.Input in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testDupSurfaceFormsMissingResults2.

/**
 * Builds an {@link AnalyzingSuggester} from inputs "a" (weight 6) and "b" (weight 5)
 * using an analyzer that returns canned token streams, and checks that a lookup of
 * "a" returns both surface forms in weight order. The same check is repeated after
 * storing the suggester to a file and loading it back.
 */
public void testDupSurfaceFormsMissingResults2() throws Exception {
    Analyzer cannedAnalyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            return new TokenStreamComponents(new MockTokenizer(MockTokenizer.SIMPLE, true)) {

                // Number of times the multi-token stream has been handed out (0 or 1).
                int served;

                @Override
                public TokenStream getTokenStream() {
                    if (served != 0) {
                        // Every call after the first yields just the single "p" token.
                        Token[] single = new Token[] { token("p", 1, 1) };
                        return new CannedTokenStream(single);
                    }
                    // First call only: p, q, r, s with the given position increments/lengths.
                    served++;
                    Token[] graph = new Token[] { token("p", 1, 1), token("q", 1, 1), token("r", 0, 1), token("s", 0, 1) };
                    return new CannedTokenStream(graph);
                }

                @Override
                protected void setReader(final Reader reader) {
                    // Intentionally empty: the canned streams never read from the reader.
                }
            };
        }
    };

    Directory dir = getDirectory();
    AnalyzingSuggester lookup = new AnalyzingSuggester(dir, "suggest", cannedAnalyzer, cannedAnalyzer, 0, 256, -1, true);
    Input[] inputs = new Input[] { new Input("a", 6), new Input("b", 5) };
    lookup.build(new InputArrayIterator(inputs));

    List<LookupResult> hits = lookup.lookup("a", false, 2);
    assertEquals(2, hits.size());
    LookupResult first = hits.get(0);
    LookupResult second = hits.get(1);
    assertEquals("a", first.key);
    assertEquals(6, first.value);
    assertEquals("b", second.key);
    assertEquals(5, second.value);

    // Try again after save/load:
    Path storeFile = createTempDir("AnalyzingSuggesterTest").resolve("suggester");
    OutputStream out = Files.newOutputStream(storeFile);
    lookup.store(out);
    out.close();
    InputStream in = Files.newInputStream(storeFile);
    lookup.load(in);
    in.close();

    hits = lookup.lookup("a", false, 2);
    assertEquals(2, hits.size());
    first = hits.get(0);
    second = hits.get(1);
    assertEquals("a", first.key);
    assertEquals(6, first.value);
    assertEquals("b", second.key);
    assertEquals(5, second.value);

    IOUtils.close(cannedAnalyzer, dir);
}
Also used : Path(java.nio.file.Path) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) Reader(java.io.Reader) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Directory(org.apache.lucene.store.Directory)

Example 5 with Input

use of org.apache.lucene.search.suggest.Input in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testKeyword.

/**
 * The WFST test ported to a keyword-tokenizing {@code MockAnalyzer}, so it is expected
 * to behave the same way: lookups for "f", "bar", "b" and "ba" are checked for result
 * count, key order and weights.
 */
public void testKeyword() throws Exception {
    Iterable<Input> inputs = shuffle(new Input("foo", 50), new Input("bar", 10), new Input("barbar", 10), new Input("barbar", 12), new Input("barbara", 6), new Input("bar", 5), new Input("barbara", 1));
    Directory dir = getDirectory();
    Analyzer keywordAnalyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
    AnalyzingSuggester lookup = new AnalyzingSuggester(dir, "suggest", keywordAnalyzer);
    lookup.build(new InputArrayIterator(inputs));

    // top N of 2, but only foo is available
    List<LookupResult> hits = lookup.lookup(TestUtil.stringToCharSequence("f", random()), false, 2);
    assertEquals(1, hits.size());
    LookupResult hit = hits.get(0);
    assertEquals("foo", hit.key.toString());
    assertEquals(50, hit.value, 0.01F);

    // top N of 1 for 'bar': we return this even though
    // barbar is higher because exactFirst is enabled:
    hits = lookup.lookup(TestUtil.stringToCharSequence("bar", random()), false, 1);
    assertEquals(1, hits.size());
    hit = hits.get(0);
    assertEquals("bar", hit.key.toString());
    assertEquals(10, hit.value, 0.01F);

    // top N Of 2 for 'b'
    hits = lookup.lookup(TestUtil.stringToCharSequence("b", random()), false, 2);
    assertEquals(2, hits.size());
    hit = hits.get(0);
    assertEquals("barbar", hit.key.toString());
    assertEquals(12, hit.value, 0.01F);
    hit = hits.get(1);
    assertEquals("bar", hit.key.toString());
    assertEquals(10, hit.value, 0.01F);

    // top N of 3 for 'ba'
    hits = lookup.lookup(TestUtil.stringToCharSequence("ba", random()), false, 3);
    assertEquals(3, hits.size());
    hit = hits.get(0);
    assertEquals("barbar", hit.key.toString());
    assertEquals(12, hit.value, 0.01F);
    hit = hits.get(1);
    assertEquals("bar", hit.key.toString());
    assertEquals(10, hit.value, 0.01F);
    hit = hits.get(2);
    assertEquals("barbara", hit.key.toString());
    assertEquals(6, hit.value, 0.01F);

    IOUtils.close(keywordAnalyzer, dir);
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Directory(org.apache.lucene.store.Directory)

Aggregations

Input (org.apache.lucene.search.suggest.Input)74 InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator)71 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)65 Analyzer (org.apache.lucene.analysis.Analyzer)63 LookupResult (org.apache.lucene.search.suggest.Lookup.LookupResult)48 Directory (org.apache.lucene.store.Directory)42 BytesRef (org.apache.lucene.util.BytesRef)27 Path (java.nio.file.Path)16 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)11 ArrayList (java.util.ArrayList)10 Tokenizer (org.apache.lucene.analysis.Tokenizer)10 Reader (java.io.Reader)8 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)8 TokenStream (org.apache.lucene.analysis.TokenStream)6 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)6 HashSet (java.util.HashSet)5 CharArraySet (org.apache.lucene.analysis.CharArraySet)5 Token (org.apache.lucene.analysis.Token)5 InputStream (java.io.InputStream)4 OutputStream (java.io.OutputStream)4