Search in sources :

Example 6 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testNoSeps.

public void testNoSeps() throws Exception {
    Input[] keys = new Input[] { new Input("ab cd", 0), new Input("abcd", 1) };
    int options = 0;
    Analyzer a = new MockAnalyzer(random());
    Directory tempDir = getDirectory();
    AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, options, 256, -1, true);
    suggester.build(new InputArrayIterator(keys));
    // TODO: would be nice if "ab " would allow the test to
    // pass, and more generally if the analyzer can know
    // that the user's current query has ended at a word, 
    // but, analyzers don't produce SEP tokens!
    List<LookupResult> r = suggester.lookup(TestUtil.stringToCharSequence("ab c", random()), false, 2);
    assertEquals(2, r.size());
    // With no PRESERVE_SEPS specified, "ab c" should also
    // complete to "abcd", which has higher weight so should
    // appear first:
    assertEquals("abcd", r.get(0).key.toString());
    IOUtils.close(a, tempDir);
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Directory(org.apache.lucene.store.Directory)

Example 7 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testEmpty.

public void testEmpty() throws Exception {
    Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
    Directory tempDir = getDirectory();
    AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", standard);
    suggester.build(new InputArrayIterator(new Input[0]));
    List<LookupResult> result = suggester.lookup("a", false, 20);
    assertTrue(result.isEmpty());
    IOUtils.close(standard, tempDir);
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Directory(org.apache.lucene.store.Directory)

Example 8 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testStandard.

// TODO: more tests
/**
   * basic "standardanalyzer" test with stopword removal
   */
public void testStandard() throws Exception {
    Input[] keys = new Input[] { new Input("the ghost of christmas past", 50) };
    Directory tempDir = getDirectory();
    Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
    AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", standard, standard, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, false);
    suggester.build(new InputArrayIterator(keys));
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
    assertEquals(1, results.size());
    assertEquals("the ghost of christmas past", results.get(0).key.toString());
    assertEquals(50, results.get(0).value, 0.01F);
    // omit the 'the' since it's a stopword, it's suggested anyway
    results = suggester.lookup(TestUtil.stringToCharSequence("ghost of chris", random()), false, 1);
    assertEquals(1, results.size());
    assertEquals("the ghost of christmas past", results.get(0).key.toString());
    assertEquals(50, results.get(0).value, 0.01F);
    // omit the 'the' and 'of' since they are stopwords, it's suggested anyway
    results = suggester.lookup(TestUtil.stringToCharSequence("ghost chris", random()), false, 1);
    assertEquals(1, results.size());
    assertEquals("the ghost of christmas past", results.get(0).key.toString());
    assertEquals(50, results.get(0).value, 0.01F);
    IOUtils.close(standard, tempDir);
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Directory(org.apache.lucene.store.Directory)

Example 9 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testExactFirstMissingResult.

public void testExactFirstMissingResult() throws Exception {
    Analyzer a = new MockAnalyzer(random());
    Directory tempDir = getDirectory();
    AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1, true);
    suggester.build(new InputArrayIterator(new Input[] { new Input("a", 5), new Input("a b", 3), new Input("a c", 4) }));
    assertEquals(3, suggester.getCount());
    List<LookupResult> results = suggester.lookup("a", false, 3);
    assertEquals(3, results.size());
    assertEquals("a", results.get(0).key);
    assertEquals(5, results.get(0).value);
    assertEquals("a c", results.get(1).key);
    assertEquals(4, results.get(1).value);
    assertEquals("a b", results.get(2).key);
    assertEquals(3, results.get(2).value);
    // Try again after save/load:
    Path tmpDir = createTempDir("AnalyzingSuggesterTest");
    Path path = tmpDir.resolve("suggester");
    OutputStream os = Files.newOutputStream(path);
    suggester.store(os);
    os.close();
    InputStream is = Files.newInputStream(path);
    suggester.load(is);
    is.close();
    assertEquals(3, suggester.getCount());
    results = suggester.lookup("a", false, 3);
    assertEquals(3, results.size());
    assertEquals("a", results.get(0).key);
    assertEquals(5, results.get(0).value);
    assertEquals("a c", results.get(1).key);
    assertEquals(4, results.get(1).value);
    assertEquals("a b", results.get(2).key);
    assertEquals(3, results.get(2).value);
    IOUtils.close(a, tempDir);
}
Also used : Path(java.nio.file.Path) Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Directory(org.apache.lucene.store.Directory)

Example 10 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class BlendedInfixSuggesterTest method testBlendedSort.

/**
   * Test the weight transformation depending on the position
   * of the matching term.
   */
public void testBlendedSort() throws IOException {
    BytesRef payload = new BytesRef("star");
    Input[] keys = new Input[] { new Input("star wars: episode v - the empire strikes back", 8, payload) };
    Path tempDir = createTempDir("BlendedInfixSuggesterTest");
    Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
    BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a, AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_LINEAR, BlendedInfixSuggester.DEFAULT_NUM_FACTOR, false);
    suggester.build(new InputArrayIterator(keys));
    // we query for star wars and check that the weight
    // is smaller when we search for tokens that are far from the beginning
    long w0 = getInResults(suggester, "star ", payload, 1);
    long w1 = getInResults(suggester, "war", payload, 1);
    long w2 = getInResults(suggester, "empire ba", payload, 1);
    long w3 = getInResults(suggester, "back", payload, 1);
    long w4 = getInResults(suggester, "bacc", payload, 1);
    assertTrue(w0 > w1);
    assertTrue(w1 > w2);
    assertTrue(w2 > w3);
    assertTrue(w4 < 0);
    suggester.close();
}
Also used : Path(java.nio.file.Path) Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator)76 Input (org.apache.lucene.search.suggest.Input)71 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)67 Analyzer (org.apache.lucene.analysis.Analyzer)65 LookupResult (org.apache.lucene.search.suggest.Lookup.LookupResult)48 Directory (org.apache.lucene.store.Directory)43 BytesRef (org.apache.lucene.util.BytesRef)26 Path (java.nio.file.Path)17 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)11 Tokenizer (org.apache.lucene.analysis.Tokenizer)10 Reader (java.io.Reader)8 ArrayList (java.util.ArrayList)8 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)8 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)7 TokenStream (org.apache.lucene.analysis.TokenStream)6 HashSet (java.util.HashSet)5 CharArraySet (org.apache.lucene.analysis.CharArraySet)5 Token (org.apache.lucene.analysis.Token)5 InputStream (java.io.InputStream)4 OutputStream (java.io.OutputStream)4