Search in sources :

Example 41 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class BlendedInfixSuggesterTest method testBlendingType.

/**
 * Verifies the weight produced by each flavour of blender type for a single suggestion.
 *
 * <p>The only indexed entry is "top of the lake" with weight {@code w}. The expected values
 * below treat "top", "the" and "lake" as matching at positions 0, 2 and 3 respectively.
 *
 * @throws IOException if building, querying or closing a suggester fails
 */
public void testBlendingType() throws IOException {
    BytesRef pl = new BytesRef("lake");
    long w = 20;
    Input[] keys = new Input[] { new Input("top of the lake", w, pl) };
    Path tempDir = createTempDir("BlendedInfixSuggesterTest");
    // The analyzer is shared by all three suggesters and released once they are all done.
    try (Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET)) {
        // Default blender (linear): each position removes 10% of the weight.
        // try-with-resources closes the suggester even when an assertion fails.
        try (BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a)) {
            suggester.build(new InputArrayIterator(keys));
            assertEquals(w, getInResults(suggester, "top", pl, 1));
            assertEquals((int) (w * (1 - 0.10 * 2)), getInResults(suggester, "the", pl, 1));
            assertEquals((int) (w * (1 - 0.10 * 3)), getInResults(suggester, "lake", pl, 1));
        }
        // POSITION_RECIPROCAL: 1/(1+p) * w, where w is the weight and p the position of the word.
        try (BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a, AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1, false)) {
            suggester.build(new InputArrayIterator(keys));
            assertEquals(w, getInResults(suggester, "top", pl, 1));
            assertEquals((int) (w * 1 / (1 + 2)), getInResults(suggester, "the", pl, 1));
            assertEquals((int) (w * 1 / (1 + 3)), getInResults(suggester, "lake", pl, 1));
        }
        // POSITION_EXPONENTIAL_RECIPROCAL: 1/(pow(1+p, exponent)) * w, here with exponent 4.0.
        try (BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a, AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_EXPONENTIAL_RECIPROCAL, 1, 4.0, false, true, false)) {
            suggester.build(new InputArrayIterator(keys));
            assertEquals(w, getInResults(suggester, "top", pl, 1));
            assertEquals((int) (w * 1 / (Math.pow(1 + 2, 4.0))), getInResults(suggester, "the", pl, 1));
            assertEquals((int) (w * 1 / (Math.pow(1 + 3, 4.0))), getInResults(suggester, "lake", pl, 1));
        }
    }
}
Also used: Path(java.nio.file.Path), Input(org.apache.lucene.search.suggest.Input), InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator), StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer), Analyzer(org.apache.lucene.analysis.Analyzer), MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer), BytesRef(org.apache.lucene.util.BytesRef)

Example 42 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class BlendedInfixSuggesterTest method testRequiresMore.

/**
 * Asserts that the factor matters for surfacing results that are lower in terms of weight but
 * would be pushed up after the blending transformation.
 *
 * <p>The checks below treat a negative value from {@code getInResults} as "payload not among
 * the returned results" and a positive one as "found".
 *
 * @throws IOException if building, querying or closing a suggester fails
 */
public void testRequiresMore() throws IOException {
    BytesRef lake = new BytesRef("lake");
    BytesRef star = new BytesRef("star");
    BytesRef ret = new BytesRef("ret");
    Input[] keys = new Input[] { new Input("top of the lake", 18, lake), new Input("star wars: episode v - the empire strikes back", 12, star), new Input("the returned", 10, ret) };
    Path tempDir = createTempDir("BlendedInfixSuggesterTest");
    // The analyzer is shared by both suggesters and released once they are both done.
    try (Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET)) {
        // With a small factor (1) the expected element is missing from a short result list.
        // try-with-resources closes the suggester even when an assertion fails.
        try (BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a, AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 1, false)) {
            suggester.build(new InputArrayIterator(keys));
            // "the returned" is not among the first 2 results ...
            assertEquals(2, suggester.lookup("the", 2, true, false).size());
            long w0 = getInResults(suggester, "the", ret, 2);
            assertTrue(w0 < 0);
            // ... but it is there when 3 results are requested.
            assertEquals(3, suggester.lookup("the", 3, true, false).size());
            long w1 = getInResults(suggester, "the", ret, 3);
            assertTrue(w1 > 0);
        }
        // With a larger factor (2) it shows up within the first 2 results.
        try (BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a, AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 2, false)) {
            suggester.build(new InputArrayIterator(keys));
            // "the returned" is now found ...
            long w2 = getInResults(suggester, "the", ret, 2);
            assertTrue(w2 > 0);
            // ... and the "star wars" entry is the one left out.
            long w3 = getInResults(suggester, "the", star, 2);
            assertTrue(w3 < 0);
        }
    }
}
Also used: Path(java.nio.file.Path), Input(org.apache.lucene.search.suggest.Input), InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator), StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer), Analyzer(org.apache.lucene.analysis.Analyzer), MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer), BytesRef(org.apache.lucene.util.BytesRef)

Example 43 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class FuzzySuggesterTest method testNoSeps.

/**
 * Checks completion across a space when no option flags are set: the query "ab c" must
 * return both indexed keys, with "abcd" ranked first.
 */
public void testNoSeps() throws Exception {
    final Input[] inputs = { new Input("ab cd", 0), new Input("abcd", 1) };
    final Analyzer analyzer = new MockAnalyzer(random());
    final Directory dir = getDirectory();
    // No option flags are set for this suggester.
    final int noOptions = 0;
    final FuzzySuggester fuzzy = new FuzzySuggester(dir, "fuzzy", analyzer, analyzer, noOptions, 256, -1, true, 1, true, 1, 3, false);
    fuzzy.build(new InputArrayIterator(inputs));

    // TODO: it would be nice if "ab " allowed the test to pass and, more
    // generally, if the analyzer could know that the user's current query
    // has ended at a word; but analyzers don't produce SEP tokens!
    final CharSequence query = TestUtil.stringToCharSequence("ab c", random());
    final List<LookupResult> hits = fuzzy.lookup(query, false, 2);
    assertEquals(2, hits.size());

    // Without PRESERVE_SEP, "ab c" should also complete to "abcd"; that key
    // has the higher weight, so it should appear first.
    final String best = hits.get(0).key.toString();
    assertEquals("abcd", best);

    IOUtils.close(analyzer, dir);
}
Also used: Input(org.apache.lucene.search.suggest.Input), MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer), InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator), LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult), Analyzer(org.apache.lucene.analysis.Analyzer), Directory(org.apache.lucene.store.Directory)

Example 44 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class FuzzySuggesterTest method testStandard.

/**
 * Basic "standardanalyzer"-style test with stopword removal: the single indexed phrase must be
 * suggested whether or not the query contains its stopwords.
 */
public void testStandard() throws Exception {
    final String phrase = "the ghost of christmas past";
    final Input[] inputs = { new Input(phrase, 50) };
    final Analyzer stopping = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
    final Directory dir = getDirectory();
    final int options = AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP;
    final FuzzySuggester fuzzy = new FuzzySuggester(dir, "fuzzy", stopping, stopping, options, 256, -1, false, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS, FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE);
    fuzzy.build(new InputArrayIterator(inputs));

    final String[] prefixes = {
        // plain prefix of the indexed phrase
        "the ghost of chris",
        // 'the' omitted: it's a stopword, the phrase is suggested anyway
        "ghost of chris",
        // 'the' and 'of' omitted: both are stopwords, the phrase is suggested anyway
        "ghost chris"
    };
    for (String prefix : prefixes) {
        List<LookupResult> results = fuzzy.lookup(TestUtil.stringToCharSequence(prefix, random()), false, 1);
        assertEquals(1, results.size());
        LookupResult top = results.get(0);
        assertEquals(phrase, top.key.toString());
        assertEquals(50, top.value, 0.01F);
    }

    IOUtils.close(stopping, dir);
}
Also used: Input(org.apache.lucene.search.suggest.Input), MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer), InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator), LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult), Analyzer(org.apache.lucene.analysis.Analyzer), Directory(org.apache.lucene.store.Directory)

Example 45 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class FuzzySuggesterTest method testNonLatinRandomEdits.

/**
 * Indexes many random Cyrillic-prefixed keys plus one fixed phrase, then checks that a
 * randomly edited prefix of that phrase yields exactly the fixed phrase with its weight.
 */
public void testNonLatinRandomEdits() throws IOException {
    final List<Input> inputs = new ArrayList<>();
    final int randomTermCount = atLeast(100);
    for (int remaining = randomTermCount; remaining > 0; remaining--) {
        // Term is drawn before the weight, so the random sequence is consumed in a fixed order.
        String term = "буу" + TestUtil.randomSimpleString(random());
        int weight = 1 + random().nextInt(100);
        inputs.add(new Input(term, weight));
    }
    // The one key the lookups below are expected to hit.
    inputs.add(new Input("фуу бар буу фар", 12));

    final MockAnalyzer keywordAnalyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
    final Directory dir = getDirectory();
    final int options = FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP;
    final FuzzySuggester fuzzy = new FuzzySuggester(dir, "fuzzy", keywordAnalyzer, keywordAnalyzer, options, 256, -1, true, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS, 0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, true);
    fuzzy.build(new InputArrayIterator(inputs));

    final int rounds = atLeast(10);
    int round = 0;
    while (round < rounds) {
        String edited = addRandomEdit("фуу бар буу", 0);
        List<LookupResult> hits = fuzzy.lookup(TestUtil.stringToCharSequence(edited, random()), false, 2);
        // The edited query doubles as the failure message.
        assertEquals(edited, 1, hits.size());
        LookupResult only = hits.get(0);
        assertEquals("фуу бар буу фар", only.key.toString());
        assertEquals(12, only.value, 0.01F);
        round++;
    }

    IOUtils.close(keywordAnalyzer, dir);
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) ArrayList(java.util.ArrayList) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Directory(org.apache.lucene.store.Directory)

Aggregations

InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator): 76; Input (org.apache.lucene.search.suggest.Input): 71; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 67; Analyzer (org.apache.lucene.analysis.Analyzer): 65; LookupResult (org.apache.lucene.search.suggest.Lookup.LookupResult): 48; Directory (org.apache.lucene.store.Directory): 43; BytesRef (org.apache.lucene.util.BytesRef): 26; Path (java.nio.file.Path): 17; MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 11; Tokenizer (org.apache.lucene.analysis.Tokenizer): 10; Reader (java.io.Reader): 8; ArrayList (java.util.ArrayList): 8; CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 8; StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer): 7; TokenStream (org.apache.lucene.analysis.TokenStream): 6; HashSet (java.util.HashSet): 5; CharArraySet (org.apache.lucene.analysis.CharArraySet): 5; Token (org.apache.lucene.analysis.Token): 5; InputStream (java.io.InputStream): 4; OutputStream (java.io.OutputStream): 4