Search in sources :

Example 26 with Token

use of org.apache.lucene.analysis.Token in project lucene-solr by apache.

From the class IndexBasedSpellCheckerTest, the method testSpelling:

@Test
public void testSpelling() throws Exception {
    // Builds an index-backed spell checker over the "title" field, then checks
    // suggestions for a misspelled word, an unknown word, a correctly spelled
    // word, and a word with multiple candidate corrections.
    IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
    NamedList spellchecker = new NamedList();
    spellchecker.add("classname", IndexBasedSpellChecker.class.getName());
    File indexDir = createTempDir().toFile();
    spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
    spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
    spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
    SolrCore core = h.getCore();
    String dictName = checker.init(spellchecker, core);
    assertEquals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME, dictName);
    RefCounted<SolrIndexSearcher> holder = core.getSearcher();
    SolrIndexSearcher searcher = holder.get();
    try {
        checker.build(core, searcher);
        IndexReader reader = searcher.getIndexReader();
        Collection<Token> tokens = queryConverter.convert("documemt");
        SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
        SpellingResult result = checker.getSuggestions(spellOpts);
        assertNotNull("result is null and it shouldn't be", result);
        //should be lowercased, b/c we are using a lowercasing analyzer
        Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
        assertNotNull("suggestions for 'documemt' are null and they shouldn't be", suggestions);
        assertEquals("unexpected suggestion count for 'documemt'", 1, suggestions.size());
        Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
        assertEquals("document", entry.getKey());
        // No extended results were requested, so no frequency info is attached.
        assertEquals(SpellingResult.NO_FREQUENCY_INFO, entry.getValue().intValue());
        //test something not in the spell checker
        spellOpts.tokens = queryConverter.convert("super");
        result = checker.getSuggestions(spellOpts);
        assertNotNull("result is null and it shouldn't be", result);
        suggestions = result.get(spellOpts.tokens.iterator().next());
        assertEquals("suggestions size should be 0", 0, suggestions.size());
        //test something that is spelled correctly
        spellOpts.tokens = queryConverter.convert("document");
        result = checker.getSuggestions(spellOpts);
        assertNotNull("result is null and it shouldn't be", result);
        suggestions = result.get(spellOpts.tokens.iterator().next());
        assertNull("suggestions should be null for a correctly spelled word", suggestions);
        //Has multiple possibilities, but the exact exists, so that should be returned
        spellOpts.tokens = queryConverter.convert("red");
        spellOpts.count = 2;
        result = checker.getSuggestions(spellOpts);
        assertNotNull(result);
        suggestions = result.get(spellOpts.tokens.iterator().next());
        assertNull("suggestions is not null and it should be", suggestions);
        //Try out something which should have multiple suggestions
        spellOpts.tokens = queryConverter.convert("bug");
        result = checker.getSuggestions(spellOpts);
        assertNotNull(result);
        suggestions = result.get(spellOpts.tokens.iterator().next());
        assertNotNull(suggestions);
        assertEquals("unexpected suggestion count for 'bug'", 2, suggestions.size());
        // Bug fix: the original re-created the entrySet iterator for the
        // "second" entry, so it actually verified the first entry twice.
        // Iterating once over the set checks both suggestions.
        for (Map.Entry<String, Integer> suggestion : suggestions.entrySet()) {
            assertFalse(suggestion.getKey() + " is equal to " + "bug and it shouldn't be", suggestion.getKey().equals("bug"));
            assertEquals(SpellingResult.NO_FREQUENCY_INFO, suggestion.getValue().intValue());
        }
    } finally {
        // Always release the searcher reference, even if an assertion fails.
        holder.decref();
    }
}
Also used : NamedList(org.apache.solr.common.util.NamedList) SolrCore(org.apache.solr.core.SolrCore) Token(org.apache.lucene.analysis.Token) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) IndexReader(org.apache.lucene.index.IndexReader) File(java.io.File) Map(java.util.Map) Test(org.junit.Test)

Example 27 with Token

use of org.apache.lucene.analysis.Token in project lucene-solr by apache.

From the class IndexBasedSpellCheckerTest, the method testExtendedResults:

@Test
public void testExtendedResults() throws Exception {
    // Same setup as testSpelling, but requests extended results so that
    // suggestions carry real document frequencies rather than
    // SpellingResult.NO_FREQUENCY_INFO.
    IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
    NamedList spellchecker = new NamedList();
    spellchecker.add("classname", IndexBasedSpellChecker.class.getName());
    File indexDir = createTempDir().toFile();
    indexDir.mkdirs();
    spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
    spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
    spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
    SolrCore core = h.getCore();
    String dictName = checker.init(spellchecker, core);
    assertEquals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME, dictName);
    RefCounted<SolrIndexSearcher> holder = core.getSearcher();
    SolrIndexSearcher searcher = holder.get();
    try {
        checker.build(core, searcher);
        IndexReader reader = searcher.getIndexReader();
        Collection<Token> tokens = queryConverter.convert("documemt");
        // extendedResults=true; only suggest for terms absent from the index.
        SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
        SpellingResult result = checker.getSuggestions(spellOpts);
        assertNotNull("result is null and it shouldn't be", result);
        //should be lowercased, b/c we are using a lowercasing analyzer
        Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
        assertNotNull("suggestions for 'documemt' are null and they shouldn't be", suggestions);
        assertEquals("unexpected suggestion count for 'documemt'", 1, suggestions.size());
        Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
        assertEquals("document", entry.getKey());
        // Extended results attach the suggestion's document frequency (2 here).
        assertEquals(2, entry.getValue().intValue());
        //test something not in the spell checker
        spellOpts.tokens = queryConverter.convert("super");
        result = checker.getSuggestions(spellOpts);
        assertNotNull("result is null and it shouldn't be", result);
        suggestions = result.get(spellOpts.tokens.iterator().next());
        // Guard against NPE on .size() so a failure reports the real cause.
        assertNotNull("suggestions should be an empty map, not null", suggestions);
        assertEquals("suggestions size should be 0", 0, suggestions.size());
        spellOpts.tokens = queryConverter.convert("document");
        result = checker.getSuggestions(spellOpts);
        assertNotNull("result is null and it shouldn't be", result);
        suggestions = result.get(spellOpts.tokens.iterator().next());
        assertNull("suggestions is not null and it should be", suggestions);
    } finally {
        // Always release the searcher reference, even if an assertion fails.
        holder.decref();
    }
}
Also used : NamedList(org.apache.solr.common.util.NamedList) SolrCore(org.apache.solr.core.SolrCore) Token(org.apache.lucene.analysis.Token) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) IndexReader(org.apache.lucene.index.IndexReader) File(java.io.File) Map(java.util.Map) Test(org.junit.Test)

Example 28 with Token

use of org.apache.lucene.analysis.Token in project lucene-solr by apache.

From the class SpellPossibilityIteratorTest, the method testOverlappingTokens:

@Test
public void testOverlappingTokens() throws Exception {
    // Verifies PossibilityIterator's handling of overlapping tokens:
    // TOKEN_AYE_BEE overlaps TOKEN_AYE and TOKEN_BEE, so each ranked
    // possibility must contain either the combined token or both singles,
    // never a mix of the two. TOKEN_CEE does not overlap and must always
    // be present.
    Map<Token, LinkedHashMap<String, Integer>> overlappingSuggestions = new LinkedHashMap<>();
    overlappingSuggestions.put(TOKEN_AYE, AYE);
    overlappingSuggestions.put(TOKEN_BEE, BEE);
    overlappingSuggestions.put(TOKEN_AYE_BEE, AYE_BEE);
    overlappingSuggestions.put(TOKEN_CEE, CEE);
    // No limit on possibilities or evaluations; overlaps enabled (last arg).
    PossibilityIterator iter = new PossibilityIterator(overlappingSuggestions, Integer.MAX_VALUE, Integer.MAX_VALUE, true);
    int aCount = 0;
    int abCount = 0;
    // Every ranked possibility must be unique; the set detects duplicates.
    Set<PossibilityIterator.RankedSpellPossibility> dupChecker = new HashSet<>();
    while (iter.hasNext()) {
        PossibilityIterator.RankedSpellPossibility rsp = iter.next();
        Token a = null;
        Token b = null;
        Token ab = null;
        Token c = null;
        for (SpellCheckCorrection scc : rsp.corrections) {
            if (scc.getOriginal().equals(TOKEN_AYE)) {
                a = scc.getOriginal();
            } else if (scc.getOriginal().equals(TOKEN_BEE)) {
                b = scc.getOriginal();
            } else if (scc.getOriginal().equals(TOKEN_AYE_BEE)) {
                ab = scc.getOriginal();
            } else if (scc.getOriginal().equals(TOKEN_CEE)) {
                c = scc.getOriginal();
            }
            // NOTE(review): these counters increment once per CORRECTION, not
            // once per possibility, and `ab` is sticky once set within a
            // possibility — the expected totals (2160/180 below) depend on
            // this exact placement inside the inner loop. Confirm before
            // restructuring.
            if (ab != null) {
                abCount++;
            } else {
                aCount++;
            }
        }
        assertTrue(c != null);
        // Combined token XOR both single tokens must appear.
        assertTrue(ab != null || (a != null && b != null));
        assertTrue(ab == null || (a == null && b == null));
        // Fails if the iterator ever yields the same possibility twice.
        assertTrue(dupChecker.add(rsp));
    }
    assertTrue(aCount == 2160);
    assertTrue(abCount == 180);
}
Also used : Token(org.apache.lucene.analysis.Token) PossibilityIterator(org.apache.solr.spelling.PossibilityIterator) LinkedHashMap(java.util.LinkedHashMap) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 29 with Token

use of org.apache.lucene.analysis.Token in project lucene-solr by apache.

From the class SpellPossibilityIteratorTest, the method testSpellPossibilityIterator:

@Test
public void testSpellPossibilityIterator() throws Exception {
    // Exercises PossibilityIterator over progressively smaller suggestion
    // maps: checks the top-ranked combination first, then verifies the total
    // number of combinations produced as tokens are removed.
    Map<Token, LinkedHashMap<String, Integer>> suggestions = new LinkedHashMap<>();
    suggestions.put(TOKEN_AYE, AYE);
    suggestions.put(TOKEN_BEE, BEE);
    suggestions.put(TOKEN_CEE, CEE);
    PossibilityIterator possibilities = new PossibilityIterator(suggestions, 1000, 10000, false);
    int count = 0;
    boolean first = true;
    while (possibilities.hasNext()) {
        PossibilityIterator.RankedSpellPossibility ranked = possibilities.next();
        if (first) {
            // The highest-ranked possibility pairs each token with its best correction.
            assertTrue("I".equals(ranked.corrections.get(0).getCorrection()));
            assertTrue("alpha".equals(ranked.corrections.get(1).getCorrection()));
            assertTrue("one".equals(ranked.corrections.get(2).getCorrection()));
            first = false;
        }
        count++;
    }
    assertTrue(("Three maps (8*9*10) should return 720 iterations but instead returned " + count), count == 720);
    suggestions.remove(TOKEN_CEE);
    count = drainPossibilities(new PossibilityIterator(suggestions, 100, 10000, false));
    assertTrue(("Two maps (8*9) should return 72 iterations but instead returned " + count), count == 72);
    suggestions.remove(TOKEN_BEE);
    count = drainPossibilities(new PossibilityIterator(suggestions, 5, 10000, false));
    assertTrue(("We requested 5 suggestions but got " + count), count == 5);
    suggestions.remove(TOKEN_AYE);
    count = drainPossibilities(new PossibilityIterator(suggestions, Integer.MAX_VALUE, 10000, false));
    assertTrue(("No maps should return 0 iterations but instead returned " + count), count == 0);
}

/** Consumes the iterator and returns how many possibilities it produced. */
private int drainPossibilities(PossibilityIterator possibilities) {
    int produced = 0;
    while (possibilities.hasNext()) {
        possibilities.next();
        produced++;
    }
    return produced;
}
Also used : Token(org.apache.lucene.analysis.Token) PossibilityIterator(org.apache.solr.spelling.PossibilityIterator) LinkedHashMap(java.util.LinkedHashMap) Test(org.junit.Test)

Example 30 with Token

use of org.apache.lucene.analysis.Token in project lucene-solr by apache.

From the class SpellingQueryConverterTest, the method testMultipleClauses:

@Test
public void testMultipleClauses() {
    // Verifies the query converter splits multi-clause queries (including
    // CJK text in field:value pairs) into one token per clause.
    SpellingQueryConverter converter = new SpellingQueryConverter();
    converter.init(new NamedList());
    converter.setAnalyzer(new WhitespaceAnalyzer());
    // two field:value pairs should give two tokens
    Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
    assertNotNull("tokens is null and it shouldn't be", tokens);
    assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
    // a field:value pair and a search term should give two tokens
    tokens = converter.convert("text_field:我购买了道具和服装。 bar");
    assertNotNull("tokens is null and it shouldn't be", tokens);
    assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) NamedList(org.apache.solr.common.util.NamedList) Token(org.apache.lucene.analysis.Token) Test(org.junit.Test)

Aggregations

Token (org.apache.lucene.analysis.Token)100 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)39 TokenStream (org.apache.lucene.analysis.TokenStream)31 Directory (org.apache.lucene.store.Directory)24 Test (org.junit.Test)23 Document (org.apache.lucene.document.Document)19 TextField (org.apache.lucene.document.TextField)19 BytesRef (org.apache.lucene.util.BytesRef)16 NamedList (org.apache.solr.common.util.NamedList)16 StringReader (java.io.StringReader)15 CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute)15 Analyzer (org.apache.lucene.analysis.Analyzer)14 ArrayList (java.util.ArrayList)13 Map (java.util.Map)13 Field (org.apache.lucene.document.Field)13 FieldType (org.apache.lucene.document.FieldType)11 IndexReader (org.apache.lucene.index.IndexReader)11 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)10 Tokenizer (org.apache.lucene.analysis.Tokenizer)9 Date (java.util.Date)8