use of org.apache.lucene.analysis.Token in project lucene-solr by apache.
the class IndexBasedSpellCheckerTest method testSpelling.
/**
 * Checks the suggestions returned by an {@link IndexBasedSpellChecker} configured on the
 * {@code title} field and queried with two-argument {@link SpellingOptions}.
 *
 * <p>Scenarios, in order:
 * <ul>
 *   <li>"documemt" yields exactly one suggestion, "document", carrying
 *       {@link SpellingResult#NO_FREQUENCY_INFO};</li>
 *   <li>"super" yields an empty suggestion map;</li>
 *   <li>"document" and "red" (with {@code count = 2}) yield no suggestion map at all;</li>
 *   <li>"bug" yields two suggestions, neither of which is "bug" itself.</li>
 * </ul>
 *
 * @throws Exception if initialising or building the spell checker, or obtaining the searcher, fails
 */
@Test
public void testSpelling() throws Exception {
  IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());
  File indexDir = createTempDir().toFile();
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
      dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME));
  RefCounted<SolrIndexSearcher> holder = core.getSearcher();
  SolrIndexSearcher searcher = holder.get();
  try {
    checker.build(core, searcher);
    IndexReader reader = searcher.getIndexReader();

    // A misspelled term: expect the single correction "document".
    Collection<Token> tokens = queryConverter.convert("documemt");
    SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
    SpellingResult result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    // should be lowercased, b/c we are using a lowercasing analyzer
    Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("documemt is null and it shouldn't be", suggestions != null);
    assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
    Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
    assertTrue(entry.getKey() + " is not equal to " + "document", entry.getKey().equals("document"));
    assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO,
        entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);

    // test something not in the spell checker
    spellOpts.tokens = queryConverter.convert("super");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    // Fail with a readable assertion instead of a NullPointerException on size().
    assertNotNull("suggestions is null and it shouldn't be", suggestions);
    assertTrue("suggestions size should be 0", suggestions.size() == 0);

    // test something that is spelled correctly
    spellOpts.tokens = queryConverter.convert("document");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    // The message now matches the condition being asserted (suggestions must be null).
    assertTrue("suggestions is not null and it should be", suggestions == null);

    // Has multiple possibilities, but the exact exists, so that should be returned
    spellOpts.tokens = queryConverter.convert("red");
    spellOpts.count = 2;
    result = checker.getSuggestions(spellOpts);
    assertNotNull(result);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions is not null and it should be", suggestions == null);

    // Try out something which should have multiple suggestions
    spellOpts.tokens = queryConverter.convert("bug");
    result = checker.getSuggestions(spellOpts);
    assertNotNull(result);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertNotNull(suggestions);
    assertTrue("suggestions Size: " + suggestions.size() + " is not: " + 2, suggestions.size() == 2);
    // Validate every suggestion. Previously a fresh iterator was created for the "second"
    // check, so the first entry was verified twice and the second one never.
    for (Map.Entry<String, Integer> suggestion : suggestions.entrySet()) {
      assertTrue(suggestion.getKey() + " is equal to " + "bug and it shouldn't be",
          !suggestion.getKey().equals("bug"));
      assertTrue(suggestion.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO,
          suggestion.getValue() == SpellingResult.NO_FREQUENCY_INFO);
    }
  } finally {
    // Always release the searcher reference obtained from the core.
    holder.decref();
  }
}
use of org.apache.lucene.analysis.Token in project lucene-solr by apache.
the class IndexBasedSpellCheckerTest method testExtendedResults.
/**
 * Runs an {@link IndexBasedSpellChecker} over the {@code title} field using the seven-argument
 * {@link SpellingOptions} constructor and checks that the suggestion for "documemt" is
 * "document" with an associated value of 2.
 *
 * <p>Also checks that "super" produces an empty suggestion map and that "document" produces no
 * suggestion map at all.
 *
 * @throws Exception if initialising or building the spell checker, or obtaining the searcher, fails
 */
@Test
public void testExtendedResults() throws Exception {
  IndexBasedSpellChecker spellChecker = new IndexBasedSpellChecker();
  NamedList config = new NamedList();
  config.add("classname", IndexBasedSpellChecker.class.getName());
  File spellIndexDir = createTempDir().toFile();
  spellIndexDir.mkdirs();
  config.add(AbstractLuceneSpellChecker.INDEX_DIR, spellIndexDir.getAbsolutePath());
  config.add(AbstractLuceneSpellChecker.FIELD, "title");
  config.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, config);

  SolrCore solrCore = h.getCore();
  String dictionaryName = spellChecker.init(config, solrCore);
  assertTrue(
      dictionaryName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
      dictionaryName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME));

  RefCounted<SolrIndexSearcher> searcherRef = solrCore.getSearcher();
  SolrIndexSearcher indexSearcher = searcherRef.get();
  try {
    spellChecker.build(solrCore, indexSearcher);
    IndexReader indexReader = indexSearcher.getIndexReader();

    // Misspelled input; the boolean argument presumably switches on extended results
    // (frequency information) -- confirm against SpellingOptions.
    Collection<Token> misspelled = queryConverter.convert("documemt");
    SpellingOptions options =
        new SpellingOptions(misspelled, indexReader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
    SpellingResult spellingResult = spellChecker.getSuggestions(options);
    assertTrue("result is null and it shouldn't be", spellingResult != null);

    // should be lowercased, b/c we are using a lowercasing analyzer
    Token firstToken = options.tokens.iterator().next();
    Map<String, Integer> corrections = spellingResult.get(firstToken);
    assertTrue("documemt is null and it shouldn't be", corrections != null);
    assertTrue("documemt Size: " + corrections.size() + " is not: " + 1, corrections.size() == 1);
    Map.Entry<String, Integer> firstCorrection = corrections.entrySet().iterator().next();
    assertTrue(
        firstCorrection.getKey() + " is not equal to " + "document",
        firstCorrection.getKey().equals("document"));
    assertTrue(firstCorrection.getValue() + " does not equal: " + 2, firstCorrection.getValue() == 2);

    // A term the spell checker knows nothing about: expect an empty map.
    options.tokens = queryConverter.convert("super");
    spellingResult = spellChecker.getSuggestions(options);
    assertTrue("result is null and it shouldn't be", spellingResult != null);
    firstToken = options.tokens.iterator().next();
    corrections = spellingResult.get(firstToken);
    assertTrue("suggestions size should be 0", corrections.size() == 0);

    // A correctly spelled term: expect no suggestion map.
    options.tokens = queryConverter.convert("document");
    spellingResult = spellChecker.getSuggestions(options);
    assertTrue("result is null and it shouldn't be", spellingResult != null);
    firstToken = options.tokens.iterator().next();
    corrections = spellingResult.get(firstToken);
    assertTrue("suggestions is not null and it should be", corrections == null);
  } finally {
    // Release the searcher reference regardless of the test outcome.
    searcherRef.decref();
  }
}
use of org.apache.lucene.analysis.Token in project lucene-solr by apache.
the class SpellPossibilityIteratorTest method testOverlappingTokens.
/**
 * Iterates every possibility produced by a {@link PossibilityIterator} fed with suggestions for
 * {@code TOKEN_AYE}, {@code TOKEN_BEE}, {@code TOKEN_AYE_BEE} and {@code TOKEN_CEE}, and checks
 * that each possibility:
 * <ul>
 *   <li>contains a correction for {@code TOKEN_CEE};</li>
 *   <li>contains either {@code TOKEN_AYE_BEE}, or both {@code TOKEN_AYE} and {@code TOKEN_BEE},
 *       but never a mix of the two;</li>
 *   <li>is not a duplicate of an earlier possibility.</li>
 * </ul>
 *
 * <p>The two counters are advanced once per correction (not once per possibility); the expected
 * totals of 2160 and 180 rely on that.
 *
 * @throws Exception declared for parity with the other tests; nothing here throws it explicitly
 */
@Test
public void testOverlappingTokens() throws Exception {
  Map<Token, LinkedHashMap<String, Integer>> suggestionsByToken = new LinkedHashMap<>();
  suggestionsByToken.put(TOKEN_AYE, AYE);
  suggestionsByToken.put(TOKEN_BEE, BEE);
  suggestionsByToken.put(TOKEN_AYE_BEE, AYE_BEE);
  suggestionsByToken.put(TOKEN_CEE, CEE);

  // NOTE(review): the trailing `true` presumably enables overlap handling -- confirm against
  // the PossibilityIterator constructor.
  PossibilityIterator possibilities =
      new PossibilityIterator(suggestionsByToken, Integer.MAX_VALUE, Integer.MAX_VALUE, true);

  int separateCount = 0;
  int combinedCount = 0;
  Set<PossibilityIterator.RankedSpellPossibility> seen = new HashSet<>();

  while (possibilities.hasNext()) {
    PossibilityIterator.RankedSpellPossibility possibility = possibilities.next();
    Token aye = null;
    Token bee = null;
    Token ayeBee = null;
    Token cee = null;

    for (SpellCheckCorrection correction : possibility.corrections) {
      Token original = correction.getOriginal();
      if (original.equals(TOKEN_AYE)) {
        aye = original;
      } else if (original.equals(TOKEN_BEE)) {
        bee = original;
      } else if (original.equals(TOKEN_AYE_BEE)) {
        ayeBee = original;
      } else if (original.equals(TOKEN_CEE)) {
        cee = original;
      }

      // Counted on every correction, based on whether the combined token has been seen so far
      // within this possibility.
      if (ayeBee == null) {
        separateCount++;
      } else {
        combinedCount++;
      }
    }

    assertTrue(cee != null);
    assertTrue(ayeBee != null || (aye != null && bee != null));
    assertTrue(ayeBee == null || (aye == null && bee == null));
    assertTrue(seen.add(possibility));
  }

  assertTrue(separateCount == 2160);
  assertTrue(combinedCount == 180);
}
use of org.apache.lucene.analysis.Token in project lucene-solr by apache.
the class SpellPossibilityIteratorTest method testSpellPossibilityIterator.
/**
 * Counts how many possibilities a {@link PossibilityIterator} yields as the suggestion map is
 * shrunk from three tokens to none.
 *
 * <p>Expected counts: 720 for three tokens, 72 for two, 5 for one token when the second
 * constructor argument is 5, and 0 for an empty map. The first possibility of the three-token
 * run must consist of the corrections "I", "alpha" and "one", in that order.
 *
 * @throws Exception declared for parity with the other tests; nothing here throws it explicitly
 */
@Test
public void testSpellPossibilityIterator() throws Exception {
  Map<Token, LinkedHashMap<String, Integer>> suggestionsByToken = new LinkedHashMap<>();
  suggestionsByToken.put(TOKEN_AYE, AYE);
  suggestionsByToken.put(TOKEN_BEE, BEE);
  suggestionsByToken.put(TOKEN_CEE, CEE);

  // Three tokens: also verify the content of the very first possibility.
  PossibilityIterator possibilities = new PossibilityIterator(suggestionsByToken, 1000, 10000, false);
  int seen;
  for (seen = 0; possibilities.hasNext(); seen++) {
    PossibilityIterator.RankedSpellPossibility possibility = possibilities.next();
    if (seen == 0) {
      assertTrue("I".equals(possibility.corrections.get(0).getCorrection()));
      assertTrue("alpha".equals(possibility.corrections.get(1).getCorrection()));
      assertTrue("one".equals(possibility.corrections.get(2).getCorrection()));
    }
  }
  assertTrue(("Three maps (8*9*10) should return 720 iterations but instead returned " + seen), seen == 720);

  // Two tokens.
  suggestionsByToken.remove(TOKEN_CEE);
  possibilities = new PossibilityIterator(suggestionsByToken, 100, 10000, false);
  for (seen = 0; possibilities.hasNext(); seen++) {
    possibilities.next();
  }
  assertTrue(("Two maps (8*9) should return 72 iterations but instead returned " + seen), seen == 72);

  // One token, with the second constructor argument lowered to 5.
  suggestionsByToken.remove(TOKEN_BEE);
  possibilities = new PossibilityIterator(suggestionsByToken, 5, 10000, false);
  for (seen = 0; possibilities.hasNext(); seen++) {
    possibilities.next();
  }
  assertTrue(("We requested 5 suggestions but got " + seen), seen == 5);

  // No tokens at all.
  suggestionsByToken.remove(TOKEN_AYE);
  possibilities = new PossibilityIterator(suggestionsByToken, Integer.MAX_VALUE, 10000, false);
  for (seen = 0; possibilities.hasNext(); seen++) {
    possibilities.next();
  }
  assertTrue(("No maps should return 0 iterations but instead returned " + seen), seen == 0);
}
use of org.apache.lucene.analysis.Token in project lucene-solr by apache.
the class SpellingQueryConverterTest method testMultipleClauses.
/**
 * Checks that a {@link SpellingQueryConverter} backed by a {@link WhitespaceAnalyzer} produces
 * two tokens for a two-clause query, both when each clause is a {@code field:value} pair and when
 * the second clause is a bare term.
 */
@Test
public void testMultipleClauses() {
  SpellingQueryConverter spellingConverter = new SpellingQueryConverter();
  spellingConverter.init(new NamedList());
  spellingConverter.setAnalyzer(new WhitespaceAnalyzer());

  // Case 1: two field:value clauses -> two tokens expected.
  Collection<Token> converted = spellingConverter.convert("买text_field:我购买了道具和服装。 field2:bar");
  assertTrue("tokens is null and it shouldn't be", converted != null);
  assertEquals("tokens Size: " + converted.size() + " is not 2", 2, converted.size());

  // Case 2: one field:value clause followed by a plain search term -> two tokens expected.
  converted = spellingConverter.convert("text_field:我购买了道具和服装。 bar");
  assertTrue("tokens is null and it shouldn't be", converted != null);
  assertEquals("tokens Size: " + converted.size() + " is not 2", 2, converted.size());
}
Aggregations