Use of org.apache.solr.spelling.PossibilityIterator in project lucene-solr by apache: class SpellPossibilityIteratorTest, method testOverlappingTokens.
@Test
public void testOverlappingTokens() throws Exception {
  Map<Token, LinkedHashMap<String, Integer>> overlappingSuggestions = new LinkedHashMap<>();
  overlappingSuggestions.put(TOKEN_AYE, AYE);
  overlappingSuggestions.put(TOKEN_BEE, BEE);
  overlappingSuggestions.put(TOKEN_AYE_BEE, AYE_BEE);
  overlappingSuggestions.put(TOKEN_CEE, CEE);
  PossibilityIterator iter = new PossibilityIterator(overlappingSuggestions, Integer.MAX_VALUE, Integer.MAX_VALUE, true);
  int aCount = 0;
  int abCount = 0;
  Set<PossibilityIterator.RankedSpellPossibility> dupChecker = new HashSet<>();
  while (iter.hasNext()) {
    PossibilityIterator.RankedSpellPossibility rsp = iter.next();
    Token a = null;
    Token b = null;
    Token ab = null;
    Token c = null;
    for (SpellCheckCorrection scc : rsp.corrections) {
      if (scc.getOriginal().equals(TOKEN_AYE)) {
        a = scc.getOriginal();
      } else if (scc.getOriginal().equals(TOKEN_BEE)) {
        b = scc.getOriginal();
      } else if (scc.getOriginal().equals(TOKEN_AYE_BEE)) {
        ab = scc.getOriginal();
      } else if (scc.getOriginal().equals(TOKEN_CEE)) {
        c = scc.getOriginal();
      }
      if (ab != null) {
        abCount++;
      } else {
        aCount++;
      }
    }
    assertTrue(c != null);
    assertTrue(ab != null || (a != null && b != null));
    assertTrue(ab == null || (a == null && b == null));
    assertTrue(dupChecker.add(rsp));
  }
  assertTrue(aCount == 2160);
  assertTrue(abCount == 180);
}
Use of org.apache.solr.spelling.PossibilityIterator in project lucene-solr by apache: class SpellPossibilityIteratorTest, method testSpellPossibilityIterator.
@Test
public void testSpellPossibilityIterator() throws Exception {
  Map<Token, LinkedHashMap<String, Integer>> suggestions = new LinkedHashMap<>();
  suggestions.put(TOKEN_AYE, AYE);
  suggestions.put(TOKEN_BEE, BEE);
  suggestions.put(TOKEN_CEE, CEE);
  PossibilityIterator iter = new PossibilityIterator(suggestions, 1000, 10000, false);
  int count = 0;
  while (iter.hasNext()) {
    PossibilityIterator.RankedSpellPossibility rsp = iter.next();
    if (count == 0) {
      assertTrue("I".equals(rsp.corrections.get(0).getCorrection()));
      assertTrue("alpha".equals(rsp.corrections.get(1).getCorrection()));
      assertTrue("one".equals(rsp.corrections.get(2).getCorrection()));
    }
    count++;
  }
  assertTrue(("Three maps (8*9*10) should return 720 iterations but instead returned " + count), count == 720);
  suggestions.remove(TOKEN_CEE);
  iter = new PossibilityIterator(suggestions, 100, 10000, false);
  count = 0;
  while (iter.hasNext()) {
    iter.next();
    count++;
  }
  assertTrue(("Two maps (8*9) should return 72 iterations but instead returned " + count), count == 72);
  suggestions.remove(TOKEN_BEE);
  iter = new PossibilityIterator(suggestions, 5, 10000, false);
  count = 0;
  while (iter.hasNext()) {
    iter.next();
    count++;
  }
  assertTrue(("We requested 5 suggestions but got " + count), count == 5);
  suggestions.remove(TOKEN_AYE);
  iter = new PossibilityIterator(suggestions, Integer.MAX_VALUE, 10000, false);
  count = 0;
  while (iter.hasNext()) {
    iter.next();
    count++;
  }
  assertTrue(("No maps should return 0 iterations but instead returned " + count), count == 0);
}
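The static fixtures these tests rely on (TOKEN_AYE, TOKEN_BEE, TOKEN_AYE_BEE, TOKEN_CEE and the AYE, BEE, AYE_BEE, CEE suggestion maps) are defined elsewhere in SpellPossibilityIteratorTest and are not reproduced on this page. The sketch below is a hypothetical reconstruction of that setup: the token offsets mirror the ones testScalability uses below, the map sizes (8, 9, 10) come from the "8*9*10" assertion message, and the first entry of each map ("I", "alpha", "one") comes from the count == 0 assertions; every other suggestion string, the token texts, and the contents of AYE_BEE are placeholders.

// Hypothetical reconstruction of the test fixtures. Only the offsets, the map
// sizes (8, 9, 10) and the first suggestion of each map are confirmed by the
// tests on this page; everything else is a placeholder.
private static final Token TOKEN_AYE = new Token("AYE", 0, 3);
private static final Token TOKEN_BEE = new Token("BEE", 4, 7);
private static final Token TOKEN_AYE_BEE = new Token("AYE BEE", 0, 7); // spans AYE and BEE
private static final Token TOKEN_CEE = new Token("CEE", 8, 11);

private static LinkedHashMap<String, Integer> AYE;
private static LinkedHashMap<String, Integer> BEE;
private static LinkedHashMap<String, Integer> CEE;
private static LinkedHashMap<String, Integer> AYE_BEE;

@BeforeClass
public static void buildSuggestionMaps() {
  AYE = new LinkedHashMap<>();
  AYE.put("I", 0);                 // first-ranked correction for TOKEN_AYE
  for (int i = 2; i <= 8; i++) {
    AYE.put("aye-" + i, 0);        // placeholders to reach 8 suggestions
  }
  BEE = new LinkedHashMap<>();
  BEE.put("alpha", 0);             // first-ranked correction for TOKEN_BEE
  for (int i = 2; i <= 9; i++) {
    BEE.put("bee-" + i, 0);        // placeholders to reach 9 suggestions
  }
  CEE = new LinkedHashMap<>();
  CEE.put("one", 0);               // first-ranked correction for TOKEN_CEE
  for (int i = 2; i <= 10; i++) {
    CEE.put("cee-" + i, 0);        // placeholders to reach 10 suggestions
  }
  AYE_BEE = new LinkedHashMap<>();
  AYE_BEE.put("aye-bee", 0);       // size and contents not shown in these excerpts
}

Note that the exact counts asserted in testOverlappingTokens (2160 and 180) depend on the real size of AYE_BEE, which these excerpts do not show.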
Use of org.apache.solr.spelling.PossibilityIterator in project lucene-solr by apache: class SpellPossibilityIteratorTest, method testScalability.
@Test
public void testScalability() throws Exception {
  Map<Token, LinkedHashMap<String, Integer>> lotsaSuggestions = new LinkedHashMap<>();
  lotsaSuggestions.put(TOKEN_AYE, AYE);
  lotsaSuggestions.put(TOKEN_BEE, BEE);
  lotsaSuggestions.put(TOKEN_CEE, CEE);
  lotsaSuggestions.put(new Token("AYE1", 0, 3), AYE);
  lotsaSuggestions.put(new Token("BEE1", 4, 7), BEE);
  lotsaSuggestions.put(new Token("CEE1", 8, 11), CEE);
  lotsaSuggestions.put(new Token("AYE2", 0, 3), AYE);
  lotsaSuggestions.put(new Token("BEE2", 4, 7), BEE);
  lotsaSuggestions.put(new Token("CEE2", 8, 11), CEE);
  lotsaSuggestions.put(new Token("AYE3", 0, 3), AYE);
  lotsaSuggestions.put(new Token("BEE3", 4, 7), BEE);
  lotsaSuggestions.put(new Token("CEE3", 8, 11), CEE);
  lotsaSuggestions.put(new Token("AYE4", 0, 3), AYE);
  lotsaSuggestions.put(new Token("BEE4", 4, 7), BEE);
  lotsaSuggestions.put(new Token("CEE4", 8, 11), CEE);
  PossibilityIterator iter = new PossibilityIterator(lotsaSuggestions, 1000, 10000, false);
  int count = 0;
  while (iter.hasNext()) {
    PossibilityIterator.RankedSpellPossibility rsp = iter.next();
    count++;
  }
  assertTrue(count == 1000);
  lotsaSuggestions.put(new Token("AYE_BEE1", 0, 7), AYE_BEE);
  lotsaSuggestions.put(new Token("AYE_BEE2", 0, 7), AYE_BEE);
  lotsaSuggestions.put(new Token("AYE_BEE3", 0, 7), AYE_BEE);
  lotsaSuggestions.put(new Token("AYE_BEE4", 0, 7), AYE_BEE);
  iter = new PossibilityIterator(lotsaSuggestions, 1000, 10000, true);
  count = 0;
  while (iter.hasNext()) {
    PossibilityIterator.RankedSpellPossibility rsp = iter.next();
    count++;
  }
  assertTrue(count < 100);
}
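The three tests above only count and rank-check the possibilities. The fragment below is a minimal usage sketch assembled purely from the calls already shown (the constructor, hasNext/next, rsp.corrections, getCorrection): it drains the iterator in rank order and joins each possibility's corrected terms into a flat candidate string. Based on the assertions above, the second constructor argument caps how many possibilities are returned, and the third appears to bound the internal ranking work (the Alfresco collator below passes maxCollationEvaluations there); the limits 10 and 1000 used here are arbitrary.

// Minimal sketch: turn the ranked possibilities into plain candidate strings.
// 'suggestions' is a Map<Token, LinkedHashMap<String, Integer>> as in the tests above;
// the limits 10 and 1000 are arbitrary example values.
PossibilityIterator iter = new PossibilityIterator(suggestions, 10, 1000, false);
List<String> candidates = new ArrayList<>();
while (iter.hasNext()) {
  PossibilityIterator.RankedSpellPossibility rsp = iter.next();
  StringBuilder sb = new StringBuilder();
  for (SpellCheckCorrection scc : rsp.corrections) {
    if (sb.length() > 0) {
      sb.append(' ');
    }
    sb.append(scc.getCorrection());   // corrected term for this token
  }
  candidates.add(sb.toString());      // e.g. "I alpha one" for the top-ranked possibility
}

The Alfresco collator below does essentially this, but rebuilds a full query string from the corrections and optionally verifies each candidate against the index.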
Use of org.apache.solr.spelling.PossibilityIterator in project SearchServices by Alfresco: class AlfrescoSpellCheckCollator, method collate.
public List<AlfrescoSpellCheckCollation> collate(SpellingResult result, String originalQuery, ResponseBuilder ultimateResponse) {
  List<AlfrescoSpellCheckCollation> collations = new ArrayList<>();
  QueryComponent queryComponent = null;
  if (ultimateResponse.components != null) {
    for (SearchComponent sc : ultimateResponse.components) {
      if (sc instanceof QueryComponent) {
        queryComponent = (QueryComponent) sc;
        break;
      }
    }
  }
  boolean verifyCandidateWithQuery = true;
  int maxTries = maxCollationTries;
  int maxNumberToIterate = maxTries;
  if (maxTries < 1) {
    maxTries = 1;
    maxNumberToIterate = maxCollations;
    verifyCandidateWithQuery = false;
  }
  if (queryComponent == null && verifyCandidateWithQuery) {
    LOG.info("Could not find an instance of QueryComponent. Disabling collation verification against the index.");
    maxTries = 1;
    verifyCandidateWithQuery = false;
  }
  docCollectionLimit = docCollectionLimit > 0 ? docCollectionLimit : 0;
  int maxDocId = -1;
  if (verifyCandidateWithQuery && docCollectionLimit > 0) {
    IndexReader reader = ultimateResponse.req.getSearcher().getIndexReader();
    maxDocId = reader.maxDoc();
  }
  JSONObject alfrescoJSON = (JSONObject) ultimateResponse.req.getContext().get(AbstractQParser.ALFRESCO_JSON);
  String originalAftsQuery = alfrescoJSON != null ? alfrescoJSON.getString("query") : ultimateResponse.getQueryString();
  int tryNo = 0;
  int collNo = 0;
  PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(), maxNumberToIterate, maxCollationEvaluations, suggestionsMayOverlap);
  while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) {
    PossibilityIterator.RankedSpellPossibility possibility = possibilityIter.next();
    String collationQueryStr = getCollation(originalQuery, possibility.corrections);
    int hits = 0;
    String aftsQuery = null;
    if (verifyCandidateWithQuery) {
      tryNo++;
      SolrQueryRequest req = ultimateResponse.req;
      SolrParams origParams = req.getParams();
      ModifiableSolrParams params = new ModifiableSolrParams(origParams);
      Iterator<String> origParamIterator = origParams.getParameterNamesIterator();
      int pl = SpellingParams.SPELLCHECK_COLLATE_PARAM_OVERRIDE.length();
      while (origParamIterator.hasNext()) {
        String origParamName = origParamIterator.next();
        if (origParamName.startsWith(SpellingParams.SPELLCHECK_COLLATE_PARAM_OVERRIDE) && origParamName.length() > pl) {
          String[] val = origParams.getParams(origParamName);
          if (val.length == 1 && val[0].length() == 0) {
            params.set(origParamName.substring(pl), (String[]) null);
          } else {
            params.set(origParamName.substring(pl), val);
          }
        }
      }
      // we don't set the 'q' param, as we'll pass the query via JSON.
      // params.set(CommonParams.Q, collationQueryStr);
      params.remove(CommonParams.START);
      params.set(CommonParams.ROWS, "" + docCollectionLimit);
      // we don't want any stored fields
      params.set(CommonParams.FL, "id");
      // we'll sort by doc id to ensure no scoring is done.
      params.set(CommonParams.SORT, "_docid_ asc");
      // If a dismax query, don't add unnecessary clauses for scoring
      params.remove(DisMaxParams.TIE);
      params.remove(DisMaxParams.PF);
      params.remove(DisMaxParams.PF2);
      params.remove(DisMaxParams.PF3);
      params.remove(DisMaxParams.BQ);
      params.remove(DisMaxParams.BF);
      // Collate testing does not support Grouping (see SOLR-2577)
      params.remove(GroupParams.GROUP);
      boolean useQStr = true;
      if (alfrescoJSON != null) {
        try {
          aftsQuery = originalAftsQuery.replaceAll(Pattern.quote(originalQuery), Matcher.quoteReplacement(collationQueryStr));
          alfrescoJSON.put("query", aftsQuery);
          req.getContext().put(AbstractQParser.ALFRESCO_JSON, alfrescoJSON);
          useQStr = false;
        } catch (JSONException e) {
LOG.warn("Exception trying to get/set the query from/to ALFRESCO_JSON.]" + e);
        }
      } else {
        aftsQuery = collationQueryStr;
      }
      req.setParams(params);
      // creating a request here... make sure to close it!
      ResponseBuilder checkResponse = new ResponseBuilder(req, new SolrQueryResponse(), Arrays.<SearchComponent>asList(queryComponent));
      checkResponse.setQparser(ultimateResponse.getQparser());
      checkResponse.setFilters(ultimateResponse.getFilters());
      checkResponse.components = Arrays.<SearchComponent>asList(queryComponent);
      if (useQStr) {
        checkResponse.setQueryString(collationQueryStr);
      }
      try {
        queryComponent.prepare(checkResponse);
        if (docCollectionLimit > 0) {
          int f = checkResponse.getFieldFlags();
          checkResponse.setFieldFlags(f |= SolrIndexSearcher.TERMINATE_EARLY);
        }
        queryComponent.process(checkResponse);
        hits = (Integer) checkResponse.rsp.getToLog().get("hits");
      } catch (EarlyTerminatingCollectorException etce) {
        assert (docCollectionLimit > 0);
        assert 0 < etce.getNumberScanned();
        assert 0 < etce.getNumberCollected();
        if (etce.getNumberScanned() == maxDocId) {
          hits = etce.getNumberCollected();
        } else {
          hits = (int) (((float) (maxDocId * etce.getNumberCollected())) / (float) etce.getNumberScanned());
        }
      } catch (Exception e) {
        LOG.warn("Exception trying to re-query to check if a spell check possibility would return any hits." + e);
      } finally {
        checkResponse.req.close();
      }
    }
    if (hits > 0 || !verifyCandidateWithQuery) {
      collNo++;
      AlfrescoSpellCheckCollation collation = new AlfrescoSpellCheckCollation();
      collation.setCollationQuery(aftsQuery);
      collation.setCollationQueryString(collationQueryStr);
      collation.setHits(hits);
      collation.setInternalRank(suggestionsMayOverlap ? ((possibility.rank * 1000) + possibility.index) : possibility.rank);
      NamedList<String> misspellingsAndCorrections = new NamedList<>();
      for (SpellCheckCorrection corr : possibility.corrections) {
        misspellingsAndCorrections.add(corr.getOriginal().toString(), corr.getCorrection());
      }
      collation.setMisspellingsAndCorrections(misspellingsAndCorrections);
      collations.add(collation);
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Collation: " + aftsQuery + (verifyCandidateWithQuery ? (" will return " + hits + " hits.") : ""));
    }
  }
  return collations;
}
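For context on the parameter loop near the top of collate(): it applies Solr's spellcheck.collateParam.* override convention, where any request parameter whose name starts with SpellingParams.SPELLCHECK_COLLATE_PARAM_OVERRIDE ("spellcheck.collateParam." in stock Solr) replaces the corresponding parameter on the verification query, and an empty value removes it. Below is a standalone sketch of that rewrite, using org.apache.solr.common.params.ModifiableSolrParams; the parameter names "mm" and "q.op" are only illustrative.

// Sketch of the spellcheck.collateParam.* rewrite performed in collate() above.
// Parameter names here ("mm", "q.op") are illustrative, not required by the collator.
ModifiableSolrParams original = new ModifiableSolrParams();
original.set("mm", "75%");
original.set("spellcheck.collateParam.mm", "100%");  // overrides mm for the verification query
original.set("spellcheck.collateParam.q.op", "");    // empty value removes q.op entirely

ModifiableSolrParams forVerification = new ModifiableSolrParams(original);
String prefix = "spellcheck.collateParam.";
Iterator<String> names = original.getParameterNamesIterator();
while (names.hasNext()) {
  String name = names.next();
  if (name.startsWith(prefix) && name.length() > prefix.length()) {
    String[] val = original.getParams(name);
    if (val.length == 1 && val[0].length() == 0) {
      forVerification.set(name.substring(prefix.length()), (String[]) null); // unset
    } else {
      forVerification.set(name.substring(prefix.length()), val);             // override
    }
  }
}
// forVerification now carries mm=100% and no q.op for the collation check.

In practice this lets a request carry something like spellcheck.collateParam.mm=100% so that collations are only reported when every corrected term matches, without changing how the user's main query is executed.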