use of org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option in project elasticsearch by elastic.
the class PhraseSuggester method innerExecute.
/*
* More Ideas:
* - add ability to find whitespace problems -> we can build a poor mans decompounder with our index based on a automaton?
* - add ability to build different error models maybe based on a confusion matrix?
* - try to combine a token with its subsequent token to find / detect word splits (optional)
* - for this to work we need some way to defined the position length of a candidate
* - phonetic filters could be interesting here too for candidate selection
*/
@Override
public Suggestion<? extends Entry<? extends Option>> innerExecute(String name, PhraseSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
double realWordErrorLikelihood = suggestion.realworldErrorLikelyhood();
final PhraseSuggestion response = new PhraseSuggestion(name, suggestion.getSize());
final IndexReader indexReader = searcher.getIndexReader();
List<PhraseSuggestionContext.DirectCandidateGenerator> generators = suggestion.generators();
final int numGenerators = generators.size();
final List<CandidateGenerator> gens = new ArrayList<>(generators.size());
for (int i = 0; i < numGenerators; i++) {
PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i);
DirectSpellChecker directSpellChecker = generator.createDirectSpellChecker();
Terms terms = MultiFields.getTerms(indexReader, generator.field());
if (terms != null) {
gens.add(new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(), indexReader, realWordErrorLikelihood, generator.size(), generator.preFilter(), generator.postFilter(), terms));
}
}
final String suggestField = suggestion.getField();
final Terms suggestTerms = MultiFields.getTerms(indexReader, suggestField);
if (gens.size() > 0 && suggestTerms != null) {
final NoisyChannelSpellChecker checker = new NoisyChannelSpellChecker(realWordErrorLikelihood, suggestion.getRequireUnigram(), suggestion.getTokenLimit());
final BytesRef separator = suggestion.separator();
WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestTerms, suggestField, realWordErrorLikelihood, separator);
Result checkerResult;
try (TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField())) {
checkerResult = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(), gens.toArray(new CandidateGenerator[gens.size()])), suggestion.maxErrors(), suggestion.getShardSize(), wordScorer, suggestion.confidence(), suggestion.gramSize());
}
PhraseSuggestion.Entry resultEntry = buildResultEntry(suggestion, spare, checkerResult.cutoffScore);
response.addTerm(resultEntry);
final BytesRefBuilder byteSpare = new BytesRefBuilder();
final Function<Map<String, Object>, ExecutableScript> collateScript = suggestion.getCollateQueryScript();
final boolean collatePrune = (collateScript != null) && suggestion.collatePrune();
for (int i = 0; i < checkerResult.corrections.length; i++) {
Correction correction = checkerResult.corrections[i];
spare.copyUTF8Bytes(correction.join(SEPARATOR, byteSpare, null, null));
boolean collateMatch = true;
if (collateScript != null) {
// Checks if the template query collateScript yields any documents
// from the index for a correction, collateMatch is updated
final Map<String, Object> vars = suggestion.getCollateScriptParams();
vars.put(SUGGESTION_TEMPLATE_VAR_NAME, spare.toString());
QueryShardContext shardContext = suggestion.getShardContext();
final ExecutableScript executable = collateScript.apply(vars);
final BytesReference querySource = (BytesReference) executable.run();
try (XContentParser parser = XContentFactory.xContent(querySource).createParser(shardContext.getXContentRegistry(), querySource)) {
QueryBuilder innerQueryBuilder = shardContext.newParseContext(parser).parseInnerQueryBuilder();
final ParsedQuery parsedQuery = shardContext.toQuery(innerQueryBuilder);
collateMatch = Lucene.exists(searcher, parsedQuery.query());
}
}
if (!collateMatch && !collatePrune) {
continue;
}
Text phrase = new Text(spare.toString());
Text highlighted = null;
if (suggestion.getPreTag() != null) {
spare.copyUTF8Bytes(correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()));
highlighted = new Text(spare.toString());
}
if (collatePrune) {
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score), collateMatch));
} else {
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
}
}
} else {
response.addTerm(buildResultEntry(suggestion, spare, Double.MIN_VALUE));
}
return response;
}
use of org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option in project elasticsearch by elastic.
the class SuggestTests method testToXContent.
public void testToXContent() throws IOException {
Option option = new Option(new Text("someText"), new Text("somethingHighlighted"), 1.3f, true);
Entry<Option> entry = new Entry<>(new Text("entryText"), 42, 313);
entry.addOption(option);
Suggestion<Entry<Option>> suggestion = new Suggestion<>("suggestionName", 5);
suggestion.addTerm(entry);
Suggest suggest = new Suggest(Collections.singletonList(suggestion));
BytesReference xContent = toXContent(suggest, XContentType.JSON, randomBoolean());
assertEquals("{\"suggest\":" + "{\"suggestionName\":" + "[{\"text\":\"entryText\"," + "\"offset\":42," + "\"length\":313," + "\"options\":[{\"text\":\"someText\"," + "\"highlighted\":\"somethingHighlighted\"," + "\"score\":1.3," + "\"collate_match\":true}]" + "}]" + "}" + "}", xContent.utf8ToString());
}
use of org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option in project elasticsearch by elastic.
the class SuggestionEntryTests method createTestItem.
/**
* Create a randomized Suggestion.Entry
*/
@SuppressWarnings("unchecked")
public static <O extends Option> Entry<O> createTestItem(Class<? extends Entry> entryType) {
Text entryText = new Text(randomAsciiOfLengthBetween(5, 15));
int offset = randomInt();
int length = randomInt();
Entry entry;
Supplier<Option> supplier;
if (entryType == TermSuggestion.Entry.class) {
entry = new TermSuggestion.Entry(entryText, offset, length);
supplier = TermSuggestionOptionTests::createTestItem;
} else if (entryType == PhraseSuggestion.Entry.class) {
entry = new PhraseSuggestion.Entry(entryText, offset, length, randomDouble());
supplier = SuggestionOptionTests::createTestItem;
} else if (entryType == CompletionSuggestion.Entry.class) {
entry = new CompletionSuggestion.Entry(entryText, offset, length);
supplier = CompletionSuggestionOptionTests::createTestItem;
} else {
throw new UnsupportedOperationException("entryType not supported [" + entryType + "]");
}
int numOptions = randomIntBetween(0, 5);
for (int i = 0; i < numOptions; i++) {
entry.addOption(supplier.get());
}
return entry;
}
use of org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option in project elasticsearch by elastic.
the class SuggestionEntryTests method testToXContent.
public void testToXContent() throws IOException {
Option option = new Option(new Text("someText"), new Text("somethingHighlighted"), 1.3f, true);
Entry<Option> entry = new Entry<>(new Text("entryText"), 42, 313);
entry.addOption(option);
BytesReference xContent = toXContent(entry, XContentType.JSON, randomBoolean());
assertEquals("{\"text\":\"entryText\"," + "\"offset\":42," + "\"length\":313," + "\"options\":[" + "{\"text\":\"someText\"," + "\"highlighted\":\"somethingHighlighted\"," + "\"score\":1.3," + "\"collate_match\":true}" + "]}", xContent.utf8ToString());
org.elasticsearch.search.suggest.term.TermSuggestion.Entry.Option termOption = new org.elasticsearch.search.suggest.term.TermSuggestion.Entry.Option(new Text("termSuggestOption"), 42, 3.13f);
entry = new Entry<>(new Text("entryText"), 42, 313);
entry.addOption(termOption);
xContent = toXContent(entry, XContentType.JSON, randomBoolean());
assertEquals("{\"text\":\"entryText\"," + "\"offset\":42," + "\"length\":313," + "\"options\":[" + "{\"text\":\"termSuggestOption\"," + "\"score\":3.13," + "\"freq\":42}" + "]}", xContent.utf8ToString());
org.elasticsearch.search.suggest.completion.CompletionSuggestion.Entry.Option completionOption = new org.elasticsearch.search.suggest.completion.CompletionSuggestion.Entry.Option(-1, new Text("completionOption"), 3.13f, Collections.singletonMap("key", Collections.singleton("value")));
entry = new Entry<>(new Text("entryText"), 42, 313);
entry.addOption(completionOption);
xContent = toXContent(entry, XContentType.JSON, randomBoolean());
assertEquals("{\"text\":\"entryText\"," + "\"offset\":42," + "\"length\":313," + "\"options\":[" + "{\"text\":\"completionOption\"," + "\"score\":3.13," + "\"contexts\":{\"key\":[\"value\"]}" + "}" + "]}", xContent.utf8ToString());
}
use of org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option in project elasticsearch by elastic.
the class SuggestionOptionTests method testFromXContent.
public void testFromXContent() throws IOException {
Option option = createTestItem();
XContentType xContentType = randomFrom(XContentType.values());
boolean humanReadable = randomBoolean();
BytesReference originalBytes = toXContent(option, xContentType, humanReadable);
Option parsed;
try (XContentParser parser = createParser(xContentType.xContent(), originalBytes)) {
ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser::getTokenLocation);
parsed = Option.fromXContent(parser);
assertEquals(XContentParser.Token.END_OBJECT, parser.currentToken());
assertNull(parser.nextToken());
}
assertEquals(option.getText(), parsed.getText());
assertEquals(option.getHighlighted(), parsed.getHighlighted());
assertEquals(option.getScore(), parsed.getScore(), Float.MIN_VALUE);
assertEquals(option.collateMatch(), parsed.collateMatch());
assertToXContentEquivalent(originalBytes, toXContent(parsed, xContentType, humanReadable), xContentType);
}
Aggregations