Search in sources :

Example 6 with Entry

use of org.elasticsearch.search.suggest.Suggest.Suggestion.Entry in project elasticsearch by elastic.

the class PhraseSuggester method innerExecute.

/*
     * More Ideas:
     *   - add ability to find whitespace problems -> we can build a poor man's decompounder with our index based on an automaton?
     *   - add ability to build different error models maybe based on a confusion matrix?
     *   - try to combine a token with its subsequent token to find / detect word splits (optional)
     *      - for this to work we need some way to define the position length of a candidate
     *   - phonetic filters could be interesting here too for candidate selection
     */
/**
 * Builds the phrase suggestion for the text carried by {@code suggestion}.
 * <p>
 * A candidate generator is created for every configured generator whose field has terms in the
 * reader. When at least one such generator exists and the suggest field itself has terms, the
 * text is run through a {@link NoisyChannelSpellChecker} and every returned correction becomes an
 * option of the single result entry. Otherwise a single entry without options is added, using
 * {@code Double.MIN_VALUE} as its cutoff score.
 * <p>
 * If a collate script is configured, each correction is rendered into a query and checked
 * against the index: non-matching corrections are dropped, unless collate pruning is enabled, in
 * which case they are kept and the option records whether the query matched.
 *
 * @param name       name given to the returned suggestion
 * @param suggestion context supplying the text, analyzer, generators, scoring model and collate settings
 * @param searcher   searcher whose reader provides the terms and against which collate queries are run
 * @param spare      scratch buffer that is overwritten while corrections are rendered
 * @return the suggestion, always holding exactly one entry
 * @throws IOException declared by the signature; presumably raised by index access or query parsing
 */
@Override
public Suggestion<? extends Entry<? extends Option>> innerExecute(String name, PhraseSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    final double errorLikelihood = suggestion.realworldErrorLikelyhood();
    final PhraseSuggestion response = new PhraseSuggestion(name, suggestion.getSize());
    final IndexReader reader = searcher.getIndexReader();

    // Turn every configured generator into a usable one, skipping fields without terms.
    final List<PhraseSuggestionContext.DirectCandidateGenerator> configured = suggestion.generators();
    final List<CandidateGenerator> usable = new ArrayList<>(configured.size());
    for (PhraseSuggestionContext.DirectCandidateGenerator config : configured) {
        DirectSpellChecker spellChecker = config.createDirectSpellChecker();
        Terms fieldTerms = MultiFields.getTerms(reader, config.field());
        if (fieldTerms == null) {
            continue;
        }
        usable.add(new DirectCandidateGenerator(spellChecker, config.field(), config.suggestMode(), reader, errorLikelihood, config.size(), config.preFilter(), config.postFilter(), fieldTerms));
    }

    final String suggestField = suggestion.getField();
    final Terms suggestTerms = MultiFields.getTerms(reader, suggestField);
    if (usable.isEmpty() || suggestTerms == null) {
        // Nothing to generate candidates from: answer with a single entry that has no options.
        response.addTerm(buildResultEntry(suggestion, spare, Double.MIN_VALUE));
        return response;
    }

    final NoisyChannelSpellChecker spellChecker = new NoisyChannelSpellChecker(errorLikelihood, suggestion.getRequireUnigram(), suggestion.getTokenLimit());
    final BytesRef separator = suggestion.separator();
    final WordScorer scorer = suggestion.model().newScorer(reader, suggestTerms, suggestField, errorLikelihood, separator);
    final Result result;
    try (TokenStream tokens = spellChecker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField())) {
        final CandidateGenerator combined = new MultiCandidateGeneratorWrapper(suggestion.getShardSize(), usable.toArray(new CandidateGenerator[usable.size()]));
        result = spellChecker.getCorrections(tokens, combined, suggestion.maxErrors(), suggestion.getShardSize(), scorer, suggestion.confidence(), suggestion.gramSize());
    }

    final PhraseSuggestion.Entry entry = buildResultEntry(suggestion, spare, result.cutoffScore);
    response.addTerm(entry);

    final BytesRefBuilder bytesScratch = new BytesRefBuilder();
    final Function<Map<String, Object>, ExecutableScript> collateScript = suggestion.getCollateQueryScript();
    // Pruning is only meaningful when there is a collate script to evaluate.
    final boolean collatePrune = (collateScript != null) && suggestion.collatePrune();

    for (Correction candidate : result.corrections) {
        spare.copyUTF8Bytes(candidate.join(SEPARATOR, bytesScratch, null, null));

        boolean matches = true;
        if (collateScript != null) {
            // Render the collate template for this correction and test whether the
            // resulting query finds any document in the index.
            final Map<String, Object> params = suggestion.getCollateScriptParams();
            params.put(SUGGESTION_TEMPLATE_VAR_NAME, spare.toString());
            final QueryShardContext context = suggestion.getShardContext();
            final ExecutableScript script = collateScript.apply(params);
            final BytesReference source = (BytesReference) script.run();
            try (XContentParser parser = XContentFactory.xContent(source).createParser(context.getXContentRegistry(), source)) {
                final QueryBuilder builder = context.newParseContext(parser).parseInnerQueryBuilder();
                final ParsedQuery parsed = context.toQuery(builder);
                matches = Lucene.exists(searcher, parsed.query());
            }
        }
        if (!(matches || collatePrune)) {
            // Without pruning, a correction whose collate query matches nothing is dropped.
            continue;
        }

        final Text phrase = new Text(spare.toString());
        Text highlighted = null;
        if (suggestion.getPreTag() != null) {
            spare.copyUTF8Bytes(candidate.join(SEPARATOR, bytesScratch, suggestion.getPreTag(), suggestion.getPostTag()));
            highlighted = new Text(spare.toString());
        }

        // With pruning enabled the option also carries the collate outcome.
        final Suggestion.Entry.Option option = collatePrune
            ? new Suggestion.Entry.Option(phrase, highlighted, (float) (candidate.score), matches)
            : new Suggestion.Entry.Option(phrase, highlighted, (float) (candidate.score));
        entry.addOption(option);
    }
    return response;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) ParsedQuery(org.elasticsearch.index.query.ParsedQuery) ArrayList(java.util.ArrayList) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) Result(org.elasticsearch.search.suggest.phrase.NoisyChannelSpellChecker.Result) Entry(org.elasticsearch.search.suggest.Suggest.Suggestion.Entry) ExecutableScript(org.elasticsearch.script.ExecutableScript) QueryShardContext(org.elasticsearch.index.query.QueryShardContext) DirectSpellChecker(org.apache.lucene.search.spell.DirectSpellChecker) BytesRef(org.apache.lucene.util.BytesRef) BytesReference(org.elasticsearch.common.bytes.BytesReference) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) Terms(org.apache.lucene.index.Terms) Text(org.elasticsearch.common.text.Text) IndexReader(org.apache.lucene.index.IndexReader) Option(org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option) Map(java.util.Map) XContentParser(org.elasticsearch.common.xcontent.XContentParser)

Example 7 with Entry

use of org.elasticsearch.search.suggest.Suggest.Suggestion.Entry in project elasticsearch by elastic.

the class SuggestTests method testToXContent.

/**
 * Renders a {@code Suggest} holding one suggestion with one entry and one option, and compares
 * the JSON output with the exact expected string.
 */
public void testToXContent() throws IOException {
    // Assemble the tree bottom-up: option -> entry -> suggestion -> suggest.
    Option onlyOption = new Option(new Text("someText"), new Text("somethingHighlighted"), 1.3f, true);
    Entry<Option> onlyEntry = new Entry<>(new Text("entryText"), 42, 313);
    onlyEntry.addOption(onlyOption);
    Suggestion<Entry<Option>> namedSuggestion = new Suggestion<>("suggestionName", 5);
    namedSuggestion.addTerm(onlyEntry);
    Suggest suggest = new Suggest(Collections.singletonList(namedSuggestion));

    BytesReference rendered = toXContent(suggest, XContentType.JSON, randomBoolean());

    // Note that the expected output has no field for the suggestion size (5).
    String expected = "{\"suggest\":"
        + "{\"suggestionName\":"
        + "[{\"text\":\"entryText\","
        + "\"offset\":42,"
        + "\"length\":313,"
        + "\"options\":[{\"text\":\"someText\","
        + "\"highlighted\":\"somethingHighlighted\","
        + "\"score\":1.3,"
        + "\"collate_match\":true}]"
        + "}]"
        + "}"
        + "}";
    assertEquals(expected, rendered.utf8ToString());
}
Also used : BytesReference(org.elasticsearch.common.bytes.BytesReference) CompletionSuggestion(org.elasticsearch.search.suggest.completion.CompletionSuggestion) Suggestion(org.elasticsearch.search.suggest.Suggest.Suggestion) PhraseSuggestion(org.elasticsearch.search.suggest.phrase.PhraseSuggestion) TermSuggestion(org.elasticsearch.search.suggest.term.TermSuggestion) Entry(org.elasticsearch.search.suggest.Suggest.Suggestion.Entry) Option(org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option) Text(org.elasticsearch.common.text.Text)

Example 8 with Entry

use of org.elasticsearch.search.suggest.Suggest.Suggestion.Entry in project elasticsearch by elastic.

the class SuggestionEntryTests method createTestItem.

/**
 * Create a randomized Suggestion.Entry of the requested concrete type, filled with between
 * zero and five randomized options of the matching option flavour.
 *
 * @param entryType one of the term, phrase or completion suggestion entry classes
 * @return the randomized entry
 * @throws UnsupportedOperationException if {@code entryType} is none of the supported classes
 */
@SuppressWarnings("unchecked")
public static <O extends Option> Entry<O> createTestItem(Class<? extends Entry> entryType) {
    final Text text = new Text(randomAsciiOfLengthBetween(5, 15));
    final int offset = randomInt();
    final int length = randomInt();

    // Choose the concrete entry together with the factory producing its options.
    final Entry result;
    final Supplier<Option> optionFactory;
    if (entryType == TermSuggestion.Entry.class) {
        result = new TermSuggestion.Entry(text, offset, length);
        optionFactory = TermSuggestionOptionTests::createTestItem;
    } else if (entryType == PhraseSuggestion.Entry.class) {
        result = new PhraseSuggestion.Entry(text, offset, length, randomDouble());
        optionFactory = SuggestionOptionTests::createTestItem;
    } else if (entryType == CompletionSuggestion.Entry.class) {
        result = new CompletionSuggestion.Entry(text, offset, length);
        optionFactory = CompletionSuggestionOptionTests::createTestItem;
    } else {
        throw new UnsupportedOperationException("entryType not supported [" + entryType + "]");
    }

    for (int remaining = randomIntBetween(0, 5); remaining > 0; remaining--) {
        result.addOption(optionFactory.get());
    }
    return result;
}
Also used : Entry(org.elasticsearch.search.suggest.Suggest.Suggestion.Entry) CompletionSuggestion(org.elasticsearch.search.suggest.completion.CompletionSuggestion) TermSuggestion(org.elasticsearch.search.suggest.term.TermSuggestion) Text(org.elasticsearch.common.text.Text) Option(org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option)

Example 9 with Entry

use of org.elasticsearch.search.suggest.Suggest.Suggestion.Entry in project elasticsearch by elastic.

the class SuggestionEntryTests method testToXContent.

/**
 * Renders a generic entry three times, each time holding a different option flavour
 * (plain, term, completion), and compares the JSON output with the exact expected string.
 */
public void testToXContent() throws IOException {
    // Plain option: text, highlighted text, score and collate_match are rendered.
    Option plainOption = new Option(new Text("someText"), new Text("somethingHighlighted"), 1.3f, true);
    Entry<Option> plainEntry = new Entry<>(new Text("entryText"), 42, 313);
    plainEntry.addOption(plainOption);
    BytesReference rendered = toXContent(plainEntry, XContentType.JSON, randomBoolean());
    String expectedPlain = "{\"text\":\"entryText\","
        + "\"offset\":42,"
        + "\"length\":313,"
        + "\"options\":["
        + "{\"text\":\"someText\","
        + "\"highlighted\":\"somethingHighlighted\","
        + "\"score\":1.3,"
        + "\"collate_match\":true}"
        + "]}";
    assertEquals(expectedPlain, rendered.utf8ToString());

    // Term option: rendered with its frequency.
    org.elasticsearch.search.suggest.term.TermSuggestion.Entry.Option termOption =
        new org.elasticsearch.search.suggest.term.TermSuggestion.Entry.Option(new Text("termSuggestOption"), 42, 3.13f);
    Entry<Option> termEntry = new Entry<>(new Text("entryText"), 42, 313);
    termEntry.addOption(termOption);
    rendered = toXContent(termEntry, XContentType.JSON, randomBoolean());
    String expectedTerm = "{\"text\":\"entryText\","
        + "\"offset\":42,"
        + "\"length\":313,"
        + "\"options\":["
        + "{\"text\":\"termSuggestOption\","
        + "\"score\":3.13,"
        + "\"freq\":42}"
        + "]}";
    assertEquals(expectedTerm, rendered.utf8ToString());

    // Completion option: rendered with its contexts map.
    org.elasticsearch.search.suggest.completion.CompletionSuggestion.Entry.Option completionOption =
        new org.elasticsearch.search.suggest.completion.CompletionSuggestion.Entry.Option(-1, new Text("completionOption"), 3.13f, Collections.singletonMap("key", Collections.singleton("value")));
    Entry<Option> completionEntry = new Entry<>(new Text("entryText"), 42, 313);
    completionEntry.addOption(completionOption);
    rendered = toXContent(completionEntry, XContentType.JSON, randomBoolean());
    String expectedCompletion = "{\"text\":\"entryText\","
        + "\"offset\":42,"
        + "\"length\":313,"
        + "\"options\":["
        + "{\"text\":\"completionOption\","
        + "\"score\":3.13,"
        + "\"contexts\":{\"key\":[\"value\"]}"
        + "}"
        + "]}";
    assertEquals(expectedCompletion, rendered.utf8ToString());
}
Also used : BytesReference(org.elasticsearch.common.bytes.BytesReference) CompletionSuggestion(org.elasticsearch.search.suggest.completion.CompletionSuggestion) Text(org.elasticsearch.common.text.Text) Entry(org.elasticsearch.search.suggest.Suggest.Suggestion.Entry) TermSuggestion(org.elasticsearch.search.suggest.term.TermSuggestion) Option(org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option)

Example 10 with Entry

use of org.elasticsearch.search.suggest.Suggest.Suggestion.Entry in project elasticsearch by elastic.

the class SuggestionTests method createTestItem.

/**
 * Creates a randomized suggestion of the requested concrete type and fills it with randomized
 * entries of the matching entry type.
 *
 * @param type one of the term, phrase or completion suggestion classes
 * @return the randomized suggestion; occasionally it holds no entries at all
 * @throws UnsupportedOperationException if {@code type} is none of the supported classes
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public static Suggestion<? extends Entry<? extends Option>> createTestItem(Class<? extends Suggestion> type) {
    final String name = randomAsciiOfLengthBetween(5, 10);
    // note: size will not be rendered via "toXContent", only passed on internally on transport layer
    final int size = randomInt();

    // Pick the concrete suggestion and remember which entry class belongs to it.
    final Suggestion result;
    final Class<? extends Entry> entryType;
    if (type == TermSuggestion.class) {
        result = new TermSuggestion(name, size, randomFrom(SortBy.values()));
        entryType = TermSuggestion.Entry.class;
    } else if (type == PhraseSuggestion.class) {
        result = new PhraseSuggestion(name, size);
        entryType = PhraseSuggestion.Entry.class;
    } else if (type == CompletionSuggestion.class) {
        result = new CompletionSuggestion(name, size);
        entryType = CompletionSuggestion.Entry.class;
    } else {
        throw new UnsupportedOperationException("type not supported [" + type + "]");
    }

    final int entryCount;
    if (!frequently()) {
        // also occasionally test zero entries
        entryCount = 0;
    } else if (type == CompletionSuggestion.class) {
        // CompletionSuggestion can have max. one entry
        entryCount = 1;
    } else {
        entryCount = randomIntBetween(1, 5);
    }

    for (int added = 0; added < entryCount; added++) {
        result.addTerm(SuggestionEntryTests.createTestItem(entryType));
    }
    return result;
}
Also used : CompletionSuggestion(org.elasticsearch.search.suggest.completion.CompletionSuggestion) Suggestion(org.elasticsearch.search.suggest.Suggest.Suggestion) PhraseSuggestion(org.elasticsearch.search.suggest.phrase.PhraseSuggestion) TermSuggestion(org.elasticsearch.search.suggest.term.TermSuggestion) Entry(org.elasticsearch.search.suggest.Suggest.Suggestion.Entry) PhraseSuggestion(org.elasticsearch.search.suggest.phrase.PhraseSuggestion) CompletionSuggestion(org.elasticsearch.search.suggest.completion.CompletionSuggestion) TermSuggestion(org.elasticsearch.search.suggest.term.TermSuggestion)

Aggregations

Entry (org.elasticsearch.search.suggest.Suggest.Suggestion.Entry)10 CompletionSuggestion (org.elasticsearch.search.suggest.completion.CompletionSuggestion)8 Option (org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option)7 Suggestion (org.elasticsearch.search.suggest.Suggest.Suggestion)6 TermSuggestion (org.elasticsearch.search.suggest.term.TermSuggestion)6 ArrayList (java.util.ArrayList)5 BytesReference (org.elasticsearch.common.bytes.BytesReference)4 Text (org.elasticsearch.common.text.Text)4 PhraseSuggestion (org.elasticsearch.search.suggest.phrase.PhraseSuggestion)4 List (java.util.List)3 Map (java.util.Map)3 IntArrayList (com.carrotsearch.hppc.IntArrayList)2 HashMap (java.util.HashMap)2 AtomicArray (org.elasticsearch.common.util.concurrent.AtomicArray)2 XContentParser (org.elasticsearch.common.xcontent.XContentParser)2 QuerySearchResult (org.elasticsearch.search.query.QuerySearchResult)2 Suggest (org.elasticsearch.search.suggest.Suggest)2 ObjectObjectHashMap (com.carrotsearch.hppc.ObjectObjectHashMap)1 IOException (java.io.IOException)1 TokenStream (org.apache.lucene.analysis.TokenStream)1