Search in sources :

Example 1 with SuggestWord

use of org.apache.lucene.search.spell.SuggestWord in project elasticsearch by elastic.

the class TermSuggester method innerExecute.

/**
 * Produces the term suggestion for every token returned by {@code queryTerms}.
 * <p>
 * Each token is looked up with a {@link DirectSpellChecker} created from the context's
 * spell checker settings; every {@link SuggestWord} that comes back is recorded as one
 * option (text, frequency, score) on that token's entry.
 *
 * @param name       name given to the resulting {@link TermSuggestion}
 * @param suggestion context supplying the spell checker settings, size and shard size
 * @param searcher   searcher whose index reader is handed to the spell checker
 * @param spare      scratch builder forwarded to {@code queryTerms}
 * @return a suggestion containing one entry per token, in token order
 * @throws IOException declared by the signature; presumably raised by the index lookup
 */
@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    final DirectSpellChecker checker = suggestion.getDirectSpellCheckerSettings().createDirectSpellChecker();
    final IndexReader reader = searcher.getIndexReader();
    final TermSuggestion result = new TermSuggestion(name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort());
    for (Token current : queryTerms(suggestion, spare)) {
        // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
        final SuggestWord[] similar = checker.suggestSimilar(
            current.term,
            suggestion.getShardSize(),
            reader,
            suggestion.getDirectSpellCheckerSettings().suggestMode());

        // The entry is keyed by the token's term bytes and spans the token's offsets.
        final Text entryText = new Text(new BytesArray(current.term.bytes()));
        final int entryLength = current.endOffset - current.startOffset;
        final TermSuggestion.Entry entry = new TermSuggestion.Entry(entryText, current.startOffset, entryLength);

        for (int i = 0; i < similar.length; i++) {
            final SuggestWord hit = similar[i];
            final Text optionText = new Text(hit.string);
            entry.addOption(new TermSuggestion.Entry.Option(optionText, hit.freq, hit.score));
        }
        result.addTerm(entry);
    }
    return result;
}
Also used : BytesArray(org.elasticsearch.common.bytes.BytesArray) IndexReader(org.apache.lucene.index.IndexReader) SuggestWord(org.apache.lucene.search.spell.SuggestWord) Text(org.elasticsearch.common.text.Text) DirectSpellChecker(org.apache.lucene.search.spell.DirectSpellChecker)

Example 2 with SuggestWord

use of org.apache.lucene.search.spell.SuggestWord in project elasticsearch by elastic.

the class DirectSpellcheckerSettings method createDirectSpellChecker.

/**
 * Creates a new {@link DirectSpellChecker} and configures it from this object's settings.
 * <p>
 * The comparator is chosen from {@code sort()}: {@code SCORE} maps to {@code SCORE_COMPARATOR}
 * and {@code FREQUENCY} maps to {@code LUCENE_FREQUENCY}. Lower-casing of terms is always
 * switched off.
 *
 * @return the configured spell checker
 * @throws IllegalArgumentException if {@code sort()} is neither {@code SCORE} nor {@code FREQUENCY}
 */
public DirectSpellChecker createDirectSpellChecker() {
    final DirectSpellChecker checker = new DirectSpellChecker();
    checker.setAccuracy(accuracy());

    // Pick the ordering of suggestions; an unknown sort aborts before anything else is configured.
    switch (sort()) {
        case SCORE:
            checker.setComparator(SCORE_COMPARATOR);
            break;
        case FREQUENCY:
            checker.setComparator(LUCENE_FREQUENCY);
            break;
        default:
            throw new IllegalArgumentException("Illegal suggest sort: " + sort());
    }

    checker.setDistance(stringDistance());
    checker.setMaxEdits(maxEdits());
    checker.setMaxInspections(maxInspections());
    checker.setMaxQueryFrequency(maxTermFreq());
    checker.setMinPrefix(prefixLength());
    checker.setMinQueryLength(minWordLength());
    checker.setThresholdFrequency(minDocFreq());
    checker.setLowerCaseTerms(false);
    return checker;
}
Also used : SuggestWord(org.apache.lucene.search.spell.SuggestWord) DirectSpellChecker(org.apache.lucene.search.spell.DirectSpellChecker)

Example 3 with SuggestWord

use of org.apache.lucene.search.spell.SuggestWord in project elasticsearch by elastic.

the class DirectCandidateGenerator method drawCandidates.

/**
 * Looks up spelling candidates for the set's original term and adds them to the set.
 * <p>
 * The original term is passed through {@code preFilter} before the lookup. Each
 * {@link SuggestWord} returned by the spell checker is wrapped in a {@code Candidate}
 * and passed through {@code postFilter}, which receives the list that is finally added
 * to the set.
 *
 * @param set candidate set whose {@code originalTerm} drives the lookup; modified in place
 * @return the same {@code set} instance that was passed in
 * @throws IOException declared by the signature; presumably raised by the index lookup or filters
 */
@Override
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
    final Candidate source = set.originalTerm;
    final BytesRef lookupTerm = preFilter(source.term, spare, byteSpare);
    final long sourceFrequency = source.frequency;

    // With SUGGEST_ALWAYS no frequency threshold is applied; otherwise it is derived from the original term.
    if (this.suggestMode == SuggestMode.SUGGEST_ALWAYS) {
        spellchecker.setThresholdFrequency(0);
    } else {
        spellchecker.setThresholdFrequency(thresholdFrequency(sourceFrequency, dictSize));
    }

    final SuggestWord[] similar = spellchecker.suggestSimilar(new Term(field, lookupTerm), numCandidates, reader, this.suggestMode);
    final List<Candidate> drawn = new ArrayList<>(similar.length);
    for (SuggestWord word : similar) {
        final BytesRef wordBytes = new BytesRef(word.string);
        final Candidate unfiltered = new Candidate(
            wordBytes,
            internalFrequency(wordBytes),
            word.score,
            score(word.freq, word.score, dictSize),
            false);
        postFilter(unfiltered, spare, byteSpare, drawn);
    }
    set.addCandidates(drawn);
    return set;
}
Also used : SuggestWord(org.apache.lucene.search.spell.SuggestWord) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) BytesRef(org.apache.lucene.util.BytesRef)

Example 4 with SuggestWord

use of org.apache.lucene.search.spell.SuggestWord in project lucene-solr by apache.

the class SpellCheckComponent method collectShardSuggestions.

/**
 * Folds one shard's spellcheck response into the running merge data.
 * <p>
 * For every suggestion in the response this records the suggestion itself, the set of
 * alternatives seen for its token, the summed original frequency, the number of shards
 * that reported the token, and a {@link SuggestWord} per alternative with accumulated
 * frequency.
 *
 * @param nl        the shard's spellcheck section; also read for the {@code "originalTerms"} key
 * @param mergeData accumulator updated in place
 */
@SuppressWarnings("unchecked")
private void collectShardSuggestions(NamedList nl, SpellCheckMergeData mergeData) {
    final SpellCheckResponse shardResponse = new SpellCheckResponse(nl);

    // When the shard reports its original terms, they replace any previously stored set.
    final Iterable<Object> shardOriginalTerms = (Iterable<Object>) nl.get("originalTerms");
    if (shardOriginalTerms != null) {
        mergeData.originalTerms = new HashSet<>();
        for (Object term : shardOriginalTerms) {
            mergeData.originalTerms.add(term.toString());
        }
    }

    for (SpellCheckResponse.Suggestion suggestion : shardResponse.getSuggestions()) {
        final String token = suggestion.getToken();
        mergeData.origVsSuggestion.put(token, suggestion);

        HashSet<String> seenAlternatives = mergeData.origVsSuggested.get(token);
        if (seenAlternatives == null) {
            seenAlternatives = new HashSet<>();
            mergeData.origVsSuggested.put(token, seenAlternatives);
        }

        // Accumulate the original term frequency across shards.
        final Integer previousFreq = mergeData.origVsFreq.get(token);
        final int summedFreq = (previousFreq == null ? 0 : previousFreq) + suggestion.getOriginalFrequency();
        mergeData.origVsFreq.put(token, summedFreq);

        // Count how many shards reported this token.
        final Integer previousShards = mergeData.origVsShards.get(token);
        mergeData.origVsShards.put(token, previousShards == null ? 1 : previousShards + 1);

        // Register every alternative and accumulate its frequency.
        for (int idx = 0; idx < suggestion.getNumFound(); idx++) {
            final String alternative = suggestion.getAlternatives().get(idx);
            seenAlternatives.add(alternative);

            SuggestWord merged = mergeData.suggestedVsWord.get(alternative);
            if (merged == null) {
                merged = new SuggestWord();
                mergeData.suggestedVsWord.put(alternative, merged);
            }
            merged.string = alternative;

            // alternative frequency is present only for extendedResults=true
            final boolean hasFrequencies = suggestion.getAlternativeFrequencies() != null
                && suggestion.getAlternativeFrequencies().size() > 0;
            if (hasFrequencies) {
                final Integer alternativeFreq = suggestion.getAlternativeFrequencies().get(idx);
                if (alternativeFreq != null) {
                    merged.freq += alternativeFreq;
                }
            }
        }
    }
}
Also used : SuggestWord(org.apache.lucene.search.spell.SuggestWord) SpellCheckResponse(org.apache.solr.client.solrj.response.SpellCheckResponse)

Example 5 with SuggestWord

use of org.apache.lucene.search.spell.SuggestWord in project lucene-solr by apache.

the class AbstractLuceneSpellChecker method init.

/**
 * Initializes this spell checker from its configuration.
 * <p>
 * Reads the index directory, source location, comparator, string distance and accuracy
 * from {@code config}, initializes the index, and creates the underlying {@link SpellChecker}.
 *
 * @param config configuration values for this spell checker
 * @param core   core used to resolve a relative index directory and to load configured classes
 * @return the value of the {@code name} field
 * @throws RuntimeException if initializing the index or creating the spell checker raises an
 *                          {@link IOException}, or if the configured accuracy is not a parseable float
 */
@Override
public String init(NamedList config, SolrCore core) {
    super.init(config, core);
    indexDir = (String) config.get(INDEX_DIR);
    final String accuracyText = (String) config.get(ACCURACY);

    // A relative indexDir is placed inside core.getDataDir()
    if (indexDir != null && !new File(indexDir).isAbsolute()) {
        indexDir = core.getDataDir() + File.separator + indexDir;
    }
    sourceLocation = (String) config.get(LOCATION);

    // Comparator: score-based when unset or explicitly requested, frequency-based on request,
    // otherwise the value must be a fully qualified class name.
    final String comparatorName = (String) config.get(COMPARATOR_CLASS);
    final Comparator<SuggestWord> comparator;
    if (comparatorName == null || comparatorName.equalsIgnoreCase(SCORE_COMP)) {
        comparator = SuggestWordQueue.DEFAULT_COMPARATOR;
    } else if (comparatorName.equalsIgnoreCase(FREQ_COMP)) {
        comparator = new SuggestWordFrequencyComparator();
    } else {
        comparator = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(comparatorName, Comparator.class);
    }

    // String distance: Levenshtein unless a class name is configured.
    final String distanceName = (String) config.get(STRING_DISTANCE);
    if (distanceName == null) {
        sd = new LevensteinDistance();
    } else {
        sd = core.getResourceLoader().newInstance(distanceName, StringDistance.class);
        //TODO: Figure out how to configure options.  Where's Spring when you need it?  Or at least BeanUtils...
    }

    try {
        initIndex();
        spellChecker = new SpellChecker(index, sd, comparator);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }

    if (accuracyText == null) {
        return name;
    }
    try {
        this.accuracy = Float.parseFloat(accuracyText);
        spellChecker.setAccuracy(this.accuracy);
    } catch (NumberFormatException nfe) {
        throw new RuntimeException("Unparseable accuracy given for dictionary: " + name, nfe);
    }
    return name;
}
Also used : SuggestWordFrequencyComparator(org.apache.lucene.search.spell.SuggestWordFrequencyComparator) StringDistance(org.apache.lucene.search.spell.StringDistance) SuggestWord(org.apache.lucene.search.spell.SuggestWord) SpellChecker(org.apache.lucene.search.spell.SpellChecker) IOException(java.io.IOException) File(java.io.File) LevensteinDistance(org.apache.lucene.search.spell.LevensteinDistance) SuggestWordFrequencyComparator(org.apache.lucene.search.spell.SuggestWordFrequencyComparator) Comparator(java.util.Comparator)

Aggregations

SuggestWord (org.apache.lucene.search.spell.SuggestWord)12 ArrayList (java.util.ArrayList)4 IndexReader (org.apache.lucene.index.IndexReader)4 IOException (java.io.IOException)3 Token (org.apache.lucene.analysis.Token)3 Term (org.apache.lucene.index.Term)3 SuggestWordFrequencyComparator (org.apache.lucene.search.spell.SuggestWordFrequencyComparator)3 AbstractIterator (com.google.common.collect.AbstractIterator)2 Deque (java.util.Deque)2 HashSet (java.util.HashSet)2 Map (java.util.Map)2 Set (java.util.Set)2 PathStoredFieldVisitor (org.apache.jackrabbit.oak.plugins.index.lucene.util.PathStoredFieldVisitor)2 SpellcheckHelper (org.apache.jackrabbit.oak.plugins.index.lucene.util.SpellcheckHelper)2 SuggestHelper (org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper)2 Filter (org.apache.jackrabbit.oak.spi.query.Filter)2 PropertyRestriction (org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction)2 QueryLimits (org.apache.jackrabbit.oak.spi.query.QueryLimits)2 Analyzer (org.apache.lucene.analysis.Analyzer)2 Document (org.apache.lucene.document.Document)2