Search in sources :

Example 31 with FuzzyQuery

use of org.apache.lucene.search.FuzzyQuery in project lucene-solr by apache.

The method extractAutomata of the class MultiTermHighlighting.

/**
 * Walks {@code query} and produces one {@link CharacterRunAutomaton} for each prefix, fuzzy,
 * term-range or other automaton query found whose field is accepted by {@code fieldMatcher}.
 *
 * <p>When {@code preRewriteFunc} returns a non-null collection for a query, only those
 * sub-queries are visited. Otherwise boolean (non-prohibited clauses only), constant-score,
 * boost and disjunction-max queries are descended into; span queries are descended into only
 * when {@code lookInSpan} is true. Query types not listed contribute nothing.
 *
 * @param query          the query to inspect
 * @param fieldMatcher   tested against the field of each multi-term query; the query is skipped
 *                       when it returns false
 * @param lookInSpan     whether span queries should be unwrapped
 * @param preRewriteFunc asked first for every visited query; a non-null result replaces the
 *                       built-in traversal for that query
 * @return the collected automata, in visiting order; empty when nothing matched
 */
public static CharacterRunAutomaton[] extractAutomata(Query query, Predicate<String> fieldMatcher, boolean lookInSpan, Function<Query, Collection<Query>> preRewriteFunc) {
    // TODO Lucene needs a Query visitor API!  LUCENE-3041
    final List<CharacterRunAutomaton> automata = new ArrayList<>();
    final Collection<Query> overrides = preRewriteFunc.apply(query);
    if (overrides != null) {
        for (Query child : overrides) {
            appendAutomata(automata, child, fieldMatcher, lookInSpan, preRewriteFunc);
        }
    } else if (query instanceof BooleanQuery) {
        for (BooleanClause clause : (BooleanQuery) query) {
            if (clause.isProhibited()) {
                // prohibited clauses never contribute automata
                continue;
            }
            appendAutomata(automata, clause.getQuery(), fieldMatcher, lookInSpan, preRewriteFunc);
        }
    } else if (query instanceof ConstantScoreQuery) {
        appendAutomata(automata, ((ConstantScoreQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc);
    } else if (query instanceof BoostQuery) {
        appendAutomata(automata, ((BoostQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc);
    } else if (query instanceof DisjunctionMaxQuery) {
        for (Query disjunct : ((DisjunctionMaxQuery) query).getDisjuncts()) {
            appendAutomata(automata, disjunct, fieldMatcher, lookInSpan, preRewriteFunc);
        }
    } else if (lookInSpan && query instanceof SpanOrQuery) {
        for (Query spanClause : ((SpanOrQuery) query).getClauses()) {
            appendAutomata(automata, spanClause, fieldMatcher, lookInSpan, preRewriteFunc);
        }
    } else if (lookInSpan && query instanceof SpanNearQuery) {
        for (Query spanClause : ((SpanNearQuery) query).getClauses()) {
            appendAutomata(automata, spanClause, fieldMatcher, lookInSpan, preRewriteFunc);
        }
    } else if (lookInSpan && query instanceof SpanNotQuery) {
        // only the "include" side is visited
        appendAutomata(automata, ((SpanNotQuery) query).getInclude(), fieldMatcher, lookInSpan, preRewriteFunc);
    } else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
        appendAutomata(automata, ((SpanPositionCheckQuery) query).getMatch(), fieldMatcher, lookInSpan, preRewriteFunc);
    } else if (lookInSpan && query instanceof SpanBoostQuery) {
        appendAutomata(automata, ((SpanBoostQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc);
    } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
        appendAutomata(automata, ((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), fieldMatcher, lookInSpan, preRewriteFunc);
    } else if (query instanceof PrefixQuery) {
        final PrefixQuery prefixQuery = (PrefixQuery) query;
        final Term prefixTerm = prefixQuery.getPrefix();
        if (fieldMatcher.test(prefixTerm.field())) {
            // the literal prefix followed by any string
            final Automaton prefixThenAnything = Operations.concatenate(Automata.makeString(prefixTerm.text()), Automata.makeAnyString());
            automata.add(new CharacterRunAutomaton(prefixThenAnything) {

                @Override
                public String toString() {
                    return prefixQuery.toString();
                }
            });
        }
    } else if (query instanceof FuzzyQuery) {
        final FuzzyQuery fuzzyQuery = (FuzzyQuery) query;
        if (fieldMatcher.test(fuzzyQuery.getField())) {
            // work in code points so the prefix length never splits a surrogate pair
            final int[] codePoints = fuzzyQuery.getTerm().text().codePoints().toArray();
            final int prefixLength = Math.min(fuzzyQuery.getPrefixLength(), codePoints.length);
            final String exactPrefix = UnicodeUtil.newString(codePoints, 0, prefixLength);
            final String fuzzySuffix = UnicodeUtil.newString(codePoints, prefixLength, codePoints.length - prefixLength);
            final LevenshteinAutomata levenshtein = new LevenshteinAutomata(fuzzySuffix, fuzzyQuery.getTranspositions());
            final Automaton fuzzyAutomaton = levenshtein.toAutomaton(fuzzyQuery.getMaxEdits(), exactPrefix);
            automata.add(new CharacterRunAutomaton(fuzzyAutomaton) {

                @Override
                public String toString() {
                    return fuzzyQuery.toString();
                }
            });
        }
    } else if (query instanceof TermRangeQuery) {
        final TermRangeQuery rangeQuery = (TermRangeQuery) query;
        if (fieldMatcher.test(rangeQuery.getField())) {
            // a null bound means the range is open on that side
            final CharsRef lowerBound = rangeQuery.getLowerTerm() == null ? null : new CharsRef(rangeQuery.getLowerTerm().utf8ToString());
            final CharsRef upperBound = rangeQuery.getUpperTerm() == null ? null : new CharsRef(rangeQuery.getUpperTerm().utf8ToString());
            final boolean includeLower = rangeQuery.includesLower();
            final boolean includeUpper = rangeQuery.includesUpper();
            // reused view over the candidate chars; re-pointed on every run() call
            final CharsRef scratch = new CharsRef();
            @SuppressWarnings("deprecation") final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
            // this is *not* an automaton, but it's very simple: run() is overridden to compare against the bounds
            automata.add(new CharacterRunAutomaton(Automata.makeEmpty()) {

                @Override
                public boolean run(char[] text, int start, int len) {
                    scratch.chars = text;
                    scratch.offset = start;
                    scratch.length = len;
                    if (lowerBound != null) {
                        final int vsLower = comparator.compare(scratch, lowerBound);
                        final boolean belowRange = vsLower < 0 || (vsLower == 0 && !includeLower);
                        if (belowRange) {
                            return false;
                        }
                    }
                    if (upperBound == null) {
                        return true;
                    }
                    final int vsUpper = comparator.compare(scratch, upperBound);
                    return vsUpper < 0 || (vsUpper == 0 && includeUpper);
                }

                @Override
                public String toString() {
                    return rangeQuery.toString();
                }
            });
        }
    } else if (query instanceof AutomatonQuery) {
        final AutomatonQuery automatonQuery = (AutomatonQuery) query;
        if (fieldMatcher.test(automatonQuery.getField())) {
            automata.add(new CharacterRunAutomaton(automatonQuery.getAutomaton()) {

                @Override
                public String toString() {
                    return automatonQuery.toString();
                }
            });
        }
    }
    return automata.toArray(new CharacterRunAutomaton[0]);
}

/** Recursively extracts the automata of {@code sub} and appends them to {@code sink}. */
private static void appendAutomata(List<CharacterRunAutomaton> sink, Query sub, Predicate<String> fieldMatcher, boolean lookInSpan, Function<Query, Collection<Query>> preRewriteFunc) {
    sink.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) SpanPositionCheckQuery(org.apache.lucene.search.spans.SpanPositionCheckQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) ArrayList(java.util.ArrayList) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) Comparator(java.util.Comparator) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) Automaton(org.apache.lucene.util.automaton.Automaton) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) LevenshteinAutomata(org.apache.lucene.util.automaton.LevenshteinAutomata) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) CharsRef(org.apache.lucene.util.CharsRef) BooleanClause(org.apache.lucene.search.BooleanClause) SpanPositionCheckQuery(org.apache.lucene.search.spans.SpanPositionCheckQuery) 
PrefixQuery(org.apache.lucene.search.PrefixQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 32 with FuzzyQuery

use of org.apache.lucene.search.FuzzyQuery in project lucene-solr by apache.

The method newFuzzyQuery of the class SimpleQueryParser.

/**
 * Factory method to generate a fuzzy query.
 *
 * <p>One {@link FuzzyQuery} is created per entry of {@code weights}, using the text normalized
 * by the analyzer for that field. A query whose weight differs from {@code 1f} is wrapped in a
 * {@link BoostQuery}. All of them are combined as SHOULD clauses and the result is passed
 * through {@code simplify}.
 *
 * @param text      the raw term text to match fuzzily
 * @param fuzziness the edit distance passed to each {@link FuzzyQuery}
 * @return the simplified combination of the per-field fuzzy queries
 */
protected Query newFuzzyQuery(String text, int fuzziness) {
    final BooleanQuery.Builder perFieldQueries = new BooleanQuery.Builder();
    for (Map.Entry<String, Float> fieldWeight : weights.entrySet()) {
        final String field = fieldWeight.getKey();
        final BytesRef normalized = getAnalyzer().normalize(field, text);
        final Query fuzzy = new FuzzyQuery(new Term(field, normalized), fuzziness);
        final float weight = fieldWeight.getValue();
        // a boost of exactly 1 is a no-op, so the wrapper is skipped
        final Query clause = (weight != 1f) ? new BoostQuery(fuzzy, weight) : fuzzy;
        perFieldQueries.add(clause, BooleanClause.Occur.SHOULD);
    }
    return simplify(perFieldQueries.build());
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) QueryBuilder(org.apache.lucene.util.QueryBuilder) Term(org.apache.lucene.index.Term) BoostQuery(org.apache.lucene.search.BoostQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) Map(java.util.Map) BytesRef(org.apache.lucene.util.BytesRef)

Example 33 with FuzzyQuery

use of org.apache.lucene.search.FuzzyQuery in project lucene-solr by apache.

The method testFieldMatcherMultiTermQuery of the class TestUnifiedHighlighter.

/**
 * Checks how the field matcher affects highlighting of multi-term queries (fuzzy and prefix)
 * that target several fields.
 *
 * <p>Two highlighters are compared for each of the fields {@code title}, {@code text} and
 * {@code category}: one whose field matcher accepts every query field, and a default one whose
 * matcher is then temporarily replaced by a matcher for one specific other field.
 *
 * <p>The reader is opened in a try-with-resources block so that it is closed even when an
 * assertion fails part-way through.
 */
public void testFieldMatcherMultiTermQuery() throws Exception {
    try (IndexReader ir = indexSomeFields()) {
        IndexSearcher searcher = newSearcher(ir);
        UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) {

            @Override
            protected Predicate<String> getFieldMatcher(String field) {
                // requireFieldMatch=false
                return (qf) -> true;
            }
        };
        UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer);
        // A mix of fuzzy, prefix and plain term clauses spread over the three fields.
        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder()
            .add(new FuzzyQuery(new Term("text", "sime"), 1), BooleanClause.Occur.SHOULD)
            .add(new PrefixQuery(new Term("text", "fie")), BooleanClause.Occur.SHOULD)
            .add(new PrefixQuery(new Term("text", "thi")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("title", "is")), BooleanClause.Occur.SHOULD)
            .add(new PrefixQuery(new Term("title", "thi")), BooleanClause.Occur.SHOULD)
            .add(new PrefixQuery(new Term("category", "thi")), BooleanClause.Occur.SHOULD)
            .add(new FuzzyQuery(new Term("category", "sime"), 1), BooleanClause.Occur.SHOULD)
            .add(new PrefixQuery(new Term("category", "categ")), BooleanClause.Occur.SHOULD);
        Query query = queryBuilder.build();
        // title
        {
            TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
            assertEquals(1, topDocs.totalHits);
            // every clause highlights, whatever field it names
            String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> the title <b>field</b>.", snippets[0]);
            // default matcher: only the "title" clauses highlight
            snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
            // explicit matcher: the "text" clauses are applied to the title content
            highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
            snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> is the title <b>field</b>.", snippets[0]);
            highlighterFieldMatch.setFieldMatcher(null);
        }
        // text
        {
            TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
            assertEquals(1, topDocs.totalHits);
            String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
            snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> is the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
            highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
            snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> the text field. ", snippets[0]);
            highlighterFieldMatch.setFieldMatcher(null);
        }
        // category
        {
            TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
            assertEquals(1, topDocs.totalHits);
            String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> the <b>category</b> <b>field</b>.", snippets[0]);
            snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> is the <b>category</b> field.", snippets[0]);
            highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
            snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
            highlighterFieldMatch.setFieldMatcher(null);
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) Arrays(java.util.Arrays) ParametersFactory(com.carrotsearch.randomizedtesting.annotations.ParametersFactory) ScoreDoc(org.apache.lucene.search.ScoreDoc) SuppressCodecs(org.apache.lucene.util.LuceneTestCase.SuppressCodecs) FieldType(org.apache.lucene.document.FieldType) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Term(org.apache.lucene.index.Term) PhraseQuery(org.apache.lucene.search.PhraseQuery) Document(org.apache.lucene.document.Document) Map(java.util.Map) Directory(org.apache.lucene.store.Directory) After(org.junit.After) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) Before(org.junit.Before) TopDocs(org.apache.lucene.search.TopDocs) Predicate(java.util.function.Predicate) Sort(org.apache.lucene.search.Sort) PrefixQuery(org.apache.lucene.search.PrefixQuery) IOException(java.io.IOException) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) InputStreamReader(java.io.InputStreamReader) StandardCharsets(java.nio.charset.StandardCharsets) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BooleanClause(org.apache.lucene.search.BooleanClause) List(java.util.List) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BreakIterator(java.text.BreakIterator) Field(org.apache.lucene.document.Field) LuceneTestCase(org.apache.lucene.util.LuceneTestCase) BufferedReader(java.io.BufferedReader) IndexOptions(org.apache.lucene.index.IndexOptions) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Collections(java.util.Collections) IndexReader(org.apache.lucene.index.IndexReader) IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) 
FuzzyQuery(org.apache.lucene.search.FuzzyQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Term(org.apache.lucene.index.Term) TopDocs(org.apache.lucene.search.TopDocs) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) IndexReader(org.apache.lucene.index.IndexReader)

Example 34 with FuzzyQuery

use of org.apache.lucene.search.FuzzyQuery in project fess by codelibs.

The method convertFuzzyQuery of the class QueryHelper.

/**
 * Converts a Lucene {@link FuzzyQuery} into a {@link QueryBuilder}.
 *
 * <p>Three cases are distinguished by the field of the query's term:
 * <ul>
 *   <li>the default field: a fuzzy query on the term text is built through
 *       {@code buildDefaultQueryBuilder};</li>
 *   <li>a field accepted by {@code isSearchField}: a single fuzzy query on that field;</li>
 *   <li>any other field: the string form of the whole original query is used as the text,
 *       logged under the default field, registered as a highlighted query, and built through
 *       {@code buildDefaultQueryBuilder}.</li>
 * </ul>
 * In every case the fuzziness is derived from the query's maximum edit count.
 *
 * @param context    receives the field log entries (and the highlighted query in the last case)
 * @param fuzzyQuery the query to convert
 * @param boost      boost applied to the resulting builder(s)
 * @return the converted query builder
 */
protected QueryBuilder convertFuzzyQuery(final QueryContext context, final FuzzyQuery fuzzyQuery, final float boost) {
    final Term term = fuzzyQuery.getTerm();
    final String field = term.field();
    // TODO fuzzy value
    if (Constants.DEFAULT_FIELD.equals(field)) {
        final String text = term.text();
        context.addFieldLog(field, text);
        return buildDefaultQueryBuilder((name, weight) -> QueryBuilders.fuzzyQuery(name, text)
                .fuzziness(Fuzziness.fromEdits(fuzzyQuery.getMaxEdits()))
                .boost(weight * boost));
    }
    if (isSearchField(field)) {
        final String text = term.text();
        context.addFieldLog(field, text);
        return QueryBuilders.fuzzyQuery(field, text)
                .boost(boost)
                .fuzziness(Fuzziness.fromEdits(fuzzyQuery.getMaxEdits()));
    }
    // Unknown field: fall back to the query's string form as the text to match.
    final String origQuery = fuzzyQuery.toString();
    context.addFieldLog(Constants.DEFAULT_FIELD, origQuery);
    context.addHighlightedQuery(origQuery);
    return buildDefaultQueryBuilder((name, weight) -> QueryBuilders.fuzzyQuery(name, origQuery)
            .fuzziness(Fuzziness.fromEdits(fuzzyQuery.getMaxEdits()))
            .boost(weight * boost));
}
Also used : Query(org.apache.lucene.search.Query) Constants(org.codelibs.fess.Constants) SortBuilders(org.elasticsearch.search.sort.SortBuilders) OptionalThing(org.dbflute.optional.OptionalThing) Term(org.apache.lucene.index.Term) PhraseQuery(org.apache.lucene.search.PhraseQuery) QueryBuilders(org.elasticsearch.index.query.QueryBuilders) ScoreFunctionBuilders(org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders) GeoInfo(org.codelibs.fess.entity.GeoInfo) QueryContext(org.codelibs.fess.entity.QueryContext) FessConfig(org.codelibs.fess.mylasta.direction.FessConfig) Fuzziness(org.elasticsearch.common.unit.Fuzziness) Locale(java.util.Locale) Map(java.util.Map) BytesRef(org.apache.lucene.util.BytesRef) RangeQueryBuilder(org.elasticsearch.index.query.RangeQueryBuilder) Resource(javax.annotation.Resource) Set(java.util.Set) PrefixQuery(org.apache.lucene.search.PrefixQuery) UUID(java.util.UUID) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) FieldSortBuilder(org.elasticsearch.search.sort.FieldSortBuilder) WildcardQuery(org.apache.lucene.search.WildcardQuery) List(java.util.List) Stream(java.util.stream.Stream) ComponentUtil(org.codelibs.fess.util.ComponentUtil) PostConstruct(javax.annotation.PostConstruct) SortOrder(org.elasticsearch.search.sort.SortOrder) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder) ParseException(org.apache.lucene.queryparser.classic.ParseException) HashMap(java.util.HashMap) LaRequestUtil(org.lastaflute.web.util.LaRequestUtil) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SearchRequestType(org.codelibs.fess.entity.SearchRequestParams.SearchRequestType) FacetInfo(org.codelibs.fess.entity.FacetInfo) SortBuilder(org.elasticsearch.search.sort.SortBuilder) UserMessages(org.lastaflute.core.message.UserMessages) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) StreamUtil.stream(org.codelibs.core.stream.StreamUtil.stream) 
StringUtil(org.codelibs.core.lang.StringUtil) BooleanClause(org.apache.lucene.search.BooleanClause) Consumer(java.util.function.Consumer) TermQuery(org.apache.lucene.search.TermQuery) FilterFunctionBuilder(org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder.FilterFunctionBuilder) BooleanQuery(org.apache.lucene.search.BooleanQuery) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) InvalidQueryException(org.codelibs.fess.exception.InvalidQueryException) Term(org.apache.lucene.index.Term)

Aggregations

FuzzyQuery (org.apache.lucene.search.FuzzyQuery)34 Term (org.apache.lucene.index.Term)26 PrefixQuery (org.apache.lucene.search.PrefixQuery)20 BooleanQuery (org.apache.lucene.search.BooleanQuery)17 BoostQuery (org.apache.lucene.search.BoostQuery)16 Query (org.apache.lucene.search.Query)16 TermQuery (org.apache.lucene.search.TermQuery)12 WildcardQuery (org.apache.lucene.search.WildcardQuery)12 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)8 PhraseQuery (org.apache.lucene.search.PhraseQuery)8 RegexpQuery (org.apache.lucene.search.RegexpQuery)7 MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery)6 TermRangeQuery (org.apache.lucene.search.TermRangeQuery)6 BooleanClause (org.apache.lucene.search.BooleanClause)5 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)5 DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery)5 TopDocs (org.apache.lucene.search.TopDocs)5 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)5 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)5 Map (java.util.Map)4