Search in sources :

Example 1 with AutomatonQuery

use of org.apache.lucene.search.AutomatonQuery in project lucene-solr by apache.

the class TestReversedWildcardFilterFactory method wasReversed.

/** fragile assert: depends on our implementation, but cleanest way to check for now */
private boolean wasReversed(SolrQueryParser qp, String query) throws Exception {
    Query q = qp.parse(query);
    if (!(q instanceof AutomatonQuery)) {
        return false;
    }
    Automaton automaton = ((AutomatonQuery) q).getAutomaton();
    String prefix = Operations.getCommonPrefix(Operations.determinize(automaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES));
    return prefix.length() > 0 && prefix.charAt(0) == '';
}
Also used : AutomatonQuery(org.apache.lucene.search.AutomatonQuery) Query(org.apache.lucene.search.Query) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) Automaton(org.apache.lucene.util.automaton.Automaton)

Example 2 with AutomatonQuery

use of org.apache.lucene.search.AutomatonQuery in project lucene-solr by apache.

the class TestTermsEnum2 method testFiniteVersusInfinite.

/**
 * Compares a random regexp automaton against a finite automaton built from the
 * entries of {@code terms} that the regexp accepts: both queries must return equal hits.
 */
public void testFiniteVersusInfinite() throws Exception {
    for (int round = 0; round < numIterations; round++) {
        final String pattern = AutomatonTestUtil.randomRegexp(random());
        final Automaton parsed = new RegExp(pattern, RegExp.NONE).toAutomaton();
        final Automaton original = Operations.determinize(parsed, DEFAULT_MAX_DETERMINIZED_STATES);

        // Collect every entry of `terms` that the regexp automaton accepts.
        final List<BytesRef> accepted = new ArrayList<>();
        for (BytesRef candidate : terms) {
            if (!Operations.run(original, candidate.utf8ToString())) {
                continue;
            }
            accepted.add(candidate);
        }

        // Automaton built as the union of exactly the accepted terms.
        final Automaton finite = Automata.makeStringUnion(accepted);

        final AutomatonQuery regexpQuery = new AutomatonQuery(new Term("field", ""), original);
        final AutomatonQuery unionQuery = new AutomatonQuery(new Term("field", ""), finite, Integer.MAX_VALUE);

        // Run the original query first, then the term-union one, and require equal hits.
        final ScoreDoc[] expectedHits = searcher.search(regexpQuery, 25).scoreDocs;
        final ScoreDoc[] actualHits = searcher.search(unionQuery, 25).scoreDocs;
        CheckHits.checkEqual(regexpQuery, expectedHits, actualHits);
    }
}
Also used : AutomatonQuery(org.apache.lucene.search.AutomatonQuery) ArrayList(java.util.ArrayList) BytesRef(org.apache.lucene.util.BytesRef) ScoreDoc(org.apache.lucene.search.ScoreDoc)

Example 3 with AutomatonQuery

use of org.apache.lucene.search.AutomatonQuery in project lucene-solr by apache.

the class MultiTermHighlighting method extractAutomata.

/**
 * Walks {@code query} and collects one {@link CharacterRunAutomaton} for every
 * prefix, fuzzy, term-range or automaton query whose field passes {@code fieldMatcher}.
 *
 * @param query the query to inspect; wrapper and compound queries are descended into recursively
 * @param fieldMatcher decides, per field name, whether a multi-term query is included
 * @param lookInSpan when {@code true}, span queries are descended into as well
 * @param preRewriteFunc consulted first for every query; a non-null result replaces the
 *        built-in handling and each returned query is processed recursively
 * @return the collected automata, possibly an empty array
 */
public static CharacterRunAutomaton[] extractAutomata(Query query, Predicate<String> fieldMatcher, boolean lookInSpan, Function<Query, Collection<Query>> preRewriteFunc) {
    // TODO Lucene needs a Query visitor API!  LUCENE-3041
    final List<CharacterRunAutomaton> found = new ArrayList<>();
    final Collection<Query> overrides = preRewriteFunc.apply(query);
    if (overrides != null) {
        // Caller-supplied expansion takes precedence over all built-in cases.
        for (Query child : overrides) {
            CharacterRunAutomaton[] nested = extractAutomata(child, fieldMatcher, lookInSpan, preRewriteFunc);
            found.addAll(Arrays.asList(nested));
        }
    } else if (query instanceof BooleanQuery) {
        for (BooleanClause clause : (BooleanQuery) query) {
            if (clause.isProhibited()) {
                // Prohibited clauses are skipped.
                continue;
            }
            CharacterRunAutomaton[] nested = extractAutomata(clause.getQuery(), fieldMatcher, lookInSpan, preRewriteFunc);
            found.addAll(Arrays.asList(nested));
        }
    } else if (query instanceof ConstantScoreQuery) {
        final Query inner = ((ConstantScoreQuery) query).getQuery();
        found.addAll(Arrays.asList(extractAutomata(inner, fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof BoostQuery) {
        final Query inner = ((BoostQuery) query).getQuery();
        found.addAll(Arrays.asList(extractAutomata(inner, fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof DisjunctionMaxQuery) {
        for (Query child : ((DisjunctionMaxQuery) query).getDisjuncts()) {
            CharacterRunAutomaton[] nested = extractAutomata(child, fieldMatcher, lookInSpan, preRewriteFunc);
            found.addAll(Arrays.asList(nested));
        }
    } else if (lookInSpan && query instanceof SpanOrQuery) {
        for (Query child : ((SpanOrQuery) query).getClauses()) {
            CharacterRunAutomaton[] nested = extractAutomata(child, fieldMatcher, lookInSpan, preRewriteFunc);
            found.addAll(Arrays.asList(nested));
        }
    } else if (lookInSpan && query instanceof SpanNearQuery) {
        for (Query child : ((SpanNearQuery) query).getClauses()) {
            CharacterRunAutomaton[] nested = extractAutomata(child, fieldMatcher, lookInSpan, preRewriteFunc);
            found.addAll(Arrays.asList(nested));
        }
    } else if (lookInSpan && query instanceof SpanNotQuery) {
        // Only the "include" side is inspected.
        final Query inner = ((SpanNotQuery) query).getInclude();
        found.addAll(Arrays.asList(extractAutomata(inner, fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
        final Query inner = ((SpanPositionCheckQuery) query).getMatch();
        found.addAll(Arrays.asList(extractAutomata(inner, fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanBoostQuery) {
        final Query inner = ((SpanBoostQuery) query).getQuery();
        found.addAll(Arrays.asList(extractAutomata(inner, fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
        final Query inner = ((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery();
        found.addAll(Arrays.asList(extractAutomata(inner, fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof PrefixQuery) {
        final PrefixQuery prefixQuery = (PrefixQuery) query;
        final Term prefixTerm = prefixQuery.getPrefix();
        if (fieldMatcher.test(prefixTerm.field())) {
            // The prefix text followed by any string.
            final Automaton prefixThenAnything = Operations.concatenate(Automata.makeString(prefixTerm.text()), Automata.makeAnyString());
            found.add(new CharacterRunAutomaton(prefixThenAnything) {

                @Override
                public String toString() {
                    return prefixQuery.toString();
                }
            });
        }
    } else if (query instanceof FuzzyQuery) {
        final FuzzyQuery fuzzy = (FuzzyQuery) query;
        if (fieldMatcher.test(fuzzy.getField())) {
            // Work in code points so the prefix/suffix split never cuts a surrogate pair.
            final int[] codePoints = fuzzy.getTerm().text().codePoints().toArray();
            final int prefixLen = Math.min(fuzzy.getPrefixLength(), codePoints.length);
            final String head = UnicodeUtil.newString(codePoints, 0, prefixLen);
            final String tail = UnicodeUtil.newString(codePoints, prefixLen, codePoints.length - prefixLen);
            final LevenshteinAutomata levenshtein = new LevenshteinAutomata(tail, fuzzy.getTranspositions());
            final Automaton fuzzyAutomaton = levenshtein.toAutomaton(fuzzy.getMaxEdits(), head);
            found.add(new CharacterRunAutomaton(fuzzyAutomaton) {

                @Override
                public String toString() {
                    return fuzzy.toString();
                }
            });
        }
    } else if (query instanceof TermRangeQuery) {
        final TermRangeQuery range = (TermRangeQuery) query;
        if (fieldMatcher.test(range.getField())) {
            // A null bound means that side of the range is not checked.
            final CharsRef lower = range.getLowerTerm() == null ? null : new CharsRef(range.getLowerTerm().utf8ToString());
            final CharsRef upper = range.getUpperTerm() == null ? null : new CharsRef(range.getUpperTerm().utf8ToString());
            final boolean lowerInclusive = range.includesLower();
            final boolean upperInclusive = range.includesUpper();
            // Reused holder pointed at the caller's char[] on every run() call.
            final CharsRef probe = new CharsRef();
            @SuppressWarnings("deprecation") final Comparator<CharsRef> order = CharsRef.getUTF16SortedAsUTF8Comparator();
            // Not a real automaton: run() is overridden with a plain range comparison.
            found.add(new CharacterRunAutomaton(Automata.makeEmpty()) {

                @Override
                public boolean run(char[] s, int offset, int length) {
                    probe.chars = s;
                    probe.offset = offset;
                    probe.length = length;
                    if (lower != null) {
                        final int vsLower = order.compare(probe, lower);
                        if (vsLower < 0 || (vsLower == 0 && !lowerInclusive)) {
                            return false;
                        }
                    }
                    if (upper != null) {
                        final int vsUpper = order.compare(probe, upper);
                        if (vsUpper > 0 || (vsUpper == 0 && !upperInclusive)) {
                            return false;
                        }
                    }
                    return true;
                }

                @Override
                public String toString() {
                    return range.toString();
                }
            });
        }
    } else if (query instanceof AutomatonQuery) {
        final AutomatonQuery automatonQuery = (AutomatonQuery) query;
        if (fieldMatcher.test(automatonQuery.getField())) {
            found.add(new CharacterRunAutomaton(automatonQuery.getAutomaton()) {

                @Override
                public String toString() {
                    return automatonQuery.toString();
                }
            });
        }
    }
    return found.toArray(new CharacterRunAutomaton[found.size()]);
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) SpanPositionCheckQuery(org.apache.lucene.search.spans.SpanPositionCheckQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) ArrayList(java.util.ArrayList) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) Comparator(java.util.Comparator) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) Automaton(org.apache.lucene.util.automaton.Automaton) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) LevenshteinAutomata(org.apache.lucene.util.automaton.LevenshteinAutomata) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) CharsRef(org.apache.lucene.util.CharsRef) BooleanClause(org.apache.lucene.search.BooleanClause) SpanPositionCheckQuery(org.apache.lucene.search.spans.SpanPositionCheckQuery) 
PrefixQuery(org.apache.lucene.search.PrefixQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 4 with AutomatonQuery

use of org.apache.lucene.search.AutomatonQuery in project lucene-solr by apache.

the class SolrQueryParserBase method getWildcardQuery.

// Called from the parser. Builds the query for a wildcard term on the given field:
// a match-all query for "*:*"-style input, an AutomatonQuery when the field type has a
// reversed-wildcard filter factory, and a regular wildcard query otherwise.
protected Query getWildcardQuery(String field, String termStr) throws SyntaxError {
    checkNullField(field);
    // *:* -> MatchAllDocsQuery
    if ("*".equals(termStr) && ("*".equals(field) || getExplicitField() == null)) {
        return newMatchAllDocsQuery();
    }
    FieldType fieldType = schema.getFieldType(field);
    termStr = analyzeIfMultitermTermText(field, termStr, fieldType);

    // Is a reversed-wildcard filter configured for this field type?
    ReversedWildcardFilterFactory factory = getReversedWildcardFilterFactory(fieldType);
    if (factory == null) {
        // Solr has always used constant scoring for wildcard queries.  This should return constant scoring by default.
        return newWildcardQuery(new Term(field, termStr));
    }

    Term term = new Term(field, termStr);
    // Automaton representing the wildcard pattern itself.
    Automaton automaton = WildcardQuery.toAutomaton(term);
    // TODO: we should likely use the automaton to calculate shouldReverse, too.
    if (factory.shouldReverse(termStr)) {
        // Append the marker char, then reverse the whole automaton.
        Automaton withMarker = Operations.concatenate(automaton, Automata.makeChar(factory.getMarkerChar()));
        automaton = Operations.reverse(withMarker);
    } else {
        // Reverse wildcard filter is active: remove false positives by subtracting
        // every string that starts with the marker char (markerChar*).
        Automaton markerPrefixed = Operations.concatenate(Automata.makeChar(factory.getMarkerChar()), Automata.makeAnyString());
        automaton = Operations.minus(automaton, markerPrefixed, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    }
    return new AutomatonQuery(term, automaton) {

        // toString is overridden so the automaton rewrite stays completely transparent:
        // it prints the term text, prefixed with "field:" only when the field differs.
        @Override
        public String toString(String field) {
            if (getField().equals(field)) {
                return term.text();
            }
            return getField() + ":" + term.text();
        }
    };
}
Also used : AutomatonQuery(org.apache.lucene.search.AutomatonQuery) Automaton(org.apache.lucene.util.automaton.Automaton) ReversedWildcardFilterFactory(org.apache.solr.analysis.ReversedWildcardFilterFactory) Term(org.apache.lucene.index.Term) FieldType(org.apache.solr.schema.FieldType)

Aggregations

AutomatonQuery (org.apache.lucene.search.AutomatonQuery)4 Automaton (org.apache.lucene.util.automaton.Automaton)3 ArrayList (java.util.ArrayList)2 Term (org.apache.lucene.index.Term)2 Query (org.apache.lucene.search.Query)2 Comparator (java.util.Comparator)1 BooleanClause (org.apache.lucene.search.BooleanClause)1 BooleanQuery (org.apache.lucene.search.BooleanQuery)1 BoostQuery (org.apache.lucene.search.BoostQuery)1 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)1 DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery)1 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)1 PrefixQuery (org.apache.lucene.search.PrefixQuery)1 ScoreDoc (org.apache.lucene.search.ScoreDoc)1 TermRangeQuery (org.apache.lucene.search.TermRangeQuery)1 SpanBoostQuery (org.apache.lucene.search.spans.SpanBoostQuery)1 SpanMultiTermQueryWrapper (org.apache.lucene.search.spans.SpanMultiTermQueryWrapper)1 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)1 SpanNotQuery (org.apache.lucene.search.spans.SpanNotQuery)1 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)1