Search in sources :

Example 81 with BoostQuery

use of org.apache.lucene.search.BoostQuery in project lucene-solr by apache.

the class SolrPluginUtils method flattenBooleanQuery.

private static void flattenBooleanQuery(BooleanQuery.Builder to, BooleanQuery from, float fromBoost) {
    for (BooleanClause clause : from.clauses()) {
        Query cq = clause.getQuery();
        float boost = fromBoost;
        while (cq instanceof BoostQuery) {
            BoostQuery bq = (BoostQuery) cq;
            cq = bq.getQuery();
            boost *= bq.getBoost();
        }
        if (cq instanceof BooleanQuery && !clause.isRequired() && !clause.isProhibited()) {
            /* we can recurse */
            flattenBooleanQuery(to, (BooleanQuery) cq, boost);
        } else {
            to.add(clause);
        }
    }
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) BoostQuery(org.apache.lucene.search.BoostQuery)

Example 82 with BoostQuery

use of org.apache.lucene.search.BoostQuery in project lucene-solr by apache.

the class MultiTermHighlighting method extractAutomata.

/**
   * Extracts MultiTermQueries that match the provided field predicate.
   * Returns equivalent automata that will match terms.
   */
public static CharacterRunAutomaton[] extractAutomata(Query query, Predicate<String> fieldMatcher, boolean lookInSpan, Function<Query, Collection<Query>> preRewriteFunc) {
    // TODO Lucene needs a Query visitor API!  LUCENE-3041
    List<CharacterRunAutomaton> list = new ArrayList<>();
    Collection<Query> customSubQueries = preRewriteFunc.apply(query);
    if (customSubQueries != null) {
        for (Query sub : customSubQueries) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (query instanceof BooleanQuery) {
        for (BooleanClause clause : (BooleanQuery) query) {
            if (!clause.isProhibited()) {
                list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
            }
        }
    } else if (query instanceof ConstantScoreQuery) {
        list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof BoostQuery) {
        list.addAll(Arrays.asList(extractAutomata(((BoostQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof DisjunctionMaxQuery) {
        for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanOrQuery) {
        for (Query sub : ((SpanOrQuery) query).getClauses()) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanNearQuery) {
        for (Query sub : ((SpanNearQuery) query).getClauses()) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanNotQuery) {
        list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
        list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanBoostQuery) {
        list.addAll(Arrays.asList(extractAutomata(((SpanBoostQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
        list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof PrefixQuery) {
        final PrefixQuery pq = (PrefixQuery) query;
        Term prefix = pq.getPrefix();
        if (fieldMatcher.test(prefix.field())) {
            list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()), Automata.makeAnyString())) {

                @Override
                public String toString() {
                    return pq.toString();
                }
            });
        }
    } else if (query instanceof FuzzyQuery) {
        final FuzzyQuery fq = (FuzzyQuery) query;
        if (fieldMatcher.test(fq.getField())) {
            String utf16 = fq.getTerm().text();
            int[] termText = new int[utf16.codePointCount(0, utf16.length())];
            for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
                termText[j++] = cp = utf16.codePointAt(i);
            }
            int termLength = termText.length;
            int prefixLength = Math.min(fq.getPrefixLength(), termLength);
            String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
            LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
            String prefix = UnicodeUtil.newString(termText, 0, prefixLength);
            Automaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix);
            list.add(new CharacterRunAutomaton(automaton) {

                @Override
                public String toString() {
                    return fq.toString();
                }
            });
        }
    } else if (query instanceof TermRangeQuery) {
        final TermRangeQuery tq = (TermRangeQuery) query;
        if (fieldMatcher.test(tq.getField())) {
            final CharsRef lowerBound;
            if (tq.getLowerTerm() == null) {
                lowerBound = null;
            } else {
                lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString());
            }
            final CharsRef upperBound;
            if (tq.getUpperTerm() == null) {
                upperBound = null;
            } else {
                upperBound = new CharsRef(tq.getUpperTerm().utf8ToString());
            }
            final boolean includeLower = tq.includesLower();
            final boolean includeUpper = tq.includesUpper();
            final CharsRef scratch = new CharsRef();
            @SuppressWarnings("deprecation") final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
            // this is *not* an automaton, but its very simple
            list.add(new CharacterRunAutomaton(Automata.makeEmpty()) {

                @Override
                public boolean run(char[] s, int offset, int length) {
                    scratch.chars = s;
                    scratch.offset = offset;
                    scratch.length = length;
                    if (lowerBound != null) {
                        int cmp = comparator.compare(scratch, lowerBound);
                        if (cmp < 0 || (!includeLower && cmp == 0)) {
                            return false;
                        }
                    }
                    if (upperBound != null) {
                        int cmp = comparator.compare(scratch, upperBound);
                        if (cmp > 0 || (!includeUpper && cmp == 0)) {
                            return false;
                        }
                    }
                    return true;
                }

                @Override
                public String toString() {
                    return tq.toString();
                }
            });
        }
    } else if (query instanceof AutomatonQuery) {
        final AutomatonQuery aq = (AutomatonQuery) query;
        if (fieldMatcher.test(aq.getField())) {
            list.add(new CharacterRunAutomaton(aq.getAutomaton()) {

                @Override
                public String toString() {
                    return aq.toString();
                }
            });
        }
    }
    return list.toArray(new CharacterRunAutomaton[list.size()]);
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) SpanPositionCheckQuery(org.apache.lucene.search.spans.SpanPositionCheckQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) ArrayList(java.util.ArrayList) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) Comparator(java.util.Comparator) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) Automaton(org.apache.lucene.util.automaton.Automaton) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) LevenshteinAutomata(org.apache.lucene.util.automaton.LevenshteinAutomata) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) CharsRef(org.apache.lucene.util.CharsRef) BooleanClause(org.apache.lucene.search.BooleanClause) SpanPositionCheckQuery(org.apache.lucene.search.spans.SpanPositionCheckQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 83 with BoostQuery

use of org.apache.lucene.search.BoostQuery in project lucene-solr by apache.

the class DisjunctionMaxQueryBuilder method getQuery.

/* (non-Javadoc)
    * @see org.apache.lucene.xmlparser.QueryObjectBuilder#process(org.w3c.dom.Element)
    */
@Override
public Query getQuery(Element e) throws ParserException {
    float tieBreaker = DOMUtils.getAttribute(e, "tieBreaker", 0.0f);
    List<Query> disjuncts = new ArrayList<>();
    NodeList nl = e.getChildNodes();
    final int nlLen = nl.getLength();
    for (int i = 0; i < nlLen; i++) {
        Node node = nl.item(i);
        if (node instanceof Element) {
            // all elements are disjuncts.
            Element queryElem = (Element) node;
            Query q = factory.getQuery(queryElem);
            disjuncts.add(q);
        }
    }
    Query q = new DisjunctionMaxQuery(disjuncts, tieBreaker);
    float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
    if (boost != 1f) {
        q = new BoostQuery(q, boost);
    }
    return q;
}
Also used : Query(org.apache.lucene.search.Query) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) BoostQuery(org.apache.lucene.search.BoostQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) NodeList(org.w3c.dom.NodeList) Node(org.w3c.dom.Node) Element(org.w3c.dom.Element) ArrayList(java.util.ArrayList) BoostQuery(org.apache.lucene.search.BoostQuery)

Example 84 with BoostQuery

use of org.apache.lucene.search.BoostQuery in project lucene-solr by apache.

the class FuzzyLikeThisQueryBuilder method getQuery.

@Override
public Query getQuery(Element e) throws ParserException {
    NodeList nl = e.getElementsByTagName("Field");
    int maxNumTerms = DOMUtils.getAttribute(e, "maxNumTerms", DEFAULT_MAX_NUM_TERMS);
    FuzzyLikeThisQuery fbq = new FuzzyLikeThisQuery(maxNumTerms, analyzer);
    fbq.setIgnoreTF(DOMUtils.getAttribute(e, "ignoreTF", DEFAULT_IGNORE_TF));
    final int nlLen = nl.getLength();
    for (int i = 0; i < nlLen; i++) {
        Element fieldElem = (Element) nl.item(i);
        float minSimilarity = DOMUtils.getAttribute(fieldElem, "minSimilarity", DEFAULT_MIN_SIMILARITY);
        int prefixLength = DOMUtils.getAttribute(fieldElem, "prefixLength", DEFAULT_PREFIX_LENGTH);
        String fieldName = DOMUtils.getAttributeWithInheritance(fieldElem, "fieldName");
        String value = DOMUtils.getText(fieldElem);
        fbq.addTerms(value, fieldName, minSimilarity, prefixLength);
    }
    Query q = fbq;
    float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
    if (boost != 1f) {
        q = new BoostQuery(fbq, boost);
    }
    return q;
}
Also used : FuzzyLikeThisQuery(org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery) Query(org.apache.lucene.search.Query) BoostQuery(org.apache.lucene.search.BoostQuery) FuzzyLikeThisQuery(org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) NodeList(org.w3c.dom.NodeList) Element(org.w3c.dom.Element) BoostQuery(org.apache.lucene.search.BoostQuery)

Example 85 with BoostQuery

use of org.apache.lucene.search.BoostQuery in project lucene-solr by apache.

the class LikeThisQueryBuilder method getQuery.

/* (non-Javadoc)
    * @see org.apache.lucene.xmlparser.QueryObjectBuilder#process(org.w3c.dom.Element)
    */
@Override
public Query getQuery(Element e) throws ParserException {
    //a comma-delimited list of fields
    String fieldsList = e.getAttribute("fieldNames");
    String[] fields = defaultFieldNames;
    if ((fieldsList != null) && (fieldsList.trim().length() > 0)) {
        fields = fieldsList.trim().split(",");
        //trim the fieldnames
        for (int i = 0; i < fields.length; i++) {
            fields[i] = fields[i].trim();
        }
    }
    //Parse any "stopWords" attribute
    //TODO MoreLikeThis needs to ideally have per-field stopWords lists - until then
    //I use all analyzers/fields to generate multi-field compatible stop list
    String stopWords = e.getAttribute("stopWords");
    Set<String> stopWordsSet = null;
    if ((stopWords != null) && (fields != null)) {
        stopWordsSet = new HashSet<>();
        for (String field : fields) {
            try (TokenStream ts = analyzer.tokenStream(field, stopWords)) {
                CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
                ts.reset();
                while (ts.incrementToken()) {
                    stopWordsSet.add(termAtt.toString());
                }
                ts.end();
            } catch (IOException ioe) {
                throw new ParserException("IoException parsing stop words list in " + getClass().getName() + ":" + ioe.getLocalizedMessage());
            }
        }
    }
    MoreLikeThisQuery mlt = new MoreLikeThisQuery(DOMUtils.getText(e), fields, analyzer, fields[0]);
    mlt.setMaxQueryTerms(DOMUtils.getAttribute(e, "maxQueryTerms", DEFAULT_MAX_QUERY_TERMS));
    mlt.setMinTermFrequency(DOMUtils.getAttribute(e, "minTermFrequency", DEFAULT_MIN_TERM_FREQUENCY));
    mlt.setPercentTermsToMatch(DOMUtils.getAttribute(e, "percentTermsToMatch", DEFAULT_PERCENT_TERMS_TO_MATCH) / 100);
    mlt.setStopWords(stopWordsSet);
    int minDocFreq = DOMUtils.getAttribute(e, "minDocFreq", -1);
    if (minDocFreq >= 0) {
        mlt.setMinDocFreq(minDocFreq);
    }
    Query q = mlt;
    float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
    if (boost != 1f) {
        q = new BoostQuery(mlt, boost);
    }
    return q;
}
Also used : ParserException(org.apache.lucene.queryparser.xml.ParserException) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) MoreLikeThisQuery(org.apache.lucene.queries.mlt.MoreLikeThisQuery) BoostQuery(org.apache.lucene.search.BoostQuery) IOException(java.io.IOException) BoostQuery(org.apache.lucene.search.BoostQuery) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) MoreLikeThisQuery(org.apache.lucene.queries.mlt.MoreLikeThisQuery)

Aggregations

BoostQuery (org.apache.lucene.search.BoostQuery)128 Query (org.apache.lucene.search.Query)107 BooleanQuery (org.apache.lucene.search.BooleanQuery)96 TermQuery (org.apache.lucene.search.TermQuery)84 Term (org.apache.lucene.index.Term)54 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)45 PhraseQuery (org.apache.lucene.search.PhraseQuery)35 DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery)32 PrefixQuery (org.apache.lucene.search.PrefixQuery)29 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)27 MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery)24 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)23 BooleanClause (org.apache.lucene.search.BooleanClause)20 MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery)20 SynonymQuery (org.apache.lucene.search.SynonymQuery)19 WildcardQuery (org.apache.lucene.search.WildcardQuery)19 ArrayList (java.util.ArrayList)18 TermRangeQuery (org.apache.lucene.search.TermRangeQuery)18 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)17 MultiTermQuery (org.apache.lucene.search.MultiTermQuery)16