Search in sources :

Example 41 with DisjunctionMaxQuery

use of org.apache.lucene.search.DisjunctionMaxQuery in project lucene-solr by apache.

the class SolrPluginUtils method setMinShouldMatch.

/**
   * Checks the number of optional clauses in the query, and compares it
   * with the specification string to determine the proper value to use.
   * <p>
   * If mmAutoRelax=true, we'll perform auto relaxation of mm if tokens
   * are removed from some but not all DisMax clauses, as can happen when
   * stopwords or punctuation tokens are removed in analysis.
   * </p>
   * <p>
   * Details about the specification format can be found
   * <a href="doc-files/min-should-match.html">here</a>
   * </p>
   *
   * <p>A few important notes...</p>
   * <ul>
   * <li>
   * If the calculations based on the specification determine that no
   * optional clauses are needed, BooleanQuerysetMinMumberShouldMatch
   * will never be called, but the usual rules about BooleanQueries
   * still apply at search time (a BooleanQuery containing no required
   * clauses must still match at least one optional clause)
   * <li>
   * <li>
   * No matter what number the calculation arrives at,
   * BooleanQuery.setMinShouldMatch() will never be called with a
   * value greater then the number of optional clauses (or less then 1)
   * </li>
   * </ul>
   *
   * <p>:TODO: should optimize the case where number is same
   * as clauses to just make them all "required"
   * </p>
   *
   * @param q The query as a BooleanQuery.Builder
   * @param spec The mm spec
   * @param mmAutoRelax whether to perform auto relaxation of mm if tokens are removed from some but not all DisMax clauses
   */
public static void setMinShouldMatch(BooleanQuery.Builder q, String spec, boolean mmAutoRelax) {
    int optionalClauses = 0;
    int maxDisjunctsSize = 0;
    int optionalDismaxClauses = 0;
    for (BooleanClause c : q.build().clauses()) {
        if (c.getOccur() == Occur.SHOULD) {
            if (mmAutoRelax && c.getQuery() instanceof DisjunctionMaxQuery) {
                int numDisjuncts = ((DisjunctionMaxQuery) c.getQuery()).getDisjuncts().size();
                if (numDisjuncts > maxDisjunctsSize) {
                    maxDisjunctsSize = numDisjuncts;
                    optionalDismaxClauses = 1;
                } else if (numDisjuncts == maxDisjunctsSize) {
                    optionalDismaxClauses++;
                }
            } else {
                optionalClauses++;
            }
        }
    }
    int msm = calculateMinShouldMatch(optionalClauses + optionalDismaxClauses, spec);
    if (0 < msm) {
        q.setMinimumNumberShouldMatch(msm);
    }
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery)

Example 42 with DisjunctionMaxQuery

use of org.apache.lucene.search.DisjunctionMaxQuery in project lucene-solr by apache.

the class MultiTermHighlighting method extractAutomata.

/**
   * Extracts MultiTermQueries that match the provided field predicate.
   * Returns equivalent automata that will match terms.
   */
public static CharacterRunAutomaton[] extractAutomata(Query query, Predicate<String> fieldMatcher, boolean lookInSpan, Function<Query, Collection<Query>> preRewriteFunc) {
    // TODO Lucene needs a Query visitor API!  LUCENE-3041
    List<CharacterRunAutomaton> list = new ArrayList<>();
    Collection<Query> customSubQueries = preRewriteFunc.apply(query);
    if (customSubQueries != null) {
        for (Query sub : customSubQueries) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (query instanceof BooleanQuery) {
        for (BooleanClause clause : (BooleanQuery) query) {
            if (!clause.isProhibited()) {
                list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
            }
        }
    } else if (query instanceof ConstantScoreQuery) {
        list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof BoostQuery) {
        list.addAll(Arrays.asList(extractAutomata(((BoostQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof DisjunctionMaxQuery) {
        for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanOrQuery) {
        for (Query sub : ((SpanOrQuery) query).getClauses()) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanNearQuery) {
        for (Query sub : ((SpanNearQuery) query).getClauses()) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanNotQuery) {
        list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
        list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanBoostQuery) {
        list.addAll(Arrays.asList(extractAutomata(((SpanBoostQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
        list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof PrefixQuery) {
        final PrefixQuery pq = (PrefixQuery) query;
        Term prefix = pq.getPrefix();
        if (fieldMatcher.test(prefix.field())) {
            list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()), Automata.makeAnyString())) {

                @Override
                public String toString() {
                    return pq.toString();
                }
            });
        }
    } else if (query instanceof FuzzyQuery) {
        final FuzzyQuery fq = (FuzzyQuery) query;
        if (fieldMatcher.test(fq.getField())) {
            String utf16 = fq.getTerm().text();
            int[] termText = new int[utf16.codePointCount(0, utf16.length())];
            for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
                termText[j++] = cp = utf16.codePointAt(i);
            }
            int termLength = termText.length;
            int prefixLength = Math.min(fq.getPrefixLength(), termLength);
            String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
            LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
            String prefix = UnicodeUtil.newString(termText, 0, prefixLength);
            Automaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix);
            list.add(new CharacterRunAutomaton(automaton) {

                @Override
                public String toString() {
                    return fq.toString();
                }
            });
        }
    } else if (query instanceof TermRangeQuery) {
        final TermRangeQuery tq = (TermRangeQuery) query;
        if (fieldMatcher.test(tq.getField())) {
            final CharsRef lowerBound;
            if (tq.getLowerTerm() == null) {
                lowerBound = null;
            } else {
                lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString());
            }
            final CharsRef upperBound;
            if (tq.getUpperTerm() == null) {
                upperBound = null;
            } else {
                upperBound = new CharsRef(tq.getUpperTerm().utf8ToString());
            }
            final boolean includeLower = tq.includesLower();
            final boolean includeUpper = tq.includesUpper();
            final CharsRef scratch = new CharsRef();
            @SuppressWarnings("deprecation") final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
            // this is *not* an automaton, but its very simple
            list.add(new CharacterRunAutomaton(Automata.makeEmpty()) {

                @Override
                public boolean run(char[] s, int offset, int length) {
                    scratch.chars = s;
                    scratch.offset = offset;
                    scratch.length = length;
                    if (lowerBound != null) {
                        int cmp = comparator.compare(scratch, lowerBound);
                        if (cmp < 0 || (!includeLower && cmp == 0)) {
                            return false;
                        }
                    }
                    if (upperBound != null) {
                        int cmp = comparator.compare(scratch, upperBound);
                        if (cmp > 0 || (!includeUpper && cmp == 0)) {
                            return false;
                        }
                    }
                    return true;
                }

                @Override
                public String toString() {
                    return tq.toString();
                }
            });
        }
    } else if (query instanceof AutomatonQuery) {
        final AutomatonQuery aq = (AutomatonQuery) query;
        if (fieldMatcher.test(aq.getField())) {
            list.add(new CharacterRunAutomaton(aq.getAutomaton()) {

                @Override
                public String toString() {
                    return aq.toString();
                }
            });
        }
    }
    return list.toArray(new CharacterRunAutomaton[list.size()]);
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) SpanPositionCheckQuery(org.apache.lucene.search.spans.SpanPositionCheckQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) ArrayList(java.util.ArrayList) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) Comparator(java.util.Comparator) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) Automaton(org.apache.lucene.util.automaton.Automaton) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) LevenshteinAutomata(org.apache.lucene.util.automaton.LevenshteinAutomata) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) CharsRef(org.apache.lucene.util.CharsRef) BooleanClause(org.apache.lucene.search.BooleanClause) SpanPositionCheckQuery(org.apache.lucene.search.spans.SpanPositionCheckQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 43 with DisjunctionMaxQuery

use of org.apache.lucene.search.DisjunctionMaxQuery in project lucene-solr by apache.

the class DisjunctionMaxQueryBuilder method getQuery.

/* (non-Javadoc)
    * @see org.apache.lucene.xmlparser.QueryObjectBuilder#process(org.w3c.dom.Element)
    */
@Override
public Query getQuery(Element e) throws ParserException {
    float tieBreaker = DOMUtils.getAttribute(e, "tieBreaker", 0.0f);
    List<Query> disjuncts = new ArrayList<>();
    NodeList nl = e.getChildNodes();
    final int nlLen = nl.getLength();
    for (int i = 0; i < nlLen; i++) {
        Node node = nl.item(i);
        if (node instanceof Element) {
            // all elements are disjuncts.
            Element queryElem = (Element) node;
            Query q = factory.getQuery(queryElem);
            disjuncts.add(q);
        }
    }
    Query q = new DisjunctionMaxQuery(disjuncts, tieBreaker);
    float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
    if (boost != 1f) {
        q = new BoostQuery(q, boost);
    }
    return q;
}
Also used : Query(org.apache.lucene.search.Query) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) BoostQuery(org.apache.lucene.search.BoostQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) NodeList(org.w3c.dom.NodeList) Node(org.w3c.dom.Node) Element(org.w3c.dom.Element) ArrayList(java.util.ArrayList) BoostQuery(org.apache.lucene.search.BoostQuery)

Example 44 with DisjunctionMaxQuery

use of org.apache.lucene.search.DisjunctionMaxQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterMTQ method testWildcardInDisjunctionMax.

public void testWildcardInDisjunctionMax() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue("This is a test.");
    iw.addDocument(doc);
    body.setStringValue("Test a one sentence document.");
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    DisjunctionMaxQuery query = new DisjunctionMaxQuery(Collections.singleton(new WildcardQuery(new Term("body", "te*"))), 0);
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) WildcardQuery(org.apache.lucene.search.WildcardQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 45 with DisjunctionMaxQuery

use of org.apache.lucene.search.DisjunctionMaxQuery in project lucene-solr by apache.

the class MaxScoreQParser method parse.

/**
   * Parses the query exactly like the Lucene parser does, but
   * delegates all SHOULD clauses to DisjunctionMaxQuery with
   * meaning only the clause with the max score will contribute
   * to the overall score, unless the tie parameter is specified.
   * <br>
   * The max() is only calculated from the SHOULD clauses.
   * Any MUST clauses will be passed through as separate
   * BooleanClauses and thus always contribute to the score.
   * @return the resulting Query
   * @throws org.apache.solr.search.SyntaxError if parsing fails
   */
@Override
public Query parse() throws SyntaxError {
    Query q = super.parse();
    float boost = 1f;
    if (q instanceof BoostQuery) {
        BoostQuery bq = (BoostQuery) q;
        boost = bq.getBoost();
        q = bq.getQuery();
    }
    if (q instanceof BooleanQuery == false) {
        if (boost != 1f) {
            q = new BoostQuery(q, boost);
        }
        return q;
    }
    BooleanQuery obq = (BooleanQuery) q;
    Collection<Query> should = new ArrayList<>();
    Collection<BooleanClause> prohibOrReq = new ArrayList<>();
    BooleanQuery.Builder newqb = new BooleanQuery.Builder();
    for (BooleanClause clause : obq) {
        if (clause.isProhibited() || clause.isRequired()) {
            prohibOrReq.add(clause);
        } else {
            BooleanQuery.Builder bq = new BooleanQuery.Builder();
            bq.add(clause);
            should.add(bq.build());
        }
    }
    if (should.size() > 0) {
        DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(should, tie);
        newqb.add(dmq, BooleanClause.Occur.SHOULD);
    }
    for (BooleanClause c : prohibOrReq) {
        newqb.add(c);
    }
    Query newq = newqb.build();
    if (boost != 1f) {
        newq = new BoostQuery(newq, boost);
    }
    return newq;
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) ArrayList(java.util.ArrayList) BoostQuery(org.apache.lucene.search.BoostQuery)

Aggregations

DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery)49 Query (org.apache.lucene.search.Query)38 BooleanQuery (org.apache.lucene.search.BooleanQuery)34 BoostQuery (org.apache.lucene.search.BoostQuery)30 TermQuery (org.apache.lucene.search.TermQuery)26 Term (org.apache.lucene.index.Term)25 ArrayList (java.util.ArrayList)22 PhraseQuery (org.apache.lucene.search.PhraseQuery)20 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)17 MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery)16 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)15 SynonymQuery (org.apache.lucene.search.SynonymQuery)15 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)15 BooleanClause (org.apache.lucene.search.BooleanClause)13 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)13 SpanQuery (org.apache.lucene.search.spans.SpanQuery)12 MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery)10 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)9 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)9 TopDocs (org.apache.lucene.search.TopDocs)9