Search in sources :

Example 41 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class AnalyzingInfixSuggester method lookup.

/**
   * This is an advanced method providing the capability to send down to the suggester any 
   * arbitrary lucene query to be used to filter the result of the suggester
   * 
   * @param key the keyword being looked for
   * @param contextQuery an arbitrary Lucene query to be used to filter the result of the suggester. {@link #addContextToQuery} could be used to build this contextQuery.
   * @param num number of items to return
   * @param allTermsRequired all searched terms must match or not
   * @param doHighlight if true, the matching term will be highlighted in the search result
   * @return the result of the suggester
   * @throws IOException f the is IO exception while reading data from the index
   */
public List<LookupResult> lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
    if (searcherMgr == null) {
        throw new IllegalStateException("suggester was not built");
    }
    final BooleanClause.Occur occur;
    if (allTermsRequired) {
        occur = BooleanClause.Occur.MUST;
    } else {
        occur = BooleanClause.Occur.SHOULD;
    }
    BooleanQuery.Builder query;
    Set<String> matchedTokens;
    String prefixToken = null;
    try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
        //long t0 = System.currentTimeMillis();
        ts.reset();
        final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
        String lastToken = null;
        query = new BooleanQuery.Builder();
        int maxEndOffset = -1;
        matchedTokens = new HashSet<>();
        while (ts.incrementToken()) {
            if (lastToken != null) {
                matchedTokens.add(lastToken);
                query.add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
            }
            lastToken = termAtt.toString();
            if (lastToken != null) {
                maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset());
            }
        }
        ts.end();
        if (lastToken != null) {
            Query lastQuery;
            if (maxEndOffset == offsetAtt.endOffset()) {
                // Use PrefixQuery (or the ngram equivalent) when
                // there was no trailing discarded chars in the
                // string (e.g. whitespace), so that if query does
                // not end with a space we show prefix matches for
                // that token:
                lastQuery = getLastTokenQuery(lastToken);
                prefixToken = lastToken;
            } else {
                // Use TermQuery for an exact match if there were
                // trailing discarded chars (e.g. whitespace), so
                // that if query ends with a space we only show
                // exact matches for that term:
                matchedTokens.add(lastToken);
                lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
            }
            if (lastQuery != null) {
                query.add(lastQuery, occur);
            }
        }
        if (contextQuery != null) {
            boolean allMustNot = true;
            for (BooleanClause clause : contextQuery.clauses()) {
                if (clause.getOccur() != BooleanClause.Occur.MUST_NOT) {
                    allMustNot = false;
                    break;
                }
            }
            if (allMustNot) {
                // All are MUST_NOT: add the contextQuery to the main query instead (not as sub-query)
                for (BooleanClause clause : contextQuery.clauses()) {
                    query.add(clause);
                }
            } else if (allTermsRequired == false) {
                // We must carefully upgrade the query clauses to MUST:
                BooleanQuery.Builder newQuery = new BooleanQuery.Builder();
                newQuery.add(query.build(), BooleanClause.Occur.MUST);
                newQuery.add(contextQuery, BooleanClause.Occur.MUST);
                query = newQuery;
            } else {
                // Add contextQuery as sub-query
                query.add(contextQuery, BooleanClause.Occur.MUST);
            }
        }
    }
    // TODO: we could allow blended sort here, combining
    // weight w/ score.  Now we ignore score and sort only
    // by weight:
    Query finalQuery = finishQuery(query, allTermsRequired);
    //System.out.println("finalQuery=" + finalQuery);
    // Sort by weight, descending:
    TopFieldCollector c = TopFieldCollector.create(SORT, num, true, false, false);
    // We sorted postings by weight during indexing, so we
    // only retrieve the first num hits now:
    Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
    List<LookupResult> results = null;
    SearcherManager mgr;
    IndexSearcher searcher;
    synchronized (searcherMgrLock) {
        // acquire & release on same SearcherManager, via local reference
        mgr = searcherMgr;
        searcher = mgr.acquire();
    }
    try {
        //System.out.println("got searcher=" + searcher);
        searcher.search(finalQuery, c2);
        TopFieldDocs hits = c.topDocs();
        // Slower way if postings are not pre-sorted by weight:
        // hits = searcher.search(query, null, num, SORT);
        results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
    } finally {
        mgr.release(searcher);
    }
    return results;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) PrefixQuery(org.apache.lucene.search.PrefixQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SearcherManager(org.apache.lucene.search.SearcherManager) StringReader(java.io.StringReader) EarlyTerminatingSortingCollector(org.apache.lucene.search.EarlyTerminatingSortingCollector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) Collector(org.apache.lucene.search.Collector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) TermQuery(org.apache.lucene.search.TermQuery) Occur(org.apache.lucene.search.BooleanClause.Occur) Term(org.apache.lucene.index.Term) BooleanClause(org.apache.lucene.search.BooleanClause) EarlyTerminatingSortingCollector(org.apache.lucene.search.EarlyTerminatingSortingCollector) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute)

Example 42 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class SolrPluginUtils method setMinShouldMatch.

public static BooleanQuery setMinShouldMatch(BooleanQuery q, String spec, boolean mmAutoRelax) {
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    for (BooleanClause clause : q) {
        builder.add(clause);
    }
    setMinShouldMatch(builder, spec, mmAutoRelax);
    return builder.build();
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) BooleanQuery(org.apache.lucene.search.BooleanQuery) ResponseBuilder(org.apache.solr.handler.component.ResponseBuilder)

Example 43 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class WeightedSpanTermExtractor method extract.

/**
   * Fills a <code>Map</code> with {@link WeightedSpanTerm}s using the terms from the supplied <code>Query</code>.
   * 
   * @param query
   *          Query to extract Terms from
   * @param terms
   *          Map to place created WeightedSpanTerms in
   * @throws IOException If there is a low-level I/O error
   */
protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
    if (query instanceof BoostQuery) {
        BoostQuery boostQuery = (BoostQuery) query;
        extract(boostQuery.getQuery(), boost * boostQuery.getBoost(), terms);
    } else if (query instanceof BooleanQuery) {
        for (BooleanClause clause : (BooleanQuery) query) {
            if (!clause.isProhibited()) {
                extract(clause.getQuery(), boost, terms);
            }
        }
    } else if (query instanceof PhraseQuery) {
        PhraseQuery phraseQuery = ((PhraseQuery) query);
        Term[] phraseQueryTerms = phraseQuery.getTerms();
        if (phraseQueryTerms.length == 1) {
            extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
        } else {
            SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
            for (int i = 0; i < phraseQueryTerms.length; i++) {
                clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
            }
            // sum position increments beyond 1
            int positionGaps = 0;
            int[] positions = phraseQuery.getPositions();
            if (positions.length >= 2) {
                // positions are in increasing order.   max(0,...) is just a safeguard.
                positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
            }
            //if original slop is 0 then require inOrder
            boolean inorder = (phraseQuery.getSlop() == 0);
            SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
            extractWeightedSpanTerms(terms, sp, boost);
        }
    } else if (query instanceof TermQuery || query instanceof SynonymQuery) {
        extractWeightedTerms(terms, query, boost);
    } else if (query instanceof SpanQuery) {
        extractWeightedSpanTerms(terms, (SpanQuery) query, boost);
    } else if (query instanceof ConstantScoreQuery) {
        final Query q = ((ConstantScoreQuery) query).getQuery();
        if (q != null) {
            extract(q, boost, terms);
        }
    } else if (query instanceof CommonTermsQuery) {
        // specialized since rewriting would change the result query 
        // this query is TermContext sensitive.
        extractWeightedTerms(terms, query, boost);
    } else if (query instanceof DisjunctionMaxQuery) {
        for (Query clause : ((DisjunctionMaxQuery) query)) {
            extract(clause, boost, terms);
        }
    } else if (query instanceof ToParentBlockJoinQuery) {
        extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
    } else if (query instanceof ToChildBlockJoinQuery) {
        extract(((ToChildBlockJoinQuery) query).getParentQuery(), boost, terms);
    } else if (query instanceof MultiPhraseQuery) {
        final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
        final Term[][] termArrays = mpq.getTermArrays();
        final int[] positions = mpq.getPositions();
        if (positions.length > 0) {
            int maxPosition = positions[positions.length - 1];
            for (int i = 0; i < positions.length - 1; ++i) {
                if (positions[i] > maxPosition) {
                    maxPosition = positions[i];
                }
            }
            @SuppressWarnings({ "unchecked", "rawtypes" }) final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
            int distinctPositions = 0;
            for (int i = 0; i < termArrays.length; ++i) {
                final Term[] termArray = termArrays[i];
                List<SpanQuery> disjuncts = disjunctLists[positions[i]];
                if (disjuncts == null) {
                    disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
                    ++distinctPositions;
                }
                for (Term aTermArray : termArray) {
                    disjuncts.add(new SpanTermQuery(aTermArray));
                }
            }
            int positionGaps = 0;
            int position = 0;
            final SpanQuery[] clauses = new SpanQuery[distinctPositions];
            for (List<SpanQuery> disjuncts : disjunctLists) {
                if (disjuncts != null) {
                    clauses[position++] = new SpanOrQuery(disjuncts.toArray(new SpanQuery[disjuncts.size()]));
                } else {
                    ++positionGaps;
                }
            }
            if (clauses.length == 1) {
                extractWeightedSpanTerms(terms, clauses[0], boost);
            } else {
                final int slop = mpq.getSlop();
                final boolean inorder = (slop == 0);
                SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                extractWeightedSpanTerms(terms, sp, boost);
            }
        }
    } else if (query instanceof MatchAllDocsQuery) {
    //nothing
    } else if (query instanceof CustomScoreQuery) {
        extract(((CustomScoreQuery) query).getSubQuery(), boost, terms);
    } else if (isQueryUnsupported(query.getClass())) {
    // nothing
    } else {
        if (query instanceof MultiTermQuery && (!expandMultiTermQuery || !fieldNameComparator(((MultiTermQuery) query).getField()))) {
            return;
        }
        Query origQuery = query;
        final IndexReader reader = getLeafContext().reader();
        Query rewritten;
        if (query instanceof MultiTermQuery) {
            rewritten = MultiTermQuery.SCORING_BOOLEAN_REWRITE.rewrite(reader, (MultiTermQuery) query);
        } else {
            rewritten = origQuery.rewrite(reader);
        }
        if (rewritten != origQuery) {
            // only rewrite once and then flatten again - the rewritten query could have a special treatment
            // if this method is overwritten in a subclass or above in the next recursion
            extract(rewritten, boost, terms);
        } else {
            extractUnknownQuery(query, terms);
        }
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) SpanFirstQuery(org.apache.lucene.search.spans.SpanFirstQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) FieldMaskingSpanQuery(org.apache.lucene.search.spans.FieldMaskingSpanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) BoostQuery(org.apache.lucene.search.BoostQuery) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) List(java.util.List) ArrayList(java.util.ArrayList) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) FieldMaskingSpanQuery(org.apache.lucene.search.spans.FieldMaskingSpanQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) BooleanClause(org.apache.lucene.search.BooleanClause) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) IndexReader(org.apache.lucene.index.IndexReader) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 44 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class FieldQuery method flatten.

void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries, float boost) throws IOException {
    while (sourceQuery instanceof BoostQuery) {
        BoostQuery bq = (BoostQuery) sourceQuery;
        sourceQuery = bq.getQuery();
        boost *= bq.getBoost();
    }
    if (sourceQuery instanceof BooleanQuery) {
        BooleanQuery bq = (BooleanQuery) sourceQuery;
        for (BooleanClause clause : bq) {
            if (!clause.isProhibited()) {
                flatten(clause.getQuery(), reader, flatQueries, boost);
            }
        }
    } else if (sourceQuery instanceof DisjunctionMaxQuery) {
        DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery;
        for (Query query : dmq) {
            flatten(query, reader, flatQueries, boost);
        }
    } else if (sourceQuery instanceof TermQuery) {
        if (boost != 1f) {
            sourceQuery = new BoostQuery(sourceQuery, boost);
        }
        if (!flatQueries.contains(sourceQuery))
            flatQueries.add(sourceQuery);
    } else if (sourceQuery instanceof SynonymQuery) {
        SynonymQuery synQuery = (SynonymQuery) sourceQuery;
        for (Term term : synQuery.getTerms()) {
            flatten(new TermQuery(term), reader, flatQueries, boost);
        }
    } else if (sourceQuery instanceof PhraseQuery) {
        PhraseQuery pq = (PhraseQuery) sourceQuery;
        if (pq.getTerms().length == 1)
            sourceQuery = new TermQuery(pq.getTerms()[0]);
        if (boost != 1f) {
            sourceQuery = new BoostQuery(sourceQuery, boost);
        }
        flatQueries.add(sourceQuery);
    } else if (sourceQuery instanceof ConstantScoreQuery) {
        final Query q = ((ConstantScoreQuery) sourceQuery).getQuery();
        if (q != null) {
            flatten(q, reader, flatQueries, boost);
        }
    } else if (sourceQuery instanceof CustomScoreQuery) {
        final Query q = ((CustomScoreQuery) sourceQuery).getSubQuery();
        if (q != null) {
            flatten(q, reader, flatQueries, boost);
        }
    } else if (sourceQuery instanceof ToParentBlockJoinQuery) {
        Query childQuery = ((ToParentBlockJoinQuery) sourceQuery).getChildQuery();
        if (childQuery != null) {
            flatten(childQuery, reader, flatQueries, boost);
        }
    } else if (reader != null) {
        Query query = sourceQuery;
        Query rewritten;
        if (sourceQuery instanceof MultiTermQuery) {
            rewritten = new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS).rewrite(reader, (MultiTermQuery) query);
        } else {
            rewritten = query.rewrite(reader);
        }
        if (rewritten != query) {
            // only rewrite once and then flatten again - the rewritten query could have a speacial treatment
            // if this method is overwritten in a subclass.
            flatten(rewritten, reader, flatQueries, boost);
        }
    // if the query is already rewritten we discard it
    }
// else discard queries
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) TermQuery(org.apache.lucene.search.TermQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) BoostQuery(org.apache.lucene.search.BoostQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) Term(org.apache.lucene.index.Term) BoostQuery(org.apache.lucene.search.BoostQuery) BooleanClause(org.apache.lucene.search.BooleanClause) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery)

Example 45 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class TestQPHelper method testRegexQueryParsing.

public void testRegexQueryParsing() throws Exception {
    final String[] fields = { "b", "t" };
    final StandardQueryParser parser = new StandardQueryParser();
    parser.setMultiFields(fields);
    parser.setDefaultOperator(StandardQueryConfigHandler.Operator.AND);
    parser.setAnalyzer(new MockAnalyzer(random()));
    BooleanQuery.Builder exp = new BooleanQuery.Builder();
    //TODO spezification? was "MUST"
    exp.add(new BooleanClause(new RegexpQuery(new Term("b", "ab.+")), BooleanClause.Occur.SHOULD));
    //TODO spezification? was "MUST"
    exp.add(new BooleanClause(new RegexpQuery(new Term("t", "ab.+")), BooleanClause.Occur.SHOULD));
    assertEquals(exp.build(), parser.parse("/ab.+/", null));
    RegexpQuery regexpQueryexp = new RegexpQuery(new Term("test", "[abc]?[0-9]"));
    assertEquals(regexpQueryexp, parser.parse("test:/[abc]?[0-9]/", null));
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) BooleanQuery(org.apache.lucene.search.BooleanQuery) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Term(org.apache.lucene.index.Term) RegexpQuery(org.apache.lucene.search.RegexpQuery)

Aggregations

BooleanClause (org.apache.lucene.search.BooleanClause)102 BooleanQuery (org.apache.lucene.search.BooleanQuery)92 Query (org.apache.lucene.search.Query)56 TermQuery (org.apache.lucene.search.TermQuery)46 Term (org.apache.lucene.index.Term)44 BoostQuery (org.apache.lucene.search.BoostQuery)33 ArrayList (java.util.ArrayList)23 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)21 PhraseQuery (org.apache.lucene.search.PhraseQuery)20 DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery)19 SynonymQuery (org.apache.lucene.search.SynonymQuery)18 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)18 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)17 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)16 SpanQuery (org.apache.lucene.search.spans.SpanQuery)15 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)14 MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery)14 WildcardQuery (org.apache.lucene.search.WildcardQuery)14 MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery)13 PrefixQuery (org.apache.lucene.search.PrefixQuery)13