Search in sources :

Example 16 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterMTQ method testWithMaxLenAndMultipleWildcardMatches.

public void testWithMaxLenAndMultipleWildcardMatches() throws IOException {
    // Exercises the interleaving of several wildcard matches inside the CompositePostingsEnum:
    // one underlying PostingsEnum jumps from position 1 to 9 for "bravo", while a second one
    // sits at position 2 for "Bravado".
    final RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);
    final Field bodyField = new Field("body", "", fieldType);
    final Document document = new Document();
    document.add(bodyField);
    bodyField.setStringValue("Alpha Bravo Bravado foo foo foo. Foo foo Alpha Bravo");
    writer.addDocument(document);
    final IndexReader reader = writer.getReader();
    writer.close();

    final IndexSearcher searcher = newSearcher(reader);
    final UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    // The max length lands a little past the end of the first sentence.
    highlighter.setMaxLength(32);

    // Both clauses are required: an exact term plus a prefix that matches "bravo" and "bravado".
    final BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new TermQuery(new Term("body", "alpha")), BooleanClause.Occur.MUST);
    builder.add(new PrefixQuery(new Term("body", "bra")), BooleanClause.Occur.MUST);
    final BooleanQuery query = builder.build();

    final TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);

    // Two passages are requested, but only one is expected back.
    final String[] expected = { "<b>Alpha</b> <b>Bravo</b> <b>Bravado</b> foo foo foo." };
    final String[] snippets = highlighter.highlight("body", query, hits, 2);
    assertArrayEquals(expected, snippets);

    reader.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) PrefixQuery(org.apache.lucene.search.PrefixQuery) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 17 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class AnalyzingInfixSuggester method lookup.

/**
   * Advanced lookup that lets the caller pass an arbitrary Lucene query which is used to
   * filter the suggestions returned by this suggester.
   *
   * <p>The key is analyzed with the query analyzer. Every token except the last becomes an
   * exact {@link TermQuery}; the last token becomes a prefix-style query unless the key ends
   * in characters the analyzer discarded (e.g. whitespace), in which case it is also matched
   * exactly.
   *
   * @param key the keyword being looked for
   * @param contextQuery an arbitrary Lucene query to be used to filter the result of the suggester;
   *        may be {@code null} for no filtering. {@link #addContextToQuery} could be used to build this contextQuery.
   * @param num number of items to return
   * @param allTermsRequired if true every searched term must match (MUST); otherwise terms are optional (SHOULD)
   * @param doHighlight if true, the matching term will be highlighted in the search result
   * @return the result of the suggester
   * @throws IOException if there is an IO exception while reading data from the index
   * @throws IllegalStateException if the suggester has not been built yet (no searcher manager)
   */
public List<LookupResult> lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
    if (searcherMgr == null) {
        throw new IllegalStateException("suggester was not built");
    }
    // Every token clause below is added with this same occur value.
    final BooleanClause.Occur occur;
    if (allTermsRequired) {
        occur = BooleanClause.Occur.MUST;
    } else {
        occur = BooleanClause.Occur.SHOULD;
    }
    BooleanQuery.Builder query;
    // Tokens matched exactly (as TermQuery); handed to createResults below.
    Set<String> matchedTokens;
    // Set only when the last token is treated as a prefix; otherwise stays null.
    String prefixToken = null;
    try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
        //long t0 = System.currentTimeMillis();
        ts.reset();
        final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
        String lastToken = null;
        query = new BooleanQuery.Builder();
        // Largest end offset seen across all tokens; -1 means no token was produced.
        int maxEndOffset = -1;
        matchedTokens = new HashSet<>();
        while (ts.incrementToken()) {
            // Tokens are handled one iteration late: once a following token arrives, the
            // previous one is known not to be the last and is added as an exact term match.
            // The final token is dealt with after the loop.
            if (lastToken != null) {
                matchedTokens.add(lastToken);
                query.add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
            }
            lastToken = termAtt.toString();
            // NOTE(review): lastToken was just assigned from termAtt.toString(); this null
            // check looks always-true -- confirm before simplifying.
            if (lastToken != null) {
                maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset());
            }
        }
        ts.end();
        if (lastToken != null) {
            Query lastQuery;
            // Presumably, after ts.end(), offsetAtt reports the final offset of the whole
            // input, so equality means no characters followed the last token -- confirm.
            if (maxEndOffset == offsetAtt.endOffset()) {
                // Use PrefixQuery (or the ngram equivalent) when
                // there was no trailing discarded chars in the
                // string (e.g. whitespace), so that if query does
                // not end with a space we show prefix matches for
                // that token:
                lastQuery = getLastTokenQuery(lastToken);
                prefixToken = lastToken;
            } else {
                // Use TermQuery for an exact match if there were
                // trailing discarded chars (e.g. whitespace), so
                // that if query ends with a space we only show
                // exact matches for that term:
                matchedTokens.add(lastToken);
                lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
            }
            // Presumably getLastTokenQuery may return null; in that case no clause is
            // added for the last token.
            if (lastQuery != null) {
                query.add(lastQuery, occur);
            }
        }
        if (contextQuery != null) {
            // Determine whether the context query consists solely of MUST_NOT clauses.
            // A context query with no clauses at all also leaves this flag true.
            boolean allMustNot = true;
            for (BooleanClause clause : contextQuery.clauses()) {
                if (clause.getOccur() != BooleanClause.Occur.MUST_NOT) {
                    allMustNot = false;
                    break;
                }
            }
            if (allMustNot) {
                // All are MUST_NOT: add the contextQuery to the main query instead (not as sub-query)
                for (BooleanClause clause : contextQuery.clauses()) {
                    query.add(clause);
                }
            } else if (allTermsRequired == false) {
                // We must carefully upgrade the query clauses to MUST:
                // the SHOULD token clauses are wrapped as one required sub-query, next to
                // the required context query.
                BooleanQuery.Builder newQuery = new BooleanQuery.Builder();
                newQuery.add(query.build(), BooleanClause.Occur.MUST);
                newQuery.add(contextQuery, BooleanClause.Occur.MUST);
                query = newQuery;
            } else {
                // Add contextQuery as sub-query
                query.add(contextQuery, BooleanClause.Occur.MUST);
            }
        }
    }
    // TODO: we could allow blended sort here, combining
    // weight w/ score.  Now we ignore score and sort only
    // by weight:
    // finishQuery receives the builder and allTermsRequired and returns the query that is run.
    Query finalQuery = finishQuery(query, allTermsRequired);
    //System.out.println("finalQuery=" + finalQuery);
    // Sort by weight, descending:
    TopFieldCollector c = TopFieldCollector.create(SORT, num, true, false, false);
    // We sorted postings by weight during indexing, so we
    // only retrieve the first num hits now:
    Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
    List<LookupResult> results = null;
    SearcherManager mgr;
    IndexSearcher searcher;
    synchronized (searcherMgrLock) {
        // acquire & release on same SearcherManager, via local reference
        mgr = searcherMgr;
        searcher = mgr.acquire();
    }
    try {
        //System.out.println("got searcher=" + searcher);
        // Search through the wrapping collector c2, then read the hits from the wrapped collector c.
        searcher.search(finalQuery, c2);
        TopFieldDocs hits = c.topDocs();
        // Slower way if postings are not pre-sorted by weight:
        // hits = searcher.search(query, null, num, SORT);
        results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
    } finally {
        // Always hand the searcher back to the manager it was acquired from.
        mgr.release(searcher);
    }
    return results;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) PrefixQuery(org.apache.lucene.search.PrefixQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SearcherManager(org.apache.lucene.search.SearcherManager) StringReader(java.io.StringReader) EarlyTerminatingSortingCollector(org.apache.lucene.search.EarlyTerminatingSortingCollector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) Collector(org.apache.lucene.search.Collector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) TermQuery(org.apache.lucene.search.TermQuery) Occur(org.apache.lucene.search.BooleanClause.Occur) Term(org.apache.lucene.index.Term) BooleanClause(org.apache.lucene.search.BooleanClause) EarlyTerminatingSortingCollector(org.apache.lucene.search.EarlyTerminatingSortingCollector) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute)

Example 18 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class TestQPHelper method testWildcard.

public void testWildcard() throws Exception {
    // { query text, expected result } pairs for prefix, fuzzy, wildcard and boosted forms.
    final String[][] parsedForms = {
        { "term*", "term*" },
        { "term*^2", "(term*)^2.0" },
        { "term~", "term~2" },
        { "term~0.7", "term~1" },
        { "term~^3", "(term~2)^3.0" },
        { "term^3~", "(term~2)^3.0" },
        { "term*germ", "term*germ" },
        { "term*germ^3", "(term*germ)^3.0" },
    };
    for (String[] form : parsedForms) {
        assertQueryEquals(form[0], null, form[1]);
    }

    // The concrete query classes produced for prefix and boosted-prefix input.
    assertTrue(getQuery("term*", null) instanceof PrefixQuery);
    assertTrue(getQuery("term*^2", null) instanceof BoostQuery);
    final BoostQuery boostedPrefix = (BoostQuery) getQuery("term*^2", null);
    assertTrue(boostedPrefix.getQuery() instanceof PrefixQuery);

    // Fuzzy input: "~0.7" is expected to yield one edit, a bare "~" two edits.
    assertTrue(getQuery("term~", null) instanceof FuzzyQuery);
    assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery);
    final FuzzyQuery fuzzyWithValue = (FuzzyQuery) getQuery("term~0.7", null);
    assertEquals(1, fuzzyWithValue.getMaxEdits());
    assertEquals(FuzzyQuery.defaultPrefixLength, fuzzyWithValue.getPrefixLength());
    final FuzzyQuery fuzzyDefault = (FuzzyQuery) getQuery("term~", null);
    assertEquals(2, fuzzyDefault.getMaxEdits());
    assertEquals(FuzzyQuery.defaultPrefixLength, fuzzyDefault.getPrefixLength());

    // A fuzzy value greater than 1 must be rejected.
    assertQueryNodeException("term~1.1");

    assertTrue(getQuery("term*germ", null) instanceof WildcardQuery);

    /*
     * Checks that wildcard terms are (or are not) properly lower-cased
     * with the proper parser configuration. Each pair is { query text, expected result }.
     */
    final String[][] lowerCased = {
        // Prefix queries
        { "Term*", "term*" },
        { "term*", "term*" },
        { "Term*", "term*" },
        { "TERM*", "term*" },
        // 'Full' wildcard queries
        { "Te?m", "te?m" },
        { "te?m", "te?m" },
        { "Te?m", "te?m" },
        { "TE?M", "te?m" },
        { "Te?m*gerM", "te?m*germ" },
        // Fuzzy queries
        { "Term~", "term~2" },
        // Range queries.
        // TODO: implement this on QueryParser
        // Q0002E_INVALID_SYNTAX_CANNOT_PARSE: Syntax Error, cannot parse '[A TO
        // C]': Lexical error at line 1, column 1. Encountered: "[" (91), after
        // : ""
        { "[A TO C]", "[a TO c]" },
    };
    for (String[] pair : lowerCased) {
        assertWildcardQueryEquals(pair[0], pair[1]);
    }

    // Leading-wildcard (suffix) queries: rejected first...
    expectThrows(QueryNodeException.class, () -> assertWildcardQueryEquals("*Term", "*term"));
    expectThrows(QueryNodeException.class, () -> assertWildcardQueryEquals("?Term", "?term"));

    // ...then accepted once they are explicitly allowed.
    assertWildcardQueryEquals("*Term", "*term", true);
    assertWildcardQueryEquals("?Term", "?term", true);
}
Also used : WildcardQuery(org.apache.lucene.search.WildcardQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) BoostQuery(org.apache.lucene.search.BoostQuery)

Example 19 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class TestQPHelper method testConstantScoreAutoRewrite.

public void testConstantScoreAutoRewrite() throws Exception {
    // Wildcard, prefix and range queries built by the parser are each expected to carry
    // the constant-score rewrite method.
    final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    final StandardQueryParser parser = new StandardQueryParser(analyzer);

    final Query wildcard = parser.parse("foo*bar", "field");
    assertTrue(wildcard instanceof WildcardQuery);
    final MultiTermQuery wildcardMtq = (MultiTermQuery) wildcard;
    assertEquals(MultiTermQuery.CONSTANT_SCORE_REWRITE, wildcardMtq.getRewriteMethod());

    final Query prefix = parser.parse("foo*", "field");
    assertTrue(prefix instanceof PrefixQuery);
    final MultiTermQuery prefixMtq = (MultiTermQuery) prefix;
    assertEquals(MultiTermQuery.CONSTANT_SCORE_REWRITE, prefixMtq.getRewriteMethod());

    final Query range = parser.parse("[a TO z]", "field");
    assertTrue(range instanceof TermRangeQuery);
    final MultiTermQuery rangeMtq = (MultiTermQuery) range;
    assertEquals(MultiTermQuery.CONSTANT_SCORE_REWRITE, rangeMtq.getRewriteMethod());
}
Also used : WildcardQuery(org.apache.lucene.search.WildcardQuery) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery)

Example 20 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class TestSimpleQueryParser method testPrefix.

/** Parsing a single term followed by {@code *} must yield a {@link PrefixQuery} on that term. */
public void testPrefix() throws Exception {
    final Term prefixTerm = new Term("field", "foobar");
    assertEquals(new PrefixQuery(prefixTerm), parse("foobar*"));
}
Also used : PrefixQuery(org.apache.lucene.search.PrefixQuery) Term(org.apache.lucene.index.Term)

Aggregations

PrefixQuery (org.apache.lucene.search.PrefixQuery)68 Term (org.apache.lucene.index.Term)62 BooleanQuery (org.apache.lucene.search.BooleanQuery)34 Query (org.apache.lucene.search.Query)30 TermQuery (org.apache.lucene.search.TermQuery)29 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)27 WildcardQuery (org.apache.lucene.search.WildcardQuery)23 BoostQuery (org.apache.lucene.search.BoostQuery)20 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)19 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)15 Document (org.apache.lucene.document.Document)14 IndexSearcher (org.apache.lucene.search.IndexSearcher)14 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)14 PhraseQuery (org.apache.lucene.search.PhraseQuery)14 RegexpQuery (org.apache.lucene.search.RegexpQuery)13 TopDocs (org.apache.lucene.search.TopDocs)13 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)12 IndexReader (org.apache.lucene.index.IndexReader)11 TermRangeQuery (org.apache.lucene.search.TermRangeQuery)11 Field (org.apache.lucene.document.Field)10