Search in sources :

Example 11 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class SimpleQueryParser method newPrefixQuery.

/**
 * Factory method to generate a prefix query.
 * <p>
 * For every entry in {@code weights} a {@link PrefixQuery} is built on that
 * field, using the analyzer-normalized form of {@code text} as the prefix.
 * A query whose weight is not exactly {@code 1} is wrapped in a
 * {@link BoostQuery}. All per-field queries are added as {@code SHOULD}
 * clauses of one boolean query, which is passed through {@code simplify}
 * before being returned.
 *
 * @param text the raw prefix text
 * @return the result of {@code simplify} applied to the combined boolean query
 */
protected Query newPrefixQuery(String text) {
    final BooleanQuery.Builder builder = new BooleanQuery.Builder();
    for (Map.Entry<String, Float> fieldWeight : weights.entrySet()) {
        final String field = fieldWeight.getKey();
        final BytesRef normalizedPrefix = getAnalyzer().normalize(field, text);
        final Query prefixQuery = new PrefixQuery(new Term(field, normalizedPrefix));
        final float weight = fieldWeight.getValue();
        // Only wrap when the weight actually changes the score.
        final Query clause = (weight == 1f) ? prefixQuery : new BoostQuery(prefixQuery, weight);
        builder.add(clause, BooleanClause.Occur.SHOULD);
    }
    return simplify(builder.build());
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) QueryBuilder(org.apache.lucene.util.QueryBuilder) Term(org.apache.lucene.index.Term) BoostQuery(org.apache.lucene.search.BoostQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) Map(java.util.Map) BytesRef(org.apache.lucene.util.BytesRef)

Example 12 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class PrefixWildcardQueryNodeBuilder method build.

/**
 * Builds a {@link PrefixQuery} from a {@link PrefixWildcardQueryNode}.
 * <p>
 * The prefix is the node's text with its last character removed
 * (presumably the trailing wildcard; confirm against the node class).
 * If the node carries a rewrite method under
 * {@code MultiTermRewriteMethodProcessor.TAG_ID}, it is applied to the query.
 */
@Override
public PrefixQuery build(QueryNode queryNode) throws QueryNodeException {
    final PrefixWildcardQueryNode prefixNode = (PrefixWildcardQueryNode) queryNode;

    // Everything except the final character of the node text is the prefix.
    final CharSequence nodeText = prefixNode.getText();
    final String prefix = nodeText.subSequence(0, nodeText.length() - 1).toString();

    final Term prefixTerm = new Term(prefixNode.getFieldAsString(), prefix);
    final PrefixQuery prefixQuery = new PrefixQuery(prefixTerm);

    final MultiTermQuery.RewriteMethod rewriteMethod =
        (MultiTermQuery.RewriteMethod) queryNode.getTag(MultiTermRewriteMethodProcessor.TAG_ID);
    if (rewriteMethod == null) {
        return prefixQuery;
    }
    prefixQuery.setRewriteMethod(rewriteMethod);
    return prefixQuery;
}
Also used : MultiTermQuery(org.apache.lucene.search.MultiTermQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) PrefixWildcardQueryNode(org.apache.lucene.queryparser.flexible.standard.nodes.PrefixWildcardQueryNode) Term(org.apache.lucene.index.Term)

Example 13 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterMTQ method testWhichMTQMatched.

/**
 * Runs a query with multiple MTQs (prefix, wildcard and fuzzy) and confirms
 * the formatter can tell which query matched which hit.
 * <p>
 * The test indexes one document, highlights it once with the default
 * formatter, and then again with a custom {@link PassageFormatter} that
 * appends the matching term's text, in parentheses, after each highlighted
 * span.
 */
public void testWhichMTQMatched() throws Exception {
    // Index a single document whose "body" field holds one sentence.
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue("Test a one sentence document.");
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    // use a variety of common MTQ types: three optional (SHOULD) clauses on "body",
    // a prefix query, a wildcard query and a fuzzy query
    BooleanQuery query = new BooleanQuery.Builder().add(new PrefixQuery(new Term("body", "te")), BooleanClause.Occur.SHOULD).add(new WildcardQuery(new Term("body", "*one*")), BooleanClause.Occur.SHOULD).add(new FuzzyQuery(new Term("body", "zentence~")), BooleanClause.Occur.SHOULD).build();
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(1, snippets.length);
    // Default formatter just bolds each hit:
    assertEquals("<b>Test</b> a <b>one</b> <b>sentence</b> document.", snippets[0]);
    // Now use our own formatter, that also stuffs the
    // matching term's text into the result:
    highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {

        @Override
        protected PassageFormatter getFormatter(String field) {
            return new PassageFormatter() {

                @Override
                public Object format(Passage[] passages, String content) {
                    // Copied from DefaultPassageFormatter, but
                    // tweaked to include the matched term:
                    StringBuilder sb = new StringBuilder();
                    // pos is the offset in content up to which text has already been emitted
                    int pos = 0;
                    for (Passage passage : passages) {
                        // don't add ellipsis if it's the first one, or if it's connected.
                        if (passage.getStartOffset() > pos && pos > 0) {
                            sb.append("... ");
                        }
                        pos = passage.getStartOffset();
                        for (int i = 0; i < passage.getNumMatches(); i++) {
                            int start = passage.getMatchStarts()[i];
                            int end = passage.getMatchEnds()[i];
                            // it's possible to have overlapping terms
                            if (start > pos) {
                                // emit the unhighlighted text between the previous match and this one
                                sb.append(content, pos, start);
                            }
                            if (end > pos) {
                                // emit only the part of the match not already written, followed by
                                // the matched term's string form in parentheses
                                sb.append("<b>");
                                sb.append(content, Math.max(pos, start), end);
                                sb.append('(');
                                sb.append(passage.getMatchTerms()[i].utf8ToString());
                                sb.append(')');
                                sb.append("</b>");
                                pos = end;
                            }
                        }
                        // it's possible a "term" from the analyzer could span a sentence boundary.
                        sb.append(content, pos, Math.max(pos, passage.getEndOffset()));
                        pos = passage.getEndOffset();
                    }
                    return sb.toString();
                }
            };
        }
    };
    // The search is not re-run here; this re-checks the same topDocs before highlighting again.
    assertEquals(1, topDocs.totalHits);
    snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(1, snippets.length);
    // Each highlighted span now carries, in parentheses, the text of the term reported for that match.
    assertEquals("<b>Test(body:te*)</b> a <b>one(body:*one*)</b> <b>sentence(body:zentence~~2)</b> document.", snippets[0]);
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 14 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterMTQ method testMultiSegment.

/**
 * Highlights a prefix query over two documents that are separated by a
 * commit. If the term vector were incorrectly fetched with a mismatched
 * global/leaf doc ID, this test may fail.
 */
public void testMultiSegment() throws Exception {
    final RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

    final Document firstDoc = new Document();
    firstDoc.add(new Field("body", "word aberration", fieldType));
    writer.addDocument(firstDoc);

    // make segment
    writer.commit();

    final Document secondDoc = new Document();
    secondDoc.add(new Field("body", "word absolve", fieldType));
    writer.addDocument(secondDoc);

    final IndexReader reader = writer.getReader();
    writer.close();

    final IndexSearcher indexSearcher = newSearcher(reader);
    final UnifiedHighlighter unifiedHighlighter = new UnifiedHighlighter(indexSearcher, indexAnalyzer);

    final Query prefixQuery = new PrefixQuery(new Term("body", "ab"));
    final TopDocs hits = indexSearcher.search(prefixQuery, 10);

    final String[] fields = new String[] { "body" };
    final String[] bodySnippets = unifiedHighlighter.highlightFields(fields, prefixQuery, hits).get("body");

    // Sort so the comparison does not depend on the order in which hits came back.
    Arrays.sort(bodySnippets);
    final String actual = Arrays.toString(bodySnippets);
    assertEquals("[word <b>aberration</b>, word <b>absolve</b>]", actual);

    reader.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) SpanFirstQuery(org.apache.lucene.search.spans.SpanFirstQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 15 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterStrictPhrases method testMtq.

/**
 * Like {@link #testRewriteAndMtq} but no rewrite.
 * <p>
 * Highlights the same query twice: first with the highlighter as initialized,
 * then after calling {@code setHandleMultiTermQuery(false)}. The second run
 * expects the final "alpha" to no longer be highlighted.
 */
public void testMtq() throws IOException {
    indexWriter.addDocument(newDoc("alpha bravo charlie - charlie bravo alpha"));
    initReaderSearcherHighlighter();

    // Span query over "bravo" then "charlie" (slop 0, in order); does NOT rewrite.
    final SpanQuery[] spanClauses = new SpanQuery[] {
        new SpanTermQuery(new Term("body", "bravo")),
        new SpanTermQuery(new Term("body", "charlie"))
    };
    final SpanNearQuery snq = new SpanNearQuery(spanClauses, 0, true);

    final BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(snq, BooleanClause.Occur.MUST);
    // MTQ
    builder.add(new PrefixQuery(new Term("body", "al")), BooleanClause.Occur.MUST);
    builder.add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.MUST);
    builder.add(newPhraseQuery("title", "bravo alpha"), BooleanClause.Occur.SHOULD);
    final BooleanQuery query = builder.build();

    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    final String[] expectedWithMtq = new String[] { "<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>" };
    assertArrayEquals(expectedWithMtq, snippets);

    // do again, this time with MTQ disabled.
    // disable but leave phrase processing enabled
    highlighter.setHandleMultiTermQuery(false);
    topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    snippets = highlighter.highlight("body", query, topDocs);
    final String[] expectedWithoutMtq = new String[] { "<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo alpha" };
    assertArrayEquals(expectedWithoutMtq, snippets);
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) Term(org.apache.lucene.index.Term) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Aggregations

PrefixQuery (org.apache.lucene.search.PrefixQuery): 68; Term (org.apache.lucene.index.Term): 62; BooleanQuery (org.apache.lucene.search.BooleanQuery): 34; Query (org.apache.lucene.search.Query): 30; TermQuery (org.apache.lucene.search.TermQuery): 29; FuzzyQuery (org.apache.lucene.search.FuzzyQuery): 27; WildcardQuery (org.apache.lucene.search.WildcardQuery): 23; BoostQuery (org.apache.lucene.search.BoostQuery): 20; SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 19; SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery): 15; Document (org.apache.lucene.document.Document): 14; IndexSearcher (org.apache.lucene.search.IndexSearcher): 14; MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 14; PhraseQuery (org.apache.lucene.search.PhraseQuery): 14; RegexpQuery (org.apache.lucene.search.RegexpQuery): 13; TopDocs (org.apache.lucene.search.TopDocs): 13; SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery): 12; IndexReader (org.apache.lucene.index.IndexReader): 11; TermRangeQuery (org.apache.lucene.search.TermRangeQuery): 11; Field (org.apache.lucene.document.Field): 10