Search in sources :

Example 46 with BooleanQuery

use of org.apache.lucene.search.BooleanQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterMTQ method testWhichMTQMatched.

/**
   * Runs a query with two MTQs and confirms the formatter
   * can tell which query matched which hit.
   */
public void testWhichMTQMatched() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue("Test a one sentence document.");
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    // use a variety of common MTQ types
    BooleanQuery query = new BooleanQuery.Builder().add(new PrefixQuery(new Term("body", "te")), BooleanClause.Occur.SHOULD).add(new WildcardQuery(new Term("body", "*one*")), BooleanClause.Occur.SHOULD).add(new FuzzyQuery(new Term("body", "zentence~")), BooleanClause.Occur.SHOULD).build();
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(1, snippets.length);
    // Default formatter just bolds each hit:
    assertEquals("<b>Test</b> a <b>one</b> <b>sentence</b> document.", snippets[0]);
    // Now use our own formatter, that also stuffs the
    // matching term's text into the result:
    highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {

        @Override
        protected PassageFormatter getFormatter(String field) {
            return new PassageFormatter() {

                @Override
                public Object format(Passage[] passages, String content) {
                    // Copied from DefaultPassageFormatter, but
                    // tweaked to include the matched term:
                    StringBuilder sb = new StringBuilder();
                    int pos = 0;
                    for (Passage passage : passages) {
                        // don't add ellipsis if its the first one, or if its connected.
                        if (passage.getStartOffset() > pos && pos > 0) {
                            sb.append("... ");
                        }
                        pos = passage.getStartOffset();
                        for (int i = 0; i < passage.getNumMatches(); i++) {
                            int start = passage.getMatchStarts()[i];
                            int end = passage.getMatchEnds()[i];
                            // its possible to have overlapping terms
                            if (start > pos) {
                                sb.append(content, pos, start);
                            }
                            if (end > pos) {
                                sb.append("<b>");
                                sb.append(content, Math.max(pos, start), end);
                                sb.append('(');
                                sb.append(passage.getMatchTerms()[i].utf8ToString());
                                sb.append(')');
                                sb.append("</b>");
                                pos = end;
                            }
                        }
                        // its possible a "term" from the analyzer could span a sentence boundary.
                        sb.append(content, pos, Math.max(pos, passage.getEndOffset()));
                        pos = passage.getEndOffset();
                    }
                    return sb.toString();
                }
            };
        }
    };
    assertEquals(1, topDocs.totalHits);
    snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(1, snippets.length);
    assertEquals("<b>Test(body:te*)</b> a <b>one(body:*one*)</b> <b>sentence(body:zentence~~2)</b> document.", snippets[0]);
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 47 with BooleanQuery

use of org.apache.lucene.search.BooleanQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterMTQ method testPositionSensitiveWithWildcardDoesNotHighlight.

public void testPositionSensitiveWithWildcardDoesNotHighlight() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Document doc = new Document();
    doc.add(new Field("body", "iterate insect ipswitch illinois indirect", fieldType));
    doc.add(newTextField("id", "id", Field.Store.YES));
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
    PhraseQuery pq = new PhraseQuery.Builder().add(new Term("body", "consent")).add(new Term("body", "order")).build();
    BooleanQuery query = new BooleanQuery.Builder().add(new WildcardQuery(new Term("body", "enforc*")), BooleanClause.Occur.MUST).add(pq, BooleanClause.Occur.MUST).build();
    int[] docIds = new int[] { docID };
    String[] snippets = highlighter.highlightFields(new String[] { "body" }, query, docIds, new int[] { 2 }).get("body");
    assertEquals(1, snippets.length);
    assertEquals("iterate insect ipswitch illinois indirect", snippets[0]);
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) Field(org.apache.lucene.document.Field) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 48 with BooleanQuery

use of org.apache.lucene.search.BooleanQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterMTQ method testRanges.

public void testRanges() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue("This is a test.");
    iw.addDocument(doc);
    body.setStringValue("Test a one sentence document.");
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    Query query = TermRangeQuery.newStringRange("body", "ta", "tf", true, true);
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
    // null start
    query = TermRangeQuery.newStringRange("body", null, "tf", true, true);
    topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This <b>is</b> <b>a</b> <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> <b>a</b> <b>one</b> <b>sentence</b> <b>document</b>.", snippets[1]);
    // null end
    query = TermRangeQuery.newStringRange("body", "ta", null, true, true);
    topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("<b>This</b> is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
    // exact start inclusive
    query = TermRangeQuery.newStringRange("body", "test", "tf", true, true);
    topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
    // exact end inclusive
    query = TermRangeQuery.newStringRange("body", "ta", "test", true, true);
    topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
    // exact start exclusive
    BooleanQuery bq = new BooleanQuery.Builder().add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD).add(TermRangeQuery.newStringRange("body", "test", "tf", false, true), BooleanClause.Occur.SHOULD).build();
    topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    snippets = highlighter.highlight("body", bq, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a test.", snippets[0]);
    assertEquals("Test a one sentence document.", snippets[1]);
    // exact end exclusive
    bq = new BooleanQuery.Builder().add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD).add(TermRangeQuery.newStringRange("body", "ta", "test", true, false), BooleanClause.Occur.SHOULD).build();
    topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    snippets = highlighter.highlight("body", bq, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a test.", snippets[0]);
    assertEquals("Test a one sentence document.", snippets[1]);
    // wrong field
    bq = new BooleanQuery.Builder().add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD).add(TermRangeQuery.newStringRange("bogus", "ta", "tf", true, true), BooleanClause.Occur.SHOULD).build();
    topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    snippets = highlighter.highlight("body", bq, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a test.", snippets[0]);
    assertEquals("Test a one sentence document.", snippets[1]);
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) SpanFirstQuery(org.apache.lucene.search.spans.SpanFirstQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 49 with BooleanQuery

use of org.apache.lucene.search.BooleanQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterMTQ method testOneRegexp.

public void testOneRegexp() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue("This is a test.");
    iw.addDocument(doc);
    body.setStringValue("Test a one sentence document.");
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    Query query = new RegexpQuery(new Term("body", "te.*"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
    // wrong field
    BooleanQuery bq = new BooleanQuery.Builder().add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD).add(new RegexpQuery(new Term("bogus", "te.*")), BooleanClause.Occur.SHOULD).build();
    topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    snippets = highlighter.highlight("body", bq, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a test.", snippets[0]);
    assertEquals("Test a one sentence document.", snippets[1]);
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) SpanFirstQuery(org.apache.lucene.search.spans.SpanFirstQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 50 with BooleanQuery

use of org.apache.lucene.search.BooleanQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterRanking method checkQuery.

private void checkQuery(IndexSearcher is, Query query, int doc, int maxTopN) throws IOException {
    for (int n = 1; n < maxTopN; n++) {
        final FakePassageFormatter f1 = new FakePassageFormatter();
        UnifiedHighlighter p1 = new UnifiedHighlighter(is, indexAnalyzer) {

            @Override
            protected PassageFormatter getFormatter(String field) {
                assertEquals("body", field);
                return f1;
            }
        };
        p1.setMaxLength(Integer.MAX_VALUE - 1);
        final FakePassageFormatter f2 = new FakePassageFormatter();
        UnifiedHighlighter p2 = new UnifiedHighlighter(is, indexAnalyzer) {

            @Override
            protected PassageFormatter getFormatter(String field) {
                assertEquals("body", field);
                return f2;
            }
        };
        p2.setMaxLength(Integer.MAX_VALUE - 1);
        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
        queryBuilder.add(query, BooleanClause.Occur.MUST);
        queryBuilder.add(new TermQuery(new Term("id", Integer.toString(doc))), BooleanClause.Occur.MUST);
        BooleanQuery bq = queryBuilder.build();
        TopDocs td = is.search(bq, 1);
        p1.highlight("body", bq, td, n);
        p2.highlight("body", bq, td, n + 1);
        assertTrue(f2.seen.containsAll(f1.seen));
    }
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term)

Aggregations

BooleanQuery (org.apache.lucene.search.BooleanQuery)297 TermQuery (org.apache.lucene.search.TermQuery)176 Term (org.apache.lucene.index.Term)144 Query (org.apache.lucene.search.Query)129 BooleanClause (org.apache.lucene.search.BooleanClause)89 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)70 BoostQuery (org.apache.lucene.search.BoostQuery)58 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)52 PhraseQuery (org.apache.lucene.search.PhraseQuery)50 ArrayList (java.util.ArrayList)47 TopDocs (org.apache.lucene.search.TopDocs)47 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)43 WildcardQuery (org.apache.lucene.search.WildcardQuery)42 IndexSearcher (org.apache.lucene.search.IndexSearcher)40 IndexReader (org.apache.lucene.index.IndexReader)39 PrefixQuery (org.apache.lucene.search.PrefixQuery)39 Test (org.junit.Test)39 TermRangeQuery (org.apache.lucene.search.TermRangeQuery)38 Document (org.apache.lucene.document.Document)36 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)33