Search in sources :

Example 76 with TopDocs

use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

the class SynonymTokenizer method testUnRewrittenQuery.

public void testUnRewrittenQuery() throws Exception {
    final TestHighlightRunner helper = new TestHighlightRunner() {

        @Override
        public void run() throws Exception {
            numHighlights = 0;
            // test to show how rewritten query can still be used
            searcher = newSearcher(reader);
            BooleanQuery.Builder query = new BooleanQuery.Builder();
            query.add(new WildcardQuery(new Term(FIELD_NAME, "jf?")), Occur.SHOULD);
            query.add(new WildcardQuery(new Term(FIELD_NAME, "kenned*")), Occur.SHOULD);
            if (VERBOSE)
                System.out.println("Searching with primitive query");
            // forget to set this and...
            // query=query.rewrite(reader);
            TopDocs hits = searcher.search(query.build(), 1000);
            // create an instance of the highlighter with the tags used to surround
            // highlighted text
            // QueryHighlightExtractor highlighter = new
            // QueryHighlightExtractor(this,
            // query, new StandardAnalyzer(TEST_VERSION));
            int maxNumFragmentsRequired = 3;
            for (int i = 0; i < hits.totalHits; i++) {
                final int docId = hits.scoreDocs[i].doc;
                final Document doc = searcher.doc(docId);
                String text = doc.get(FIELD_NAME);
                TokenStream tokenStream = getAnyTokenStream(FIELD_NAME, docId);
                Highlighter highlighter = getHighlighter(query.build(), FIELD_NAME, HighlighterTest.this, false);
                highlighter.setTextFragmenter(new SimpleFragmenter(40));
                String highlightedText = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
                if (VERBOSE)
                    System.out.println(highlightedText);
            }
            // We expect to have zero highlights if the query is multi-terms and is
            // not
            // rewritten!
            assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 0);
        }
    };
    helper.start();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) TestHighlightRunner(org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Builder(org.apache.lucene.search.PhraseQuery.Builder) DocumentBuilder(javax.xml.parsers.DocumentBuilder) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) TopDocs(org.apache.lucene.search.TopDocs)

Example 77 with TopDocs

use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

the class TestUnifiedHighlighter method testCustomFieldValueSource.

public void testCustomFieldValueSource() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Document doc = new Document();
    final String text = "This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.";
    Field body = new Field("body", text, fieldType);
    doc.add(body);
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {

        @Override
        protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter, int cacheCharsThreshold) throws IOException {
            assert fields.length == 1;
            assert docIter.cost() == 1;
            docIter.nextDoc();
            return Collections.singletonList(new CharSequence[] { text });
        }

        @Override
        protected BreakIterator getBreakIterator(String field) {
            return new WholeBreakIterator();
        }
    };
    Query query = new TermQuery(new Term("body", "test"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs, 2);
    assertEquals(1, snippets.length);
    assertEquals("This is a <b>test</b>.  Just highlighting from postings. This is also a much sillier <b>test</b>.  Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) IndexReader(org.apache.lucene.index.IndexReader) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 78 with TopDocs

use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

the class TestUnifiedHighlighter method testBooleanMustNot.

public void testBooleanMustNot() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "This sentence has both terms.  This sentence has only terms.", fieldType);
    Document document = new Document();
    document.add(body);
    iw.addDocument(document);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    BooleanQuery query2 = new BooleanQuery.Builder().add(new TermQuery(new Term("body", "both")), BooleanClause.Occur.MUST_NOT).build();
    BooleanQuery query = new BooleanQuery.Builder().add(new TermQuery(new Term("body", "terms")), BooleanClause.Occur.SHOULD).add(query2, BooleanClause.Occur.SHOULD).build();
    TopDocs topDocs = searcher.search(query, 10);
    assertEquals(1, topDocs.totalHits);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    highlighter.setMaxLength(Integer.MAX_VALUE - 1);
    String[] snippets = highlighter.highlight("body", query, topDocs, 2);
    assertEquals(1, snippets.length);
    assertFalse(snippets[0].contains("<b>both</b>"));
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 79 with TopDocs

use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

the class TestUnifiedHighlighter method testEncode.

public void testEncode() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {

        @Override
        protected PassageFormatter getFormatter(String field) {
            return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
        }
    };
    Query query = new TermQuery(new Term("body", "highlighting"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(1, snippets.length);
    assertEquals("Just&#32;a&#32;test&#32;<b>highlighting</b>&#32;from&#32;&lt;i&gt;postings&lt;&#x2F;i&gt;&#46;&#32;", snippets[0]);
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 80 with TopDocs

use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

the class TestUnifiedHighlighter method testCambridgeMA.

public void testCambridgeMA() throws Exception {
    BufferedReader r = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream("CambridgeMA.utf8"), StandardCharsets.UTF_8));
    String text = r.readLine();
    r.close();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", text, fieldType);
    Document document = new Document();
    document.add(body);
    iw.addDocument(document);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    BooleanQuery query = new BooleanQuery.Builder().add(new TermQuery(new Term("body", "porter")), BooleanClause.Occur.SHOULD).add(new TermQuery(new Term("body", "square")), BooleanClause.Occur.SHOULD).add(new TermQuery(new Term("body", "massachusetts")), BooleanClause.Occur.SHOULD).build();
    TopDocs topDocs = searcher.search(query, 10);
    assertEquals(1, topDocs.totalHits);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    highlighter.setMaxLength(Integer.MAX_VALUE - 1);
    String[] snippets = highlighter.highlight("body", query, topDocs, 2);
    assertEquals(1, snippets.length);
    assertTrue(snippets[0].contains("<b>Square</b>"));
    assertTrue(snippets[0].contains("<b>Porter</b>"));
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) InputStreamReader(java.io.InputStreamReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) BufferedReader(java.io.BufferedReader) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Aggregations

TopDocs (org.apache.lucene.search.TopDocs)496 IndexSearcher (org.apache.lucene.search.IndexSearcher)302 Document (org.apache.lucene.document.Document)275 TermQuery (org.apache.lucene.search.TermQuery)189 IndexReader (org.apache.lucene.index.IndexReader)187 Term (org.apache.lucene.index.Term)174 Directory (org.apache.lucene.store.Directory)174 Query (org.apache.lucene.search.Query)172 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)144 BooleanQuery (org.apache.lucene.search.BooleanQuery)127 ScoreDoc (org.apache.lucene.search.ScoreDoc)127 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)120 Sort (org.apache.lucene.search.Sort)94 Field (org.apache.lucene.document.Field)85 SortField (org.apache.lucene.search.SortField)74 IOException (java.io.IOException)58 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)56 TextField (org.apache.lucene.document.TextField)47 PhraseQuery (org.apache.lucene.search.PhraseQuery)46 PrefixQuery (org.apache.lucene.search.PrefixQuery)45