Search in sources :

Example 96 with IndexSearcher

use of org.apache.lucene.search.IndexSearcher in project lucene-solr by apache.

the class TestUnifiedHighlighter method testBasics.

//
//  Tests below were ported from the PostingsHighlighter. Possibly augmented.  Far below are newer tests.
//
public void testBasics() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
    iw.addDocument(doc);
    body.setStringValue("Highlighting the first term. Hope it works.");
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    Query query = new TermQuery(new Term("body", "highlighting"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
    assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 97 with IndexSearcher

use of org.apache.lucene.search.IndexSearcher in project lucene-solr by apache.

the class TestUnifiedHighlighter method testMultipleTerms.

public void testMultipleTerms() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
    iw.addDocument(doc);
    body.setStringValue("Highlighting the first term. Hope it works.");
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    BooleanQuery query = new BooleanQuery.Builder().add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD).add(new TermQuery(new Term("body", "just")), BooleanClause.Occur.SHOULD).add(new TermQuery(new Term("body", "first")), BooleanClause.Occur.SHOULD).build();
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", snippets[0]);
    assertEquals("<b>Highlighting</b> the <b>first</b> term. ", snippets[1]);
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 98 with IndexSearcher

use of org.apache.lucene.search.IndexSearcher in project lucene-solr by apache.

the class TokenSourcesTest method testOverlapWithPositionsAndOffset.

public void testOverlapWithPositionsAndOffset() throws IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox did not jump";
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));
    try {
        final Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setStoreTermVectors(true);
        customType.setStoreTermVectorOffsets(true);
        customType.setStoreTermVectorPositions(true);
        document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
        indexWriter.addDocument(document);
    } finally {
        indexWriter.close();
    }
    final IndexReader indexReader = DirectoryReader.open(directory);
    try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        final DisjunctionMaxQuery query = new DisjunctionMaxQuery(Arrays.asList(new SpanTermQuery(new Term(FIELD, "{fox}")), new SpanTermQuery(new Term(FIELD, "fox"))), 1);
        // final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
        // new SpanTermQuery(new Term(FIELD, "{fox}")),
        // new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
        TopDocs hits = indexSearcher.search(query, 1);
        assertEquals(1, hits.totalHits);
        final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(query));
        final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
        assertEquals("<B>the fox</B> did not jump", highlighter.getBestFragment(tokenStream, TEXT));
    } finally {
        indexReader.close();
        directory.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory)

Example 99 with IndexSearcher

use of org.apache.lucene.search.IndexSearcher in project lucene-solr by apache.

the class TokenSourcesTest method testOverlapWithPositionsAndOffsetExactPhrase.

public void testOverlapWithPositionsAndOffsetExactPhrase() throws IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox did not jump";
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));
    try {
        final Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setStoreTermVectors(true);
        customType.setStoreTermVectorPositions(true);
        customType.setStoreTermVectorOffsets(true);
        document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
        indexWriter.addDocument(document);
    } finally {
        indexWriter.close();
    }
    final IndexReader indexReader = DirectoryReader.open(directory);
    try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        // final DisjunctionMaxQuery query = new DisjunctionMaxQuery(1);
        // query.add(new SpanTermQuery(new Term(FIELD, "the")));
        // query.add(new SpanTermQuery(new Term(FIELD, "fox")));
        final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD, "the")), new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
        TopDocs hits = indexSearcher.search(phraseQuery, 1);
        assertEquals(1, hits.totalHits);
        final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
        final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
        assertEquals("<B>the fox</B> did not jump", highlighter.getBestFragment(tokenStream, TEXT));
    } finally {
        indexReader.close();
        directory.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) Directory(org.apache.lucene.store.Directory)

Example 100 with IndexSearcher

use of org.apache.lucene.search.IndexSearcher in project lucene-solr by apache.

the class TestUnifiedHighlighter method testCustomFieldValueSource.

public void testCustomFieldValueSource() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Document doc = new Document();
    final String text = "This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.";
    Field body = new Field("body", text, fieldType);
    doc.add(body);
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {

        @Override
        protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter, int cacheCharsThreshold) throws IOException {
            assert fields.length == 1;
            assert docIter.cost() == 1;
            docIter.nextDoc();
            return Collections.singletonList(new CharSequence[] { text });
        }

        @Override
        protected BreakIterator getBreakIterator(String field) {
            return new WholeBreakIterator();
        }
    };
    Query query = new TermQuery(new Term("body", "test"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs, 2);
    assertEquals(1, snippets.length);
    assertEquals("This is a <b>test</b>.  Just highlighting from postings. This is also a much sillier <b>test</b>.  Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) IndexReader(org.apache.lucene.index.IndexReader) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Aggregations

IndexSearcher (org.apache.lucene.search.IndexSearcher)927 Document (org.apache.lucene.document.Document)528 IndexReader (org.apache.lucene.index.IndexReader)430 Directory (org.apache.lucene.store.Directory)407 TopDocs (org.apache.lucene.search.TopDocs)382 TermQuery (org.apache.lucene.search.TermQuery)332 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)311 Query (org.apache.lucene.search.Query)299 Term (org.apache.lucene.index.Term)287 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)229 BooleanQuery (org.apache.lucene.search.BooleanQuery)176 IOException (java.io.IOException)155 Field (org.apache.lucene.document.Field)140 IndexWriter (org.apache.lucene.index.IndexWriter)139 ScoreDoc (org.apache.lucene.search.ScoreDoc)135 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)122 Sort (org.apache.lucene.search.Sort)114 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)111 DirectoryReader (org.apache.lucene.index.DirectoryReader)110 ArrayList (java.util.ArrayList)109