Search in sources :

Example 71 with TopDocs

use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

the class TestUnifiedHighlighter method testBasics.

//
//  Tests below were ported from the PostingsHighlighter. Possibly augmented.  Far below are newer tests.
//
public void testBasics() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
    iw.addDocument(doc);
    body.setStringValue("Highlighting the first term. Hope it works.");
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    Query query = new TermQuery(new Term("body", "highlighting"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
    assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 72 with TopDocs

use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

the class TestUnifiedHighlighter method testMultipleTerms.

public void testMultipleTerms() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
    iw.addDocument(doc);
    body.setStringValue("Highlighting the first term. Hope it works.");
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    BooleanQuery query = new BooleanQuery.Builder().add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD).add(new TermQuery(new Term("body", "just")), BooleanClause.Occur.SHOULD).add(new TermQuery(new Term("body", "first")), BooleanClause.Occur.SHOULD).build();
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", snippets[0]);
    assertEquals("<b>Highlighting</b> the <b>first</b> term. ", snippets[1]);
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 73 with TopDocs

use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

the class TokenSourcesTest method testOverlapWithPositionsAndOffset.

public void testOverlapWithPositionsAndOffset() throws IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox did not jump";
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));
    try {
        final Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setStoreTermVectors(true);
        customType.setStoreTermVectorOffsets(true);
        customType.setStoreTermVectorPositions(true);
        document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
        indexWriter.addDocument(document);
    } finally {
        indexWriter.close();
    }
    final IndexReader indexReader = DirectoryReader.open(directory);
    try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        final DisjunctionMaxQuery query = new DisjunctionMaxQuery(Arrays.asList(new SpanTermQuery(new Term(FIELD, "{fox}")), new SpanTermQuery(new Term(FIELD, "fox"))), 1);
        // final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
        // new SpanTermQuery(new Term(FIELD, "{fox}")),
        // new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
        TopDocs hits = indexSearcher.search(query, 1);
        assertEquals(1, hits.totalHits);
        final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(query));
        final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
        assertEquals("<B>the fox</B> did not jump", highlighter.getBestFragment(tokenStream, TEXT));
    } finally {
        indexReader.close();
        directory.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory)

Example 74 with TopDocs

use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

the class TokenSourcesTest method testOverlapWithPositionsAndOffsetExactPhrase.

public void testOverlapWithPositionsAndOffsetExactPhrase() throws IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox did not jump";
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));
    try {
        final Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setStoreTermVectors(true);
        customType.setStoreTermVectorPositions(true);
        customType.setStoreTermVectorOffsets(true);
        document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
        indexWriter.addDocument(document);
    } finally {
        indexWriter.close();
    }
    final IndexReader indexReader = DirectoryReader.open(directory);
    try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        // final DisjunctionMaxQuery query = new DisjunctionMaxQuery(1);
        // query.add(new SpanTermQuery(new Term(FIELD, "the")));
        // query.add(new SpanTermQuery(new Term(FIELD, "fox")));
        final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD, "the")), new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
        TopDocs hits = indexSearcher.search(phraseQuery, 1);
        assertEquals(1, hits.totalHits);
        final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
        final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
        assertEquals("<B>the fox</B> did not jump", highlighter.getBestFragment(tokenStream, TEXT));
    } finally {
        indexReader.close();
        directory.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) Directory(org.apache.lucene.store.Directory)

Example 75 with TopDocs

use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

the class SynonymTokenizer method testHighlightingCommonTermsQuery.

public void testHighlightingCommonTermsQuery() throws Exception {
    CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3);
    //stop-word
    query.add(new Term(FIELD_NAME, "this"));
    query.add(new Term(FIELD_NAME, "long"));
    query.add(new Term(FIELD_NAME, "very"));
    searcher = newSearcher(reader);
    TopDocs hits = searcher.search(query, 10, new Sort(SortField.FIELD_DOC, SortField.FIELD_SCORE));
    assertEquals(2, hits.totalHits);
    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
    Highlighter highlighter = new Highlighter(scorer);
    final int docId0 = hits.scoreDocs[0].doc;
    Document doc = searcher.doc(docId0);
    String storedField = doc.get(FIELD_NAME);
    TokenStream stream = getAnyTokenStream(FIELD_NAME, docId0);
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
    highlighter.setTextFragmenter(fragmenter);
    String fragment = highlighter.getBestFragment(stream, storedField);
    assertEquals("Hello this is a piece of text that is <B>very</B> <B>long</B> and contains too much preamble and the meat is really here which says kennedy has been shot", fragment);
    final int docId1 = hits.scoreDocs[1].doc;
    doc = searcher.doc(docId1);
    storedField = doc.get(FIELD_NAME);
    stream = getAnyTokenStream(FIELD_NAME, docId1);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
    fragment = highlighter.getBestFragment(stream, storedField);
    assertEquals("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is <B>very</B>", fragment);
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Sort(org.apache.lucene.search.Sort) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery)

Aggregations

TopDocs (org.apache.lucene.search.TopDocs)496 IndexSearcher (org.apache.lucene.search.IndexSearcher)302 Document (org.apache.lucene.document.Document)275 TermQuery (org.apache.lucene.search.TermQuery)189 IndexReader (org.apache.lucene.index.IndexReader)187 Term (org.apache.lucene.index.Term)174 Directory (org.apache.lucene.store.Directory)174 Query (org.apache.lucene.search.Query)172 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)144 BooleanQuery (org.apache.lucene.search.BooleanQuery)127 ScoreDoc (org.apache.lucene.search.ScoreDoc)127 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)120 Sort (org.apache.lucene.search.Sort)94 Field (org.apache.lucene.document.Field)85 SortField (org.apache.lucene.search.SortField)74 IOException (java.io.IOException)58 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)56 TextField (org.apache.lucene.document.TextField)47 PhraseQuery (org.apache.lucene.search.PhraseQuery)46 PrefixQuery (org.apache.lucene.search.PrefixQuery)45