Search in sources:

Example 11 with Snippet

Use of org.apache.lucene.search.highlight.Snippet in the project elasticsearch by elastic.

From the class CustomPassageFormatterTests, method testSimpleFormat:

/**
 * Builds three consecutive passages over one content string and formats them with
 * {@code <em>}/{@code </em>} tags. The first two passages each register one occurrence of
 * "highlighter"; the third registers no match. The test then checks the text and the
 * highlighted flag of every returned snippet.
 */
public void testSimpleFormat() {
    final String text = "This is a really cool highlighter. Postings highlighter gives nice snippets back. No matches here.";
    final String term = "highlighter";
    final BytesRef termBytes = new BytesRef(term);

    // First passage: begins at offset 0 and holds the first occurrence of the term.
    final int firstMatchStart = text.indexOf(term);
    final int firstMatchEnd = firstMatchStart + term.length();
    Passage first = new Passage();
    first.startOffset = 0;
    //lets include the whitespace at the end to make sure we trim it
    first.endOffset = firstMatchEnd + 2;
    first.addMatch(firstMatchStart, firstMatchEnd, termBytes);

    // Second passage: continues where the first one stopped and holds the last occurrence of the term.
    final int secondMatchStart = text.lastIndexOf(term);
    final int secondMatchEnd = secondMatchStart + term.length();
    Passage second = new Passage();
    second.startOffset = first.endOffset;
    second.endOffset = secondMatchEnd + 26;
    second.addMatch(secondMatchStart, secondMatchEnd, termBytes);

    // Third passage: the remainder of the content, with no match registered.
    Passage third = new Passage();
    third.startOffset = second.endOffset;
    third.endOffset = text.length();

    Passage[] passages = { first, second, third };
    CustomPassageFormatter formatter = new CustomPassageFormatter("<em>", "</em>", new DefaultEncoder());
    Snippet[] snippets = formatter.format(passages, text);

    assertThat(snippets, notNullValue());
    assertThat(snippets.length, equalTo(3));

    assertThat(snippets[0].getText(), equalTo("This is a really cool <em>highlighter</em>."));
    assertThat(snippets[0].isHighlighted(), equalTo(true));

    assertThat(snippets[1].getText(), equalTo("Postings <em>highlighter</em> gives nice snippets back."));
    assertThat(snippets[1].isHighlighted(), equalTo(true));

    assertThat(snippets[2].getText(), equalTo("No matches here."));
    assertThat(snippets[2].isHighlighted(), equalTo(false));
}
Also used: DefaultEncoder(org.apache.lucene.search.highlight.DefaultEncoder) Snippet(org.apache.lucene.search.highlight.Snippet) BytesRef(org.apache.lucene.util.BytesRef)

Example 12 with Snippet

Use of org.apache.lucene.search.highlight.Snippet in the project elasticsearch by elastic.

From the class CustomPostingsHighlighterTests, method testNoMatchSize:

/**
 * Indexes a single document whose "body" and "none" fields hold the same text, then runs a
 * term query against "none" and uses it to highlight "body".
 *
 * With {@code false} as the last {@code CustomPostingsHighlighter} constructor argument the
 * test expects no snippets; with {@code true} it expects exactly one snippet containing the
 * first sentence of the text.
 *
 * @throws Exception if indexing, searching or highlighting fails
 */
public void testNoMatchSize() throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig writerConfig = newIndexWriterConfig(new MockAnalyzer(random()));
    writerConfig.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, writerConfig);

    // Stored text field type that also indexes offsets.
    FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
    storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

    final String text = "This is a test. Just a test highlighting from postings. Feel free to ignore.";
    Document document = new Document();
    document.add(new Field("body", text, storedWithOffsets));
    document.add(new Field("none", text, storedWithOffsets));
    writer.addDocument(document);

    IndexReader reader = writer.getReader();
    writer.close();

    // The query targets the "none" field, while highlighting below is requested for "body".
    Query noneFieldQuery = new TermQuery(new Term("none", "highlighting"));
    IndexSearcher indexSearcher = newSearcher(reader);
    TopDocs hits = indexSearcher.search(noneFieldQuery, 10, Sort.INDEXORDER);
    assertThat(hits.totalHits, equalTo(1));
    final int hitDoc = hits.scoreDocs[0].doc;

    CustomPassageFormatter formatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());

    CustomPostingsHighlighter withoutNoMatch = new CustomPostingsHighlighter(null, formatter, text, false);
    Snippet[] result = withoutNoMatch.highlightField("body", noneFieldQuery, indexSearcher, hitDoc, 5);
    assertThat(result.length, equalTo(0));

    CustomPostingsHighlighter withNoMatch = new CustomPostingsHighlighter(null, formatter, text, true);
    result = withNoMatch.highlightField("body", noneFieldQuery, indexSearcher, hitDoc, 5);
    assertThat(result.length, equalTo(1));
    assertThat(result[0].getText(), equalTo("This is a test."));

    reader.close();
    directory.close();
}
Also used: IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) Term(org.apache.lucene.index.Term) Snippet(org.apache.lucene.search.highlight.Snippet) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) DefaultEncoder(org.apache.lucene.search.highlight.DefaultEncoder) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

Snippet (org.apache.lucene.search.highlight.Snippet): 12, IndexSearcher (org.apache.lucene.search.IndexSearcher): 5, DefaultEncoder (org.apache.lucene.search.highlight.DefaultEncoder): 5, BytesRef (org.apache.lucene.util.BytesRef): 5, ArrayList (java.util.ArrayList): 3, Document (org.apache.lucene.document.Document): 3, Field (org.apache.lucene.document.Field): 3, FieldType (org.apache.lucene.document.FieldType): 3, TextField (org.apache.lucene.document.TextField): 3, IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 3, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 3, TopDocs (org.apache.lucene.search.TopDocs): 3, Directory (org.apache.lucene.store.Directory): 3, IOException (java.io.IOException): 2, BreakIterator (java.text.BreakIterator): 2, Analyzer (org.apache.lucene.analysis.Analyzer): 2, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 2, IndexReader (org.apache.lucene.index.IndexReader): 2, Term (org.apache.lucene.index.Term): 2, Query (org.apache.lucene.search.Query): 2