Search in sources :

Example 6 with DefaultEncoder

use of org.apache.lucene.search.highlight.DefaultEncoder in project elasticsearch by elastic.

the class CustomPostingsHighlighterTests method testNoMatchSize.

public void testNoMatchSize() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    Field body = new Field("body", "", offsetsType);
    Field none = new Field("none", "", offsetsType);
    Document doc = new Document();
    doc.add(body);
    doc.add(none);
    String firstValue = "This is a test. Just a test highlighting from postings. Feel free to ignore.";
    body.setStringValue(firstValue);
    none.setStringValue(firstValue);
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    Query query = new TermQuery(new Term("none", "highlighting"));
    IndexSearcher searcher = newSearcher(ir);
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertThat(topDocs.totalHits, equalTo(1));
    int docId = topDocs.scoreDocs[0].doc;
    CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
    CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(null, passageFormatter, firstValue, false);
    Snippet[] snippets = highlighter.highlightField("body", query, searcher, docId, 5);
    assertThat(snippets.length, equalTo(0));
    highlighter = new CustomPostingsHighlighter(null, passageFormatter, firstValue, true);
    snippets = highlighter.highlightField("body", query, searcher, docId, 5);
    assertThat(snippets.length, equalTo(1));
    assertThat(snippets[0].getText(), equalTo("This is a test."));
    ir.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Snippet(org.apache.lucene.search.highlight.Snippet) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) DefaultEncoder(org.apache.lucene.search.highlight.DefaultEncoder) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 7 with DefaultEncoder

use of org.apache.lucene.search.highlight.DefaultEncoder in project lucene-solr by apache.

the class FastVectorHighlighterTest method testMultiValuedSortByScore.

public void testMultiValuedSortByScore() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    Document doc = new Document();
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();
    // The first two fields contain the best match
    doc.add(new Field("field", "zero if naught", type));
    // but total a lower score (3) than the bottom
    doc.add(new Field("field", "hero of legend", type));
    // two fields (4)
    doc.add(new Field("field", "naught of hero", type));
    doc.add(new Field("field", "naught of hero", type));
    writer.addDocument(doc);
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    ScoreOrderFragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder();
    fragmentsBuilder.setDiscreteMultiValueHighlighting(true);
    IndexReader reader = DirectoryReader.open(writer);
    String[] preTags = new String[] { "<b>" };
    String[] postTags = new String[] { "</b>" };
    Encoder encoder = new DefaultEncoder();
    int docId = 0;
    BooleanQuery.Builder query = new BooleanQuery.Builder();
    query.add(clause("field", "hero"), Occur.SHOULD);
    query.add(clause("field", "of"), Occur.SHOULD);
    query.add(clause("field", "legend"), Occur.SHOULD);
    FieldQuery fieldQuery = highlighter.getFieldQuery(query.build(), reader);
    for (FragListBuilder fragListBuilder : new FragListBuilder[] { new SimpleFragListBuilder(), new WeightedFragListBuilder() }) {
        String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 20, 1, fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
        assertEquals("<b>hero</b> <b>of</b> <b>legend</b>", bestFragments[0]);
        bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 28, 1, fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
        assertEquals("<b>hero</b> <b>of</b> <b>legend</b>", bestFragments[0]);
        bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 30000, 1, fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
        assertEquals("<b>hero</b> <b>of</b> <b>legend</b>", bestFragments[0]);
    }
    reader.close();
    writer.close();
    dir.close();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) DefaultEncoder(org.apache.lucene.search.highlight.DefaultEncoder) Encoder(org.apache.lucene.search.highlight.Encoder) DefaultEncoder(org.apache.lucene.search.highlight.DefaultEncoder) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory)

Aggregations

DefaultEncoder (org.apache.lucene.search.highlight.DefaultEncoder)7 Document (org.apache.lucene.document.Document)5 Field (org.apache.lucene.document.Field)5 FieldType (org.apache.lucene.document.FieldType)5 TextField (org.apache.lucene.document.TextField)5 Snippet (org.apache.lucene.search.highlight.Snippet)5 Directory (org.apache.lucene.store.Directory)5 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)4 IndexReader (org.apache.lucene.index.IndexReader)4 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)3 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)3 IndexSearcher (org.apache.lucene.search.IndexSearcher)3 Query (org.apache.lucene.search.Query)3 TermQuery (org.apache.lucene.search.TermQuery)3 TopDocs (org.apache.lucene.search.TopDocs)3 StoredField (org.apache.lucene.document.StoredField)2 IndexWriter (org.apache.lucene.index.IndexWriter)2 Term (org.apache.lucene.index.Term)2 BooleanQuery (org.apache.lucene.search.BooleanQuery)2 Encoder (org.apache.lucene.search.highlight.Encoder)2