use of org.apache.lucene.index.IndexReader in project lucene-solr by apache.
the class TestUnifiedHighlighter method testEncode.
/**
 * Indexes one document whose body text contains inline {@code <i>} markup, highlights the
 * term "highlighting" through a {@link UnifiedHighlighter} whose formatter is overridden with
 * a custom {@link DefaultPassageFormatter}, and checks the single snippet that comes back.
 *
 * NOTE(review): the last constructor argument passed to DefaultPassageFormatter ({@code true})
 * presumably turns on escaping of the passage text. If so, the expected snippet below should
 * probably contain encoded entities rather than raw {@code <i>} tags - confirm against the
 * upstream test before relying on this literal.
 */
public void testEncode() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  // Build the document first, then fill in the field value before indexing it.
  Document document = new Document();
  Field bodyField = new Field("body", "", fieldType);
  document.add(bodyField);
  bodyField.setStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
    @Override
    protected PassageFormatter getFormatter(String field) {
      // Same formatter for every field: bold tags, "... " as ellipsis, final flag set to true.
      return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
    }
  };

  Term target = new Term("body", "highlighting");
  Query query = new TermQuery(target);
  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(1, hits.totalHits);

  String[] highlighted = highlighter.highlight("body", query, hits);
  assertEquals(1, highlighted.length);
  assertEquals("Just a test <b>highlighting</b> from <i>postings</i>. ", highlighted[0]);

  reader.close();
}
use of org.apache.lucene.index.IndexReader in project lucene-solr by apache.
the class TestUnifiedHighlighter method testCambridgeMA.
/**
 * Highlights a long real-world text: reads the first line of the {@code CambridgeMA.utf8}
 * classpath resource, indexes it as the "body" field, and runs a three-term SHOULD query
 * ("porter", "square", "massachusetts") against it.
 *
 * The highlighter's max length is raised to {@code Integer.MAX_VALUE - 1} and up to two
 * passages are requested; the test then checks that the single returned snippet contains
 * both {@code <b>Square</b>} and {@code <b>Porter</b>}.
 *
 * @throws Exception if reading the resource, indexing, searching or highlighting fails
 */
public void testCambridgeMA() throws Exception {
  // try-with-resources guarantees the reader is closed even when readLine() throws.
  String text;
  try (BufferedReader r = new BufferedReader(new InputStreamReader(
      this.getClass().getResourceAsStream("CambridgeMA.utf8"), StandardCharsets.UTF_8))) {
    text = r.readLine();
  }

  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Field body = new Field("body", text, fieldType);
  Document document = new Document();
  document.add(body);
  iw.addDocument(document);
  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher searcher = newSearcher(ir);
  BooleanQuery query = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("body", "porter")), BooleanClause.Occur.SHOULD)
      .add(new TermQuery(new Term("body", "square")), BooleanClause.Occur.SHOULD)
      .add(new TermQuery(new Term("body", "massachusetts")), BooleanClause.Occur.SHOULD)
      .build();
  TopDocs topDocs = searcher.search(query, 10);
  assertEquals(1, topDocs.totalHits);

  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
  highlighter.setMaxLength(Integer.MAX_VALUE - 1);
  String[] snippets = highlighter.highlight("body", query, topDocs, 2);
  assertEquals(1, snippets.length);
  assertTrue(snippets[0].contains("<b>Square</b>"));
  assertTrue(snippets[0].contains("<b>Porter</b>"));

  ir.close();
}
use of org.apache.lucene.index.IndexReader in project lucene-solr by apache.
the class TestUnifiedHighlighter method testCuriousGeorge.
/**
 * Indexes a short blurb in which "Curious George" appears more than once, searches it with
 * the phrase query "curious george", and highlights with strict phrase highlighting switched
 * off. The resulting snippet must not contain the sequence {@code <b>Curious</b>Curious}.
 */
public void testCuriousGeorge() throws Exception {
  String blurb =
      "It’s the formula for success for preschoolers—Curious George and fire trucks! "
          + "Curious George and the Firefighters is a story based on H. A. and Margret Rey’s "
          + "popular primate and painted in the original watercolor and charcoal style. "
          + "Firefighters are a famously brave lot, but can they withstand a visit from one curious monkey?";

  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Document indexed = new Document();
  indexed.add(new Field("body", blurb, fieldType));
  writer.addDocument(indexed);
  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);

  // Assemble the two-term phrase step by step.
  PhraseQuery.Builder phrase = new PhraseQuery.Builder();
  phrase.add(new Term("body", "curious"));
  phrase.add(new Term("body", "george"));
  PhraseQuery query = phrase.build();

  TopDocs hits = searcher.search(query, 10);
  assertEquals(1, hits.totalHits);

  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
  highlighter.setHighlightPhrasesStrictly(false);
  String[] highlighted = highlighter.highlight("body", query, hits, 2);
  assertEquals(1, highlighted.length);
  assertFalse(highlighted[0].contains("<b>Curious</b>Curious"));

  reader.close();
}
use of org.apache.lucene.index.IndexReader in project lucene-solr by apache.
the class TestUnifiedHighlighter method testMultipleSnippetSizes.
/**
 * Indexes the same three-sentence text into a "body" and a "title" field, then highlights
 * both fields for document 0 in one {@code highlightFields} call, passing {@code {1, 2}} as
 * the final array argument alongside the field order {@code {"title", "body"}}.
 *
 * The title highlight is expected to be the first sentence only, while the body highlight
 * is expected to span the first two sentences.
 */
public void testMultipleSnippetSizes() throws Exception {
  final String content = "This is a test. Just a test highlighting from postings. Feel free to ignore.";

  // The writer is created before the random field type so random() is consumed in the same order.
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Field bodyField = new Field("body", "", fieldType);
  Field titleField = new Field("title", "", UHTestHelper.randomFieldType(random()));

  Document document = new Document();
  document.add(bodyField);
  document.add(titleField);
  bodyField.setStringValue(content);
  titleField.setStringValue(content);
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);

  // Match "test" in either field.
  BooleanQuery.Builder either = new BooleanQuery.Builder();
  either.add(new TermQuery(new Term("body", "test")), BooleanClause.Occur.SHOULD);
  either.add(new TermQuery(new Term("title", "test")), BooleanClause.Occur.SHOULD);
  BooleanQuery query = either.build();

  String[] fields = { "title", "body" };
  int[] docIds = { 0 };
  int[] perFieldLimits = { 1, 2 };
  Map<String, String[]> byField = highlighter.highlightFields(fields, query, docIds, perFieldLimits);

  assertEquals("This is a <b>test</b>. ", byField.get("title")[0]);
  assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", byField.get("body")[0]);

  reader.close();
}
use of org.apache.lucene.index.IndexReader in project lucene-solr by apache.
the class TestUnifiedHighlighter method testCustomEmptyHighlights.
/**
 * Makes sure the result returned for a document with no highlights can be customized:
 * with the number of no-highlight passages set to zero, a query that matches nothing
 * in the field yields a {@code null} snippet.
 */
public void testCustomEmptyHighlights() throws Exception {
  indexAnalyzer.setPositionIncrementGap(10);

  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Document document = new Document();
  document.add(new Field("body", "test this is. another sentence this test has. far away is that planet.", fieldType));
  writer.addDocument(document);
  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
  // Zero no-highlight passages: we do not want any default summary.
  highlighter.setMaxNoHighlightPassages(0);

  // "highlighting" does not occur anywhere in the indexed body text.
  Query query = new TermQuery(new Term("body", "highlighting"));
  String[] fields = { "body" };
  int[] docIds = { 0 };
  int[] limits = { 2 };
  String[] highlighted = highlighter.highlightFields(fields, query, docIds, limits).get("body");

  assertEquals(1, highlighted.length);
  assertNull(highlighted[0]);

  reader.close();
}
Aggregations