use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.
the class TestUnifiedHighlighter method testCustomFieldValueSource.
public void testCustomFieldValueSource() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Document doc = new Document();
final String text = "This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.";
Field body = new Field("body", text, fieldType);
doc.add(body);
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter, int cacheCharsThreshold) throws IOException {
assert fields.length == 1;
assert docIter.cost() == 1;
docIter.nextDoc();
return Collections.singletonList(new CharSequence[] { text });
}
@Override
protected BreakIterator getBreakIterator(String field) {
return new WholeBreakIterator();
}
};
Query query = new TermQuery(new Term("body", "test"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
assertEquals(1, snippets.length);
assertEquals("This is a <b>test</b>. Just highlighting from postings. This is also a much sillier <b>test</b>. Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
ir.close();
}
use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.
the class TestUnifiedHighlighter method testBooleanMustNot.
public void testBooleanMustNot() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "This sentence has both terms. This sentence has only terms.", fieldType);
Document document = new Document();
document.add(body);
iw.addDocument(document);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
BooleanQuery query2 = new BooleanQuery.Builder().add(new TermQuery(new Term("body", "both")), BooleanClause.Occur.MUST_NOT).build();
BooleanQuery query = new BooleanQuery.Builder().add(new TermQuery(new Term("body", "terms")), BooleanClause.Occur.SHOULD).add(query2, BooleanClause.Occur.SHOULD).build();
TopDocs topDocs = searcher.search(query, 10);
assertEquals(1, topDocs.totalHits);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setMaxLength(Integer.MAX_VALUE - 1);
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
assertEquals(1, snippets.length);
assertFalse(snippets[0].contains("<b>both</b>"));
ir.close();
}
use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.
the class TestUnifiedHighlighter method testEncode.
public void testEncode() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected PassageFormatter getFormatter(String field) {
return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
}
};
Query query = new TermQuery(new Term("body", "highlighting"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighter.highlight("body", query, topDocs);
assertEquals(1, snippets.length);
assertEquals("Just a test <b>highlighting</b> from <i>postings</i>. ", snippets[0]);
ir.close();
}
use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.
the class TestUnifiedHighlighter method testCambridgeMA.
public void testCambridgeMA() throws Exception {
BufferedReader r = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream("CambridgeMA.utf8"), StandardCharsets.UTF_8));
String text = r.readLine();
r.close();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", text, fieldType);
Document document = new Document();
document.add(body);
iw.addDocument(document);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
BooleanQuery query = new BooleanQuery.Builder().add(new TermQuery(new Term("body", "porter")), BooleanClause.Occur.SHOULD).add(new TermQuery(new Term("body", "square")), BooleanClause.Occur.SHOULD).add(new TermQuery(new Term("body", "massachusetts")), BooleanClause.Occur.SHOULD).build();
TopDocs topDocs = searcher.search(query, 10);
assertEquals(1, topDocs.totalHits);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setMaxLength(Integer.MAX_VALUE - 1);
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
assertEquals(1, snippets.length);
assertTrue(snippets[0].contains("<b>Square</b>"));
assertTrue(snippets[0].contains("<b>Porter</b>"));
ir.close();
}
use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.
the class TestUnifiedHighlighter method testCuriousGeorge.
public void testCuriousGeorge() throws Exception {
String text = "It’s the formula for success for preschoolers—Curious George and fire trucks! " + "Curious George and the Firefighters is a story based on H. A. and Margret Rey’s " + "popular primate and painted in the original watercolor and charcoal style. " + "Firefighters are a famously brave lot, but can they withstand a visit from one curious monkey?";
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", text, fieldType);
Document document = new Document();
document.add(body);
iw.addDocument(document);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
PhraseQuery query = new PhraseQuery.Builder().add(new Term("body", "curious")).add(new Term("body", "george")).build();
TopDocs topDocs = searcher.search(query, 10);
assertEquals(1, topDocs.totalHits);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setHighlightPhrasesStrictly(false);
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
assertEquals(1, snippets.length);
assertFalse(snippets[0].contains("<b>Curious</b>Curious"));
ir.close();
}
Aggregations