Use of org.apache.lucene.search.TopDocs in project lucene-solr by apache: class SynonymTokenizer, method testUnRewrittenQuery.
public void testUnRewrittenQuery() throws Exception {
  final TestHighlightRunner helper = new TestHighlightRunner() {

    @Override
    public void run() throws Exception {
      numHighlights = 0;
      // test to show how rewritten query can still be used
      searcher = newSearcher(reader);
      BooleanQuery.Builder query = new BooleanQuery.Builder();
      query.add(new WildcardQuery(new Term(FIELD_NAME, "jf?")), Occur.SHOULD);
      query.add(new WildcardQuery(new Term(FIELD_NAME, "kenned*")), Occur.SHOULD);
      if (VERBOSE)
        System.out.println("Searching with primitive query");
      // forget to set this and...
      // query=query.rewrite(reader);
      TopDocs hits = searcher.search(query.build(), 1000);
      // create an instance of the highlighter with the tags used to surround
      // highlighted text
      // QueryHighlightExtractor highlighter = new QueryHighlightExtractor(this,
      //     query, new StandardAnalyzer(TEST_VERSION));
      int maxNumFragmentsRequired = 3;
      for (int i = 0; i < hits.totalHits; i++) {
        final int docId = hits.scoreDocs[i].doc;
        final Document doc = searcher.doc(docId);
        String text = doc.get(FIELD_NAME);
        TokenStream tokenStream = getAnyTokenStream(FIELD_NAME, docId);
        Highlighter highlighter = getHighlighter(query.build(), FIELD_NAME, HighlighterTest.this, false);
        highlighter.setTextFragmenter(new SimpleFragmenter(40));
        String highlightedText = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
        if (VERBOSE)
          System.out.println(highlightedText);
      }
      // We expect to have zero highlights if the query is multi-terms and is not rewritten!
      assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 0);
    }
  };
  helper.start();
}
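The commented-out rewrite call is the whole point of this test: the classic Highlighter cannot extract terms from an un-rewritten WildcardQuery, so nothing is highlighted. For contrast, here is a minimal sketch of the rewritten variant, reusing the reader, query builder, and getHighlighter helper from the test above; the variable names introduced here are illustrative, not part of the original test.

// Sketch only: rewriting the multi-term query expands the wildcards into
// concrete term queries that the Highlighter's term extraction can see.
Query rewritten = query.build().rewrite(reader);
Highlighter rewrittenHighlighter = getHighlighter(rewritten, FIELD_NAME, HighlighterTest.this, false);
rewrittenHighlighter.setTextFragmenter(new SimpleFragmenter(40));
// Highlighting the same hits with "rewritten" instead of the raw query should
// produce a non-zero numHighlights.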
Use of org.apache.lucene.search.TopDocs in project lucene-solr by apache: class TestUnifiedHighlighter, method testCustomFieldValueSource.
public void testCustomFieldValueSource() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Document doc = new Document();
  final String text = "This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.";
  Field body = new Field("body", text, fieldType);
  doc.add(body);
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {

    @Override
    protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter, int cacheCharsThreshold) throws IOException {
      assert fields.length == 1;
      assert docIter.cost() == 1;
      docIter.nextDoc();
      return Collections.singletonList(new CharSequence[] { text });
    }

    @Override
    protected BreakIterator getBreakIterator(String field) {
      return new WholeBreakIterator();
    }
  };
  Query query = new TermQuery(new Term("body", "test"));
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(1, topDocs.totalHits);
  String[] snippets = highlighter.highlight("body", query, topDocs, 2);
  assertEquals(1, snippets.length);
  assertEquals("This is a <b>test</b>. Just highlighting from postings. This is also a much sillier <b>test</b>. Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
  ir.close();
}
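The loadFieldValues override above is what allows the UnifiedHighlighter to highlight content that does not come from stored fields. Below is a hedged sketch of the same idea backed by an external content store, assuming the imports already available in this test class; externalContent and its keys are illustrative assumptions, not part of the original test.

// Sketch: supply field values from an external map keyed by docId instead of
// stored fields. The returned list is in docIter order and each inner array
// follows the order of the requested fields.
final Map<Integer, String> externalContent = new HashMap<>(); // assumed external source
UnifiedHighlighter custom = new UnifiedHighlighter(searcher, indexAnalyzer) {

  @Override
  protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter, int cacheCharsThreshold) throws IOException {
    List<CharSequence[]> valuesByDoc = new ArrayList<>();
    int docId;
    while ((docId = docIter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      CharSequence[] perField = new CharSequence[fields.length];
      for (int f = 0; f < fields.length; f++) {
        perField[f] = externalContent.get(docId); // same source text for every requested field in this sketch
      }
      valuesByDoc.add(perField);
    }
    return valuesByDoc;
  }
};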
Use of org.apache.lucene.search.TopDocs in project lucene-solr by apache: class TestUnifiedHighlighter, method testBooleanMustNot.
public void testBooleanMustNot() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Field body = new Field("body", "This sentence has both terms. This sentence has only terms.", fieldType);
  Document document = new Document();
  document.add(body);
  iw.addDocument(document);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher searcher = newSearcher(ir);
  BooleanQuery query2 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("body", "both")), BooleanClause.Occur.MUST_NOT)
      .build();
  BooleanQuery query = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("body", "terms")), BooleanClause.Occur.SHOULD)
      .add(query2, BooleanClause.Occur.SHOULD)
      .build();
  TopDocs topDocs = searcher.search(query, 10);
  assertEquals(1, topDocs.totalHits);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
  highlighter.setMaxLength(Integer.MAX_VALUE - 1);
  String[] snippets = highlighter.highlight("body", query, topDocs, 2);
  assertEquals(1, snippets.length);
  assertFalse(snippets[0].contains("<b>both</b>"));
  ir.close();
}
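Only the positive SHOULD clause drives the highlighting here; the nested MUST_NOT term is ignored by the highlighter. A small hedged addition one could make to the assertions above to state that explicitly (not part of the original test):

// Sketch: the positive term is marked up; the existing assertFalse already
// checks that the negated term is not.
assertTrue(snippets[0].contains("<b>terms</b>"));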
Use of org.apache.lucene.search.TopDocs in project lucene-solr by apache: class TestUnifiedHighlighter, method testEncode.
public void testEncode() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);
  body.setStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {

    @Override
    protected PassageFormatter getFormatter(String field) {
      return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
    }
  };
  Query query = new TermQuery(new Term("body", "highlighting"));
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(1, topDocs.totalHits);
  String[] snippets = highlighter.highlight("body", query, topDocs);
  assertEquals(1, snippets.length);
assertEquals("Just a test <b>highlighting</b> from <i>postings</i>. ", snippets[0]);
  ir.close();
}
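The fourth constructor argument of DefaultPassageFormatter is the escape flag: with true, the passage text itself is escaped (which is why the expected snippet above is full of character entities), while the <b> and </b> markers added by the formatter remain literal tags. A minimal sketch of the non-escaping variant, following the same anonymous-subclass pattern as the test above:

// Sketch: with escape=false, the <i> tags stored in the field value would pass
// through to the returned snippet unescaped.
UnifiedHighlighter plain = new UnifiedHighlighter(searcher, indexAnalyzer) {

  @Override
  protected PassageFormatter getFormatter(String field) {
    return new DefaultPassageFormatter("<b>", "</b>", "... ", false);
  }
};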
Use of org.apache.lucene.search.TopDocs in project lucene-solr by apache: class TestUnifiedHighlighter, method testCambridgeMA.
public void testCambridgeMA() throws Exception {
  BufferedReader r = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream("CambridgeMA.utf8"), StandardCharsets.UTF_8));
  String text = r.readLine();
  r.close();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Field body = new Field("body", text, fieldType);
  Document document = new Document();
  document.add(body);
  iw.addDocument(document);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher searcher = newSearcher(ir);
  BooleanQuery query = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("body", "porter")), BooleanClause.Occur.SHOULD)
      .add(new TermQuery(new Term("body", "square")), BooleanClause.Occur.SHOULD)
      .add(new TermQuery(new Term("body", "massachusetts")), BooleanClause.Occur.SHOULD)
      .build();
  TopDocs topDocs = searcher.search(query, 10);
  assertEquals(1, topDocs.totalHits);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
  highlighter.setMaxLength(Integer.MAX_VALUE - 1);
  String[] snippets = highlighter.highlight("body", query, topDocs, 2);
  assertEquals(1, snippets.length);
  assertTrue(snippets[0].contains("<b>Square</b>"));
  assertTrue(snippets[0].contains("<b>Porter</b>"));
  ir.close();
}