use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.
the class TestUnifiedHighlighter method testBasics.
//
// Tests below were ported from the PostingsHighlighter. Possibly augmented. Far below are newer tests.
//
public void testBasics() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
iw.addDocument(doc);
body.setStringValue("Highlighting the first term. Hope it works.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
Query query = new TermQuery(new Term("body", "highlighting"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits);
String[] snippets = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
ir.close();
}
use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.
the class TestUnifiedHighlighter method testMultipleTerms.
public void testMultipleTerms() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
iw.addDocument(doc);
body.setStringValue("Highlighting the first term. Hope it works.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
BooleanQuery query = new BooleanQuery.Builder().add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD).add(new TermQuery(new Term("body", "just")), BooleanClause.Occur.SHOULD).add(new TermQuery(new Term("body", "first")), BooleanClause.Occur.SHOULD).build();
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits);
String[] snippets = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", snippets[0]);
assertEquals("<b>Highlighting</b> the <b>first</b> term. ", snippets[1]);
ir.close();
}
use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.
the class TokenSourcesTest method testOverlapWithPositionsAndOffset.
public void testOverlapWithPositionsAndOffset() throws IOException, InvalidTokenOffsetsException {
final String TEXT = "the fox did not jump";
final Directory directory = newDirectory();
final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));
try {
final Document document = new Document();
FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorOffsets(true);
customType.setStoreTermVectorPositions(true);
document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
indexWriter.addDocument(document);
} finally {
indexWriter.close();
}
final IndexReader indexReader = DirectoryReader.open(directory);
try {
assertEquals(1, indexReader.numDocs());
final IndexSearcher indexSearcher = newSearcher(indexReader);
final DisjunctionMaxQuery query = new DisjunctionMaxQuery(Arrays.asList(new SpanTermQuery(new Term(FIELD, "{fox}")), new SpanTermQuery(new Term(FIELD, "fox"))), 1);
// final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
// new SpanTermQuery(new Term(FIELD, "{fox}")),
// new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
TopDocs hits = indexSearcher.search(query, 1);
assertEquals(1, hits.totalHits);
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(query));
final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
assertEquals("<B>the fox</B> did not jump", highlighter.getBestFragment(tokenStream, TEXT));
} finally {
indexReader.close();
directory.close();
}
}
use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.
the class TokenSourcesTest method testOverlapWithPositionsAndOffsetExactPhrase.
public void testOverlapWithPositionsAndOffsetExactPhrase() throws IOException, InvalidTokenOffsetsException {
final String TEXT = "the fox did not jump";
final Directory directory = newDirectory();
final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));
try {
final Document document = new Document();
FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectorOffsets(true);
document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
indexWriter.addDocument(document);
} finally {
indexWriter.close();
}
final IndexReader indexReader = DirectoryReader.open(directory);
try {
assertEquals(1, indexReader.numDocs());
final IndexSearcher indexSearcher = newSearcher(indexReader);
// final DisjunctionMaxQuery query = new DisjunctionMaxQuery(1);
// query.add(new SpanTermQuery(new Term(FIELD, "the")));
// query.add(new SpanTermQuery(new Term(FIELD, "fox")));
final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD, "the")), new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
TopDocs hits = indexSearcher.search(phraseQuery, 1);
assertEquals(1, hits.totalHits);
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
assertEquals("<B>the fox</B> did not jump", highlighter.getBestFragment(tokenStream, TEXT));
} finally {
indexReader.close();
directory.close();
}
}
use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.
the class SynonymTokenizer method testHighlightingCommonTermsQuery.
public void testHighlightingCommonTermsQuery() throws Exception {
CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3);
//stop-word
query.add(new Term(FIELD_NAME, "this"));
query.add(new Term(FIELD_NAME, "long"));
query.add(new Term(FIELD_NAME, "very"));
searcher = newSearcher(reader);
TopDocs hits = searcher.search(query, 10, new Sort(SortField.FIELD_DOC, SortField.FIELD_SCORE));
assertEquals(2, hits.totalHits);
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
Highlighter highlighter = new Highlighter(scorer);
final int docId0 = hits.scoreDocs[0].doc;
Document doc = searcher.doc(docId0);
String storedField = doc.get(FIELD_NAME);
TokenStream stream = getAnyTokenStream(FIELD_NAME, docId0);
Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
highlighter.setTextFragmenter(fragmenter);
String fragment = highlighter.getBestFragment(stream, storedField);
assertEquals("Hello this is a piece of text that is <B>very</B> <B>long</B> and contains too much preamble and the meat is really here which says kennedy has been shot", fragment);
final int docId1 = hits.scoreDocs[1].doc;
doc = searcher.doc(docId1);
storedField = doc.get(FIELD_NAME);
stream = getAnyTokenStream(FIELD_NAME, docId1);
highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
fragment = highlighter.getBestFragment(stream, storedField);
assertEquals("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is <B>very</B>", fragment);
}
Aggregations