Use of org.apache.lucene.analysis.TokenStream in the lucene-solr project by Apache.
From the class SynonymTokenizer, method testGetSimpleHighlight.
/**
 * Searches for the term "kennedy", asks the highlighter for the best fragment
 * of every hit, and checks that exactly four highlights were counted in
 * {@code numHighlights}.
 *
 * @throws Exception if searching, analysis or highlighting fails
 */
public void testGetSimpleHighlight() throws Exception {
  TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      // Start from a clean counter so only this run's highlights are counted.
      numHighlights = 0;
      doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));

      for (int hit = 0; hit < hits.totalHits; hit++) {
        final int docId = hits.scoreDocs[hit].doc;
        String storedText = searcher.doc(docId).get(FIELD_NAME);
        TokenStream stream = analyzer.tokenStream(FIELD_NAME, storedText);
        Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this);
        String bestFragment = highlighter.getBestFragment(stream, storedText);
        if (VERBOSE) {
          System.out.println("\t" + bestFragment);
        }
      }

      // NOTE(review): numHighlights is presumably bumped by the formatter passed to
      // getHighlighter (HighlighterTest.this) -- confirm against the enclosing class.
      boolean expectedCount = numHighlights == 4;
      assertTrue("Failed to find correct number of highlights " + numHighlights + " found", expectedCount);
    }
  };
  helper.start();
}
Use of org.apache.lucene.analysis.TokenStream in the lucene-solr project by Apache.
From the class SynonymTokenizer, method testHighlightingCommonTermsQuery.
/**
 * Runs a {@code CommonTermsQuery} over the terms "this", "long" and "very",
 * expects two hits, and compares the best highlighted fragment of each hit
 * against a fixed expected string.
 *
 * @throws Exception if searching or highlighting fails
 */
public void testHighlightingCommonTermsQuery() throws Exception {
  CommonTermsQuery commonTerms = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3);
  // "this" is a stop-word
  commonTerms.add(new Term(FIELD_NAME, "this"));
  commonTerms.add(new Term(FIELD_NAME, "long"));
  commonTerms.add(new Term(FIELD_NAME, "very"));

  searcher = newSearcher(reader);
  // Sort by document order first so the two hits come back in a fixed sequence.
  TopDocs topDocs = searcher.search(commonTerms, 10, new Sort(SortField.FIELD_DOC, SortField.FIELD_SCORE));
  assertEquals(2, topDocs.totalHits);

  QueryScorer scorer = new QueryScorer(commonTerms, FIELD_NAME);
  Highlighter highlighter = new Highlighter(scorer);

  // Expected best fragment for each hit, in the order the hits are returned.
  final String[] expectedFragments = {
    "Hello this is a piece of text that is <B>very</B> <B>long</B> and contains too much preamble and the meat is really here which says kennedy has been shot",
    "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is <B>very</B>"
  };

  for (int rank = 0; rank < expectedFragments.length; rank++) {
    final int docId = topDocs.scoreDocs[rank].doc;
    String storedField = searcher.doc(docId).get(FIELD_NAME);
    TokenStream stream = getAnyTokenStream(FIELD_NAME, docId);
    // A new span fragmenter is installed for every document, as in the original sequence.
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
    String fragment = highlighter.getBestFragment(stream, storedField);
    assertEquals(expectedFragments[rank], fragment);
  }
}
Use of org.apache.lucene.analysis.TokenStream in the lucene-solr project by Apache.
From the class SynonymTokenizer, method testGetTextFragments.
/**
 * For every hit on the term "kennedy", checks that
 * {@code Highlighter.getBestFragments} (string results) and
 * {@code Highlighter.getBestTextFragments} (fragment objects) return the same
 * number of fragments with the same text.
 *
 * @throws Exception if searching, analysis or highlighting fails
 */
public void testGetTextFragments() throws Exception {
  TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));

      for (int hit = 0; hit < hits.totalHits; hit++) {
        final int docId = hits.scoreDocs[hit].doc;
        final String storedText = searcher.doc(docId).get(FIELD_NAME);

        // First pass: fragments as plain strings.
        TokenStream firstStream = getAnyTokenStream(FIELD_NAME, docId);
        Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this);
        highlighter.setTextFragmenter(new SimpleFragmenter(20));
        String[] stringResults = highlighter.getBestFragments(firstStream, storedText, 10);

        // Second pass: the first stream was already handed to the highlighter,
        // so a new one is obtained from the analyzer for the TextFragment variant.
        TokenStream secondStream = analyzer.tokenStream(FIELD_NAME, storedText);
        TextFragment[] fragmentResults = highlighter.getBestTextFragments(secondStream, storedText, true, 10);

        boolean sameCount = fragmentResults.length == stringResults.length;
        assertTrue("Failed to find correct number of text Fragments: " + fragmentResults.length + " vs " + stringResults.length, sameCount);

        for (int pos = 0; pos < stringResults.length; pos++) {
          if (VERBOSE) {
            System.out.println(fragmentResults[pos]);
          }
          assertTrue("Failed to find same text Fragments: " + fragmentResults[pos] + " found", fragmentResults[pos].toString().equals(stringResults[pos]));
        }
      }
    }
  };
  helper.start();
}
Use of org.apache.lucene.analysis.TokenStream in the lucene-solr project by Apache.
From the class SynonymTokenizer, method testUnRewrittenQuery.
/**
 * Checks that a boolean query made of two wildcard clauses ("jf?" and
 * "kenned*") produces no highlights when it is handed to the highlighter
 * without being rewritten first.
 *
 * @throws Exception if searching or highlighting fails
 */
public void testUnRewrittenQuery() throws Exception {
  final TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      numHighlights = 0;
      // Shows what happens when a multi-term query is used WITHOUT rewriting it.
      searcher = newSearcher(reader);

      BooleanQuery.Builder builder = new BooleanQuery.Builder();
      builder.add(new WildcardQuery(new Term(FIELD_NAME, "jf?")), Occur.SHOULD);
      builder.add(new WildcardQuery(new Term(FIELD_NAME, "kenned*")), Occur.SHOULD);
      // Build once and reuse the same instance for the search and for every
      // highlighter, instead of rebuilding an equivalent query per hit.
      final BooleanQuery unrewritten = builder.build();

      if (VERBOSE) {
        System.out.println("Searching with primitive query");
      }
      // The query is deliberately not rewritten here (no query.rewrite(reader)).
      TopDocs topDocs = searcher.search(unrewritten, 1000);

      final int maxNumFragmentsRequired = 3;
      for (int i = 0; i < topDocs.totalHits; i++) {
        final int docId = topDocs.scoreDocs[i].doc;
        final Document doc = searcher.doc(docId);
        String text = doc.get(FIELD_NAME);
        TokenStream tokenStream = getAnyTokenStream(FIELD_NAME, docId);
        // NOTE(review): the trailing 'false' presumably disables multi-term expansion
        // in the highlighter -- confirm against getHighlighter's signature.
        Highlighter highlighter = getHighlighter(unrewritten, FIELD_NAME, HighlighterTest.this, false);
        highlighter.setTextFragmenter(new SimpleFragmenter(40));
        String highlightedText = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
        if (VERBOSE) {
          System.out.println(highlightedText);
        }
      }

      // We expect zero highlights if the query is multi-term and is not rewritten.
      assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 0);
    }
  };
  helper.start();
}
Use of org.apache.lucene.analysis.TokenStream in the lucene-solr project by Apache.
From the class SynonymTokenizer, method testSimpleSpanHighlighterWithStopWordsStraddlingFragmentBoundaries.
/**
 * Test for LUCENE-2229: searches for the phrase "all tokens", highlights the
 * single expected hit with a {@code SimpleSpanFragmenter} of size 36, and
 * checks that the resulting fragment stays under 60 characters.
 *
 * @throws Exception if searching, analysis or highlighting fails
 */
public void testSimpleSpanHighlighterWithStopWordsStraddlingFragmentBoundaries() throws Exception {
  // NOTE(review): doSearching presumably populates the 'query' and 'hits' members
  // used below -- confirm against the enclosing class.
  doSearching(new PhraseQuery(FIELD_NAME, "all", "tokens"));

  final int maxNumFragmentsRequired = 1;
  QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
  Highlighter highlighter = new Highlighter(scorer);

  assertEquals("Must have one hit", 1, hits.totalHits);

  for (int hit = 0; hit < hits.totalHits; hit++) {
    final int docId = hits.scoreDocs[hit].doc;
    String storedText = searcher.doc(docId).get(FIELD_NAME);
    TokenStream stream = analyzer.tokenStream(FIELD_NAME, storedText);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 36));
    String fragment = highlighter.getBestFragments(stream, storedText, maxNumFragmentsRequired, "...");
    if (VERBOSE) {
      System.out.println("\t" + fragment);
    }
    boolean shortEnough = fragment.length() < 60;
    assertTrue("Fragment must be less than 60 characters long", shortEnough);
  }
}
Aggregations