use of org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner in project lucene-solr by apache.
the class HighlighterTest method testSpanHighlighting.
/**
 * Searches for documents matching either of two span-near queries
 * ("wordx" near "wordy", or "wordy" near "wordc") and runs the standard
 * highlighting pass over the hits, expecting exactly 7 highlights.
 *
 * @throws Exception if searching or highlighting fails
 */
public void testSpanHighlighting() throws Exception {
  // First proximity clause: "wordx" and "wordy", slop 1, inOrder = false.
  SpanQuery[] xyClauses = {
      new SpanTermQuery(new Term(FIELD_NAME, "wordx")),
      new SpanTermQuery(new Term(FIELD_NAME, "wordy"))
  };
  Query nearXY = new SpanNearQuery(xyClauses, 1, false);

  // Second proximity clause: "wordy" and "wordc", slop 1, inOrder = false.
  SpanQuery[] ycClauses = {
      new SpanTermQuery(new Term(FIELD_NAME, "wordy")),
      new SpanTermQuery(new Term(FIELD_NAME, "wordc"))
  };
  Query nearYC = new SpanNearQuery(ycClauses, 1, false);

  // Either clause may match (both are SHOULD).
  BooleanQuery.Builder eitherNear = new BooleanQuery.Builder();
  eitherNear.add(nearXY, Occur.SHOULD);
  eitherNear.add(nearYC, Occur.SHOULD);
  doSearching(eitherNear.build());

  TestHighlightRunner runner = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      mode = QUERY;
      doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
    }
  };
  // This test invokes run() directly on the runner.
  runner.run();

  boolean sawExpectedCount = numHighlights == 7;
  assertTrue("Failed to find correct number of highlights " + numHighlights + " found", sawExpectedCount);
}
use of org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner in project lucene-solr by apache.
the class HighlighterTest method testGetBestSingleFragmentWithWeights.
/**
 * Verifies that the best fragment follows the term weights: with "hello"
 * weighted highest the best fragment is the highlighted "Hello"; after
 * raising the weight of "kennedy" above it, the best fragment becomes the
 * highlighted "kennedy".
 *
 * @throws Exception if highlighting fails
 */
public void testGetBestSingleFragmentWithWeights() throws Exception {
  TestHighlightRunner runner = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      WeightedSpanTerm[] weighted = {
          new WeightedSpanTerm(10f, "hello"),
          new WeightedSpanTerm(1f, "kennedy")
      };

      // "hello" is registered with a single span at position 0.
      List<PositionSpan> helloSpans = new ArrayList<>();
      helloSpans.add(new PositionSpan(0, 0));
      weighted[0].addPositionSpans(helloSpans);

      // "kennedy" is registered with a single span at position 14.
      List<PositionSpan> kennedySpans = new ArrayList<>();
      kennedySpans.add(new PositionSpan(14, 14));
      weighted[1].addPositionSpans(kennedySpans);

      // Round one: "hello" (10) outweighs "kennedy" (1).
      Highlighter byWeight = getHighlighter(weighted, HighlighterTest.this);
      TokenStream stream = analyzer.tokenStream(FIELD_NAME, texts[0]);
      byWeight.setTextFragmenter(new SimpleFragmenter(2));
      String best = byWeight.getBestFragment(stream, texts[0]).trim();
      assertTrue("Failed to find best section using weighted terms. Found: [" + best + "]", "<B>Hello</B>".equals(best));

      // Round two: boost "kennedy" to 50 so it outweighs "hello".
      weighted[1].setWeight(50f);
      stream = analyzer.tokenStream(FIELD_NAME, texts[0]);
      byWeight = getHighlighter(weighted, HighlighterTest.this);
      byWeight.setTextFragmenter(new SimpleFragmenter(2));
      best = byWeight.getBestFragment(stream, texts[0]).trim();
      assertTrue("Failed to find best section using weighted terms. Found: " + best, "<B>kennedy</B>".equals(best));
    }
  };
  runner.start();
}
use of org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner in project lucene-solr by apache.
the class HighlighterTest method testOverlapAnalyzer2.
/**
 * Highlights the text "Hi-Speed10 foo" using the token streams supplied by
 * getTS2() and getTS2a(), checking the highlighted output for several
 * single-term queries and for a boolean "hi OR speed" query. The same
 * expectations are checked against both token streams.
 *
 * @throws Exception if highlighting fails
 */
public void testOverlapAnalyzer2() throws Exception {
  TestHighlightRunner runner = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      final String text = "Hi-Speed10 foo";

      // Each row: { query term, expected highlighted text }.
      final String[][] singleTermCases = {
          { "foo", "Hi-Speed10 <B>foo</B>" },
          { "10", "Hi-Speed<B>10</B> foo" },
          { "hi", "<B>Hi</B>-Speed10 foo" },
          { "speed", "Hi-<B>Speed</B>10 foo" },
          { "hispeed", "<B>Hi-Speed</B>10 foo" }
      };

      // Pass 1: token stream from getTS2().
      for (String[] termCase : singleTermCases) {
        Query termQuery = new TermQuery(new Term("text", termCase[0]));
        Highlighter marker = getHighlighter(termQuery, "text", HighlighterTest.this);
        String actual = marker.getBestFragments(getTS2(), text, 3, "...");
        assertEquals(termCase[1], actual);
      }

      // "hi" OR "speed": both optional clauses are expected to highlight "Hi-Speed".
      BooleanQuery.Builder hiOrSpeed = new BooleanQuery.Builder();
      hiOrSpeed.add(new TermQuery(new Term("text", "hi")), Occur.SHOULD);
      hiOrSpeed.add(new TermQuery(new Term("text", "speed")), Occur.SHOULD);

      Query combined = hiOrSpeed.build();
      Highlighter combinedMarker = getHighlighter(combined, "text", HighlighterTest.this);
      String combinedActual = combinedMarker.getBestFragments(getTS2(), text, 3, "...");
      assertEquals("<B>Hi-Speed</B>10 foo", combinedActual);

      // Pass 2: same expectations, but with the token stream from getTS2a()
      // (the bigger overlapping token is put first).
      for (String[] termCase : singleTermCases) {
        Query termQuery = new TermQuery(new Term("text", termCase[0]));
        Highlighter marker = getHighlighter(termQuery, "text", HighlighterTest.this);
        String actual = marker.getBestFragments(getTS2a(), text, 3, "...");
        assertEquals(termCase[1], actual);
      }

      combined = hiOrSpeed.build();
      combinedMarker = getHighlighter(combined, "text", HighlighterTest.this);
      combinedActual = combinedMarker.getBestFragments(getTS2a(), text, 3, "...");
      assertEquals("<B>Hi-Speed</B>10 foo", combinedActual);
    }
  };
  runner.start();
}
use of org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner in project lucene-solr by apache.
the class HighlighterTest method testMaxSizeEndHighlight.
/**
 * Checks that when the analysis limit equals the length of the text, the
 * text following the highlighted term ("in it") is still present at the end
 * of the returned fragment.
 *
 * @throws Exception if highlighting fails
 */
public void testMaxSizeEndHighlight() throws Exception {
  TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      // The pattern "i[nt]" matches exactly the words "in" and "it", i.e. the
      // two words that follow the search term in the text below.
      CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
      TermQuery query = new TermQuery(new Term("text", "searchterm"));
      String text = "this is a text with searchterm in it";
      SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
      Highlighter hg = getHighlighter(query, "text", fm);
      // NullFragmenter: presumably keeps the whole text as a single fragment - confirm.
      hg.setTextFragmenter(new NullFragmenter());
      // 36 is the exact character length of the text above.
      hg.setMaxDocCharsToAnalyze(36);
      // The analyzer is a closeable resource; close it once the fragment has
      // been produced instead of leaking it.
      try (MockAnalyzer stopWordAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords)) {
        String match = hg.getBestFragment(stopWordAnalyzer, "text", text);
        assertTrue("Matched text should contain remainder of text after highlighted query ", match.endsWith("in it"));
      }
    }
  };
  helper.start();
}
use of org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner in project lucene-solr by apache.
the class HighlighterTest method testGetSimpleHighlight.
/**
 * Searches for the term "kennedy", highlights the best fragment of every
 * returned hit, and expects a total of 4 highlights.
 *
 * @throws Exception if searching or highlighting fails
 */
public void testGetSimpleHighlight() throws Exception {
  TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      // Reset the counter before highlighting; it is presumably incremented
      // by the formatter callback passed to getHighlighter - confirm.
      numHighlights = 0;
      doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
      // Bound the loop by the hits actually returned: totalHits counts every
      // match and may exceed the size of the scoreDocs array being indexed.
      for (int i = 0; i < hits.scoreDocs.length; i++) {
        String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
        TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
        Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this);
        String result = highlighter.getBestFragment(tokenStream, text);
        if (VERBOSE) {
          System.out.println("\t" + result);
        }
      }
      assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
    }
  };
  helper.start();
}
Aggregations