Use of org.apache.lucene.search.PhraseQuery in the Apache lucene-solr project.
Class TestQueryBuilder, method testCJKPhrase.
/*
 * A phrase built from the two-character CJK input must equal the hand-built
 * phrase whose terms are the two characters taken separately.
 */
public void testCJKPhrase() throws Exception {
  // the analyzer is relied on to produce individual CJK chars as terms
  final QueryBuilder cjkBuilder = new QueryBuilder(new SimpleCJKAnalyzer());
  final PhraseQuery twoCharPhrase = new PhraseQuery("field", "中", "国");
  assertEquals(twoCharPhrase, cjkBuilder.createPhraseQuery("field", "中国"));
}
Use of org.apache.lucene.search.PhraseQuery in the Apache lucene-solr project.
Class TestQueryBuilder, method testCJKSloppyPhrase.
/*
 * Same as the exact CJK phrase check, but a slop of 3 is passed both to the
 * expected PhraseQuery and to createPhraseQuery.
 */
public void testCJKSloppyPhrase() throws Exception {
  // the analyzer is relied on to produce individual CJK chars as terms
  final QueryBuilder cjkBuilder = new QueryBuilder(new SimpleCJKAnalyzer());
  final PhraseQuery sloppyPhrase = new PhraseQuery(3, "field", "中", "国");
  assertEquals(sloppyPhrase, cjkBuilder.createPhraseQuery("field", "中国", 3));
}
Use of org.apache.lucene.search.PhraseQuery in the Apache lucene-solr project.
Class TestUnifiedHighlighter, method testBuddhism.
/*
 * Indexes one long document, runs the phrase query "buddhist origins" against
 * the "body" field and checks that the single returned snippet contains both
 * words wrapped in <b> tags.
 *
 * The reader is managed by try-with-resources so that it is closed even when
 * an assertion fails or the search/highlight calls throw.
 */
public void testBuddhism() throws Exception {
  String text =
      "This eight-volume set brings together seminal papers in Buddhist studies from a vast "
          + "range of academic disciplines published over the last forty years. With a new introduction "
          + "by the editor, this collection is a unique and unrivalled research resource for both "
          + "student and scholar. Coverage includes: - Buddhist origins; early history of Buddhism in "
          + "South and Southeast Asia - early Buddhist Schools and Doctrinal History; Theravada Doctrine "
          + "- the Origins and nature of Mahayana Buddhism; some Mahayana religious topics - Abhidharma "
          + "and Madhyamaka - Yogacara, the Epistemological tradition, and Tathagatagarbha - Tantric "
          + "Buddhism (Including China and Japan); Buddhism in Nepal and Tibet - Buddhism in South and "
          + "Southeast Asia, and - Buddhism in China, East Asia, and Japan.";
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Field body = new Field("body", text, fieldType);
  Document document = new Document();
  document.add(body);
  iw.addDocument(document);

  try (IndexReader ir = iw.getReader()) {
    // the writer is no longer needed once the reader has been obtained
    iw.close();
    IndexSearcher searcher = newSearcher(ir);

    PhraseQuery query =
        new PhraseQuery.Builder()
            .add(new Term("body", "buddhist"))
            .add(new Term("body", "origins"))
            .build();
    TopDocs topDocs = searcher.search(query, 10);
    assertEquals(1, topDocs.totalHits);

    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    // strict phrase highlighting is switched off for this test
    highlighter.setHighlightPhrasesStrictly(false);
    String[] snippets = highlighter.highlight("body", query, topDocs, 2);
    assertEquals(1, snippets.length);
    assertTrue(snippets[0].contains("<b>Buddhist</b> <b>origins</b>"));
  }
}
Use of org.apache.lucene.search.PhraseQuery in the Apache lucene-solr project.
Class TestUnifiedHighlighter, method testFieldMatcherPhraseQuery.
/*
 * Highlights the "title", "text" and "category" fields with one boolean query
 * made of phrase clauses on several fields, under three field-matcher setups:
 *   1. a highlighter whose matcher accepts every query field,
 *   2. a highlighter left on its default matcher,
 *   3. the same highlighter with an explicit matcher naming one other field.
 * After each section the explicit matcher is reset by passing null.
 *
 * The reader is managed by try-with-resources so that it is closed even when
 * one of the assertions fails.
 */
public void testFieldMatcherPhraseQuery() throws Exception {
  try (IndexReader ir = indexSomeFields()) {
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) {
      @Override
      protected Predicate<String> getFieldMatcher(String field) {
        // requireFieldMatch=false
        return (qf) -> true;
      }
    };
    UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer);
    BooleanQuery.Builder queryBuilder =
        new BooleanQuery.Builder()
            .add(new PhraseQuery("title", "this", "is", "the", "title"), BooleanClause.Occur.SHOULD)
            .add(new PhraseQuery(2, "category", "this", "is", "the", "field"), BooleanClause.Occur.SHOULD)
            .add(new PhraseQuery("text", "this", "is"), BooleanClause.Occur.SHOULD)
            .add(new PhraseQuery("category", "this", "is"), BooleanClause.Occur.SHOULD)
            .add(new PhraseQuery(1, "text", "you", "can", "put", "text"), BooleanClause.Occur.SHOULD);
    Query query = queryBuilder.build();

    // title
    {
      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
      assertEquals(1, topDocs.totalHits);

      // matcher accepting every query field
      String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> <b>field</b>.", snippets[0]);

      // default matcher
      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> field.", snippets[0]);

      // matcher accepting only query clauses on "text"
      highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
      highlighterFieldMatch.setFieldMatcher(null);
    }

    // text
    {
      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
      assertEquals(1, topDocs.totalHits);

      // matcher accepting every query field
      String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>text</b> <b>field</b>. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);

      // default matcher
      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> the <b>text</b> field. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);

      // matcher accepting only query clauses on "title"
      highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("This is the text field. You can put some text if you want.", snippets[0]);
      highlighterFieldMatch.setFieldMatcher(null);
    }

    // category
    {
      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
      assertEquals(1, topDocs.totalHits);

      // matcher accepting every query field
      String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);

      // default matcher
      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);

      // matcher accepting only query clauses on "text"
      highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
      highlighterFieldMatch.setFieldMatcher(null);
    }
  }
}
Use of org.apache.lucene.search.PhraseQuery in the Apache lucene-solr project.
Class SynonymTokenizer, method testPosTermStdTerm.
// position sensitive query added after position insensitive query
public void testPosTermStdTerm() throws Exception {
  // Optional term clause on "y" first, then the optional phrase "x y z".
  // NOTE(review): doSearching presumably populates the query/hits/searcher
  // fields read below - confirm against the fixture.
  doSearching(
      new BooleanQuery.Builder()
          .add(new TermQuery(new Term(FIELD_NAME, "y")), Occur.SHOULD)
          .add(new PhraseQuery(FIELD_NAME, "x", "y", "z"), Occur.SHOULD)
          .build());

  int fragmentLimit = 2;
  // this test instance is handed to the Highlighter as its first argument
  Highlighter highlighter = new Highlighter(this, new QueryScorer(query, FIELD_NAME));

  for (int hit = 0; hit < hits.totalHits; hit++) {
    int docId = hits.scoreDocs[hit].doc;
    String storedText = searcher.doc(docId).get(FIELD_NAME);
    TokenStream tokens = getAnyTokenStream(FIELD_NAME, docId);

    // a fresh 40-sized fragmenter is installed for every hit
    highlighter.setTextFragmenter(new SimpleFragmenter(40));
    String fragments = highlighter.getBestFragments(tokens, storedText, fragmentLimit, "...");
    if (VERBOSE) {
      System.out.println("\t" + fragments);
    }

    // numHighlights is fixture state; it is expected to be exactly 4 here
    assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
  }
}
Aggregations