use of org.apache.lucene.search.BooleanQuery in project lucene-solr by apache.
the class TestUnifiedHighlighterMTQ method testWhichMTQMatched.
/**
 * Runs a query made of three multi-term queries (prefix, wildcard and fuzzy) and confirms
 * that a custom formatter can tell which query matched which hit.
 *
 * <p>The reader is managed by try-with-resources so it is released even when an assertion
 * fails part-way through the test.
 *
 * @throws Exception if indexing, searching or highlighting fails
 */
public void testWhichMTQMatched() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);
  body.setStringValue("Test a one sentence document.");
  iw.addDocument(doc);

  try (IndexReader ir = iw.getReader()) {
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);

    // use a variety of common MTQ types
    BooleanQuery query = new BooleanQuery.Builder()
        .add(new PrefixQuery(new Term("body", "te")), BooleanClause.Occur.SHOULD)
        .add(new WildcardQuery(new Term("body", "*one*")), BooleanClause.Occur.SHOULD)
        .add(new FuzzyQuery(new Term("body", "zentence~")), BooleanClause.Occur.SHOULD)
        .build();
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits);

    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(1, snippets.length);
    // Default formatter just bolds each hit:
    assertEquals("<b>Test</b> a <b>one</b> <b>sentence</b> document.", snippets[0]);

    // Now use our own formatter, that also stuffs the
    // matching term's text into the result:
    highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
      @Override
      protected PassageFormatter getFormatter(String field) {
        return new PassageFormatter() {
          @Override
          public Object format(Passage[] passages, String content) {
            // Copied from DefaultPassageFormatter, but
            // tweaked to include the matched term:
            StringBuilder sb = new StringBuilder();
            int pos = 0;
            for (Passage passage : passages) {
              // don't add ellipsis if it's the first one, or if it's connected.
              if (passage.getStartOffset() > pos && pos > 0) {
                sb.append("... ");
              }
              pos = passage.getStartOffset();
              for (int i = 0; i < passage.getNumMatches(); i++) {
                int start = passage.getMatchStarts()[i];
                int end = passage.getMatchEnds()[i];
                // it's possible to have overlapping terms
                if (start > pos) {
                  sb.append(content, pos, start);
                }
                if (end > pos) {
                  sb.append("<b>");
                  sb.append(content, Math.max(pos, start), end);
                  // append the text of whatever matched, in parentheses; the expected
                  // snippet below shows this is the matching query's string form
                  sb.append('(');
                  sb.append(passage.getMatchTerms()[i].utf8ToString());
                  sb.append(')');
                  sb.append("</b>");
                  pos = end;
                }
              }
              // it's possible a "term" from the analyzer could span a sentence boundary.
              sb.append(content, pos, Math.max(pos, passage.getEndOffset()));
              pos = passage.getEndOffset();
            }
            return sb.toString();
          }
        };
      }
    };

    // topDocs is reused unchanged from the search above, so its hit count needs no re-check.
    snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(1, snippets.length);
    assertEquals("<b>Test(body:te*)</b> a <b>one(body:*one*)</b> <b>sentence(body:zentence~~2)</b> document.", snippets[0]);
  }
}
use of org.apache.lucene.search.BooleanQuery in project lucene-solr by apache.
the class TestUnifiedHighlighterMTQ method testPositionSensitiveWithWildcardDoesNotHighlight.
/**
 * Highlights a document with a query that combines a required wildcard clause
 * ({@code enforc*}) and a required phrase clause ({@code "consent order"}), none of whose
 * terms occur in the indexed body, and asserts that the returned snippet is the body text
 * with no highlight markup.
 *
 * <p>The document is looked up by its {@code id} field rather than by the highlighted query.
 * The reader is managed by try-with-resources so it is released even when an assertion fails.
 *
 * @throws Exception if indexing, searching or highlighting fails
 */
public void testPositionSensitiveWithWildcardDoesNotHighlight() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Document doc = new Document();
  doc.add(new Field("body", "iterate insect ipswitch illinois indirect", fieldType));
  doc.add(newTextField("id", "id", Field.Store.YES));
  iw.addDocument(doc);

  try (IndexReader ir = iw.getReader()) {
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);

    // locate the single indexed document through its id field
    int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;

    PhraseQuery pq = new PhraseQuery.Builder()
        .add(new Term("body", "consent"))
        .add(new Term("body", "order"))
        .build();
    BooleanQuery query = new BooleanQuery.Builder()
        .add(new WildcardQuery(new Term("body", "enforc*")), BooleanClause.Occur.MUST)
        .add(pq, BooleanClause.Occur.MUST)
        .build();

    int[] docIds = new int[] { docID };
    // the last argument is a per-field int array (2 for "body"); presumably the passage limit
    String[] snippets = highlighter.highlightFields(new String[] { "body" }, query, docIds, new int[] { 2 }).get("body");
    assertEquals(1, snippets.length);
    assertEquals("iterate insect ipswitch illinois indirect", snippets[0]);
  }
}
use of org.apache.lucene.search.BooleanQuery in project lucene-solr by apache.
the class TestUnifiedHighlighterMTQ method testRanges.
/**
 * Checks highlighting of {@code TermRangeQuery} over two small documents for a series of
 * bound configurations: both bounds set, open start, open end, bounds that equal an indexed
 * term (inclusive and exclusive), and a range on a different field.
 *
 * <p>The reader is managed by try-with-resources so it is released even when an assertion
 * fails part-way through the test.
 *
 * @throws Exception if indexing, searching or highlighting fails
 */
public void testRanges() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);
  body.setStringValue("This is a test.");
  iw.addDocument(doc);
  body.setStringValue("Test a one sentence document.");
  iw.addDocument(doc);

  try (IndexReader ir = iw.getReader()) {
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);

    // both bounds given, inclusive
    assertRangeSnippets(searcher, highlighter,
        TermRangeQuery.newStringRange("body", "ta", "tf", true, true),
        "This is a <b>test</b>.",
        "<b>Test</b> a one sentence document.");

    // null start
    assertRangeSnippets(searcher, highlighter,
        TermRangeQuery.newStringRange("body", null, "tf", true, true),
        "This <b>is</b> <b>a</b> <b>test</b>.",
        "<b>Test</b> <b>a</b> <b>one</b> <b>sentence</b> <b>document</b>.");

    // null end
    assertRangeSnippets(searcher, highlighter,
        TermRangeQuery.newStringRange("body", "ta", null, true, true),
        "<b>This</b> is a <b>test</b>.",
        "<b>Test</b> a one sentence document.");

    // exact start inclusive
    assertRangeSnippets(searcher, highlighter,
        TermRangeQuery.newStringRange("body", "test", "tf", true, true),
        "This is a <b>test</b>.",
        "<b>Test</b> a one sentence document.");

    // exact end inclusive
    assertRangeSnippets(searcher, highlighter,
        TermRangeQuery.newStringRange("body", "ta", "test", true, true),
        "This is a <b>test</b>.",
        "<b>Test</b> a one sentence document.");

    // exact start exclusive; the match-all clause keeps both documents in the hits
    // even though the range itself no longer matches anything
    BooleanQuery bq = new BooleanQuery.Builder()
        .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
        .add(TermRangeQuery.newStringRange("body", "test", "tf", false, true), BooleanClause.Occur.SHOULD)
        .build();
    assertRangeSnippets(searcher, highlighter, bq,
        "This is a test.",
        "Test a one sentence document.");

    // exact end exclusive
    bq = new BooleanQuery.Builder()
        .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
        .add(TermRangeQuery.newStringRange("body", "ta", "test", true, false), BooleanClause.Occur.SHOULD)
        .build();
    assertRangeSnippets(searcher, highlighter, bq,
        "This is a test.",
        "Test a one sentence document.");

    // wrong field
    bq = new BooleanQuery.Builder()
        .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
        .add(TermRangeQuery.newStringRange("bogus", "ta", "tf", true, true), BooleanClause.Occur.SHOULD)
        .build();
    assertRangeSnippets(searcher, highlighter, bq,
        "This is a test.",
        "Test a one sentence document.");
  }
}

/**
 * Searches for {@code query} in index order, highlights the {@code body} field of the hits
 * and asserts that exactly two hits and two snippets come back with the expected text.
 *
 * @param searcher searcher over the two-document index
 * @param highlighter highlighter used to produce the snippets
 * @param query query to run and to highlight with
 * @param expectedFirst expected snippet for the first hit
 * @param expectedSecond expected snippet for the second hit
 * @throws Exception if searching or highlighting fails
 */
private void assertRangeSnippets(IndexSearcher searcher, UnifiedHighlighter highlighter, Query query,
    String expectedFirst, String expectedSecond) throws Exception {
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits);
  String[] snippets = highlighter.highlight("body", query, topDocs);
  assertEquals(2, snippets.length);
  assertEquals(expectedFirst, snippets[0]);
  assertEquals(expectedSecond, snippets[1]);
}
use of org.apache.lucene.search.BooleanQuery in project lucene-solr by apache.
the class TestUnifiedHighlighterMTQ method testOneRegexp.
/**
 * Checks highlighting of a single {@code RegexpQuery} ({@code te.*}) over two small
 * documents, then checks that the same regexp on a different field produces no highlight
 * markup in the {@code body} snippets.
 *
 * <p>The reader is managed by try-with-resources so it is released even when an assertion
 * fails part-way through the test.
 *
 * @throws Exception if indexing, searching or highlighting fails
 */
public void testOneRegexp() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);
  body.setStringValue("This is a test.");
  iw.addDocument(doc);
  body.setStringValue("Test a one sentence document.");
  iw.addDocument(doc);

  try (IndexReader ir = iw.getReader()) {
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);

    Query query = new RegexpQuery(new Term("body", "te.*"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

    // wrong field; the match-all clause keeps both documents in the hits
    BooleanQuery bq = new BooleanQuery.Builder()
        .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
        .add(new RegexpQuery(new Term("bogus", "te.*")), BooleanClause.Occur.SHOULD)
        .build();
    topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    snippets = highlighter.highlight("body", bq, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a test.", snippets[0]);
    assertEquals("Test a one sentence document.", snippets[1]);
  }
}
use of org.apache.lucene.search.BooleanQuery in project lucene-solr by apache.
the class TestUnifiedHighlighterRanking method checkQuery.
/**
 * For every {@code n} from 1 up to (but excluding) {@code maxTopN}, highlights the same
 * document twice, once passing {@code n} and once passing {@code n + 1} as the last argument
 * of {@code highlight}, and asserts that everything recorded by the first formatter was
 * also recorded by the second.
 *
 * @param is searcher to run the query against
 * @param query query to highlight; it is combined with a required match on the document id
 * @param doc value of the {@code id} field identifying the document to highlight
 * @param maxTopN exclusive upper bound for {@code n}
 * @throws IOException if searching or highlighting fails
 */
private void checkQuery(IndexSearcher is, Query query, int doc, int maxTopN) throws IOException {
  for (int topN = 1; topN < maxTopN; topN++) {
    // highlighter that receives the smaller count
    final FakePassageFormatter fewerFormatter = new FakePassageFormatter();
    UnifiedHighlighter fewerHighlighter = new UnifiedHighlighter(is, indexAnalyzer) {
      @Override
      protected PassageFormatter getFormatter(String field) {
        assertEquals("body", field);
        return fewerFormatter;
      }
    };
    fewerHighlighter.setMaxLength(Integer.MAX_VALUE - 1);

    // highlighter that receives the count plus one
    final FakePassageFormatter moreFormatter = new FakePassageFormatter();
    UnifiedHighlighter moreHighlighter = new UnifiedHighlighter(is, indexAnalyzer) {
      @Override
      protected PassageFormatter getFormatter(String field) {
        assertEquals("body", field);
        return moreFormatter;
      }
    };
    moreHighlighter.setMaxLength(Integer.MAX_VALUE - 1);

    // restrict the caller's query to the one requested document
    Term idTerm = new Term("id", Integer.toString(doc));
    BooleanQuery restricted = new BooleanQuery.Builder()
        .add(query, BooleanClause.Occur.MUST)
        .add(new TermQuery(idTerm), BooleanClause.Occur.MUST)
        .build();
    TopDocs hits = is.search(restricted, 1);

    fewerHighlighter.highlight("body", restricted, hits, topN);
    moreHighlighter.highlight("body", restricted, hits, topN + 1);

    // the larger run must have seen a superset of what the smaller run saw
    assertTrue(moreFormatter.seen.containsAll(fewerFormatter.seen));
  }
}
Aggregations