use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.
the class FastVectorHighlighterTest method testPhraseHighlightTest.
// see LUCENE-4899
/**
 * Exercises {@link FastVectorHighlighter#getBestFragments} with term-only, phrase-only and
 * mixed boolean queries against two fields of one document: one containing a very long
 * term ("long_term") and one without it ("no_long_term").
 *
 * <p>The directory, writer and reader are held in try-with-resources so they are released
 * even when an assertion fails; they are closed in the order reader, writer, directory.
 *
 * @throws IOException if indexing or reading the index fails
 */
public void testPhraseHighlightTest() throws IOException {
  try (Directory dir = newDirectory();
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())))) {
    // Stored text field with term vectors, including positions and offsets.
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();

    Document doc = new Document();
    doc.add(new Field("long_term", "This is a test thisisaverylongwordandmakessurethisfails where foo is highlighed and should be highlighted", type));
    doc.add(new Field("no_long_term", "This is a test where foo is highlighed and should be highlighted", type));
    writer.addDocument(doc);

    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    try (IndexReader reader = DirectoryReader.open(writer)) {
      int docId = 0;
      String field = "no_long_term";
      {
        // Three required term queries, fragment size 18.
        BooleanQuery.Builder query = new BooleanQuery.Builder();
        query.add(new TermQuery(new Term(field, "test")), Occur.MUST);
        query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
        query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
        FieldQuery fieldQuery = highlighter.getFieldQuery(query.build(), reader);
        String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 18, 1);
        // highlighted results are centered
        assertEquals(1, bestFragments.length);
        assertEquals("<b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
      }
      {
        // Sloppy phrase (slop 5) combined with two of its own terms as required clauses.
        BooleanQuery.Builder query = new BooleanQuery.Builder();
        PhraseQuery pq = new PhraseQuery(5, field, "test", "foo", "highlighed");
        query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
        query.add(pq, Occur.MUST);
        query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
        FieldQuery fieldQuery = highlighter.getFieldQuery(query.build(), reader);
        // A fragment size of 18 is expected to yield nothing ...
        String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 18, 1);
        assertEquals(0, bestFragments.length);
        // ... while a fragment size of 30 yields one centered fragment.
        bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 30, 1);
        assertEquals(1, bestFragments.length);
        assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
      }
      {
        // Phrase query on its own, slop 3.
        PhraseQuery query = new PhraseQuery(3, field, "test", "foo", "highlighed");
        FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
        String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 18, 1);
        assertEquals(0, bestFragments.length);
        bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 30, 1);
        // highlighted results are centered
        assertEquals(1, bestFragments.length);
        assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
      }
      {
        // Phrase query on its own, slop 30; still no fragment at size 18.
        PhraseQuery query = new PhraseQuery(30, field, "test", "foo", "highlighed");
        FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
        String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 18, 1);
        assertEquals(0, bestFragments.length);
      }
      {
        // The same phrase appears both at the top level and inside a nested boolean query.
        BooleanQuery.Builder query = new BooleanQuery.Builder();
        PhraseQuery pq = new PhraseQuery(5, field, "test", "foo", "highlighed");
        BooleanQuery.Builder inner = new BooleanQuery.Builder();
        inner.add(pq, Occur.MUST);
        inner.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
        query.add(inner.build(), Occur.MUST);
        query.add(pq, Occur.MUST);
        query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
        FieldQuery fieldQuery = highlighter.getFieldQuery(query.build(), reader);
        String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 18, 1);
        assertEquals(0, bestFragments.length);
        bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 30, 1);
        // highlighted results are centered
        assertEquals(1, bestFragments.length);
        assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
      }
      field = "long_term";
      {
        // The long term exceeds the fragment size of 18; the expected fragment is that term alone.
        BooleanQuery.Builder query = new BooleanQuery.Builder();
        query.add(new TermQuery(new Term(field, "thisisaverylongwordandmakessurethisfails")), Occur.MUST);
        query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
        query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
        FieldQuery fieldQuery = highlighter.getFieldQuery(query.build(), reader);
        String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 18, 1);
        assertEquals(1, bestFragments.length);
        assertEquals("<b>thisisaverylongwordandmakessurethisfails</b>", bestFragments[0]);
      }
    }
  }
}
use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.
the class FastVectorHighlighterTest method testPhrasesSpanningFieldValues.
/**
 * Indexes one document with several values of the same field and checks the fragments
 * {@link FastVectorHighlighter} produces, with discrete multi-value highlighting enabled,
 * for phrases that cross value boundaries and for a term occurring in more than one value.
 *
 * <p>The directory, writer and reader are held in try-with-resources so they are released
 * even when an assertion fails. Fragment counts are asserted before individual fragments
 * so that a wrong count is reported as such rather than as an array index error.
 *
 * @throws IOException if indexing or reading the index fails
 */
public void testPhrasesSpanningFieldValues() throws IOException {
  // positionIncrementGap is 0 so the phrase is found across multiple field
  // values.
  try (Directory dir = newDirectory();
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())))) {
    // Stored text field with term vectors, including positions and offsets.
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();

    Document doc = new Document();
    doc.add(new Field("field", "one two three five", type));
    doc.add(new Field("field", "two three four", type));
    doc.add(new Field("field", "five six five", type));
    doc.add(new Field("field", "six seven eight nine eight nine eight " + "nine eight nine eight nine eight nine", type));
    doc.add(new Field("field", "eight nine", type));
    doc.add(new Field("field", "ten eleven", type));
    doc.add(new Field("field", "twelve thirteen", type));
    writer.addDocument(doc);

    BaseFragListBuilder fragListBuilder = new SimpleFragListBuilder();
    BaseFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();
    fragmentsBuilder.setDiscreteMultiValueHighlighting(true);
    FastVectorHighlighter highlighter = new FastVectorHighlighter(true, true, fragListBuilder, fragmentsBuilder);

    try (IndexReader reader = DirectoryReader.open(writer)) {
      int docId = 0;

      // Phrase that spans a field value
      Query q = new PhraseQuery("field", "four", "five");
      FieldQuery fieldQuery = highlighter.getFieldQuery(q, reader);
      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 1000, 1000);
      assertEquals(2, bestFragments.length);
      assertEquals("two three <b>four</b>", bestFragments[0]);
      assertEquals("<b>five</b> six five", bestFragments[1]);

      // Phrase that ends at a field value
      q = new PhraseQuery("field", "three", "five");
      fieldQuery = highlighter.getFieldQuery(q, reader);
      bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 1000, 1000);
      assertEquals(1, bestFragments.length);
      assertEquals("one two <b>three five</b>", bestFragments[0]);

      // Phrase that spans across three values
      q = new PhraseQuery("field", "nine", "ten", "eleven", "twelve");
      fieldQuery = highlighter.getFieldQuery(q, reader);
      bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 1000, 1000);
      assertEquals(3, bestFragments.length);
      assertEquals("eight <b>nine</b>", bestFragments[0]);
      assertEquals("<b>ten eleven</b>", bestFragments[1]);
      assertEquals("<b>twelve</b> thirteen", bestFragments[2]);

      // Term query that appears in multiple values
      q = new TermQuery(new Term("field", "two"));
      fieldQuery = highlighter.getFieldQuery(q, reader);
      bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 1000, 1000);
      assertEquals(2, bestFragments.length);
      assertEquals("one <b>two</b> three five", bestFragments[0]);
      assertEquals("<b>two</b> three four", bestFragments[1]);
    }
  }
}
use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.
the class TestUnifiedHighlighterStrictPhrases method testPreSpanQueryRewrite.
/**
 * Highlights a query that combines a boosted phrase with a custom {@code MyQuery} wrapper,
 * using a {@link UnifiedHighlighter} whose {@code preSpanQueryRewrite} hook unwraps
 * {@code MyQuery} into the query it carries.
 *
 * @throws IOException if indexing, searching or highlighting fails
 */
public void testPreSpanQueryRewrite() throws IOException {
  indexWriter.addDocument(newDoc("There is no accord and satisfaction with this - Consideration of the accord is arbitrary."));
  initReaderSearcherHighlighter();

  // Replace the default highlighter with one that knows how to look inside MyQuery.
  highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
    @Override
    protected Collection<Query> preSpanQueryRewrite(Query query) {
      if (!(query instanceof MyQuery)) {
        // Anything else gets no custom rewrite.
        return null;
      }
      MyQuery custom = (MyQuery) query;
      return Collections.singletonList(custom.wrapped);
    }
  };
  highlighter.setHighlightPhrasesStrictly(true);

  BooleanQuery.Builder combined = new BooleanQuery.Builder();
  Query boostedPhrase = new BoostQuery(new PhraseQuery("body", "accord", "and", "satisfaction"), 2.0f);

  // At least two of the three optional terms must match.
  BooleanQuery.Builder anyTwoTerms = new BooleanQuery.Builder();
  anyTwoTerms.setMinimumNumberShouldMatch(2);
  anyTwoTerms.add(new TermQuery(new Term("body", "accord")), BooleanClause.Occur.SHOULD);
  anyTwoTerms.add(new TermQuery(new Term("body", "satisfaction")), BooleanClause.Occur.SHOULD);
  anyTwoTerms.add(new TermQuery(new Term("body", "consideration")), BooleanClause.Occur.SHOULD);
  Query wrappedTerms = new MyQuery(anyTwoTerms.build());

  combined.add(boostedPhrase, BooleanClause.Occur.SHOULD);
  combined.add(wrappedTerms, BooleanClause.Occur.SHOULD);
  Query fullQuery = combined.build();

  TopDocs hits = searcher.search(fullQuery, 10, Sort.INDEXORDER);
  assertEquals(1, hits.totalHits);

  String[] expected = new String[] { "There is no <b>accord</b> <b>and</b> <b>satisfaction</b> with this - <b>Consideration</b> of the <b>accord</b> is arbitrary." };
  String[] actual = highlighter.highlight("body", fullQuery, hits);
  assertArrayEquals(expected, actual);
}
use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.
the class TestUnifiedHighlighterStrictPhrases method testMultiValued.
/**
 * Highlights a two-valued "body" field and checks that a phrase query is highlighted
 * across the two values only when its slop reaches the analyzer's position increment gap.
 *
 * @throws IOException if indexing, searching or highlighting fails
 */
public void testMultiValued() throws IOException {
  indexWriter.addDocument(newDoc("one bravo three", "four bravo six"));
  initReaderSearcherHighlighter();

  // A phrase inside each value plus a prefix query, all required.
  BooleanQuery.Builder perValue = new BooleanQuery.Builder();
  perValue.add(newPhraseQuery("body", "one bravo"), BooleanClause.Occur.MUST);
  perValue.add(newPhraseQuery("body", "four bravo"), BooleanClause.Occur.MUST);
  perValue.add(new PrefixQuery(new Term("body", "br")), BooleanClause.Occur.MUST);
  BooleanQuery perValueQuery = perValue.build();
  TopDocs perValueHits = searcher.search(perValueQuery, 10, Sort.INDEXORDER);
  String[] perValueSnippets = highlighter.highlight("body", perValueQuery, perValueHits, 2);
  assertArrayEquals(new String[] { "<b>one</b> <b>bravo</b> three... <b>four</b> <b>bravo</b> six" }, perValueSnippets);

  // now test phraseQuery won't span across values
  assert indexAnalyzer.getPositionIncrementGap("body") > 0;

  // Slop one short of the gap: 1 too little; won't span
  PhraseQuery tooTight = setSlop(newPhraseQuery("body", "three four"), indexAnalyzer.getPositionIncrementGap("body") - 1);
  BooleanQuery.Builder optionalPhrase = new BooleanQuery.Builder();
  optionalPhrase.add(new TermQuery(new Term("body", "bravo")), BooleanClause.Occur.MUST);
  optionalPhrase.add(tooTight, BooleanClause.Occur.SHOULD);
  BooleanQuery optionalPhraseQuery = optionalPhrase.build();
  TopDocs optionalPhraseHits = searcher.search(optionalPhraseQuery, 10);
  String[] optionalPhraseSnippets = highlighter.highlight("body", optionalPhraseQuery, optionalPhraseHits, 2);
  assertEquals("one <b>bravo</b> three... four <b>bravo</b> six", optionalPhraseSnippets[0]);

  // and add just enough slop to cross the values: slop equal to the gap is just enough to span
  PhraseQuery justEnough = setSlop(newPhraseQuery("body", "three four"), indexAnalyzer.getPositionIncrementGap("body"));
  BooleanQuery.Builder requiredPhrase = new BooleanQuery.Builder();
  requiredPhrase.add(new TermQuery(new Term("body", "bravo")), BooleanClause.Occur.MUST);
  // must match and it will
  requiredPhrase.add(justEnough, BooleanClause.Occur.MUST);
  BooleanQuery requiredPhraseQuery = requiredPhrase.build();
  TopDocs requiredPhraseHits = searcher.search(requiredPhraseQuery, 10);
  assertEquals(1, requiredPhraseHits.totalHits);
  String[] requiredPhraseSnippets = highlighter.highlight("body", requiredPhraseQuery, requiredPhraseHits, 2);
  assertEquals("one <b>bravo</b> <b>three</b>... <b>four</b> <b>bravo</b> six", requiredPhraseSnippets[0]);
}
use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.
the class TestQPHelper method testCJKPhrase.
/**
 * Parses a quoted two-character CJK string with {@code SimpleCJKAnalyzer} and expects a
 * {@link PhraseQuery} on "field" made of the two individual characters.
 *
 * @throws Exception if parsing the query fails
 */
public void testCJKPhrase() throws Exception {
  // individual CJK chars as terms
  assertEquals(
      new PhraseQuery("field", "中", "国"),
      getQuery("\"中国\"", new SimpleCJKAnalyzer()));
}
Aggregations