use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.
the class SynonymTokenizer method testPayloadQuery.
/** We can highlight based on payloads. It's supported both via term vectors and MemoryIndex since Lucene 5. */
public void testPayloadQuery() throws IOException, InvalidTokenOffsetsException {
//"words" at positions 1 & 4
final String text = "random words and words";
//sets payload to "pos: X" (where X is position #)
Analyzer analyzer = new MockPayloadAnalyzer();
try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
writer.deleteAll();
Document doc = new Document();
doc.add(new Field(FIELD_NAME, text, fieldType));
writer.addDocument(doc);
writer.commit();
}
try (IndexReader reader = DirectoryReader.open(dir)) {
Query query = new SpanPayloadCheckQuery(new SpanTermQuery(new Term(FIELD_NAME, "words")), //just match the first "word" occurrence
Collections.singletonList(new BytesRef("pos: 1")));
IndexSearcher searcher = newSearcher(reader);
QueryScorer scorer = new QueryScorer(query, searcher.getIndexReader(), FIELD_NAME);
scorer.setUsePayloads(true);
Highlighter h = new Highlighter(scorer);
TopDocs hits = searcher.search(query, 10);
assertEquals(1, hits.scoreDocs.length);
TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), 0, FIELD_NAME, analyzer);
if (random().nextBoolean()) {
//conceals detection of TokenStreamFromTermVector
stream = new CachingTokenFilter(stream);
}
String result = h.getBestFragment(stream, text);
//only highlight first "word"
assertEquals("random <B>words</B> and words", result);
}
}
use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.
the class TestQueryRescorer method testMissingSecondPassScore.
public void testMissingSecondPassScore() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
Document doc = new Document();
doc.add(newStringField("id", "0", Field.Store.YES));
doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO));
w.addDocument(doc);
doc = new Document();
doc.add(newStringField("id", "1", Field.Store.YES));
// 1 extra token, but wizard and oz are close;
doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
w.close();
// Do ordinary BooleanQuery:
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
bq.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
IndexSearcher searcher = getSearcher(r);
TopDocs hits = searcher.search(bq.build(), 10);
assertEquals(2, hits.totalHits);
assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
// Now, resort using PhraseQuery, no slop:
PhraseQuery pq = new PhraseQuery("field", "wizard", "oz");
TopDocs hits2 = QueryRescorer.rescore(searcher, hits, pq, 2.0, 10);
// Resorting changed the order:
assertEquals(2, hits2.totalHits);
assertEquals("1", searcher.doc(hits2.scoreDocs[0].doc).get("id"));
assertEquals("0", searcher.doc(hits2.scoreDocs[1].doc).get("id"));
// Resort using SpanNearQuery:
SpanTermQuery t1 = new SpanTermQuery(new Term("field", "wizard"));
SpanTermQuery t2 = new SpanTermQuery(new Term("field", "oz"));
SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { t1, t2 }, 0, true);
TopDocs hits3 = QueryRescorer.rescore(searcher, hits, snq, 2.0, 10);
// Resorting changed the order:
assertEquals(2, hits3.totalHits);
assertEquals("1", searcher.doc(hits3.scoreDocs[0].doc).get("id"));
assertEquals("0", searcher.doc(hits3.scoreDocs[1].doc).get("id"));
r.close();
dir.close();
}
Aggregations