Search in sources :

Example 86 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class SynonymTokenizer method testPayloadQuery.

/** We can highlight based on payloads. It's supported both via term vectors and MemoryIndex since Lucene 5. */
public void testPayloadQuery() throws IOException, InvalidTokenOffsetsException {
    //"words" at positions 1 & 4
    final String text = "random words and words";
    //sets payload to "pos: X" (where X is position #)
    Analyzer analyzer = new MockPayloadAnalyzer();
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
        writer.deleteAll();
        Document doc = new Document();
        doc.add(new Field(FIELD_NAME, text, fieldType));
        writer.addDocument(doc);
        writer.commit();
    }
    try (IndexReader reader = DirectoryReader.open(dir)) {
        Query query = new SpanPayloadCheckQuery(new SpanTermQuery(new Term(FIELD_NAME, "words")), //just match the first "word" occurrence
        Collections.singletonList(new BytesRef("pos: 1")));
        IndexSearcher searcher = newSearcher(reader);
        QueryScorer scorer = new QueryScorer(query, searcher.getIndexReader(), FIELD_NAME);
        scorer.setUsePayloads(true);
        Highlighter h = new Highlighter(scorer);
        TopDocs hits = searcher.search(query, 10);
        assertEquals(1, hits.scoreDocs.length);
        TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), 0, FIELD_NAME, analyzer);
        if (random().nextBoolean()) {
            //conceals detection of TokenStreamFromTermVector
            stream = new CachingTokenFilter(stream);
        }
        String result = h.getBestFragment(stream, text);
        //only highlight first "word"
        assertEquals("random <B>words</B> and words", result);
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) SpanPayloadCheckQuery(org.apache.lucene.queries.payloads.SpanPayloadCheckQuery) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanPayloadCheckQuery(org.apache.lucene.queries.payloads.SpanPayloadCheckQuery) Term(org.apache.lucene.index.Term) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockPayloadAnalyzer(org.apache.lucene.analysis.MockPayloadAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) MockPayloadAnalyzer(org.apache.lucene.analysis.MockPayloadAnalyzer) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) SortField(org.apache.lucene.search.SortField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) CachingTokenFilter(org.apache.lucene.analysis.CachingTokenFilter) IndexReader(org.apache.lucene.index.IndexReader) BytesRef(org.apache.lucene.util.BytesRef) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 87 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestQueryRescorer method testMissingSecondPassScore.

public void testMissingSecondPassScore() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
    Document doc = new Document();
    doc.add(newStringField("id", "0", Field.Store.YES));
    doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO));
    w.addDocument(doc);
    doc = new Document();
    doc.add(newStringField("id", "1", Field.Store.YES));
    // 1 extra token, but wizard and oz are close;
    doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();
    // Do ordinary BooleanQuery:
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    bq.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
    bq.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
    IndexSearcher searcher = getSearcher(r);
    TopDocs hits = searcher.search(bq.build(), 10);
    assertEquals(2, hits.totalHits);
    assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
    assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
    // Now, resort using PhraseQuery, no slop:
    PhraseQuery pq = new PhraseQuery("field", "wizard", "oz");
    TopDocs hits2 = QueryRescorer.rescore(searcher, hits, pq, 2.0, 10);
    // Resorting changed the order:
    assertEquals(2, hits2.totalHits);
    assertEquals("1", searcher.doc(hits2.scoreDocs[0].doc).get("id"));
    assertEquals("0", searcher.doc(hits2.scoreDocs[1].doc).get("id"));
    // Resort using SpanNearQuery:
    SpanTermQuery t1 = new SpanTermQuery(new Term("field", "wizard"));
    SpanTermQuery t2 = new SpanTermQuery(new Term("field", "oz"));
    SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { t1, t2 }, 0, true);
    TopDocs hits3 = QueryRescorer.rescore(searcher, hits, snq, 2.0, 10);
    // Resorting changed the order:
    assertEquals(2, hits3.totalHits);
    assertEquals("1", searcher.doc(hits3.scoreDocs[0].doc).get("id"));
    assertEquals("0", searcher.doc(hits3.scoreDocs[1].doc).get("id"));
    r.close();
    dir.close();
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Aggregations

SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)87 Term (org.apache.lucene.index.Term)84 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)52 SpanQuery (org.apache.lucene.search.spans.SpanQuery)48 Document (org.apache.lucene.document.Document)24 IndexReader (org.apache.lucene.index.IndexReader)23 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)23 TopDocs (org.apache.lucene.search.TopDocs)20 Query (org.apache.lucene.search.Query)19 BooleanQuery (org.apache.lucene.search.BooleanQuery)18 IndexSearcher (org.apache.lucene.search.IndexSearcher)18 TermQuery (org.apache.lucene.search.TermQuery)18 Directory (org.apache.lucene.store.Directory)17 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)15 TextField (org.apache.lucene.document.TextField)14 ArrayList (java.util.ArrayList)12 PhraseQuery (org.apache.lucene.search.PhraseQuery)11 Spans (org.apache.lucene.search.spans.Spans)11 TokenStream (org.apache.lucene.analysis.TokenStream)10 IndexWriter (org.apache.lucene.index.IndexWriter)10