Search in sources :

Example 76 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class SynonymTokenizer method testPayloadQuery.

/** We can highlight based on payloads. It's supported both via term vectors and MemoryIndex since Lucene 5. */
public void testPayloadQuery() throws IOException, InvalidTokenOffsetsException {
    //"words" at positions 1 & 4
    final String text = "random words and words";
    //sets payload to "pos: X" (where X is position #)
    Analyzer analyzer = new MockPayloadAnalyzer();
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
        writer.deleteAll();
        Document doc = new Document();
        doc.add(new Field(FIELD_NAME, text, fieldType));
        writer.addDocument(doc);
        writer.commit();
    }
    try (IndexReader reader = DirectoryReader.open(dir)) {
        Query query = new SpanPayloadCheckQuery(new SpanTermQuery(new Term(FIELD_NAME, "words")), //just match the first "word" occurrence
        Collections.singletonList(new BytesRef("pos: 1")));
        IndexSearcher searcher = newSearcher(reader);
        QueryScorer scorer = new QueryScorer(query, searcher.getIndexReader(), FIELD_NAME);
        scorer.setUsePayloads(true);
        Highlighter h = new Highlighter(scorer);
        TopDocs hits = searcher.search(query, 10);
        assertEquals(1, hits.scoreDocs.length);
        TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), 0, FIELD_NAME, analyzer);
        if (random().nextBoolean()) {
            //conceals detection of TokenStreamFromTermVector
            stream = new CachingTokenFilter(stream);
        }
        String result = h.getBestFragment(stream, text);
        //only highlight first "word"
        assertEquals("random <B>words</B> and words", result);
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) SpanPayloadCheckQuery(org.apache.lucene.queries.payloads.SpanPayloadCheckQuery) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanPayloadCheckQuery(org.apache.lucene.queries.payloads.SpanPayloadCheckQuery) Term(org.apache.lucene.index.Term) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockPayloadAnalyzer(org.apache.lucene.analysis.MockPayloadAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) MockPayloadAnalyzer(org.apache.lucene.analysis.MockPayloadAnalyzer) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) SortField(org.apache.lucene.search.SortField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) CachingTokenFilter(org.apache.lucene.analysis.CachingTokenFilter) IndexReader(org.apache.lucene.index.IndexReader) BytesRef(org.apache.lucene.util.BytesRef) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 77 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestSolrCoreParser method checkApacheLuceneSolr.

private static void checkApacheLuceneSolr(Query query, String fieldName) {
    assertTrue(query instanceof SpanNearQuery);
    final SpanNearQuery snq = (SpanNearQuery) query;
    assertEquals(fieldName, snq.getField());
    assertEquals(42, snq.getSlop());
    assertFalse(snq.isInOrder());
    assertEquals(3, snq.getClauses().length);
    assertTrue(snq.getClauses()[0] instanceof SpanTermQuery);
    assertTrue(snq.getClauses()[1] instanceof SpanTermQuery);
    assertTrue(snq.getClauses()[2] instanceof SpanTermQuery);
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 78 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestSolrCoreParser method checkChooseOneWordQuery.

private static void checkChooseOneWordQuery(boolean span, Query query, String fieldName, String... expectedTermTexts) {
    final Term term;
    if (span) {
        assertTrue(query instanceof SpanTermQuery);
        final SpanTermQuery stq = (SpanTermQuery) query;
        term = stq.getTerm();
    } else {
        assertTrue(query instanceof TermQuery);
        final TermQuery tq = (TermQuery) query;
        term = tq.getTerm();
    }
    final String text = term.text();
    boolean foundExpected = false;
    for (String expected : expectedTermTexts) {
        foundExpected |= expected.equals(text);
    }
    assertEquals(fieldName, term.field());
    assertTrue("expected term text (" + text + ") not found in (" + expectedTermTexts + ")", foundExpected);
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term)

Example 79 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestPositionIncrement method testPayloadsPos0.

public void testPayloadsPos0() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, new MockPayloadAnalyzer());
    Document doc = new Document();
    doc.add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k")));
    writer.addDocument(doc);
    final IndexReader readerFromWriter = writer.getReader();
    LeafReader r = getOnlyLeafReader(readerFromWriter);
    PostingsEnum tp = r.postings(new Term("content", "a"), PostingsEnum.ALL);
    int count = 0;
    assertTrue(tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    // "a" occurs 4 times
    assertEquals(4, tp.freq());
    assertEquals(0, tp.nextPosition());
    assertEquals(1, tp.nextPosition());
    assertEquals(3, tp.nextPosition());
    assertEquals(6, tp.nextPosition());
    // only one doc has "a"
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, tp.nextDoc());
    IndexSearcher is = newSearcher(getOnlyLeafReader(readerFromWriter));
    SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
    SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
    SpanQuery[] sqs = { stq1, stq2 };
    SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);
    count = 0;
    boolean sawZero = false;
    if (VERBOSE) {
        System.out.println("\ngetPayloadSpans test");
    }
    PayloadSpanCollector collector = new PayloadSpanCollector();
    Spans pspans = snq.createWeight(is, false, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
    while (pspans.nextDoc() != Spans.NO_MORE_DOCS) {
        while (pspans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
            if (VERBOSE) {
                System.out.println("doc " + pspans.docID() + ": span " + pspans.startPosition() + " to " + pspans.endPosition());
            }
            collector.reset();
            pspans.collect(collector);
            sawZero |= pspans.startPosition() == 0;
            for (BytesRef payload : collector.payloads) {
                count++;
                if (VERBOSE) {
                    System.out.println("  payload: " + Term.toString(payload));
                }
            }
        }
    }
    assertTrue(sawZero);
    assertEquals(8, count);
    // System.out.println("\ngetSpans test");
    Spans spans = snq.createWeight(is, false, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);
    count = 0;
    sawZero = false;
    while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
            count++;
            sawZero |= spans.startPosition() == 0;
        // System.out.println(spans.doc() + " - " + spans.start() + " - " +
        // spans.end());
        }
    }
    assertEquals(4, count);
    assertTrue(sawZero);
    writer.close();
    is.getIndexReader().close();
    dir.close();
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) Term(org.apache.lucene.index.Term) MockPayloadAnalyzer(org.apache.lucene.analysis.MockPayloadAnalyzer) Document(org.apache.lucene.document.Document) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Spans(org.apache.lucene.search.spans.Spans) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) StringReader(java.io.StringReader) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) PostingsEnum(org.apache.lucene.index.PostingsEnum) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 80 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestPayloadExplanations method testSimpleTerm.

public void testSimpleTerm() throws Exception {
    SpanTermQuery q = new SpanTermQuery(new Term(FIELD, "w2"));
    testAllFunctions(q, new int[] { 0, 1, 2, 3 });
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term)

Aggregations

SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)87 Term (org.apache.lucene.index.Term)84 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)52 SpanQuery (org.apache.lucene.search.spans.SpanQuery)48 Document (org.apache.lucene.document.Document)24 IndexReader (org.apache.lucene.index.IndexReader)23 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)23 TopDocs (org.apache.lucene.search.TopDocs)20 Query (org.apache.lucene.search.Query)19 BooleanQuery (org.apache.lucene.search.BooleanQuery)18 IndexSearcher (org.apache.lucene.search.IndexSearcher)18 TermQuery (org.apache.lucene.search.TermQuery)18 Directory (org.apache.lucene.store.Directory)17 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)15 TextField (org.apache.lucene.document.TextField)14 ArrayList (java.util.ArrayList)12 PhraseQuery (org.apache.lucene.search.PhraseQuery)11 Spans (org.apache.lucene.search.spans.Spans)11 TokenStream (org.apache.lucene.analysis.TokenStream)10 IndexWriter (org.apache.lucene.index.IndexWriter)10