Search in sources :

Example 61 with SpanQuery

use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

the class TestPayloadSpans method testNestedSpans.

public void testNestedSpans() throws Exception {
    SpanTermQuery stq;
    Spans spans;
    IndexSearcher searcher = getSearcher();
    stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark"));
    spans = stq.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
    assertNull(spans);
    SpanQuery[] clauses = new SpanQuery[3];
    clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr"));
    clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy"));
    clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
    SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 12, false);
    spans = spanNearQuery.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
    assertTrue("spans is null and it shouldn't be", spans != null);
    checkSpans(spans, 2, new int[] { 3, 3 });
    clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
    clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr"));
    clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy"));
    spanNearQuery = new SpanNearQuery(clauses, 6, true);
    spans = spanNearQuery.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
    assertTrue("spans is null and it shouldn't be", spans != null);
    checkSpans(spans, 1, new int[] { 3 });
    clauses = new SpanQuery[2];
    clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
    clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr"));
    spanNearQuery = new SpanNearQuery(clauses, 6, true);
    // xx within 6 of rr
    SpanQuery[] clauses2 = new SpanQuery[2];
    clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy"));
    clauses2[1] = spanNearQuery;
    SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses2, 6, false);
    // yy within 6 of xx within 6 of rr
    spans = nestedSpanNearQuery.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
    assertTrue("spans is null and it shouldn't be", spans != null);
    checkSpans(spans, 2, new int[] { 3, 3 });
    closeIndexReader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) Spans(org.apache.lucene.search.spans.Spans) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Example 62 with SpanQuery

use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

the class TestPayloadSpans method testShrinkToAfterShortestMatch3.

public void testShrinkToAfterShortestMatch3() throws IOException {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(new TestPayloadAnalyzer()));
    Document doc = new Document();
    doc.add(new TextField("content", new StringReader("j k a l f k k p a t a k l k t a")));
    writer.addDocument(doc);
    IndexReader reader = writer.getReader();
    IndexSearcher is = newSearcher(getOnlyLeafReader(reader), false);
    writer.close();
    SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
    SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
    SpanQuery[] sqs = { stq1, stq2 };
    SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
    Spans spans = snq.createWeight(is, false, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
    TopDocs topDocs = is.search(snq, 1);
    Set<String> payloadSet = new HashSet<>();
    VerifyingCollector collector = new VerifyingCollector();
    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
        while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
            while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                collector.reset();
                spans.collect(collector);
                for (final BytesRef payload : collector.payloads) {
                    payloadSet.add(Term.toString(payload));
                }
            }
        }
    }
    assertEquals(2, payloadSet.size());
    if (VERBOSE) {
        for (final String payload : payloadSet) System.out.println("match:" + payload);
    }
    assertTrue(payloadSet.contains("a:Noise:10"));
    assertTrue(payloadSet.contains("k:Noise:11"));
    reader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Spans(org.apache.lucene.search.spans.Spans) TopDocs(org.apache.lucene.search.TopDocs) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) StringReader(java.io.StringReader) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Example 63 with SpanQuery

use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

the class TestPayloadSpans method testSpanNot.

public void testSpanNot() throws Exception {
    SpanQuery[] clauses = new SpanQuery[2];
    clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
    clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "three"));
    SpanQuery spq = new SpanNearQuery(clauses, 5, true);
    SpanNotQuery snq = new SpanNotQuery(spq, new SpanTermQuery(new Term(PayloadHelper.FIELD, "two")));
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(similarity));
    Document doc = new Document();
    doc.add(newTextField(PayloadHelper.FIELD, "one two three one four three", Field.Store.YES));
    writer.addDocument(doc);
    IndexReader reader = getOnlyLeafReader(writer.getReader());
    writer.close();
    checkSpans(snq.createWeight(newSearcher(reader, false), false, 1f).getSpans(reader.leaves().get(0), SpanWeight.Postings.PAYLOADS), 1, new int[] { 2 });
    reader.close();
    directory.close();
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) Directory(org.apache.lucene.store.Directory)

Example 64 with SpanQuery

use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

the class TestPayloadSpans method testSpanFirst.

public void testSpanFirst() throws IOException {
    SpanQuery match;
    SpanFirstQuery sfq;
    match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
    sfq = new SpanFirstQuery(match, 2);
    Spans spans = sfq.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
    checkSpans(spans, 109, 1, 1, 1);
    //Test more complicated subclause
    SpanQuery[] clauses = new SpanQuery[2];
    clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
    clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred"));
    match = new SpanNearQuery(clauses, 0, true);
    sfq = new SpanFirstQuery(match, 2);
    checkSpans(sfq.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS), 100, 2, 1, 1);
    match = new SpanNearQuery(clauses, 0, false);
    sfq = new SpanFirstQuery(match, 2);
    checkSpans(sfq.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS), 100, 2, 1, 1);
}
Also used : SpanFirstQuery(org.apache.lucene.search.spans.SpanFirstQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Spans(org.apache.lucene.search.spans.Spans)

Example 65 with SpanQuery

use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

the class HighlighterPhraseTest method testSparseSpan.

public void testSparseSpan() throws IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox did not jump";
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
    try {
        final Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setStoreTermVectorOffsets(true);
        customType.setStoreTermVectorPositions(true);
        customType.setStoreTermVectors(true);
        document.add(new Field(FIELD, new TokenStreamSparse(), customType));
        indexWriter.addDocument(document);
    } finally {
        indexWriter.close();
    }
    final IndexReader indexReader = DirectoryReader.open(directory);
    try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD, "did")), new SpanTermQuery(new Term(FIELD, "jump")) }, 0, true);
        TopDocs hits = indexSearcher.search(phraseQuery, 1);
        assertEquals(0, hits.totalHits);
        final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
        final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
        assertEquals(highlighter.getBestFragment(new TokenStreamSparse(), TEXT), highlighter.getBestFragment(tokenStream, TEXT));
    } finally {
        indexReader.close();
        directory.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) Directory(org.apache.lucene.store.Directory)

Aggregations

SpanQuery (org.apache.lucene.search.spans.SpanQuery)81 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)53 Term (org.apache.lucene.index.Term)51 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)43 Query (org.apache.lucene.search.Query)27 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)25 IndexReader (org.apache.lucene.index.IndexReader)18 SpanBoostQuery (org.apache.lucene.search.spans.SpanBoostQuery)18 Document (org.apache.lucene.document.Document)17 IndexSearcher (org.apache.lucene.search.IndexSearcher)16 BooleanQuery (org.apache.lucene.search.BooleanQuery)15 TopDocs (org.apache.lucene.search.TopDocs)15 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)13 TermQuery (org.apache.lucene.search.TermQuery)13 ArrayList (java.util.ArrayList)12 Spans (org.apache.lucene.search.spans.Spans)12 PhraseQuery (org.apache.lucene.search.PhraseQuery)11 DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery)10 SpanNotQuery (org.apache.lucene.search.spans.SpanNotQuery)10 Directory (org.apache.lucene.store.Directory)10