Search in sources :

Example 51 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestSimilarity2 method testCrazySpans.

/** make sure all sims work with spanOR(termX, termY) where termY does not exist */
public void testCrazySpans() throws Exception {
    // The problem: "normal" lucene queries create scorers, returning null if terms dont exist
    // This means they never score a term that does not exist.
    // however with spans, there is only one scorer for the whole hierarchy:
    // inner queries are not real queries, their boosts are ignored, etc.
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    doc.add(newField("foo", "bar", ft));
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher is = newSearcher(ir);
    for (Similarity sim : sims) {
        is.setSimilarity(sim);
        SpanTermQuery s1 = new SpanTermQuery(new Term("foo", "bar"));
        SpanTermQuery s2 = new SpanTermQuery(new Term("foo", "baz"));
        Query query = new SpanOrQuery(s1, s2);
        TopDocs td = is.search(query, 10);
        assertEquals(1, td.totalHits);
        float score = td.scoreDocs[0].score;
        assertFalse("negative score for " + sim, score < 0.0f);
        assertFalse("inf score for " + sim, Float.isInfinite(score));
        assertFalse("nan score for " + sim, Float.isNaN(score));
    }
    ir.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 52 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestPayloadTermQuery method testNoPayload.

public void testNoPayload() throws Exception {
    SpanQuery q1 = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero")), new MaxPayloadFunction());
    SpanQuery q2 = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo")), new MaxPayloadFunction());
    BooleanClause c1 = new BooleanClause(q1, BooleanClause.Occur.MUST);
    BooleanClause c2 = new BooleanClause(q2, BooleanClause.Occur.MUST_NOT);
    BooleanQuery.Builder query = new BooleanQuery.Builder();
    query.add(c1);
    query.add(c2);
    TopDocs hits = searcher.search(query.build(), 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue("hits Size: " + hits.totalHits + " is not: " + 1, hits.totalHits == 1);
    int[] results = new int[1];
    //hits.scoreDocs[0].doc;
    results[0] = 0;
    CheckHits.checkHitCollector(random(), query.build(), PayloadHelper.NO_PAYLOAD_FIELD, searcher, results);
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) TopDocs(org.apache.lucene.search.TopDocs) BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Example 53 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestPayloadTermQuery method testQuery.

public void testQuery() {
    SpanQuery boostingFuncTermQuery = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy")), new MaxPayloadFunction());
    QueryUtils.check(boostingFuncTermQuery);
    SpanTermQuery spanTermQuery = new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"));
    assertTrue(boostingFuncTermQuery.equals(spanTermQuery) == spanTermQuery.equals(boostingFuncTermQuery));
    SpanQuery boostingFuncTermQuery2 = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy")), new AveragePayloadFunction());
    QueryUtils.checkUnequal(boostingFuncTermQuery, boostingFuncTermQuery2);
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Example 54 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestPayloadScoreQuery method testNestedNearQuery.

@Test
public void testNestedNearQuery() throws Exception {
    // (one OR hundred) NEAR (twenty two) ~ 1
    //  2    4        4    4
    // one hundred twenty two
    // two hundred twenty two
    SpanNearQuery q = new SpanNearQuery(new SpanQuery[] { new SpanOrQuery(new SpanTermQuery(new Term("field", "one")), new SpanTermQuery(new Term("field", "hundred"))), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("field", "twenty")), new SpanTermQuery(new Term("field", "two")) }, 0, true) }, 1, true);
    // check includeSpanScore makes a difference here
    searcher.setSimilarity(new MultiplyingSimilarity());
    try {
        checkQuery(q, new MaxPayloadFunction(), new int[] { 122, 222 }, new float[] { 20.901256561279297f, 17.06580352783203f });
        checkQuery(q, new MinPayloadFunction(), new int[] { 222, 122 }, new float[] { 17.06580352783203f, 10.450628280639648f });
        checkQuery(q, new AveragePayloadFunction(), new int[] { 122, 222 }, new float[] { 19.15948486328125f, 17.06580352783203f });
        checkQuery(q, new MaxPayloadFunction(), false, new int[] { 122, 222 }, new float[] { 4.0f, 4.0f });
        checkQuery(q, new MinPayloadFunction(), false, new int[] { 222, 122 }, new float[] { 4.0f, 2.0f });
        checkQuery(q, new AveragePayloadFunction(), false, new int[] { 222, 122 }, new float[] { 4.0f, 3.666666f });
    } finally {
        searcher.setSimilarity(similarity);
    }
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Test(org.junit.Test)

Example 55 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestPayloadSpans method testShrinkToAfterShortestMatch.

public void testShrinkToAfterShortestMatch() throws IOException {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(new TestPayloadAnalyzer()));
    Document doc = new Document();
    doc.add(new TextField("content", new StringReader("a b c d e f g h i j a k")));
    writer.addDocument(doc);
    IndexReader reader = writer.getReader();
    IndexSearcher is = newSearcher(getOnlyLeafReader(reader), false);
    writer.close();
    SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
    SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
    SpanQuery[] sqs = { stq1, stq2 };
    SpanNearQuery snq = new SpanNearQuery(sqs, 1, true);
    VerifyingCollector collector = new VerifyingCollector();
    Spans spans = snq.createWeight(is, false, 1f).getSpans(is.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
    TopDocs topDocs = is.search(snq, 1);
    Set<String> payloadSet = new HashSet<>();
    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
        while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
            while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                collector.reset();
                spans.collect(collector);
                for (final BytesRef payload : collector.payloads) {
                    payloadSet.add(Term.toString(payload));
                }
            }
        }
    }
    assertEquals(2, payloadSet.size());
    assertTrue(payloadSet.contains("a:Noise:10"));
    assertTrue(payloadSet.contains("k:Noise:11"));
    reader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Spans(org.apache.lucene.search.spans.Spans) TopDocs(org.apache.lucene.search.TopDocs) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) StringReader(java.io.StringReader) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Aggregations

SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)87 Term (org.apache.lucene.index.Term)84 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)52 SpanQuery (org.apache.lucene.search.spans.SpanQuery)48 Document (org.apache.lucene.document.Document)24 IndexReader (org.apache.lucene.index.IndexReader)23 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)23 TopDocs (org.apache.lucene.search.TopDocs)20 Query (org.apache.lucene.search.Query)19 BooleanQuery (org.apache.lucene.search.BooleanQuery)18 IndexSearcher (org.apache.lucene.search.IndexSearcher)18 TermQuery (org.apache.lucene.search.TermQuery)18 Directory (org.apache.lucene.store.Directory)17 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)15 TextField (org.apache.lucene.document.TextField)14 ArrayList (java.util.ArrayList)12 PhraseQuery (org.apache.lucene.search.PhraseQuery)11 Spans (org.apache.lucene.search.spans.Spans)11 TokenStream (org.apache.lucene.analysis.TokenStream)10 IndexWriter (org.apache.lucene.index.IndexWriter)10