Search in sources :

Example 36 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class ChooseOneWordQueryBuilder method implGetQuery.

/**
 * Builds a single-term query from the longest {@code <Word>} child of the given element.
 *
 * <p>The field is read from the {@code fieldName} attribute via
 * {@code DOMUtils.getAttributeWithInheritanceOrFail}. Every direct child element named
 * {@code Word} is considered; the word with the greatest text length wins, and on a tie the
 * first one encountered is kept.
 *
 * @param e    the element whose {@code <Word>} children are candidate terms
 * @param span {@code true} to return a {@link SpanTermQuery}, {@code false} for a {@link TermQuery}
 * @return a query on the chosen term
 * @throws ParserException if the element has no {@code <Word>} child element, or if one of the
 *                         {@code DOMUtils} "OrFail" lookups fails (presumably on a missing
 *                         attribute or blank text; confirm against DOMUtils)
 */
public Query implGetQuery(Element e, boolean span) throws ParserException {
    Term term = null;
    final String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
    for (Node node = e.getFirstChild(); node != null; node = node.getNextSibling()) {
        if (node.getNodeType() == Node.ELEMENT_NODE && node.getNodeName().equals("Word")) {
            final String word = DOMUtils.getNonBlankTextOrFail((Element) node);
            final Term t = new Term(fieldName, word);
            // Strictly-longer comparison: the first word of a given length is retained.
            if (term == null || term.text().length() < t.text().length()) {
                term = t;
            }
        }
    }
    if (term == null) {
        // Without this guard a null term would reach the query constructors and surface as an
        // undeclared runtime exception instead of a parse error.
        throw new ParserException("<" + e.getTagName() + "> requires at least one <Word> child element");
    }
    return (span ? new SpanTermQuery(term) : new TermQuery(term));
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) Node(org.w3c.dom.Node) Term(org.apache.lucene.index.Term)

Example 37 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class QueryBuilder method analyzeGraphPhrase.

/**
 * Builds a span near (phrase) query out of a graph token stream. The graph's articulation
 * points are walked in order; one span query is produced per segment between consecutive
 * points, and the per-segment queries are combined into the returned in-order near query.
 *
 * @param source     token stream to consume; it is reset before the graph is built
 * @param field      field name used for the generated terms
 * @param phraseSlop slop passed to the resulting {@link SpanNearQuery}
 * @return {@code null} when no segment yields a query, the lone segment query when there is
 *         exactly one, otherwise an ordered {@link SpanNearQuery} over all segment queries
 * @throws IOException declared for failures while reading the token stream
 */
protected SpanQuery analyzeGraphPhrase(TokenStream source, String field, int phraseSlop) throws IOException {
    source.reset();
    GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source);
    List<SpanQuery> positions = new ArrayList<>();
    int[] points = graph.articulationPoints();
    int from = 0;
    // One extra iteration covers the segment after the last articulation point.
    for (int p = 0; p <= points.length; p++) {
        // -1 is used as the end state once the articulation points are exhausted.
        int to = (p < points.length) ? points[p] : -1;
        SpanQuery positionQuery = null;
        if (!graph.hasSidePath(from)) {
            // No side path from this state: OR together the terms found at it.
            Term[] terms = graph.getTerms(field, from);
            assert terms.length > 0;
            if (terms.length == 1) {
                positionQuery = new SpanTermQuery(terms[0]);
            } else {
                SpanTermQuery[] alternatives = new SpanTermQuery[terms.length];
                int slot = 0;
                for (Term term : terms) {
                    alternatives[slot++] = new SpanTermQuery(term);
                }
                positionQuery = new SpanOrQuery(alternatives);
            }
        } else {
            // Side paths present: build one span query per finite string of the segment.
            List<SpanQuery> paths = new ArrayList<>();
            for (Iterator<TokenStream> it = graph.getFiniteStrings(from, to); it.hasNext(); ) {
                SpanQuery pathQuery = createSpanQuery(it.next(), field);
                if (pathQuery != null) {
                    paths.add(pathQuery);
                }
            }
            if (!paths.isEmpty()) {
                positionQuery = new SpanOrQuery(paths.toArray(new SpanQuery[0]));
            }
        }
        if (positionQuery != null) {
            positions.add(positionQuery);
        }
        from = to;
    }
    if (positions.isEmpty()) {
        return null;
    }
    if (positions.size() == 1) {
        return positions.get(0);
    }
    return new SpanNearQuery(positions.toArray(new SpanQuery[0]), phraseSlop, true);
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) GraphTokenStreamFiniteStrings(org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 38 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestBooleanQuery method testBooleanSpanQuery.

// LUCENE-4477 / LUCENE-4401:
/**
 * Indexes a single document and checks that a BooleanQuery made of two SHOULD span-term
 * clauses (one matching term, one misspelled term) returns exactly that one document.
 */
public void testBooleanSpanQuery() throws Exception {
    final String FIELD = "content";
    // try-with-resources so the directory and reader are released even if an assertion fails.
    try (Directory directory = newDirectory()) {
        Analyzer indexerAnalyzer = new MockAnalyzer(random());
        IndexWriterConfig config = new IndexWriterConfig(indexerAnalyzer);
        try (IndexWriter writer = new IndexWriter(directory, config)) {
            Document d = new Document();
            d.add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
            writer.addDocument(d);
        }
        try (IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = newSearcher(indexReader);
            BooleanQuery.Builder query = new BooleanQuery.Builder();
            SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork"));
            // Deliberately misspelled: this clause must not match anything.
            SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork"));
            query.add(sq1, BooleanClause.Occur.SHOULD);
            query.add(sq2, BooleanClause.Occur.SHOULD);
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000);
            searcher.search(query.build(), collector);
            // topDocs() may only be called once per search execution, so read it a single time.
            int hits = collector.topDocs().scoreDocs.length;
            // Expected value first so a failure message reads correctly.
            assertEquals("Bug in boolean query composed of span queries", 1, hits);
        }
    }
}
Also used : Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) SpanQuery(org.apache.lucene.search.spans.SpanQuery) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 39 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class EnwikiQueryMaker method getPrebuiltQueries.

/**
 * Returns a fixed set of span and wildcard queries against the given field.
 *
 * @param field name of the field every term in the returned queries targets
 * @return four queries, in order: span-first on "ford", unordered span-near on
 *         "night"/"trading", unordered span-near on a span-first "ford" plus "credit", and a
 *         constant-score wildcard on "fo*"
 */
private static Query[] getPrebuiltQueries(String field) {
    WildcardQuery wildcard = new WildcardQuery(new Term(field, "fo*"));
    wildcard.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);

    // be wary of unanalyzed text: the terms below are used exactly as written
    SpanFirstQuery fordNearStart = new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 5);

    SpanQuery[] nightTradingClauses = {
        new SpanTermQuery(new Term(field, "night")),
        new SpanTermQuery(new Term(field, "trading"))
    };
    SpanNearQuery nightTrading = new SpanNearQuery(nightTradingClauses, 4, false);

    SpanQuery[] fordCreditClauses = {
        new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 10),
        new SpanTermQuery(new Term(field, "credit"))
    };
    SpanNearQuery fordCredit = new SpanNearQuery(fordCreditClauses, 10, false);

    return new Query[] { fordNearStart, nightTrading, fordCredit, wildcard };
}
Also used : WildcardQuery(org.apache.lucene.search.WildcardQuery) Query(org.apache.lucene.search.Query) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanFirstQuery(org.apache.lucene.search.spans.SpanFirstQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Term(org.apache.lucene.index.Term)

Example 40 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestQueryRescorer method testBasic.

/**
 * Indexes two documents, runs a plain boolean query, then rescores the hits first with a
 * sloppy PhraseQuery and then with an exact ordered SpanNearQuery, asserting that both
 * rescoring passes flip the original ranking.
 */
public void testBasic() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig());

    // Document 0: "wizard" and "oz" are far apart.
    Document farApart = new Document();
    farApart.add(newStringField("id", "0", Field.Store.YES));
    farApart.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO));
    writer.addDocument(farApart);

    // Document 1: one token longer, but "wizard" and "oz" are adjacent.
    Document adjacent = new Document();
    adjacent.add(newStringField("id", "1", Field.Store.YES));
    adjacent.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    writer.addDocument(adjacent);

    IndexReader reader = writer.getReader();
    writer.close();

    // First pass: ordinary BooleanQuery over both terms.
    BooleanQuery.Builder firstPass = new BooleanQuery.Builder();
    firstPass.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
    firstPass.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
    IndexSearcher searcher = getSearcher(reader);
    searcher.setSimilarity(new ClassicSimilarity());

    TopDocs hits = searcher.search(firstPass.build(), 10);
    assertEquals(2, hits.totalHits);
    assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
    assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));

    // Second pass: rescore with a PhraseQuery; the ranking is expected to flip.
    PhraseQuery phrase = new PhraseQuery(5, "field", "wizard", "oz");
    TopDocs phraseRescored = QueryRescorer.rescore(searcher, hits, phrase, 2.0, 10);
    assertEquals(2, phraseRescored.totalHits);
    assertEquals("1", searcher.doc(phraseRescored.scoreDocs[0].doc).get("id"));
    assertEquals("0", searcher.doc(phraseRescored.scoreDocs[1].doc).get("id"));

    // Third pass: rescore the original hits with a SpanNearQuery; same flipped order expected.
    SpanTermQuery wizardSpan = new SpanTermQuery(new Term("field", "wizard"));
    SpanTermQuery ozSpan = new SpanTermQuery(new Term("field", "oz"));
    SpanQuery[] nearClauses = { wizardSpan, ozSpan };
    SpanNearQuery near = new SpanNearQuery(nearClauses, 0, true);
    TopDocs spanRescored = QueryRescorer.rescore(searcher, hits, near, 2.0, 10);
    assertEquals(2, spanRescored.totalHits);
    assertEquals("1", searcher.doc(spanRescored.scoreDocs[0].doc).get("id"));
    assertEquals("0", searcher.doc(spanRescored.scoreDocs[1].doc).get("id"));

    reader.close();
    dir.close();
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IndexReader(org.apache.lucene.index.IndexReader) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Aggregations

SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 196, Term (org.apache.lucene.index.Term): 191, SpanQuery (org.apache.lucene.search.spans.SpanQuery): 121, Test (org.junit.Test): 103, SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery): 59, KrillIndex (de.ids_mannheim.korap.KrillIndex): 57, SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery): 35, SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery): 34, SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery): 31, Result (de.ids_mannheim.korap.response.Result): 30, ArrayList (java.util.ArrayList): 27, Document (org.apache.lucene.document.Document): 24, IndexReader (org.apache.lucene.index.IndexReader): 24, BooleanQuery (org.apache.lucene.search.BooleanQuery): 22, Query (org.apache.lucene.search.Query): 22, TermQuery (org.apache.lucene.search.TermQuery): 22, TopDocs (org.apache.lucene.search.TopDocs): 21, SpanFocusQuery (de.ids_mannheim.korap.query.SpanFocusQuery): 20, IndexSearcher (org.apache.lucene.search.IndexSearcher): 20, SpanRelationQuery (de.ids_mannheim.korap.query.SpanRelationQuery): 18