Search in sources :

Example 21 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class PayloadUtils method createSpanQuery.

/**
   * The generated SpanQuery will be either a SpanTermQuery or an ordered, zero slop SpanNearQuery, depending
   * on how many tokens are emitted.
   */
public static SpanQuery createSpanQuery(String field, String value, Analyzer analyzer) throws IOException {
    // adapted this from QueryBuilder.createSpanQuery (which isn't currently public) and added reset(), end(), and close() calls
    List<SpanTermQuery> terms = new ArrayList<>();
    try (TokenStream in = analyzer.tokenStream(field, value)) {
        in.reset();
        TermToBytesRefAttribute termAtt = in.getAttribute(TermToBytesRefAttribute.class);
        while (in.incrementToken()) {
            terms.add(new SpanTermQuery(new Term(field, termAtt.getBytesRef())));
        }
        in.end();
    }
    SpanQuery query;
    if (terms.isEmpty()) {
        query = null;
    } else if (terms.size() == 1) {
        query = terms.get(0);
    } else {
        query = new SpanNearQuery(terms.toArray(new SpanTermQuery[terms.size()]), 0, true);
    }
    return query;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Example 22 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class WeightedSpanTermExtractor method extract.

/**
   * Fills a <code>Map</code> with {@link WeightedSpanTerm}s using the terms from the supplied <code>Query</code>.
   * 
   * @param query
   *          Query to extract Terms from
   * @param terms
   *          Map to place created WeightedSpanTerms in
   * @throws IOException If there is a low-level I/O error
   */
protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
    if (query instanceof BoostQuery) {
        BoostQuery boostQuery = (BoostQuery) query;
        extract(boostQuery.getQuery(), boost * boostQuery.getBoost(), terms);
    } else if (query instanceof BooleanQuery) {
        for (BooleanClause clause : (BooleanQuery) query) {
            if (!clause.isProhibited()) {
                extract(clause.getQuery(), boost, terms);
            }
        }
    } else if (query instanceof PhraseQuery) {
        PhraseQuery phraseQuery = ((PhraseQuery) query);
        Term[] phraseQueryTerms = phraseQuery.getTerms();
        if (phraseQueryTerms.length == 1) {
            extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
        } else {
            SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
            for (int i = 0; i < phraseQueryTerms.length; i++) {
                clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
            }
            // sum position increments beyond 1
            int positionGaps = 0;
            int[] positions = phraseQuery.getPositions();
            if (positions.length >= 2) {
                // positions are in increasing order.   max(0,...) is just a safeguard.
                positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
            }
            //if original slop is 0 then require inOrder
            boolean inorder = (phraseQuery.getSlop() == 0);
            SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
            extractWeightedSpanTerms(terms, sp, boost);
        }
    } else if (query instanceof TermQuery || query instanceof SynonymQuery) {
        extractWeightedTerms(terms, query, boost);
    } else if (query instanceof SpanQuery) {
        extractWeightedSpanTerms(terms, (SpanQuery) query, boost);
    } else if (query instanceof ConstantScoreQuery) {
        final Query q = ((ConstantScoreQuery) query).getQuery();
        if (q != null) {
            extract(q, boost, terms);
        }
    } else if (query instanceof CommonTermsQuery) {
        // specialized since rewriting would change the result query 
        // this query is TermContext sensitive.
        extractWeightedTerms(terms, query, boost);
    } else if (query instanceof DisjunctionMaxQuery) {
        for (Query clause : ((DisjunctionMaxQuery) query)) {
            extract(clause, boost, terms);
        }
    } else if (query instanceof ToParentBlockJoinQuery) {
        extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
    } else if (query instanceof ToChildBlockJoinQuery) {
        extract(((ToChildBlockJoinQuery) query).getParentQuery(), boost, terms);
    } else if (query instanceof MultiPhraseQuery) {
        final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
        final Term[][] termArrays = mpq.getTermArrays();
        final int[] positions = mpq.getPositions();
        if (positions.length > 0) {
            int maxPosition = positions[positions.length - 1];
            for (int i = 0; i < positions.length - 1; ++i) {
                if (positions[i] > maxPosition) {
                    maxPosition = positions[i];
                }
            }
            @SuppressWarnings({ "unchecked", "rawtypes" }) final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
            int distinctPositions = 0;
            for (int i = 0; i < termArrays.length; ++i) {
                final Term[] termArray = termArrays[i];
                List<SpanQuery> disjuncts = disjunctLists[positions[i]];
                if (disjuncts == null) {
                    disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
                    ++distinctPositions;
                }
                for (Term aTermArray : termArray) {
                    disjuncts.add(new SpanTermQuery(aTermArray));
                }
            }
            int positionGaps = 0;
            int position = 0;
            final SpanQuery[] clauses = new SpanQuery[distinctPositions];
            for (List<SpanQuery> disjuncts : disjunctLists) {
                if (disjuncts != null) {
                    clauses[position++] = new SpanOrQuery(disjuncts.toArray(new SpanQuery[disjuncts.size()]));
                } else {
                    ++positionGaps;
                }
            }
            if (clauses.length == 1) {
                extractWeightedSpanTerms(terms, clauses[0], boost);
            } else {
                final int slop = mpq.getSlop();
                final boolean inorder = (slop == 0);
                SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                extractWeightedSpanTerms(terms, sp, boost);
            }
        }
    } else if (query instanceof MatchAllDocsQuery) {
    //nothing
    } else if (query instanceof CustomScoreQuery) {
        extract(((CustomScoreQuery) query).getSubQuery(), boost, terms);
    } else if (isQueryUnsupported(query.getClass())) {
    // nothing
    } else {
        if (query instanceof MultiTermQuery && (!expandMultiTermQuery || !fieldNameComparator(((MultiTermQuery) query).getField()))) {
            return;
        }
        Query origQuery = query;
        final IndexReader reader = getLeafContext().reader();
        Query rewritten;
        if (query instanceof MultiTermQuery) {
            rewritten = MultiTermQuery.SCORING_BOOLEAN_REWRITE.rewrite(reader, (MultiTermQuery) query);
        } else {
            rewritten = origQuery.rewrite(reader);
        }
        if (rewritten != origQuery) {
            // only rewrite once and then flatten again - the rewritten query could have a special treatment
            // if this method is overwritten in a subclass or above in the next recursion
            extract(rewritten, boost, terms);
        } else {
            extractUnknownQuery(query, terms);
        }
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) SpanFirstQuery(org.apache.lucene.search.spans.SpanFirstQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) FieldMaskingSpanQuery(org.apache.lucene.search.spans.FieldMaskingSpanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) BoostQuery(org.apache.lucene.search.BoostQuery) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) List(java.util.List) ArrayList(java.util.ArrayList) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) FieldMaskingSpanQuery(org.apache.lucene.search.spans.FieldMaskingSpanQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) BooleanClause(org.apache.lucene.search.BooleanClause) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) IndexReader(org.apache.lucene.index.IndexReader) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 23 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestPayloadCheckQuery method testComplexSpanChecks.

public void testComplexSpanChecks() throws Exception {
    SpanTermQuery one = new SpanTermQuery(new Term("field", "one"));
    SpanTermQuery thous = new SpanTermQuery(new Term("field", "thousand"));
    //should be one position in between
    SpanTermQuery hundred = new SpanTermQuery(new Term("field", "hundred"));
    SpanTermQuery three = new SpanTermQuery(new Term("field", "three"));
    SpanNearQuery oneThous = new SpanNearQuery(new SpanQuery[] { one, thous }, 0, true);
    SpanNearQuery hundredThree = new SpanNearQuery(new SpanQuery[] { hundred, three }, 0, true);
    SpanNearQuery oneThousHunThree = new SpanNearQuery(new SpanQuery[] { oneThous, hundredThree }, 1, true);
    SpanQuery query;
    //this one's too small
    query = new SpanPositionRangeQuery(oneThousHunThree, 1, 2);
    checkHits(query, new int[] {});
    //this one's just right
    query = new SpanPositionRangeQuery(oneThousHunThree, 0, 6);
    checkHits(query, new int[] { 1103, 1203, 1303, 1403, 1503, 1603, 1703, 1803, 1903 });
    List<BytesRef> payloads = new ArrayList<>();
    BytesRef pay = new BytesRef(("pos: " + 0).getBytes(StandardCharsets.UTF_8));
    BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes(StandardCharsets.UTF_8));
    BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes(StandardCharsets.UTF_8));
    BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes(StandardCharsets.UTF_8));
    payloads.add(pay);
    payloads.add(pay2);
    payloads.add(pay3);
    payloads.add(pay4);
    query = new SpanPayloadCheckQuery(oneThousHunThree, payloads);
    checkHits(query, new int[] { 1103, 1203, 1303, 1403, 1503, 1603, 1703, 1803, 1903 });
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) ArrayList(java.util.ArrayList) SpanPositionRangeQuery(org.apache.lucene.search.spans.SpanPositionRangeQuery) Term(org.apache.lucene.index.Term) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) BytesRef(org.apache.lucene.util.BytesRef) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Example 24 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestPayloadScoreQuery method testSpanContainingQuery.

@Test
public void testSpanContainingQuery() throws Exception {
    // twenty WITHIN ((one OR hundred) NEAR two)~2
    SpanContainingQuery q = new SpanContainingQuery(new SpanNearQuery(new SpanQuery[] { new SpanOrQuery(new SpanTermQuery(new Term("field", "one")), new SpanTermQuery(new Term("field", "hundred"))), new SpanTermQuery(new Term("field", "two")) }, 2, true), new SpanTermQuery(new Term("field", "twenty")));
    checkQuery(q, new AveragePayloadFunction(), new int[] { 222, 122 }, new float[] { 4.0f, 3.666666f });
    checkQuery(q, new MaxPayloadFunction(), new int[] { 122, 222 }, new float[] { 4.0f, 4.0f });
    checkQuery(q, new MinPayloadFunction(), new int[] { 222, 122 }, new float[] { 4.0f, 2.0f });
}
Also used : SpanContainingQuery(org.apache.lucene.search.spans.SpanContainingQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Test(org.junit.Test)

Example 25 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.

the class TestPayloadSpans method testFirstClauseWithoutPayload.

public void testFirstClauseWithoutPayload() throws Exception {
    Spans spans;
    IndexSearcher searcher = getSearcher();
    SpanQuery[] clauses = new SpanQuery[3];
    clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "nopayload"));
    clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "qq"));
    clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "ss"));
    SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 6, true);
    SpanQuery[] clauses2 = new SpanQuery[2];
    clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "pp"));
    clauses2[1] = spanNearQuery;
    SpanNearQuery snq = new SpanNearQuery(clauses2, 6, false);
    SpanQuery[] clauses3 = new SpanQuery[2];
    clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "np"));
    clauses3[1] = snq;
    SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
    spans = nestedSpanNearQuery.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
    assertTrue("spans is null and it shouldn't be", spans != null);
    checkSpans(spans, 1, new int[] { 3 });
    closeIndexReader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) Spans(org.apache.lucene.search.spans.Spans) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Aggregations

SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)196 Term (org.apache.lucene.index.Term)191 SpanQuery (org.apache.lucene.search.spans.SpanQuery)121 Test (org.junit.Test)103 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)59 KrillIndex (de.ids_mannheim.korap.KrillIndex)57 SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery)35 SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery)34 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)31 Result (de.ids_mannheim.korap.response.Result)30 ArrayList (java.util.ArrayList)27 Document (org.apache.lucene.document.Document)24 IndexReader (org.apache.lucene.index.IndexReader)24 BooleanQuery (org.apache.lucene.search.BooleanQuery)22 Query (org.apache.lucene.search.Query)22 TermQuery (org.apache.lucene.search.TermQuery)22 TopDocs (org.apache.lucene.search.TopDocs)21 SpanFocusQuery (de.ids_mannheim.korap.query.SpanFocusQuery)20 IndexSearcher (org.apache.lucene.search.IndexSearcher)20 SpanRelationQuery (de.ids_mannheim.korap.query.SpanRelationQuery)18