Search in sources :

Example 11 with SpanNearQuery

use of org.apache.lucene.search.spans.SpanNearQuery in project lucene-solr by apache.

the class TestQueryParser method testMultiWordSynonyms.

// TODO: Move to QueryParserTestBase once standard flexible parser gets this capability
/**
   * Parses "guinea pig", with and without quotes, through a plain QueryParser and through SmartQueryParser
   * (both with whitespace splitting disabled) and compares the results with hand-built expected queries.
   */
public void testMultiWordSynonyms() throws Exception {
    Term guineaTerm = new Term("field", "guinea");
    Term pigTerm = new Term("field", "pig");
    Term cavyTerm = new Term("field", "cavy");

    // Expected result for the unquoted input: (+guinea +pig) as one optional clause, cavy as the other.
    // A multi-word synonym source forms a graph query for the synonyms that formed the graph token stream.
    BooleanQuery bothWords = new BooleanQuery.Builder()
        .add(new TermQuery(guineaTerm), BooleanClause.Occur.MUST)
        .add(new TermQuery(pigTerm), BooleanClause.Occur.MUST)
        .build();
    BooleanQuery expectedGraph = new BooleanQuery.Builder()
        .add(bothWords, BooleanClause.Occur.SHOULD)
        .add(new TermQuery(cavyTerm), BooleanClause.Occur.SHOULD)
        .build();

    // Expected result for the quoted input on the plain parser: span near queries.
    SpanNearQuery orderedGuineaPig = SpanNearQuery.newOrderedNearQuery("field")
        .addClause(new SpanTermQuery(guineaTerm))
        .addClause(new SpanTermQuery(pigTerm))
        .setSlop(0)
        .build();
    SpanOrQuery expectedSpans = new SpanOrQuery(new SpanQuery[] { orderedGuineaPig, new SpanTermQuery(cavyTerm) });

    // Expected result for the quoted input on the custom parser: a plain two-term phrase.
    PhraseQuery expectedPhrase = new PhraseQuery.Builder().add(guineaTerm).add(pigTerm).build();

    QueryParser plainParser = new QueryParser("field", new Analyzer1());
    plainParser.setSplitOnWhitespace(false);
    assertEquals(expectedGraph, plainParser.parse("guinea pig"));
    assertEquals(expectedSpans, plainParser.parse("\"guinea pig\""));

    // custom behavior, the synonyms are expanded, unless you use quote operator
    QueryParser customParser = new SmartQueryParser();
    customParser.setSplitOnWhitespace(false);
    assertEquals(expectedGraph, customParser.parse("guinea pig"));
    assertEquals(expectedPhrase, customParser.parse("\"guinea pig\""));
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 12 with SpanNearQuery

use of org.apache.lucene.search.spans.SpanNearQuery in project lucene-solr by apache.

the class PayloadUtils method createSpanQuery.

/**
   * Runs {@code value} through {@code analyzer} for {@code field} and builds a span query from the emitted
   * tokens: one token gives a SpanTermQuery, several tokens give an ordered, zero slop SpanNearQuery, and
   * no tokens at all gives {@code null}.
   */
public static SpanQuery createSpanQuery(String field, String value, Analyzer analyzer) throws IOException {
    // adapted this from QueryBuilder.createSpanQuery (which isn't currently public) and added reset(), end(), and close() calls
    List<SpanTermQuery> clauses = new ArrayList<>();
    try (TokenStream stream = analyzer.tokenStream(field, value)) {
        stream.reset();
        TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
        for (boolean hasToken = stream.incrementToken(); hasToken; hasToken = stream.incrementToken()) {
            Term term = new Term(field, bytesAtt.getBytesRef());
            clauses.add(new SpanTermQuery(term));
        }
        stream.end();
    }

    int clauseCount = clauses.size();
    if (clauseCount == 0) {
        // nothing was emitted, so there is nothing to query for
        return null;
    }
    if (clauseCount == 1) {
        return clauses.get(0);
    }
    SpanTermQuery[] ordered = clauses.toArray(new SpanTermQuery[clauseCount]);
    return new SpanNearQuery(ordered, 0, true);
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Example 13 with SpanNearQuery

use of org.apache.lucene.search.spans.SpanNearQuery in project lucene-solr by apache.

the class WeightedSpanTermExtractor method extract.

/**
   * Fills a <code>Map</code> with {@link WeightedSpanTerm}s using the terms from the supplied <code>Query</code>.
   * Dispatches on the concrete query type: wrapping queries are unwrapped and handled recursively, phrase and
   * multi-phrase queries are converted to span queries first, and query types without a dedicated branch are
   * rewritten once and examined again.
   * 
   * @param query
   *          Query to extract Terms from
   * @param boost
   *          boost accumulated so far; it is multiplied by the boost of every enclosing {@link BoostQuery} and
   *          handed to the term extraction helpers
   * @param terms
   *          Map to place created WeightedSpanTerms in
   * @throws IOException If there is a low-level I/O error
   */
protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
    if (query instanceof BoostQuery) {
        // fold this wrapper's boost into the running boost and descend into the wrapped query
        BoostQuery boostQuery = (BoostQuery) query;
        extract(boostQuery.getQuery(), boost * boostQuery.getBoost(), terms);
    } else if (query instanceof BooleanQuery) {
        // every clause except the prohibited ones is extracted with the unchanged boost
        for (BooleanClause clause : (BooleanQuery) query) {
            if (!clause.isProhibited()) {
                extract(clause.getQuery(), boost, terms);
            }
        }
    } else if (query instanceof PhraseQuery) {
        PhraseQuery phraseQuery = ((PhraseQuery) query);
        Term[] phraseQueryTerms = phraseQuery.getTerms();
        if (phraseQueryTerms.length == 1) {
            // a single-term phrase is handled as a plain span term query
            extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
        } else {
            // one span term clause per phrase term, in phrase order
            SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
            for (int i = 0; i < phraseQueryTerms.length; i++) {
                clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
            }
            // sum position increments beyond 1
            int positionGaps = 0;
            int[] positions = phraseQuery.getPositions();
            if (positions.length >= 2) {
                // positions are in increasing order.   max(0,...) is just a safeguard.
                positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
            }
            //if original slop is 0 then require inOrder
            boolean inorder = (phraseQuery.getSlop() == 0);
            // the position gaps are added to the phrase slop for the equivalent span near query
            SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
            extractWeightedSpanTerms(terms, sp, boost);
        }
    } else if (query instanceof TermQuery || query instanceof SynonymQuery) {
        extractWeightedTerms(terms, query, boost);
    } else if (query instanceof SpanQuery) {
        extractWeightedSpanTerms(terms, (SpanQuery) query, boost);
    } else if (query instanceof ConstantScoreQuery) {
        // descend into the wrapped query, if there is one
        final Query q = ((ConstantScoreQuery) query).getQuery();
        if (q != null) {
            extract(q, boost, terms);
        }
    } else if (query instanceof CommonTermsQuery) {
        // specialized since rewriting would change the result query 
        // this query is TermContext sensitive.
        extractWeightedTerms(terms, query, boost);
    } else if (query instanceof DisjunctionMaxQuery) {
        // every disjunct is extracted with the unchanged boost
        for (Query clause : ((DisjunctionMaxQuery) query)) {
            extract(clause, boost, terms);
        }
    } else if (query instanceof ToParentBlockJoinQuery) {
        extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
    } else if (query instanceof ToChildBlockJoinQuery) {
        extract(((ToChildBlockJoinQuery) query).getParentQuery(), boost, terms);
    } else if (query instanceof MultiPhraseQuery) {
        final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
        final Term[][] termArrays = mpq.getTermArrays();
        final int[] positions = mpq.getPositions();
        // a multi-phrase query without positions contributes nothing
        if (positions.length > 0) {
            // largest position value: start from the last entry and compare it with all earlier ones
            int maxPosition = positions[positions.length - 1];
            for (int i = 0; i < positions.length - 1; ++i) {
                if (positions[i] > maxPosition) {
                    maxPosition = positions[i];
                }
            }
            // array indexed by position; a slot stays null when no term array uses that position
            @SuppressWarnings({ "unchecked", "rawtypes" }) final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
            int distinctPositions = 0;
            // collect the terms of each term array into the list of its position
            for (int i = 0; i < termArrays.length; ++i) {
                final Term[] termArray = termArrays[i];
                List<SpanQuery> disjuncts = disjunctLists[positions[i]];
                if (disjuncts == null) {
                    disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
                    ++distinctPositions;
                }
                for (Term aTermArray : termArray) {
                    disjuncts.add(new SpanTermQuery(aTermArray));
                }
            }
            int positionGaps = 0;
            int position = 0;
            final SpanQuery[] clauses = new SpanQuery[distinctPositions];
            // one SpanOrQuery clause per used position; every unused slot counts as a gap
            for (List<SpanQuery> disjuncts : disjunctLists) {
                if (disjuncts != null) {
                    clauses[position++] = new SpanOrQuery(disjuncts.toArray(new SpanQuery[disjuncts.size()]));
                } else {
                    ++positionGaps;
                }
            }
            if (clauses.length == 1) {
                // only one position in use: no span near query is needed
                extractWeightedSpanTerms(terms, clauses[0], boost);
            } else {
                // as for PhraseQuery: gaps are added to the slop, and in-order is required only for slop 0
                final int slop = mpq.getSlop();
                final boolean inorder = (slop == 0);
                SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                extractWeightedSpanTerms(terms, sp, boost);
            }
        }
    } else if (query instanceof MatchAllDocsQuery) {
    //nothing
    } else if (query instanceof CustomScoreQuery) {
        extract(((CustomScoreQuery) query).getSubQuery(), boost, terms);
    } else if (isQueryUnsupported(query.getClass())) {
    // nothing
    } else {
        // multi-term queries are skipped unless expansion is enabled and fieldNameComparator accepts their field
        if (query instanceof MultiTermQuery && (!expandMultiTermQuery || !fieldNameComparator(((MultiTermQuery) query).getField()))) {
            return;
        }
        Query origQuery = query;
        final IndexReader reader = getLeafContext().reader();
        Query rewritten;
        if (query instanceof MultiTermQuery) {
            // multi-term queries are always rewritten with SCORING_BOOLEAN_REWRITE here
            rewritten = MultiTermQuery.SCORING_BOOLEAN_REWRITE.rewrite(reader, (MultiTermQuery) query);
        } else {
            rewritten = origQuery.rewrite(reader);
        }
        if (rewritten != origQuery) {
            // only rewrite once and then flatten again - the rewritten query could have a special treatment
            // if this method is overwritten in a subclass or above in the next recursion
            extract(rewritten, boost, terms);
        } else {
            // rewriting returned the same instance, so hand the query to the unknown-query hook
            extractUnknownQuery(query, terms);
        }
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) SpanFirstQuery(org.apache.lucene.search.spans.SpanFirstQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) FieldMaskingSpanQuery(org.apache.lucene.search.spans.FieldMaskingSpanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) BoostQuery(org.apache.lucene.search.BoostQuery) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) List(java.util.List) ArrayList(java.util.ArrayList) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) 
PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) FieldMaskingSpanQuery(org.apache.lucene.search.spans.FieldMaskingSpanQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) BooleanClause(org.apache.lucene.search.BooleanClause) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) IndexReader(org.apache.lucene.index.IndexReader) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 14 with SpanNearQuery

use of org.apache.lucene.search.spans.SpanNearQuery in project lucene-solr by apache.

the class TestPayloadCheckQuery method testRewrite.

/**
   * Runs a SpanPayloadCheckQuery over an ordered, zero slop SpanNearQuery whose four clauses are wildcard
   * queries wrapped in SpanMultiTermQueryWrapper, and expects document 529 as the only hit.
   */
public void testRewrite() throws IOException {
    // The wrappers are parameterized with the wrapped query type instead of being used as raw types.
    SpanMultiTermQueryWrapper<WildcardQuery> fiv = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("field", "fiv*")));
    SpanMultiTermQueryWrapper<WildcardQuery> hund = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("field", "hund*")));
    SpanMultiTermQueryWrapper<WildcardQuery> twent = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("field", "twent*")));
    SpanMultiTermQueryWrapper<WildcardQuery> nin = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("field", "nin*")));
    SpanNearQuery sq = new SpanNearQuery(new SpanQuery[] { fiv, hund, twent, nin }, 0, true);
    // one expected payload per clause of the span near query, in clause order
    List<BytesRef> payloads = new ArrayList<>();
    payloads.add(new BytesRef("pos: 0"));
    payloads.add(new BytesRef("pos: 1"));
    payloads.add(new BytesRef("pos: 2"));
    payloads.add(new BytesRef("pos: 3"));
    SpanPayloadCheckQuery query = new SpanPayloadCheckQuery(sq, payloads);
    // if query wasn't rewritten properly, the query would have failed with "Rewrite first!"
    checkHits(query, new int[] { 529 });
}
Also used : WildcardQuery(org.apache.lucene.search.WildcardQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) BytesRef(org.apache.lucene.util.BytesRef)

Example 15 with SpanNearQuery

use of org.apache.lucene.search.spans.SpanNearQuery in project lucene-solr by apache.

the class TestPayloadCheckQuery method testComplexSpanChecks.

/**
   * Nests two ordered, zero slop SpanNearQuery pairs ("one thousand" and "hundred three") inside an ordered
   * SpanNearQuery with slop 1, then checks the hits of that query under a position range restriction and
   * under a payload check.
   */
public void testComplexSpanChecks() throws Exception {
    SpanTermQuery oneTerm = new SpanTermQuery(new Term("field", "one"));
    SpanTermQuery thousandTerm = new SpanTermQuery(new Term("field", "thousand"));
    SpanTermQuery hundredTerm = new SpanTermQuery(new Term("field", "hundred"));
    SpanTermQuery threeTerm = new SpanTermQuery(new Term("field", "three"));

    SpanNearQuery leadingPair = new SpanNearQuery(new SpanQuery[] { oneTerm, thousandTerm }, 0, true);
    SpanNearQuery trailingPair = new SpanNearQuery(new SpanQuery[] { hundredTerm, threeTerm }, 0, true);
    //should be one position in between the two pairs
    SpanNearQuery nested = new SpanNearQuery(new SpanQuery[] { leadingPair, trailingPair }, 1, true);

    //this one's too small
    SpanQuery tooNarrow = new SpanPositionRangeQuery(nested, 1, 2);
    checkHits(tooNarrow, new int[] {});

    //this one's just right
    SpanQuery wideEnough = new SpanPositionRangeQuery(nested, 0, 6);
    checkHits(wideEnough, new int[] { 1103, 1203, 1303, 1403, 1503, 1603, 1703, 1803, 1903 });

    // expected payloads "pos: 0", "pos: 1", "pos: 3", "pos: 4", encoded as UTF-8
    List<BytesRef> expectedPayloads = new ArrayList<>();
    for (int pos : new int[] { 0, 1, 3, 4 }) {
        expectedPayloads.add(new BytesRef(("pos: " + pos).getBytes(StandardCharsets.UTF_8)));
    }
    SpanQuery payloadChecked = new SpanPayloadCheckQuery(nested, expectedPayloads);
    checkHits(payloadChecked, new int[] { 1103, 1203, 1303, 1403, 1503, 1603, 1703, 1803, 1903 });
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) ArrayList(java.util.ArrayList) SpanPositionRangeQuery(org.apache.lucene.search.spans.SpanPositionRangeQuery) Term(org.apache.lucene.index.Term) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) BytesRef(org.apache.lucene.util.BytesRef) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Aggregations

SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery) 55, Term (org.apache.lucene.index.Term) 49, SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery) 49, SpanQuery (org.apache.lucene.search.spans.SpanQuery) 38, BooleanQuery (org.apache.lucene.search.BooleanQuery) 14, Query (org.apache.lucene.search.Query) 14, SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery) 14, Document (org.apache.lucene.document.Document) 13, IndexReader (org.apache.lucene.index.IndexReader) 13, ArrayList (java.util.ArrayList) 12, IndexSearcher (org.apache.lucene.search.IndexSearcher) 12, TopDocs (org.apache.lucene.search.TopDocs) 11, Directory (org.apache.lucene.store.Directory) 11, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter) 10, TextField (org.apache.lucene.document.TextField) 9, PhraseQuery (org.apache.lucene.search.PhraseQuery) 8, PrefixQuery (org.apache.lucene.search.PrefixQuery) 8, TermQuery (org.apache.lucene.search.TermQuery) 8, BytesRef (org.apache.lucene.util.BytesRef) 8, SpanNotQuery (org.apache.lucene.search.spans.SpanNotQuery) 7