Search in sources :

Example 6 with MultiPhraseQuery

use of org.apache.lucene.search.MultiPhraseQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterStrictPhrases method testSynonyms.

public void testSynonyms() throws IOException {
    indexWriter.addDocument(newDoc("mother father w mom father w dad"));
    initReaderSearcherHighlighter();
    MultiPhraseQuery query = new MultiPhraseQuery.Builder().add(new Term[] { new Term("body", "mom"), new Term("body", "mother") }).add(new Term[] { new Term("body", "dad"), new Term("body", "father") }).build();
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertArrayEquals(new String[] { "<b>mother</b> <b>father</b> w <b>mom</b> <b>father</b> w dad" }, snippets);
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) Term(org.apache.lucene.index.Term)

Example 7 with MultiPhraseQuery

use of org.apache.lucene.search.MultiPhraseQuery in project lucene-solr by apache.

the class WeightedSpanTermExtractor method extract.

/**
   * Fills a <code>Map</code> with {@link WeightedSpanTerm}s using the terms from the supplied <code>Query</code>.
   * 
   * @param query
   *          Query to extract Terms from
   * @param terms
   *          Map to place created WeightedSpanTerms in
   * @throws IOException If there is a low-level I/O error
   */
protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
    if (query instanceof BoostQuery) {
        BoostQuery boostQuery = (BoostQuery) query;
        extract(boostQuery.getQuery(), boost * boostQuery.getBoost(), terms);
    } else if (query instanceof BooleanQuery) {
        for (BooleanClause clause : (BooleanQuery) query) {
            if (!clause.isProhibited()) {
                extract(clause.getQuery(), boost, terms);
            }
        }
    } else if (query instanceof PhraseQuery) {
        PhraseQuery phraseQuery = ((PhraseQuery) query);
        Term[] phraseQueryTerms = phraseQuery.getTerms();
        if (phraseQueryTerms.length == 1) {
            extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
        } else {
            SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
            for (int i = 0; i < phraseQueryTerms.length; i++) {
                clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
            }
            // sum position increments beyond 1
            int positionGaps = 0;
            int[] positions = phraseQuery.getPositions();
            if (positions.length >= 2) {
                // positions are in increasing order.   max(0,...) is just a safeguard.
                positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
            }
            //if original slop is 0 then require inOrder
            boolean inorder = (phraseQuery.getSlop() == 0);
            SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
            extractWeightedSpanTerms(terms, sp, boost);
        }
    } else if (query instanceof TermQuery || query instanceof SynonymQuery) {
        extractWeightedTerms(terms, query, boost);
    } else if (query instanceof SpanQuery) {
        extractWeightedSpanTerms(terms, (SpanQuery) query, boost);
    } else if (query instanceof ConstantScoreQuery) {
        final Query q = ((ConstantScoreQuery) query).getQuery();
        if (q != null) {
            extract(q, boost, terms);
        }
    } else if (query instanceof CommonTermsQuery) {
        // specialized since rewriting would change the result query 
        // this query is TermContext sensitive.
        extractWeightedTerms(terms, query, boost);
    } else if (query instanceof DisjunctionMaxQuery) {
        for (Query clause : ((DisjunctionMaxQuery) query)) {
            extract(clause, boost, terms);
        }
    } else if (query instanceof ToParentBlockJoinQuery) {
        extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
    } else if (query instanceof ToChildBlockJoinQuery) {
        extract(((ToChildBlockJoinQuery) query).getParentQuery(), boost, terms);
    } else if (query instanceof MultiPhraseQuery) {
        final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
        final Term[][] termArrays = mpq.getTermArrays();
        final int[] positions = mpq.getPositions();
        if (positions.length > 0) {
            int maxPosition = positions[positions.length - 1];
            for (int i = 0; i < positions.length - 1; ++i) {
                if (positions[i] > maxPosition) {
                    maxPosition = positions[i];
                }
            }
            @SuppressWarnings({ "unchecked", "rawtypes" }) final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
            int distinctPositions = 0;
            for (int i = 0; i < termArrays.length; ++i) {
                final Term[] termArray = termArrays[i];
                List<SpanQuery> disjuncts = disjunctLists[positions[i]];
                if (disjuncts == null) {
                    disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
                    ++distinctPositions;
                }
                for (Term aTermArray : termArray) {
                    disjuncts.add(new SpanTermQuery(aTermArray));
                }
            }
            int positionGaps = 0;
            int position = 0;
            final SpanQuery[] clauses = new SpanQuery[distinctPositions];
            for (List<SpanQuery> disjuncts : disjunctLists) {
                if (disjuncts != null) {
                    clauses[position++] = new SpanOrQuery(disjuncts.toArray(new SpanQuery[disjuncts.size()]));
                } else {
                    ++positionGaps;
                }
            }
            if (clauses.length == 1) {
                extractWeightedSpanTerms(terms, clauses[0], boost);
            } else {
                final int slop = mpq.getSlop();
                final boolean inorder = (slop == 0);
                SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                extractWeightedSpanTerms(terms, sp, boost);
            }
        }
    } else if (query instanceof MatchAllDocsQuery) {
    //nothing
    } else if (query instanceof CustomScoreQuery) {
        extract(((CustomScoreQuery) query).getSubQuery(), boost, terms);
    } else if (isQueryUnsupported(query.getClass())) {
    // nothing
    } else {
        if (query instanceof MultiTermQuery && (!expandMultiTermQuery || !fieldNameComparator(((MultiTermQuery) query).getField()))) {
            return;
        }
        Query origQuery = query;
        final IndexReader reader = getLeafContext().reader();
        Query rewritten;
        if (query instanceof MultiTermQuery) {
            rewritten = MultiTermQuery.SCORING_BOOLEAN_REWRITE.rewrite(reader, (MultiTermQuery) query);
        } else {
            rewritten = origQuery.rewrite(reader);
        }
        if (rewritten != origQuery) {
            // only rewrite once and then flatten again - the rewritten query could have a special treatment
            // if this method is overwritten in a subclass or above in the next recursion
            extract(rewritten, boost, terms);
        } else {
            extractUnknownQuery(query, terms);
        }
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) SpanFirstQuery(org.apache.lucene.search.spans.SpanFirstQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) FieldMaskingSpanQuery(org.apache.lucene.search.spans.FieldMaskingSpanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) BoostQuery(org.apache.lucene.search.BoostQuery) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) List(java.util.List) ArrayList(java.util.ArrayList) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) FieldMaskingSpanQuery(org.apache.lucene.search.spans.FieldMaskingSpanQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) BooleanClause(org.apache.lucene.search.BooleanClause) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) IndexReader(org.apache.lucene.index.IndexReader) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 8 with MultiPhraseQuery

use of org.apache.lucene.search.MultiPhraseQuery in project zm-mailbox by Zimbra.

the class AbstractIndexStoreTest method multiPhraseQuery.

@Test
public void multiPhraseQuery() throws Exception {
    ZimbraLog.test.debug("--->TEST multiPhraseQuery");
    Mailbox mbox = MailboxManager.getInstance().getMailboxByAccountId(MockProvisioning.DEFAULT_ACCOUNT_ID);
    createContact(mbox, "Non", "Match", "nOn.MaTchiNg@zimbra.com");
    Contact contact1 = createContact(mbox, "Paul", "AA", "aa@example.net", "Software Development Engineer");
    createContact(mbox, "Jane", "BB", "bb@example.net", "Software Planning Engineer");
    Contact contact2 = createContact(mbox, "Peter", "CC", "cc@example.net", "Software Dev Engineer");
    createContact(mbox, "Avril", "DD", "dd@example.net", "Software Architectural Engineer");
    Contact contact3 = createContact(mbox, "Leo", "EE", "ee@example.net", "Software Developer Engineer");
    Contact contact4 = createContact(mbox, "Wow", "DD", "dd@example.net", "Softly Development Engineer");
    createContact(mbox, "Given", "Surname", "GiV.SurN@zimbra.com");
    // Make sure all indexing has been done
    mbox.index.indexDeferredItems();
    IndexStore index = mbox.index.getIndexStore();
    ZimbraIndexSearcher searcher = index.openSearcher();
    MultiPhraseQuery pquery = new MultiPhraseQuery();
    // Lower case required for each term for Lucene
    Term[] firstWords = { new Term(LuceneFields.L_CONTENT, "softly"), new Term(LuceneFields.L_CONTENT, "software") };
    pquery.add(firstWords);
    Term[] secondWords = { new Term(LuceneFields.L_CONTENT, "dev"), new Term(LuceneFields.L_CONTENT, "development"), new Term(LuceneFields.L_CONTENT, "developer") };
    pquery.add(secondWords);
    pquery.add(new Term(LuceneFields.L_CONTENT, "engineer"));
    ZimbraTopDocs result = searcher.search(pquery, 100);
    Assert.assertNotNull("searcher.search result object", result);
    ZimbraLog.test.debug("Result for search [hits=%d]:%s", result.getTotalHits(), result.toString());
    Assert.assertEquals("Number of hits", 4, result.getTotalHits());
    List<String> expecteds = Lists.newArrayList();
    List<String> matches = Lists.newArrayList();
    matches.add(getBlobIdForResultDoc(searcher, result, 0));
    matches.add(getBlobIdForResultDoc(searcher, result, 1));
    matches.add(getBlobIdForResultDoc(searcher, result, 2));
    matches.add(getBlobIdForResultDoc(searcher, result, 3));
    expecteds.add(String.valueOf(contact1.getId()));
    expecteds.add(String.valueOf(contact2.getId()));
    expecteds.add(String.valueOf(contact3.getId()));
    expecteds.add(String.valueOf(contact4.getId()));
    Collections.sort(matches);
    Collections.sort(expecteds);
    for (int ndx = 0; ndx < 4; ndx++) {
        Assert.assertEquals("Match Blob ID", expecteds.get(0), matches.get(0));
    }
}
Also used : Mailbox(com.zimbra.cs.mailbox.Mailbox) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) Term(org.apache.lucene.index.Term) Contact(com.zimbra.cs.mailbox.Contact) ParsedContact(com.zimbra.cs.mime.ParsedContact) Test(org.junit.Test)

Example 9 with MultiPhraseQuery

use of org.apache.lucene.search.MultiPhraseQuery in project crate by crate.

the class MatchQuery method multiPhraseQuery.

private static Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
    MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder();
    mpqb.setSlop(slop);
    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
    int position = -1;
    List<Term> multiTerms = new ArrayList<>();
    stream.reset();
    while (stream.incrementToken()) {
        int positionIncrement = posIncrAtt.getPositionIncrement();
        if (positionIncrement > 0 && multiTerms.size() > 0) {
            if (enablePositionIncrements) {
                mpqb.add(multiTerms.toArray(new Term[0]), position);
            } else {
                mpqb.add(multiTerms.toArray(new Term[0]));
            }
            multiTerms.clear();
        }
        position += positionIncrement;
        multiTerms.add(new Term(field, termAtt.getBytesRef()));
    }
    if (enablePositionIncrements) {
        mpqb.add(multiTerms.toArray(new Term[0]), position);
    } else {
        mpqb.add(multiTerms.toArray(new Term[0]));
    }
    return mpqb.build();
}
Also used : TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) QueryBuilder(org.apache.lucene.util.QueryBuilder) ArrayList(java.util.ArrayList) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) Term(org.apache.lucene.index.Term) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)

Example 10 with MultiPhraseQuery

use of org.apache.lucene.search.MultiPhraseQuery in project elasticsearch by elastic.

the class CustomFieldQuery method flatten.

@Override
void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries, float boost) throws IOException {
    if (sourceQuery instanceof BoostQuery) {
        BoostQuery bq = (BoostQuery) sourceQuery;
        sourceQuery = bq.getQuery();
        boost *= bq.getBoost();
        flatten(sourceQuery, reader, flatQueries, boost);
    } else if (sourceQuery instanceof SpanTermQuery) {
        super.flatten(new TermQuery(((SpanTermQuery) sourceQuery).getTerm()), reader, flatQueries, boost);
    } else if (sourceQuery instanceof ConstantScoreQuery) {
        flatten(((ConstantScoreQuery) sourceQuery).getQuery(), reader, flatQueries, boost);
    } else if (sourceQuery instanceof FunctionScoreQuery) {
        flatten(((FunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries, boost);
    } else if (sourceQuery instanceof MultiPhrasePrefixQuery) {
        flatten(sourceQuery.rewrite(reader), reader, flatQueries, boost);
    } else if (sourceQuery instanceof FiltersFunctionScoreQuery) {
        flatten(((FiltersFunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries, boost);
    } else if (sourceQuery instanceof MultiPhraseQuery) {
        MultiPhraseQuery q = ((MultiPhraseQuery) sourceQuery);
        convertMultiPhraseQuery(0, new int[q.getTermArrays().length], q, q.getTermArrays(), q.getPositions(), reader, flatQueries);
    } else if (sourceQuery instanceof BlendedTermQuery) {
        final BlendedTermQuery blendedTermQuery = (BlendedTermQuery) sourceQuery;
        flatten(blendedTermQuery.rewrite(reader), reader, flatQueries, boost);
    } else if (sourceQuery instanceof ESToParentBlockJoinQuery) {
        ESToParentBlockJoinQuery blockJoinQuery = (ESToParentBlockJoinQuery) sourceQuery;
        flatten(blockJoinQuery.getChildQuery(), reader, flatQueries, boost);
    } else if (sourceQuery instanceof BoostingQuery) {
        BoostingQuery boostingQuery = (BoostingQuery) sourceQuery;
        //flatten positive query with query boost
        flatten(boostingQuery.getMatch(), reader, flatQueries, boost);
        //flatten negative query with negative boost
        flatten(boostingQuery.getContext(), reader, flatQueries, boostingQuery.getBoost());
    } else if (sourceQuery instanceof SynonymQuery) {
        // SynonymQuery should be handled by the parent class directly.
        // This statement should be removed when https://issues.apache.org/jira/browse/LUCENE-7484 is merged.
        SynonymQuery synQuery = (SynonymQuery) sourceQuery;
        for (Term term : synQuery.getTerms()) {
            flatten(new TermQuery(term), reader, flatQueries, boost);
        }
    } else {
        super.flatten(sourceQuery, reader, flatQueries, boost);
    }
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) BlendedTermQuery(org.apache.lucene.queries.BlendedTermQuery) TermQuery(org.apache.lucene.search.TermQuery) FiltersFunctionScoreQuery(org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery) FunctionScoreQuery(org.elasticsearch.common.lucene.search.function.FunctionScoreQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) ESToParentBlockJoinQuery(org.elasticsearch.index.search.ESToParentBlockJoinQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) BlendedTermQuery(org.apache.lucene.queries.BlendedTermQuery) Term(org.apache.lucene.index.Term) BoostQuery(org.apache.lucene.search.BoostQuery) FiltersFunctionScoreQuery(org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) BoostingQuery(org.apache.lucene.queries.BoostingQuery) MultiPhrasePrefixQuery(org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery)

Aggregations

MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery)18 Term (org.apache.lucene.index.Term)15 Query (org.apache.lucene.search.Query)8 PhraseQuery (org.apache.lucene.search.PhraseQuery)7 TermQuery (org.apache.lucene.search.TermQuery)7 BooleanQuery (org.apache.lucene.search.BooleanQuery)5 BoostQuery (org.apache.lucene.search.BoostQuery)5 ArrayList (java.util.ArrayList)4 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)4 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)4 List (java.util.List)3 BooleanClause (org.apache.lucene.search.BooleanClause)3 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)3 MultiTermQuery (org.apache.lucene.search.MultiTermQuery)3 SynonymQuery (org.apache.lucene.search.SynonymQuery)3 DocumentBuilder (javax.xml.parsers.DocumentBuilder)2 IndexReader (org.apache.lucene.index.IndexReader)2 BlendedTermQuery (org.apache.lucene.queries.BlendedTermQuery)2 BoostingQuery (org.apache.lucene.queries.BoostingQuery)2 QueryTreeBuilder (org.apache.lucene.queryparser.flexible.core.builders.QueryTreeBuilder)2