use of org.apache.lucene.search.MultiPhraseQuery in project lucene-solr by apache.
the class TestUnifiedHighlighterStrictPhrases method testSynonyms.
public void testSynonyms() throws IOException {
indexWriter.addDocument(newDoc("mother father w mom father w dad"));
initReaderSearcherHighlighter();
MultiPhraseQuery query = new MultiPhraseQuery.Builder().add(new Term[] { new Term("body", "mom"), new Term("body", "mother") }).add(new Term[] { new Term("body", "dad"), new Term("body", "father") }).build();
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
String[] snippets = highlighter.highlight("body", query, topDocs);
assertArrayEquals(new String[] { "<b>mother</b> <b>father</b> w <b>mom</b> <b>father</b> w dad" }, snippets);
}
use of org.apache.lucene.search.MultiPhraseQuery in project lucene-solr by apache.
the class WeightedSpanTermExtractor method extract.
/**
* Fills a <code>Map</code> with {@link WeightedSpanTerm}s using the terms from the supplied <code>Query</code>.
*
* @param query
* Query to extract Terms from
* @param terms
* Map to place created WeightedSpanTerms in
* @throws IOException If there is a low-level I/O error
*/
protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
if (query instanceof BoostQuery) {
BoostQuery boostQuery = (BoostQuery) query;
extract(boostQuery.getQuery(), boost * boostQuery.getBoost(), terms);
} else if (query instanceof BooleanQuery) {
for (BooleanClause clause : (BooleanQuery) query) {
if (!clause.isProhibited()) {
extract(clause.getQuery(), boost, terms);
}
}
} else if (query instanceof PhraseQuery) {
PhraseQuery phraseQuery = ((PhraseQuery) query);
Term[] phraseQueryTerms = phraseQuery.getTerms();
if (phraseQueryTerms.length == 1) {
extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
} else {
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
for (int i = 0; i < phraseQueryTerms.length; i++) {
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
}
// sum position increments beyond 1
int positionGaps = 0;
int[] positions = phraseQuery.getPositions();
if (positions.length >= 2) {
// positions are in increasing order. max(0,...) is just a safeguard.
positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
}
//if original slop is 0 then require inOrder
boolean inorder = (phraseQuery.getSlop() == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
extractWeightedSpanTerms(terms, sp, boost);
}
} else if (query instanceof TermQuery || query instanceof SynonymQuery) {
extractWeightedTerms(terms, query, boost);
} else if (query instanceof SpanQuery) {
extractWeightedSpanTerms(terms, (SpanQuery) query, boost);
} else if (query instanceof ConstantScoreQuery) {
final Query q = ((ConstantScoreQuery) query).getQuery();
if (q != null) {
extract(q, boost, terms);
}
} else if (query instanceof CommonTermsQuery) {
// specialized since rewriting would change the result query
// this query is TermContext sensitive.
extractWeightedTerms(terms, query, boost);
} else if (query instanceof DisjunctionMaxQuery) {
for (Query clause : ((DisjunctionMaxQuery) query)) {
extract(clause, boost, terms);
}
} else if (query instanceof ToParentBlockJoinQuery) {
extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
} else if (query instanceof ToChildBlockJoinQuery) {
extract(((ToChildBlockJoinQuery) query).getParentQuery(), boost, terms);
} else if (query instanceof MultiPhraseQuery) {
final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
final Term[][] termArrays = mpq.getTermArrays();
final int[] positions = mpq.getPositions();
if (positions.length > 0) {
int maxPosition = positions[positions.length - 1];
for (int i = 0; i < positions.length - 1; ++i) {
if (positions[i] > maxPosition) {
maxPosition = positions[i];
}
}
@SuppressWarnings({ "unchecked", "rawtypes" }) final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
int distinctPositions = 0;
for (int i = 0; i < termArrays.length; ++i) {
final Term[] termArray = termArrays[i];
List<SpanQuery> disjuncts = disjunctLists[positions[i]];
if (disjuncts == null) {
disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
++distinctPositions;
}
for (Term aTermArray : termArray) {
disjuncts.add(new SpanTermQuery(aTermArray));
}
}
int positionGaps = 0;
int position = 0;
final SpanQuery[] clauses = new SpanQuery[distinctPositions];
for (List<SpanQuery> disjuncts : disjunctLists) {
if (disjuncts != null) {
clauses[position++] = new SpanOrQuery(disjuncts.toArray(new SpanQuery[disjuncts.size()]));
} else {
++positionGaps;
}
}
if (clauses.length == 1) {
extractWeightedSpanTerms(terms, clauses[0], boost);
} else {
final int slop = mpq.getSlop();
final boolean inorder = (slop == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
extractWeightedSpanTerms(terms, sp, boost);
}
}
} else if (query instanceof MatchAllDocsQuery) {
//nothing
} else if (query instanceof CustomScoreQuery) {
extract(((CustomScoreQuery) query).getSubQuery(), boost, terms);
} else if (isQueryUnsupported(query.getClass())) {
// nothing
} else {
if (query instanceof MultiTermQuery && (!expandMultiTermQuery || !fieldNameComparator(((MultiTermQuery) query).getField()))) {
return;
}
Query origQuery = query;
final IndexReader reader = getLeafContext().reader();
Query rewritten;
if (query instanceof MultiTermQuery) {
rewritten = MultiTermQuery.SCORING_BOOLEAN_REWRITE.rewrite(reader, (MultiTermQuery) query);
} else {
rewritten = origQuery.rewrite(reader);
}
if (rewritten != origQuery) {
// only rewrite once and then flatten again - the rewritten query could have a special treatment
// if this method is overwritten in a subclass or above in the next recursion
extract(rewritten, boost, terms);
} else {
extractUnknownQuery(query, terms);
}
}
}
use of org.apache.lucene.search.MultiPhraseQuery in project zm-mailbox by Zimbra.
the class AbstractIndexStoreTest method multiPhraseQuery.
@Test
public void multiPhraseQuery() throws Exception {
ZimbraLog.test.debug("--->TEST multiPhraseQuery");
Mailbox mbox = MailboxManager.getInstance().getMailboxByAccountId(MockProvisioning.DEFAULT_ACCOUNT_ID);
createContact(mbox, "Non", "Match", "nOn.MaTchiNg@zimbra.com");
Contact contact1 = createContact(mbox, "Paul", "AA", "aa@example.net", "Software Development Engineer");
createContact(mbox, "Jane", "BB", "bb@example.net", "Software Planning Engineer");
Contact contact2 = createContact(mbox, "Peter", "CC", "cc@example.net", "Software Dev Engineer");
createContact(mbox, "Avril", "DD", "dd@example.net", "Software Architectural Engineer");
Contact contact3 = createContact(mbox, "Leo", "EE", "ee@example.net", "Software Developer Engineer");
Contact contact4 = createContact(mbox, "Wow", "DD", "dd@example.net", "Softly Development Engineer");
createContact(mbox, "Given", "Surname", "GiV.SurN@zimbra.com");
// Make sure all indexing has been done
mbox.index.indexDeferredItems();
IndexStore index = mbox.index.getIndexStore();
ZimbraIndexSearcher searcher = index.openSearcher();
MultiPhraseQuery pquery = new MultiPhraseQuery();
// Lower case required for each term for Lucene
Term[] firstWords = { new Term(LuceneFields.L_CONTENT, "softly"), new Term(LuceneFields.L_CONTENT, "software") };
pquery.add(firstWords);
Term[] secondWords = { new Term(LuceneFields.L_CONTENT, "dev"), new Term(LuceneFields.L_CONTENT, "development"), new Term(LuceneFields.L_CONTENT, "developer") };
pquery.add(secondWords);
pquery.add(new Term(LuceneFields.L_CONTENT, "engineer"));
ZimbraTopDocs result = searcher.search(pquery, 100);
Assert.assertNotNull("searcher.search result object", result);
ZimbraLog.test.debug("Result for search [hits=%d]:%s", result.getTotalHits(), result.toString());
Assert.assertEquals("Number of hits", 4, result.getTotalHits());
List<String> expecteds = Lists.newArrayList();
List<String> matches = Lists.newArrayList();
matches.add(getBlobIdForResultDoc(searcher, result, 0));
matches.add(getBlobIdForResultDoc(searcher, result, 1));
matches.add(getBlobIdForResultDoc(searcher, result, 2));
matches.add(getBlobIdForResultDoc(searcher, result, 3));
expecteds.add(String.valueOf(contact1.getId()));
expecteds.add(String.valueOf(contact2.getId()));
expecteds.add(String.valueOf(contact3.getId()));
expecteds.add(String.valueOf(contact4.getId()));
Collections.sort(matches);
Collections.sort(expecteds);
for (int ndx = 0; ndx < 4; ndx++) {
Assert.assertEquals("Match Blob ID", expecteds.get(0), matches.get(0));
}
}
use of org.apache.lucene.search.MultiPhraseQuery in project crate by crate.
the class MatchQuery method multiPhraseQuery.
private static Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder();
mpqb.setSlop(slop);
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
int position = -1;
List<Term> multiTerms = new ArrayList<>();
stream.reset();
while (stream.incrementToken()) {
int positionIncrement = posIncrAtt.getPositionIncrement();
if (positionIncrement > 0 && multiTerms.size() > 0) {
if (enablePositionIncrements) {
mpqb.add(multiTerms.toArray(new Term[0]), position);
} else {
mpqb.add(multiTerms.toArray(new Term[0]));
}
multiTerms.clear();
}
position += positionIncrement;
multiTerms.add(new Term(field, termAtt.getBytesRef()));
}
if (enablePositionIncrements) {
mpqb.add(multiTerms.toArray(new Term[0]), position);
} else {
mpqb.add(multiTerms.toArray(new Term[0]));
}
return mpqb.build();
}
use of org.apache.lucene.search.MultiPhraseQuery in project elasticsearch by elastic.
the class CustomFieldQuery method flatten.
@Override
void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries, float boost) throws IOException {
if (sourceQuery instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) sourceQuery;
sourceQuery = bq.getQuery();
boost *= bq.getBoost();
flatten(sourceQuery, reader, flatQueries, boost);
} else if (sourceQuery instanceof SpanTermQuery) {
super.flatten(new TermQuery(((SpanTermQuery) sourceQuery).getTerm()), reader, flatQueries, boost);
} else if (sourceQuery instanceof ConstantScoreQuery) {
flatten(((ConstantScoreQuery) sourceQuery).getQuery(), reader, flatQueries, boost);
} else if (sourceQuery instanceof FunctionScoreQuery) {
flatten(((FunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries, boost);
} else if (sourceQuery instanceof MultiPhrasePrefixQuery) {
flatten(sourceQuery.rewrite(reader), reader, flatQueries, boost);
} else if (sourceQuery instanceof FiltersFunctionScoreQuery) {
flatten(((FiltersFunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries, boost);
} else if (sourceQuery instanceof MultiPhraseQuery) {
MultiPhraseQuery q = ((MultiPhraseQuery) sourceQuery);
convertMultiPhraseQuery(0, new int[q.getTermArrays().length], q, q.getTermArrays(), q.getPositions(), reader, flatQueries);
} else if (sourceQuery instanceof BlendedTermQuery) {
final BlendedTermQuery blendedTermQuery = (BlendedTermQuery) sourceQuery;
flatten(blendedTermQuery.rewrite(reader), reader, flatQueries, boost);
} else if (sourceQuery instanceof ESToParentBlockJoinQuery) {
ESToParentBlockJoinQuery blockJoinQuery = (ESToParentBlockJoinQuery) sourceQuery;
flatten(blockJoinQuery.getChildQuery(), reader, flatQueries, boost);
} else if (sourceQuery instanceof BoostingQuery) {
BoostingQuery boostingQuery = (BoostingQuery) sourceQuery;
//flatten positive query with query boost
flatten(boostingQuery.getMatch(), reader, flatQueries, boost);
//flatten negative query with negative boost
flatten(boostingQuery.getContext(), reader, flatQueries, boostingQuery.getBoost());
} else if (sourceQuery instanceof SynonymQuery) {
// SynonymQuery should be handled by the parent class directly.
// This statement should be removed when https://issues.apache.org/jira/browse/LUCENE-7484 is merged.
SynonymQuery synQuery = (SynonymQuery) sourceQuery;
for (Term term : synQuery.getTerms()) {
flatten(new TermQuery(term), reader, flatQueries, boost);
}
} else {
super.flatten(sourceQuery, reader, flatQueries, boost);
}
}
Aggregations