use of org.apache.lucene.search.BooleanQuery in project lucene-solr by apache.
the class TestQueryParser method testMultiWordSynonyms.
// TODO: Move to QueryParserTestBase once standard flexible parser gets this capability
/**
 * Checks how a multi-word synonym ("guinea pig" / "cavy") is parsed when the parser
 * does not split on whitespace, for both a plain {@link QueryParser} and the
 * customized {@code SmartQueryParser}, with and without the quote operator.
 */
public void testMultiWordSynonyms() throws Exception {
  // Expected building blocks, all on the same field.
  final Term guineaTerm = new Term("field", "guinea");
  final Term pigTerm = new Term("field", "pig");
  final Term cavyTerm = new Term("field", "cavy");

  // Both words of the multi-word side are required ...
  final BooleanQuery bothWords = new BooleanQuery.Builder()
      .add(new TermQuery(guineaTerm), BooleanClause.Occur.MUST)
      .add(new TermQuery(pigTerm), BooleanClause.Occur.MUST)
      .build();
  // ... and that conjunction is an alternative to the single-word synonym.
  final BooleanQuery expectedGraph = new BooleanQuery.Builder()
      .add(bothWords, BooleanClause.Occur.SHOULD)
      .add(new TermQuery(cavyTerm), BooleanClause.Occur.SHOULD)
      .build();
  final PhraseQuery expectedPhrase = new PhraseQuery.Builder()
      .add(guineaTerm)
      .add(pigTerm)
      .build();

  // Plain parser, unquoted input: the synonym graph becomes a boolean graph query.
  final QueryParser plainParser = new QueryParser("field", new Analyzer1());
  plainParser.setSplitOnWhitespace(false);
  assertEquals(expectedGraph, plainParser.parse("guinea pig"));

  // Plain parser, quoted input: the synonym graph becomes span queries.
  final SpanNearQuery orderedGuineaPig = SpanNearQuery.newOrderedNearQuery("field")
      .addClause(new SpanTermQuery(guineaTerm))
      .addClause(new SpanTermQuery(pigTerm))
      .setSlop(0)
      .build();
  final SpanOrQuery expectedSpans =
      new SpanOrQuery(new SpanQuery[] { orderedGuineaPig, new SpanTermQuery(cavyTerm) });
  assertEquals(expectedSpans, plainParser.parse("\"guinea pig\""));

  // Customized parser: synonyms are expanded unless the quote operator is used.
  final QueryParser customParser = new SmartQueryParser();
  customParser.setSplitOnWhitespace(false);
  assertEquals(expectedGraph, customParser.parse("guinea pig"));
  assertEquals(expectedPhrase, customParser.parse("\"guinea pig\""));
}
use of org.apache.lucene.search.BooleanQuery in project lucene-solr by apache.
the class TestExtendableQueryParser method testExtFieldUnqoted.
/**
 * For every configured delimiter, parses an unquoted "extField:foo bar" expression and
 * checks that it yields a two-clause {@link BooleanQuery}: a term query for "foo" on
 * "aField" followed by a term query for "bar" on the default field.
 */
public void testExtFieldUnqoted() throws Exception {
  for (int idx = 0; idx < DELIMITERS.length; idx++) {
    final Extensions extensions = newExtensions(DELIMITERS[idx]);
    extensions.add("testExt", new ExtensionStub());
    final ExtendableQueryParser extParser = (ExtendableQueryParser) getParser(null, extensions);
    final String extensionField = extensions.buildExtensionField("testExt", "aField");

    final Query parsed = extParser.parse(String.format(Locale.ROOT, "%s:foo bar", extensionField));
    assertTrue("expected instance of BooleanQuery but was " + parsed.getClass(), parsed instanceof BooleanQuery);
    final BooleanQuery bool = (BooleanQuery) parsed;
    assertEquals(2, bool.clauses().size());

    // First clause: the term that was prefixed with the extension field.
    final Query first = bool.clauses().get(0).getQuery();
    assertTrue("expected instance of TermQuery but was " + first.getClass(), first instanceof TermQuery);
    final TermQuery firstTerm = (TermQuery) first;
    assertEquals("aField", firstTerm.getTerm().field());
    assertEquals("foo", firstTerm.getTerm().text());

    // Second clause: the bare term, which lands on the default field.
    final Query second = bool.clauses().get(1).getQuery();
    assertTrue("expected instance of TermQuery but was " + second.getClass(), second instanceof TermQuery);
    final TermQuery secondTerm = (TermQuery) second;
    assertEquals(getDefaultField(), secondTerm.getTerm().field());
    assertEquals("bar", secondTerm.getTerm().text());
  }
}
use of org.apache.lucene.search.BooleanQuery in project lucene-solr by apache.
the class AnalyzingInfixSuggester method lookup.
/**
 * Advanced lookup that lets the caller pass an arbitrary Lucene query which is used to
 * filter the suggester's results.
 *
 * @param key the keyword being looked for
 * @param contextQuery an arbitrary Lucene query used to filter the result of the suggester,
 *        or {@code null} for no filtering. {@link #addContextToQuery} could be used to build it.
 * @param num number of items to return
 * @param allTermsRequired whether all searched terms must match
 * @param doHighlight if true, the matching term will be highlighted in the search result
 * @return the result of the suggester
 * @throws IOException if there is an I/O exception while reading data from the index
 * @throws IllegalStateException if the suggester was not built
 */
public List<LookupResult> lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
  if (searcherMgr == null) {
    throw new IllegalStateException("suggester was not built");
  }

  final BooleanClause.Occur occur =
      allTermsRequired ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;

  BooleanQuery.Builder query = new BooleanQuery.Builder();
  final Set<String> matchedTokens = new HashSet<>();
  String prefixToken = null;

  try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
    ts.reset();
    final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);

    // Tokens are added one step late, so that the final token can be
    // handled separately once the stream is exhausted.
    String pendingToken = null;
    int maxEndOffset = -1;
    while (ts.incrementToken()) {
      if (pendingToken != null) {
        matchedTokens.add(pendingToken);
        query.add(new TermQuery(new Term(TEXT_FIELD_NAME, pendingToken)), occur);
      }
      pendingToken = termAtt.toString();
      if (pendingToken != null) {
        maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset());
      }
    }
    ts.end();

    if (pendingToken != null) {
      final Query lastQuery;
      if (maxEndOffset != offsetAtt.endOffset()) {
        // Trailing discarded chars (e.g. whitespace) followed the last token:
        // the user finished typing it, so only exact matches are wanted.
        matchedTokens.add(pendingToken);
        lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, pendingToken));
      } else {
        // Nothing was discarded after the last token: treat it as a prefix
        // (PrefixQuery or the ngram equivalent) so partial input still matches.
        lastQuery = getLastTokenQuery(pendingToken);
        prefixToken = pendingToken;
      }
      if (lastQuery != null) {
        query.add(lastQuery, occur);
      }
    }

    if (contextQuery != null) {
      boolean hasNonProhibitedClause = false;
      for (BooleanClause contextClause : contextQuery.clauses()) {
        if (contextClause.getOccur() != BooleanClause.Occur.MUST_NOT) {
          hasNonProhibitedClause = true;
          break;
        }
      }

      if (!hasNonProhibitedClause) {
        // Only MUST_NOT clauses: merge them straight into the main query
        // rather than nesting the context query as a sub-query.
        for (BooleanClause contextClause : contextQuery.clauses()) {
          query.add(contextClause);
        }
      } else if (allTermsRequired) {
        // Add the context query as a required sub-query.
        query.add(contextQuery, BooleanClause.Occur.MUST);
      } else {
        // The term clauses are optional, so wrap them: both the term query
        // and the context query become required clauses of a new query.
        final BooleanQuery.Builder wrapped = new BooleanQuery.Builder();
        wrapped.add(query.build(), BooleanClause.Occur.MUST);
        wrapped.add(contextQuery, BooleanClause.Occur.MUST);
        query = wrapped;
      }
    }
  }

  // TODO: we could allow blended sort here, combining
  // weight w/ score.  Now we ignore score and sort only
  // by weight:
  final Query finalQuery = finishQuery(query, allTermsRequired);

  // Sort by weight, descending. Postings were sorted by weight during
  // indexing, so collection can stop after the first num hits.
  final TopFieldCollector topCollector = TopFieldCollector.create(SORT, num, true, false, false);
  final Collector earlyTerminating = new EarlyTerminatingSortingCollector(topCollector, SORT, num);

  SearcherManager mgr;
  IndexSearcher searcher;
  synchronized (searcherMgrLock) {
    // acquire & release on same SearcherManager, via local reference
    mgr = searcherMgr;
    searcher = mgr.acquire();
  }
  try {
    searcher.search(finalQuery, earlyTerminating);
    final TopFieldDocs hits = topCollector.topDocs();
    return createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
  } finally {
    mgr.release(searcher);
  }
}
use of org.apache.lucene.search.BooleanQuery in project lucene-solr by apache.
the class SolrPluginUtils method setMinShouldMatch.
/**
 * Copies every clause of {@code q} into a fresh builder, hands that builder together with
 * {@code spec} and {@code mmAutoRelax} to the builder-based {@code setMinShouldMatch}
 * overload, and returns the query built from it. The input query itself is not modified
 * by this method.
 *
 * @param q the query whose clauses are copied
 * @param spec passed through unchanged to the builder-based overload
 * @param mmAutoRelax passed through unchanged to the builder-based overload
 * @return the newly built query
 */
public static BooleanQuery setMinShouldMatch(BooleanQuery q, String spec, boolean mmAutoRelax) {
  final BooleanQuery.Builder copy = new BooleanQuery.Builder();
  q.clauses().forEach(copy::add);
  setMinShouldMatch(copy, spec, mmAutoRelax);
  return copy.build();
}
use of org.apache.lucene.search.BooleanQuery in project lucene-solr by apache.
the class WeightedSpanTermExtractor method extract.
/**
* Fills a <code>Map</code> with {@link WeightedSpanTerm}s using the terms from the supplied <code>Query</code>.
* Dispatches on the concrete query type: wrapper queries are unwrapped and handled recursively,
* phrase-style queries are converted to span queries, and any other query is rewritten once and
* re-dispatched (or handed to <code>extractUnknownQuery</code> if the rewrite returns the same instance).
*
* @param query
* Query to extract Terms from
* @param boost
* boost passed on to the extraction helpers; multiplied by the boost of any enclosing {@link BoostQuery}
* @param terms
* Map to place created WeightedSpanTerms in
* @throws IOException If there is a low-level I/O error
*/
protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
if (query instanceof BoostQuery) {
// unwrap, folding the wrapper's boost into the running boost
BoostQuery boostQuery = (BoostQuery) query;
extract(boostQuery.getQuery(), boost * boostQuery.getBoost(), terms);
} else if (query instanceof BooleanQuery) {
// recurse into every clause except prohibited ones
for (BooleanClause clause : (BooleanQuery) query) {
if (!clause.isProhibited()) {
extract(clause.getQuery(), boost, terms);
}
}
} else if (query instanceof PhraseQuery) {
// a phrase is handled as a span query over its terms
PhraseQuery phraseQuery = ((PhraseQuery) query);
Term[] phraseQueryTerms = phraseQuery.getTerms();
if (phraseQueryTerms.length == 1) {
extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
} else {
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
for (int i = 0; i < phraseQueryTerms.length; i++) {
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
}
// sum position increments beyond 1
int positionGaps = 0;
int[] positions = phraseQuery.getPositions();
if (positions.length >= 2) {
// positions are in increasing order. max(0,...) is just a safeguard.
positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
}
//if original slop is 0 then require inOrder
boolean inorder = (phraseQuery.getSlop() == 0);
// the gaps are added to the slop of the span query
SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
extractWeightedSpanTerms(terms, sp, boost);
}
} else if (query instanceof TermQuery || query instanceof SynonymQuery) {
extractWeightedTerms(terms, query, boost);
} else if (query instanceof SpanQuery) {
extractWeightedSpanTerms(terms, (SpanQuery) query, boost);
} else if (query instanceof ConstantScoreQuery) {
// unwrap; a null inner query is silently ignored
final Query q = ((ConstantScoreQuery) query).getQuery();
if (q != null) {
extract(q, boost, terms);
}
} else if (query instanceof CommonTermsQuery) {
// specialized since rewriting would change the result query
// this query is TermContext sensitive.
extractWeightedTerms(terms, query, boost);
} else if (query instanceof DisjunctionMaxQuery) {
// recurse into every disjunct
for (Query clause : ((DisjunctionMaxQuery) query)) {
extract(clause, boost, terms);
}
} else if (query instanceof ToParentBlockJoinQuery) {
extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
} else if (query instanceof ToChildBlockJoinQuery) {
extract(((ToChildBlockJoinQuery) query).getParentQuery(), boost, terms);
} else if (query instanceof MultiPhraseQuery) {
final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
final Term[][] termArrays = mpq.getTermArrays();
final int[] positions = mpq.getPositions();
// a multi-phrase query without positions contributes nothing
if (positions.length > 0) {
// find the largest position: start from the last entry and scan the others
int maxPosition = positions[positions.length - 1];
for (int i = 0; i < positions.length - 1; ++i) {
if (positions[i] > maxPosition) {
maxPosition = positions[i];
}
}
// one slot per position; slots for unused positions stay null
@SuppressWarnings({ "unchecked", "rawtypes" }) final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
int distinctPositions = 0;
// group the terms of each term array under that array's position
for (int i = 0; i < termArrays.length; ++i) {
final Term[] termArray = termArrays[i];
List<SpanQuery> disjuncts = disjunctLists[positions[i]];
if (disjuncts == null) {
disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
++distinctPositions;
}
for (Term aTermArray : termArray) {
disjuncts.add(new SpanTermQuery(aTermArray));
}
}
int positionGaps = 0;
int position = 0;
final SpanQuery[] clauses = new SpanQuery[distinctPositions];
// each used position becomes a SpanOrQuery of its terms; each unused position counts as a gap
for (List<SpanQuery> disjuncts : disjunctLists) {
if (disjuncts != null) {
clauses[position++] = new SpanOrQuery(disjuncts.toArray(new SpanQuery[disjuncts.size()]));
} else {
++positionGaps;
}
}
if (clauses.length == 1) {
extractWeightedSpanTerms(terms, clauses[0], boost);
} else {
// as for PhraseQuery: gaps are added to the slop, and slop 0 requires in-order matching
final int slop = mpq.getSlop();
final boolean inorder = (slop == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
extractWeightedSpanTerms(terms, sp, boost);
}
}
} else if (query instanceof MatchAllDocsQuery) {
//nothing
} else if (query instanceof CustomScoreQuery) {
extract(((CustomScoreQuery) query).getSubQuery(), boost, terms);
} else if (isQueryUnsupported(query.getClass())) {
// nothing
} else {
// fallback for every query type not handled above: rewrite once and re-dispatch
// multi-term queries are skipped unless expansion is enabled and fieldNameComparator accepts their field
if (query instanceof MultiTermQuery && (!expandMultiTermQuery || !fieldNameComparator(((MultiTermQuery) query).getField()))) {
return;
}
Query origQuery = query;
final IndexReader reader = getLeafContext().reader();
Query rewritten;
if (query instanceof MultiTermQuery) {
// multi-term queries are always expanded with the scoring boolean rewrite
rewritten = MultiTermQuery.SCORING_BOOLEAN_REWRITE.rewrite(reader, (MultiTermQuery) query);
} else {
rewritten = origQuery.rewrite(reader);
}
// identity comparison: a rewrite that returns the same instance did not change the query
if (rewritten != origQuery) {
// only rewrite once and then flatten again - the rewritten query could have a special treatment
// if this method is overwritten in a subclass or above in the next recursion
extract(rewritten, boost, terms);
} else {
extractUnknownQuery(query, terms);
}
}
}
Aggregations