use of org.apache.lucene.search.DisjunctionMaxQuery in project lucene-solr by apache.
the class SolrPluginUtils method setMinShouldMatch.
/**
 * Counts the optional (SHOULD) clauses of the query being built and applies the
 * minimum-should-match value that the specification string yields for that count.
 *
 * <p>When {@code mmAutoRelax} is {@code true}, optional clauses that are
 * {@link DisjunctionMaxQuery} instances are not all counted: only those having the largest
 * number of disjuncts contribute. This relaxes mm when tokens were removed from some but not
 * all DisMax clauses, as can happen when stopwords or punctuation tokens are dropped during
 * analysis. Every other optional clause always counts as one.
 *
 * <p>Details about the specification format can be found
 * <a href="doc-files/min-should-match.html">here</a>.
 *
 * <p>A few important notes:
 * <ul>
 *   <li>
 *     If the calculation determines that no optional clauses are needed,
 *     {@code BooleanQuery.Builder.setMinimumNumberShouldMatch} is never called, but the usual
 *     rules about BooleanQueries still apply at search time (a BooleanQuery containing no
 *     required clauses must still match at least one optional clause).
 *   </li>
 *   <li>
 *     No matter what number the calculation arrives at,
 *     {@code setMinimumNumberShouldMatch} is never called with a value less than 1; the value
 *     is also intended never to exceed the number of counted optional clauses.
 *   </li>
 * </ul>
 *
 * <p>:TODO: should optimize the case where number is same
 * as clauses to just make them all "required"
 *
 * @param q The query as a BooleanQuery.Builder
 * @param spec The mm spec
 * @param mmAutoRelax whether to perform auto relaxation of mm if tokens are removed from some
 *        but not all DisMax clauses
 */
public static void setMinShouldMatch(BooleanQuery.Builder q, String spec, boolean mmAutoRelax) {
  int plainOptionalCount = 0;
  int largestDisjunctCount = 0;
  int dismaxAtLargestCount = 0;

  for (BooleanClause clause : q.build().clauses()) {
    if (clause.getOccur() != Occur.SHOULD) {
      continue;
    }
    if (!mmAutoRelax || !(clause.getQuery() instanceof DisjunctionMaxQuery)) {
      plainOptionalCount++;
      continue;
    }
    // Auto-relax: only DisMax clauses tied for the most disjuncts are counted.
    final int disjunctCount = ((DisjunctionMaxQuery) clause.getQuery()).getDisjuncts().size();
    if (disjunctCount == largestDisjunctCount) {
      dismaxAtLargestCount++;
    } else if (disjunctCount > largestDisjunctCount) {
      largestDisjunctCount = disjunctCount;
      dismaxAtLargestCount = 1;
    }
  }

  final int minShouldMatch = calculateMinShouldMatch(plainOptionalCount + dismaxAtLargestCount, spec);
  if (minShouldMatch > 0) {
    q.setMinimumNumberShouldMatch(minShouldMatch);
  }
}
use of org.apache.lucene.search.DisjunctionMaxQuery in project lucene-solr by apache.
the class MultiTermHighlighting method extractAutomata.
/**
 * Walks {@code query} and collects one {@link CharacterRunAutomaton} for each prefix, fuzzy,
 * term-range or automaton query whose field is accepted by {@code fieldMatcher}.
 *
 * <p>{@code preRewriteFunc} is consulted first for every visited query: a non-null result
 * replaces the built-in handling, and each query it returns is visited instead. Otherwise
 * boolean queries (non-prohibited clauses only), constant-score, boost and disjunction-max
 * queries are descended into, and span queries too when {@code lookInSpan} is set. Any other
 * query type contributes nothing.
 *
 * @param query the query to inspect
 * @param fieldMatcher decides which fields contribute automata
 * @param lookInSpan whether span queries are descended into
 * @param preRewriteFunc returns the queries to visit in place of the given one, or
 *        {@code null} to fall back to the built-in handling
 * @return the automata that were found; empty when there are none
 */
public static CharacterRunAutomaton[] extractAutomata(Query query, Predicate<String> fieldMatcher, boolean lookInSpan, Function<Query, Collection<Query>> preRewriteFunc) {
  // TODO Lucene needs a Query visitor API! LUCENE-3041
  final List<CharacterRunAutomaton> automata = new ArrayList<>();
  final Collection<Query> replacements = preRewriteFunc.apply(query);

  if (replacements != null) {
    for (Query replacement : replacements) {
      appendAutomataOf(replacement, automata, fieldMatcher, lookInSpan, preRewriteFunc);
    }
  } else if (query instanceof BooleanQuery) {
    for (BooleanClause clause : (BooleanQuery) query) {
      if (clause.isProhibited()) {
        continue;
      }
      appendAutomataOf(clause.getQuery(), automata, fieldMatcher, lookInSpan, preRewriteFunc);
    }
  } else if (query instanceof ConstantScoreQuery) {
    appendAutomataOf(((ConstantScoreQuery) query).getQuery(), automata, fieldMatcher, lookInSpan, preRewriteFunc);
  } else if (query instanceof BoostQuery) {
    appendAutomataOf(((BoostQuery) query).getQuery(), automata, fieldMatcher, lookInSpan, preRewriteFunc);
  } else if (query instanceof DisjunctionMaxQuery) {
    for (Query disjunct : ((DisjunctionMaxQuery) query).getDisjuncts()) {
      appendAutomataOf(disjunct, automata, fieldMatcher, lookInSpan, preRewriteFunc);
    }
  } else if (lookInSpan && query instanceof SpanOrQuery) {
    for (Query spanClause : ((SpanOrQuery) query).getClauses()) {
      appendAutomataOf(spanClause, automata, fieldMatcher, lookInSpan, preRewriteFunc);
    }
  } else if (lookInSpan && query instanceof SpanNearQuery) {
    for (Query spanClause : ((SpanNearQuery) query).getClauses()) {
      appendAutomataOf(spanClause, automata, fieldMatcher, lookInSpan, preRewriteFunc);
    }
  } else if (lookInSpan && query instanceof SpanNotQuery) {
    // Only the "include" side is followed.
    appendAutomataOf(((SpanNotQuery) query).getInclude(), automata, fieldMatcher, lookInSpan, preRewriteFunc);
  } else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
    appendAutomataOf(((SpanPositionCheckQuery) query).getMatch(), automata, fieldMatcher, lookInSpan, preRewriteFunc);
  } else if (lookInSpan && query instanceof SpanBoostQuery) {
    appendAutomataOf(((SpanBoostQuery) query).getQuery(), automata, fieldMatcher, lookInSpan, preRewriteFunc);
  } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
    appendAutomataOf(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), automata, fieldMatcher, lookInSpan, preRewriteFunc);
  } else if (query instanceof PrefixQuery) {
    final PrefixQuery prefixQuery = (PrefixQuery) query;
    final Term prefixTerm = prefixQuery.getPrefix();
    if (fieldMatcher.test(prefixTerm.field())) {
      // The prefix text followed by any string.
      final Automaton prefixThenAnything =
          Operations.concatenate(Automata.makeString(prefixTerm.text()), Automata.makeAnyString());
      automata.add(new CharacterRunAutomaton(prefixThenAnything) {
        @Override
        public String toString() {
          return prefixQuery.toString();
        }
      });
    }
  } else if (query instanceof FuzzyQuery) {
    final FuzzyQuery fuzzyQuery = (FuzzyQuery) query;
    if (fieldMatcher.test(fuzzyQuery.getField())) {
      // Work on code points so the prefix length is applied per code point, not per UTF-16 unit.
      final int[] codePoints = fuzzyQuery.getTerm().text().codePoints().toArray();
      final int prefixLength = Math.min(fuzzyQuery.getPrefixLength(), codePoints.length);
      final String exactPrefix = UnicodeUtil.newString(codePoints, 0, prefixLength);
      final String fuzzySuffix = UnicodeUtil.newString(codePoints, prefixLength, codePoints.length - prefixLength);
      final LevenshteinAutomata levenshtein = new LevenshteinAutomata(fuzzySuffix, fuzzyQuery.getTranspositions());
      final Automaton fuzzyAutomaton = levenshtein.toAutomaton(fuzzyQuery.getMaxEdits(), exactPrefix);
      automata.add(new CharacterRunAutomaton(fuzzyAutomaton) {
        @Override
        public String toString() {
          return fuzzyQuery.toString();
        }
      });
    }
  } else if (query instanceof TermRangeQuery) {
    final TermRangeQuery rangeQuery = (TermRangeQuery) query;
    if (fieldMatcher.test(rangeQuery.getField())) {
      final CharsRef lower =
          rangeQuery.getLowerTerm() == null ? null : new CharsRef(rangeQuery.getLowerTerm().utf8ToString());
      final CharsRef upper =
          rangeQuery.getUpperTerm() == null ? null : new CharsRef(rangeQuery.getUpperTerm().utf8ToString());
      final boolean lowerInclusive = rangeQuery.includesLower();
      final boolean upperInclusive = rangeQuery.includesUpper();
      // Reused view over the characters handed to run(); re-pointed on every call.
      final CharsRef candidate = new CharsRef();
      @SuppressWarnings("deprecation") final Comparator<CharsRef> order = CharsRef.getUTF16SortedAsUTF8Comparator();
      // Not a real automaton: run() is overridden to do a plain range comparison.
      automata.add(new CharacterRunAutomaton(Automata.makeEmpty()) {
        @Override
        public boolean run(char[] s, int offset, int length) {
          candidate.chars = s;
          candidate.offset = offset;
          candidate.length = length;
          if (lower != null) {
            final int vsLower = order.compare(candidate, lower);
            if (lowerInclusive ? vsLower < 0 : vsLower <= 0) {
              return false;
            }
          }
          if (upper != null) {
            final int vsUpper = order.compare(candidate, upper);
            if (upperInclusive ? vsUpper > 0 : vsUpper >= 0) {
              return false;
            }
          }
          return true;
        }

        @Override
        public String toString() {
          return rangeQuery.toString();
        }
      });
    }
  } else if (query instanceof AutomatonQuery) {
    final AutomatonQuery automatonQuery = (AutomatonQuery) query;
    if (fieldMatcher.test(automatonQuery.getField())) {
      automata.add(new CharacterRunAutomaton(automatonQuery.getAutomaton()) {
        @Override
        public String toString() {
          return automatonQuery.toString();
        }
      });
    }
  }

  return automata.toArray(new CharacterRunAutomaton[0]);
}

/** Runs {@code extractAutomata} on {@code sub} and appends everything it returns to {@code sink}. */
private static void appendAutomataOf(Query sub, List<CharacterRunAutomaton> sink, Predicate<String> fieldMatcher, boolean lookInSpan, Function<Query, Collection<Query>> preRewriteFunc) {
  sink.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
use of org.apache.lucene.search.DisjunctionMaxQuery in project lucene-solr by apache.
the class DisjunctionMaxQueryBuilder method getQuery.
/**
 * Builds a {@link DisjunctionMaxQuery} from the child elements of {@code e}.
 *
 * <p>Every child node that is an {@link Element} is turned into a query through
 * {@code factory} and becomes one disjunct. The {@code tieBreaker} attribute (default
 * {@code 0.0}) is passed to the DisjunctionMaxQuery, and a {@code boost} attribute other than
 * {@code 1.0} wraps the result in a {@link BoostQuery}.
 *
 * @param e the element describing the disjunction-max query
 * @return the DisjunctionMaxQuery, wrapped in a BoostQuery when a boost other than 1 is given
 * @throws ParserException declared by this method; the visible calls that build the child
 *         queries are where it can originate
 */
@Override
public Query getQuery(Element e) throws ParserException {
  final float tieBreaker = DOMUtils.getAttribute(e, "tieBreaker", 0.0f);

  final NodeList children = e.getChildNodes();
  final List<Query> disjuncts = new ArrayList<>();
  for (int idx = 0, count = children.getLength(); idx < count; idx++) {
    final Node child = children.item(idx);
    if (!(child instanceof Element)) {
      continue; // text, comments, etc. are not disjuncts
    }
    // all elements are disjuncts.
    disjuncts.add(factory.getQuery((Element) child));
  }

  final Query dismax = new DisjunctionMaxQuery(disjuncts, tieBreaker);
  final float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
  return boost == 1f ? dismax : new BoostQuery(dismax, boost);
}
use of org.apache.lucene.search.DisjunctionMaxQuery in project lucene-solr by apache.
the class TestUnifiedHighlighterMTQ method testWildcardInDisjunctionMax.
/**
 * Indexes two short documents and checks that a wildcard query wrapped in a
 * DisjunctionMaxQuery matches both and is highlighted in each of them.
 */
public void testWildcardInDisjunctionMax() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  // One document instance is reused; only the field value changes between adds.
  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);
  for (String text : new String[] {"This is a test.", "Test a one sentence document."}) {
    body.setStringValue(text);
    writer.addDocument(doc);
  }

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);

  WildcardQuery wildcard = new WildcardQuery(new Term("body", "te*"));
  DisjunctionMaxQuery query = new DisjunctionMaxQuery(Collections.singleton(wildcard), 0);

  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits);

  String[] expected = {"This is a <b>test</b>.", "<b>Test</b> a one sentence document."};
  String[] snippets = highlighter.highlight("body", query, topDocs);
  assertEquals(expected.length, snippets.length);
  for (int i = 0; i < expected.length; i++) {
    assertEquals(expected[i], snippets[i]);
  }

  reader.close();
}
use of org.apache.lucene.search.DisjunctionMaxQuery in project lucene-solr by apache.
the class MaxScoreQParser method parse.
/**
 * Parses the query exactly like the Lucene parser does, then regroups the
 * optional clauses of a top-level BooleanQuery under a single DisjunctionMaxQuery,
 * so that only the best-scoring optional clause contributes to the overall
 * score unless the tie parameter is specified.
 * <br>
 * The max() is calculated from the SHOULD clauses only. Required and
 * prohibited clauses are passed through unchanged as separate BooleanClauses.
 * A boost wrapped around the parsed query is removed first and re-applied to the
 * result; a parsed query that is not a BooleanQuery is returned as is (with its boost).
 * @return the resulting Query
 * @throws org.apache.solr.search.SyntaxError if parsing fails
 */
@Override
public Query parse() throws SyntaxError {
  Query parsed = super.parse();

  // Peel off an outer boost so the query underneath can be inspected.
  float boost = 1f;
  if (parsed instanceof BoostQuery) {
    BoostQuery boosted = (BoostQuery) parsed;
    boost = boosted.getBoost();
    parsed = boosted.getQuery();
  }

  Query result;
  if (parsed instanceof BooleanQuery) {
    Collection<Query> optional = new ArrayList<>();
    Collection<BooleanClause> requiredOrProhibited = new ArrayList<>();
    for (BooleanClause clause : (BooleanQuery) parsed) {
      if (!clause.isProhibited() && !clause.isRequired()) {
        // Each optional clause becomes its own single-clause BooleanQuery disjunct.
        optional.add(new BooleanQuery.Builder().add(clause).build());
      } else {
        requiredOrProhibited.add(clause);
      }
    }

    BooleanQuery.Builder rebuilt = new BooleanQuery.Builder();
    if (!optional.isEmpty()) {
      rebuilt.add(new DisjunctionMaxQuery(optional, tie), BooleanClause.Occur.SHOULD);
    }
    for (BooleanClause clause : requiredOrProhibited) {
      rebuilt.add(clause);
    }
    result = rebuilt.build();
  } else {
    result = parsed;
  }

  return boost != 1f ? new BoostQuery(result, boost) : result;
}
Aggregations