use of org.apache.lucene.search.BoostQuery in project Anserini by castorini.
the class Rm3Reranker method rerank.
/**
 * Re-ranks {@code docs} by running an expanded "feedback" query against the index.
 *
 * <p>The analyzed query text is turned into an L1-normalised feature vector, interpolated with
 * the vector returned by {@code estimateRelevanceModel} using {@code originalQueryWeight}, and
 * each resulting term becomes a boosted {@code SHOULD} clause on {@code this.field}.
 *
 * @param docs    the initial ranking; its {@code documents} and {@code scores} arrays must have
 *                the same length
 * @param context supplies the searcher, query text/id, optional filter and search arguments
 * @return the results of the feedback query, or {@code docs} unchanged if the search fails with
 *         an {@link IOException}
 */
@Override
public ScoredDocuments rerank(ScoredDocuments docs, RerankerContext context) {
    assert (docs.documents.length == docs.scores.length);
    IndexSearcher searcher = context.getIndexSearcher();
    IndexReader reader = searcher.getIndexReader();
    FeatureVector qfv = FeatureVector.fromTerms(AnalyzerUtils.analyze(analyzer, context.getQueryText())).scaleToUnitL1Norm();
    // Relevance feedback mode is on whenever qrels were supplied in the search arguments.
    boolean useRf = (context.getSearchArgs().rf_qrels != null);
    FeatureVector rm = estimateRelevanceModel(docs, reader, context.getSearchArgs().searchtweets, useRf);
    rm = FeatureVector.interpolate(qfv, rm, originalQueryWeight);

    // One boosted SHOULD clause per term of the interpolated vector.
    BooleanQuery.Builder feedbackQueryBuilder = new BooleanQuery.Builder();
    Iterator<String> terms = rm.iterator();
    while (terms.hasNext()) {
        String term = terms.next();
        float prob = rm.getFeatureWeight(term);
        feedbackQueryBuilder.add(new BoostQuery(new TermQuery(new Term(this.field, term)), prob), BooleanClause.Occur.SHOULD);
    }
    Query feedbackQuery = feedbackQueryBuilder.build();

    if (this.outputQuery) {
        LOG.info("QID: " + context.getQueryId());
        LOG.info("Original Query: " + context.getQuery().toString(this.field));
        LOG.info("Running new query: " + feedbackQuery.toString(this.field));
    }

    TopDocs rs;
    try {
        // With a filter, require both the filter and the feedback query;
        // without one, the feedback query is used as is.
        Query finalQuery = feedbackQuery;
        if (context.getFilter() != null) {
            BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
            bqBuilder.add(context.getFilter(), BooleanClause.Occur.FILTER);
            bqBuilder.add(feedbackQuery, BooleanClause.Occur.MUST);
            finalQuery = bqBuilder.build();
        }

        // Figure out how to break the scoring ties.
        if (context.getSearchArgs().arbitraryScoreTieBreak) {
            rs = searcher.search(finalQuery, context.getSearchArgs().hits);
        } else if (context.getSearchArgs().searchtweets) {
            rs = searcher.search(finalQuery, context.getSearchArgs().hits, BREAK_SCORE_TIES_BY_TWEETID, true);
        } else {
            rs = searcher.search(finalQuery, context.getSearchArgs().hits, BREAK_SCORE_TIES_BY_DOCID, true);
        }
    } catch (IOException e) {
        // Best effort: keep the initial ranking, but record the failure (with its cause)
        // through the logger instead of dumping a stack trace to stderr.
        LOG.error("Feedback search failed for QID " + context.getQueryId() + "; returning the original ranking", e);
        return docs;
    }

    return ScoredDocuments.fromTopDocs(rs, searcher);
}
use of org.apache.lucene.search.BoostQuery in project elasticsearch by elastic.
the class QueryStringQueryBuilder method doToQuery.
/**
 * Builds the Lucene query for this {@code query_string} builder.
 *
 * <p>Resolves the fields to search, copies every builder option into a
 * {@link QueryParserSettings}, parses the query string, and finally applies the negative-query
 * fix and {@code minimum_should_match} underneath any outer {@link BoostQuery} wrappers.
 *
 * @param context shard-level context used to resolve fields, analyzers and defaults
 * @return the parsed query, or {@code null} if the parser produced no query
 * @throws IllegalArgumentException if {@code split_on_whitespace} is disabled while
 *                                  {@code auto_generate_phrase_queries} is enabled
 * @throws QueryShardException      if a named analyzer cannot be found or the query string
 *                                  fails to parse
 * @throws IOException              declared by the overridden method
 */
@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
    if (splitOnWhitespace == false && autoGeneratePhraseQueries) {
        throw new IllegalArgumentException("it is disallowed to disable [split_on_whitespace] " + "if [auto_generate_phrase_queries] is activated");
    }

    // Compute the (optionally escaped) query string once so that the settings object and the
    // parse call below agree; previously only the settings received the escaped text.
    final String effectiveQueryString = this.escape
        ? org.apache.lucene.queryparser.classic.QueryParser.escape(this.queryString)
        : this.queryString;
    QueryParserSettings qpSettings = new QueryParserSettings(effectiveQueryString);

    Map<String, Float> resolvedFields = new TreeMap<>();
    if ((useAllFields != null && useAllFields) && (fieldsAndWeights.size() != 0 || this.defaultField != null)) {
        throw addValidationError("cannot use [all_fields] parameter in conjunction with [default_field] or [fields]", null);
    }

    // Expand to all queryable fields when all_fields is requested explicitly, or when _all is
    // disabled, the index default field is "_all", and the request names neither a default
    // field nor any fields.
    if ((this.useAllFields != null && this.useAllFields) || (context.getMapperService().allEnabled() == false && "_all".equals(context.defaultField()) && this.defaultField == null && this.fieldsAndWeights.size() == 0)) {
        // Use the automatically determined expansion of all queryable fields
        resolvedFields = allQueryableDefaultFields(context);
        // Automatically set leniency to "true" if unset so mismatched fields don't cause exceptions
        qpSettings.lenient(lenient == null ? true : lenient);
    } else {
        qpSettings.defaultField(this.defaultField == null ? context.defaultField() : this.defaultField);
        for (Map.Entry<String, Float> fieldsEntry : fieldsAndWeights.entrySet()) {
            String fieldName = fieldsEntry.getKey();
            Float weight = fieldsEntry.getValue();
            if (Regex.isSimpleMatchPattern(fieldName)) {
                // A wildcard pattern contributes every matching index field with the same weight.
                for (String resolvedFieldName : context.getMapperService().simpleMatchToIndexNames(fieldName)) {
                    resolvedFields.put(resolvedFieldName, weight);
                }
            } else {
                resolvedFields.put(fieldName, weight);
            }
        }
        qpSettings.lenient(lenient == null ? context.queryStringLenient() : lenient);
    }
    qpSettings.fieldsAndWeights(resolvedFields);
    qpSettings.defaultOperator(defaultOperator.toQueryParserOperator());

    if (analyzer == null) {
        qpSettings.defaultAnalyzer(context.getMapperService().searchAnalyzer());
    } else {
        NamedAnalyzer namedAnalyzer = context.getIndexAnalyzers().get(analyzer);
        if (namedAnalyzer == null) {
            throw new QueryShardException(context, "[query_string] analyzer [" + analyzer + "] not found");
        }
        qpSettings.forceAnalyzer(namedAnalyzer);
    }

    if (quoteAnalyzer != null) {
        NamedAnalyzer namedAnalyzer = context.getIndexAnalyzers().get(quoteAnalyzer);
        if (namedAnalyzer == null) {
            throw new QueryShardException(context, "[query_string] quote_analyzer [" + quoteAnalyzer + "] not found");
        }
        qpSettings.forceQuoteAnalyzer(namedAnalyzer);
    } else if (analyzer != null) {
        // No explicit quote analyzer: reuse the analyzer configured on the settings above.
        qpSettings.forceQuoteAnalyzer(qpSettings.analyzer());
    } else {
        qpSettings.defaultQuoteAnalyzer(context.getMapperService().searchQuoteAnalyzer());
    }

    qpSettings.quoteFieldSuffix(quoteFieldSuffix);
    qpSettings.autoGeneratePhraseQueries(autoGeneratePhraseQueries);
    qpSettings.allowLeadingWildcard(allowLeadingWildcard == null ? context.queryStringAllowLeadingWildcard() : allowLeadingWildcard);
    qpSettings.analyzeWildcard(analyzeWildcard == null ? context.queryStringAnalyzeWildcard() : analyzeWildcard);
    qpSettings.enablePositionIncrements(enablePositionIncrements);
    qpSettings.fuzziness(fuzziness);
    qpSettings.fuzzyPrefixLength(fuzzyPrefixLength);
    qpSettings.fuzzyMaxExpansions(fuzzyMaxExpansions);
    qpSettings.fuzzyRewriteMethod(QueryParsers.parseRewriteMethod(this.fuzzyRewrite));
    qpSettings.phraseSlop(phraseSlop);
    qpSettings.useDisMax(useDisMax);
    qpSettings.tieBreaker(tieBreaker);
    qpSettings.rewriteMethod(QueryParsers.parseRewriteMethod(this.rewrite));
    qpSettings.timeZone(timeZone);
    qpSettings.maxDeterminizedStates(maxDeterminizedStates);
    qpSettings.splitOnWhitespace(splitOnWhitespace);

    MapperQueryParser queryParser = context.queryParser(qpSettings);
    Query query;
    try {
        query = queryParser.parse(effectiveQueryString);
    } catch (org.apache.lucene.queryparser.classic.ParseException e) {
        // Report the query string as the user supplied it, not the escaped form.
        throw new QueryShardException(context, "Failed to parse query [" + this.queryString + "]", e);
    }

    if (query == null) {
        return null;
    }

    //save the BoostQuery wrapped structure if present
    List<Float> boosts = new ArrayList<>();
    while (query instanceof BoostQuery) {
        BoostQuery boostQuery = (BoostQuery) query;
        boosts.add(boostQuery.getBoost());
        query = boostQuery.getQuery();
    }

    query = Queries.fixNegativeQueryIfNeeded(query);
    query = Queries.maybeApplyMinimumShouldMatch(query, this.minimumShouldMatch);

    //restore the previous BoostQuery wrapping, innermost boost first
    for (int i = boosts.size() - 1; i >= 0; i--) {
        query = new BoostQuery(query, boosts.get(i));
    }

    return query;
}
use of org.apache.lucene.search.BoostQuery in project elasticsearch by elastic.
the class MultiMatchQuery method blendTerms.
/**
 * Builds a query that blends {@code values} across {@code blendedFields}.
 *
 * <p>For every field/value pair the field type is asked for a term query. Plain
 * {@link TermQuery} results are collected (with their accumulated boost) into a single
 * {@link BlendedTermQuery}; anything else is kept as a separate clause.
 *
 * @param context           shard context handed to each field type's {@code termQuery}
 * @param values            the term values to look up in every field
 * @param commonTermsCutoff if non-null, selects the common-terms flavour of the blended query
 * @param tieBreaker        {@code 1.0f} selects the boolean flavour, any other value dismax
 * @param blendedFields     the fields (with boosts) to blend over
 * @return the single resulting query, or a boolean {@code SHOULD} combination of all of them
 */
static Query blendTerms(QueryShardContext context, BytesRef[] values, Float commonTermsCutoff, float tieBreaker, FieldAndFieldType... blendedFields) {
    final int capacity = blendedFields.length * values.length;
    Term[] terms = new Term[capacity];
    float[] blendedBoost = new float[capacity];
    List<Query> queries = new ArrayList<>();
    int termCount = 0;

    for (FieldAndFieldType ft : blendedFields) {
        for (BytesRef value : values) {
            Query candidate;
            try {
                candidate = ft.fieldType.termQuery(value, context);
            } catch (IllegalArgumentException e) {
                // The field rejected this value: skip the pair.
                continue;
            } catch (ElasticsearchParseException parseException) {
                // Only parse failures caused by an IllegalArgumentException are skipped.
                if (!(parseException.getCause() instanceof IllegalArgumentException)) {
                    throw parseException;
                }
                continue;
            }

            // Peel off nested BoostQuery wrappers, folding their boosts into the field boost.
            float effectiveBoost = ft.boost;
            while (candidate instanceof BoostQuery) {
                BoostQuery wrapper = (BoostQuery) candidate;
                effectiveBoost *= wrapper.getBoost();
                candidate = wrapper.getQuery();
            }

            if (candidate.getClass() != TermQuery.class) {
                // Not an exact TermQuery: keep it as its own clause, re-applying the boost.
                queries.add(effectiveBoost != 1f ? new BoostQuery(candidate, effectiveBoost) : candidate);
                continue;
            }

            terms[termCount] = ((TermQuery) candidate).getTerm();
            blendedBoost[termCount] = effectiveBoost;
            termCount++;
        }
    }

    if (termCount > 0) {
        // Trim the arrays to the slots actually filled.
        terms = Arrays.copyOf(terms, termCount);
        blendedBoost = Arrays.copyOf(blendedBoost, termCount);
        final Query blended;
        if (commonTermsCutoff != null) {
            blended = BlendedTermQuery.commonTermsBlendedQuery(terms, blendedBoost, false, commonTermsCutoff);
        } else if (tieBreaker == 1.0f) {
            blended = BlendedTermQuery.booleanBlendedQuery(terms, blendedBoost, false);
        } else {
            blended = BlendedTermQuery.dismaxBlendedQuery(terms, blendedBoost, tieBreaker);
        }
        queries.add(blended);
    }

    if (queries.size() == 1) {
        return queries.get(0);
    }

    // best effort: add clauses that are not term queries so that they have an opportunity to match
    // however their score contribution will be different
    // TODO: can we improve this?
    BooleanQuery.Builder combined = new BooleanQuery.Builder();
    combined.setDisableCoord(true);
    for (Query clause : queries) {
        combined.add(clause, Occur.SHOULD);
    }
    return combined.build();
}
use of org.apache.lucene.search.BoostQuery in project elasticsearch by elastic.
the class SpanMultiTermQueryBuilder method doToQuery.
@Override
/**
 * Wraps the inner multi-term query in a {@link SpanMultiTermQueryWrapper}.
 *
 * <p>If the inner builder produced a {@link BoostQuery}, its boost is moved onto a
 * {@link SpanBoostQuery} around the wrapper.
 *
 * @param context shard context passed to the inner builder
 * @return the span wrapper, boosted when the inner boost differs from the default
 * @throws UnsupportedOperationException if the (unwrapped) inner query is not a
 *                                       {@link MultiTermQuery}
 * @throws IOException                   declared by the overridden method
 */
protected Query doToQuery(QueryShardContext context) throws IOException {
    Query inner = multiTermQueryBuilder.toQuery(context);
    float innerBoost = AbstractQueryBuilder.DEFAULT_BOOST;

    // Strip a single outer BoostQuery and remember its boost for the span wrapper.
    if (inner instanceof BoostQuery) {
        BoostQuery boosted = (BoostQuery) inner;
        innerBoost = boosted.getBoost();
        inner = boosted.getQuery();
    }

    //no MultiTermQuery extends SpanQuery, so SpanBoostQuery is not supported here
    assert !(inner instanceof SpanBoostQuery);

    if (!(inner instanceof MultiTermQuery)) {
        throw new UnsupportedOperationException("unsupported inner query, should be " + MultiTermQuery.class.getName() + " but was " + inner.getClass().getName());
    }

    SpanQuery spanWrapper = new SpanMultiTermQueryWrapper<>((MultiTermQuery) inner);
    if (innerBoost == AbstractQueryBuilder.DEFAULT_BOOST) {
        return spanWrapper;
    }
    return new SpanBoostQuery(spanWrapper, innerBoost);
}
use of org.apache.lucene.search.BoostQuery in project elasticsearch by elastic.
the class CustomFieldQuery method flatten.
@Override
/**
 * Flattens {@code sourceQuery} into {@code flatQueries}, unwrapping the query types handled
 * here before delegating everything else to the parent implementation.
 *
 * @param sourceQuery the query to flatten
 * @param reader      index reader used when a query has to be rewritten first
 * @param flatQueries collection that receives the flattened queries
 * @param boost       boost accumulated from enclosing queries
 * @throws IOException if rewriting or the parent implementation fails
 */
void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries, float boost) throws IOException {
    if (sourceQuery instanceof BoostQuery) {
        // Fold the wrapper's boost into the running boost and continue with the inner query.
        BoostQuery boosted = (BoostQuery) sourceQuery;
        flatten(boosted.getQuery(), reader, flatQueries, boost * boosted.getBoost());
        return;
    }
    if (sourceQuery instanceof SpanTermQuery) {
        Term spanTerm = ((SpanTermQuery) sourceQuery).getTerm();
        super.flatten(new TermQuery(spanTerm), reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof ConstantScoreQuery) {
        flatten(((ConstantScoreQuery) sourceQuery).getQuery(), reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof FunctionScoreQuery) {
        flatten(((FunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof MultiPhrasePrefixQuery) {
        flatten(sourceQuery.rewrite(reader), reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof FiltersFunctionScoreQuery) {
        flatten(((FiltersFunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof MultiPhraseQuery) {
        MultiPhraseQuery multiPhrase = (MultiPhraseQuery) sourceQuery;
        int[] positionCursor = new int[multiPhrase.getTermArrays().length];
        convertMultiPhraseQuery(0, positionCursor, multiPhrase, multiPhrase.getTermArrays(), multiPhrase.getPositions(), reader, flatQueries);
        return;
    }
    if (sourceQuery instanceof BlendedTermQuery) {
        flatten(((BlendedTermQuery) sourceQuery).rewrite(reader), reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof ESToParentBlockJoinQuery) {
        flatten(((ESToParentBlockJoinQuery) sourceQuery).getChildQuery(), reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof BoostingQuery) {
        BoostingQuery boosting = (BoostingQuery) sourceQuery;
        //flatten positive query with query boost
        flatten(boosting.getMatch(), reader, flatQueries, boost);
        //flatten negative query with negative boost
        flatten(boosting.getContext(), reader, flatQueries, boosting.getBoost());
        return;
    }
    if (sourceQuery instanceof SynonymQuery) {
        // SynonymQuery should be handled by the parent class directly.
        // This statement should be removed when https://issues.apache.org/jira/browse/LUCENE-7484 is merged.
        for (Term synonym : ((SynonymQuery) sourceQuery).getTerms()) {
            flatten(new TermQuery(synonym), reader, flatQueries, boost);
        }
        return;
    }
    super.flatten(sourceQuery, reader, flatQueries, boost);
}
Aggregations