Search in sources :

Example 66 with BoostQuery

use of org.apache.lucene.search.BoostQuery in project Anserini by castorini.

the class Rm3Reranker method rerank.

/**
 * Re-runs the search with an expanded feedback query and returns the new results.
 *
 * <p>The analyzed query text is turned into a feature vector, interpolated (weighted by
 * {@code originalQueryWeight}) with the relevance model estimated from {@code docs}, and every
 * term of the interpolated vector becomes a boosted {@code SHOULD} clause on {@code this.field}.
 * If the context carries a filter, the feedback query is additionally constrained by it.
 *
 * @param docs    the initial ranking; also the fallback result when the feedback search fails
 * @param context supplies the searcher, query text/id, optional filter and search arguments
 * @return the documents retrieved by the feedback query, or {@code docs} unchanged if the
 *         search throws an {@link IOException}
 */
@Override
public ScoredDocuments rerank(ScoredDocuments docs, RerankerContext context) {
    assert (docs.documents.length == docs.scores.length);
    IndexSearcher searcher = context.getIndexSearcher();
    IndexReader reader = searcher.getIndexReader();
    // Feature vector of the original (analyzed) query text.
    FeatureVector qfv = FeatureVector.fromTerms(AnalyzerUtils.analyze(analyzer, context.getQueryText())).scaleToUnitL1Norm();
    // The rf flag is derived purely from whether rf_qrels was supplied.
    boolean useRf = (context.getSearchArgs().rf_qrels != null);
    FeatureVector rm = estimateRelevanceModel(docs, reader, context.getSearchArgs().searchtweets, useRf);
    rm = FeatureVector.interpolate(qfv, rm, originalQueryWeight);
    // One boosted SHOULD clause per term, using the term's weight as the boost.
    BooleanQuery.Builder feedbackQueryBuilder = new BooleanQuery.Builder();
    Iterator<String> terms = rm.iterator();
    while (terms.hasNext()) {
        String term = terms.next();
        float prob = rm.getFeatureWeight(term);
        feedbackQueryBuilder.add(new BoostQuery(new TermQuery(new Term(this.field, term)), prob), BooleanClause.Occur.SHOULD);
    }
    Query feedbackQuery = feedbackQueryBuilder.build();
    if (this.outputQuery) {
        LOG.info("QID: " + context.getQueryId());
        LOG.info("Original Query: " + context.getQuery().toString(this.field));
        LOG.info("Running new query: " + feedbackQuery.toString(this.field));
    }
    TopDocs rs;
    try {
        // If the context has a filter, combine it with the feedback query;
        // otherwise, just use the feedback query.
        Query finalQuery = feedbackQuery;
        if (context.getFilter() != null) {
            BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
            bqBuilder.add(context.getFilter(), BooleanClause.Occur.FILTER);
            bqBuilder.add(feedbackQuery, BooleanClause.Occur.MUST);
            finalQuery = bqBuilder.build();
        }
        // Figure out how to break the scoring ties.
        if (context.getSearchArgs().arbitraryScoreTieBreak) {
            rs = searcher.search(finalQuery, context.getSearchArgs().hits);
        } else if (context.getSearchArgs().searchtweets) {
            rs = searcher.search(finalQuery, context.getSearchArgs().hits, BREAK_SCORE_TIES_BY_TWEETID, true);
        } else {
            rs = searcher.search(finalQuery, context.getSearchArgs().hits, BREAK_SCORE_TIES_BY_DOCID, true);
        }
    } catch (IOException e) {
        // Best effort: report through the logger (not stderr) and fall back to the input ranking.
        LOG.error("Feedback search failed for QID: " + context.getQueryId() + "; returning the original ranking", e);
        return docs;
    }
    return ScoredDocuments.fromTopDocs(rs, searcher);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) FeatureVector(io.anserini.util.FeatureVector) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) BoostQuery(org.apache.lucene.search.BoostQuery) TopDocs(org.apache.lucene.search.TopDocs) IndexReader(org.apache.lucene.index.IndexReader)

Example 67 with BoostQuery

use of org.apache.lucene.search.BoostQuery in project elasticsearch by elastic.

the class QueryStringQueryBuilder method doToQuery.

/**
 * Translates this query_string builder into a Lucene {@link Query}.
 *
 * <p>Steps, in order: validate option combinations, assemble the {@code QueryParserSettings}
 * (fields and weights, analyzers, fuzzy/rewrite/phrase options), parse the query string, then
 * apply the negative-query fix and minimum-should-match underneath any {@code BoostQuery}
 * wrappers the parser produced.
 *
 * @param context shard context used for mapper lookups, defaults and parser creation
 * @return the resulting query, or {@code null} when the parser yields no query
 * @throws IllegalArgumentException if split_on_whitespace is disabled while
 *         auto_generate_phrase_queries is active
 * @throws QueryShardException if a named analyzer cannot be found or the query string fails to parse
 */
@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
    if (splitOnWhitespace == false && autoGeneratePhraseQueries) {
        throw new IllegalArgumentException("it is disallowed to disable [split_on_whitespace] " + "if [auto_generate_phrase_queries] is activated");
    }

    // The settings are created from the (optionally escaped) raw query string.
    final QueryParserSettings qpSettings = new QueryParserSettings(
        this.escape ? org.apache.lucene.queryparser.classic.QueryParser.escape(this.queryString) : this.queryString);

    // all_fields is mutually exclusive with an explicit default field or field list.
    final boolean allFieldsRequested = useAllFields != null && useAllFields;
    if (allFieldsRequested && (fieldsAndWeights.size() != 0 || this.defaultField != null)) {
        throw addValidationError("cannot use [all_fields] parameter in conjunction with [default_field] or [fields]", null);
    }

    // Expand to every queryable field either on explicit request, or when _all is disabled,
    // the default field is still "_all", and the request names no default field and no fields.
    final boolean expandToAllFields = allFieldsRequested
        || (context.getMapperService().allEnabled() == false
            && "_all".equals(context.defaultField())
            && this.defaultField == null
            && this.fieldsAndWeights.size() == 0);

    final Map<String, Float> resolvedFields;
    if (expandToAllFields) {
        // Use the automatically determined expansion of all queryable fields
        resolvedFields = allQueryableDefaultFields(context);
        // Leniency defaults to true here when it was left unset
        qpSettings.lenient(lenient == null || lenient);
    } else {
        resolvedFields = new TreeMap<>();
        qpSettings.defaultField(this.defaultField == null ? context.defaultField() : this.defaultField);
        for (Map.Entry<String, Float> entry : fieldsAndWeights.entrySet()) {
            final String name = entry.getKey();
            final Float weight = entry.getValue();
            if (Regex.isSimpleMatchPattern(name) == false) {
                resolvedFields.put(name, weight);
                continue;
            }
            // A pattern contributes every index name it matches, all with the same weight.
            for (String expanded : context.getMapperService().simpleMatchToIndexNames(name)) {
                resolvedFields.put(expanded, weight);
            }
        }
        qpSettings.lenient(lenient == null ? context.queryStringLenient() : lenient);
    }
    qpSettings.fieldsAndWeights(resolvedFields);
    qpSettings.defaultOperator(defaultOperator.toQueryParserOperator());

    // Search analyzer: a named one is forced, otherwise the mapper service default is used.
    if (analyzer != null) {
        final NamedAnalyzer forced = context.getIndexAnalyzers().get(analyzer);
        if (forced == null) {
            throw new QueryShardException(context, "[query_string] analyzer [" + analyzer + "] not found");
        }
        qpSettings.forceAnalyzer(forced);
    } else {
        qpSettings.defaultAnalyzer(context.getMapperService().searchAnalyzer());
    }

    // Quote analyzer: explicit name first, then the forced search analyzer, then the default.
    if (quoteAnalyzer != null) {
        final NamedAnalyzer forcedQuote = context.getIndexAnalyzers().get(quoteAnalyzer);
        if (forcedQuote == null) {
            throw new QueryShardException(context, "[query_string] quote_analyzer [" + quoteAnalyzer + "] not found");
        }
        qpSettings.forceQuoteAnalyzer(forcedQuote);
    } else if (analyzer != null) {
        qpSettings.forceQuoteAnalyzer(qpSettings.analyzer());
    } else {
        qpSettings.defaultQuoteAnalyzer(context.getMapperService().searchQuoteAnalyzer());
    }

    // Remaining options are copied over; unset wildcard options fall back to the context.
    qpSettings.quoteFieldSuffix(quoteFieldSuffix);
    qpSettings.autoGeneratePhraseQueries(autoGeneratePhraseQueries);
    qpSettings.allowLeadingWildcard(allowLeadingWildcard == null ? context.queryStringAllowLeadingWildcard() : allowLeadingWildcard);
    qpSettings.analyzeWildcard(analyzeWildcard == null ? context.queryStringAnalyzeWildcard() : analyzeWildcard);
    qpSettings.enablePositionIncrements(enablePositionIncrements);
    qpSettings.fuzziness(fuzziness);
    qpSettings.fuzzyPrefixLength(fuzzyPrefixLength);
    qpSettings.fuzzyMaxExpansions(fuzzyMaxExpansions);
    qpSettings.fuzzyRewriteMethod(QueryParsers.parseRewriteMethod(this.fuzzyRewrite));
    qpSettings.phraseSlop(phraseSlop);
    qpSettings.useDisMax(useDisMax);
    qpSettings.tieBreaker(tieBreaker);
    qpSettings.rewriteMethod(QueryParsers.parseRewriteMethod(this.rewrite));
    qpSettings.timeZone(timeZone);
    qpSettings.maxDeterminizedStates(maxDeterminizedStates);
    qpSettings.splitOnWhitespace(splitOnWhitespace);

    final MapperQueryParser queryParser = context.queryParser(qpSettings);
    Query query;
    try {
        query = queryParser.parse(queryString);
    } catch (org.apache.lucene.queryparser.classic.ParseException e) {
        throw new QueryShardException(context, "Failed to parse query [" + this.queryString + "]", e);
    }
    if (query == null) {
        return null;
    }

    // Peel off BoostQuery wrappers, remembering their boosts innermost-first.
    final List<Float> boostsInnermostFirst = new ArrayList<>();
    while (query instanceof BoostQuery) {
        final BoostQuery wrapper = (BoostQuery) query;
        boostsInnermostFirst.add(0, wrapper.getBoost());
        query = wrapper.getQuery();
    }
    query = Queries.fixNegativeQueryIfNeeded(query);
    query = Queries.maybeApplyMinimumShouldMatch(query, this.minimumShouldMatch);
    // Re-wrap in the original nesting order (innermost boost applied first).
    for (Float savedBoost : boostsInnermostFirst) {
        query = new BoostQuery(query, savedBoost);
    }
    return query;
}
Also used : MapperQueryParser(org.apache.lucene.queryparser.classic.MapperQueryParser) NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer) Query(org.apache.lucene.search.Query) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) BoostQuery(org.apache.lucene.search.BoostQuery) ArrayList(java.util.ArrayList) QueryParserSettings(org.apache.lucene.queryparser.classic.QueryParserSettings) TreeMap(java.util.TreeMap) BoostQuery(org.apache.lucene.search.BoostQuery) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap)

Example 68 with BoostQuery

use of org.apache.lucene.search.BoostQuery in project elasticsearch by elastic.

the class MultiMatchQuery method blendTerms.

/**
 * Builds one query that matches the given values across all blended fields.
 *
 * <p>For every field/value pair a term query is requested from the field type. Results that are
 * plain {@code TermQuery} instances (after unwrapping any {@code BoostQuery}) are pooled into a
 * single {@code BlendedTermQuery}; everything else is kept as an individual clause.
 *
 * @param context           shard context passed to each field type's {@code termQuery}
 * @param values            the term values to look up in each field
 * @param commonTermsCutoff when non-null, selects the common-terms blended query with this cutoff
 * @param tieBreaker        {@code 1.0f} selects the boolean blended query, any other value the
 *                          dismax blended query with this tie breaker
 * @param blendedFields     the fields (with their boosts) to blend across
 * @return the only resulting clause if there is exactly one, otherwise a boolean query with all
 *         clauses as {@code SHOULD}
 */
static Query blendTerms(QueryShardContext context, BytesRef[] values, Float commonTermsCutoff, float tieBreaker, FieldAndFieldType... blendedFields) {
    final int capacity = blendedFields.length * values.length;
    final List<Query> clauses = new ArrayList<>();
    Term[] pooledTerms = new Term[capacity];
    float[] pooledBoosts = new float[capacity];
    int pooled = 0;

    for (int f = 0; f < blendedFields.length; f++) {
        final FieldAndFieldType field = blendedFields[f];
        for (int v = 0; v < values.length; v++) {
            Query candidate;
            try {
                candidate = field.fieldType.termQuery(values[v], context);
            } catch (IllegalArgumentException e) {
                // presumably the value is not valid for this field type; skip this field/value pair
                continue;
            } catch (ElasticsearchParseException parseException) {
                // skip only when the parse failure wraps an IllegalArgumentException
                if (parseException.getCause() instanceof IllegalArgumentException) {
                    continue;
                }
                throw parseException;
            }

            // Fold every BoostQuery layer into a single effective boost.
            float effectiveBoost = field.boost;
            while (candidate instanceof BoostQuery) {
                final BoostQuery layer = (BoostQuery) candidate;
                effectiveBoost *= layer.getBoost();
                candidate = layer.getQuery();
            }

            // Anything that is not exactly a TermQuery stays a standalone clause.
            if (candidate.getClass() != TermQuery.class) {
                clauses.add(effectiveBoost != 1f ? new BoostQuery(candidate, effectiveBoost) : candidate);
                continue;
            }
            pooledTerms[pooled] = ((TermQuery) candidate).getTerm();
            pooledBoosts[pooled] = effectiveBoost;
            pooled++;
        }
    }

    if (pooled > 0) {
        // Trim the arrays to the number of terms actually collected.
        pooledTerms = Arrays.copyOf(pooledTerms, pooled);
        pooledBoosts = Arrays.copyOf(pooledBoosts, pooled);
        final Query blended;
        if (commonTermsCutoff != null) {
            blended = BlendedTermQuery.commonTermsBlendedQuery(pooledTerms, pooledBoosts, false, commonTermsCutoff);
        } else if (tieBreaker == 1.0f) {
            blended = BlendedTermQuery.booleanBlendedQuery(pooledTerms, pooledBoosts, false);
        } else {
            blended = BlendedTermQuery.dismaxBlendedQuery(pooledTerms, pooledBoosts, tieBreaker);
        }
        clauses.add(blended);
    }

    if (clauses.size() == 1) {
        return clauses.get(0);
    }
    // best effort: add clauses that are not term queries so that they have an opportunity to match
    // however their score contribution will be different
    // TODO: can we improve this?
    final BooleanQuery.Builder combined = new BooleanQuery.Builder();
    combined.setDisableCoord(true);
    for (Query clause : clauses) {
        combined.add(clause, Occur.SHOULD);
    }
    return combined.build();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) BlendedTermQuery(org.apache.lucene.queries.BlendedTermQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) MultiMatchQueryBuilder(org.elasticsearch.index.query.MultiMatchQueryBuilder) AbstractQueryBuilder(org.elasticsearch.index.query.AbstractQueryBuilder) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) BoostQuery(org.apache.lucene.search.BoostQuery) ElasticsearchParseException(org.elasticsearch.ElasticsearchParseException) BytesRef(org.apache.lucene.util.BytesRef)

Example 69 with BoostQuery

use of org.apache.lucene.search.BoostQuery in project elasticsearch by elastic.

the class SpanMultiTermQueryBuilder method doToQuery.

/**
 * Wraps the inner multi-term query in a {@code SpanMultiTermQueryWrapper}.
 *
 * <p>If the inner builder produced a {@code BoostQuery}, one layer is unwrapped and its boost is
 * re-applied to the span wrapper through a {@code SpanBoostQuery}.
 *
 * @param context shard context handed to the inner builder
 * @return the span wrapper, boosted when the inner boost differs from the default
 * @throws UnsupportedOperationException if the (unwrapped) inner query is not a {@code MultiTermQuery}
 */
@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
    final Query built = multiTermQueryBuilder.toQuery(context);
    final Query inner;
    final float innerBoost;
    if (built instanceof BoostQuery) {
        final BoostQuery boosted = (BoostQuery) built;
        inner = boosted.getQuery();
        innerBoost = boosted.getBoost();
    } else {
        inner = built;
        innerBoost = AbstractQueryBuilder.DEFAULT_BOOST;
    }
    //no MultiTermQuery extends SpanQuery, so SpanBoostQuery is not supported here
    assert inner instanceof SpanBoostQuery == false;
    if (inner instanceof MultiTermQuery == false) {
        throw new UnsupportedOperationException("unsupported inner query, should be " + MultiTermQuery.class.getName() + " but was " + inner.getClass().getName());
    }
    final SpanQuery spanWrapper = new SpanMultiTermQueryWrapper<>((MultiTermQuery) inner);
    if (innerBoost != AbstractQueryBuilder.DEFAULT_BOOST) {
        return new SpanBoostQuery(spanWrapper, innerBoost);
    }
    return spanWrapper;
}
Also used : Query(org.apache.lucene.search.Query) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Example 70 with BoostQuery

use of org.apache.lucene.search.BoostQuery in project elasticsearch by elastic.

the class CustomFieldQuery method flatten.

/**
 * Flattens {@code sourceQuery} into {@code flatQueries}, handling additional query types before
 * deferring to the parent implementation.
 *
 * <p>Wrapper queries are unwrapped and flattened recursively; a {@code BoostQuery} multiplies its
 * boost into the running {@code boost}. Queries not recognised here go to {@code super.flatten}.
 *
 * @param sourceQuery the query to flatten
 * @param reader      index reader used when a query has to be rewritten first
 * @param flatQueries collection receiving the flattened queries
 * @param boost       boost accumulated from enclosing queries
 */
@Override
void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries, float boost) throws IOException {
    if (sourceQuery instanceof BoostQuery) {
        final BoostQuery boosted = (BoostQuery) sourceQuery;
        flatten(boosted.getQuery(), reader, flatQueries, boost * boosted.getBoost());
        return;
    }
    if (sourceQuery instanceof SpanTermQuery) {
        // Span terms are handed to the parent as ordinary term queries.
        final TermQuery asTermQuery = new TermQuery(((SpanTermQuery) sourceQuery).getTerm());
        super.flatten(asTermQuery, reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof ConstantScoreQuery) {
        flatten(((ConstantScoreQuery) sourceQuery).getQuery(), reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof FunctionScoreQuery) {
        flatten(((FunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof MultiPhrasePrefixQuery) {
        flatten(sourceQuery.rewrite(reader), reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof FiltersFunctionScoreQuery) {
        flatten(((FiltersFunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof MultiPhraseQuery) {
        final MultiPhraseQuery multiPhrase = (MultiPhraseQuery) sourceQuery;
        convertMultiPhraseQuery(0, new int[multiPhrase.getTermArrays().length], multiPhrase, multiPhrase.getTermArrays(), multiPhrase.getPositions(), reader, flatQueries);
        return;
    }
    if (sourceQuery instanceof BlendedTermQuery) {
        final BlendedTermQuery blended = (BlendedTermQuery) sourceQuery;
        flatten(blended.rewrite(reader), reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof ESToParentBlockJoinQuery) {
        final ESToParentBlockJoinQuery join = (ESToParentBlockJoinQuery) sourceQuery;
        flatten(join.getChildQuery(), reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof BoostingQuery) {
        final BoostingQuery boosting = (BoostingQuery) sourceQuery;
        // the match (positive) part keeps the accumulated boost
        flatten(boosting.getMatch(), reader, flatQueries, boost);
        // the context (negative) part uses the boosting query's own boost instead
        flatten(boosting.getContext(), reader, flatQueries, boosting.getBoost());
        return;
    }
    if (sourceQuery instanceof SynonymQuery) {
        // SynonymQuery should be handled by the parent class directly.
        // This statement should be removed when https://issues.apache.org/jira/browse/LUCENE-7484 is merged.
        for (Term synonym : ((SynonymQuery) sourceQuery).getTerms()) {
            flatten(new TermQuery(synonym), reader, flatQueries, boost);
        }
        return;
    }
    super.flatten(sourceQuery, reader, flatQueries, boost);
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) BlendedTermQuery(org.apache.lucene.queries.BlendedTermQuery) TermQuery(org.apache.lucene.search.TermQuery) FiltersFunctionScoreQuery(org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery) FunctionScoreQuery(org.elasticsearch.common.lucene.search.function.FunctionScoreQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) ESToParentBlockJoinQuery(org.elasticsearch.index.search.ESToParentBlockJoinQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) BlendedTermQuery(org.apache.lucene.queries.BlendedTermQuery) Term(org.apache.lucene.index.Term) BoostQuery(org.apache.lucene.search.BoostQuery) FiltersFunctionScoreQuery(org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) BoostingQuery(org.apache.lucene.queries.BoostingQuery) MultiPhrasePrefixQuery(org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery)

Aggregations

BoostQuery (org.apache.lucene.search.BoostQuery)128 Query (org.apache.lucene.search.Query)107 BooleanQuery (org.apache.lucene.search.BooleanQuery)96 TermQuery (org.apache.lucene.search.TermQuery)84 Term (org.apache.lucene.index.Term)54 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)45 PhraseQuery (org.apache.lucene.search.PhraseQuery)35 DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery)32 PrefixQuery (org.apache.lucene.search.PrefixQuery)29 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)27 MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery)24 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)23 BooleanClause (org.apache.lucene.search.BooleanClause)20 MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery)20 SynonymQuery (org.apache.lucene.search.SynonymQuery)19 WildcardQuery (org.apache.lucene.search.WildcardQuery)19 ArrayList (java.util.ArrayList)18 TermRangeQuery (org.apache.lucene.search.TermRangeQuery)18 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)17 MultiTermQuery (org.apache.lucene.search.MultiTermQuery)16