Search in sources :

Example 21 with DisjunctionMaxQuery

use of org.apache.lucene.search.DisjunctionMaxQuery in project ddf by codice.

the class GeoNamesQueryLuceneIndex method createQuery.

/**
 * Builds the Lucene query used to look up a place by free text.
 *
 * <p>The name field and the alternate-names field are each searched with several variants of
 * {@code queryString}; the best variant per field is kept, the two per-field scores are added,
 * and the result is wrapped in a {@link CustomScoreQuery} together with the value stored in
 * {@code GeoNamesLuceneConstants.BOOST_FIELD}.
 *
 * @param queryString the raw text to search for; it is handed to the classic query parser as-is
 * @return the combined, boosted query
 * @throws ParseException if the query parser rejects any variant of {@code queryString}
 */
protected Query createQuery(final String queryString) throws ParseException {
    final StandardAnalyzer analyzer = new StandardAnalyzer();

    // ---- Name field: phrase, OR and AND variants; the best-scoring variant wins. ----
    final QueryParser nameParser = new QueryParser(GeoNamesLuceneConstants.NAME_FIELD, analyzer);
    nameParser.setEnablePositionIncrements(false);

    // Wrapping the text in quotes makes the parser treat it as a phrase. Phrase matches get
    // the largest boost; 3.2 was settled on experimentally.
    final String quoted = "\"" + queryString + "\"";
    final Query namePhrase = new BoostQuery(nameParser.parse(quoted), 3.2f);

    // The parser's default operator is OR, so this must be parsed BEFORE switching to AND.
    // OR matches are the weakest signal and keep the default boost of 1 (also chosen
    // experimentally).
    final Query nameAnyTerm = nameParser.parse(queryString);

    // From here on the same parser requires every term. AND matches rank between OR and
    // phrase matches; the boost of 2 was chosen experimentally.
    nameParser.setDefaultOperator(QueryParser.AND_OPERATOR);
    final Query nameAllTerms = new BoostQuery(nameParser.parse(queryString), 2f);

    // Tie-breaker 0: each document scores as the maximum of the three variants.
    final Query nameQuery =
            new DisjunctionMaxQuery(Arrays.asList(namePhrase, nameAnyTerm, nameAllTerms), 0.0f);

    // ---- Alternate names: OR and AND variants only, boosted below the name variants ----
    // ---- because alternate names are generally less important.                      ----
    final QueryParser altParser =
            new QueryParser(GeoNamesLuceneConstants.ALTERNATE_NAMES_FIELD, analyzer);

    // OR variant (parsed while the default operator is still OR); 0.5 chosen experimentally.
    final Query altAnyTerm = new BoostQuery(altParser.parse(queryString), 0.5f);

    // AND variant keeps the default boost of 1, again chosen experimentally.
    altParser.setDefaultOperator(QueryParser.AND_OPERATOR);
    final Query altAllTerms = altParser.parse(queryString);

    // Tie-breaker 0: each document scores as the maximum of the two variants.
    final Query alternateNamesQuery =
            new DisjunctionMaxQuery(Arrays.asList(altAnyTerm, altAllTerms), 0.0f);

    // ---- Combine both fields. ----
    // With a tie-breaker of 1 the non-maximal sub-score is added in full, so the document
    // score is the sum of the name and alternate-name scores. The boosts above make the name
    // match weigh more than the alternate-name match.
    final List<Query> perFieldQueries = Arrays.asList(nameQuery, alternateNamesQuery);
    final DisjunctionMaxQuery combined = new DisjunctionMaxQuery(perFieldQueries, 1.0f);

    // Per-document boost read from BOOST_FIELD (computed at index time, per the original
    // author's note); CustomScoreQuery applies it to the combined score.
    final FunctionQuery storedBoost =
            new FunctionQuery(new FloatFieldSource(GeoNamesLuceneConstants.BOOST_FIELD));
    return new CustomScoreQuery(combined, storedBoost);
}
Also used : QueryParser(org.apache.lucene.queryparser.classic.QueryParser) FunctionQuery(org.apache.lucene.queries.function.FunctionQuery) Query(org.apache.lucene.search.Query) FunctionQuery(org.apache.lucene.queries.function.FunctionQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) FloatFieldSource(org.apache.lucene.queries.function.valuesource.FloatFieldSource) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) BoostQuery(org.apache.lucene.search.BoostQuery)

Example 22 with DisjunctionMaxQuery

use of org.apache.lucene.search.DisjunctionMaxQuery in project vertexium by visallo.

the class VertexiumMapperQueryParser method newFieldQuery.

/**
 * Builds the query for a single field, honouring per-field visibility.
 *
 * <p>Dots in {@code field} are replaced with {@code FIELDNAME_DOT_REPLACEMENT}. If the resulting
 * name matches {@code PROPERTY_NAME_PATTERN} with a non-null second group, the field is queried
 * directly when the current authorizations may read its visibility, otherwise {@code null} is
 * returned. For any other name, every known field that starts with {@code field + "_"} and is
 * readable is queried, and the results are combined in a {@link DisjunctionMaxQuery} with a
 * tie-breaker of 0. When nothing readable matches, the call is delegated unchanged to the
 * superclass.
 *
 * @param analyzer  analyzer forwarded to the superclass
 * @param field     field name as written in the query; may be {@code null} or empty, in which
 *                  case the superclass implementation is used directly
 * @param queryText text to search for
 * @param quoted    whether the text was quoted in the original query
 * @return the field query, or {@code null} when the addressed field is not readable
 * @throws ParseException if the superclass fails to build a sub-query
 */
@Override
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted) throws ParseException {
    // The null check has to come before the first dereference; previously replace() was
    // called first, so a null field threw NullPointerException and this guard was dead code.
    if (field != null) {
        field = field.replace(".", FIELDNAME_DOT_REPLACEMENT);
    }
    if (field == null || field.length() == 0) {
        return super.newFieldQuery(analyzer, field, queryText, quoted);
    }

    Matcher m = PROPERTY_NAME_PATTERN.matcher(field);
    if (m.matches() && m.group(2) != null) {
        String visibility = fieldNameToVisibilityMap.getFieldVisibility(field);
        if (VisibilityUtils.canRead(visibility, authorizations)) {
            return super.newFieldQuery(analyzer, field, queryText, quoted);
        }
        // The caller is not allowed to see this field: contribute no query at all.
        return null;
    }

    // Fan out to every readable field that shares the "<field>_" prefix.
    String fieldPrefix = field + "_";
    List<Query> disjuncts = new ArrayList<>();
    for (String fieldName : fieldNameToVisibilityMap.getFieldNames()) {
        if (!fieldName.startsWith(fieldPrefix)) {
            continue;
        }
        String visibility = fieldNameToVisibilityMap.getFieldVisibility(fieldName);
        if (!VisibilityUtils.canRead(visibility, authorizations)) {
            continue;
        }
        Query termQuery = super.newFieldQuery(analyzer, fieldName, queryText, quoted);
        // This method's own contract allows a null result, so a null sub-query is skipped
        // rather than stored as a disjunct.
        if (termQuery != null) {
            disjuncts.add(termQuery);
        }
    }

    // Nothing usable was found: fall back to the plain field query.
    if (disjuncts.isEmpty()) {
        return super.newFieldQuery(analyzer, field, queryText, quoted);
    }
    return new DisjunctionMaxQuery(disjuncts, 0.0f);
}
Also used : Query(org.apache.lucene.search.Query) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) Matcher(java.util.regex.Matcher) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) ArrayList(java.util.ArrayList)

Example 23 with DisjunctionMaxQuery

use of org.apache.lucene.search.DisjunctionMaxQuery in project crate by crate.

the class MultiMatchQuery method blendTerms.

/**
 * Builds one query that looks for every value in {@code values} across all
 * {@code blendedFields}.
 *
 * <p>Plain term queries are collected and merged into a single {@link BlendedTermQuery};
 * any other query type is kept as a separate clause (re-wrapped with its accumulated boost).
 * If exactly one clause results it is returned directly, otherwise the clauses are combined
 * in a {@link DisjunctionMaxQuery} with a fixed tie-breaker of {@code 1.0f}.
 *
 * @param context           not read by this method
 * @param values            the term values to search for
 * @param commonTermsCutoff when non-null, selects the common-terms blended query and is passed
 *                          to it; when null the dismax blended query is used
 * @param tieBreaker        tie-breaker handed to the dismax blended query
 * @param blendedFields     fields (with their boosts) to blend the terms over
 * @return the blended query
 */
static Query blendTerms(QueryShardContext context, BytesRef[] values, Float commonTermsCutoff, float tieBreaker, FieldAndFieldType... blendedFields) {
    final List<Query> queries = new ArrayList<>();
    final int capacity = blendedFields.length * values.length;
    final Term[] collectedTerms = new Term[capacity];
    final float[] collectedBoosts = new float[capacity];
    int termCount = 0;

    for (FieldAndFieldType ft : blendedFields) {
        for (BytesRef value : values) {
            Query candidate;
            try {
                candidate = new TermQuery(new Term(ft.fieldType.name(), value));
            } catch (IllegalArgumentException e) {
                // The term could not be built for this field/value pair: skip the pair.
                continue;
            } catch (ElasticsearchParseException parseException) {
                // Only a parse failure caused by an IllegalArgumentException is treated like
                // the case above; anything else is propagated.
                if (!(parseException.getCause() instanceof IllegalArgumentException)) {
                    throw parseException;
                }
                continue;
            }

            // Peel off any BoostQuery wrappers, folding their boosts into the field boost.
            float accumulatedBoost = ft.boost;
            while (candidate instanceof BoostQuery) {
                final BoostQuery wrapper = (BoostQuery) candidate;
                candidate = wrapper.getQuery();
                accumulatedBoost *= wrapper.getBoost();
            }

            if (candidate.getClass() != TermQuery.class) {
                // Not a plain term query: keep it as its own clause, restoring the boost
                // unless it is the neutral 1 or the query matches nothing anyway.
                final boolean matchesNothing = candidate instanceof MatchNoDocsQuery;
                if (accumulatedBoost != 1f && !matchesNothing) {
                    candidate = new BoostQuery(candidate, accumulatedBoost);
                }
                queries.add(candidate);
                continue;
            }

            // Exact TermQuery (subclasses excluded): remember term and boost for blending.
            collectedTerms[termCount] = ((TermQuery) candidate).getTerm();
            collectedBoosts[termCount] = accumulatedBoost;
            termCount++;
        }
    }

    if (termCount > 0) {
        // Trim both arrays to the number of slots actually filled.
        final Term[] terms = Arrays.copyOf(collectedTerms, termCount);
        final float[] blendedBoost = Arrays.copyOf(collectedBoosts, termCount);
        if (commonTermsCutoff == null) {
            queries.add(BlendedTermQuery.dismaxBlendedQuery(terms, blendedBoost, tieBreaker));
        } else {
            queries.add(BlendedTermQuery.commonTermsBlendedQuery(terms, blendedBoost, commonTermsCutoff));
        }
    }

    // TODO: can we improve this?
    return queries.size() == 1 ? queries.get(0) : new DisjunctionMaxQuery(queries, 1.0f);
}
Also used : BlendedTermQuery(org.apache.lucene.queries.BlendedTermQuery) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) BlendedTermQuery(org.apache.lucene.queries.BlendedTermQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) TermQuery(org.apache.lucene.search.TermQuery) BoostQuery(org.apache.lucene.search.BoostQuery) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) BoostQuery(org.apache.lucene.search.BoostQuery) ElasticsearchParseException(org.elasticsearch.ElasticsearchParseException) BytesRef(org.apache.lucene.util.BytesRef)

Example 24 with DisjunctionMaxQuery

use of org.apache.lucene.search.DisjunctionMaxQuery in project crate by crate.

the class MultiMatchQuery method blendPhrase.

/**
 * Re-creates the given {@link PhraseQuery} once per target field and combines the copies.
 *
 * <p>Each copy keeps the original terms' bytes and positions but uses the target field's name;
 * a copy is wrapped in a {@link BoostQuery} when the field's boost differs from
 * {@code MultiMatchQuery.DEFAULT_BOOST}.
 *
 * @param query      the phrase query to expand
 * @param tiebreaker tie-breaker passed to the resulting {@link DisjunctionMaxQuery}
 * @param fields     the fields (with boosts) to expand the phrase to
 * @return a {@link DisjunctionMaxQuery} holding one disjunct per entry in {@code fields}
 */
static Query blendPhrase(PhraseQuery query, float tiebreaker, FieldAndFieldType... fields) {
    final List<Query> perFieldPhrases = new ArrayList<>();
    for (FieldAndFieldType target : fields) {
        final int[] sourcePositions = query.getPositions();
        final Term[] sourceTerms = query.getTerms();

        // Same bytes and positions as the source phrase, but addressed to the target field.
        final PhraseQuery.Builder phrase = new PhraseQuery.Builder();
        int index = 0;
        for (Term sourceTerm : sourceTerms) {
            phrase.add(new Term(target.fieldType.name(), sourceTerm.bytes()), sourcePositions[index]);
            index++;
        }

        final Query rebuilt = phrase.build();
        final boolean hasCustomBoost = target.boost != MultiMatchQuery.DEFAULT_BOOST;
        perFieldPhrases.add(hasCustomBoost ? new BoostQuery(rebuilt, target.boost) : rebuilt);
    }
    return new DisjunctionMaxQuery(perFieldPhrases, tiebreaker);
}
Also used : Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) BlendedTermQuery(org.apache.lucene.queries.BlendedTermQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) TermQuery(org.apache.lucene.search.TermQuery) BoostQuery(org.apache.lucene.search.BoostQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) BoostQuery(org.apache.lucene.search.BoostQuery)

Example 25 with DisjunctionMaxQuery

use of org.apache.lucene.search.DisjunctionMaxQuery in project crate by crate.

the class ElasticsearchAssertions method assertDisjunctionSubQuery.

/**
 * Asserts that {@code query} is a {@link DisjunctionMaxQuery} whose disjunct at index {@code i}
 * exists and is an instance of {@code subqueryType}, then returns that disjunct.
 *
 * @param query        the query expected to be a {@link DisjunctionMaxQuery}
 * @param subqueryType the type the selected disjunct must have
 * @param i            zero-based index of the disjunct to check
 * @param <T>          the expected disjunct type
 * @return the disjunct at index {@code i}, cast to {@code subqueryType}
 */
public static <T extends Query> T assertDisjunctionSubQuery(Query query, Class<T> subqueryType, int i) {
    assertThat(query, instanceOf(DisjunctionMaxQuery.class));
    final DisjunctionMaxQuery disMax = (DisjunctionMaxQuery) query;

    // The size check comes first so an out-of-range index fails as an assertion.
    assertThat(disMax.getDisjuncts().size(), greaterThan(i));

    final Query selected = disMax.getDisjuncts().get(i);
    assertThat(selected, instanceOf(subqueryType));
    return subqueryType.cast(selected);
}
Also used : DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery)

Aggregations

DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery)49 Query (org.apache.lucene.search.Query)38 BooleanQuery (org.apache.lucene.search.BooleanQuery)34 BoostQuery (org.apache.lucene.search.BoostQuery)30 TermQuery (org.apache.lucene.search.TermQuery)26 Term (org.apache.lucene.index.Term)25 ArrayList (java.util.ArrayList)22 PhraseQuery (org.apache.lucene.search.PhraseQuery)20 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)17 MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery)16 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)15 SynonymQuery (org.apache.lucene.search.SynonymQuery)15 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)15 BooleanClause (org.apache.lucene.search.BooleanClause)13 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)13 SpanQuery (org.apache.lucene.search.spans.SpanQuery)12 MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery)10 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)9 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)9 TopDocs (org.apache.lucene.search.TopDocs)9