Search in sources :

Example 46 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class TestMoreLikeThis method testTopN.

public void testTopN() throws Exception {
    int numDocs = 100;
    int topN = 25;
    // add series of docs with terms of decreasing df
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    for (int i = 0; i < numDocs; i++) {
        addDoc(writer, generateStrSeq(0, i + 1));
    }
    IndexReader reader = writer.getReader();
    writer.close();
    // setup MLT query
    MoreLikeThis mlt = new MoreLikeThis(reader);
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    mlt.setAnalyzer(analyzer);
    mlt.setMaxQueryTerms(topN);
    mlt.setMinDocFreq(1);
    mlt.setMinTermFreq(1);
    mlt.setMinWordLen(1);
    mlt.setFieldNames(new String[] { "text" });
    // perform MLT query
    String likeText = "";
    for (String text : generateStrSeq(0, numDocs)) {
        likeText += text + " ";
    }
    BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(likeText));
    // check best terms are topN of highest idf
    Collection<BooleanClause> clauses = query.clauses();
    assertEquals("Expected" + topN + "clauses only!", topN, clauses.size());
    Term[] expectedTerms = new Term[topN];
    int idx = 0;
    for (String text : generateStrSeq(numDocs - topN, topN)) {
        expectedTerms[idx++] = new Term("text", text);
    }
    for (BooleanClause clause : clauses) {
        Term term = ((TermQuery) clause.getQuery()).getTerm();
        assertTrue(Arrays.asList(expectedTerms).contains(term));
    }
    // clean up
    reader.close();
    dir.close();
    analyzer.close();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BooleanClause(org.apache.lucene.search.BooleanClause) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) StringReader(java.io.StringReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 47 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class CloudMLTQParser method parse.

public Query parse() {
    String id = localParams.get(QueryParsing.V);
    // Do a Real Time Get for the document
    SolrDocument doc = getDocument(id);
    if (doc == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " + "document with id [" + id + "]");
    }
    String[] qf = localParams.getParams("qf");
    Map<String, Float> boostFields = new HashMap<>();
    MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
    mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
    mlt.setMinDocFreq(localParams.getInt("mindf", 0));
    mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
    mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
    mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
    mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
    mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
    Boolean boost = localParams.getBool("boost", MoreLikeThis.DEFAULT_BOOST);
    mlt.setBoost(boost);
    mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    String[] fieldNames;
    if (qf != null) {
        ArrayList<String> fields = new ArrayList();
        for (String fieldName : qf) {
            if (!StringUtils.isEmpty(fieldName)) {
                String[] strings = splitList.split(fieldName);
                for (String string : strings) {
                    if (!StringUtils.isEmpty(string)) {
                        fields.add(string);
                    }
                }
            }
        }
        // Parse field names and boosts from the fields
        boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
        fieldNames = boostFields.keySet().toArray(new String[0]);
    } else {
        ArrayList<String> fields = new ArrayList();
        for (String field : doc.getFieldNames()) {
            // Only use fields that are stored and have an explicit analyzer.
            // This makes sense as the query uses tf/idf/.. for query construction.
            // We might want to relook and change this in the future though.
            SchemaField f = req.getSchema().getFieldOrNull(field);
            if (f != null && f.stored() && f.getType().isExplicitAnalyzer()) {
                fields.add(field);
            }
        }
        fieldNames = fields.toArray(new String[0]);
    }
    if (fieldNames.length < 1) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "MoreLikeThis requires at least one similarity field: qf");
    }
    mlt.setFieldNames(fieldNames);
    for (String field : fieldNames) {
        Collection<Object> fieldValues = doc.getFieldValues(field);
        if (fieldValues != null) {
            Collection<Object> values = new ArrayList();
            for (Object val : fieldValues) {
                if (val instanceof IndexableField) {
                    values.add(((IndexableField) val).stringValue());
                } else {
                    values.add(val);
                }
            }
            filteredDocument.put(field, values);
        }
    }
    try {
        Query rawMLTQuery = mlt.like(filteredDocument);
        BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
        if (boost && boostFields.size() > 0) {
            BooleanQuery.Builder newQ = new BooleanQuery.Builder();
            newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
            for (BooleanClause clause : boostedMLTQuery) {
                Query q = clause.getQuery();
                float originalBoost = 1f;
                if (q instanceof BoostQuery) {
                    BoostQuery bq = (BoostQuery) q;
                    q = bq.getQuery();
                    originalBoost = bq.getBoost();
                }
                Float fieldBoost = boostFields.get(((TermQuery) q).getTerm().field());
                q = ((fieldBoost != null) ? new BoostQuery(q, fieldBoost * originalBoost) : clause.getQuery());
                newQ.add(q, clause.getOccur());
            }
            boostedMLTQuery = newQ.build();
        }
        // exclude current document from results
        BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
        realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
        realMLTQuery.add(createIdQuery(req.getSchema().getUniqueKeyField().getName(), id), BooleanClause.Occur.MUST_NOT);
        return realMLTQuery.build();
    } catch (IOException e) {
        e.printStackTrace();
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad Request");
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) HashMap(java.util.HashMap) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) ArrayList(java.util.ArrayList) MoreLikeThis(org.apache.lucene.queries.mlt.MoreLikeThis) BoostQuery(org.apache.lucene.search.BoostQuery) SolrDocument(org.apache.solr.common.SolrDocument) SolrException(org.apache.solr.common.SolrException) TermQuery(org.apache.lucene.search.TermQuery) IOException(java.io.IOException) SchemaField(org.apache.solr.schema.SchemaField) IndexableField(org.apache.lucene.index.IndexableField) BooleanClause(org.apache.lucene.search.BooleanClause) Collection(java.util.Collection)

Example 48 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class SimpleMLTQParser method parse.

public Query parse() {
    String defaultField = req.getSchema().getUniqueKeyField().getName();
    String uniqueValue = localParams.get(QueryParsing.V);
    String[] qf = localParams.getParams("qf");
    SolrIndexSearcher searcher = req.getSearcher();
    Query docIdQuery = createIdQuery(defaultField, uniqueValue);
    Map<String, Float> boostFields = new HashMap<>();
    try {
        TopDocs td = searcher.search(docIdQuery, 1);
        if (td.totalHits != 1)
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " + "document with id [" + uniqueValue + "]");
        ScoreDoc[] scoreDocs = td.scoreDocs;
        MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
        mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
        mlt.setMinDocFreq(localParams.getInt("mindf", MoreLikeThis.DEFAULT_MIN_DOC_FREQ));
        mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
        mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
        mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
        mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
        mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
        Boolean boost = localParams.getBool("boost", false);
        mlt.setBoost(boost);
        String[] fieldNames;
        if (qf != null) {
            ArrayList<String> fields = new ArrayList<>();
            for (String fieldName : qf) {
                if (!StringUtils.isEmpty(fieldName)) {
                    String[] strings = splitList.split(fieldName);
                    for (String string : strings) {
                        if (!StringUtils.isEmpty(string)) {
                            fields.add(string);
                        }
                    }
                }
            }
            // Parse field names and boosts from the fields
            boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
            fieldNames = boostFields.keySet().toArray(new String[0]);
        } else {
            Map<String, SchemaField> fieldDefinitions = req.getSearcher().getSchema().getFields();
            ArrayList<String> fields = new ArrayList();
            for (String fieldName : fieldDefinitions.keySet()) {
                if (fieldDefinitions.get(fieldName).indexed() && fieldDefinitions.get(fieldName).stored())
                    if (fieldDefinitions.get(fieldName).getType().getNumberType() == null)
                        fields.add(fieldName);
            }
            fieldNames = fields.toArray(new String[0]);
        }
        if (fieldNames.length < 1) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "MoreLikeThis requires at least one similarity field: qf");
        }
        mlt.setFieldNames(fieldNames);
        mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
        Query rawMLTQuery = mlt.like(scoreDocs[0].doc);
        BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
        if (boost && boostFields.size() > 0) {
            BooleanQuery.Builder newQ = new BooleanQuery.Builder();
            newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
            for (BooleanClause clause : boostedMLTQuery) {
                Query q = clause.getQuery();
                float originalBoost = 1f;
                if (q instanceof BoostQuery) {
                    BoostQuery bq = (BoostQuery) q;
                    q = bq.getQuery();
                    originalBoost = bq.getBoost();
                }
                Float fieldBoost = boostFields.get(((TermQuery) q).getTerm().field());
                q = ((fieldBoost != null) ? new BoostQuery(q, fieldBoost * originalBoost) : clause.getQuery());
                newQ.add(q, clause.getOccur());
            }
            boostedMLTQuery = newQ.build();
        }
        // exclude current document from results
        BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
        realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
        realMLTQuery.add(docIdQuery, BooleanClause.Occur.MUST_NOT);
        return realMLTQuery.build();
    } catch (IOException e) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request" + e.getMessage());
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) HashMap(java.util.HashMap) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) ArrayList(java.util.ArrayList) MoreLikeThis(org.apache.lucene.queries.mlt.MoreLikeThis) BoostQuery(org.apache.lucene.search.BoostQuery) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) SolrException(org.apache.solr.common.SolrException) TermQuery(org.apache.lucene.search.TermQuery) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) IOException(java.io.IOException) SchemaField(org.apache.solr.schema.SchemaField) BooleanClause(org.apache.lucene.search.BooleanClause)

Example 49 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project elasticsearch by elastic.

the class SimpleQueryParser method newPossiblyAnalyzedQuery.

/**
     * Analyze the given string using its analyzer, constructing either a
     * {@code PrefixQuery} or a {@code BooleanQuery} made up
     * of {@code TermQuery}s and {@code PrefixQuery}s
     */
private Query newPossiblyAnalyzedQuery(String field, String termStr) {
    List<List<BytesRef>> tlist = new ArrayList<>();
    // get Analyzer from superclass and tokenize the term
    try (TokenStream source = getAnalyzer().tokenStream(field, termStr)) {
        source.reset();
        List<BytesRef> currentPos = new ArrayList<>();
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posAtt = source.addAttribute(PositionIncrementAttribute.class);
        try {
            boolean hasMoreTokens = source.incrementToken();
            while (hasMoreTokens) {
                if (currentPos.isEmpty() == false && posAtt.getPositionIncrement() > 0) {
                    tlist.add(currentPos);
                    currentPos = new ArrayList<>();
                }
                final BytesRef term = getAnalyzer().normalize(field, termAtt.toString());
                currentPos.add(term);
                hasMoreTokens = source.incrementToken();
            }
            if (currentPos.isEmpty() == false) {
                tlist.add(currentPos);
            }
        } catch (IOException e) {
        // ignore
        // TODO: we should not ignore the exception and return a prefix query with the original term ?
        }
    } catch (IOException e) {
        // Bail on any exceptions, going with a regular prefix query
        return new PrefixQuery(new Term(field, termStr));
    }
    if (tlist.size() == 0) {
        return null;
    }
    if (tlist.size() == 1 && tlist.get(0).size() == 1) {
        return new PrefixQuery(new Term(field, tlist.get(0).get(0)));
    }
    // build a boolean query with prefix on the last position only.
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    for (int pos = 0; pos < tlist.size(); pos++) {
        List<BytesRef> plist = tlist.get(pos);
        boolean isLastPos = (pos == tlist.size() - 1);
        Query posQuery;
        if (plist.size() == 1) {
            if (isLastPos) {
                posQuery = new PrefixQuery(new Term(field, plist.get(0)));
            } else {
                posQuery = newTermQuery(new Term(field, plist.get(0)));
            }
        } else if (isLastPos == false) {
            // build a synonym query for terms in the same position.
            Term[] terms = new Term[plist.size()];
            for (int i = 0; i < plist.size(); i++) {
                terms[i] = new Term(field, plist.get(i));
            }
            posQuery = new SynonymQuery(terms);
        } else {
            BooleanQuery.Builder innerBuilder = new BooleanQuery.Builder();
            for (BytesRef token : plist) {
                innerBuilder.add(new BooleanClause(new PrefixQuery(new Term(field, token)), BooleanClause.Occur.SHOULD));
            }
            posQuery = innerBuilder.setDisableCoord(true).build();
        }
        builder.add(new BooleanClause(posQuery, getDefaultOperator()));
    }
    return builder.build();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Term(org.apache.lucene.index.Term) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) BooleanClause(org.apache.lucene.search.BooleanClause) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) PrefixQuery(org.apache.lucene.search.PrefixQuery) ArrayList(java.util.ArrayList) List(java.util.List) BytesRef(org.apache.lucene.util.BytesRef)

Example 50 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project elasticsearch by elastic.

the class MapperQueryParser method getFuzzyQuery.

protected Query getFuzzyQuery(String field, String termStr, String minSimilarity) throws ParseException {
    Collection<String> fields = extractMultiFields(field);
    if (fields != null) {
        if (fields.size() == 1) {
            return getFuzzyQuerySingle(fields.iterator().next(), termStr, minSimilarity);
        }
        if (settings.useDisMax()) {
            List<Query> queries = new ArrayList<>();
            boolean added = false;
            for (String mField : fields) {
                Query q = getFuzzyQuerySingle(mField, termStr, minSimilarity);
                if (q != null) {
                    added = true;
                    queries.add(applyBoost(mField, q));
                }
            }
            if (!added) {
                return null;
            }
            return new DisjunctionMaxQuery(queries, settings.tieBreaker());
        } else {
            List<BooleanClause> clauses = new ArrayList<>();
            for (String mField : fields) {
                Query q = getFuzzyQuerySingle(mField, termStr, minSimilarity);
                if (q != null) {
                    clauses.add(new BooleanClause(applyBoost(mField, q), BooleanClause.Occur.SHOULD));
                }
            }
            return getBooleanQueryCoordDisabled(clauses);
        }
    } else {
        return getFuzzyQuerySingle(field, termStr, minSimilarity);
    }
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) ArrayList(java.util.ArrayList)

Aggregations

BooleanClause (org.apache.lucene.search.BooleanClause)102 BooleanQuery (org.apache.lucene.search.BooleanQuery)92 Query (org.apache.lucene.search.Query)56 TermQuery (org.apache.lucene.search.TermQuery)46 Term (org.apache.lucene.index.Term)44 BoostQuery (org.apache.lucene.search.BoostQuery)33 ArrayList (java.util.ArrayList)23 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)21 PhraseQuery (org.apache.lucene.search.PhraseQuery)20 DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery)19 SynonymQuery (org.apache.lucene.search.SynonymQuery)18 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)18 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)17 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)16 SpanQuery (org.apache.lucene.search.spans.SpanQuery)15 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)14 MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery)14 WildcardQuery (org.apache.lucene.search.WildcardQuery)14 MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery)13 PrefixQuery (org.apache.lucene.search.PrefixQuery)13