Search in sources :

Example 81 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class SolrQueryParserBase method getBooleanQuery.

/**
   * Factory method for generating query, given a set of clauses.
   * By default creates a boolean query composed of clauses passed in.
   *
   * Can be overridden by extending classes, to modify query being
   * returned.
   *
   * <p>Optional ({@code SHOULD}) {@code RawQuery} clauses may be collected per
   * {@link SchemaField} and emitted together: either as individual {@code SHOULD}
   * clauses or, when the group is large enough (or the field is not indexed),
   * as a single query obtained from {@code FieldType.getSetQuery}. Non-optional
   * raw queries are converted with {@code rawToNormal} and added as-is.
   *
   * <p>If the built boolean query ends up with exactly one {@code SHOULD} clause,
   * that clause's query is returned unwrapped.
   *
   * @param clauses List that contains {@link org.apache.lucene.search.BooleanClause} instances
   *    to join.
   *
   * @return Resulting {@link org.apache.lucene.search.Query} object, or {@code null}
   *    when {@code clauses} is empty.
   * @throws SyntaxError declared for overriding classes and for the helper calls made here
   */
protected Query getBooleanQuery(List<BooleanClause> clauses) throws SyntaxError {
    if (clauses.isEmpty()) {
        // all clause words were filtered away by the analyzer.
        return null;
    }
    SchemaField sfield = null;
    List<RawQuery> fieldValues = null;
    boolean onlyRawQueries = true;
    int allRawQueriesTermCount = 0;
    // First pass: count the terms carried by raw queries and note whether anything else is present.
    for (BooleanClause clause : clauses) {
        if (clause.getQuery() instanceof RawQuery) {
            allRawQueriesTermCount += ((RawQuery) clause.getQuery()).getTermCount();
        } else {
            onlyRawQueries = false;
        }
    }
    // Set queries are only considered in filter context and above the term-count threshold.
    boolean useTermsQuery = (flags & QParser.FLAG_FILTER) != 0 && allRawQueriesTermCount > TERMS_QUERY_THRESHOLD;
    BooleanQuery.Builder booleanBuilder = newBooleanQuery();
    Map<SchemaField, List<RawQuery>> fmap = new HashMap<>();
    // Second pass: route each clause either into the builder or into its field's collected list.
    for (BooleanClause clause : clauses) {
        Query subq = clause.getQuery();
        if (subq instanceof RawQuery) {
            if (clause.getOccur() != BooleanClause.Occur.SHOULD) {
                // We only collect optional terms for set queries.  Since this isn't optional,
                // convert the raw query to a normal query and handle as usual.
                clause = new BooleanClause(rawToNormal(subq), clause.getOccur());
            } else {
                // Optional raw query.
                RawQuery rawq = (RawQuery) subq;
                // only look up fmap and type info on a field change
                if (sfield != rawq.sfield) {
                    sfield = rawq.sfield;
                    fieldValues = fmap.get(sfield);
                    // Collect this value if the field isn't indexed, or if it is indexed and we
                    // want a set query (the "useTermsQuery" check). Only start a new list when
                    // the field has none yet: replacing an existing list would silently drop the
                    // values gathered before the clauses switched to another field and back.
                    if (fieldValues == null && (useTermsQuery || !sfield.indexed())) {
                        fieldValues = new ArrayList<>(2);
                        fmap.put(sfield, fieldValues);
                    }
                }
                if (fieldValues != null) {
                    fieldValues.add(rawq);
                    continue;
                }
                // Not collecting for this field: fall back to a normal query clause.
                clause = new BooleanClause(rawToNormal(subq), clause.getOccur());
            }
        }
        booleanBuilder.add(clause);
    }
    // Third pass: turn each field's collected raw queries into clauses or a single set query.
    for (Map.Entry<SchemaField, List<RawQuery>> entry : fmap.entrySet()) {
        sfield = entry.getKey();
        fieldValues = entry.getValue();
        FieldType ft = sfield.getType();
        // TODO: pull more of this logic out to FieldType?  We would need to be able to add clauses to our existing booleanBuilder.
        int termCount = fieldValues.stream().mapToInt(RawQuery::getTermCount).sum();
        if ((sfield.indexed() && termCount < TERMS_QUERY_THRESHOLD) || termCount == 1) {
            // use boolean query instead
            for (RawQuery rawq : fieldValues) {
                Query subq;
                if (ft.isTokenized() && sfield.indexed()) {
                    // Tokenized field: analyze the joined value once, honoring the TextField options.
                    boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField) ft).getAutoGeneratePhraseQueries();
                    boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField) ft).getEnableGraphQueries();
                    subq = newFieldQuery(getAnalyzer(), sfield.getName(), rawq.getJoinedExternalVal(), false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries);
                    booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
                } else {
                    // Otherwise let the field type build one query per external value.
                    for (String externalVal : rawq.getExternalVals()) {
                        subq = ft.getFieldQuery(this.parser, sfield, externalVal);
                        booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
                    }
                }
            }
        } else {
            List<String> externalVals = fieldValues.stream().flatMap(rawq -> rawq.getExternalVals().stream()).collect(Collectors.toList());
            Query subq = ft.getSetQuery(this.parser, sfield, externalVals);
            // if this is everything, don't wrap in a boolean query
            if (onlyRawQueries && termCount == allRawQueriesTermCount)
                return subq;
            booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
        }
    }
    BooleanQuery bq = booleanBuilder.build();
    if (bq.clauses().size() == 1) {
        // Unwrap single SHOULD query
        BooleanClause clause = bq.clauses().iterator().next();
        if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
            return clause.getQuery();
        }
    }
    return bq;
}
Also used : Query(org.apache.lucene.search.Query) QParser(org.apache.solr.search.QParser) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) SolrConstantScoreQuery(org.apache.solr.search.SolrConstantScoreQuery) FieldType(org.apache.solr.schema.FieldType) Term(org.apache.lucene.index.Term) PhraseQuery(org.apache.lucene.search.PhraseQuery) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) QueryBuilder(org.apache.lucene.util.QueryBuilder) SolrException(org.apache.solr.common.SolrException) SchemaField(org.apache.solr.schema.SchemaField) RegexpQuery(org.apache.lucene.search.RegexpQuery) Operations(org.apache.lucene.util.automaton.Operations) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SyntaxError(org.apache.solr.search.SyntaxError) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) Map(java.util.Map) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) EnumSet(java.util.EnumSet) Automata(org.apache.lucene.util.automaton.Automata) ReversedWildcardFilterFactory(org.apache.solr.analysis.ReversedWildcardFilterFactory) TokenizerChain(org.apache.solr.analysis.TokenizerChain) TextField(org.apache.solr.schema.TextField) Automaton(org.apache.lucene.util.automaton.Automaton) Analyzer(org.apache.lucene.analysis.Analyzer) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) Collectors(java.util.stream.Collectors) WildcardQuery(org.apache.lucene.search.WildcardQuery) BooleanClause(org.apache.lucene.search.BooleanClause) IndexSchema(org.apache.solr.schema.IndexSchema) List(java.util.List) FilterQuery(org.apache.solr.query.FilterQuery) StringReader(java.io.StringReader) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory) Collections(java.util.Collections) ReverseStringFilter(org.apache.lucene.analysis.reverse.ReverseStringFilter) 
Operator(org.apache.solr.parser.QueryParser.Operator) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) SolrConstantScoreQuery(org.apache.solr.search.SolrConstantScoreQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) FilterQuery(org.apache.solr.query.FilterQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) HashMap(java.util.HashMap) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) BooleanClause(org.apache.lucene.search.BooleanClause) TextField(org.apache.solr.schema.TextField) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 82 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class TestMoreLikeThis method testMultiFieldShouldReturnPerFieldBooleanQuery.

@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-7161")
public void testMultiFieldShouldReturnPerFieldBooleanQuery() throws Exception {
    // Indexes 300 "it" shop docs and 300 "clothes" shop docs through addShopDoc, adds one more
    // clothes doc as the MoreLikeThis input, and checks that the generated query contains a
    // SHOULD TermQuery clause for every input item under its own field (FOR_SALE / NOT_FOR_SALE).
    IndexReader indexReader = null;
    Directory directory = newDirectory();
    Analyzer whitespaceAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    try {
        final int termLimit = 25;
        // The two shop kinds carry mirrored inventories: what one sells, the other does not.
        String[] itForSale = { "watch", "ipod", "asrock", "imac", "macbookpro", "monitor", "keyboard", "mouse", "speakers" };
        String[] itNotForSale = { "tie", "trousers", "shoes", "skirt", "hat" };
        String[] clothesForSale = { "tie", "trousers", "shoes", "skirt", "hat" };
        String[] clothesNotForSale = { "watch", "ipod", "asrock", "imac", "macbookpro", "monitor", "keyboard", "mouse", "speakers" };

        // Populate the index with both kinds of shop documents.
        RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
        for (int remaining = 300; remaining > 0; remaining--) {
            addShopDoc(indexWriter, "it", itForSale, itNotForSale);
        }
        for (int remaining = 300; remaining > 0; remaining--) {
            addShopDoc(indexWriter, "clothes", clothesForSale, clothesNotForSale);
        }
        // The document we ask "more like this" about is a clothes shop.
        int queryDocId = addShopDoc(indexWriter, "clothes", clothesForSale, clothesNotForSale);
        indexReader = indexWriter.getReader();
        indexWriter.close();

        // Configure MoreLikeThis over both fields.
        MoreLikeThis moreLikeThis = new MoreLikeThis(indexReader);
        moreLikeThis.setAnalyzer(whitespaceAnalyzer);
        moreLikeThis.setMaxQueryTerms(termLimit);
        moreLikeThis.setMinDocFreq(1);
        moreLikeThis.setMinTermFreq(1);
        moreLikeThis.setMinWordLen(1);
        moreLikeThis.setFieldNames(new String[] { FOR_SALE, NOT_FOR_SALE });

        // Run the query and verify every expected per-field clause is present.
        BooleanQuery mltQuery = (BooleanQuery) moreLikeThis.like(queryDocId);
        Collection<BooleanClause> actualClauses = mltQuery.clauses();
        for (String item : clothesForSale) {
            TermQuery expectedTerm = new TermQuery(new Term(FOR_SALE, item));
            assertTrue(actualClauses.contains(new BooleanClause(expectedTerm, BooleanClause.Occur.SHOULD)));
        }
        for (String item : clothesNotForSale) {
            TermQuery expectedTerm = new TermQuery(new Term(NOT_FOR_SALE, item));
            assertTrue(actualClauses.contains(new BooleanClause(expectedTerm, BooleanClause.Occur.SHOULD)));
        }
    } finally {
        // Release resources: reader first (if it was opened), then directory and analyzer.
        if (indexReader != null) {
            indexReader.close();
        }
        directory.close();
        whitespaceAnalyzer.close();
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BooleanClause(org.apache.lucene.search.BooleanClause) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 83 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class TestMoreLikeThis method testMultiValues.

// LUCENE-5725
// Feeds several readers for the same field to MoreLikeThis and expects exactly two
// clauses, each a TermQuery on "text" for either "lucene" or "apache".
public void testMultiValues() throws Exception {
    MoreLikeThis mlt = new MoreLikeThis(reader);
    // try-with-resources so the analyzer is closed even when an assertion fails.
    try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)) {
        mlt.setAnalyzer(analyzer);
        mlt.setMinDocFreq(1);
        mlt.setMinTermFreq(1);
        mlt.setMinWordLen(1);
        mlt.setFieldNames(new String[] { "text" });
        BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader("lucene"), new StringReader("lucene release"), new StringReader("apache"), new StringReader("apache lucene"));
        Collection<BooleanClause> clauses = query.clauses();
        assertEquals("Expected 2 clauses only!", 2, clauses.size());
        // The set of acceptable terms does not change per clause, so build it once.
        Collection<Term> acceptedTerms = Arrays.asList(new Term("text", "lucene"), new Term("text", "apache"));
        for (BooleanClause clause : clauses) {
            Term term = ((TermQuery) clause.getQuery()).getTerm();
            assertTrue(acceptedTerms.contains(term));
        }
    }
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringReader(java.io.StringReader) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer)

Example 84 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class TestMoreLikeThis method testBoostFactor.

// Verifies that setBoostFactor scales each term's boost: every clause's boost must equal
// the boost reported by getOriginalValues() for that term multiplied by the boost factor.
public void testBoostFactor() throws Throwable {
    Map<String, Float> originalValues = getOriginalValues();
    MoreLikeThis mlt = new MoreLikeThis(reader);
    // try-with-resources so the analyzer is closed even when an assertion or cast fails.
    try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
        mlt.setAnalyzer(analyzer);
        mlt.setMinDocFreq(1);
        mlt.setMinTermFreq(1);
        mlt.setMinWordLen(1);
        mlt.setFieldNames(new String[] { "text" });
        mlt.setBoost(true);
        // this means that every term boost factor will be multiplied by this
        // number
        float boostFactor = 5;
        mlt.setBoostFactor(boostFactor);
        BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader("lucene release"));
        Collection<BooleanClause> clauses = query.clauses();
        assertEquals("Expected " + originalValues.size() + " clauses.", originalValues.size(), clauses.size());
        for (BooleanClause clause : clauses) {
            BoostQuery bq = (BoostQuery) clause.getQuery();
            TermQuery tq = (TermQuery) bq.getQuery();
            Float termBoost = originalValues.get(tq.getTerm().text());
            assertNotNull("Expected term " + tq.getTerm().text(), termBoost);
            float totalBoost = termBoost * boostFactor;
            assertEquals("Expected boost of " + totalBoost + " for term '" + tq.getTerm().text() + "' got " + bq.getBoost(), totalBoost, bq.getBoost(), 0.0001);
        }
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BoostQuery(org.apache.lucene.search.BoostQuery) BooleanClause(org.apache.lucene.search.BooleanClause) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringReader(java.io.StringReader)

Example 85 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class TestMoreLikeThis method getOriginalValues.

/**
 * Runs a boosted MoreLikeThis query for "lucene release" on the "text" field (without a
 * custom boost factor) and records the boost of each generated term.
 *
 * @return map from term text to the boost of its {@code BoostQuery} clause
 * @throws IOException declared by this helper; see the MoreLikeThis calls made here
 */
private Map<String, Float> getOriginalValues() throws IOException {
    Map<String, Float> originalValues = new HashMap<>();
    MoreLikeThis mlt = new MoreLikeThis(reader);
    // try-with-resources so the analyzer is closed even if the query or a cast throws.
    try (Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
        mlt.setAnalyzer(analyzer);
        mlt.setMinDocFreq(1);
        mlt.setMinTermFreq(1);
        mlt.setMinWordLen(1);
        mlt.setFieldNames(new String[] { "text" });
        mlt.setBoost(true);
        BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader("lucene release"));
        Collection<BooleanClause> clauses = query.clauses();
        for (BooleanClause clause : clauses) {
            // Each clause is expected to be a BoostQuery wrapping a TermQuery.
            BoostQuery bq = (BoostQuery) clause.getQuery();
            TermQuery tq = (TermQuery) bq.getQuery();
            originalValues.put(tq.getTerm().text(), bq.getBoost());
        }
    }
    return originalValues;
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) HashMap(java.util.HashMap) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BoostQuery(org.apache.lucene.search.BoostQuery) BooleanClause(org.apache.lucene.search.BooleanClause) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringReader(java.io.StringReader)

Aggregations

BooleanClause (org.apache.lucene.search.BooleanClause)102 BooleanQuery (org.apache.lucene.search.BooleanQuery)92 Query (org.apache.lucene.search.Query)56 TermQuery (org.apache.lucene.search.TermQuery)46 Term (org.apache.lucene.index.Term)44 BoostQuery (org.apache.lucene.search.BoostQuery)33 ArrayList (java.util.ArrayList)23 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)21 PhraseQuery (org.apache.lucene.search.PhraseQuery)20 DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery)19 SynonymQuery (org.apache.lucene.search.SynonymQuery)18 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)18 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)17 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)16 SpanQuery (org.apache.lucene.search.spans.SpanQuery)15 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)14 MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery)14 WildcardQuery (org.apache.lucene.search.WildcardQuery)14 MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery)13 PrefixQuery (org.apache.lucene.search.PrefixQuery)13