Search in sources :

Example 91 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class SimpleNaiveBayesClassifier method getWordFreqForClass.

/**
   * Returns the number of documents of the input class ( from the whole index or from a subset)
   * that contains the word ( in a specific field or in all the fields if no one selected)
   * @param word the token produced by the analyzer
   * @param term the term representing the class
   * @return the number of documents of the input class
   * @throws IOException if a low level I/O problem happens
   */
private int getWordFreqForClass(String word, Term term) throws IOException {
    BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
    BooleanQuery.Builder subQuery = new BooleanQuery.Builder();
    for (String textFieldName : textFieldNames) {
        subQuery.add(new BooleanClause(new TermQuery(new Term(textFieldName, word)), BooleanClause.Occur.SHOULD));
    }
    booleanQuery.add(new BooleanClause(subQuery.build(), BooleanClause.Occur.MUST));
    booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
    if (query != null) {
        booleanQuery.add(query, BooleanClause.Occur.MUST);
    }
    TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
    indexSearcher.search(booleanQuery.build(), totalHitCountCollector);
    return totalHitCountCollector.getTotalHits();
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) Term(org.apache.lucene.index.Term)

Example 92 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class SimpleNaiveBayesClassifier method countDocsWithClass.

/**
   * count the number of documents in the index having at least a value for the 'class' field
   *
   * @return the no. of documents having a value for the 'class' field
   * @throws IOException if accessing to term vectors or search fails
   */
protected int countDocsWithClass() throws IOException {
    Terms terms = MultiFields.getTerms(this.indexReader, this.classFieldName);
    int docCount;
    if (terms == null || terms.getDocCount() == -1) {
        // in case codec doesn't support getDocCount
        TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector();
        BooleanQuery.Builder q = new BooleanQuery.Builder();
        q.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))), BooleanClause.Occur.MUST));
        if (query != null) {
            q.add(query, BooleanClause.Occur.MUST);
        }
        indexSearcher.search(q.build(), classQueryCountCollector);
        docCount = classQueryCountCollector.getTotalHits();
    } else {
        docCount = terms.getDocCount();
    }
    return docCount;
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) BooleanQuery(org.apache.lucene.search.BooleanQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) Terms(org.apache.lucene.index.Terms) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) Term(org.apache.lucene.index.Term)

Example 93 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class KNearestNeighborDocumentClassifier method knnSearch.

/**
   * Returns the top k results from a More Like This query based on the input document
   *
   * @param document the document to use for More Like This search
   * @return the top results for the MLT query
   * @throws IOException If there is a low-level I/O error
   */
private TopDocs knnSearch(Document document) throws IOException {
    BooleanQuery.Builder mltQuery = new BooleanQuery.Builder();
    for (String fieldName : textFieldNames) {
        String boost = null;
        if (fieldName.contains("^")) {
            String[] field2boost = fieldName.split("\\^");
            fieldName = field2boost[0];
            boost = field2boost[1];
        }
        String[] fieldValues = document.getValues(fieldName);
        // we want always to use the boost coming from TF * IDF of the term
        mlt.setBoost(true);
        if (boost != null) {
            // this is an additional multiplicative boost coming from the field boost
            mlt.setBoostFactor(Float.parseFloat(boost));
        }
        mlt.setAnalyzer(field2analyzer.get(fieldName));
        for (String fieldContent : fieldValues) {
            mltQuery.add(new BooleanClause(mlt.like(fieldName, new StringReader(fieldContent)), BooleanClause.Occur.SHOULD));
        }
    }
    Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*"));
    mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST));
    if (query != null) {
        mltQuery.add(query, BooleanClause.Occur.MUST);
    }
    return indexSearcher.search(mltQuery.build(), k);
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) BooleanQuery(org.apache.lucene.search.BooleanQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) Query(org.apache.lucene.search.Query) WildcardQuery(org.apache.lucene.search.WildcardQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) StringReader(java.io.StringReader) Term(org.apache.lucene.index.Term)

Example 94 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class QueryBuilder method addMinShouldMatchToBoolean.

/**
   * Rebuilds a boolean query and sets a new minimum number should match value.
   */
private BooleanQuery addMinShouldMatchToBoolean(BooleanQuery query, float fraction) {
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.setMinimumNumberShouldMatch((int) (fraction * query.clauses().size()));
    for (BooleanClause clause : query) {
        builder.add(clause);
    }
    return builder.build();
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) BooleanQuery(org.apache.lucene.search.BooleanQuery)

Example 95 with BooleanClause

use of org.apache.lucene.search.BooleanClause in project lucene-solr by apache.

the class MultiTermHighlighting method extractAutomata.

/**
   * Extracts MultiTermQueries that match the provided field predicate.
   * Returns equivalent automata that will match terms.
   */
public static CharacterRunAutomaton[] extractAutomata(Query query, Predicate<String> fieldMatcher, boolean lookInSpan, Function<Query, Collection<Query>> preRewriteFunc) {
    // TODO Lucene needs a Query visitor API!  LUCENE-3041
    List<CharacterRunAutomaton> list = new ArrayList<>();
    Collection<Query> customSubQueries = preRewriteFunc.apply(query);
    if (customSubQueries != null) {
        for (Query sub : customSubQueries) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (query instanceof BooleanQuery) {
        for (BooleanClause clause : (BooleanQuery) query) {
            if (!clause.isProhibited()) {
                list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
            }
        }
    } else if (query instanceof ConstantScoreQuery) {
        list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof BoostQuery) {
        list.addAll(Arrays.asList(extractAutomata(((BoostQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof DisjunctionMaxQuery) {
        for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanOrQuery) {
        for (Query sub : ((SpanOrQuery) query).getClauses()) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanNearQuery) {
        for (Query sub : ((SpanNearQuery) query).getClauses()) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanNotQuery) {
        list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
        list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanBoostQuery) {
        list.addAll(Arrays.asList(extractAutomata(((SpanBoostQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
        list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof PrefixQuery) {
        final PrefixQuery pq = (PrefixQuery) query;
        Term prefix = pq.getPrefix();
        if (fieldMatcher.test(prefix.field())) {
            list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()), Automata.makeAnyString())) {

                @Override
                public String toString() {
                    return pq.toString();
                }
            });
        }
    } else if (query instanceof FuzzyQuery) {
        final FuzzyQuery fq = (FuzzyQuery) query;
        if (fieldMatcher.test(fq.getField())) {
            String utf16 = fq.getTerm().text();
            int[] termText = new int[utf16.codePointCount(0, utf16.length())];
            for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
                termText[j++] = cp = utf16.codePointAt(i);
            }
            int termLength = termText.length;
            int prefixLength = Math.min(fq.getPrefixLength(), termLength);
            String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
            LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
            String prefix = UnicodeUtil.newString(termText, 0, prefixLength);
            Automaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix);
            list.add(new CharacterRunAutomaton(automaton) {

                @Override
                public String toString() {
                    return fq.toString();
                }
            });
        }
    } else if (query instanceof TermRangeQuery) {
        final TermRangeQuery tq = (TermRangeQuery) query;
        if (fieldMatcher.test(tq.getField())) {
            final CharsRef lowerBound;
            if (tq.getLowerTerm() == null) {
                lowerBound = null;
            } else {
                lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString());
            }
            final CharsRef upperBound;
            if (tq.getUpperTerm() == null) {
                upperBound = null;
            } else {
                upperBound = new CharsRef(tq.getUpperTerm().utf8ToString());
            }
            final boolean includeLower = tq.includesLower();
            final boolean includeUpper = tq.includesUpper();
            final CharsRef scratch = new CharsRef();
            @SuppressWarnings("deprecation") final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
            // this is *not* an automaton, but its very simple
            list.add(new CharacterRunAutomaton(Automata.makeEmpty()) {

                @Override
                public boolean run(char[] s, int offset, int length) {
                    scratch.chars = s;
                    scratch.offset = offset;
                    scratch.length = length;
                    if (lowerBound != null) {
                        int cmp = comparator.compare(scratch, lowerBound);
                        if (cmp < 0 || (!includeLower && cmp == 0)) {
                            return false;
                        }
                    }
                    if (upperBound != null) {
                        int cmp = comparator.compare(scratch, upperBound);
                        if (cmp > 0 || (!includeUpper && cmp == 0)) {
                            return false;
                        }
                    }
                    return true;
                }

                @Override
                public String toString() {
                    return tq.toString();
                }
            });
        }
    } else if (query instanceof AutomatonQuery) {
        final AutomatonQuery aq = (AutomatonQuery) query;
        if (fieldMatcher.test(aq.getField())) {
            list.add(new CharacterRunAutomaton(aq.getAutomaton()) {

                @Override
                public String toString() {
                    return aq.toString();
                }
            });
        }
    }
    return list.toArray(new CharacterRunAutomaton[list.size()]);
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) SpanPositionCheckQuery(org.apache.lucene.search.spans.SpanPositionCheckQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) ArrayList(java.util.ArrayList) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) Comparator(java.util.Comparator) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) Automaton(org.apache.lucene.util.automaton.Automaton) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) LevenshteinAutomata(org.apache.lucene.util.automaton.LevenshteinAutomata) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) CharsRef(org.apache.lucene.util.CharsRef) BooleanClause(org.apache.lucene.search.BooleanClause) SpanPositionCheckQuery(org.apache.lucene.search.spans.SpanPositionCheckQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Aggregations

BooleanClause (org.apache.lucene.search.BooleanClause)102 BooleanQuery (org.apache.lucene.search.BooleanQuery)92 Query (org.apache.lucene.search.Query)56 TermQuery (org.apache.lucene.search.TermQuery)46 Term (org.apache.lucene.index.Term)44 BoostQuery (org.apache.lucene.search.BoostQuery)33 ArrayList (java.util.ArrayList)23 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)21 PhraseQuery (org.apache.lucene.search.PhraseQuery)20 DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery)19 SynonymQuery (org.apache.lucene.search.SynonymQuery)18 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)18 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)17 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)16 SpanQuery (org.apache.lucene.search.spans.SpanQuery)15 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)14 MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery)14 WildcardQuery (org.apache.lucene.search.WildcardQuery)14 MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery)13 PrefixQuery (org.apache.lucene.search.PrefixQuery)13