Search in sources :

Example 81 with BooleanQuery

use of org.apache.lucene.search.BooleanQuery in project lucene-solr by apache.

the class CachingNaiveBayesClassifier method getWordFreqForClassess.

/**
 * Counts, for each known class, the documents that contain the given word in any text field.
 *
 * <p>A non-empty cache entry for the word is returned directly. Otherwise the index is searched
 * once per class, but only when the word already has a (still empty) cache entry or when
 * {@code justCachedTerms} is false. The freshly computed counts are written back to the cache
 * only if the word already had an entry there.
 *
 * @param word the term to count
 * @return map from class value to hit count; classes without hits are left out, and the map is
 *         empty when no search was performed
 * @throws IOException declared for the index search
 */
private Map<BytesRef, Integer> getWordFreqForClassess(String word) throws IOException {
    final Map<BytesRef, Integer> cached = termCClassHitCache.get(word);
    final boolean hasCacheEntry = cached != null;

    // Cache hit with real content: nothing to compute.
    if (hasCacheEntry && !cached.isEmpty()) {
        return cached;
    }

    final Map<BytesRef, Integer> hitsPerClass = new ConcurrentHashMap<>();

    // The word is not tracked by the cache and only cached terms are of interest: skip the search.
    if (!hasCacheEntry && justCachedTerms) {
        return hitsPerClass;
    }

    for (BytesRef cclass : cclasses) {
        // The word may occur in any of the text fields ...
        BooleanQuery.Builder wordInAnyField = new BooleanQuery.Builder();
        for (String textFieldName : textFieldNames) {
            TermQuery wordInField = new TermQuery(new Term(textFieldName, word));
            wordInAnyField.add(new BooleanClause(wordInField, BooleanClause.Occur.SHOULD));
        }

        // ... and the document must belong to the current class (and match the optional filter query).
        BooleanQuery.Builder wordAndClass = new BooleanQuery.Builder();
        wordAndClass.add(new BooleanClause(wordInAnyField.build(), BooleanClause.Occur.MUST));
        wordAndClass.add(new BooleanClause(new TermQuery(new Term(classFieldName, cclass)), BooleanClause.Occur.MUST));
        if (query != null) {
            wordAndClass.add(query, BooleanClause.Occur.MUST);
        }

        TotalHitCountCollector hitCounter = new TotalHitCountCollector();
        indexSearcher.search(wordAndClass.build(), hitCounter);
        final int hits = hitCounter.getTotalHits();
        if (hits != 0) {
            hitsPerClass.put(cclass, hits);
        }
    }

    // Only words that already own a cache entry get their result stored.
    if (hasCacheEntry) {
        termCClassHitCache.put(word, hitsPerClass);
    }
    return hitsPerClass;
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) BooleanClause(org.apache.lucene.search.BooleanClause) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) BytesRef(org.apache.lucene.util.BytesRef)

Example 82 with BooleanQuery

use of org.apache.lucene.search.BooleanQuery in project sppanblog4springboot by whoismy8023.

the class LuceneSearcher method getQuery.

/**
 * Builds the search query for a keyword.
 *
 * <p>The keyword is parsed separately against the "content", "title" and "summary" fields, and
 * the three parsed queries are combined as optional clauses of which at least one must match.
 *
 * @param keyword the raw query text handed to the query parser
 * @return the combined query, or {@code null} when the keyword cannot be parsed
 */
private Query getQuery(String keyword) {
    final String[] searchedFields = { "content", "title", "summary" };
    try {
        // Parse against every field first, so a parse failure leaves nothing half-built.
        final Query[] perFieldQueries = new Query[searchedFields.length];
        for (int i = 0; i < searchedFields.length; i++) {
            QueryParser parser = new QueryParser(Version.LUCENE_47, searchedFields[i], analyzer);
            perFieldQueries[i] = parser.parse(keyword);
        }

        BooleanQuery anyFieldMatches = new BooleanQuery();
        for (Query perFieldQuery : perFieldQueries) {
            anyFieldMatches.add(new BooleanClause(perFieldQuery, BooleanClause.Occur.SHOULD));
        }
        anyFieldMatches.setMinimumNumberShouldMatch(1);
        return anyFieldMatches;
    } catch (ParseException e) {
        e.printStackTrace();
        return null;
    }
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) BooleanQuery(org.apache.lucene.search.BooleanQuery) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Query(org.apache.lucene.search.Query) ParseException(org.apache.lucene.queryparser.classic.ParseException)

Example 83 with BooleanQuery

use of org.apache.lucene.search.BooleanQuery in project nifi by apache.

the class LineageQuery method computeLineageForFlowFiles.

/**
 * Searches one provenance index directory for the events of the given FlowFiles.
 *
 * <p>Every FlowFile UUID becomes an optional term clause, with at least one clause required to
 * match. The matching documents are converted to events by {@code docsToEventConverter}. The
 * borrowed index searcher is always handed back to the {@code indexManager}.
 *
 * @param indexManager source of the index searcher for {@code indexDirectory}
 * @param indexDirectory the index directory to search
 * @param lineageIdentifier only checked for null during argument validation
 * @param flowFileUuids UUIDs of the FlowFiles of interest; at most {@code MAX_LINEAGE_UUIDS}
 * @param docsToEventConverter converts the matching documents into provenance events
 * @return the matching events, or an empty set when a {@link FileNotFoundException} is raised
 * @throws IOException declared for other I/O failures while searching or reading
 * @throws IllegalArgumentException if too many UUIDs are given, or neither a lineage identifier
 *         nor any UUID is given
 */
public static Set<ProvenanceEventRecord> computeLineageForFlowFiles(final IndexManager indexManager, final File indexDirectory, final String lineageIdentifier, final Collection<String> flowFileUuids, final DocumentToEventConverter docsToEventConverter) throws IOException {
    if (requireNonNull(flowFileUuids).size() > MAX_LINEAGE_UUIDS) {
        throw new IllegalArgumentException(String.format("Cannot compute lineage for more than %s FlowFiles. This lineage contains %s.", MAX_LINEAGE_UUIDS, flowFileUuids.size()));
    }

    // NOTE(review): the null test looks unreachable after requireNonNull above — confirm before removing.
    final boolean noFlowFileUuids = flowFileUuids == null || flowFileUuids.isEmpty();
    if (lineageIdentifier == null && noFlowFileUuids) {
        throw new IllegalArgumentException("Must specify either Lineage Identifier or FlowFile UUIDs to compute lineage");
    }

    try {
        final EventIndexSearcher searcher = indexManager.borrowIndexSearcher(indexDirectory);
        try {
            // Match any event related to one of the FlowFiles: each UUID is a SHOULD clause
            // and at least one of them has to match.
            final BooleanQuery flowFileIdQuery;
            if (noFlowFileUuids) {
                // NOTE(review): a null query is passed to search() below in this case — verify intended handling.
                flowFileIdQuery = null;
            } else {
                final String uuidField = SearchableFields.FlowFileUUID.getSearchableFieldName();
                flowFileIdQuery = new BooleanQuery();
                for (final String uuid : flowFileUuids) {
                    flowFileIdQuery.add(new TermQuery(new Term(uuidField, uuid)), Occur.SHOULD);
                }
                flowFileIdQuery.setMinimumNumberShouldMatch(1);
            }

            final long searchStartNanos = System.nanoTime();
            logger.debug("Searching {} for {}", indexDirectory, flowFileIdQuery);
            final TopDocs matchingDocs = searcher.getIndexSearcher().search(flowFileIdQuery, MAX_QUERY_RESULTS);
            final long searchEndNanos = System.nanoTime();

            final Set<ProvenanceEventRecord> events = docsToEventConverter.convert(matchingDocs, searcher.getIndexSearcher().getIndexReader());
            final long readEndNanos = System.nanoTime();

            final long searchMillis = TimeUnit.NANOSECONDS.toMillis(searchEndNanos - searchStartNanos);
            final long readMillis = TimeUnit.NANOSECONDS.toMillis(readEndNanos - searchEndNanos);
            logger.debug("Finished Lineage Query against {}; Lucene search took {} millis, reading records took {} millis", indexDirectory, searchMillis, readMillis);
            return events;
        } finally {
            indexManager.returnIndexSearcher(searcher);
        }
    } catch (final FileNotFoundException fnfe) {
        // Nothing has been indexed yet, or the data has already aged off.
        logger.warn("Attempted to search Provenance Index {} but could not find the file due to {}", indexDirectory, fnfe);
        if (logger.isDebugEnabled()) {
            logger.warn("", fnfe);
        }
        return Collections.emptySet();
    }
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) EventIndexSearcher(org.apache.nifi.provenance.index.EventIndexSearcher) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) FileNotFoundException(java.io.FileNotFoundException) Term(org.apache.lucene.index.Term)

Example 84 with BooleanQuery

use of org.apache.lucene.search.BooleanQuery in project nifi by apache.

the class LuceneUtil method convertQuery.

/**
 * Translates a provenance search request into a Lucene query.
 *
 * <p>Every search term becomes a required clause on its lower-cased value: a wildcard query when
 * the value contains {@code *} or {@code ?}, a plain term query otherwise. Optional file-size
 * and event-time bounds are added as required, inclusive numeric ranges. A request without any
 * search term, date bound or file-size bound matches all documents.
 *
 * @param query the provenance search request
 * @return the equivalent Lucene query
 * @throws IllegalArgumentException if a search term has a null value
 */
public static org.apache.lucene.search.Query convertQuery(final org.apache.nifi.provenance.search.Query query) {
    final boolean hasDateBounds = query.getStartDate() != null || query.getEndDate() != null;
    // File-size bounds must also keep us off the match-all shortcut; otherwise a request that
    // filters only by size would silently return every document.
    final boolean hasSizeBounds = query.getMinFileSize() != null || query.getMaxFileSize() != null;
    if (!hasDateBounds && !hasSizeBounds && query.getSearchTerms().isEmpty()) {
        return new MatchAllDocsQuery();
    }

    final BooleanQuery luceneQuery = new BooleanQuery();
    for (final SearchTerm searchTerm : query.getSearchTerms()) {
        final String searchValue = searchTerm.getValue();
        if (searchValue == null) {
            throw new IllegalArgumentException("Empty search value not allowed (for term '" + searchTerm.getSearchableField().getFriendlyName() + "')");
        }

        final Term term = new Term(searchTerm.getSearchableField().getSearchableFieldName(), searchValue.toLowerCase());
        final boolean hasWildcard = searchValue.contains("*") || searchValue.contains("?");
        final org.apache.lucene.search.Query termQuery = hasWildcard ? new WildcardQuery(term) : new TermQuery(term);
        luceneQuery.add(new BooleanClause(termQuery, Occur.MUST));
    }

    final Long minBytes = query.getMinFileSize() == null ? null : DataUnit.parseDataSize(query.getMinFileSize(), DataUnit.B).longValue();
    final Long maxBytes = query.getMaxFileSize() == null ? null : DataUnit.parseDataSize(query.getMaxFileSize(), DataUnit.B).longValue();
    if (minBytes != null || maxBytes != null) {
        luceneQuery.add(NumericRangeQuery.newLongRange(SearchableFields.FileSize.getSearchableFieldName(), minBytes, maxBytes, true, true), Occur.MUST);
    }

    final Long minDateTime = query.getStartDate() == null ? null : query.getStartDate().getTime();
    final Long maxDateTime = query.getEndDate() == null ? null : query.getEndDate().getTime();
    if (maxDateTime != null || minDateTime != null) {
        luceneQuery.add(NumericRangeQuery.newLongRange(SearchableFields.EventTime.getSearchableFieldName(), minDateTime, maxDateTime, true, true), Occur.MUST);
    }
    return luceneQuery;
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) BooleanQuery(org.apache.lucene.search.BooleanQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) SearchTerm(org.apache.nifi.provenance.search.SearchTerm) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery)

Example 85 with BooleanQuery

use of org.apache.lucene.search.BooleanQuery in project entando-core by entando.

the class SearcherDAO method createQuery.

/**
 * Translates a single search-engine filter into a Lucene query.
 *
 * <p>With a non-null filter value: a String is split on whitespace and matched word by word (or
 * as one phrase for the EXACT option), optionally also against the attachment field; a Date or
 * a Number is matched as a single required term. With a null value, the filter's start/end
 * bounds become an inclusive term range; numeric bounds are not handled yet.
 *
 * @param filter the filter to translate
 * @return the query for the filter; a boolean query without clauses when nothing was built
 */
private Query createQuery(SearchEngineFilter filter) {
    BooleanQuery fieldQuery = new BooleanQuery();
    String key = filter.getKey();
    String attachmentKey = key + IIndexerDAO.ATTACHMENT_FIELD_SUFFIX;
    Object value = filter.getValue();
    if (null != value) {
        if (value instanceof String) {
            SearchEngineFilter.TextSearchOption option = filter.getTextSearchOption();
            if (null == option) {
                option = SearchEngineFilter.TextSearchOption.AT_LEAST_ONE_WORD;
            }
            String stringValue = value.toString();
            String[] values = stringValue.split("\\s+");
            if (!option.equals(SearchEngineFilter.TextSearchOption.EXACT)) {
                // Default (AT_LEAST_ONE_WORD): every word is optional.
                BooleanClause.Occur bc = BooleanClause.Occur.SHOULD;
                if (option.equals(SearchEngineFilter.TextSearchOption.ALL_WORDS)) {
                    bc = BooleanClause.Occur.MUST;
                } else if (option.equals(SearchEngineFilter.TextSearchOption.ANY_WORD)) {
                    // NOTE(review): ANY_WORD excludes documents containing the words — confirm this
                    // is the intended meaning of the option.
                    bc = BooleanClause.Occur.MUST_NOT;
                }
                for (int i = 0; i < values.length; i++) {
                    // NOTE: search lower case....
                    String word = values[i].toLowerCase();
                    TermQuery term = new TermQuery(new Term(key, word));
                    if (filter.isIncludeAttachments()) {
                        // The word may be found either in the field itself or in its attachment field.
                        BooleanQuery compositeQuery = new BooleanQuery();
                        compositeQuery.add(term, BooleanClause.Occur.SHOULD);
                        TermQuery termAttachment = new TermQuery(new Term(attachmentKey, word));
                        compositeQuery.add(termAttachment, BooleanClause.Occur.SHOULD);
                        fieldQuery.add(compositeQuery, bc);
                    } else {
                        fieldQuery.add(term, bc);
                    }
                }
            } else {
                PhraseQuery phraseQuery = new PhraseQuery();
                for (int i = 0; i < values.length; i++) {
                    // NOTE: search lower case....
                    phraseQuery.add(new Term(key, values[i].toLowerCase()));
                }
                if (filter.isIncludeAttachments()) {
                    // The phrase may be found either in the field itself or in its attachment field.
                    fieldQuery.add(phraseQuery, BooleanClause.Occur.SHOULD);
                    PhraseQuery phraseQuery2 = new PhraseQuery();
                    for (int i = 0; i < values.length; i++) {
                        // NOTE: search lower case....
                        phraseQuery2.add(new Term(attachmentKey, values[i].toLowerCase()));
                    }
                    fieldQuery.add(phraseQuery2, BooleanClause.Occur.SHOULD);
                } else {
                    // Without attachments the bare phrase query is returned, not wrapped in a boolean query.
                    return phraseQuery;
                }
            }
        } else if (value instanceof Date) {
            String toString = DateTools.timeToString(((Date) value).getTime(), DateTools.Resolution.MINUTE);
            TermQuery term = new TermQuery(new Term(filter.getKey(), toString));
            fieldQuery.add(term, BooleanClause.Occur.MUST);
        } else if (value instanceof Number) {
            TermQuery term = new TermQuery(new Term(filter.getKey(), value.toString()));
            fieldQuery.add(term, BooleanClause.Occur.MUST);
        }
    } else {
        if (filter.getStart() instanceof Number || filter.getEnd() instanceof Number) {
            // TODO: numeric ranges are not implemented; the query is returned without clauses.
        } else {
            String start = null;
            String end = null;
            if (filter.getStart() instanceof Date || filter.getEnd() instanceof Date) {
                if (null != filter.getStart()) {
                    start = DateTools.timeToString(((Date) filter.getStart()).getTime(), DateTools.Resolution.MINUTE);
                }
                if (null != filter.getEnd()) {
                    end = DateTools.timeToString(((Date) filter.getEnd()).getTime(), DateTools.Resolution.MINUTE);
                }
            } else {
                start = (null != filter.getStart()) ? filter.getStart().toString().toLowerCase() : null;
                end = (null != filter.getEnd()) ? filter.getEnd().toString().toLowerCase() : null;
            }
            // Encode the bounds explicitly as UTF-8 instead of relying on the platform default
            // charset, so non-ASCII bounds produce the same bytes on every JVM.
            BytesRef byteStart = (null != start) ? new BytesRef(start.getBytes(java.nio.charset.StandardCharsets.UTF_8)) : null;
            BytesRef byteEnd = (null != end) ? new BytesRef(end.getBytes(java.nio.charset.StandardCharsets.UTF_8)) : null;
            TermRangeQuery range = new TermRangeQuery(filter.getKey(), byteStart, byteEnd, true, true);
            fieldQuery.add(range, BooleanClause.Occur.MUST);
        }
    }
    return fieldQuery;
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) Term(org.apache.lucene.index.Term) Date(java.util.Date) BooleanClause(org.apache.lucene.search.BooleanClause) SearchEngineFilter(org.entando.entando.aps.system.services.searchengine.SearchEngineFilter) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

BooleanQuery (org.apache.lucene.search.BooleanQuery)297 TermQuery (org.apache.lucene.search.TermQuery)176 Term (org.apache.lucene.index.Term)144 Query (org.apache.lucene.search.Query)129 BooleanClause (org.apache.lucene.search.BooleanClause)89 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)70 BoostQuery (org.apache.lucene.search.BoostQuery)58 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)52 PhraseQuery (org.apache.lucene.search.PhraseQuery)50 ArrayList (java.util.ArrayList)47 TopDocs (org.apache.lucene.search.TopDocs)47 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)43 WildcardQuery (org.apache.lucene.search.WildcardQuery)42 IndexSearcher (org.apache.lucene.search.IndexSearcher)40 IndexReader (org.apache.lucene.index.IndexReader)39 PrefixQuery (org.apache.lucene.search.PrefixQuery)39 Test (org.junit.Test)39 TermRangeQuery (org.apache.lucene.search.TermRangeQuery)38 Document (org.apache.lucene.document.Document)36 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)33