use of org.apache.lucene.search.BooleanQuery in project lucene-solr by apache.
the class CachingNaiveBayesClassifier method getWordFreqForClassess.
/**
 * Counts, per class, the documents that contain {@code word} in at least one of the
 * text fields and carry that class in the class field (and match {@code query}, when
 * one is set).
 *
 * <p>A non-empty entry in {@code termCClassHitCache} is returned as is. An empty entry
 * is recomputed and written back to the cache. A word with no entry at all is only
 * searched when {@code justCachedTerms} is false, and the result is not cached.
 *
 * @param word the term to count
 * @return map from class to hit count; classes with zero hits are omitted, so the map
 *         may be empty
 * @throws IOException if the index search fails
 */
private Map<BytesRef, Integer> getWordFreqForClassess(String word) throws IOException {
    final Map<BytesRef, Integer> cached = termCClassHitCache.get(word);
    final boolean tracked = cached != null;

    // A populated cache entry is already the final answer.
    if (tracked && !cached.isEmpty()) {
        return cached;
    }

    final Map<BytesRef, Integer> hitsPerClass = new ConcurrentHashMap<>();

    // The word has no cache slot and only cached terms are considered: nothing to search.
    if (!tracked && justCachedTerms) {
        return hitsPerClass;
    }

    for (BytesRef cclass : cclasses) {
        // The word may appear in any one of the text fields.
        BooleanQuery.Builder wordInAnyField = new BooleanQuery.Builder();
        for (String textFieldName : textFieldNames) {
            TermQuery wordQuery = new TermQuery(new Term(textFieldName, word));
            wordInAnyField.add(new BooleanClause(wordQuery, BooleanClause.Occur.SHOULD));
        }
        TermQuery classQuery = new TermQuery(new Term(classFieldName, cclass));

        // Word match AND class match AND (optionally) the extra filter query.
        BooleanQuery.Builder wordInClass = new BooleanQuery.Builder();
        wordInClass.add(new BooleanClause(wordInAnyField.build(), BooleanClause.Occur.MUST));
        wordInClass.add(new BooleanClause(classQuery, BooleanClause.Occur.MUST));
        if (query != null) {
            wordInClass.add(query, BooleanClause.Occur.MUST);
        }

        TotalHitCountCollector hitCounter = new TotalHitCountCollector();
        indexSearcher.search(wordInClass.build(), hitCounter);
        int hits = hitCounter.getTotalHits();
        if (hits != 0) {
            hitsPerClass.put(cclass, hits);
        }
    }

    // Only words that already own a cache slot get their result stored.
    if (tracked) {
        termCClassHitCache.put(word, hitsPerClass);
    }
    return hitsPerClass;
}
use of org.apache.lucene.search.BooleanQuery in project sppanblog4springboot by whoismy8023.
the class LuceneSearcher method getQuery.
/**
 * Builds the search query for a keyword.
 *
 * <p>The keyword is parsed once per searched field ("content", "title", "summary") and
 * the three resulting queries are combined as optional clauses of which at least one
 * must match.
 *
 * @param keyword the text handed to the query parser
 * @return the combined query, or {@code null} when the keyword cannot be parsed (the
 *         parse error is printed to standard error)
 */
private Query getQuery(String keyword) {
    final String[] searchedFields = {"content", "title", "summary"};
    try {
        BooleanQuery anyField = new BooleanQuery();
        for (String field : searchedFields) {
            Query fieldQuery = new QueryParser(Version.LUCENE_47, field, analyzer).parse(keyword);
            anyField.add(new BooleanClause(fieldQuery, BooleanClause.Occur.SHOULD));
        }
        anyField.setMinimumNumberShouldMatch(1);
        return anyField;
    } catch (ParseException e) {
        e.printStackTrace();
        return null;
    }
}
use of org.apache.lucene.search.BooleanQuery in project nifi by apache.
the class LineageQuery method computeLineageForFlowFiles.
/**
 * Searches a provenance index for every event that belongs to one of the given FlowFiles.
 *
 * <p>The FlowFile UUIDs are combined into one query of optional clauses, at least one of
 * which must match. When no UUIDs are supplied (only a lineage identifier), there is
 * nothing to search on and an empty set is returned; previously a {@code null} query was
 * handed to Lucene in that case, which failed with a {@link NullPointerException}.
 *
 * @param indexManager supplies and takes back the searcher for {@code indexDirectory}
 * @param indexDirectory the index to search
 * @param lineageIdentifier only used for argument validation: it may be {@code null}
 *        as long as {@code flowFileUuids} is not empty
 * @param flowFileUuids UUIDs of the FlowFiles of interest; must not be {@code null}
 * @param docsToEventConverter turns the matching documents into event records
 * @return the matching events; empty when no UUIDs were given or the index files
 *         could not be found
 * @throws NullPointerException if {@code flowFileUuids} is {@code null}
 * @throws IllegalArgumentException if more than {@code MAX_LINEAGE_UUIDS} UUIDs are given,
 *         or if neither a lineage identifier nor any UUID is given
 * @throws IOException if the index cannot be read
 */
public static Set<ProvenanceEventRecord> computeLineageForFlowFiles(final IndexManager indexManager, final File indexDirectory, final String lineageIdentifier,
        final Collection<String> flowFileUuids, final DocumentToEventConverter docsToEventConverter) throws IOException {
    if (requireNonNull(flowFileUuids).size() > MAX_LINEAGE_UUIDS) {
        throw new IllegalArgumentException(String.format("Cannot compute lineage for more than %s FlowFiles. This lineage contains %s.", MAX_LINEAGE_UUIDS, flowFileUuids.size()));
    }
    if (lineageIdentifier == null && flowFileUuids.isEmpty()) {
        throw new IllegalArgumentException("Must specify either Lineage Identifier or FlowFile UUIDs to compute lineage");
    }
    if (flowFileUuids.isEmpty()) {
        // Without UUID clauses there is no query to run; searching with a null query
        // would throw a NullPointerException inside Lucene.
        return Collections.emptySet();
    }

    final EventIndexSearcher searcher;
    try {
        searcher = indexManager.borrowIndexSearcher(indexDirectory);
        try {
            // Create a query for all Events related to the FlowFiles of interest. We do this by adding all ID's as
            // "SHOULD" clauses and then setting the minimum required to 1.
            final BooleanQuery flowFileIdQuery = new BooleanQuery();
            for (final String flowFileUuid : flowFileUuids) {
                flowFileIdQuery.add(new TermQuery(new Term(SearchableFields.FlowFileUUID.getSearchableFieldName(), flowFileUuid)), Occur.SHOULD);
            }
            flowFileIdQuery.setMinimumNumberShouldMatch(1);

            final long searchStart = System.nanoTime();
            logger.debug("Searching {} for {}", indexDirectory, flowFileIdQuery);
            final TopDocs uuidQueryTopDocs = searcher.getIndexSearcher().search(flowFileIdQuery, MAX_QUERY_RESULTS);
            final long searchEnd = System.nanoTime();

            final Set<ProvenanceEventRecord> recs = docsToEventConverter.convert(uuidQueryTopDocs, searcher.getIndexSearcher().getIndexReader());
            final long readDocsEnd = System.nanoTime();
            logger.debug("Finished Lineage Query against {}; Lucene search took {} millis, reading records took {} millis", indexDirectory,
                    TimeUnit.NANOSECONDS.toMillis(searchEnd - searchStart), TimeUnit.NANOSECONDS.toMillis(readDocsEnd - searchEnd));
            return recs;
        } finally {
            // Always hand the searcher back, even when the search or conversion fails.
            indexManager.returnIndexSearcher(searcher);
        }
    } catch (final FileNotFoundException fnfe) {
        // nothing has been indexed yet, or the data has already aged off
        logger.warn("Attempted to search Provenance Index {} but could not find the file due to {}", indexDirectory, fnfe);
        if (logger.isDebugEnabled()) {
            // The stack trace is only emitted when debug logging is enabled.
            logger.warn("", fnfe);
        }
        return Collections.emptySet();
    }
}
use of org.apache.lucene.search.BooleanQuery in project nifi by apache.
the class LuceneUtil method convertQuery.
/**
 * Translates a provenance search query into the equivalent Lucene query.
 *
 * <p>Every search term, the file-size range and the event-time range become mandatory
 * clauses. Search values containing {@code *} or {@code ?} are turned into wildcard
 * queries, all others into exact term queries; values are lower-cased first.
 *
 * <p>A query with no search terms, no date bounds and no file-size bounds matches all
 * documents. The file-size bounds are part of that check so that a query filtering only
 * on file size is not mistaken for an unconstrained one.
 *
 * @param query the provenance query to convert
 * @return a Lucene query expressing the same constraints
 * @throws IllegalArgumentException if a search term has a {@code null} value
 */
public static org.apache.lucene.search.Query convertQuery(final org.apache.nifi.provenance.search.Query query) {
    final boolean hasDateFilter = query.getStartDate() != null || query.getEndDate() != null;
    final boolean hasSizeFilter = query.getMinFileSize() != null || query.getMaxFileSize() != null;
    if (!hasDateFilter && !hasSizeFilter && query.getSearchTerms().isEmpty()) {
        return new MatchAllDocsQuery();
    }

    final BooleanQuery luceneQuery = new BooleanQuery();
    for (final SearchTerm searchTerm : query.getSearchTerms()) {
        final String searchValue = searchTerm.getValue();
        if (searchValue == null) {
            throw new IllegalArgumentException("Empty search value not allowed (for term '" + searchTerm.getSearchableField().getFriendlyName() + "')");
        }

        final Term term = new Term(searchTerm.getSearchableField().getSearchableFieldName(), searchValue.toLowerCase());
        if (searchValue.contains("*") || searchValue.contains("?")) {
            luceneQuery.add(new BooleanClause(new WildcardQuery(term), Occur.MUST));
        } else {
            luceneQuery.add(new BooleanClause(new TermQuery(term), Occur.MUST));
        }
    }

    // A null bound leaves that side of the range open.
    final Long minBytes = query.getMinFileSize() == null ? null : DataUnit.parseDataSize(query.getMinFileSize(), DataUnit.B).longValue();
    final Long maxBytes = query.getMaxFileSize() == null ? null : DataUnit.parseDataSize(query.getMaxFileSize(), DataUnit.B).longValue();
    if (minBytes != null || maxBytes != null) {
        luceneQuery.add(NumericRangeQuery.newLongRange(SearchableFields.FileSize.getSearchableFieldName(), minBytes, maxBytes, true, true), Occur.MUST);
    }

    final Long minDateTime = query.getStartDate() == null ? null : query.getStartDate().getTime();
    final Long maxDateTime = query.getEndDate() == null ? null : query.getEndDate().getTime();
    if (maxDateTime != null || minDateTime != null) {
        luceneQuery.add(NumericRangeQuery.newLongRange(SearchableFields.EventTime.getSearchableFieldName(), minDateTime, maxDateTime, true, true), Occur.MUST);
    }
    return luceneQuery;
}
use of org.apache.lucene.search.BooleanQuery in project entando-core by entando.
the class SearcherDAO method createQuery.
/**
 * Builds the Lucene query for a single search-engine filter.
 *
 * <p>When the filter carries a value:
 * <ul>
 * <li>a {@code String} is split on whitespace and lower-cased. With the {@code EXACT}
 * option the words form a phrase query; otherwise each word becomes a term clause whose
 * occurrence is {@code MUST} for {@code ALL_WORDS}, {@code MUST_NOT} for {@code ANY_WORD}
 * and {@code SHOULD} for anything else (a missing option defaults to
 * {@code AT_LEAST_ONE_WORD}). When attachments are included, the same search is also run
 * against the attachment field;</li>
 * <li>a {@code Date} (at minute resolution) or a {@code Number} becomes a single mandatory
 * term clause;</li>
 * <li>any other type adds no clause.</li>
 * </ul>
 *
 * <p>When the filter carries no value, its start/end bounds become an inclusive term
 * range. Numeric bounds are not implemented yet and add no clause.
 *
 * @param filter the filter to translate
 * @return the query for the filter; a bare phrase query for an exact match without
 *         attachments, otherwise a boolean query (possibly without clauses)
 */
private Query createQuery(SearchEngineFilter filter) {
    BooleanQuery fieldQuery = new BooleanQuery();
    String key = filter.getKey();
    String attachmentKey = key + IIndexerDAO.ATTACHMENT_FIELD_SUFFIX;
    Object value = filter.getValue();
    if (null != value) {
        if (value instanceof String) {
            SearchEngineFilter.TextSearchOption option = filter.getTextSearchOption();
            if (null == option) {
                option = SearchEngineFilter.TextSearchOption.AT_LEAST_ONE_WORD;
            }
            String stringValue = value.toString();
            String[] values = stringValue.split("\\s+");
            if (!option.equals(SearchEngineFilter.TextSearchOption.EXACT)) {
                BooleanClause.Occur bc = BooleanClause.Occur.SHOULD;
                if (option.equals(SearchEngineFilter.TextSearchOption.ALL_WORDS)) {
                    bc = BooleanClause.Occur.MUST;
                } else if (option.equals(SearchEngineFilter.TextSearchOption.ANY_WORD)) {
                    bc = BooleanClause.Occur.MUST_NOT;
                }
                for (int i = 0; i < values.length; i++) {
                    // NOTE: search lower case....
                    TermQuery term = new TermQuery(new Term(key, values[i].toLowerCase()));
                    if (filter.isIncludeAttachments()) {
                        // The word may be found either in the field itself or in its attachment field.
                        BooleanQuery compositeQuery = new BooleanQuery();
                        compositeQuery.add(term, BooleanClause.Occur.SHOULD);
                        TermQuery termAttachment = new TermQuery(new Term(attachmentKey, values[i].toLowerCase()));
                        compositeQuery.add(termAttachment, BooleanClause.Occur.SHOULD);
                        fieldQuery.add(compositeQuery, bc);
                    } else {
                        fieldQuery.add(term, bc);
                    }
                }
            } else {
                PhraseQuery phraseQuery = new PhraseQuery();
                for (int i = 0; i < values.length; i++) {
                    // NOTE: search lower case....
                    phraseQuery.add(new Term(key, values[i].toLowerCase()));
                }
                if (filter.isIncludeAttachments()) {
                    // Same phrase against the field or its attachment field.
                    fieldQuery.add(phraseQuery, BooleanClause.Occur.SHOULD);
                    PhraseQuery phraseQuery2 = new PhraseQuery();
                    for (int i = 0; i < values.length; i++) {
                        // NOTE: search lower case....
                        phraseQuery2.add(new Term(attachmentKey, values[i].toLowerCase()));
                    }
                    fieldQuery.add(phraseQuery2, BooleanClause.Occur.SHOULD);
                } else {
                    return phraseQuery;
                }
            }
        } else if (value instanceof Date) {
            String toString = DateTools.timeToString(((Date) value).getTime(), DateTools.Resolution.MINUTE);
            TermQuery term = new TermQuery(new Term(filter.getKey(), toString));
            fieldQuery.add(term, BooleanClause.Occur.MUST);
        } else if (value instanceof Number) {
            TermQuery term = new TermQuery(new Term(filter.getKey(), value.toString()));
            fieldQuery.add(term, BooleanClause.Occur.MUST);
        }
    } else {
        if (filter.getStart() instanceof Number || filter.getEnd() instanceof Number) {
            // TODO: numeric ranges are not implemented; no clause is added for them.
        } else {
            String start = null;
            String end = null;
            if (filter.getStart() instanceof Date || filter.getEnd() instanceof Date) {
                if (null != filter.getStart()) {
                    start = DateTools.timeToString(((Date) filter.getStart()).getTime(), DateTools.Resolution.MINUTE);
                }
                if (null != filter.getEnd()) {
                    end = DateTools.timeToString(((Date) filter.getEnd()).getTime(), DateTools.Resolution.MINUTE);
                }
            } else {
                start = (null != filter.getStart()) ? filter.getStart().toString().toLowerCase() : null;
                end = (null != filter.getEnd()) ? filter.getEnd().toString().toLowerCase() : null;
            }
            // BytesRef(CharSequence) encodes the bound as UTF-8, the encoding Lucene uses for
            // terms; String.getBytes() would use the platform default charset instead.
            BytesRef byteStart = (null != start) ? new BytesRef(start) : null;
            BytesRef byteEnd = (null != end) ? new BytesRef(end) : null;
            // A null bound leaves that side of the range open; both ends are inclusive.
            TermRangeQuery range = new TermRangeQuery(filter.getKey(), byteStart, byteEnd, true, true);
            fieldQuery.add(range, BooleanClause.Occur.MUST);
        }
    }
    return fieldQuery;
}
Aggregations