Use of org.apache.lucene.queryparser.classic.MultiFieldQueryParser in project Anserini by castorini.
The class EntityLinking, method search.
/**
 * Returns a list of query results.
 *
 * @param queryName the entity name to search
 * @param numHits the maximum number of hits to return
 * @return a list of top-ranked entities
 * @throws Exception on error
 */
public List<RankedEntity> search(String queryName, int numHits) throws Exception {
    List<RankedEntity> rankedEntities = new ArrayList<>();

    // Initialize index searcher
    IndexSearcher searcher = new IndexSearcher(reader);

    // do exact search on query name
    QueryParser queryParser = new QueryParser(IndexTopics.FIELD_NAME, new SimpleAnalyzer());
    queryParser.setAutoGeneratePhraseQueries(true);
    queryParser.setPhraseSlop(3);
    queryName = "\"" + queryName + "\"";
    Query query = queryParser.parse(queryName);
    TopDocs rs = searcher.search(query, numHits);
    ScoredDocuments docs = ScoredDocuments.fromTopDocs(rs, searcher);

    for (int i = 0; i < docs.documents.length; i++) {
        float score = docs.scores[i];
        String mid = docs.documents[i].getField(IndexTopics.FIELD_TOPIC_MID).stringValue();
        String shortMid = getShortMid(mid);
        String name = docs.documents[i].getField(IndexTopics.FIELD_NAME).stringValue();
        String label = docs.documents[i].getField(IndexTopics.FIELD_LABEL).stringValue();
        rankedEntities.add(new RankedEntity(shortMid, score, name, label));
    }

    if (docs.documents.length >= numHits) {
        return rankedEntities;
    }

    int numHitsLeft = numHits - docs.documents.length;

    // do TF-IDF search
    Similarity similarity = new ClassicSimilarity();
    searcher.setSimilarity(similarity);
    queryParser = new MultiFieldQueryParser(
        new String[] { IndexTopics.FIELD_NAME, IndexTopics.FIELD_LABEL }, new SimpleAnalyzer());
    queryParser.setDefaultOperator(QueryParser.Operator.AND);
    query = queryParser.parse(queryName);
    rs = searcher.search(query, numHitsLeft);
    docs = ScoredDocuments.fromTopDocs(rs, searcher);

    for (int i = 0; i < docs.documents.length; i++) {
        float score = docs.scores[i];
        String mid = docs.documents[i].getField(IndexTopics.FIELD_TOPIC_MID).stringValue();
        String shortMid = getShortMid(mid);
        String name = docs.documents[i].getField(IndexTopics.FIELD_NAME).stringValue();
        String label = docs.documents[i].getField(IndexTopics.FIELD_LABEL).stringValue();
        rankedEntities.add(new RankedEntity(shortMid, score, name, label));
    }

    return rankedEntities;
}
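The method falls back from exact phrase matching to ranked TF-IDF retrieval only when the exact search leaves room below numHits. A minimal caller sketch follows; the EntityLinking constructor shown is hypothetical, since the snippet does not show how the reader is created.

// Hypothetical usage; a constructor taking an index path is an assumption.
EntityLinking linker = new EntityLinking(indexPath);
List<RankedEntity> top = linker.search("Barack Obama", 10);
for (RankedEntity entity : top) {
    // Exact phrase hits (if any) come first; TF-IDF hits over the name and
    // label fields fill the remainder up to the requested 10.
    System.out.println(entity);
}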
Use of org.apache.lucene.queryparser.classic.MultiFieldQueryParser in project ignite by apache.
The class GridLuceneIndex, method query.
/**
 * Runs a Lucene full-text query over this index.
 *
 * @param qry Query.
 * @param filters Filters over result.
 * @return Query result.
 * @throws IgniteCheckedException If failed.
 */
public <K, V> GridCloseableIterator<IgniteBiTuple<K, V>> query(String qry, IndexingQueryFilter filters)
    throws IgniteCheckedException {
    IndexReader reader;

    try {
        long updates = updateCntr.get();

        if (updates != 0) {
            writer.commit();
            updateCntr.addAndGet(-updates);
        }

        // We can cache reader/searcher and change this to 'openIfChanged'.
        reader = DirectoryReader.open(writer, true);
    }
    catch (IOException e) {
        throw new IgniteCheckedException(e);
    }

    IndexSearcher searcher;
    TopDocs docs;

    try {
        searcher = new IndexSearcher(reader);

        MultiFieldQueryParser parser = new MultiFieldQueryParser(idxdFields, writer.getAnalyzer());

        // parser.setAllowLeadingWildcard(true);

        // Filter expired items.
        Query filter = NumericRangeQuery.newLongRange(EXPIRATION_TIME_FIELD_NAME,
            U.currentTimeMillis(), null, false, false);

        BooleanQuery query = new BooleanQuery.Builder()
            .add(parser.parse(qry), BooleanClause.Occur.MUST)
            .add(filter, BooleanClause.Occur.FILTER)
            .build();

        docs = searcher.search(query, Integer.MAX_VALUE);
    }
    catch (Exception e) {
        U.closeQuiet(reader);

        throw new IgniteCheckedException(e);
    }

    IndexingQueryCacheFilter fltr = null;

    if (filters != null)
        fltr = filters.forCache(cacheName);

    return new It<>(reader, searcher, docs.scoreDocs, fltr);
}
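The pattern worth noting is the composition: the parsed user query is a scored MUST clause, while the expiration-time range is attached as a non-scoring FILTER clause. A self-contained sketch of the same composition, assuming Lucene 5.x (matching the NumericRangeQuery and BooleanQuery.Builder APIs used above); the field names "title", "body", and "expires" are illustrative.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;

static Query buildQuery(String userInput) throws ParseException {
    // Parse the user input across several fields at once.
    MultiFieldQueryParser parser = new MultiFieldQueryParser(
        new String[] { "title", "body" }, new StandardAnalyzer());
    Query userQuery = parser.parse(userInput);

    // Non-scoring clause: keep only documents whose "expires" long value
    // lies strictly in the future (null upper bound = open-ended).
    Query notExpired = NumericRangeQuery.newLongRange(
        "expires", System.currentTimeMillis(), null, false, false);

    return new BooleanQuery.Builder()
        .add(userQuery, BooleanClause.Occur.MUST)    // contributes to the score
        .add(notExpired, BooleanClause.Occur.FILTER) // must match, never scored
        .build();
}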
Use of org.apache.lucene.queryparser.classic.MultiFieldQueryParser in project jspwiki by apache.
The class LuceneSearchProvider, method findPages.
/**
 * Searches pages using a particular combination of flags.
 *
 * @param query The query to perform in Lucene query language
 * @param flags A set of flags
 * @param wikiContext The context in which the search is performed
 * @return A Collection of SearchResult instances
 * @throws ProviderException if there is a problem with the backend
 */
public Collection findPages(String query, int flags, WikiContext wikiContext) throws ProviderException {
    IndexSearcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;

    try {
        String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS };
        QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_47, queryfields, getLuceneAnalyzer());
        // QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
        Query luceneQuery = qp.parse(query);

        if ((flags & FLAG_CONTEXTS) != 0) {
            highlighter = new Highlighter(
                new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
                new SimpleHTMLEncoder(),
                new QueryScorer(luceneQuery));
        }

        try {
            File dir = new File(m_luceneDirectory);
            Directory luceneDir = new SimpleFSDirectory(dir, null);
            IndexReader reader = DirectoryReader.open(luceneDir);
            searcher = new IndexSearcher(reader);
        } catch (Exception ex) {
            log.info("Lucene not yet ready; indexing not started", ex);
            return null;
        }

        ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs;
        AuthorizationManager mgr = m_engine.getAuthorizationManager();
        list = new ArrayList<SearchResult>(hits.length);

        for (int curr = 0; curr < hits.length; curr++) {
            int docID = hits[curr].doc;
            Document doc = searcher.doc(docID);
            String pageName = doc.get(LUCENE_ID);
            WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);

            if (page != null) {
                if (page instanceof Attachment) {
                    // Currently attachments don't look nice on the search-results page.
                    // When the search results are cleaned up, this can be enabled again.
                }

                PagePermission pp = new PagePermission(page, PagePermission.VIEW_ACTION);
                if (mgr.checkPermission(wikiContext.getWikiSession(), pp)) {
                    int score = (int) (hits[curr].score * 100);

                    // Get highlighted search contexts
                    String text = doc.get(LUCENE_PAGE_CONTENTS);
                    String[] fragments = new String[0];
                    if (text != null && highlighter != null) {
                        TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS, new StringReader(text));
                        fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);
                    }

                    SearchResult result = new SearchResultImpl(page, score, fragments);
                    list.add(result);
                }
            } else {
                log.error("Lucene found a result page '" + pageName + "' that could not be loaded, removing from Lucene cache");
                pageRemoved(new WikiPage(m_engine, pageName));
            }
        }
    } catch (IOException e) {
        log.error("Failed during lucene search", e);
    } catch (ParseException e) {
        log.info("Broken query; cannot parse query ", e);
        throw new ProviderException("You have entered a query Lucene cannot process: " + e.getMessage());
    } catch (InvalidTokenOffsetsException e) {
        log.error("Tokens are incompatible with provided text ", e);
    } finally {
        if (searcher != null) {
            try {
                searcher.getIndexReader().close();
            } catch (IOException e) {
                log.error(e);
            }
        }
    }

    return list;
}
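The highlighting path is easy to miss inside the larger method. A standalone sketch of the same Highlighter pattern, assuming Lucene 4.x to match Version.LUCENE_47; the field name, query string, and helper name are illustrative.

import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.util.Version;

static String[] highlight(Analyzer analyzer, String text) throws Exception {
    // Score fragments against the same query that produced the hit.
    Query q = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse("wiki");
    Highlighter hl = new Highlighter(
        new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
        new SimpleHTMLEncoder(),
        new QueryScorer(q));
    // Re-tokenize the stored text so the highlighter can locate match offsets.
    TokenStream ts = analyzer.tokenStream("contents", new StringReader(text));
    // Up to three best fragments, with matches wrapped in the span markup.
    return hl.getBestFragments(ts, text, 3);
}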
Use of org.apache.lucene.queryparser.classic.MultiFieldQueryParser in project rubia-forums by flashboss.
The class ForumsSearchModuleImpl, method findTopics.
@SuppressWarnings("unchecked")
public ResultPage<Topic> findTopics(SearchCriteria criteria) throws ModuleException {
    if (criteria != null) {
        try {
            EntityManager session = getSession();
            FullTextSession fullTextSession = getFullTextSession((Session) session.getDelegate());
            Builder builder = new Builder();

            String keywords = criteria.getKeywords();
            if (keywords != null && keywords.length() != 0) {
                String[] fields = null;
                Searching searching = Searching.valueOf(criteria.getSearching());
                switch (searching) {
                    case TITLE_MSG:
                        fields = new String[] { "message.text", "topic.subject" };
                        break;
                    case MSG:
                        fields = new String[] { "message.text" };
                        break;
                }
                MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer());
                builder.add(parser.parse(keywords), MUST);
            }

            String forumId = criteria.getForum();
            if (forumId != null && forumId.length() != 0) {
                builder.add(new TermQuery(new Term("topic.forum.id", forumId)), MUST);
            }

            String categoryId = criteria.getCategory();
            if (categoryId != null && categoryId.length() != 0) {
                builder.add(new TermQuery(new Term("topic.forum.category.id", categoryId)), MUST);
            }

            String userName = criteria.getAuthor();
            if (userName != null && userName.length() != 0) {
                builder.add(new WildcardQuery(new Term("poster.userId", userName)), MUST);
            }

            String timePeriod = criteria.getTimePeriod();
            if (timePeriod != null && timePeriod.length() != 0) {
                addPostTimeQuery(builder, TimePeriod.valueOf(timePeriod));
            }

            FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(builder.build(), Post.class);
            SortOrder sortOrder = SortOrder.valueOf(criteria.getSortOrder());
            SortBy sortBy = valueOf(criteria.getSortBy());
            fullTextQuery.setSort(getSort(sortBy, sortOrder));
            fullTextQuery.setProjection("topic.id");

            LinkedHashSet<Integer> topicIds = new LinkedHashSet<Integer>();
            LinkedHashSet<Integer> topicToDispIds = new LinkedHashSet<Integer>();
            int start = criteria.getPageSize() * criteria.getPageNumber();
            int end = start + criteria.getPageSize();
            int index = 0;
            for (Object o : fullTextQuery.list()) {
                Integer id = (Integer) ((Object[]) o)[0];
                if (topicIds.add(id)) {
                    if (index >= start && index < end) {
                        topicToDispIds.add(id);
                    }
                    index++;
                }
            }

            List<Topic> topics = null;
            if (topicToDispIds.size() > 0) {
                Query q = session.createQuery("from Topic as t join fetch t.poster where t.id IN ( :topicIds )");
                q.setParameter("topicIds", topicToDispIds);
                List<Topic> results = q.getResultList();
                topics = new LinkedList<Topic>();
                for (Integer id : topicToDispIds) {
                    for (Topic topic : results) {
                        if (id.equals(topic.getId())) {
                            topics.add(topic);
                            break;
                        }
                    }
                }
            }

            ResultPage<Topic> resultPage = new ResultPage<Topic>();
            resultPage.setPage(topics);
            resultPage.setResultSize(topicIds.size());
            return resultPage;
        } catch (ParseException e) {
            return null;
        } catch (Exception e) {
            throw new ModuleException(e.getMessage(), e);
        }
    } else {
        throw new IllegalArgumentException("criteria cannot be null");
    }
}
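One detail worth isolating: fullTextQuery.setProjection("topic.id") yields one row per matching Post, so the same topic id can recur, and the loop deduplicates while paging in rank order. A minimal sketch of just that step, with a hypothetical helper name and a plain list of hit ids standing in for fullTextQuery.list():

import java.util.LinkedHashSet;
import java.util.List;

static LinkedHashSet<Integer> pageOfTopics(List<Integer> topicIdHits, int pageSize, int pageNumber) {
    LinkedHashSet<Integer> seen = new LinkedHashSet<>();
    LinkedHashSet<Integer> page = new LinkedHashSet<>();
    int start = pageSize * pageNumber;
    int end = start + pageSize;
    int index = 0;
    for (Integer id : topicIdHits) {
        // Count a topic only the first time it appears, preserving rank order.
        if (seen.add(id)) {
            if (index >= start && index < end) {
                page.add(id);
            }
            index++;
        }
    }
    return page;
}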