Use of org.apache.lucene.search.ScoreDoc in project languagetool by languagetool-org.
From the class SimilarWordFinder, method findSimilarWordsFor:
private List<SimWord> findSimilarWordsFor(DirectoryReader reader, String word, TopDocs topDocs) throws IOException {
    List<SimWord> result = new ArrayList<>();
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        String simWord = reader.document(scoreDoc.doc).get("word");
        //System.out.println(" sim: " + simWord);
        if (!simWord.equalsIgnoreCase(word) && !knownPairs.contains(simWord, word)) {
            int firstDiffPos = getDiffPos(simWord.toLowerCase(), word.toLowerCase());
            int limit = Math.min(word.length(), simWord.length()) - 1;
            if (firstDiffPos > limit) {
                //System.out.println("FILTERED: " + word + " -> " + simWord + " [" + firstDiffPos + " <= " + limit + "]");
            } else {
                int dist = StringUtils.getLevenshteinDistance(word, simWord);
                if (dist <= MAX_DIST) {
                    //System.out.println(word + " -> " + simWord + " [" + firstDiffPos + "]");
                    result.add(new SimWord(simWord, dist));
                }
            }
            knownPairs.add(simWord, word);
        }
    }
    return result;
}
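For context, every example on this page follows the same basic pattern: run a query, walk the ScoreDoc array in TopDocs, and resolve each hit's doc id to a stored document. Below is a minimal, self-contained sketch of that pattern; the index path and the "word" field are illustrative assumptions, not taken from the project code.

// Sketch only: the basic ScoreDoc iteration pattern. Index location and field name are assumptions.
try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/index")))) {
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new FuzzyQuery(new Term("word", "exmaple")), 10);
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        Document doc = searcher.doc(scoreDoc.doc); // resolve the hit's doc id to its stored fields
        System.out.println(doc.get("word") + " score=" + scoreDoc.score);
    }
}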
Use of org.apache.lucene.search.ScoreDoc in project bigbluebutton by bigbluebutton.
From the class SearchController, method onSubmit:
/*
 * (non-Javadoc)
 *
 * @see
 * org.springframework.web.servlet.mvc.SimpleFormController#onSubmit(javax
 * .servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse,
 * java.lang.Object, org.springframework.validation.BindException)
 */
@Override
protected ModelAndView onSubmit(HttpServletRequest request, HttpServletResponse response, Object command, BindException errors) throws Exception {
    SearchCriteriaCommand srchCriteriaCommand = (SearchCriteriaCommand) command;
    int startFrom = (new Integer(srchCriteriaCommand.getStartFrom())).intValue();
    int endIndex = 0;
    String queryStr = srchCriteriaCommand.getKeyWords();
    String sortBy = srchCriteriaCommand.getSort();
    String operator = srchCriteriaCommand.getOperator();
    String relRange = srchCriteriaCommand.getRangeValue();
    boolean bSmart = (relRange != null) && (!relRange.isEmpty());
    boolean bSortByScore = sortBy.equalsIgnoreCase("byScore");
    if (logger.isInfoEnabled()) {
        logger.info("---search offset=" + startFrom + " sortBy=" + sortBy + "qryString=" + queryStr + "operator=" + operator);
    }
    Map<String, Object> model = new HashMap<String, Object>();
    LinkedHashMap<String, MatchVO> sortedMap = new LinkedHashMap<String, MatchVO>();
    Map<String, SessionHitsOrganizer> hitsOrganizerMap = new HashMap<String, SessionHitsOrganizer>();
    Map<String, String> resultMap = new HashMap<String, String>();
    synchronized (Index.getInstance()) {
        Search search = Search.getInstance();
        search.startSearch();
        TopDocs tps = null;
        Searcher searcher = null;
        ScoreDoc[] hits = null;
        if (bSortByScore) {
            Search.TopDocCollectorSearchResult result = search.searchByScore(queryStr, startFrom, operator);
            TopDocCollector collector = result.getCollector();
            if (collector != null) {
                tps = collector.topDocs();
                // Read the ScoreDocs inside the null check so a missing collector cannot cause a NullPointerException.
                hits = tps.scoreDocs;
            }
            searcher = result.getSearcher();
        } else {
            Search.TopFieldDocsSearchResult result = search.searchBySession(queryStr, startFrom, operator);
            TopFieldDocs tfd = result.getTopFieldDocs();
            if (tfd != null) {
                hits = tfd.scoreDocs;
            }
            searcher = result.getSearcher();
        }
        if (hits == null) {
            if (logger.isInfoEnabled()) {
                logger.info("---No hit");
            }
        } else {
            int start = startFrom;
            int end = hits.length;
            endIndex = end;
            if (logger.isInfoEnabled()) {
                logger.info("total match number=" + endIndex);
            }
            String currentSession = "0";
            String lastSession = "0";
            SessionHitsOrganizer hitsOrganizer = null;
            for (int i = start; i < end; i++) {
                float score = hits[i].score;
                Document doc = searcher.doc(hits[i].doc);
                String path = doc.get("path");
                if (path != null) {
                    MatchVO matchVO = new MatchVO();
                    matchVO.setFilePath(path);
                    String fullContent = doc.get("title");
                    String summary = getKeywordContext(queryStr, fullContent);
                    matchVO.setContentSummary(summary);
                    String fileName = doc.get("fileName");
                    matchVO.setFileName(fileName);
                    String indexSummary = doc.get("summary");
                    matchVO.setIndexingSummary(indexSummary);
                    matchVO.setScore(score);
                    String title = indexSummary + ": " + fileName + " (Match Score = " + score + ")";
                    //String content = doc.get("contents");
                    String allData = title + "%" + summary;
                    if (doc.get("slideTime") != null) {
                        allData += "%" + doc.get("slideTime");
                        matchVO.setSlidePlayTime(doc.get("slideTime"));
                    }
                    //sortedMap.put(path, allData);
                    sortedMap.put(path, matchVO);
                    //model.put(path, newTitle+"%"+doc.get("summary")+"%"+doc.get("slideTime"));
                    if (logger.isInfoEnabled()) {
                        logger.info("----" + allData);
                        logger.info((i + 1) + ". " + path);
                    }
                    if (title != null) {
                        if (logger.isInfoEnabled()) {
                            logger.info(" Title: " + doc.get("title"));
                        }
                    }
                    if (bSmart) {
                        //Prepare for the grouping results
                        currentSession = getSessionNumberFromFileURL(path);
                        //get existing current session organizer
                        hitsOrganizer = hitsOrganizerMap.get(currentSession);
                        if (hitsOrganizer == null) {
                            //create a new session organizer object
                            hitsOrganizer = new SessionHitsOrganizer();
                            hitsOrganizer.setSessionNum(currentSession);
                            hitsOrganizerMap.put(currentSession, hitsOrganizer);
                        }
                        hitsOrganizer.setReleventRange((new Float(relRange)).floatValue());
                        hitsOrganizer.addExactHits(path, score);
                        matchVO.setSessionHitOrganier(hitsOrganizer);
                    }
                } else {
                    System.out.println((i + 1) + ". " + "No path for this document");
                }
            }
        }
        search.finishSearch();
        //post processing for result grouping...
        Iterator hitsOrganizerIt = hitsOrganizerMap.keySet().iterator();
        while (hitsOrganizerIt.hasNext()) {
            String key = (String) hitsOrganizerIt.next();
            SessionHitsOrganizer organizer = hitsOrganizerMap.get(key);
            organizer.generateResultGroup();
        }
        model.put("result", sortedMap);
        if (bSmart) {
            model.put("hitsOrganizer", hitsOrganizerMap);
        }
        model.put("searchKeyword", queryStr);
        model.put("startFrom", (new Integer(startFrom)).toString());
        model.put("endAt", (new Integer(endIndex)).toString());
        model.put("sortBy", sortBy);
        model.put("operator", operator);
        model.put("rangeValue", relRange);
    }
    ModelAndView mav = new ModelAndView(this.getSuccessView(), model);
    return mav;
}
Use of org.apache.lucene.search.ScoreDoc in project gitblit by gitblit.
From the class TicketIndexer, method searchFor:
/**
 * Search for tickets matching the query. The returned tickets are
 * shadows of the real ticket, but suitable for a results list.
 *
 * @param repository
 * @param text
 * @param page
 * @param pageSize
 * @return search results
 */
public List<QueryResult> searchFor(RepositoryModel repository, String text, int page, int pageSize) {
    if (StringUtils.isEmpty(text)) {
        return Collections.emptyList();
    }
    Set<QueryResult> results = new LinkedHashSet<QueryResult>();
    StandardAnalyzer analyzer = new StandardAnalyzer();
    try {
        // search the title, description and content
        BooleanQuery.Builder bldr = new BooleanQuery.Builder();
        QueryParser qp;
        qp = new QueryParser(Lucene.title.name(), analyzer);
        qp.setAllowLeadingWildcard(true);
        bldr.add(qp.parse(text), Occur.SHOULD);
        qp = new QueryParser(Lucene.body.name(), analyzer);
        qp.setAllowLeadingWildcard(true);
        bldr.add(qp.parse(text), Occur.SHOULD);
        qp = new QueryParser(Lucene.content.name(), analyzer);
        qp.setAllowLeadingWildcard(true);
        bldr.add(qp.parse(text), Occur.SHOULD);
        IndexSearcher searcher = getSearcher();
        Query rewrittenQuery = searcher.rewrite(bldr.build());
        log.debug(rewrittenQuery.toString());
        TopScoreDocCollector collector = TopScoreDocCollector.create(5000);
        searcher.search(rewrittenQuery, collector);
        int offset = Math.max(0, (page - 1) * pageSize);
        ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            int docId = hits[i].doc;
            Document doc = searcher.doc(docId);
            QueryResult result = docToQueryResult(doc);
            if (repository != null) {
                if (!result.repository.equalsIgnoreCase(repository.name)) {
                    continue;
                }
            }
            results.add(result);
        }
    } catch (Exception e) {
        log.error(MessageFormat.format("Exception while searching for {0}", text), e);
    }
    return new ArrayList<QueryResult>(results);
}
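The collector-based paging above (collector.topDocs(offset, pageSize)) can also be approximated without a collector by over-fetching and slicing the ScoreDoc array, which is essentially what the RemoteMailQueue example below does. A rough sketch, with hypothetical method and parameter names that are not part of the gitblit code:

// Sketch only: paging by over-fetching and slicing; assumes pageSize > 0.
static List<Document> pageOfHits(IndexSearcher searcher, Query query, int page, int pageSize) throws IOException {
    int offset = Math.max(0, (page - 1) * pageSize);
    TopDocs topDocs = searcher.search(query, offset + pageSize);
    List<Document> docs = new ArrayList<Document>();
    ScoreDoc[] hits = topDocs.scoreDocs;
    for (int i = offset; i < hits.length; i++) {
        docs.add(searcher.doc(hits[i].doc)); // load stored fields only for hits on the requested page
    }
    return docs;
}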
Use of org.apache.lucene.search.ScoreDoc in project zm-mailbox by Zimbra.
From the class RemoteMailQueue, method search0:
private void search0(SearchResult result, IndexReader indexReader, Query query, int offset, int limit) throws IOException {
    if (ZimbraLog.rmgmt.isDebugEnabled()) {
        ZimbraLog.rmgmt.debug("searching query=" + query + " offset=" + offset + " limit=" + limit + " " + this);
    }
    Searcher searcher = null;
    try {
        searcher = new IndexSearcher(indexReader);
        TopDocs topDocs = searcher.search(query, (Filter) null, limit);
        ScoreDoc[] hits = topDocs.scoreDocs;
        if (offset < hits.length) {
            int n;
            if (limit <= 0) {
                n = hits.length;
            } else {
                n = Math.min(offset + limit, hits.length);
            }
            for (int i = offset; i < n; i++) {
                Document doc = searcher.doc(hits[i].doc);
                Map<QueueAttr, String> qitem = docToQueueItem(doc);
                result.qitems.add(qitem);
            }
        }
        result.hits = hits.length;
    } finally {
        if (searcher != null) {
            searcher.close();
        }
    }
}
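Note that the three-argument search(query, (Filter) null, limit) overload used here comes from older Lucene releases; the Filter class was removed in later versions, where the equivalent is the plain two-argument overload. A minimal sketch, assuming the same searcher, query, and limit as in search0:

// Sketch only: the Filter-free equivalent on newer Lucene versions; the offset slicing above stays unchanged.
TopDocs topDocs = searcher.search(query, limit);
ScoreDoc[] hits = topDocs.scoreDocs;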
Use of org.apache.lucene.search.ScoreDoc in project jackrabbit-oak by apache.
From the class FilteredSortedSetDocValuesFacetCounts, method getTopChildren:
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
    FacetResult topChildren = super.getTopChildren(topN, dim, path);
    LabelAndValue[] labelAndValues = topChildren.labelValues;
    for (ScoreDoc scoreDoc : docs.scoreDocs) {
        labelAndValues = filterFacet(scoreDoc.doc, dim, labelAndValues);
    }
    int childCount = labelAndValues.length;
    Number value = 0;
    for (LabelAndValue lv : labelAndValues) {
        value = value.longValue() + lv.value.longValue();
    }
    return new FacetResult(dim, path, value, labelAndValues, childCount);
}