Search in sources :

Example 21 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project languagetool by languagetool-org.

the class SimilarWordFinder method findSimilarWordsFor.

/**
 * Collects, from the given search hits, the indexed words that qualify as
 * similar to {@code word}.
 *
 * <p>For every hit the stored "word" field is read. A candidate is skipped when
 * it equals {@code word} ignoring case or when the pair is already recorded in
 * {@code knownPairs}. Otherwise the pair is recorded in {@code knownPairs}, and
 * the candidate is added to the result if its first differing position (as
 * reported by {@code getDiffPos} on the lower-cased strings) does not exceed
 * the last index of the shorter word and its Levenshtein distance to
 * {@code word} is at most {@code MAX_DIST}.
 *
 * @param reader  reader used to load the stored document of each hit
 * @param word    the word to find similar words for
 * @param topDocs the search hits to inspect
 * @return the accepted candidates together with their Levenshtein distance
 * @throws IOException if a document cannot be loaded from the reader
 */
private List<SimWord> findSimilarWordsFor(DirectoryReader reader, String word, TopDocs topDocs) throws IOException {
    List<SimWord> similarWords = new ArrayList<>();
    for (ScoreDoc hit : topDocs.scoreDocs) {
        String candidate = reader.document(hit.doc).get("word");
        // Identical words and pairs that were already evaluated are not looked at again.
        if (candidate.equalsIgnoreCase(word) || knownPairs.contains(candidate, word)) {
            continue;
        }
        int firstDiffPos = getDiffPos(candidate.toLowerCase(), word.toLowerCase());
        int lastSharedIndex = Math.min(word.length(), candidate.length()) - 1;
        // Candidates whose first difference lies beyond the shorter word are filtered out.
        if (firstDiffPos <= lastSharedIndex) {
            int distance = StringUtils.getLevenshteinDistance(word, candidate);
            if (distance <= MAX_DIST) {
                similarWords.add(new SimWord(candidate, distance));
            }
        }
        // The pair is remembered whether or not the candidate was accepted.
        knownPairs.add(candidate, word);
    }
    return similarWords;
}
Also used : ScoreDoc(org.apache.lucene.search.ScoreDoc)

Example 22 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project bigbluebutton by bigbluebutton.

the class SearchController method onSubmit.

/**
 * Handles a submitted search form: runs the query against the index and builds
 * the model for the success view.
 *
 * <p>The search is ordered by score when the command's sort value equals
 * "byScore" (ignoring case); otherwise {@code searchBySession} is used. Every
 * hit from index {@code startFrom} onwards that has a "path" field becomes a
 * {@link MatchVO} stored under that path. When a range value is supplied, the
 * hits are additionally grouped per session in {@link SessionHitsOrganizer}
 * objects. The whole search runs while holding the monitor of
 * {@code Index.getInstance()}.
 *
 * @param request  the current HTTP request (not read by this method)
 * @param response the current HTTP response (not read by this method)
 * @param command  the form object; must be a {@link SearchCriteriaCommand}
 * @param errors   the binding errors (not read by this method)
 * @return the success view populated with "result", "searchKeyword",
 *         "startFrom", "endAt", "sortBy", "operator", "rangeValue" and, when a
 *         range value was given, "hitsOrganizer"
 * @throws Exception if the start offset or range value is not numeric, or if
 *                   the search itself fails
 * @see org.springframework.web.servlet.mvc.SimpleFormController#onSubmit(javax.servlet.http.HttpServletRequest,
 *      javax.servlet.http.HttpServletResponse, java.lang.Object,
 *      org.springframework.validation.BindException)
 */
@Override
protected ModelAndView onSubmit(HttpServletRequest request, HttpServletResponse response, Object command, BindException errors) throws Exception {
    SearchCriteriaCommand srchCriteriaCommand = (SearchCriteriaCommand) command;
    int startFrom = Integer.valueOf(srchCriteriaCommand.getStartFrom()).intValue();
    int endIndex = 0;
    String queryStr = srchCriteriaCommand.getKeyWords();
    String sortBy = srchCriteriaCommand.getSort();
    String operator = srchCriteriaCommand.getOperator();
    String relRange = srchCriteriaCommand.getRangeValue();
    boolean bSmart = (relRange != null) && (!relRange.isEmpty());
    // Constant-first comparison: a missing sort value falls back to the session search
    // instead of failing with a NullPointerException.
    boolean bSortByScore = "byScore".equalsIgnoreCase(sortBy);
    if (logger.isInfoEnabled()) {
        logger.info("---search offset=" + startFrom + " sortBy=" + sortBy + "qryString=" + queryStr + "operator=" + operator);
    }
    Map<String, Object> model = new HashMap<String, Object>();
    LinkedHashMap<String, MatchVO> sortedMap = new LinkedHashMap<String, MatchVO>();
    Map<String, SessionHitsOrganizer> hitsOrganizerMap = new HashMap<String, SessionHitsOrganizer>();
    synchronized (Index.getInstance()) {
        Search search = Search.getInstance();
        search.startSearch();
        try {
            Searcher searcher;
            ScoreDoc[] hits = null;
            if (bSortByScore) {
                Search.TopDocCollectorSearchResult result = search.searchByScore(queryStr, startFrom, operator);
                TopDocCollector collector = result.getCollector();
                // Only read the hits when a collector (and its TopDocs) is actually available;
                // otherwise leave hits null so the "No hit" branch below is taken.
                if (collector != null) {
                    TopDocs tps = collector.topDocs();
                    if (tps != null) {
                        hits = tps.scoreDocs;
                    }
                }
                searcher = result.getSearcher();
            } else {
                Search.TopFieldDocsSearchResult result = search.searchBySession(queryStr, startFrom, operator);
                TopFieldDocs tfd = result.getTopFieldDocs();
                if (tfd != null) {
                    hits = tfd.scoreDocs;
                }
                searcher = result.getSearcher();
            }
            if (hits == null) {
                if (logger.isInfoEnabled()) {
                    logger.info("---No hit");
                }
            } else {
                int end = hits.length;
                endIndex = end;
                if (logger.isInfoEnabled()) {
                    logger.info("total match number=" + endIndex);
                }
                for (int i = startFrom; i < end; i++) {
                    float score = hits[i].score;
                    Document doc = searcher.doc(hits[i].doc);
                    String path = doc.get("path");
                    if (path != null) {
                        MatchVO matchVO = new MatchVO();
                        matchVO.setFilePath(path);
                        // The "title" field is used as the text from which the keyword context is cut.
                        String fullContent = doc.get("title");
                        String summary = getKeywordContext(queryStr, fullContent);
                        matchVO.setContentSummary(summary);
                        String fileName = doc.get("fileName");
                        matchVO.setFileName(fileName);
                        String indexSummary = doc.get("summary");
                        matchVO.setIndexingSummary(indexSummary);
                        matchVO.setScore(score);
                        String title = indexSummary + ": " + fileName + " (Match Score = " + score + ")";
                        // allData is only used for logging below.
                        String allData = title + "%" + summary;
                        String slideTime = doc.get("slideTime");
                        if (slideTime != null) {
                            allData += "%" + slideTime;
                            matchVO.setSlidePlayTime(slideTime);
                        }
                        sortedMap.put(path, matchVO);
                        if (logger.isInfoEnabled()) {
                            logger.info("----" + allData);
                            logger.info((i + 1) + ". " + path);
                            logger.info("   Title: " + doc.get("title"));
                        }
                        if (bSmart) {
                            // Prepare for the grouping results: one organizer per session number.
                            String currentSession = getSessionNumberFromFileURL(path);
                            SessionHitsOrganizer hitsOrganizer = hitsOrganizerMap.get(currentSession);
                            if (hitsOrganizer == null) {
                                hitsOrganizer = new SessionHitsOrganizer();
                                hitsOrganizer.setSessionNum(currentSession);
                                hitsOrganizerMap.put(currentSession, hitsOrganizer);
                            }
                            hitsOrganizer.setReleventRange(Float.parseFloat(relRange));
                            hitsOrganizer.addExactHits(path, score);
                            matchVO.setSessionHitOrganier(hitsOrganizer);
                        }
                    } else {
                        System.out.println((i + 1) + ". " + "No path for this document");
                    }
                }
            }
        } finally {
            // Always pair startSearch() with finishSearch(), even when the search throws.
            search.finishSearch();
        }
        // Post processing for result grouping.
        for (SessionHitsOrganizer organizer : hitsOrganizerMap.values()) {
            organizer.generateResultGroup();
        }
        model.put("result", sortedMap);
        if (bSmart) {
            model.put("hitsOrganizer", hitsOrganizerMap);
        }
        model.put("searchKeyword", queryStr);
        model.put("startFrom", Integer.toString(startFrom));
        model.put("endAt", Integer.toString(endIndex));
        model.put("sortBy", sortBy);
        model.put("operator", operator);
        model.put("rangeValue", relRange);
    }
    return new ModelAndView(this.getSuccessView(), model);
}
Also used : SearchCriteriaCommand(org.bigbluebutton.webminer.web.model.SearchCriteriaCommand) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ModelAndView(org.springframework.web.servlet.ModelAndView) MatchVO(org.bigbluebutton.webminer.web.model.MatchVO) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) Document(org.apache.lucene.document.Document) LinkedHashMap(java.util.LinkedHashMap) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) SessionHitsOrganizer(org.bigbluebutton.webminer.web.model.SessionHitsOrganizer) Search(org.bigbluebutton.webminer.search.Search) Iterator(java.util.Iterator) Searcher(org.apache.lucene.search.Searcher) TopDocCollector(org.apache.lucene.search.TopDocCollector)

Example 23 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project gitblit by gitblit.

the class TicketIndexer method searchFor.

/**
 * Searches the ticket index for tickets matching the query text. The returned
 * tickets are shadows of the real ticket, but suitable for a results list.
 *
 * <p>The text is parsed once per field (title, body and content) with leading
 * wildcards allowed, and the three parsed queries are combined as optional
 * clauses. At most 5000 top-scoring hits are collected, from which the
 * requested page is taken. The repository filter is applied after paging, so a
 * page may contain fewer than {@code pageSize} entries. Any exception is logged
 * and the results gathered so far are returned.
 *
 * @param repository if not null, only results whose repository name equals
 *                   this repository's name (ignoring case) are kept
 * @param text       the query text; an empty text yields an empty list
 * @param page       the 1-based page number; values below 1 are treated as the
 *                   first page
 * @param pageSize   the maximum number of hits taken for the page
 * @return search results, without duplicates, in hit order
 */
public List<QueryResult> searchFor(RepositoryModel repository, String text, int page, int pageSize) {
    if (StringUtils.isEmpty(text)) {
        return Collections.emptyList();
    }
    Set<QueryResult> matches = new LinkedHashSet<QueryResult>();
    StandardAnalyzer analyzer = new StandardAnalyzer();
    try {
        // search the title, description and content
        String[] searchedFields = { Lucene.title.name(), Lucene.body.name(), Lucene.content.name() };
        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
        for (String field : searchedFields) {
            QueryParser parser = new QueryParser(field, analyzer);
            parser.setAllowLeadingWildcard(true);
            queryBuilder.add(parser.parse(text), Occur.SHOULD);
        }
        IndexSearcher searcher = getSearcher();
        Query rewrittenQuery = searcher.rewrite(queryBuilder.build());
        log.debug(rewrittenQuery.toString());
        TopScoreDocCollector collector = TopScoreDocCollector.create(5000);
        searcher.search(rewrittenQuery, collector);
        int offset = Math.max(0, (page - 1) * pageSize);
        for (ScoreDoc hit : collector.topDocs(offset, pageSize).scoreDocs) {
            QueryResult result = docToQueryResult(searcher.doc(hit.doc));
            boolean otherRepository = repository != null
                    && !result.repository.equalsIgnoreCase(repository.name);
            if (otherRepository) {
                continue;
            }
            matches.add(result);
        }
    } catch (Exception e) {
        log.error(MessageFormat.format("Exception while searching for {0}", text), e);
    }
    return new ArrayList<QueryResult>(matches);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) BooleanQuery(org.apache.lucene.search.BooleanQuery) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) ParseException(java.text.ParseException) IOException(java.io.IOException) ScoreDoc(org.apache.lucene.search.ScoreDoc) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer)

Example 24 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project zm-mailbox by Zimbra.

the class RemoteMailQueue method search0.

/**
 * Runs {@code query} against {@code indexReader} and appends the requested
 * window of matching queue items to {@code result}.
 *
 * <p>The searcher is asked for {@code limit} top documents; of those, the
 * entries from index {@code offset} up to (exclusive) {@code offset + limit}
 * are converted with {@code docToQueueItem} and added to {@code result.qitems}.
 * A non-positive {@code limit} makes the window run to the end of the returned
 * hits. {@code result.hits} is set to the number of hits the searcher returned.
 *
 * <p>NOTE(review): because only {@code limit} documents are requested, an
 * {@code offset} at or beyond {@code limit} can never yield items, and it is
 * unclear how the searcher treats a non-positive {@code limit} - confirm the
 * intended paging semantics against the callers.
 *
 * @param result      receives the queue items and the hit count
 * @param indexReader the index to search
 * @param query       the query to execute
 * @param offset      index of the first hit to return
 * @param limit       maximum number of hits to return
 * @throws IOException if searching or loading a document fails
 */
private void search0(SearchResult result, IndexReader indexReader, Query query, int offset, int limit) throws IOException {
    if (ZimbraLog.rmgmt.isDebugEnabled()) {
        ZimbraLog.rmgmt.debug("searching query=" + query + " offset=" + offset + " limit=" + limit + " " + this);
    }
    Searcher searcher = null;
    try {
        searcher = new IndexSearcher(indexReader);
        ScoreDoc[] hits = searcher.search(query, (Filter) null, limit).scoreDocs;
        // Exclusive end of the window; when offset is past the hits the loop simply does not run.
        int end = (limit <= 0) ? hits.length : Math.min(offset + limit, hits.length);
        for (int pos = offset; pos < end; pos++) {
            Document doc = searcher.doc(hits[pos].doc);
            result.qitems.add(docToQueueItem(doc));
        }
        result.hits = hits.length;
    } finally {
        if (searcher != null) {
            searcher.close();
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) Searcher(org.apache.lucene.search.Searcher) IndexSearcher(org.apache.lucene.search.IndexSearcher) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc)

Example 25 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project jackrabbit-oak by apache.

the class FilteredSortedSetDocValuesFacetCounts method getTopChildren.

/**
 * Returns the top children of {@code dim} as computed by the parent class,
 * after passing the labels through {@code filterFacet} once for every document
 * in {@code docs}.
 *
 * <p>The value of the returned result is the sum of the values of the labels
 * that remain after filtering, and its child count is the number of remaining
 * labels.
 *
 * @param topN maximum number of children requested from the parent class
 * @param dim  the facet dimension
 * @param path the path within the dimension
 * @return the filtered facet result, or {@code null} when the parent class
 *         returns {@code null} (Lucene documents a null return for a path or
 *         dimension that was never seen)
 * @throws IOException if the parent lookup or the filtering fails
 */
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
    FacetResult topChildren = super.getTopChildren(topN, dim, path);
    if (topChildren == null) {
        // Nothing to filter; propagate the parent's "no such dimension/path" answer
        // instead of failing with a NullPointerException.
        return null;
    }
    LabelAndValue[] labelAndValues = topChildren.labelValues;
    for (ScoreDoc scoreDoc : docs.scoreDocs) {
        labelAndValues = filterFacet(scoreDoc.doc, dim, labelAndValues);
    }
    int childCount = labelAndValues.length;
    // Recompute the aggregate from the labels that survived filtering.
    Number value = 0;
    for (LabelAndValue lv : labelAndValues) {
        value = value.longValue() + lv.value.longValue();
    }
    return new FacetResult(dim, path, value, labelAndValues, childCount);
}
Also used : FacetResult(org.apache.lucene.facet.FacetResult) LabelAndValue(org.apache.lucene.facet.LabelAndValue) ScoreDoc(org.apache.lucene.search.ScoreDoc)

Aggregations

ScoreDoc (org.apache.lucene.search.ScoreDoc)211 TopDocs (org.apache.lucene.search.TopDocs)119 IndexSearcher (org.apache.lucene.search.IndexSearcher)94 Document (org.apache.lucene.document.Document)89 Query (org.apache.lucene.search.Query)65 TermQuery (org.apache.lucene.search.TermQuery)49 ArrayList (java.util.ArrayList)46 IOException (java.io.IOException)44 IndexReader (org.apache.lucene.index.IndexReader)42 Term (org.apache.lucene.index.Term)38 Directory (org.apache.lucene.store.Directory)37 BooleanQuery (org.apache.lucene.search.BooleanQuery)26 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)23 Sort (org.apache.lucene.search.Sort)22 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)21 Test (org.junit.Test)21 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)20 FieldDoc (org.apache.lucene.search.FieldDoc)20 HashMap (java.util.HashMap)18 HashSet (java.util.HashSet)17