Search in sources:

Example 6 with TopScoreDocCollector

use of org.apache.lucene.search.TopScoreDocCollector in project gitblit by gitblit.

the class LuceneService method search.

/**
 * Searches the specified repositories for the given text or query.
 *
 * <p>The text is parsed twice, once against the summary field and once
 * against the content field, and the two parsed queries are combined as
 * optional (SHOULD) clauses. Only the 5000 best-scoring hits are collected;
 * the requested page is sliced out of that collection.
 *
 * @param text
 *            the text or query to search for. if the text is null or empty,
 *            null is returned
 * @param page
 *            the page number to retrieve. page is 1-indexed.
 * @param pageSize
 *            the number of elements to return for this page
 * @param repositories
 *            a list of repositories to search. if no repositories are
 *            specified null is returned.
 * @return a list of SearchResults in order from highest to the lowest score.
 *         If the search fails, the exception is logged and whatever was
 *         gathered so far (possibly nothing) is returned.
 */
public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
    if (StringUtils.isEmpty(text)) {
        return null;
    }
    if (ArrayUtils.isEmpty(repositories)) {
        return null;
    }
    // insertion-ordered set: keeps score order while dropping equal results
    Set<SearchResult> results = new LinkedHashSet<SearchResult>();
    // The analyzer is created per call, so it must also be closed per call;
    // try-with-resources releases it on both the normal and the error path.
    try (StandardAnalyzer analyzer = new StandardAnalyzer()) {
        // default search checks summary and content
        BooleanQuery.Builder bldr = new BooleanQuery.Builder();
        QueryParser qp;
        qp = new QueryParser(FIELD_SUMMARY, analyzer);
        qp.setAllowLeadingWildcard(true);
        bldr.add(qp.parse(text), Occur.SHOULD);
        qp = new QueryParser(FIELD_CONTENT, analyzer);
        qp.setAllowLeadingWildcard(true);
        bldr.add(qp.parse(text), Occur.SHOULD);
        IndexSearcher searcher;
        if (repositories.length == 1) {
            // single repository search
            searcher = getIndexSearcher(repositories[0]);
        } else {
            // multiple repository search: wrap every repository's reader in
            // one composite reader, in the same order as the repositories array
            List<IndexReader> readers = new ArrayList<IndexReader>();
            for (String repository : repositories) {
                IndexSearcher repositoryIndex = getIndexSearcher(repository);
                readers.add(repositoryIndex.getIndexReader());
            }
            IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
            MultiSourceReader reader = new MultiSourceReader(rdrs);
            searcher = new IndexSearcher(reader);
        }
        BooleanQuery query = bldr.build();
        Query rewrittenQuery = searcher.rewrite(query);
        logger.debug(rewrittenQuery.toString());
        TopScoreDocCollector collector = TopScoreDocCollector.create(5000);
        searcher.search(rewrittenQuery, collector);
        // page is 1-indexed; clamp so page <= 0 behaves like the first page
        int offset = Math.max(0, (page - 1) * pageSize);
        ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
        int totalHits = collector.getTotalHits();
        for (int i = 0; i < hits.length; i++) {
            int docId = hits[i].doc;
            Document doc = searcher.doc(docId);
            // offset + i + 1 is the 1-based position of the hit across all pages
            SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
            if (repositories.length == 1) {
                // single repository search
                result.repository = repositories[0];
            } else {
                // multi-repository search: map the hit back to the repository
                // whose reader produced it
                MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
                int index = reader.getSourceIndex(docId);
                result.repository = repositories[index];
            }
            String content = doc.get(FIELD_CONTENT);
            // highlighting uses the un-rewritten query
            result.fragment = getHighlightedFragment(analyzer, query, content, result);
            results.add(result);
        }
    } catch (Exception e) {
        logger.error(MessageFormat.format("Exception while searching for {0}", text), e);
    }
    return new ArrayList<SearchResult>(results);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) BooleanQuery(org.apache.lucene.search.BooleanQuery) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) ArrayList(java.util.ArrayList) SearchResult(com.gitblit.models.SearchResult) Document(org.apache.lucene.document.Document) ParseException(java.text.ParseException) InvalidTokenOffsetsException(org.apache.lucene.search.highlight.InvalidTokenOffsetsException) IOException(java.io.IOException) ScoreDoc(org.apache.lucene.search.ScoreDoc) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) IndexReader(org.apache.lucene.index.IndexReader)

Example 7 with TopScoreDocCollector

use of org.apache.lucene.search.TopScoreDocCollector in project neo4j by neo4j.

the class DocValuesCollector method getTopDocs.

/**
 * Replays the collected documents into a top-N collector and returns its hits.
 *
 * @param sort ordering to apply; {@code Sort.RELEVANCE} selects the score-based collector,
 *             anything else the field-based collector
 * @param size number of hits the collector is created for
 * @return the top documents produced by the selected collector
 * @throws IOException if replaying the documents into the collector fails
 */
private TopDocs getTopDocs(Sort sort, int size) throws IOException {
    // identity comparison on purpose: only the shared RELEVANCE constant takes the score path
    if (sort != Sort.RELEVANCE) {
        TopFieldCollector byField = TopFieldCollector.create(sort, size, false, true, false);
        replayTo(byField);
        return byField.topDocs();
    }
    TopScoreDocCollector byScore = TopScoreDocCollector.create(size);
    replayTo(byScore);
    return byScore.topDocs();
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector)

Example 8 with TopScoreDocCollector

use of org.apache.lucene.search.TopScoreDocCollector in project lucene-solr by apache.

the class DrillSideways method search.

/**
   * Search, sorting by score, and computing
   * drill down and sideways counts.
   *
   * @param after if non-null, passed to the collectors so that only hits
   *              ranked after this document are gathered
   * @param query the drill-down query to execute
   * @param topN maximum number of hits to return
   * @return the facets together with the top hits
   * @throws IOException if the underlying search fails
   */
public DrillSidewaysResult search(ScoreDoc after, DrillDownQuery query, int topN) throws IOException {
    int limit = searcher.getIndexReader().maxDoc();
    if (limit == 0) {
        // the collector does not allow numHits = 0
        limit = 1;
    }
    // Never ask a collector for more hits than the index has documents;
    // this keeps the collector's queue bounded by the index size even
    // when the caller passes a very large topN.
    final int fTopN = Math.min(topN, limit);
    if (executor != null) {
        // We have an executor, let use the multi-threaded version
        final CollectorManager<TopScoreDocCollector, TopDocs> collectorManager = new CollectorManager<TopScoreDocCollector, TopDocs>() {

            @Override
            public TopScoreDocCollector newCollector() throws IOException {
                return TopScoreDocCollector.create(fTopN, after);
            }

            @Override
            public TopDocs reduce(Collection<TopScoreDocCollector> collectors) throws IOException {
                final TopDocs[] topDocs = new TopDocs[collectors.size()];
                int pos = 0;
                for (TopScoreDocCollector collector : collectors) {
                    topDocs[pos++] = collector.topDocs();
                }
                return TopDocs.merge(topN, topDocs);
            }
        };
        ConcurrentDrillSidewaysResult<TopDocs> r = search(query, collectorManager);
        return new DrillSidewaysResult(r.facets, r.collectorResult);
    } else {
        // use the clamped size here as well, consistent with the concurrent branch
        TopScoreDocCollector hitCollector = TopScoreDocCollector.create(fTopN, after);
        DrillSidewaysResult r = search(query, hitCollector);
        return new DrillSidewaysResult(r.facets, hitCollector.topDocs());
    }
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) Collection(java.util.Collection) MultiCollectorManager(org.apache.lucene.search.MultiCollectorManager) CollectorManager(org.apache.lucene.search.CollectorManager)

Example 9 with TopScoreDocCollector

use of org.apache.lucene.search.TopScoreDocCollector in project lucene-solr by apache.

the class TestJoinUtil method testRandomOrdinalsJoin.

/**
 * Runs several randomized join searches and checks, for each one, both the set of matching
 * documents and the top-scoring documents against independently computed expectations.
 */
public void testRandomOrdinalsJoin() throws Exception {
    IndexIterationContext context = createContext(512, false, true);
    // try/finally so the context is released even when an assertion or a search fails
    try {
        int searchIters = 10;
        IndexSearcher indexSearcher = context.searcher;
        for (int i = 0; i < searchIters; i++) {
            if (VERBOSE) {
                System.out.println("search iter=" + i);
            }
            int r = random().nextInt(context.randomUniqueValues.length);
            boolean from = context.randomFrom[r];
            String randomValue = context.randomUniqueValues[r];
            BitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);
            final Query actualQuery = new TermQuery(new Term("value", randomValue));
            if (VERBOSE) {
                System.out.println("actualQuery=" + actualQuery);
            }
            final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
            if (VERBOSE) {
                System.out.println("scoreMode=" + scoreMode);
            }
            // The join always starts on the side the random value belongs to
            // and targets the opposite side.
            final String fromType = from ? "from" : "to";
            final String toType = from ? "to" : "from";
            BooleanQuery.Builder fromQuery = new BooleanQuery.Builder();
            fromQuery.add(new TermQuery(new Term("type", fromType)), BooleanClause.Occur.FILTER);
            fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
            Query toQuery = new TermQuery(new Term("type", toType));
            final Query joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery.build(), toQuery, indexSearcher, scoreMode, context.ordinalMap);
            if (VERBOSE) {
                System.out.println("joinQuery=" + joinQuery);
            }
            final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
            final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
            // a single search pass feeds both the bit set of all matches and the top-10 collector
            indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
            assertBitSet(expectedResult, actualResult, indexSearcher);
            TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
            TopDocs actualTopDocs = topScoreDocCollector.topDocs();
            assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
        }
    } finally {
        context.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) FieldValueQuery(org.apache.lucene.search.FieldValueQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) BitSet(org.apache.lucene.util.BitSet) FixedBitSet(org.apache.lucene.util.FixedBitSet) Term(org.apache.lucene.index.Term) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) TopDocs(org.apache.lucene.search.TopDocs) FixedBitSet(org.apache.lucene.util.FixedBitSet)

Example 10 with TopScoreDocCollector

use of org.apache.lucene.search.TopScoreDocCollector in project tika by apache.

the class RecentFiles method generateRSS.

/**
 * Builds an RSS document listing the indexed documents whose date field falls within the
 * last five minutes.
 *
 * @param indexFile location of the Lucene index to open
 * @return the RSS headers, one item per hit (at most 20), and the RSS footers
 * @throws CorruptIndexException if the index cannot be read because it is corrupt
 * @throws IOException if opening, searching or closing the index fails
 */
public String generateRSS(File indexFile) throws CorruptIndexException, IOException {
    // local, single-threaded buffer: no need for StringBuffer's synchronization
    StringBuilder output = new StringBuilder();
    output.append(getRSSHeaders());
    IndexSearcher searcher = null;
    try {
        reader = IndexReader.open(new SimpleFSDirectory(indexFile));
        searcher = new IndexSearcher(reader);
        GregorianCalendar gc = new java.util.GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
        gc.setTime(new Date());
        String nowDateTime = ISO8601.format(gc);
        // move the same calendar back five minutes to get the lower bound
        gc.add(java.util.GregorianCalendar.MINUTE, -5);
        String fiveMinsAgo = ISO8601.format(gc);
        // both bounds are inclusive
        TermRangeQuery query = new TermRangeQuery(Metadata.DATE.toString(), fiveMinsAgo, nowDateTime, true, true);
        TopScoreDocCollector collector = TopScoreDocCollector.create(20, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            Document doc = searcher.doc(hits[i].doc);
            output.append(getRSSItem(doc));
        }
    } finally {
        // Nested finally: a failure while closing one resource must not
        // prevent the other from being closed.
        try {
            if (searcher != null) {
                searcher.close();
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
    output.append(getRSSFooters());
    return output.toString();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) GregorianCalendar(java.util.GregorianCalendar) Document(org.apache.lucene.document.Document) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) Date(java.util.Date) ScoreDoc(org.apache.lucene.search.ScoreDoc)

Aggregations

TopScoreDocCollector (org.apache.lucene.search.TopScoreDocCollector)10 IndexSearcher (org.apache.lucene.search.IndexSearcher)7 Query (org.apache.lucene.search.Query)7 Document (org.apache.lucene.document.Document)5 QueryParser (org.apache.lucene.queryparser.classic.QueryParser)5 ScoreDoc (org.apache.lucene.search.ScoreDoc)5 IOException (java.io.IOException)4 BooleanQuery (org.apache.lucene.search.BooleanQuery)4 TopDocs (org.apache.lucene.search.TopDocs)4 ArrayList (java.util.ArrayList)3 IndexReader (org.apache.lucene.index.IndexReader)3 ParseException (java.text.ParseException)2 LinkedHashSet (java.util.LinkedHashSet)2 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)2 DoublePoint (org.apache.lucene.document.DoublePoint)2 FloatPoint (org.apache.lucene.document.FloatPoint)2 IntPoint (org.apache.lucene.document.IntPoint)2 LongPoint (org.apache.lucene.document.LongPoint)2 Term (org.apache.lucene.index.Term)2 FieldValueQuery (org.apache.lucene.search.FieldValueQuery)2