use of org.apache.lucene.search.TopScoreDocCollector in project gitblit by gitblit.
the class LuceneService method search.
/**
 * Searches the specified repositories for the given text or query
 *
 * @param text
 *            if the text is null or empty, null is returned
 * @param page
 *            the page number to retrieve. page is 1-indexed.
 * @param pageSize
 *            the number of elements to return for this page
 * @param repositories
 *            a list of repositories to search. if no repositories are
 *            specified null is returned.
 * @return a list of SearchResults in order from highest to lowest score
 *
 */
public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
    if (StringUtils.isEmpty(text)) {
        return null;
    }
    if (ArrayUtils.isEmpty(repositories)) {
        return null;
    }
    Set<SearchResult> results = new LinkedHashSet<SearchResult>();
    StandardAnalyzer analyzer = new StandardAnalyzer();
    try {
        // default search checks summary and content
        BooleanQuery.Builder bldr = new BooleanQuery.Builder();
        QueryParser qp;
        qp = new QueryParser(FIELD_SUMMARY, analyzer);
        qp.setAllowLeadingWildcard(true);
        bldr.add(qp.parse(text), Occur.SHOULD);
        qp = new QueryParser(FIELD_CONTENT, analyzer);
        qp.setAllowLeadingWildcard(true);
        bldr.add(qp.parse(text), Occur.SHOULD);
        IndexSearcher searcher;
        if (repositories.length == 1) {
            // single repository search
            searcher = getIndexSearcher(repositories[0]);
        } else {
            // multiple repository search
            List<IndexReader> readers = new ArrayList<IndexReader>();
            for (String repository : repositories) {
                IndexSearcher repositoryIndex = getIndexSearcher(repository);
                readers.add(repositoryIndex.getIndexReader());
            }
            IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
            MultiSourceReader reader = new MultiSourceReader(rdrs);
            searcher = new IndexSearcher(reader);
        }
        BooleanQuery query = bldr.build();
        Query rewrittenQuery = searcher.rewrite(query);
        logger.debug(rewrittenQuery.toString());
        TopScoreDocCollector collector = TopScoreDocCollector.create(5000);
        searcher.search(rewrittenQuery, collector);
        int offset = Math.max(0, (page - 1) * pageSize);
        ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
        int totalHits = collector.getTotalHits();
        for (int i = 0; i < hits.length; i++) {
            int docId = hits[i].doc;
            Document doc = searcher.doc(docId);
            SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
            if (repositories.length == 1) {
                // single repository search
                result.repository = repositories[0];
            } else {
                // multi-repository search
                MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
                int index = reader.getSourceIndex(docId);
                result.repository = repositories[index];
            }
            String content = doc.get(FIELD_CONTENT);
            result.fragment = getHighlightedFragment(analyzer, query, content, result);
            results.add(result);
        }
    } catch (Exception e) {
        logger.error(MessageFormat.format("Exception while searching for {0}", text), e);
    }
    return new ArrayList<SearchResult>(results);
}
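The heart of the gitblit method is the paging pattern: collect a generous top-N once, then slice out the requested page with topDocs(offset, pageSize). Below is a minimal, self-contained sketch of that pattern, assuming the pre-8.0 Lucene API used above; the in-memory index, the "content" field, and the document text are illustrative assumptions, not gitblit code.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.RAMDirectory;

public class PagedSearchSketch {
    public static void main(String[] args) throws Exception {
        // build a tiny in-memory index (placeholder content)
        RAMDirectory dir = new RAMDirectory();
        StandardAnalyzer analyzer = new StandardAnalyzer();
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            for (int i = 0; i < 25; i++) {
                Document doc = new Document();
                doc.add(new TextField("content", "commit message number " + i, Field.Store.YES));
                writer.addDocument(doc);
            }
        }
        int page = 2, pageSize = 10;                       // 1-indexed page, as in the method above
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = new QueryParser("content", analyzer).parse("commit");
            // collect a large top-N once, then slice out one page of it
            TopScoreDocCollector collector = TopScoreDocCollector.create(5000);
            searcher.search(query, collector);
            int offset = Math.max(0, (page - 1) * pageSize);
            ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
            for (ScoreDoc hit : hits) {
                System.out.println(searcher.doc(hit.doc).get("content") + "  score=" + hit.score);
            }
        }
    }
}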
use of org.apache.lucene.search.TopScoreDocCollector in project neo4j by neo4j.
the class DocValuesCollector method getTopDocs.
private TopDocs getTopDocs(Sort sort, int size) throws IOException {
    TopDocs topDocs;
    if (sort == Sort.RELEVANCE) {
        TopScoreDocCollector collector = TopScoreDocCollector.create(size);
        replayTo(collector);
        topDocs = collector.topDocs();
    } else {
        TopFieldCollector collector = TopFieldCollector.create(sort, size, false, true, false);
        replayTo(collector);
        topDocs = collector.topDocs();
    }
    return topDocs;
}
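DocValuesCollector replays its cached matches into whichever collector fits the requested sort: TopScoreDocCollector for Sort.RELEVANCE, TopFieldCollector otherwise. The sketch below mirrors that branch as a hypothetical standalone helper; unlike the neo4j class it runs the query directly through an IndexSearcher instead of replaying cached hits, and it keeps the same fillFields/trackDocScores/trackMaxScore flags (false, true, false) used above, assuming the Lucene 5.x/6.x signature.

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;

/** Hypothetical helper mirroring the relevance-vs-field-sort branch above. */
final class SortedTopDocs {
    static TopDocs search(IndexSearcher searcher, Query query, Sort sort, int size) throws IOException {
        if (sort == Sort.RELEVANCE) {
            // score-sorted results: a plain TopScoreDocCollector is enough
            TopScoreDocCollector collector = TopScoreDocCollector.create(size);
            searcher.search(query, collector);
            return collector.topDocs();
        }
        // field-sorted results: fillFields=false, trackDocScores=true, trackMaxScore=false,
        // matching the flags in the neo4j snippet
        TopFieldCollector collector = TopFieldCollector.create(sort, size, false, true, false);
        searcher.search(query, collector);
        return collector.topDocs();
    }
}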
use of org.apache.lucene.search.TopScoreDocCollector in project lucene-solr by apache.
the class DrillSideways method search.
/**
 * Search, sorting by score, and computing
 * drill down and sideways counts.
 */
public DrillSidewaysResult search(ScoreDoc after, DrillDownQuery query, int topN) throws IOException {
    int limit = searcher.getIndexReader().maxDoc();
    if (limit == 0) {
        // the collector does not allow numHits = 0
        limit = 1;
    }
    final int fTopN = Math.min(topN, limit);
    if (executor != null) {
        // We have an executor, so let's use the multi-threaded version
        final CollectorManager<TopScoreDocCollector, TopDocs> collectorManager = new CollectorManager<TopScoreDocCollector, TopDocs>() {

            @Override
            public TopScoreDocCollector newCollector() throws IOException {
                return TopScoreDocCollector.create(fTopN, after);
            }

            @Override
            public TopDocs reduce(Collection<TopScoreDocCollector> collectors) throws IOException {
                final TopDocs[] topDocs = new TopDocs[collectors.size()];
                int pos = 0;
                for (TopScoreDocCollector collector : collectors) topDocs[pos++] = collector.topDocs();
                return TopDocs.merge(topN, topDocs);
            }
        };
        ConcurrentDrillSidewaysResult<TopDocs> r = search(query, collectorManager);
        return new DrillSidewaysResult(r.facets, r.collectorResult);
    } else {
        TopScoreDocCollector hitCollector = TopScoreDocCollector.create(topN, after);
        DrillSidewaysResult r = search(query, hitCollector);
        return new DrillSidewaysResult(r.facets, hitCollector.topDocs());
    }
}
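The CollectorManager above gives DrillSideways one TopScoreDocCollector per index slice and merges the per-slice results with TopDocs.merge. The same manager pattern works with a plain IndexSearcher constructed with an executor; the sketch below shows that, assuming Lucene 5.x-7.x, a four-thread pool, and a placeholder index path (/tmp/index) chosen purely for illustration.

import java.io.IOException;
import java.nio.file.Paths;
import java.util.Collection;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;

public class ParallelTopDocsSketch {
    public static void main(String[] args) throws Exception {
        final int topN = 10;
        ExecutorService executor = Executors.newFixedThreadPool(4);
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/index")))) {
            // one searcher whose leaf slices are searched on the executor's threads
            IndexSearcher searcher = new IndexSearcher(reader, executor);
            CollectorManager<TopScoreDocCollector, TopDocs> manager =
                new CollectorManager<TopScoreDocCollector, TopDocs>() {
                    @Override
                    public TopScoreDocCollector newCollector() throws IOException {
                        return TopScoreDocCollector.create(topN);   // one collector per slice
                    }
                    @Override
                    public TopDocs reduce(Collection<TopScoreDocCollector> collectors) throws IOException {
                        TopDocs[] perSlice = new TopDocs[collectors.size()];
                        int pos = 0;
                        for (TopScoreDocCollector c : collectors) perSlice[pos++] = c.topDocs();
                        return TopDocs.merge(topN, perSlice);       // merge slice results by score
                    }
                };
            TopDocs hits = searcher.search(new MatchAllDocsQuery(), manager);
            System.out.println("hits returned: " + hits.scoreDocs.length);
        } finally {
            executor.shutdown();
        }
    }
}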
use of org.apache.lucene.search.TopScoreDocCollector in project lucene-solr by apache.
the class TestJoinUtil method testRandomOrdinalsJoin.
public void testRandomOrdinalsJoin() throws Exception {
    IndexIterationContext context = createContext(512, false, true);
    int searchIters = 10;
    IndexSearcher indexSearcher = context.searcher;
    for (int i = 0; i < searchIters; i++) {
        if (VERBOSE) {
            System.out.println("search iter=" + i);
        }
        int r = random().nextInt(context.randomUniqueValues.length);
        boolean from = context.randomFrom[r];
        String randomValue = context.randomUniqueValues[r];
        BitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);
        final Query actualQuery = new TermQuery(new Term("value", randomValue));
        if (VERBOSE) {
            System.out.println("actualQuery=" + actualQuery);
        }
        final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
        if (VERBOSE) {
            System.out.println("scoreMode=" + scoreMode);
        }
        final Query joinQuery;
        if (from) {
            BooleanQuery.Builder fromQuery = new BooleanQuery.Builder();
            fromQuery.add(new TermQuery(new Term("type", "from")), BooleanClause.Occur.FILTER);
            fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
            Query toQuery = new TermQuery(new Term("type", "to"));
            joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery.build(), toQuery, indexSearcher, scoreMode, context.ordinalMap);
        } else {
            BooleanQuery.Builder fromQuery = new BooleanQuery.Builder();
            fromQuery.add(new TermQuery(new Term("type", "to")), BooleanClause.Occur.FILTER);
            fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
            Query toQuery = new TermQuery(new Term("type", "from"));
            joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery.build(), toQuery, indexSearcher, scoreMode, context.ordinalMap);
        }
        if (VERBOSE) {
            System.out.println("joinQuery=" + joinQuery);
        }
        final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
        final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
        indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
        assertBitSet(expectedResult, actualResult, indexSearcher);
        TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
        TopDocs actualTopDocs = topScoreDocCollector.topDocs();
        assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
    }
    context.close();
}
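The test feeds one search pass into two collectors at once via MultiCollector.wrap: a custom BitSetCollector records every match while TopScoreDocCollector keeps the top-scoring hits. A minimal sketch of the same idea, pairing TopScoreDocCollector with Lucene's stock TotalHitCountCollector instead of a custom collector (the searcher and query are assumed to exist already):

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.TotalHitCountCollector;

public class MultiCollectorSketch {
    /** Runs one query and returns the top hits while also counting every match. */
    static TopDocs topDocsAndCount(IndexSearcher searcher, Query query) throws IOException {
        TopScoreDocCollector top = TopScoreDocCollector.create(10);
        TotalHitCountCollector count = new TotalHitCountCollector();
        // both collectors see every matching document in a single pass
        searcher.search(query, MultiCollector.wrap(count, top));
        System.out.println("matches: " + count.getTotalHits());
        return top.topDocs();
    }
}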
use of org.apache.lucene.search.TopScoreDocCollector in project tika by apache.
the class RecentFiles method generateRSS.
public String generateRSS(File indexFile) throws CorruptIndexException, IOException {
    StringBuffer output = new StringBuffer();
    output.append(getRSSHeaders());
    IndexSearcher searcher = null;
    try {
        reader = IndexReader.open(new SimpleFSDirectory(indexFile));
        searcher = new IndexSearcher(reader);
        GregorianCalendar gc = new java.util.GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
        gc.setTime(new Date());
        String nowDateTime = ISO8601.format(gc);
        gc.add(java.util.GregorianCalendar.MINUTE, -5);
        String fiveMinsAgo = ISO8601.format(gc);
        TermRangeQuery query = new TermRangeQuery(Metadata.DATE.toString(), fiveMinsAgo, nowDateTime, true, true);
        TopScoreDocCollector collector = TopScoreDocCollector.create(20, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            Document doc = searcher.doc(hits[i].doc);
            output.append(getRSSItem(doc));
        }
    } finally {
        if (reader != null)
            reader.close();
        if (searcher != null)
            searcher.close();
    }
    output.append(getRSSFooters());
    return output.toString();
}
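The Tika snippet targets the Lucene 3.x API (IndexReader.open, a two-argument TopScoreDocCollector.create, and a closeable IndexSearcher). As a rough sketch only, the same five-minute window query could be written against Lucene 5.x-7.x as below; the index path and the "date"/"title" field names are placeholders, not Tika's actual schema.

import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;

public class RecentDocsSketch {
    public static void main(String[] args) throws Exception {
        // ISO-8601 strings sort lexicographically, so a string range query works as a time window
        SimpleDateFormat iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
        iso8601.setTimeZone(TimeZone.getTimeZone("UTC"));
        String now = iso8601.format(new Date());
        String fiveMinsAgo = iso8601.format(new Date(System.currentTimeMillis() - 5 * 60 * 1000L));
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TermRangeQuery query = TermRangeQuery.newStringRange("date", fiveMinsAgo, now, true, true);
            TopScoreDocCollector collector = TopScoreDocCollector.create(20);
            searcher.search(query, collector);
            for (ScoreDoc hit : collector.topDocs().scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                System.out.println(doc.get("date") + " " + doc.get("title"));
            }
        }
    }
}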