use of org.apache.lucene.search.TopScoreDocCollector in project camel by apache.
the class LuceneSearcher method doSearch.
/**
 * Parses {@code searchPhrase} against the "contents" field, executes the resulting query on
 * {@code indexSearcher} and assigns the collected score docs to {@code hits}.
 *
 * @param searchPhrase the phrase handed to the query parser
 * @param maxNumberOfHits the number of top hits the collector is created for
 * @param luceneVersion not read by this method
 * @return the length of the {@code hits} array after the search
 * @throws ParseException if the query parser rejects the phrase
 * @throws IOException if the index search fails
 */
private int doSearch(String searchPhrase, int maxNumberOfHits, Version luceneVersion) throws NullPointerException, ParseException, IOException {
    LOG.trace("*** Search Phrase: {} ***", searchPhrase);

    final Query parsedQuery = new QueryParser("contents", analyzer).parse(searchPhrase);
    final TopScoreDocCollector topHits = TopScoreDocCollector.create(maxNumberOfHits);
    indexSearcher.search(parsedQuery, topHits);

    hits = topHits.topDocs().scoreDocs;
    final int hitCount = hits.length;
    LOG.trace("*** Search generated {} hits ***", hitCount);
    return hitCount;
}
use of org.apache.lucene.search.TopScoreDocCollector in project lucene-solr by apache.
the class TestJoinUtil method executeRandomJoin.
/**
 * Randomized join test driver. For every index iteration a new context is created; for every
 * search iteration a random value, score mode and join-query flavour are picked, the join query
 * is executed, and both the matching doc set and the top docs are compared with expected values.
 *
 * <p>The order of the {@code random()} calls below is significant for reproducing a run from a
 * seed and is kept exactly as it was.
 *
 * @param multipleValuesPerDocument passed to {@code createContext}; also forces the multi-valued
 *     join query variant
 * @param maxIndexIter number of index iterations (contexts) to run
 * @param maxSearchIter number of search iterations per context
 * @param numberOfDocumentsToIndex passed to {@code createContext}
 * @throws Exception if context creation, query construction, searching or closing fails
 */
private void executeRandomJoin(boolean multipleValuesPerDocument, int maxIndexIter, int maxSearchIter, int numberOfDocumentsToIndex) throws Exception {
    for (int indexRound = 1; indexRound <= maxIndexIter; indexRound++) {
        if (VERBOSE) {
            System.out.println("indexIter=" + indexRound);
        }
        final IndexIterationContext context = createContext(numberOfDocumentsToIndex, multipleValuesPerDocument, false);
        final IndexSearcher indexSearcher = context.searcher;

        for (int searchRound = 1; searchRound <= maxSearchIter; searchRound++) {
            if (VERBOSE) {
                System.out.println("searchIter=" + searchRound);
            }

            // Pick one of the unique values together with its join direction.
            final int pick = random().nextInt(context.randomUniqueValues.length);
            final boolean from = context.randomFrom[pick];
            final String randomValue = context.randomUniqueValues[pick];
            final BitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);

            final Query actualQuery = new TermQuery(new Term("value", randomValue));
            if (VERBOSE) {
                System.out.println("actualQuery=" + actualQuery);
            }

            final ScoreMode[] scoreModes = ScoreMode.values();
            final ScoreMode scoreMode = scoreModes[random().nextInt(scoreModes.length)];
            if (VERBOSE) {
                System.out.println("scoreMode=" + scoreMode);
            }

            // single val can be handled by multiple-vals
            final boolean multiValsQuery = multipleValuesPerDocument || random().nextBoolean();
            final String fromField;
            final String toField;
            if (from) {
                fromField = "from";
                toField = "to";
            } else {
                fromField = "to";
                toField = "from";
            }

            final int variant = random().nextInt(2);
            final Query joinQuery;
            if (variant == 0) {
                // Numeric join: choose the numeric type (and matching field suffix) at random.
                final Class<? extends Number> numType;
                final String suffix;
                if (random().nextBoolean()) {
                    numType = Integer.class;
                    suffix = "INT";
                } else if (random().nextBoolean()) {
                    numType = Float.class;
                    suffix = "FLOAT";
                } else if (random().nextBoolean()) {
                    numType = Long.class;
                    suffix = "LONG";
                } else {
                    numType = Double.class;
                    suffix = "DOUBLE";
                }
                joinQuery = JoinUtil.createJoinQuery(fromField + suffix, multiValsQuery, toField + suffix, numType, actualQuery, indexSearcher, scoreMode);
            } else if (variant == 1) {
                // Term-based join on the plain from/to fields.
                joinQuery = JoinUtil.createJoinQuery(fromField, multiValsQuery, toField, actualQuery, indexSearcher, scoreMode);
            } else {
                throw new RuntimeException("unexpected value " + variant);
            }
            if (VERBOSE) {
                System.out.println("joinQuery=" + joinQuery);
            }

            // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
            final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
            final TopScoreDocCollector topDocsCollector = TopScoreDocCollector.create(10);
            indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topDocsCollector));

            // Asserting bit set...
            assertBitSet(expectedResult, actualResult, indexSearcher);

            // Asserting TopDocs...
            final TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
            final TopDocs actualTopDocs = topDocsCollector.topDocs();
            assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
        }
        context.close();
    }
}
use of org.apache.lucene.search.TopScoreDocCollector in project Anserini by castorini.
the class TweetSearcherAPI method search.
/**
 * Runs the text query carried by {@code query} against the live tweet index and returns the
 * IDs of the top-scoring tweets.
 *
 * <p>The reader held in {@code reader} (declared outside this method) is opened from
 * {@code TweetSearcher.indexWriter} only when none exists yet; on later calls it is refreshed
 * with {@code openIfChanged}, and the superseded reader is closed. Opening a brand-new reader
 * on every call and overwriting the reference would leave the previous reader unclosed.
 *
 * <p>NOTE(review): {@code reader} is mutated here without synchronization — confirm how
 * instances of this resource are shared between requests.
 *
 * @param query the request payload; its query string and requested hit count are read
 * @return one {@code SearchResult} per hit, in collector order; an empty list if anything
 *     fails (parse error, I/O error, invalid hit count, ...)
 */
@POST
@Path("search")
@Produces(MediaType.APPLICATION_JSON)
public List<SearchResult> search(SearchAPIQuery query) {
    try {
        Query q = new QueryParser(TweetStreamIndexer.StatusField.TEXT.name, TweetSearcher.ANALYZER).parse(query.getQuery());

        if (reader == null) {
            // A failure to open now reaches the outer catch instead of being swallowed and
            // followed by a search on a null reader.
            reader = DirectoryReader.open(TweetSearcher.indexWriter, true, true);
        } else {
            IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) reader, TweetSearcher.indexWriter, true);
            if (newReader != null) {
                reader.close();
                reader = newReader;
            }
        }

        IndexSearcher searcher = new IndexSearcher(reader);
        int topN = query.getCount();
        TopScoreDocCollector collector = TopScoreDocCollector.create(topN);
        searcher.search(q, collector);

        // The collector was created for topN hits, so no extra bound on the loop is needed.
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        List<SearchResult> resultHits = new ArrayList<>(hits.length);
        for (ScoreDoc hit : hits) {
            Document d = searcher.doc(hit.doc);
            resultHits.add(new SearchResult(String.valueOf(d.get(TweetStreamIndexer.StatusField.ID.name))));
        }
        return resultHits;
    } catch (Exception e) {
        // Request boundary: report the failure and answer with an empty result list.
        e.printStackTrace();
        return new ArrayList<>();
    }
}
use of org.apache.lucene.search.TopScoreDocCollector in project Anserini by castorini.
the class TweetServlet method doGet.
/**
 * Handles {@code GET /search}: parses the {@code query} parameter, searches the live tweet
 * index for the top {@code top} hits (20 when the parameter is absent) and renders the hit IDs
 * through the Mustache template at {@code MustacheTemplatePath}. Any other URI gets a 404.
 *
 * <p>Invalid input is answered with 400 Bad Request: a missing {@code query}, a query the
 * parser rejects, or a {@code top} value that is not a positive integer. The 200 status and
 * content type are set only once the request is known to be valid.
 *
 * <p>The reader held in {@code reader} (declared outside this method) is opened only when none
 * exists yet and refreshed with {@code openIfChanged} afterwards, so an already-open reader is
 * no longer overwritten without being closed.
 *
 * <p>NOTE(review): {@code reader} is mutated here without synchronization — confirm whether
 * concurrent requests can reach this servlet instance.
 *
 * @param request the incoming request; {@code query} and optional {@code top} are read
 * @param response receives the rendered HTML or an error status
 * @throws ServletException declared by the servlet contract
 * @throws IOException if opening/refreshing the reader, searching, or writing the response fails
 */
@Override
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    if (!request.getRequestURI().equals("/search")) {
        response.setStatus(HttpServletResponse.SC_NOT_FOUND);
        return;
    }

    // Must be set before the first parameter is read.
    request.setCharacterEncoding("UTF-8");

    String queryText = request.getParameter("query");
    if (queryText == null) {
        response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Missing 'query' parameter");
        return;
    }

    Query q;
    try {
        q = new QueryParser(StatusField.TEXT.name, TweetSearcher.ANALYZER).parse(queryText);
    } catch (ParseException e) {
        // User input is deliberately not echoed back in the error message.
        response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Malformed 'query' parameter");
        return;
    }

    // TODO configurable, default(parameter unspecified in url) topN = 20
    int topN = 20;
    String topParam = request.getParameter("top");
    if (topParam != null) {
        try {
            topN = Integer.parseInt(topParam);
        } catch (NumberFormatException e) {
            response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Parameter 'top' must be an integer");
            return;
        }
    }
    if (topN <= 0) {
        // The collector cannot be created for a non-positive number of hits.
        response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Parameter 'top' must be positive");
        return;
    }

    if (reader == null) {
        reader = DirectoryReader.open(TweetSearcher.indexWriter, true, true);
    } else {
        IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) reader, TweetSearcher.indexWriter, true);
        if (newReader != null) {
            reader.close();
            reader = newReader;
        }
    }

    IndexSearcher searcher = new IndexSearcher(reader);
    TopScoreDocCollector collector = TopScoreDocCollector.create(topN);
    searcher.search(q, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;

    TweetHits tweetHits = new TweetHits(queryText, hits.length);
    for (int i = 0; i < hits.length; ++i) {
        Document d = searcher.doc(hits[i].doc);
        tweetHits.addHit(i, String.valueOf(d.get(StatusField.ID.name)));
    }

    response.setStatus(HttpServletResponse.SC_OK);
    response.setContentType("text/html");
    MustacheFactory mf = new DefaultMustacheFactory();
    Mustache mustache = mf.compile(MustacheTemplatePath);
    mustache.execute(response.getWriter(), tweetHits).flush();
}
use of org.apache.lucene.search.TopScoreDocCollector in project gitblit by gitblit.
the class TicketIndexer method searchFor.
/**
 * Search for tickets matching the query. The returned tickets are
 * shadows of the real ticket, but suitable for a results list.
 * <p>
 * The text is parsed once per searched field (title, body, content) with
 * leading wildcards allowed, and the per-field queries are OR-ed together.
 * At most 5000 hits are collected; the requested page is cut from those.
 * The repository filter is applied after paging, so a page may hold fewer
 * than {@code pageSize} results. Any exception is logged and whatever was
 * gathered so far (possibly nothing) is returned.
 *
 * @param repository
 *            if not null, only results whose repository name matches
 *            (ignoring case) are kept
 * @param text
 *            the query text; an empty text yields an empty list
 * @param page
 *            1-based page number; values below 1 are treated as page 1
 * @param pageSize
 *            maximum number of hits taken for the page
 * @return search results
 */
public List<QueryResult> searchFor(RepositoryModel repository, String text, int page, int pageSize) {
    if (StringUtils.isEmpty(text)) {
        return Collections.emptyList();
    }
    Set<QueryResult> results = new LinkedHashSet<QueryResult>();
    // The analyzer is Closeable; try-with-resources releases it on every path.
    try (StandardAnalyzer analyzer = new StandardAnalyzer()) {
        // search the title, description and content
        String[] searchFields = { Lucene.title.name(), Lucene.body.name(), Lucene.content.name() };
        BooleanQuery.Builder bldr = new BooleanQuery.Builder();
        for (String field : searchFields) {
            QueryParser qp = new QueryParser(field, analyzer);
            qp.setAllowLeadingWildcard(true);
            bldr.add(qp.parse(text), Occur.SHOULD);
        }

        IndexSearcher searcher = getSearcher();
        Query rewrittenQuery = searcher.rewrite(bldr.build());
        log.debug(rewrittenQuery.toString());

        TopScoreDocCollector collector = TopScoreDocCollector.create(5000);
        searcher.search(rewrittenQuery, collector);
        int offset = Math.max(0, (page - 1) * pageSize);
        ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
        for (ScoreDoc hit : hits) {
            Document doc = searcher.doc(hit.doc);
            QueryResult result = docToQueryResult(doc);
            if (repository != null && !result.repository.equalsIgnoreCase(repository.name)) {
                // hit belongs to a different repository
                continue;
            }
            results.add(result);
        }
    } catch (Exception e) {
        log.error(MessageFormat.format("Exception while searching for {0}", text), e);
    }
    return new ArrayList<QueryResult>(results);
}
Aggregations