Search in sources :

Example 1 with TopScoreDocCollector

use of org.apache.lucene.search.TopScoreDocCollector in project camel by apache.

the class LuceneSearcher method doSearch.

/**
 * Parses the search phrase against the "contents" field, runs it on the
 * index searcher and stores the resulting score docs in {@code hits}.
 *
 * @param searchPhrase    query text in classic query-parser syntax
 * @param maxNumberOfHits upper bound on the number of hits collected
 * @param luceneVersion   not used by this method
 * @return the number of hits that were collected
 * @throws ParseException if the phrase cannot be parsed
 * @throws IOException    if searching the index fails
 */
private int doSearch(String searchPhrase, int maxNumberOfHits, Version luceneVersion) throws NullPointerException, ParseException, IOException {
    LOG.trace("*** Search Phrase: {} ***", searchPhrase);

    final Query parsedQuery = new QueryParser("contents", analyzer).parse(searchPhrase);
    final TopScoreDocCollector topHits = TopScoreDocCollector.create(maxNumberOfHits);
    indexSearcher.search(parsedQuery, topHits);

    // Keep the hits on the instance; only the count is returned.
    hits = topHits.topDocs().scoreDocs;
    final int hitCount = hits.length;
    LOG.trace("*** Search generated {} hits ***", hitCount);
    return hitCount;
}
Also used : QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Query(org.apache.lucene.search.Query) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector)

Example 2 with TopScoreDocCollector

use of org.apache.lucene.search.TopScoreDocCollector in project lucene-solr by apache.

the class TestJoinUtil method executeRandomJoin.

/**
 * Runs randomized join queries and verifies them against independently
 * computed expectations.
 *
 * For each index iteration a context is obtained from {@code createContext};
 * for each search iteration a random value, score mode and join-query
 * variant (numeric or term based) are picked, the join query is executed,
 * and both the set of matching documents and the top docs are asserted.
 *
 * @param multipleValuesPerDocument passed to {@code createContext}; when true the
 *                                  multi-valued join variant is always requested
 * @param maxIndexIter              number of contexts to create (outer loop count)
 * @param maxSearchIter             number of random searches per context
 * @param numberOfDocumentsToIndex  passed to {@code createContext}
 * @throws Exception if creating the context, searching or closing fails
 */
private void executeRandomJoin(boolean multipleValuesPerDocument, int maxIndexIter, int maxSearchIter, int numberOfDocumentsToIndex) throws Exception {
    for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++) {
        if (VERBOSE) {
            System.out.println("indexIter=" + indexIter);
        }
        IndexIterationContext context = createContext(numberOfDocumentsToIndex, multipleValuesPerDocument, false);
        // Close the context even when a search or an assertion fails, so a
        // failing iteration does not leave its resources open.
        try {
            IndexSearcher indexSearcher = context.searcher;
            for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++) {
                if (VERBOSE) {
                    System.out.println("searchIter=" + searchIter);
                }
                // NOTE: the order of random() calls below is kept stable so that
                // a given seed keeps producing the same sequence of choices.
                int r = random().nextInt(context.randomUniqueValues.length);
                boolean from = context.randomFrom[r];
                String randomValue = context.randomUniqueValues[r];
                BitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);
                final Query actualQuery = new TermQuery(new Term("value", randomValue));
                if (VERBOSE) {
                    System.out.println("actualQuery=" + actualQuery);
                }
                final ScoreMode[] scoreModes = ScoreMode.values();
                final ScoreMode scoreMode = scoreModes[random().nextInt(scoreModes.length)];
                if (VERBOSE) {
                    System.out.println("scoreMode=" + scoreMode);
                }
                final Query joinQuery;
                {
                    // single val can be handled by multiple-vals
                    final boolean multiValsQuery = multipleValuesPerDocument || random().nextBoolean();
                    final String fromField = from ? "from" : "to";
                    final String toField = from ? "to" : "from";
                    int surpriseMe = random().nextInt(2);
                    switch(surpriseMe) {
                        case 0:
                            // Numeric join: pick one of the numeric field flavours.
                            Class<? extends Number> numType;
                            String suffix;
                            if (random().nextBoolean()) {
                                numType = Integer.class;
                                suffix = "INT";
                            } else if (random().nextBoolean()) {
                                numType = Float.class;
                                suffix = "FLOAT";
                            } else if (random().nextBoolean()) {
                                numType = Long.class;
                                suffix = "LONG";
                            } else {
                                numType = Double.class;
                                suffix = "DOUBLE";
                            }
                            joinQuery = JoinUtil.createJoinQuery(fromField + suffix, multiValsQuery, toField + suffix, numType, actualQuery, indexSearcher, scoreMode);
                            break;
                        case 1:
                            // Term based join on the plain from/to fields.
                            joinQuery = JoinUtil.createJoinQuery(fromField, multiValsQuery, toField, actualQuery, indexSearcher, scoreMode);
                            break;
                        default:
                            throw new RuntimeException("unexpected value " + surpriseMe);
                    }
                }
                if (VERBOSE) {
                    System.out.println("joinQuery=" + joinQuery);
                }
                // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
                final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
                final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
                indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
                // Asserting bit set...
                assertBitSet(expectedResult, actualResult, indexSearcher);
                // Asserting TopDocs...
                TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
                TopDocs actualTopDocs = topScoreDocCollector.topDocs();
                assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
            }
        } finally {
            context.close();
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) FieldValueQuery(org.apache.lucene.search.FieldValueQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) BitSet(org.apache.lucene.util.BitSet) FixedBitSet(org.apache.lucene.util.FixedBitSet) Term(org.apache.lucene.index.Term) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) TopDocs(org.apache.lucene.search.TopDocs) FixedBitSet(org.apache.lucene.util.FixedBitSet)

Example 3 with TopScoreDocCollector

use of org.apache.lucene.search.TopScoreDocCollector in project Anserini by castorini.

the class TweetSearcherAPI method search.

/**
 * Searches the tweet index for the text of the given query and returns the
 * stored ID field of each top hit.
 *
 * The shared {@code reader} is opened from the index writer on first use and
 * afterwards only refreshed via {@code openIfChanged}, so a request no longer
 * abandons the previously opened reader without closing it.
 *
 * NOTE(review): the previous reader is closed as soon as a newer one is
 * available; if {@code reader} is shared between concurrent requests this
 * needs external coordination - confirm against the class's threading model.
 *
 * @param query request carrying the query text and the maximum hit count
 * @return one result per hit, in the order returned by the collector; an
 *         empty list if anything fails (the exception is printed)
 */
@POST
@Path("search")
@Produces(MediaType.APPLICATION_JSON)
public List<SearchResult> search(SearchAPIQuery query) {
    try {
        Query q = new QueryParser(TweetStreamIndexer.StatusField.TEXT.name, TweetSearcher.ANALYZER).parse(query.getQuery());
        if (reader == null) {
            // First use: open a reader on top of the index writer.
            reader = DirectoryReader.open(TweetSearcher.indexWriter, true, true);
        } else {
            // Later uses: reopen only if the index changed, and release the old reader.
            IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) reader, TweetSearcher.indexWriter, true);
            if (newReader != null) {
                reader.close();
                reader = newReader;
            }
        }
        IndexSearcher searcher = new IndexSearcher(reader);
        int topN = query.getCount();
        TopScoreDocCollector collector = TopScoreDocCollector.create(topN);
        searcher.search(q, collector);
        // The collector never returns more than topN hits, so no extra bound is needed.
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        List<SearchResult> resultHits = new ArrayList<>(hits.length);
        for (ScoreDoc hit : hits) {
            Document d = searcher.doc(hit.doc);
            resultHits.add(new SearchResult(String.valueOf(d.get(TweetStreamIndexer.StatusField.ID.name))));
        }
        return resultHits;
    } catch (Exception e) {
        // Best effort endpoint: report the failure and answer with no results.
        e.printStackTrace();
        return new ArrayList<>();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) IOException(java.io.IOException) ScoreDoc(org.apache.lucene.search.ScoreDoc) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) IndexReader(org.apache.lucene.index.IndexReader) Path(javax.ws.rs.Path) POST(javax.ws.rs.POST) Produces(javax.ws.rs.Produces)

Example 4 with TopScoreDocCollector

use of org.apache.lucene.search.TopScoreDocCollector in project Anserini by castorini.

the class TweetServlet method doGet.

/**
 * Handles {@code GET /search}: runs the {@code query} parameter against the
 * tweet index and renders the hits through the Mustache template.
 *
 * Request parameters:
 * - {@code query} (required): query text in classic query-parser syntax
 * - {@code top} (optional): positive maximum number of hits, default 20
 *
 * Responses: 404 for any other URI; 400 when {@code query} is missing or
 * unparsable, or {@code top} is not a positive integer; 200 with the
 * rendered HTML otherwise.
 *
 * NOTE(review): the previous reader is closed as soon as a newer one is
 * available; if {@code reader} is shared between concurrent requests this
 * needs external coordination - confirm against the class's threading model.
 *
 * @throws IOException if the index cannot be read or the response cannot be written
 */
@Override
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    if (!request.getRequestURI().equals("/search")) {
        response.setStatus(HttpServletResponse.SC_NOT_FOUND);
        return;
    }
    // The encoding has to be set before the first parameter is read.
    request.setCharacterEncoding("UTF-8");

    String queryText = request.getParameter("query");
    if (queryText == null) {
        response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Missing required parameter: query");
        return;
    }

    // TODO configurable, default(parameter unspecified in url) topN = 20
    int topN = 20;
    String topParam = request.getParameter("top");
    if (topParam != null) {
        try {
            topN = Integer.parseInt(topParam);
        } catch (NumberFormatException e) {
            response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Parameter 'top' must be an integer");
            return;
        }
        if (topN <= 0) {
            // The collector rejects non-positive sizes; answer with a client error instead.
            response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Parameter 'top' must be positive");
            return;
        }
    }

    Query q;
    try {
        q = new QueryParser(StatusField.TEXT.name, TweetSearcher.ANALYZER).parse(queryText);
    } catch (ParseException e) {
        e.printStackTrace();
        response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Invalid query");
        return;
    }

    if (reader == null) {
        // First use: open a reader on top of the index writer.
        reader = DirectoryReader.open(TweetSearcher.indexWriter, true, true);
    } else {
        // Later uses: reopen only if the index changed, and release the old reader.
        IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) reader, TweetSearcher.indexWriter, true);
        if (newReader != null) {
            reader.close();
            reader = newReader;
        }
    }

    IndexSearcher searcher = new IndexSearcher(reader);
    TopScoreDocCollector collector = TopScoreDocCollector.create(topN);
    searcher.search(q, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    TweetHits tweetHits = new TweetHits(queryText, hits.length);
    for (int i = 0; i < hits.length; ++i) {
        Document d = searcher.doc(hits[i].doc);
        tweetHits.addHit(i, String.valueOf(d.get(StatusField.ID.name)));
    }

    response.setStatus(HttpServletResponse.SC_OK);
    response.setContentType("text/html");
    MustacheFactory mf = new DefaultMustacheFactory();
    Mustache mustache = mf.compile(MustacheTemplatePath);
    mustache.execute(response.getWriter(), tweetHits).flush();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) DefaultMustacheFactory(com.github.mustachejava.DefaultMustacheFactory) Mustache(com.github.mustachejava.Mustache) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) DefaultMustacheFactory(com.github.mustachejava.DefaultMustacheFactory) MustacheFactory(com.github.mustachejava.MustacheFactory) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) IndexReader(org.apache.lucene.index.IndexReader) ParseException(org.apache.lucene.queryparser.classic.ParseException)

Example 5 with TopScoreDocCollector

use of org.apache.lucene.search.TopScoreDocCollector in project gitblit by gitblit.

the class TicketIndexer method searchFor.

/**
 * Search for tickets matching the query.  The returned tickets are
 * shadows of the real ticket, but suitable for a results list.
 *
 * The text is parsed once per searched field (title, body and content) and
 * the three queries are OR-ed together.  At most the 5000 best hits are
 * collected, and the requested page is cut from those.  The repository
 * filter is applied after paging, so a page can contain fewer than
 * {@code pageSize} entries.
 *
 * @param repository
 *            when not null, only results whose repository name equals this
 *            repository's name (ignoring case) are returned
 * @param text
 *            the query text; an empty value yields an empty list
 * @param page
 *            1-based page number; values below 1 are treated as the first page
 * @param pageSize
 *            maximum number of hits taken for the page
 * @return search results, without duplicates, in hit order; empty if the
 *         search fails (the failure is logged)
 */
public List<QueryResult> searchFor(RepositoryModel repository, String text, int page, int pageSize) {
    if (StringUtils.isEmpty(text)) {
        return Collections.emptyList();
    }
    Set<QueryResult> results = new LinkedHashSet<QueryResult>();
    // The analyzer is Closeable; release it once the search is finished.
    try (StandardAnalyzer analyzer = new StandardAnalyzer()) {
        // search the title, description and content
        BooleanQuery.Builder bldr = new BooleanQuery.Builder();
        String[] searchFields = { Lucene.title.name(), Lucene.body.name(), Lucene.content.name() };
        for (String field : searchFields) {
            QueryParser qp = new QueryParser(field, analyzer);
            qp.setAllowLeadingWildcard(true);
            bldr.add(qp.parse(text), Occur.SHOULD);
        }
        IndexSearcher searcher = getSearcher();
        Query rewrittenQuery = searcher.rewrite(bldr.build());
        log.debug(rewrittenQuery.toString());
        TopScoreDocCollector collector = TopScoreDocCollector.create(5000);
        searcher.search(rewrittenQuery, collector);
        int offset = Math.max(0, (page - 1) * pageSize);
        ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
        for (ScoreDoc hit : hits) {
            Document doc = searcher.doc(hit.doc);
            QueryResult result = docToQueryResult(doc);
            // Drop hits that belong to a different repository than the requested one.
            if (repository != null && !result.repository.equalsIgnoreCase(repository.name)) {
                continue;
            }
            results.add(result);
        }
    } catch (Exception e) {
        log.error(MessageFormat.format("Exception while searching for {0}", text), e);
    }
    return new ArrayList<QueryResult>(results);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) BooleanQuery(org.apache.lucene.search.BooleanQuery) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) ParseException(java.text.ParseException) IOException(java.io.IOException) ScoreDoc(org.apache.lucene.search.ScoreDoc) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer)

Aggregations

TopScoreDocCollector (org.apache.lucene.search.TopScoreDocCollector): 15; IndexSearcher (org.apache.lucene.search.IndexSearcher): 11; Query (org.apache.lucene.search.Query): 9; ScoreDoc (org.apache.lucene.search.ScoreDoc): 8; Document (org.apache.lucene.document.Document): 7; IOException (java.io.IOException): 6; QueryParser (org.apache.lucene.queryparser.classic.QueryParser): 6; TopDocs (org.apache.lucene.search.TopDocs): 5; Term (org.apache.lucene.index.Term): 4; BooleanQuery (org.apache.lucene.search.BooleanQuery): 4; ArrayList (java.util.ArrayList): 3; Analyzer (org.apache.lucene.analysis.Analyzer): 3; StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer): 3; TermQuery (org.apache.lucene.search.TermQuery): 3; FakeWordsEncoderAnalyzer (io.anserini.ann.fw.FakeWordsEncoderAnalyzer): 2; LexicalLshAnalyzer (io.anserini.ann.lexlsh.LexicalLshAnalyzer): 2; Path (java.nio.file.Path): 2; ParseException (java.text.ParseException): 2; LinkedHashSet (java.util.LinkedHashSet): 2; LinkedList (java.util.LinkedList): 2