Search in sources:

Example 16 with ScoredDocuments

use of io.anserini.rerank.ScoredDocuments in project Anserini by castorini.

the class SimpleGeoSearcher method searchGeo.

/**
 * Executes {@code query} against the index and wraps each returned hit in a {@link Result}.
 *
 * <p>The {@code IndexSearcher} is created from {@code reader} on the first call and reused
 * afterwards. For every hit, the stored {@code IndexArgs.ID} field is read unconditionally,
 * while the {@code IndexArgs.CONTENTS} and {@code IndexArgs.RAW} fields are optional and
 * yield {@code null} when absent.
 *
 * @param query the query to run
 * @param k the number of hits requested from the searcher
 * @return one {@code Result} per hit, in the order the hits were returned
 * @throws IOException if the underlying search fails
 */
public Result[] searchGeo(Query query, int k) throws IOException {
    // Lazily build the searcher the first time a query comes in.
    if (searcher == null) {
        searcher = new IndexSearcher(reader);
    }

    TopDocs topDocs = searcher.search(query, k);
    ScoredDocuments scored = ScoredDocuments.fromTopDocs(topDocs, searcher);

    int total = scored.ids.length;
    Result[] out = new Result[total];
    for (int rank = 0; rank < total; rank++) {
        Document document = scored.documents[rank];
        String id = document.getField(IndexArgs.ID).stringValue();

        // Contents and raw are optional stored fields; a missing field maps to null.
        IndexableField contentsField = document.getField(IndexArgs.CONTENTS);
        String contents = null;
        if (contentsField != null) {
            contents = contentsField.stringValue();
        }

        IndexableField rawField = document.getField(IndexArgs.RAW);
        String raw = null;
        if (rawField != null) {
            raw = rawField.stringValue();
        }

        out[rank] = new Result(id, scored.ids[rank], scored.scores[rank], contents, raw, document);
    }
    return out;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) IndexableField(org.apache.lucene.index.IndexableField) ScoredDocuments(io.anserini.rerank.ScoredDocuments) Document(org.apache.lucene.document.Document)

Example 17 with ScoredDocuments

use of io.anserini.rerank.ScoredDocuments in project Anserini by castorini.

the class SimpleImpactSearcher method _search.

/**
 * Internal implementation of search: runs {@code query}, passes the hits through the
 * reranker cascade, and wraps each surviving hit in a {@link Result}.
 *
 * <p>Score ties are broken with {@code BREAK_SCORE_TIES_BY_DOCID}, and
 * {@code arbitraryScoreTieBreak} is switched off in the {@code SearchArgs} handed to the
 * rerankers. For every hit, the stored {@code IndexArgs.ID} field is read unconditionally,
 * while {@code IndexArgs.CONTENTS} and {@code IndexArgs.RAW} yield {@code null} when absent.
 *
 * @param query the query to run
 * @param k the number of hits requested from the searcher
 * @return one {@code Result} per document produced by the cascade, in cascade order
 * @throws IOException if the underlying search fails
 */
protected Result[] _search(Query query, int k) throws IOException {
    // Build the IndexSearcher a single time and reuse it afterwards
    // (the original author notes that the object is thread safe).
    if (searcher == null) {
        searcher = new IndexSearcher(reader);
        searcher.setSimilarity(similarity);
    }

    SearchArgs args = new SearchArgs();
    args.arbitraryScoreTieBreak = false;
    args.hits = k;

    TopDocs topDocs = searcher.search(query, k, BREAK_SCORE_TIES_BY_DOCID, true);
    RerankerContext rerankerContext =
        new RerankerContext<>(searcher, null, query, null, null, null, null, args);
    ScoredDocuments reranked =
        cascade.run(ScoredDocuments.fromTopDocs(topDocs, searcher), rerankerContext);

    int total = reranked.ids.length;
    Result[] out = new Result[total];
    for (int rank = 0; rank < total; rank++) {
        Document document = reranked.documents[rank];
        String id = document.getField(IndexArgs.ID).stringValue();

        // Contents and raw are optional stored fields; a missing field maps to null.
        IndexableField contentsField = document.getField(IndexArgs.CONTENTS);
        String contents = null;
        if (contentsField != null) {
            contents = contentsField.stringValue();
        }

        IndexableField rawField = document.getField(IndexArgs.RAW);
        String raw = null;
        if (rawField != null) {
            raw = rawField.stringValue();
        }

        out[rank] = new Result(id, reranked.ids[rank], reranked.scores[rank], contents, raw, document);
    }
    return out;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) IndexableField(org.apache.lucene.index.IndexableField) ScoredDocuments(io.anserini.rerank.ScoredDocuments) Document(org.apache.lucene.document.Document) RerankerContext(io.anserini.rerank.RerankerContext)

Example 18 with ScoredDocuments

use of io.anserini.rerank.ScoredDocuments in project Anserini by castorini.

the class SimpleTweetSearcher method searchTweets.

/**
 * Runs {@code query} restricted to tweets whose long id lies in {@code [0, t]}, passes the
 * hits through the reranker cascade, and wraps each surviving hit in a {@link Result}.
 *
 * <p>Score ties are broken with {@code BREAK_SCORE_TIES_BY_TWEETID}. When {@code useRM3} is
 * set, the initial retrieval asks for {@code searchArgs.rerankcutoff} hits instead of
 * {@code k}. For every hit, the stored {@code IndexArgs.ID} field is read unconditionally,
 * while {@code IndexArgs.CONTENTS} and {@code IndexArgs.RAW} yield {@code null} when absent.
 *
 * @param query the query to run
 * @param queryTokens the query tokens, forwarded to the reranker context
 * @param queryString the query text, forwarded to the reranker context
 * @param k the number of hits, recorded in the search arguments
 * @param t the upper bound (inclusive) of the tweet-id range filter
 * @return one {@code Result} per document produced by the cascade, in cascade order
 * @throws IOException if the underlying search fails
 */
protected Result[] searchTweets(Query query, List<String> queryTokens, String queryString, int k, long t) throws IOException {
    // Build the IndexSearcher a single time and reuse it afterwards
    // (the original author notes that the object is thread safe).
    if (searcher == null) {
        searcher = new IndexSearcher(reader);
        searcher.setSimilarity(similarity);
    }

    SearchArgs args = new SearchArgs();
    args.arbitraryScoreTieBreak = false;
    args.hits = k;
    args.searchtweets = true;

    // Exclude tweets whose ids fall beyond the query tweet time: the <querytweettime> tag
    // holds the query's timestamp expressed as the chronologically nearest tweet id
    // within the corpus.
    Query filter = LongPoint.newRangeQuery(TweetGenerator.TweetField.ID_LONG.name, 0L, t);
    Query compositeQuery = new BooleanQuery.Builder()
        .add(filter, BooleanClause.Occur.FILTER)
        .add(query, BooleanClause.Occur.MUST)
        .build();

    // With RM3 enabled, retrieve up to the rerank cutoff rather than k.
    int initialHits;
    if (useRM3) {
        initialHits = args.rerankcutoff;
    } else {
        initialHits = k;
    }

    TopDocs topDocs = searcher.search(compositeQuery, initialHits, BREAK_SCORE_TIES_BY_TWEETID, true);
    RerankerContext rerankerContext =
        new RerankerContext<>(searcher, null, compositeQuery, null, queryString, queryTokens, filter, args);
    ScoredDocuments reranked =
        cascade.run(ScoredDocuments.fromTopDocs(topDocs, searcher), rerankerContext);

    int total = reranked.ids.length;
    Result[] out = new Result[total];
    for (int rank = 0; rank < total; rank++) {
        Document document = reranked.documents[rank];
        String id = document.getField(IndexArgs.ID).stringValue();

        // Contents and raw are optional stored fields; a missing field maps to null.
        IndexableField contentsField = document.getField(IndexArgs.CONTENTS);
        String contents = null;
        if (contentsField != null) {
            contents = contentsField.stringValue();
        }

        IndexableField rawField = document.getField(IndexArgs.RAW);
        String raw = null;
        if (rawField != null) {
            raw = rawField.stringValue();
        }

        out[rank] = new Result(id, reranked.ids[rank], reranked.scores[rank], contents, raw, document);
    }
    return out;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) ScoredDocuments(io.anserini.rerank.ScoredDocuments) Document(org.apache.lucene.document.Document) LongPoint(org.apache.lucene.document.LongPoint) TopDocs(org.apache.lucene.search.TopDocs) IndexableField(org.apache.lucene.index.IndexableField) RerankerContext(io.anserini.rerank.RerankerContext)

Aggregations

ScoredDocuments (io.anserini.rerank.ScoredDocuments)18 TopDocs (org.apache.lucene.search.TopDocs)15 Query (org.apache.lucene.search.Query)12 IndexSearcher (org.apache.lucene.search.IndexSearcher)11 RerankerContext (io.anserini.rerank.RerankerContext)9 QueryParser (org.apache.lucene.queryparser.classic.QueryParser)6 Document (org.apache.lucene.document.Document)5 SimpleAnalyzer (org.apache.lucene.analysis.core.SimpleAnalyzer)4 EnglishAnalyzer (org.apache.lucene.analysis.en.EnglishAnalyzer)4 IndexableField (org.apache.lucene.index.IndexableField)4 MultiFieldQueryParser (org.apache.lucene.queryparser.classic.MultiFieldQueryParser)4 BooleanQuery (org.apache.lucene.search.BooleanQuery)4 RerankerCascade (io.anserini.rerank.RerankerCascade)3 ArrayList (java.util.ArrayList)3 QueryNodeException (org.apache.lucene.queryparser.flexible.core.QueryNodeException)3 ScoreDoc (org.apache.lucene.search.ScoreDoc)3 TermInSetQuery (org.apache.lucene.search.TermInSetQuery)3 BM25Similarity (org.apache.lucene.search.similarities.BM25Similarity)3 Similarity (org.apache.lucene.search.similarities.Similarity)3 ScoreTiesAdjusterReranker (io.anserini.rerank.lib.ScoreTiesAdjusterReranker)2