use of io.anserini.rerank.RerankerContext in project Anserini by castorini.
the class SearchCollection method searchTweets.
/**
 * Searches a tweet collection for a single topic and runs the result through a reranker cascade.
 *
 * <p>The keyword query is built either with {@code SdmQueryGenerator} (when {@code args.sdm} is
 * set) or with the generator class named by {@code args.queryGenerator}, which is loaded
 * reflectively from the {@code io.anserini.search.query} package. The keyword query is then
 * combined with a range filter on the tweet id so that only tweets with an id in
 * {@code [0, t]} are considered.
 *
 * @param searcher    searcher over the tweet index
 * @param qid         topic identifier, passed through to the reranker context
 * @param queryString raw query text
 * @param t           largest tweet id that may be returned (the query tweet time)
 * @param cascade     reranker cascade to apply; replaced by a tie-adjusting-only cascade when
 *                    qrels-based feedback is requested but the topic has no relevant documents
 * @param queryQrels  documents used as feedback input when {@code args.rf_qrels} is set and
 *                    {@code hasRelDocs} is true
 * @param hasRelDocs  whether relevant documents are available for this topic
 * @return the output of the cascade
 * @throws IOException              if searching the index fails
 * @throws IllegalArgumentException if the configured query generator cannot be instantiated or
 *                                  fails while building the query
 */
public <K> ScoredDocuments searchTweets(IndexSearcher searcher, K qid, String queryString, long t, RerankerCascade cascade, ScoredDocuments queryQrels, boolean hasRelDocs) throws IOException {
  Query keywordQuery;
  if (args.sdm) {
    keywordQuery = new SdmQueryGenerator(args.sdm_tw, args.sdm_ow, args.sdm_uw).buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
  } else {
    try {
      QueryGenerator generator = (QueryGenerator) Class.forName("io.anserini.search.query." + args.queryGenerator).getConstructor().newInstance();
      keywordQuery = generator.buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
    } catch (Exception e) {
      // Report the generator that was actually requested and keep the root cause attached.
      throw new IllegalArgumentException("Unable to load QueryGenerator: " + args.queryGenerator, e);
    }
  }

  List<String> queryTokens = AnalyzerUtils.analyze(analyzer, queryString);

  // Do not consider the tweets with tweet ids that are beyond the queryTweetTime
  // <querytweettime> tag contains the timestamp of the query in terms of the
  // chronologically nearest tweet id within the corpus
  Query filter = LongPoint.newRangeQuery(TweetGenerator.TweetField.ID_LONG.name, 0L, t);
  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  builder.add(filter, BooleanClause.Occur.FILTER);
  builder.add(keywordQuery, BooleanClause.Occur.MUST);
  Query compositeQuery = builder.build();

  // Start from an empty result; it is only replaced when an initial retrieval is needed.
  TopDocs rs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {});
  if (!isRerank || (args.rerankcutoff > 0 && args.rf_qrels == null) || (args.rf_qrels != null && !hasRelDocs)) {
    // When reranking without qrels feedback, retrieve rerankcutoff documents; otherwise hits.
    int numHits = (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits;
    if (args.arbitraryScoreTieBreak) {
      // Figure out how to break the scoring ties.
      rs = searcher.search(compositeQuery, numHits);
    } else {
      rs = searcher.search(compositeQuery, numHits, BREAK_SCORE_TIES_BY_TWEETID, true);
    }
  }

  RerankerContext<K> context = new RerankerContext<>(searcher, qid, keywordQuery, null, queryString, queryTokens, filter, args);

  RerankerCascade effectiveCascade = cascade;
  ScoredDocuments scoredFbDocs;
  if (isRerank && args.rf_qrels != null) {
    if (hasRelDocs) {
      scoredFbDocs = queryQrels;
    } else {
      // if no relevant documents, only perform score based tie breaking next
      scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
      effectiveCascade = new RerankerCascade();
      effectiveCascade.add(new ScoreTiesAdjusterReranker());
    }
  } else {
    scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
  }
  return effectiveCascade.run(scoredFbDocs, context);
}
use of io.anserini.rerank.RerankerContext in project Anserini by castorini.
the class SearchCollection method search.
/**
 * Searches the index for a single topic and runs the result through a reranker cascade.
 *
 * <p>The query is built either with {@code SdmQueryGenerator} (when {@code args.sdm} is set) or
 * with the generator class named by {@code args.queryGenerator}, which is loaded reflectively
 * from the {@code io.anserini.search.query} package. The analyzed query tokens are also recorded
 * in {@code queries} under {@code qid.toString()}.
 *
 * @param searcher    searcher over the target index
 * @param qid         topic identifier, passed through to the reranker context
 * @param queryString raw query text
 * @param cascade     reranker cascade to apply; replaced by a tie-adjusting-only cascade when
 *                    qrels-based feedback is requested but the topic has no relevant documents
 * @param queryQrels  documents used as feedback input when {@code args.rf_qrels} is set and
 *                    {@code hasRelDocs} is true
 * @param hasRelDocs  whether relevant documents are available for this topic
 * @return the output of the cascade
 * @throws IOException              if searching the index fails
 * @throws IllegalArgumentException if the configured query generator cannot be instantiated
 */
public <K> ScoredDocuments search(IndexSearcher searcher, K qid, String queryString, RerankerCascade cascade, ScoredDocuments queryQrels, boolean hasRelDocs) throws IOException {
  Query query;
  if (args.sdm) {
    query = new SdmQueryGenerator(args.sdm_tw, args.sdm_ow, args.sdm_uw).buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
  } else {
    QueryGenerator generator;
    try {
      generator = (QueryGenerator) Class.forName("io.anserini.search.query." + args.queryGenerator).getConstructor().newInstance();
    } catch (Exception e) {
      // Report the generator that was actually requested and keep the root cause attached.
      throw new IllegalArgumentException("Unable to load QueryGenerator: " + args.queryGenerator, e);
    }
    query = generator.buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
  }

  // Start from an empty result; it is only replaced when an initial retrieval is needed.
  TopDocs rs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {});
  if (!isRerank || (args.rerankcutoff > 0 && args.rf_qrels == null) || (args.rf_qrels != null && !hasRelDocs)) {
    // When reranking without qrels feedback, retrieve rerankcutoff documents; otherwise hits.
    int numHits = (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits;
    if (args.arbitraryScoreTieBreak) {
      // Figure out how to break the scoring ties.
      rs = searcher.search(query, numHits);
    } else {
      rs = searcher.search(query, numHits, BREAK_SCORE_TIES_BY_DOCID, true);
    }
  }

  List<String> queryTokens = AnalyzerUtils.analyze(analyzer, queryString);
  queries.put(qid.toString(), queryTokens);

  RerankerContext<K> context = new RerankerContext<>(searcher, qid, query, null, queryString, queryTokens, null, args);

  RerankerCascade effectiveCascade = cascade;
  ScoredDocuments scoredFbDocs;
  if (isRerank && args.rf_qrels != null) {
    if (hasRelDocs) {
      scoredFbDocs = queryQrels;
    } else {
      // if no relevant documents, only perform score based tie breaking next
      LOG.info("No relevant documents for " + qid.toString());
      scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
      effectiveCascade = new RerankerCascade();
      effectiveCascade.add(new ScoreTiesAdjusterReranker());
    }
  } else {
    scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
  }
  return effectiveCascade.run(scoredFbDocs, context);
}
use of io.anserini.rerank.RerankerContext in project Anserini by castorini.
the class AxiomReranker method processExternalContext.
/**
 * If an external index path is configured, searches that index with the query carried by
 * {@code context} and returns the top ranked documents; otherwise returns {@code docs} unchanged.
 *
 * <p>The external index is opened for the duration of the search only and is closed before this
 * method returns.
 *
 * @param docs The initial ranking results against target index. We will return them if the
 *             external index path is null.
 * @param context The reranking context of the original search; its query, filter, similarity and
 *                tie-breaking settings are reused for the external search.
 *
 * @return Top ranked ScoredDocuments from searching external index
 * @throws IOException if the external index cannot be opened or searched
 * @throws IllegalArgumentException if the external index path does not point to a readable
 *                                  directory
 */
private ScoredDocuments processExternalContext(ScoredDocuments docs, RerankerContext<T> context) throws IOException {
  if (this.externalIndexPath == null) {
    return docs;
  }

  Path indexPath = Paths.get(this.externalIndexPath);
  if (!Files.exists(indexPath) || !Files.isDirectory(indexPath) || !Files.isReadable(indexPath)) {
    throw new IllegalArgumentException(this.externalIndexPath + " does not exist, is not a directory, or is not readable.");
  }

  // Close the directory and reader once the search is done so that repeated calls do not leak
  // file handles.
  // NOTE(review): assumes searchTopDocs returns fully materialized results that do not need the
  // reader afterwards -- confirm against its implementation.
  try (FSDirectory directory = FSDirectory.open(indexPath);
       IndexReader reader = DirectoryReader.open(directory)) {
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(context.getIndexSearcher().getSimilarity());

    SearchArgs args = new SearchArgs();
    args.hits = this.R;
    args.arbitraryScoreTieBreak = context.getSearchArgs().arbitraryScoreTieBreak;
    args.searchtweets = context.getSearchArgs().searchtweets;

    RerankerContext<T> externalContext = new RerankerContext<>(searcher, context.getQueryId(), context.getQuery(), context.getQueryDocId(), context.getQueryText(), context.getQueryTokens(), context.getFilter(), args);
    return searchTopDocs(null, externalContext);
  }
}
use of io.anserini.rerank.RerankerContext in project Anserini by castorini.
the class SimpleImpactSearcher method _search.
/**
 * Internal search implementation: retrieves the top {@code k} hits for {@code query}, passes
 * them through {@code cascade}, and converts the outcome into {@code Result} objects.
 *
 * @param query the query to execute
 * @param k     number of hits to retrieve
 * @return one {@code Result} per document produced by the cascade
 * @throws IOException if searching the index fails
 */
protected Result[] _search(Query query, int k) throws IOException {
  // The IndexSearcher is created on the first call only and reused afterwards.
  if (searcher == null) {
    searcher = new IndexSearcher(reader);
    searcher.setSimilarity(similarity);
  }

  SearchArgs searchArgs = new SearchArgs();
  searchArgs.arbitraryScoreTieBreak = false;
  searchArgs.hits = k;

  TopDocs topDocs = searcher.search(query, k, BREAK_SCORE_TIES_BY_DOCID, true);
  RerankerContext context = new RerankerContext<>(searcher, null, query, null, null, null, null, searchArgs);
  ScoredDocuments reranked = cascade.run(ScoredDocuments.fromTopDocs(topDocs, searcher), context);

  final int total = reranked.ids.length;
  Result[] output = new Result[total];
  for (int pos = 0; pos < total; pos++) {
    Document document = reranked.documents[pos];
    String docid = document.getField(IndexArgs.ID).stringValue();

    // The contents and raw fields may be absent; report them as null in that case.
    IndexableField contentsField = document.getField(IndexArgs.CONTENTS);
    String contents = null;
    if (contentsField != null) {
      contents = contentsField.stringValue();
    }

    IndexableField rawField = document.getField(IndexArgs.RAW);
    String raw = null;
    if (rawField != null) {
      raw = rawField.stringValue();
    }

    output[pos] = new Result(docid, reranked.ids[pos], reranked.scores[pos], contents, raw, document);
  }
  return output;
}
use of io.anserini.rerank.RerankerContext in project Anserini by castorini.
the class SimpleTweetSearcher method searchTweets.
/**
 * Searches the tweet index with {@code query}, restricted to tweets whose id lies in
 * {@code [0, t]}, passes the hits through {@code cascade}, and converts the outcome into
 * {@code Result} objects.
 *
 * @param query       the keyword query to execute
 * @param queryTokens analyzed query tokens, handed to the reranker context
 * @param queryString raw query text, handed to the reranker context
 * @param k           number of hits to retrieve when {@code useRM3} is false
 * @param t           largest tweet id that may be returned (the query tweet time)
 * @return one {@code Result} per document produced by the cascade
 * @throws IOException if searching the index fails
 */
protected Result[] searchTweets(Query query, List<String> queryTokens, String queryString, int k, long t) throws IOException {
  // The IndexSearcher is created on the first call only and reused afterwards.
  if (searcher == null) {
    searcher = new IndexSearcher(reader);
    searcher.setSimilarity(similarity);
  }

  SearchArgs searchArgs = new SearchArgs();
  searchArgs.arbitraryScoreTieBreak = false;
  searchArgs.hits = k;
  searchArgs.searchtweets = true;

  // Do not consider the tweets with tweet ids that are beyond the queryTweetTime
  // <querytweettime> tag contains the timestamp of the query in terms of the
  // chronologically nearest tweet id within the corpus
  Query filter = LongPoint.newRangeQuery(TweetGenerator.TweetField.ID_LONG.name, 0L, t);
  Query compositeQuery = new BooleanQuery.Builder()
      .add(filter, BooleanClause.Occur.FILTER)
      .add(query, BooleanClause.Occur.MUST)
      .build();

  // With RM3 enabled the initial retrieval depth is the rerank cutoff of the freshly created
  // SearchArgs rather than k.
  int depth;
  if (useRM3) {
    depth = searchArgs.rerankcutoff;
  } else {
    depth = k;
  }
  TopDocs topDocs = searcher.search(compositeQuery, depth, BREAK_SCORE_TIES_BY_TWEETID, true);

  RerankerContext context = new RerankerContext<>(searcher, null, compositeQuery, null, queryString, queryTokens, filter, searchArgs);
  ScoredDocuments reranked = cascade.run(ScoredDocuments.fromTopDocs(topDocs, searcher), context);

  final int total = reranked.ids.length;
  Result[] output = new Result[total];
  for (int pos = 0; pos < total; pos++) {
    Document document = reranked.documents[pos];
    String docid = document.getField(IndexArgs.ID).stringValue();

    // The contents and raw fields may be absent; report them as null in that case.
    IndexableField contentsField = document.getField(IndexArgs.CONTENTS);
    String contents = null;
    if (contentsField != null) {
      contents = contentsField.stringValue();
    }

    IndexableField rawField = document.getField(IndexArgs.RAW);
    String raw = null;
    if (rawField != null) {
      raw = rawField.stringValue();
    }

    output[pos] = new Result(docid, reranked.ids[pos], reranked.scores[pos], contents, raw, document);
  }
  return output;
}
Aggregations