use of io.anserini.rerank.RerankerCascade in project Anserini by castorini.
the class SearchCollection method searchTweets.
/**
 * Runs one query against a tweet index and passes the candidates through a reranker cascade.
 *
 * <p>The keyword query is built either by {@code SdmQueryGenerator} (when {@code args.sdm} is set)
 * or by the {@code QueryGenerator} implementation named by {@code args.queryGenerator}, which is
 * looked up reflectively in the {@code io.anserini.search.query} package. The keyword query is then
 * combined with a range filter on the tweet id field so that only tweets with ids in
 * {@code [0, t]} are considered.
 *
 * <p>First-stage retrieval only runs when reranking is off, or when reranking without feedback
 * qrels and {@code args.rerankcutoff > 0}, or when feedback qrels are configured but this query has
 * no relevant documents. In every other case the candidate list handed to the cascade is either
 * {@code queryQrels} or an empty result.
 *
 * @param searcher index searcher to run the query with
 * @param qid query identifier, passed through to the reranker context
 * @param queryString raw query text
 * @param t upper bound (inclusive) on the tweet id of documents that may be returned
 * @param cascade reranker cascade to apply; replaced by a tie-breaking-only cascade when feedback
 *     qrels are configured but {@code hasRelDocs} is false
 * @param queryQrels feedback documents for this query; used only when reranking with
 *     {@code args.rf_qrels} set and {@code hasRelDocs} is true
 * @param hasRelDocs whether this query has relevant documents in the feedback qrels
 * @param <K> type of the query identifier
 * @return the documents produced by the cascade
 * @throws IOException propagated from searching or from building the reranker context
 * @throws IllegalArgumentException if the configured query generator cannot be instantiated or
 *     fails while building the query; the underlying exception is attached as the cause
 */
public <K> ScoredDocuments searchTweets(IndexSearcher searcher, K qid, String queryString, long t, RerankerCascade cascade, ScoredDocuments queryQrels, boolean hasRelDocs) throws IOException {
  Query keywordQuery;
  if (args.sdm) {
    keywordQuery = new SdmQueryGenerator(args.sdm_tw, args.sdm_ow, args.sdm_uw).buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
  } else {
    try {
      QueryGenerator generator = (QueryGenerator) Class.forName("io.anserini.search.query." + args.queryGenerator).getConstructor().newInstance();
      keywordQuery = generator.buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
    } catch (Exception e) {
      // Report the generator name that actually failed (not the topic reader) and keep the
      // original exception as the cause so the full stack trace travels with the error.
      throw new IllegalArgumentException("Unable to load QueryGenerator: " + args.queryGenerator, e);
    }
  }

  List<String> queryTokens = AnalyzerUtils.analyze(analyzer, queryString);

  // Do not consider the tweets with tweet ids that are beyond the queryTweetTime.
  // The <querytweettime> tag contains the timestamp of the query in terms of the
  // chronologically nearest tweet id within the corpus.
  Query filter = LongPoint.newRangeQuery(TweetGenerator.TweetField.ID_LONG.name, 0L, t);
  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  builder.add(filter, BooleanClause.Occur.FILTER);
  builder.add(keywordQuery, BooleanClause.Occur.MUST);
  Query compositeQuery = builder.build();

  // Start from an empty result; it stays empty whenever first-stage retrieval is skipped.
  TopDocs rs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {});
  if (!isRerank || (args.rerankcutoff > 0 && args.rf_qrels == null) || (args.rf_qrels != null && !hasRelDocs)) {
    // Reranking without feedback qrels only needs the top rerankcutoff candidates;
    // every other case retrieves the full number of requested hits.
    int numHits = (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits;
    if (args.arbitraryScoreTieBreak) {
      // No explicit sort: ties are left in whatever order the searcher returns them.
      rs = searcher.search(compositeQuery, numHits);
    } else {
      rs = searcher.search(compositeQuery, numHits, BREAK_SCORE_TIES_BY_TWEETID, true);
    }
  }

  RerankerContext<K> context = new RerankerContext<>(searcher, qid, keywordQuery, null, queryString, queryTokens, filter, args);
  ScoredDocuments scoredFbDocs;
  if (isRerank && args.rf_qrels != null) {
    if (hasRelDocs) {
      scoredFbDocs = queryQrels;
    } else {
      // if no relevant documents, only perform score based tie breaking next
      scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
      cascade = new RerankerCascade();
      cascade.add(new ScoreTiesAdjusterReranker());
    }
  } else {
    scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
  }
  return cascade.run(scoredFbDocs, context);
}
use of io.anserini.rerank.RerankerCascade in project Anserini by castorini.
the class SearchCollection method constructRerankers.
/**
 * Builds the reranker cascades to run, one per combination of the configured parameter values.
 *
 * <p>Exactly one family is used, checked in this order: RM3 ({@code args.rm3}), axiomatic
 * ({@code args.axiom}), BM25-PRF ({@code args.bm25prf}). If none is enabled, a single untagged
 * cascade containing only a {@code ScoreTiesAdjusterReranker} is returned. Every cascade ends
 * with a {@code ScoreTiesAdjusterReranker}.
 *
 * <p>Each cascade is tagged with its parameter values. When {@code args.rf_qrels} is set, the
 * "Rf" tag variants are used; they leave out the parameter that is not reported in that mode
 * ({@code fbDocs} for RM3 and BM25-PRF, {@code r} for axiomatic).
 *
 * @return the list of cascades, in parameter iteration order
 * @throws IOException declared by the signature; presumably raised by a reranker constructor
 */
private List<RerankerCascade> constructRerankers() throws IOException {
  final List<RerankerCascade> result = new ArrayList<>();
  // Selects between the "Rf" tag format and the plain one for every family below.
  final boolean withFeedbackQrels = args.rf_qrels != null;

  if (args.rm3) {
    for (String terms : args.rm3_fbTerms) {
      for (String docs : args.rm3_fbDocs) {
        for (String origWeight : args.rm3_originalQueryWeight) {
          final String label = withFeedbackQrels
              ? String.format("rm3Rf(fbTerms=%s,originalQueryWeight=%s)", terms, origWeight)
              : String.format("rm3(fbTerms=%s,fbDocs=%s,originalQueryWeight=%s)", terms, docs, origWeight);
          final RerankerCascade rm3Cascade = new RerankerCascade(label);
          rm3Cascade.add(new Rm3Reranker(
              analyzer,
              IndexArgs.CONTENTS,
              Integer.parseInt(terms),
              Integer.parseInt(docs),
              Float.parseFloat(origWeight),
              args.rm3_outputQuery,
              !args.rm3_noTermFilter));
          rm3Cascade.add(new ScoreTiesAdjusterReranker());
          result.add(rm3Cascade);
        }
      }
    }
    return result;
  }

  if (args.axiom) {
    for (String rVal : args.axiom_r) {
      for (String nVal : args.axiom_n) {
        for (String betaVal : args.axiom_beta) {
          for (String topVal : args.axiom_top) {
            for (String seedVal : args.axiom_seed) {
              final String label = withFeedbackQrels
                  ? String.format("axRf(seed=%s,n=%s,beta=%s,top=%s)", seedVal, nVal, betaVal, topVal)
                  : String.format("ax(seed=%s,r=%s,n=%s,beta=%s,top=%s)", seedVal, rVal, nVal, betaVal, topVal);
              final RerankerCascade axiomCascade = new RerankerCascade(label);
              axiomCascade.add(new AxiomReranker(
                  args.index,
                  args.axiom_index,
                  IndexArgs.CONTENTS,
                  args.axiom_deterministic,
                  Integer.parseInt(seedVal),
                  Integer.parseInt(rVal),
                  Integer.parseInt(nVal),
                  Float.parseFloat(betaVal),
                  Integer.parseInt(topVal),
                  args.axiom_docids,
                  args.axiom_outputQuery,
                  args.searchtweets));
              axiomCascade.add(new ScoreTiesAdjusterReranker());
              result.add(axiomCascade);
            }
          }
        }
      }
    }
    return result;
  }

  if (args.bm25prf) {
    for (String terms : args.bm25prf_fbTerms) {
      for (String docs : args.bm25prf_fbDocs) {
        for (String k1Val : args.bm25prf_k1) {
          for (String bVal : args.bm25prf_b) {
            for (String termWeight : args.bm25prf_newTermWeight) {
              final String label = withFeedbackQrels
                  ? String.format("bm25Rf(fbTerms=%s,k1=%s,b=%s,newTermWeight=%s)", terms, k1Val, bVal, termWeight)
                  : String.format("bm25prf(fbTerms=%s,fbDocs=%s,k1=%s,b=%s,newTermWeight=%s)", terms, docs, k1Val, bVal, termWeight);
              final RerankerCascade prfCascade = new RerankerCascade(label);
              prfCascade.add(new BM25PrfReranker(
                  analyzer,
                  IndexArgs.CONTENTS,
                  Integer.parseInt(terms),
                  Integer.parseInt(docs),
                  Float.parseFloat(k1Val),
                  Float.parseFloat(bVal),
                  Float.parseFloat(termWeight),
                  args.bm25prf_outputQuery));
              prfCascade.add(new ScoreTiesAdjusterReranker());
              result.add(prfCascade);
            }
          }
        }
      }
    }
    return result;
  }

  // No feedback/expansion model selected: only adjust score ties.
  final RerankerCascade tieBreakOnly = new RerankerCascade();
  tieBreakOnly.add(new ScoreTiesAdjusterReranker());
  result.add(tieBreakOnly);
  return result;
}
use of io.anserini.rerank.RerankerCascade in project Anserini by castorini.
the class SearchCollection method search.
/**
 * Runs one query against the index and passes the candidates through a reranker cascade.
 *
 * <p>The query is built either by {@code SdmQueryGenerator} (when {@code args.sdm} is set) or by
 * the {@code QueryGenerator} implementation named by {@code args.queryGenerator}, which is looked
 * up reflectively in the {@code io.anserini.search.query} package.
 *
 * <p>First-stage retrieval only runs when reranking is off, or when reranking without feedback
 * qrels and {@code args.rerankcutoff > 0}, or when feedback qrels are configured but this query has
 * no relevant documents. In every other case the candidate list handed to the cascade is either
 * {@code queryQrels} or an empty result.
 *
 * <p>As a side effect, the analyzed query tokens are recorded in {@code queries} under
 * {@code qid.toString()}.
 *
 * @param searcher index searcher to run the query with
 * @param qid query identifier, passed through to the reranker context
 * @param queryString raw query text
 * @param cascade reranker cascade to apply; replaced by a tie-breaking-only cascade when feedback
 *     qrels are configured but {@code hasRelDocs} is false
 * @param queryQrels feedback documents for this query; used only when reranking with
 *     {@code args.rf_qrels} set and {@code hasRelDocs} is true
 * @param hasRelDocs whether this query has relevant documents in the feedback qrels
 * @param <K> type of the query identifier
 * @return the documents produced by the cascade
 * @throws IOException propagated from searching or from building the reranker context
 * @throws IllegalArgumentException if the configured query generator cannot be instantiated; the
 *     underlying exception is attached as the cause
 */
public <K> ScoredDocuments search(IndexSearcher searcher, K qid, String queryString, RerankerCascade cascade, ScoredDocuments queryQrels, boolean hasRelDocs) throws IOException {
  final Query query;
  if (args.sdm) {
    query = new SdmQueryGenerator(args.sdm_tw, args.sdm_ow, args.sdm_uw).buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
  } else {
    QueryGenerator generator;
    try {
      generator = (QueryGenerator) Class.forName("io.anserini.search.query." + args.queryGenerator).getConstructor().newInstance();
    } catch (Exception e) {
      // Report the generator name that actually failed (not the topic reader) and keep the
      // original exception as the cause so the full stack trace travels with the error.
      throw new IllegalArgumentException("Unable to load QueryGenerator: " + args.queryGenerator, e);
    }
    query = generator.buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
  }

  // Start from an empty result; it stays empty whenever first-stage retrieval is skipped.
  TopDocs rs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {});
  if (!isRerank || (args.rerankcutoff > 0 && args.rf_qrels == null) || (args.rf_qrels != null && !hasRelDocs)) {
    // Reranking without feedback qrels only needs the top rerankcutoff candidates;
    // every other case retrieves the full number of requested hits.
    int numHits = (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits;
    if (args.arbitraryScoreTieBreak) {
      // No explicit sort: ties are left in whatever order the searcher returns them.
      rs = searcher.search(query, numHits);
    } else {
      rs = searcher.search(query, numHits, BREAK_SCORE_TIES_BY_DOCID, true);
    }
  }

  List<String> queryTokens = AnalyzerUtils.analyze(analyzer, queryString);
  queries.put(qid.toString(), queryTokens);

  RerankerContext<K> context = new RerankerContext<>(searcher, qid, query, null, queryString, queryTokens, null, args);
  ScoredDocuments scoredFbDocs;
  if (isRerank && args.rf_qrels != null) {
    if (hasRelDocs) {
      scoredFbDocs = queryQrels;
    } else {
      // if no relevant documents, only perform score based tie breaking next
      LOG.info("No relevant documents for " + qid.toString());
      scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
      cascade = new RerankerCascade();
      cascade.add(new ScoreTiesAdjusterReranker());
    }
  } else {
    scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
  }
  return cascade.run(scoredFbDocs, context);
}
use of io.anserini.rerank.RerankerCascade in project Anserini by castorini.
the class SimpleSearcher method setRM3.
/**
 * Enables RM3 query expansion with the supplied parameters.
 *
 * <p>Replaces the current cascade with one tagged {@code "rm3"} that runs an {@code Rm3Reranker}
 * over the contents field followed by a {@code ScoreTiesAdjusterReranker}.
 *
 * @param fbTerms number of expansion terms
 * @param fbDocs number of expansion documents
 * @param originalQueryWeight weight to assign to the original query
 * @param outputQuery flag to print original and expanded queries
 * @param filterTerms whether to filter terms to be English only
 */
public void setRM3(int fbTerms, int fbDocs, float originalQueryWeight, boolean outputQuery, boolean filterTerms) {
  this.useRM3 = true;

  final Rm3Reranker rm3 = new Rm3Reranker(
      this.analyzer, IndexArgs.CONTENTS, fbTerms, fbDocs, originalQueryWeight, outputQuery, filterTerms);

  this.cascade = new RerankerCascade("rm3");
  this.cascade.add(rm3);
  this.cascade.add(new ScoreTiesAdjusterReranker());
}
use of io.anserini.rerank.RerankerCascade in project Anserini by castorini.
the class SimpleSearcher method unsetRM3.
/**
 * Disables RM3 query expansion.
 *
 * <p>Clears the RM3 flag and replaces the current cascade with an untagged one that contains only
 * a {@code ScoreTiesAdjusterReranker}.
 */
public void unsetRM3() {
  useRM3 = false;

  final ScoreTiesAdjusterReranker tieAdjuster = new ScoreTiesAdjusterReranker();
  this.cascade = new RerankerCascade();
  this.cascade.add(tieAdjuster);
}
Aggregations