Use of io.anserini.rerank.RerankerCascade in project Anserini by castorini.
The class SearchCollection, method search().
public <K> ScoredDocuments search(IndexSearcher searcher, K qid, String queryString, RerankerCascade cascade, ScoredDocuments queryQrels, boolean hasRelDocs) throws IOException {
  Query query = null;
  if (args.sdm) {
    query = new SdmQueryGenerator(args.sdm_tw, args.sdm_ow, args.sdm_uw).buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
  } else {
    QueryGenerator generator;
    try {
      generator = (QueryGenerator) Class.forName("io.anserini.search.query." + args.queryGenerator).getConstructor().newInstance();
    } catch (Exception e) {
      e.printStackTrace();
      throw new IllegalArgumentException("Unable to load QueryGenerator: " + args.queryGenerator);
    }
    query = generator.buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
  }

  TopDocs rs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {});
  if (!isRerank || (args.rerankcutoff > 0 && args.rf_qrels == null) || (args.rf_qrels != null && !hasRelDocs)) {
    if (args.arbitraryScoreTieBreak) {
      // Break score ties arbitrarily, in whatever order Lucene returns the hits.
      rs = searcher.search(query, (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits);
    } else {
      // Break score ties deterministically by internal docid.
      rs = searcher.search(query, (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits, BREAK_SCORE_TIES_BY_DOCID, true);
    }
  }

  List<String> queryTokens = AnalyzerUtils.analyze(analyzer, queryString);
  queries.put(qid.toString(), queryTokens);
  RerankerContext context = new RerankerContext<>(searcher, qid, query, null, queryString, queryTokens, null, args);

  ScoredDocuments scoredFbDocs;
  if (isRerank && args.rf_qrels != null) {
    if (hasRelDocs) {
      scoredFbDocs = queryQrels;
    } else {
      // If there are no relevant documents for this topic, fall back to a cascade
      // that only performs score-based tie breaking.
      LOG.info("No relevant documents for " + qid.toString());
      scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
      cascade = new RerankerCascade();
      cascade.add(new ScoreTiesAdjusterReranker());
    }
  } else {
    scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
  }
  return cascade.run(scoredFbDocs, context);
}
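The fallback path above (no feedback documents) reduces to a small, reusable pattern: run the query, wrap the hits in ScoredDocuments, and push them through a cascade that only adjusts score ties. Below is a minimal sketch of that pattern, assuming only the classes already used in the method and a caller-supplied RerankerContext; the package paths in the Anserini imports are assumptions.

import java.io.IOException;

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;

import io.anserini.rerank.RerankerCascade;
import io.anserini.rerank.RerankerContext;
import io.anserini.rerank.ScoredDocuments;
import io.anserini.rerank.lib.ScoreTiesAdjusterReranker;

public class DefaultCascadeSketch {
  // Retrieve the top hits and run them through the default (tie-adjusting) cascade,
  // mirroring the no-feedback branch of SearchCollection.search above.
  public static <K> ScoredDocuments searchAndAdjustTies(IndexSearcher searcher, Query query,
      RerankerContext<K> context, int hits) throws IOException {
    TopDocs rs = searcher.search(query, hits);

    RerankerCascade cascade = new RerankerCascade();
    cascade.add(new ScoreTiesAdjusterReranker());

    return cascade.run(ScoredDocuments.fromTopDocs(rs, searcher), context);
  }
}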
Use of io.anserini.rerank.RerankerCascade in project Anserini by castorini.
The class SimpleSearcher, method setRM3().
/**
 * Enables RM3 query expansion with the specified parameters.
 *
 * @param fbTerms number of expansion terms
 * @param fbDocs number of feedback documents
 * @param originalQueryWeight weight to assign to the original query
 * @param outputQuery flag to print original and expanded queries
 * @param filterTerms whether to filter expansion terms to English only
 */
public void setRM3(int fbTerms, int fbDocs, float originalQueryWeight, boolean outputQuery, boolean filterTerms) {
  useRM3 = true;
  cascade = new RerankerCascade("rm3");
  cascade.add(new Rm3Reranker(this.analyzer, IndexArgs.CONTENTS, fbTerms, fbDocs, originalQueryWeight, outputQuery, filterTerms));
  cascade.add(new ScoreTiesAdjusterReranker());
}
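For use outside of SimpleSearcher, the same two-stage cascade can be assembled directly. The sketch below is grounded in the constructor calls shown above; the analyzer choice and the package paths in the imports are assumptions.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;

import io.anserini.index.IndexArgs;
import io.anserini.rerank.RerankerCascade;
import io.anserini.rerank.lib.Rm3Reranker;
import io.anserini.rerank.lib.ScoreTiesAdjusterReranker;

public class Rm3CascadeSketch {
  // Mirrors what setRM3(...) installs: RM3 expansion over the contents field,
  // followed by deterministic score-tie adjustment.
  public static RerankerCascade buildRm3Cascade(Analyzer analyzer, int fbTerms, int fbDocs,
      float originalQueryWeight, boolean outputQuery, boolean filterTerms) {
    RerankerCascade cascade = new RerankerCascade("rm3");
    cascade.add(new Rm3Reranker(analyzer, IndexArgs.CONTENTS, fbTerms, fbDocs,
        originalQueryWeight, outputQuery, filterTerms));
    cascade.add(new ScoreTiesAdjusterReranker());
    return cascade;
  }

  public static void main(String[] args) {
    // Commonly used RM3 settings: 10 terms, 10 feedback docs, original query weight 0.5.
    RerankerCascade cascade = buildRm3Cascade(new EnglishAnalyzer(), 10, 10, 0.5f, false, true);
  }
}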
Use of io.anserini.rerank.RerankerCascade in project Anserini by castorini.
The class SimpleSearcher, method unsetRM3().
/**
 * Disables RM3 query expansion.
 */
public void unsetRM3() {
  this.useRM3 = false;
  cascade = new RerankerCascade();
  cascade.add(new ScoreTiesAdjusterReranker());
}
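A usage sketch of the pair of methods: run the same query with and without RM3 on one SimpleSearcher instance. The SimpleSearcher constructor, the search(String, int) method, the Result fields, and close() are assumptions about the surrounding API, not shown in the snippets above.

import io.anserini.search.SimpleSearcher;

public class Rm3ToggleExample {
  public static void main(String[] args) throws Exception {
    // Assumed API: SimpleSearcher(String indexDir), search(String query, int k),
    // Result.docid/Result.score, and close().
    SimpleSearcher searcher = new SimpleSearcher("/path/to/lucene-index");

    searcher.setRM3(10, 10, 0.5f, false, true);  // RM3-expanded run
    SimpleSearcher.Result[] expanded = searcher.search("information retrieval", 10);

    searcher.unsetRM3();  // back to the default tie-adjusting cascade
    SimpleSearcher.Result[] baseline = searcher.search("information retrieval", 10);

    for (int i = 0; i < Math.min(expanded.length, baseline.length); i++) {
      System.out.println(expanded[i].docid + " vs " + baseline[i].docid);
    }
    searcher.close();
  }
}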
Use of io.anserini.rerank.RerankerCascade in project Anserini by castorini.
The class DumpTweetsLtrData, method main().
public static void main(String[] argv) throws Exception {
  long curTime = System.nanoTime();
  LtrArgs args = new LtrArgs();
  CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(90));
  try {
    parser.parseArgument(argv);
  } catch (CmdLineException e) {
    System.err.println(e.getMessage());
    parser.printUsage(System.err);
    System.err.println("Example: DumpTweetsLtrData" + parser.printExample(OptionHandlerFilter.REQUIRED));
    return;
  }

  LOG.info("Reading index at " + args.index);
  Directory dir = FSDirectory.open(Paths.get(args.index));
  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = new IndexSearcher(reader);
  if (args.ql) {
    LOG.info("Using QL scoring model");
    searcher.setSimilarity(new LMDirichletSimilarity(args.mu));
  } else if (args.bm25) {
    LOG.info("Using BM25 scoring model");
    searcher.setSimilarity(new BM25Similarity(args.k1, args.b));
  } else {
    LOG.error("Error: Must specify scoring model!");
    System.exit(-1);
  }

  Qrels qrels = new Qrels(args.qrels);
  FeatureExtractors extractors = null;
  if (args.extractors != null) {
    extractors = FeatureExtractors.loadExtractor(args.extractors);
  }

  PrintStream out = new PrintStream(new FileOutputStream(new File(args.output)));

  // The cascade removes retweets, breaks temporal ties, and then dumps LTR features per topic.
  RerankerCascade cascade = new RerankerCascade();
  cascade.add(new RemoveRetweetsTemporalTiebreakReranker());
  cascade.add(new TweetsLtrDataGenerator(out, qrels, extractors));

  MicroblogTopicSet topics = MicroblogTopicSet.fromFile(new File(args.topics));
  LOG.info("Initialization complete! (elapsed time = " + (System.nanoTime() - curTime) / 1000000 + "ms)");

  long totalTime = 0;
  int cnt = 0;
  for (MicroblogTopic topic : topics) {
    long curQueryTime = System.nanoTime();

    // Restrict results to tweets with IDs no later than the query tweet time.
    Query filter = LongPoint.newRangeQuery(StatusField.ID.name, 0L, topic.getQueryTweetTime());
    Query query = AnalyzerUtils.buildBagOfWordsQuery(StatusField.TEXT.name, IndexTweets.ANALYZER, topic.getQuery());
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(filter, BooleanClause.Occur.FILTER);
    builder.add(query, BooleanClause.Occur.MUST);
    Query q = builder.build();

    TopDocs rs = searcher.search(q, args.hits);
    List<String> queryTokens = AnalyzerUtils.tokenize(IndexTweets.ANALYZER, topic.getQuery());
    RerankerContext context = new RerankerContext(searcher, query, topic.getId(), topic.getQuery(), queryTokens, StatusField.TEXT.name, filter);
    cascade.run(ScoredDocuments.fromTopDocs(rs, searcher), context);

    long qtime = (System.nanoTime() - curQueryTime) / 1000000;
    LOG.info("Query " + topic.getId() + " (elapsed time = " + qtime + "ms)");
    totalTime += qtime;
    cnt++;
  }

  LOG.info("All queries completed!");
  LOG.info("Total elapsed time = " + totalTime + "ms");
  LOG.info("Average query latency = " + (totalTime / cnt) + "ms");

  reader.close();
  out.close();
}
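Each stage added to the cascade above is a reranker, so extending this pipeline means writing another implementation and registering it with cascade.add(...). Below is a hedged sketch of a pass-through stage; the Reranker method names (rerank and tag) and the ScoredDocuments field used here are assumptions inferred from how the cascade is driven above, not taken from the snippet itself.

import io.anserini.rerank.Reranker;
import io.anserini.rerank.RerankerContext;
import io.anserini.rerank.ScoredDocuments;

// A do-nothing stage that only logs the size of the ranking it receives; intended as a
// template for custom stages plugged in via cascade.add(new LoggingReranker()).
public class LoggingReranker implements Reranker {
  @Override
  public ScoredDocuments rerank(ScoredDocuments docs, RerankerContext context) {
    // Assumed field: ScoredDocuments exposes the parallel ids/scores arrays it was built from.
    System.out.println("reranking " + docs.ids.length + " documents");
    return docs;  // pass-through: no reordering
  }

  @Override
  public String tag() {
    return "logging";
  }
}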