use of io.anserini.rerank.lib.ScoreTiesAdjusterReranker in project Anserini by castorini.
the class SearchElastic method searchTweets.
/**
 * Searches the Elasticsearch index named by {@code args.esIndex} for tweets matching a query.
 *
 * <p>A fixed set of punctuation characters is blanked out of the query text, hits are filtered to
 * tweet ids in the range from 0 to {@code t}, at most {@code args.hits} results are requested
 * (sorted by score, then by tweet id, both descending), and the hits are passed through a
 * {@link ScoreTiesAdjusterReranker}.
 *
 * @param queryString raw query text
 * @param t upper bound applied to the tweet id field
 * @return the documents produced by the score-ties reranker
 */
public <K> ScoredDocuments searchTweets(String queryString, long t) {
  // Replace each of these characters with a single space before handing the text to the
  // query-string parser (presumably because they are query syntax operators -- TODO confirm).
  String cleaned = queryString;
  for (char special : "+-=&|><!(){}[]^\"~*?:\\/".toCharArray()) {
    cleaned = cleaned.replace(special, ' ');
  }

  // Do not consider the tweets with tweet ids that are beyond the queryTweetTime.
  // The <querytweettime> tag contains the timestamp of the query in terms of the
  // chronologically nearest tweet id within the corpus.
  RangeQueryBuilder tweetIdRange =
      QueryBuilders.rangeQuery(TweetGenerator.TweetField.ID_LONG.name).from(0L).to(t);
  QueryStringQueryBuilder terms =
      QueryBuilders.queryStringQuery(cleaned).defaultField("contents").analyzer("english");
  // NOTE(review): the terms are attached as a `should` clause next to a `filter` clause;
  // confirm this restricts matching as intended rather than only contributing to the score.
  BoolQueryBuilder boolQuery = QueryBuilders.boolQuery().filter(tweetIdRange).should(terms);

  SearchRequest request = new SearchRequest(args.esIndex);
  SearchSourceBuilder source = new SearchSourceBuilder()
      .query(boolQuery)
      .size(args.hits)
      .sort(new ScoreSortBuilder().order(SortOrder.DESC))
      .sort(new FieldSortBuilder(TweetGenerator.TweetField.ID_LONG.name).order(SortOrder.DESC));
  request.source(source);

  SearchHits hits = null;
  try {
    hits = client.search(request, COMMON_OPTIONS).getHits();
  } catch (Exception e) {
    // NOTE(review): after a failure `hits` is still null when handed to fromESDocs below;
    // verify that fromESDocs tolerates null.
    LOG.error("Exception during ES query: ", e);
  }

  return new ScoreTiesAdjusterReranker().rerank(ScoredDocuments.fromESDocs(hits), null);
}
use of io.anserini.rerank.lib.ScoreTiesAdjusterReranker in project Anserini by castorini.
the class SearchSolr method searchTweets.
/**
 * Searches the Solr collection named by {@code args.solrIndex} for tweets matching a query.
 *
 * <p>Double quotes are stripped from the query text, a filter query limits hits to tweet ids
 * in the range built from 0 and {@code t}, at most {@code args.hits} rows are requested (sorted
 * by score, then by tweet id, both descending), and the results are passed through a
 * {@link ScoreTiesAdjusterReranker}.
 *
 * @param queryString raw query text
 * @param t upper bound applied to the tweet id field
 * @return the documents produced by the score-ties reranker
 */
public <K> ScoredDocuments searchTweets(String queryString, long t) {
  final String tweetIdField = TweetGenerator.TweetField.ID_LONG.name;

  // Remove double quotes in query since they are special syntax in Solr query parser.
  final String unquoted = queryString.replace("\"", "");

  // Do not consider the tweets with tweet ids that are beyond the queryTweetTime.
  // The <querytweettime> tag contains the timestamp of the query in terms of the
  // chronologically nearest tweet id within the corpus.
  final Query tweetIdRange = LongPoint.newRangeQuery(tweetIdField, 0L, t);

  SolrQuery request = new SolrQuery();
  request.set("df", "contents");
  request.set("fl", "* score");
  request.setQuery(unquoted);
  request.setRows(args.hits);
  request.setSort(SortClause.desc("score"));
  request.addSort(SortClause.desc(tweetIdField));
  // The filter query is the string form of the Lucene range query.
  request.set("fq", tweetIdRange.toString());

  SolrDocumentList docs = null;
  try {
    docs = client.query(args.solrIndex, request).getResults();
  } catch (Exception e) {
    // NOTE(review): after a failure `docs` is still null when handed to fromSolrDocs below;
    // verify that fromSolrDocs tolerates null.
    LOG.error("Exception during Solr query: ", e);
  }

  return new ScoreTiesAdjusterReranker().rerank(ScoredDocuments.fromSolrDocs(docs), null);
}
use of io.anserini.rerank.lib.ScoreTiesAdjusterReranker in project Anserini by castorini.
the class SearchSolr method search.
/**
 * Searches the Solr collection named by {@code args.solrIndex}.
 *
 * <p>A set of punctuation characters is replaced by spaces in the query text, at most
 * {@code args.hits} rows are requested (sorted by score descending, then by
 * {@code IndexArgs.ID} ascending), and the results are passed through a
 * {@link ScoreTiesAdjusterReranker}.
 *
 * @param queryString raw query text
 * @return the documents produced by the score-ties reranker
 */
public <K> ScoredDocuments search(String queryString) {
  // Remove some characters in query which are special syntax in Solr query parser.
  final String cleaned = queryString.replaceAll("[+=&|<>!(){}~*?:/\"\\^\\-\\[\\]\\\\]", " ");

  SolrQuery request = new SolrQuery();
  request.set("df", "contents");
  request.set("fl", "* score");
  request.setQuery(cleaned);
  request.setRows(args.hits);
  request.setSort(SortClause.desc("score"));
  request.addSort(SortClause.asc(IndexArgs.ID));

  SolrDocumentList docs = null;
  try {
    docs = client.query(args.solrIndex, request).getResults();
  } catch (Exception e) {
    // NOTE(review): after a failure `docs` is still null when handed to fromSolrDocs below;
    // verify that fromSolrDocs tolerates null.
    LOG.error("Exception during Solr query: ", e);
  }

  return new ScoreTiesAdjusterReranker().rerank(ScoredDocuments.fromSolrDocs(docs), null);
}
use of io.anserini.rerank.lib.ScoreTiesAdjusterReranker in project Anserini by castorini.
the class SearchCollection method searchTweets.
/**
 * Searches a tweet index and runs the given reranker cascade over the results.
 *
 * <p>The keyword query is built either by {@link SdmQueryGenerator} (when {@code args.sdm} is
 * set) or by the {@code QueryGenerator} implementation named by {@code args.queryGenerator}.
 * It is combined with a range filter on the tweet id field built from 0 and {@code t}.
 *
 * @param searcher index searcher to run the query against
 * @param qid query identifier, stored in the reranker context
 * @param queryString raw query text
 * @param t upper bound applied to the tweet id field
 * @param cascade rerankers to run; replaced by a score-ties-only cascade when reranking from
 *     qrels is requested but {@code hasRelDocs} is false
 * @param queryQrels documents used as feedback when reranking from qrels and
 *     {@code hasRelDocs} is true
 * @param hasRelDocs whether relevant documents are available for this query
 * @return the output of the cascade
 * @throws IOException declared for the search and reranker-context calls
 * @throws IllegalArgumentException if the configured query generator cannot be instantiated
 *     or fails while building the query
 */
public <K> ScoredDocuments searchTweets(IndexSearcher searcher, K qid, String queryString, long t, RerankerCascade cascade, ScoredDocuments queryQrels, boolean hasRelDocs) throws IOException {
  Query keywordQuery;
  if (args.sdm) {
    keywordQuery = new SdmQueryGenerator(args.sdm_tw, args.sdm_ow, args.sdm_uw).buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
  } else {
    try {
      QueryGenerator generator = (QueryGenerator) Class.forName("io.anserini.search.query." + args.queryGenerator).getConstructor().newInstance();
      keywordQuery = generator.buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
    } catch (Exception e) {
      // Report the generator that actually failed to load (the message previously printed
      // args.topicReader) and keep the original exception as the cause instead of dumping
      // it to stderr and discarding it.
      throw new IllegalArgumentException("Unable to load QueryGenerator: " + args.queryGenerator, e);
    }
  }

  List<String> queryTokens = AnalyzerUtils.analyze(analyzer, queryString);

  // Do not consider the tweets with tweet ids that are beyond the queryTweetTime.
  // The <querytweettime> tag contains the timestamp of the query in terms of the
  // chronologically nearest tweet id within the corpus.
  Query filter = LongPoint.newRangeQuery(TweetGenerator.TweetField.ID_LONG.name, 0L, t);
  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  builder.add(filter, BooleanClause.Occur.FILTER);
  builder.add(keywordQuery, BooleanClause.Occur.MUST);
  Query compositeQuery = builder.build();

  // Empty placeholder, used as-is when the initial retrieval below is skipped.
  TopDocs rs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {});
  if (!isRerank || (args.rerankcutoff > 0 && args.rf_qrels == null) || (args.rf_qrels != null && !hasRelDocs)) {
    // When reranking without qrels, retrieve rerankcutoff candidates; otherwise retrieve hits.
    int numHits = (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits;
    if (args.arbitraryScoreTieBreak) {
      // Plain top-k search: no explicit ordering among equal scores is requested.
      rs = searcher.search(compositeQuery, numHits);
    } else {
      // Sorted search using the BREAK_SCORE_TIES_BY_TWEETID sort.
      rs = searcher.search(compositeQuery, numHits, BREAK_SCORE_TIES_BY_TWEETID, true);
    }
  }

  RerankerContext context = new RerankerContext<>(searcher, qid, keywordQuery, null, queryString, queryTokens, filter, args);
  ScoredDocuments scoredFbDocs;
  if (isRerank && args.rf_qrels != null) {
    if (hasRelDocs) {
      scoredFbDocs = queryQrels;
    } else {
      // If no relevant documents, only perform score based tie breaking next.
      scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
      cascade = new RerankerCascade();
      cascade.add(new ScoreTiesAdjusterReranker());
    }
  } else {
    scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
  }
  return cascade.run(scoredFbDocs, context);
}
use of io.anserini.rerank.lib.ScoreTiesAdjusterReranker in project Anserini by castorini.
the class SearchCollection method constructRerankers.
/**
 * Builds one reranker cascade per parameter combination of the configured feedback model.
 *
 * <p>Exactly one branch is taken, checked in this order: {@code args.rm3}, {@code args.axiom},
 * {@code args.bm25prf}. Each cascade holds the model's reranker followed by a
 * {@link ScoreTiesAdjusterReranker} and is tagged with its parameter values; a different tag
 * format is used when {@code args.rf_qrels} is non-null. If no model is enabled, a single
 * untagged cascade containing only the score-ties reranker is returned.
 *
 * @return the cascades, in nested-loop order over the parameter lists
 * @throws IOException declared by this method's signature
 */
private List<RerankerCascade> constructRerankers() throws IOException {
  final boolean fromQrels = this.args.rf_qrels != null;
  final List<RerankerCascade> result = new ArrayList<>();

  if (args.rm3) {
    for (String terms : args.rm3_fbTerms) {
      for (String docs : args.rm3_fbDocs) {
        for (String weight : args.rm3_originalQueryWeight) {
          String label = fromQrels
              ? String.format("rm3Rf(fbTerms=%s,originalQueryWeight=%s)", terms, weight)
              : String.format("rm3(fbTerms=%s,fbDocs=%s,originalQueryWeight=%s)", terms, docs, weight);
          RerankerCascade rm3Cascade = new RerankerCascade(label);
          rm3Cascade.add(new Rm3Reranker(analyzer, IndexArgs.CONTENTS, Integer.valueOf(terms), Integer.valueOf(docs), Float.valueOf(weight), args.rm3_outputQuery, !args.rm3_noTermFilter));
          rm3Cascade.add(new ScoreTiesAdjusterReranker());
          result.add(rm3Cascade);
        }
      }
    }
  } else if (args.axiom) {
    for (String r : args.axiom_r) {
      for (String n : args.axiom_n) {
        for (String beta : args.axiom_beta) {
          for (String top : args.axiom_top) {
            for (String seed : args.axiom_seed) {
              String label = fromQrels
                  ? String.format("axRf(seed=%s,n=%s,beta=%s,top=%s)", seed, n, beta, top)
                  : String.format("ax(seed=%s,r=%s,n=%s,beta=%s,top=%s)", seed, r, n, beta, top);
              RerankerCascade axiomCascade = new RerankerCascade(label);
              axiomCascade.add(new AxiomReranker(args.index, args.axiom_index, IndexArgs.CONTENTS, args.axiom_deterministic, Integer.valueOf(seed), Integer.valueOf(r), Integer.valueOf(n), Float.valueOf(beta), Integer.valueOf(top), args.axiom_docids, args.axiom_outputQuery, args.searchtweets));
              axiomCascade.add(new ScoreTiesAdjusterReranker());
              result.add(axiomCascade);
            }
          }
        }
      }
    }
  } else if (args.bm25prf) {
    for (String terms : args.bm25prf_fbTerms) {
      for (String docs : args.bm25prf_fbDocs) {
        for (String k1 : args.bm25prf_k1) {
          for (String b : args.bm25prf_b) {
            for (String termWeight : args.bm25prf_newTermWeight) {
              String label = fromQrels
                  ? String.format("bm25Rf(fbTerms=%s,k1=%s,b=%s,newTermWeight=%s)", terms, k1, b, termWeight)
                  : String.format("bm25prf(fbTerms=%s,fbDocs=%s,k1=%s,b=%s,newTermWeight=%s)", terms, docs, k1, b, termWeight);
              RerankerCascade prfCascade = new RerankerCascade(label);
              prfCascade.add(new BM25PrfReranker(analyzer, IndexArgs.CONTENTS, Integer.valueOf(terms), Integer.valueOf(docs), Float.valueOf(k1), Float.valueOf(b), Float.valueOf(termWeight), args.bm25prf_outputQuery));
              prfCascade.add(new ScoreTiesAdjusterReranker());
              result.add(prfCascade);
            }
          }
        }
      }
    }
  } else {
    // No feedback model enabled: only adjust score ties.
    RerankerCascade tiesOnly = new RerankerCascade();
    tiesOnly.add(new ScoreTiesAdjusterReranker());
    result.add(tiesOnly);
  }

  return result;
}
Aggregations