Search in sources:

Example 1 with ScoreTiesAdjusterReranker

use of io.anserini.rerank.lib.ScoreTiesAdjusterReranker in project Anserini by castorini.

In the class SearchElastic, method searchTweets.

/**
 * Runs a tweet search against the Elasticsearch index named by {@code args.esIndex}.
 *
 * <p>Every character listed in the special-character string below is replaced by a
 * single space before the query string is handed to the query-string query builder.
 * Hits are restricted to tweets whose {@code ID_LONG} field lies in {@code [0, t]},
 * ordered by descending score and then by descending {@code ID_LONG}, and finally
 * passed through a {@link ScoreTiesAdjusterReranker} with a {@code null} context.
 *
 * <p>If the search call throws, the exception is logged and {@code null} hits are
 * forwarded to {@code ScoredDocuments.fromESDocs}.
 *
 * @param queryString raw query text
 * @param t           upper bound (inclusive range end) for the tweet id filter
 * @param <K>         not referenced by this method
 * @return the documents produced by the score-ties reranker
 */
public <K> ScoredDocuments searchTweets(String queryString, long t) {
    // Blank out every character from this list, one at a time.
    final String specials = "+-=&|><!(){}[]^\"~*?:\\/";
    for (char special : specials.toCharArray()) {
        queryString = queryString.replace(String.valueOf(special), " ");
    }

    // Tweets whose ids lie beyond the query tweet time must not be considered.
    // The <querytweettime> tag carries the query timestamp expressed as the id of
    // the chronologically nearest tweet in the corpus.
    RangeQueryBuilder tweetTimeFilter = QueryBuilders
        .rangeQuery(TweetGenerator.TweetField.ID_LONG.name)
        .from(0L)
        .to(t);
    QueryStringQueryBuilder termsQuery = QueryBuilders
        .queryStringQuery(queryString)
        .defaultField("contents")
        .analyzer("english");
    BoolQueryBuilder boolQuery = QueryBuilders.boolQuery()
        .filter(tweetTimeFilter)
        .should(termsQuery);

    SearchRequest request = new SearchRequest(args.esIndex);
    SearchSourceBuilder source = new SearchSourceBuilder()
        .query(boolQuery)
        .size(args.hits)
        .sort(new ScoreSortBuilder().order(SortOrder.DESC))
        .sort(new FieldSortBuilder(TweetGenerator.TweetField.ID_LONG.name).order(SortOrder.DESC));
    request.source(source);

    SearchHits hits = null;
    try {
        hits = client.search(request, COMMON_OPTIONS).getHits();
    } catch (Exception e) {
        // Failure is logged only; hits remains null and is passed on as-is.
        LOG.error("Exception during ES query: ", e);
    }

    ScoredDocuments scored = ScoredDocuments.fromESDocs(hits);
    return new ScoreTiesAdjusterReranker().rerank(scored, null);
}
Also used : SearchRequest(org.elasticsearch.action.search.SearchRequest) FieldSortBuilder(org.elasticsearch.search.sort.FieldSortBuilder) RangeQueryBuilder(org.elasticsearch.index.query.RangeQueryBuilder) IOException(java.io.IOException) CmdLineException(org.kohsuke.args4j.CmdLineException) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) SearchResponse(org.elasticsearch.action.search.SearchResponse) ScoreSortBuilder(org.elasticsearch.search.sort.ScoreSortBuilder) BoolQueryBuilder(org.elasticsearch.index.query.BoolQueryBuilder) ScoreTiesAdjusterReranker(io.anserini.rerank.lib.ScoreTiesAdjusterReranker) SearchHits(org.elasticsearch.search.SearchHits) QueryStringQueryBuilder(org.elasticsearch.index.query.QueryStringQueryBuilder)

Example 2 with ScoreTiesAdjusterReranker

use of io.anserini.rerank.lib.ScoreTiesAdjusterReranker in project Anserini by castorini.

In the class SearchSolr, method searchTweets.

/**
 * Runs a tweet search against the Solr collection named by {@code args.solrIndex}.
 *
 * <p>Double quotes are stripped from the query string, results are sorted by
 * descending score and then by descending {@code ID_LONG}, and a filter query built
 * from a {@code LongPoint} range over {@code [0, t]} is attached. The results are
 * passed through a {@link ScoreTiesAdjusterReranker} with a {@code null} context.
 *
 * <p>If the Solr call throws, the exception is logged and a {@code null} result list
 * is forwarded to {@code ScoredDocuments.fromSolrDocs}.
 *
 * @param queryString raw query text
 * @param t           upper bound of the tweet id range filter
 * @param <K>         not referenced by this method
 * @return the documents produced by the score-ties reranker
 */
public <K> ScoredDocuments searchTweets(String queryString, long t) {
    // Tweets whose ids lie beyond the query tweet time must not be considered.
    // The <querytweettime> tag carries the query timestamp expressed as the id of
    // the chronologically nearest tweet in the corpus.
    final Query tweetTimeFilter = LongPoint.newRangeQuery(TweetGenerator.TweetField.ID_LONG.name, 0L, t);

    final SolrQuery request = new SolrQuery();
    request.set("df", "contents");
    request.set("fl", "* score");
    // Double quotes are special syntax for the Solr query parser, so drop them.
    final String unquoted = queryString.replace("\"", "");
    request.setQuery(unquoted);
    request.setRows(args.hits);
    request.setSort(SortClause.desc("score"));
    request.addSort(SortClause.desc(TweetGenerator.TweetField.ID_LONG.name));
    request.set("fq", tweetTimeFilter.toString());

    SolrDocumentList docs = null;
    try {
        docs = client.query(args.solrIndex, request).getResults();
    } catch (Exception e) {
        // Failure is logged only; docs remains null and is passed on as-is.
        LOG.error("Exception during Solr query: ", e);
    }

    ScoredDocuments scored = ScoredDocuments.fromSolrDocs(docs);
    return new ScoreTiesAdjusterReranker().rerank(scored, null);
}
Also used : Query(org.apache.lucene.search.Query) SolrQuery(org.apache.solr.client.solrj.SolrQuery) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) ScoreTiesAdjusterReranker(io.anserini.rerank.lib.ScoreTiesAdjusterReranker) SolrDocumentList(org.apache.solr.common.SolrDocumentList) SolrQuery(org.apache.solr.client.solrj.SolrQuery) IOException(java.io.IOException) CmdLineException(org.kohsuke.args4j.CmdLineException)

Example 3 with ScoreTiesAdjusterReranker

use of io.anserini.rerank.lib.ScoreTiesAdjusterReranker in project Anserini by castorini.

In the class SearchSolr, method search.

/**
 * Runs a search against the Solr collection named by {@code args.solrIndex}.
 *
 * <p>Characters matched by the character class below are each replaced with a
 * space before the query is submitted. Results are sorted by descending score and
 * then by ascending {@code IndexArgs.ID}, and passed through a
 * {@link ScoreTiesAdjusterReranker} with a {@code null} context.
 *
 * <p>If the Solr call throws, the exception is logged and a {@code null} result list
 * is forwarded to {@code ScoredDocuments.fromSolrDocs}.
 *
 * @param queryString raw query text
 * @param <K>         not referenced by this method
 * @return the documents produced by the score-ties reranker
 */
public <K> ScoredDocuments search(String queryString) {
    // These characters are special syntax for the Solr query parser; blank them out.
    final String sanitized = queryString.replaceAll("[+=&|<>!(){}~*?:/\"\\^\\-\\[\\]\\\\]", " ");

    final SolrQuery request = new SolrQuery();
    request.set("df", "contents");
    request.set("fl", "* score");
    request.setQuery(sanitized);
    request.setRows(args.hits);
    request.setSort(SortClause.desc("score"));
    request.addSort(SortClause.asc(IndexArgs.ID));

    SolrDocumentList docs = null;
    try {
        docs = client.query(args.solrIndex, request).getResults();
    } catch (Exception e) {
        // Failure is logged only; docs remains null and is passed on as-is.
        LOG.error("Exception during Solr query: ", e);
    }

    ScoredDocuments scored = ScoredDocuments.fromSolrDocs(docs);
    return new ScoreTiesAdjusterReranker().rerank(scored, null);
}
Also used : QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) ScoreTiesAdjusterReranker(io.anserini.rerank.lib.ScoreTiesAdjusterReranker) SolrDocumentList(org.apache.solr.common.SolrDocumentList) SolrQuery(org.apache.solr.client.solrj.SolrQuery) IOException(java.io.IOException) CmdLineException(org.kohsuke.args4j.CmdLineException)

Example 4 with ScoreTiesAdjusterReranker

use of io.anserini.rerank.lib.ScoreTiesAdjusterReranker in project Anserini by castorini.

In the class SearchCollection, method searchTweets.

/**
 * Searches tweets with a keyword query restricted to tweet ids in {@code [0, t]} and
 * runs the result through a reranker cascade.
 *
 * <p>The keyword query comes from {@link SdmQueryGenerator} when {@code args.sdm} is
 * set; otherwise the generator class named by {@code args.queryGenerator} is loaded
 * reflectively from the {@code io.anserini.search.query} package.
 *
 * <p>The index is only searched when reranking is disabled, when a rerank cutoff is
 * configured without relevance-feedback qrels, or when qrels are configured but the
 * query has no relevant documents. Otherwise an empty {@link TopDocs} is used.
 *
 * @param searcher    searcher used to execute the composite query
 * @param qid         query identifier passed into the reranker context
 * @param queryString raw query text
 * @param t           upper bound of the tweet id range filter
 * @param cascade     cascade to run; replaced by a score-ties-only cascade when qrels
 *                    are configured but {@code hasRelDocs} is false
 * @param queryQrels  documents used as cascade input when qrels are configured and
 *                    {@code hasRelDocs} is true
 * @param hasRelDocs  whether the query has relevant documents
 * @param <K>         type of the query identifier
 * @return the output of the reranker cascade
 * @throws IOException declared by this method; propagated from the calls it makes
 * @throws IllegalArgumentException if the configured query generator cannot be
 *                                  instantiated or fails while building the query
 */
public <K> ScoredDocuments searchTweets(IndexSearcher searcher, K qid, String queryString, long t, RerankerCascade cascade, ScoredDocuments queryQrels, boolean hasRelDocs) throws IOException {
    Query keywordQuery;
    if (args.sdm) {
        keywordQuery = new SdmQueryGenerator(args.sdm_tw, args.sdm_ow, args.sdm_uw).buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
    } else {
        try {
            QueryGenerator generator = (QueryGenerator) Class.forName("io.anserini.search.query." + args.queryGenerator).getConstructor().newInstance();
            keywordQuery = generator.buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
        } catch (Exception e) {
            // Name the generator that actually failed (not the topic reader) and keep
            // the original exception as the cause instead of printing it separately.
            throw new IllegalArgumentException("Unable to load QueryGenerator: " + args.queryGenerator, e);
        }
    }
    List<String> queryTokens = AnalyzerUtils.analyze(analyzer, queryString);
    // Do not consider the tweets with tweet ids that are beyond the queryTweetTime
    // <querytweettime> tag contains the timestamp of the query in terms of the
    // chronologically nearest tweet id within the corpus
    Query filter = LongPoint.newRangeQuery(TweetGenerator.TweetField.ID_LONG.name, 0L, t);
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(filter, BooleanClause.Occur.FILTER);
    builder.add(keywordQuery, BooleanClause.Occur.MUST);
    Query compositeQuery = builder.build();
    // Start from an empty result set; it is kept when the search below is skipped.
    TopDocs rs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {});
    if (!isRerank || (args.rerankcutoff > 0 && args.rf_qrels == null) || (args.rf_qrels != null && !hasRelDocs)) {
        // When reranking without qrels, retrieve rerankcutoff hits; otherwise args.hits.
        int numHits = (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits;
        if (args.arbitraryScoreTieBreak) {
            // No explicit sort is passed, so tie order is left to the searcher.
            rs = searcher.search(compositeQuery, numHits);
        } else {
            rs = searcher.search(compositeQuery, numHits, BREAK_SCORE_TIES_BY_TWEETID, true);
        }
    }
    RerankerContext context = new RerankerContext<>(searcher, qid, keywordQuery, null, queryString, queryTokens, filter, args);
    ScoredDocuments scoredFbDocs;
    if (isRerank && args.rf_qrels != null) {
        if (hasRelDocs) {
            scoredFbDocs = queryQrels;
        } else {
            // if no relevant documents, only perform score based tie breaking next
            scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
            cascade = new RerankerCascade();
            cascade.add(new ScoreTiesAdjusterReranker());
        }
    } else {
        scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
    }
    return cascade.run(scoredFbDocs, context);
}
Also used : TotalHits(org.apache.lucene.search.TotalHits) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) TermInSetQuery(org.apache.lucene.search.TermInSetQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ScoredDocuments(io.anserini.rerank.ScoredDocuments) QueryNodeException(org.apache.lucene.queryparser.flexible.core.QueryNodeException) IOException(java.io.IOException) CompletionException(java.util.concurrent.CompletionException) CmdLineException(org.kohsuke.args4j.CmdLineException) AtomicMoveNotSupportedException(java.nio.file.AtomicMoveNotSupportedException) TopDocs(org.apache.lucene.search.TopDocs) RerankerCascade(io.anserini.rerank.RerankerCascade) QueryGenerator(io.anserini.search.query.QueryGenerator) SdmQueryGenerator(io.anserini.search.query.SdmQueryGenerator) ScoreTiesAdjusterReranker(io.anserini.rerank.lib.ScoreTiesAdjusterReranker) SdmQueryGenerator(io.anserini.search.query.SdmQueryGenerator) RerankerContext(io.anserini.rerank.RerankerContext)

Example 5 with ScoreTiesAdjusterReranker

use of io.anserini.rerank.lib.ScoreTiesAdjusterReranker in project Anserini by castorini.

In the class SearchCollection, method constructRerankers.

/**
 * Builds the list of reranker cascades selected by the command-line arguments.
 *
 * <p>Exactly one family is used, checked in this order: RM3, axiomatic, BM25-PRF.
 * For the selected family one cascade is created per combination of its parameter
 * lists; each cascade holds the family's reranker followed by a
 * {@link ScoreTiesAdjusterReranker}. When no family is enabled, a single untagged
 * cascade containing only the score-ties reranker is returned.
 *
 * <p>Cascade tags use the "Rf" variants of the format strings when
 * {@code args.rf_qrels} is non-null.
 *
 * @return the cascades, in parameter-iteration order
 * @throws IOException declared by this method; propagated from the calls it makes
 */
private List<RerankerCascade> constructRerankers() throws IOException {
    final List<RerankerCascade> result = new ArrayList<>();
    // Selects between the relevance-feedback ("Rf") tag and the plain tag below.
    final boolean withQrels = this.args.rf_qrels != null;

    if (args.rm3) {
        for (String fbTerms : args.rm3_fbTerms) {
            for (String fbDocs : args.rm3_fbDocs) {
                for (String originalQueryWeight : args.rm3_originalQueryWeight) {
                    String label = withQrels
                        ? String.format("rm3Rf(fbTerms=%s,originalQueryWeight=%s)", fbTerms, originalQueryWeight)
                        : String.format("rm3(fbTerms=%s,fbDocs=%s,originalQueryWeight=%s)", fbTerms, fbDocs, originalQueryWeight);
                    RerankerCascade rm3Cascade = new RerankerCascade(label);
                    rm3Cascade.add(new Rm3Reranker(
                        analyzer,
                        IndexArgs.CONTENTS,
                        Integer.valueOf(fbTerms),
                        Integer.valueOf(fbDocs),
                        Float.valueOf(originalQueryWeight),
                        args.rm3_outputQuery,
                        !args.rm3_noTermFilter));
                    rm3Cascade.add(new ScoreTiesAdjusterReranker());
                    result.add(rm3Cascade);
                }
            }
        }
        return result;
    }

    if (args.axiom) {
        for (String r : args.axiom_r) {
            for (String n : args.axiom_n) {
                for (String beta : args.axiom_beta) {
                    for (String top : args.axiom_top) {
                        for (String seed : args.axiom_seed) {
                            String label = withQrels
                                ? String.format("axRf(seed=%s,n=%s,beta=%s,top=%s)", seed, n, beta, top)
                                : String.format("ax(seed=%s,r=%s,n=%s,beta=%s,top=%s)", seed, r, n, beta, top);
                            RerankerCascade axiomCascade = new RerankerCascade(label);
                            axiomCascade.add(new AxiomReranker(
                                args.index,
                                args.axiom_index,
                                IndexArgs.CONTENTS,
                                args.axiom_deterministic,
                                Integer.valueOf(seed),
                                Integer.valueOf(r),
                                Integer.valueOf(n),
                                Float.valueOf(beta),
                                Integer.valueOf(top),
                                args.axiom_docids,
                                args.axiom_outputQuery,
                                args.searchtweets));
                            axiomCascade.add(new ScoreTiesAdjusterReranker());
                            result.add(axiomCascade);
                        }
                    }
                }
            }
        }
        return result;
    }

    if (args.bm25prf) {
        for (String fbTerms : args.bm25prf_fbTerms) {
            for (String fbDocs : args.bm25prf_fbDocs) {
                for (String k1 : args.bm25prf_k1) {
                    for (String b : args.bm25prf_b) {
                        for (String newTermWeight : args.bm25prf_newTermWeight) {
                            String label = withQrels
                                ? String.format("bm25Rf(fbTerms=%s,k1=%s,b=%s,newTermWeight=%s)", fbTerms, k1, b, newTermWeight)
                                : String.format("bm25prf(fbTerms=%s,fbDocs=%s,k1=%s,b=%s,newTermWeight=%s)", fbTerms, fbDocs, k1, b, newTermWeight);
                            RerankerCascade prfCascade = new RerankerCascade(label);
                            prfCascade.add(new BM25PrfReranker(
                                analyzer,
                                IndexArgs.CONTENTS,
                                Integer.valueOf(fbTerms),
                                Integer.valueOf(fbDocs),
                                Float.valueOf(k1),
                                Float.valueOf(b),
                                Float.valueOf(newTermWeight),
                                args.bm25prf_outputQuery));
                            prfCascade.add(new ScoreTiesAdjusterReranker());
                            result.add(prfCascade);
                        }
                    }
                }
            }
        }
        return result;
    }

    // No feedback reranker requested: only adjust score ties.
    RerankerCascade tiesOnly = new RerankerCascade();
    tiesOnly.add(new ScoreTiesAdjusterReranker());
    result.add(tiesOnly);
    return result;
}
Also used : RerankerCascade(io.anserini.rerank.RerankerCascade) Rm3Reranker(io.anserini.rerank.lib.Rm3Reranker) AxiomReranker(io.anserini.rerank.lib.AxiomReranker) ArrayList(java.util.ArrayList) ScoreTiesAdjusterReranker(io.anserini.rerank.lib.ScoreTiesAdjusterReranker) BM25PrfReranker(io.anserini.rerank.lib.BM25PrfReranker)

Aggregations

ScoreTiesAdjusterReranker (io.anserini.rerank.lib.ScoreTiesAdjusterReranker)9 IOException (java.io.IOException)6 CmdLineException (org.kohsuke.args4j.CmdLineException)6 RerankerCascade (io.anserini.rerank.RerankerCascade)5 Query (org.apache.lucene.search.Query)3 RerankerContext (io.anserini.rerank.RerankerContext)2 ScoredDocuments (io.anserini.rerank.ScoredDocuments)2 Rm3Reranker (io.anserini.rerank.lib.Rm3Reranker)2 QueryGenerator (io.anserini.search.query.QueryGenerator)2 SdmQueryGenerator (io.anserini.search.query.SdmQueryGenerator)2 AtomicMoveNotSupportedException (java.nio.file.AtomicMoveNotSupportedException)2 CompletionException (java.util.concurrent.CompletionException)2 QueryNodeException (org.apache.lucene.queryparser.flexible.core.QueryNodeException)2 BooleanQuery (org.apache.lucene.search.BooleanQuery)2 TermInSetQuery (org.apache.lucene.search.TermInSetQuery)2 TopDocs (org.apache.lucene.search.TopDocs)2 TotalHits (org.apache.lucene.search.TotalHits)2 SolrQuery (org.apache.solr.client.solrj.SolrQuery)2 QueryResponse (org.apache.solr.client.solrj.response.QueryResponse)2 SolrDocumentList (org.apache.solr.common.SolrDocumentList)2