Search in sources :

Example 6 with RerankerCascade

use of io.anserini.rerank.RerankerCascade in project Anserini by castorini.

the class SearchCollection method searchTweets.

/**
 * Runs a single query against a tweet index and passes the results through a reranker cascade.
 *
 * <p>The keyword query is built either by {@code SdmQueryGenerator} (when {@code args.sdm} is set)
 * or by the {@code QueryGenerator} implementation named by {@code args.queryGenerator}, which is
 * instantiated reflectively from the {@code io.anserini.search.query} package. The keyword query is
 * combined with a range filter on the tweet id field so that only tweets with an id in
 * {@code [0, t]} are considered.
 *
 * @param searcher index searcher used for the initial retrieval
 * @param qid query identifier, forwarded to the reranker context
 * @param queryString raw query text
 * @param t upper bound (inclusive) of the tweet id range filter
 * @param cascade reranker cascade to run; replaced by a tie-breaking-only cascade when
 *     {@code args.rf_qrels} is set but {@code hasRelDocs} is false
 * @param queryQrels documents used as feedback input when reranking with {@code args.rf_qrels}
 *     and {@code hasRelDocs} is true
 * @param hasRelDocs whether {@code queryQrels} should be used as the feedback documents
 * @param <K> type of the query identifier
 * @return the documents produced by running the cascade
 * @throws IOException if the underlying search fails
 * @throws IllegalArgumentException if the configured query generator cannot be instantiated or
 *     fails while building the query
 */
public <K> ScoredDocuments searchTweets(IndexSearcher searcher, K qid, String queryString, long t, RerankerCascade cascade, ScoredDocuments queryQrels, boolean hasRelDocs) throws IOException {
    Query keywordQuery;
    if (args.sdm) {
        keywordQuery = new SdmQueryGenerator(args.sdm_tw, args.sdm_ow, args.sdm_uw).buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
    } else {
        try {
            QueryGenerator generator = (QueryGenerator) Class.forName("io.anserini.search.query." + args.queryGenerator).getConstructor().newInstance();
            keywordQuery = generator.buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
        } catch (Exception e) {
            // Report the generator name that was actually looked up and keep the original
            // exception as the cause so the full stack trace reaches the caller.
            throw new IllegalArgumentException("Unable to load QueryGenerator: " + args.queryGenerator, e);
        }
    }
    List<String> queryTokens = AnalyzerUtils.analyze(analyzer, queryString);
    // Do not consider the tweets with tweet ids that are beyond the queryTweetTime
    // <querytweettime> tag contains the timestamp of the query in terms of the
    // chronologically nearest tweet id within the corpus
    Query filter = LongPoint.newRangeQuery(TweetGenerator.TweetField.ID_LONG.name, 0L, t);
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(filter, BooleanClause.Occur.FILTER);
    builder.add(keywordQuery, BooleanClause.Occur.MUST);
    Query compositeQuery = builder.build();
    // Start from an empty result set; it stays empty when no initial retrieval is performed below.
    TopDocs rs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {});
    if (!isRerank || (args.rerankcutoff > 0 && args.rf_qrels == null) || (args.rf_qrels != null && !hasRelDocs)) {
        // When reranking without qrels-based feedback, fetch rerankcutoff hits; otherwise fetch args.hits.
        if (args.arbitraryScoreTieBreak) {
            // No explicit sort: score ties are left in whatever order the searcher returns them.
            rs = searcher.search(compositeQuery, (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits);
        } else {
            // Explicit sort so that score ties are ordered by BREAK_SCORE_TIES_BY_TWEETID.
            // NOTE(review): the trailing 'true' presumably asks Lucene to compute document scores - confirm
            // against the Lucene version in use.
            rs = searcher.search(compositeQuery, (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits, BREAK_SCORE_TIES_BY_TWEETID, true);
        }
    }
    RerankerContext context = new RerankerContext<>(searcher, qid, keywordQuery, null, queryString, queryTokens, filter, args);
    ScoredDocuments scoredFbDocs;
    if (isRerank && args.rf_qrels != null) {
        if (hasRelDocs) {
            scoredFbDocs = queryQrels;
        } else {
            // if no relevant documents, only perform score based tie breaking next
            scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
            cascade = new RerankerCascade();
            cascade.add(new ScoreTiesAdjusterReranker());
        }
    } else {
        scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
    }
    return cascade.run(scoredFbDocs, context);
}
Also used : TotalHits(org.apache.lucene.search.TotalHits) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) TermInSetQuery(org.apache.lucene.search.TermInSetQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ScoredDocuments(io.anserini.rerank.ScoredDocuments) QueryNodeException(org.apache.lucene.queryparser.flexible.core.QueryNodeException) IOException(java.io.IOException) CompletionException(java.util.concurrent.CompletionException) CmdLineException(org.kohsuke.args4j.CmdLineException) AtomicMoveNotSupportedException(java.nio.file.AtomicMoveNotSupportedException) TopDocs(org.apache.lucene.search.TopDocs) RerankerCascade(io.anserini.rerank.RerankerCascade) QueryGenerator(io.anserini.search.query.QueryGenerator) SdmQueryGenerator(io.anserini.search.query.SdmQueryGenerator) ScoreTiesAdjusterReranker(io.anserini.rerank.lib.ScoreTiesAdjusterReranker) SdmQueryGenerator(io.anserini.search.query.SdmQueryGenerator) RerankerContext(io.anserini.rerank.RerankerContext)

Example 7 with RerankerCascade

use of io.anserini.rerank.RerankerCascade in project Anserini by castorini.

the class SearchCollection method constructRerankers.

/**
 * Builds the list of reranker cascades implied by the command-line arguments.
 *
 * <p>Exactly one mode is used, checked in this order: RM3 ({@code args.rm3}), axiomatic
 * ({@code args.axiom}), BM25-PRF ({@code args.bm25prf}). For the selected mode, one cascade is
 * created for every combination of its parameter lists; each cascade holds the mode's reranker
 * followed by a {@code ScoreTiesAdjusterReranker}. When no mode is selected, a single cascade
 * containing only a {@code ScoreTiesAdjusterReranker} is returned.
 *
 * <p>The cascade tag encodes the parameter combination; when {@code args.rf_qrels} is set, a
 * different tag format is used that omits {@code fbDocs} (RM3, BM25-PRF) or {@code r} (axiomatic).
 *
 * @return the constructed cascades, never empty
 * @throws IOException declared by the original signature; propagated from reranker construction
 */
private List<RerankerCascade> constructRerankers() throws IOException {
    final List<RerankerCascade> result = new ArrayList<>();
    // Tag format depends on whether feedback documents come from a qrels file.
    final boolean fromQrels = this.args.rf_qrels != null;

    if (args.rm3) {
        for (String terms : args.rm3_fbTerms) {
            for (String docs : args.rm3_fbDocs) {
                for (String weight : args.rm3_originalQueryWeight) {
                    String label = fromQrels
                        ? String.format("rm3Rf(fbTerms=%s,originalQueryWeight=%s)", terms, weight)
                        : String.format("rm3(fbTerms=%s,fbDocs=%s,originalQueryWeight=%s)", terms, docs, weight);
                    RerankerCascade rm3Cascade = new RerankerCascade(label);
                    rm3Cascade.add(new Rm3Reranker(analyzer, IndexArgs.CONTENTS, Integer.valueOf(terms), Integer.valueOf(docs), Float.valueOf(weight), args.rm3_outputQuery, !args.rm3_noTermFilter));
                    rm3Cascade.add(new ScoreTiesAdjusterReranker());
                    result.add(rm3Cascade);
                }
            }
        }
        return result;
    }

    if (args.axiom) {
        for (String rParam : args.axiom_r) {
            for (String nParam : args.axiom_n) {
                for (String betaParam : args.axiom_beta) {
                    for (String topParam : args.axiom_top) {
                        for (String seedParam : args.axiom_seed) {
                            String label = fromQrels
                                ? String.format("axRf(seed=%s,n=%s,beta=%s,top=%s)", seedParam, nParam, betaParam, topParam)
                                : String.format("ax(seed=%s,r=%s,n=%s,beta=%s,top=%s)", seedParam, rParam, nParam, betaParam, topParam);
                            RerankerCascade axiomCascade = new RerankerCascade(label);
                            axiomCascade.add(new AxiomReranker(args.index, args.axiom_index, IndexArgs.CONTENTS, args.axiom_deterministic, Integer.valueOf(seedParam), Integer.valueOf(rParam), Integer.valueOf(nParam), Float.valueOf(betaParam), Integer.valueOf(topParam), args.axiom_docids, args.axiom_outputQuery, args.searchtweets));
                            axiomCascade.add(new ScoreTiesAdjusterReranker());
                            result.add(axiomCascade);
                        }
                    }
                }
            }
        }
        return result;
    }

    if (args.bm25prf) {
        for (String terms : args.bm25prf_fbTerms) {
            for (String docs : args.bm25prf_fbDocs) {
                for (String k1Param : args.bm25prf_k1) {
                    for (String bParam : args.bm25prf_b) {
                        for (String termWeight : args.bm25prf_newTermWeight) {
                            String label = fromQrels
                                ? String.format("bm25Rf(fbTerms=%s,k1=%s,b=%s,newTermWeight=%s)", terms, k1Param, bParam, termWeight)
                                : String.format("bm25prf(fbTerms=%s,fbDocs=%s,k1=%s,b=%s,newTermWeight=%s)", terms, docs, k1Param, bParam, termWeight);
                            RerankerCascade prfCascade = new RerankerCascade(label);
                            prfCascade.add(new BM25PrfReranker(analyzer, IndexArgs.CONTENTS, Integer.valueOf(terms), Integer.valueOf(docs), Float.valueOf(k1Param), Float.valueOf(bParam), Float.valueOf(termWeight), args.bm25prf_outputQuery));
                            prfCascade.add(new ScoreTiesAdjusterReranker());
                            result.add(prfCascade);
                        }
                    }
                }
            }
        }
        return result;
    }

    // No feedback mode selected: only adjust score ties.
    RerankerCascade tieBreakOnly = new RerankerCascade();
    tieBreakOnly.add(new ScoreTiesAdjusterReranker());
    result.add(tieBreakOnly);
    return result;
}
Also used : RerankerCascade(io.anserini.rerank.RerankerCascade) Rm3Reranker(io.anserini.rerank.lib.Rm3Reranker) AxiomReranker(io.anserini.rerank.lib.AxiomReranker) ArrayList(java.util.ArrayList) ScoreTiesAdjusterReranker(io.anserini.rerank.lib.ScoreTiesAdjusterReranker) BM25PrfReranker(io.anserini.rerank.lib.BM25PrfReranker)

Example 8 with RerankerCascade

use of io.anserini.rerank.RerankerCascade in project Anserini by castorini.

the class SearchCollection method search.

/**
 * Runs a single query against the index and passes the results through a reranker cascade.
 *
 * <p>The query is built either by {@code SdmQueryGenerator} (when {@code args.sdm} is set) or by
 * the {@code QueryGenerator} implementation named by {@code args.queryGenerator}, which is
 * instantiated reflectively from the {@code io.anserini.search.query} package. The analyzed query
 * tokens are recorded in {@code queries} under {@code qid.toString()}.
 *
 * @param searcher index searcher used for the initial retrieval
 * @param qid query identifier, forwarded to the reranker context
 * @param queryString raw query text
 * @param cascade reranker cascade to run; replaced by a tie-breaking-only cascade when
 *     {@code args.rf_qrels} is set but {@code hasRelDocs} is false
 * @param queryQrels documents used as feedback input when reranking with {@code args.rf_qrels}
 *     and {@code hasRelDocs} is true
 * @param hasRelDocs whether {@code queryQrels} should be used as the feedback documents
 * @param <K> type of the query identifier
 * @return the documents produced by running the cascade
 * @throws IOException if the underlying search fails
 * @throws IllegalArgumentException if the configured query generator cannot be instantiated
 */
public <K> ScoredDocuments search(IndexSearcher searcher, K qid, String queryString, RerankerCascade cascade, ScoredDocuments queryQrels, boolean hasRelDocs) throws IOException {
    final Query query;
    if (args.sdm) {
        query = new SdmQueryGenerator(args.sdm_tw, args.sdm_ow, args.sdm_uw).buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
    } else {
        QueryGenerator generator;
        try {
            generator = (QueryGenerator) Class.forName("io.anserini.search.query." + args.queryGenerator).getConstructor().newInstance();
        } catch (Exception e) {
            // Report the generator name that was actually looked up and keep the original
            // exception as the cause so the full stack trace reaches the caller.
            throw new IllegalArgumentException("Unable to load QueryGenerator: " + args.queryGenerator, e);
        }
        query = generator.buildQuery(IndexArgs.CONTENTS, analyzer, queryString);
    }
    // Start from an empty result set; it stays empty when no initial retrieval is performed below.
    TopDocs rs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {});
    if (!isRerank || (args.rerankcutoff > 0 && args.rf_qrels == null) || (args.rf_qrels != null && !hasRelDocs)) {
        // When reranking without qrels-based feedback, fetch rerankcutoff hits; otherwise fetch args.hits.
        if (args.arbitraryScoreTieBreak) {
            // No explicit sort: score ties are left in whatever order the searcher returns them.
            rs = searcher.search(query, (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits);
        } else {
            // Explicit sort so that score ties are ordered by BREAK_SCORE_TIES_BY_DOCID.
            // NOTE(review): the trailing 'true' presumably asks Lucene to compute document scores - confirm
            // against the Lucene version in use.
            rs = searcher.search(query, (isRerank && args.rf_qrels == null) ? args.rerankcutoff : args.hits, BREAK_SCORE_TIES_BY_DOCID, true);
        }
    }
    List<String> queryTokens = AnalyzerUtils.analyze(analyzer, queryString);
    queries.put(qid.toString(), queryTokens);
    RerankerContext context = new RerankerContext<>(searcher, qid, query, null, queryString, queryTokens, null, args);
    ScoredDocuments scoredFbDocs;
    if (isRerank && args.rf_qrels != null) {
        if (hasRelDocs) {
            scoredFbDocs = queryQrels;
        } else {
            // if no relevant documents, only perform score based tie breaking next
            LOG.info("No relevant documents for " + qid.toString());
            scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
            cascade = new RerankerCascade();
            cascade.add(new ScoreTiesAdjusterReranker());
        }
    } else {
        scoredFbDocs = ScoredDocuments.fromTopDocs(rs, searcher);
    }
    return cascade.run(scoredFbDocs, context);
}
Also used : TotalHits(org.apache.lucene.search.TotalHits) Query(org.apache.lucene.search.Query) TermInSetQuery(org.apache.lucene.search.TermInSetQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ScoredDocuments(io.anserini.rerank.ScoredDocuments) QueryNodeException(org.apache.lucene.queryparser.flexible.core.QueryNodeException) IOException(java.io.IOException) CompletionException(java.util.concurrent.CompletionException) CmdLineException(org.kohsuke.args4j.CmdLineException) AtomicMoveNotSupportedException(java.nio.file.AtomicMoveNotSupportedException) TopDocs(org.apache.lucene.search.TopDocs) RerankerCascade(io.anserini.rerank.RerankerCascade) QueryGenerator(io.anserini.search.query.QueryGenerator) SdmQueryGenerator(io.anserini.search.query.SdmQueryGenerator) ScoreTiesAdjusterReranker(io.anserini.rerank.lib.ScoreTiesAdjusterReranker) SdmQueryGenerator(io.anserini.search.query.SdmQueryGenerator) RerankerContext(io.anserini.rerank.RerankerContext)

Example 9 with RerankerCascade

use of io.anserini.rerank.RerankerCascade in project Anserini by castorini.

the class SimpleSearcher method setRM3.

/**
 * Enables RM3 query expansion using the supplied parameters.
 *
 * <p>Sets the {@code useRM3} flag and replaces the current cascade with one tagged {@code "rm3"}
 * that holds an {@code Rm3Reranker} over the contents field followed by a
 * {@code ScoreTiesAdjusterReranker}.
 *
 * @param fbTerms number of expansion terms
 * @param fbDocs number of expansion documents
 * @param originalQueryWeight weight to assign to the original query
 * @param outputQuery flag to print original and expanded queries
 * @param filterTerms whether to filter terms to be English only
 */
public void setRM3(int fbTerms, int fbDocs, float originalQueryWeight, boolean outputQuery, boolean filterTerms) {
    this.useRM3 = true;
    this.cascade = new RerankerCascade("rm3");
    this.cascade.add(
        new Rm3Reranker(this.analyzer, IndexArgs.CONTENTS, fbTerms, fbDocs, originalQueryWeight, outputQuery, filterTerms));
    // Tie adjustment always runs last in the cascade.
    this.cascade.add(new ScoreTiesAdjusterReranker());
}
Also used : RerankerCascade(io.anserini.rerank.RerankerCascade) Rm3Reranker(io.anserini.rerank.lib.Rm3Reranker) ScoreTiesAdjusterReranker(io.anserini.rerank.lib.ScoreTiesAdjusterReranker)

Example 10 with RerankerCascade

use of io.anserini.rerank.RerankerCascade in project Anserini by castorini.

the class SimpleSearcher method unsetRM3.

/**
 * Disables RM3 query expansion.
 *
 * <p>Clears the {@code useRM3} flag and replaces the current cascade with an untagged one that
 * contains only a {@code ScoreTiesAdjusterReranker}.
 */
public void unsetRM3() {
    this.useRM3 = false;
    this.cascade = new RerankerCascade();
    // With expansion off, the cascade only adjusts score ties.
    this.cascade.add(new ScoreTiesAdjusterReranker());
}
Also used : RerankerCascade(io.anserini.rerank.RerankerCascade) ScoreTiesAdjusterReranker(io.anserini.rerank.lib.ScoreTiesAdjusterReranker)

Aggregations

RerankerCascade (io.anserini.rerank.RerankerCascade): 11; ScoreTiesAdjusterReranker (io.anserini.rerank.lib.ScoreTiesAdjusterReranker): 5; BM25Similarity (org.apache.lucene.search.similarities.BM25Similarity): 5; CmdLineException (org.kohsuke.args4j.CmdLineException): 5; RerankerContext (io.anserini.rerank.RerankerContext): 4; FeatureExtractors (io.anserini.ltr.feature.FeatureExtractors): 3; IdentityReranker (io.anserini.rerank.IdentityReranker): 3; ScoredDocuments (io.anserini.rerank.ScoredDocuments): 3; Qrels (io.anserini.util.Qrels): 3; File (java.io.File): 3; IOException (java.io.IOException): 3; PrintStream (java.io.PrintStream): 3; AtomicMoveNotSupportedException (java.nio.file.AtomicMoveNotSupportedException): 3; Path (java.nio.file.Path): 3; CompletionException (java.util.concurrent.CompletionException): 3; QueryNodeException (org.apache.lucene.queryparser.flexible.core.QueryNodeException): 3; Rm3Reranker (io.anserini.rerank.lib.Rm3Reranker): 2; Rm3Reranker (io.anserini.rerank.rm3.Rm3Reranker): 2; RemoveRetweetsTemporalTiebreakReranker (io.anserini.rerank.twitter.RemoveRetweetsTemporalTiebreakReranker): 2; QueryGenerator (io.anserini.search.query.QueryGenerator): 2