Use of io.anserini.rerank.ScoredDocuments in project Anserini by castorini.
The class PyseriniEntryPoint, method search.
/**
 * Runs each topic against the index, applies the reranker cascade, and collects the
 * retrieved documents with their scores.
 *
 * @param topics map from topic (query) id to query string
 * @param similarity Lucene similarity (scoring model) to use
 * @param numHits number of hits to retrieve per query
 * @param cascade reranker cascade applied to the initial retrieval results
 * @param useQueryParser if true, parse the query with Lucene's QueryParser; otherwise build a bag-of-words query
 * @param keepstopwords if true, keep stopwords during analysis
 * @return map from document id to retrieval score
 * @throws IOException if there is an error reading the index
 * @throws ParseException if a query cannot be parsed
 */
public Map<String, Float> search(SortedMap<Integer, String> topics, Similarity similarity, int numHits,
    RerankerCascade cascade, boolean useQueryParser, boolean keepstopwords) throws IOException, ParseException {
  Map<String, Float> scoredDocs = new LinkedHashMap<>();
  IndexSearcher searcher = new IndexSearcher(reader);
  searcher.setSimilarity(similarity);
  // Keep stopwords by passing an empty stopword set; otherwise use Lucene's default English stopword list.
  EnglishAnalyzer ea = keepstopwords ? new EnglishAnalyzer(CharArraySet.EMPTY_SET) : new EnglishAnalyzer();
  QueryParser queryParser = new QueryParser(FIELD_BODY, ea);
  queryParser.setDefaultOperator(QueryParser.Operator.OR);
  for (Map.Entry<Integer, String> entry : topics.entrySet()) {
    int qID = entry.getKey();
    String queryString = entry.getValue();
    // Either parse the query string or build a simple bag-of-words query over the body field.
    Query query = useQueryParser ? queryParser.parse(queryString)
        : AnalyzerUtils.buildBagOfWordsQuery(FIELD_BODY, ea, queryString);
    TopDocs rs = searcher.search(query, numHits);
    ScoreDoc[] hits = rs.scoreDocs;
    List<String> queryTokens = AnalyzerUtils.tokenize(ea, queryString);
    RerankerContext context = new RerankerContext(searcher, query, String.valueOf(qID), queryString,
        queryTokens, FIELD_BODY, null);
    // Convert the initial ranking into ScoredDocuments and run it through the reranker cascade.
    ScoredDocuments docs = cascade.run(ScoredDocuments.fromTopDocs(rs, searcher), context);
    for (int i = 0; i < docs.documents.length; i++) {
      String docid = docs.documents[i].getField(FIELD_ID).stringValue();
      float score = docs.scores[i];
      scoredDocs.put(docid, score);
    }
  }
  return scoredDocs;
}
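For context, a minimal calling sketch (imports omitted, as in the snippets). The initializer name initializeWithIndex and the use of IdentityReranker to build a pass-through cascade are assumptions for illustration, not confirmed PyseriniEntryPoint API:

// Sketch only: initializeWithIndex and IdentityReranker are assumed names, not verified API.
PyseriniEntryPoint pyserini = new PyseriniEntryPoint();
pyserini.initializeWithIndex("lucene-index.example");  // hypothetical index path and initializer

SortedMap<Integer, String> topics = new TreeMap<>();
topics.put(1, "black bear attacks");

Similarity similarity = new BM25Similarity(0.9f, 0.4f);  // same BM25 parameters as RetrieveSentences below
RerankerCascade cascade = new RerankerCascade();
cascade.add(new IdentityReranker());  // pass-through reranker, assumed available in io.anserini.rerank

Map<String, Float> results = pyserini.search(topics, similarity, 1000, cascade, false, false);
results.forEach((docid, score) -> System.out.println(docid + "\t" + score));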
Use of io.anserini.rerank.ScoredDocuments in project Anserini by castorini.
The class RetrieveSentences, method search.
/**
 * Retrieves the top documents for each topic using BM25 and collects document ids and scores.
 *
 * @param topics map from topic (query) id to query string
 * @param numHits number of hits to retrieve per query
 * @return map from document id to retrieval score
 * @throws IOException if there is an error reading the index
 * @throws ParseException if a query cannot be parsed
 */
public Map<String, Float> search(SortedMap<Integer, String> topics, int numHits) throws IOException, ParseException {
  IndexSearcher searcher = new IndexSearcher(reader);
  // Use the BM25 scoring model with k1 = 0.9 and b = 0.4.
  Similarity similarity = new BM25Similarity(0.9f, 0.4f);
  searcher.setSimilarity(similarity);
  EnglishAnalyzer ea = new EnglishAnalyzer();
  QueryParser queryParser = new QueryParser(FIELD_BODY, ea);
  queryParser.setDefaultOperator(QueryParser.Operator.OR);
  Map<String, Float> scoredDocs = new LinkedHashMap<>();
  for (Map.Entry<Integer, String> entry : topics.entrySet()) {
    int qID = entry.getKey();
    String queryString = entry.getValue();
    // Build a bag-of-words query over the body field from the analyzed query terms.
    Query query = AnalyzerUtils.buildBagOfWordsQuery(FIELD_BODY, ea, queryString);
    TopDocs rs = searcher.search(query, numHits);
    ScoreDoc[] hits = rs.scoreDocs;
    ScoredDocuments docs = ScoredDocuments.fromTopDocs(rs, searcher);
    for (int i = 0; i < docs.documents.length; i++) {
      scoredDocs.put(docs.documents[i].getField(FIELD_ID).stringValue(), docs.scores[i]);
    }
  }
  return scoredDocs;
}
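Both methods return a flat docid-to-score map keyed by document id only, so the topic id is not carried in the result. Assuming the map comes from a single-topic call, the scores can be written in the standard TREC run format (qid Q0 docid rank score tag); the topic id, run tag, and output path below are placeholders, imports omitted as above:

// Sketch only: assumes scoredDocs was produced by a search over a single topic.
int qid = 1;                 // placeholder topic id for that call
String runTag = "Anserini";  // placeholder run tag
try (PrintWriter out = new PrintWriter(Files.newBufferedWriter(Paths.get("run.txt")))) {
  int rank = 1;
  for (Map.Entry<String, Float> hit : scoredDocs.entrySet()) {
    // TREC run format: qid Q0 docid rank score tag
    out.printf(Locale.US, "%d Q0 %s %d %f %s%n", qid, hit.getKey(), rank++, hit.getValue(), runTag);
  }
}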