Use of io.anserini.search.query.PhraseQueryGenerator in project Anserini by castorini.
From the class IndexReaderUtils, method getTermCountsWithAnalyzer.
/**
 * Returns count information on a term or a phrase.
 *
 * <p>The input is run through {@code analyzer} once. If more than one token results, the input is
 * treated as a phrase: a phrase query is run against the {@code IndexArgs.CONTENTS} field and only the
 * number of matching documents is reported. If exactly one token results, both the collection
 * frequency and the document frequency of that token are read from the index. If no token survives
 * analysis (for example, when the analyzer drops every token), both counts are reported as zero.
 *
 * @param reader index reader
 * @param termStr term or phrase, not yet analyzed
 * @param analyzer analyzer to use
 * @return map with key {@code "docFreq"}, plus {@code "collectionFreq"} when the input is not a
 *     multi-token phrase
 * @throws IOException if error encountered during access to index
 */
public static Map<String, Long> getTermCountsWithAnalyzer(IndexReader reader, String termStr, Analyzer analyzer) throws IOException {
  // Analyze once and reuse the tokens for both the phrase check and the single-term lookup.
  java.util.List<String> tokens = AnalyzerUtils.analyze(analyzer, termStr);

  if (tokens.isEmpty()) {
    // Nothing survived analysis, so there is no term to look up; report zero counts rather than
    // failing with an IndexOutOfBoundsException on tokens.get(0).
    return Map.ofEntries(Map.entry("collectionFreq", 0L), Map.entry("docFreq", 0L));
  }

  if (tokens.size() > 1) {
    // Multi-token input: count the documents matching the phrase query.
    Query query = new PhraseQueryGenerator().buildQuery(IndexArgs.CONTENTS, analyzer, termStr);
    IndexSearcher searcher = new IndexSearcher(reader);
    TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
    searcher.search(query, totalHitCountCollector);
    return Map.ofEntries(Map.entry("docFreq", (long) totalHitCountCollector.getTotalHits()));
  }

  // Single-token input: read both statistics for the analyzed term directly from the reader.
  Term t = new Term(IndexArgs.CONTENTS, tokens.get(0));
  return Map.ofEntries(
      Map.entry("collectionFreq", reader.totalTermFreq(t)),
      Map.entry("docFreq", (long) reader.docFreq(t)));
}
Aggregations