Use of io.anserini.qa.passage.IdfPassageScorer in the Anserini project by castorini.
Class PyseriniEntryPoint, method getRankedPassages.
/**
 * Searches for {@code query}, breaks every returned document into sentences, scores those
 * sentences with an {@link IdfPassageScorer}, and returns the scorer's top passages.
 *
 * <p>Each sentence is tokenized with the PTB tokenizer and re-joined with single spaces before
 * being handed to the scorer, paired with the score of the document it came from.
 *
 * @param query   the query string; passed unchanged to both {@code search} and the passage scorer
 * @param numHits forwarded to {@code search} (presumably the number of documents to retrieve —
 *                confirm against {@code search})
 * @param k       forwarded to the {@link IdfPassageScorer} constructor (presumably the number of
 *                top passages to keep — confirm against the scorer)
 * @return one string per top passage, formatted as {@code sentence + "\t" + score}, in the order
 *         returned by {@code extractTopPassages()}
 * @throws Exception if searching, sentence retrieval, or scoring fails
 */
public List<String> getRankedPassages(String query, int numHits, int k) throws Exception {
  Map<String, Float> docScore = search(query, numHits);
  Map<String, Float> sentencesMap = new LinkedHashMap<>();
  TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");

  for (Map.Entry<String, Float> doc : docScore.entrySet()) {
    List<Sentence> sentences = indexUtils.getSentDocument(doc.getKey());
    for (Sentence thisSent : sentences) {
      List<CoreLabel> tokens = tokenizerFactory.getTokenizer(new StringReader(thisSent.text())).tokenize();
      String answerTokens = tokens.stream().map(CoreLabel::toString).collect(Collectors.joining(" "));
      // The map is keyed by sentence text: a sentence that appears in several documents keeps
      // its first insertion position but ends up with the score of the last document seen.
      sentencesMap.put(answerTokens, doc.getValue());
    }
  }

  // Stored in the instance field, so the scorer built for this call replaces any earlier one.
  passageScorer = new IdfPassageScorer(indexDir, k);

  // NOTE(review): the scorer is given the raw query string, not a PTB-tokenized form, even though
  // the candidate sentences are PTB-tokenized. Confirm this asymmetry is what the scorer expects.
  passageScorer.score(query, sentencesMap);

  List<String> topSentences = new ArrayList<>();
  List<ScoredPassage> topPassages = passageScorer.extractTopPassages();
  for (ScoredPassage s : topPassages) {
    topSentences.add(s.getSentence() + "\t" + s.getScore());
  }
  return topSentences;
}
Aggregations