Use of org.apache.lucene.search.similarities.ClassicSimilarity in the lucene-solr project by Apache:
class TestQueryRescorer, method testNullScorerTermQuery.
// Test LUCENE-5682
public void testNullScorerTermQuery() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
Document doc = new Document();
doc.add(newStringField("id", "0", Field.Store.YES));
doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO));
w.addDocument(doc);
doc = new Document();
doc.add(newStringField("id", "1", Field.Store.YES));
// 1 extra token, but wizard and oz are close;
doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
w.close();
// Do ordinary BooleanQuery:
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
bq.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
IndexSearcher searcher = getSearcher(r);
searcher.setSimilarity(new ClassicSimilarity());
TopDocs hits = searcher.search(bq.build(), 10);
assertEquals(2, hits.totalHits);
assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
// Now, resort using TermQuery on term that does not exist.
TermQuery tq = new TermQuery(new Term("field", "gold"));
TopDocs hits2 = QueryRescorer.rescore(searcher, hits, tq, 2.0, 10);
// Just testing that null scorer is handled.
assertEquals(2, hits2.totalHits);
r.close();
dir.close();
}
Use of org.apache.lucene.search.similarities.ClassicSimilarity in the lucene-solr project by Apache:
class ClassicSimilarityFactory, method getSimilarity.
@Override
public Similarity getSimilarity() {
ClassicSimilarity sim = new ClassicSimilarity();
sim.setDiscountOverlaps(discountOverlaps);
return sim;
}
Use of org.apache.lucene.search.similarities.ClassicSimilarity in the Anserini project by castorini:
class IdfPassageScorer, method score.
@Override
public void score(String query, Map<String, Float> sentences) throws Exception {
// EnglishAnalyzer ea = new EnglishAnalyzer(StopFilter.makeStopSet(stopWords));
EnglishAnalyzer ea = new EnglishAnalyzer(CharArraySet.EMPTY_SET);
QueryParser qp = new QueryParser(LuceneDocumentGenerator.FIELD_BODY, ea);
ClassicSimilarity similarity = new ClassicSimilarity();
String escapedQuery = qp.escape(query);
Query question = qp.parse(escapedQuery);
HashSet<String> questionTerms = new HashSet<>(Arrays.asList(question.toString().trim().toLowerCase().split("\\s+")));
// add the question terms to the termIDF Map
for (String questionTerm : questionTerms) {
try {
TermQuery q = (TermQuery) qp.parse(questionTerm);
Term t = q.getTerm();
double termIDF = similarity.idf(reader.docFreq(t), reader.numDocs());
termIdfMap.put(questionTerm, String.valueOf(termIDF));
} catch (Exception e) {
continue;
}
}
// avoid duplicate passages
HashSet<String> seenSentences = new HashSet<>();
for (Map.Entry<String, Float> sent : sentences.entrySet()) {
double idf = 0.0;
HashSet<String> seenTerms = new HashSet<>();
String[] terms = sent.getKey().toLowerCase().split("\\s+");
for (String term : terms) {
try {
TermQuery q = (TermQuery) qp.parse(term);
Term t = q.getTerm();
double termIDF = similarity.idf(reader.docFreq(t), reader.numDocs());
termIdfMap.put(term, String.valueOf(termIDF));
if (questionTerms.contains(t.toString()) && !seenTerms.contains(t.toString())) {
idf += termIDF;
seenTerms.add(t.toString());
} else {
idf += 0.0;
}
} catch (Exception e) {
continue;
}
}
double weightedScore = idf + 0.0001 * sent.getValue();
ScoredPassage scoredPassage = new ScoredPassage(sent.getKey(), weightedScore, sent.getValue());
if ((scoredPassageHeap.size() < topPassages || weightedScore > scoredPassageHeap.peekLast().getScore()) && !seenSentences.contains(sent)) {
if (scoredPassageHeap.size() == topPassages) {
scoredPassageHeap.pollLast();
}
scoredPassageHeap.add(scoredPassage);
seenSentences.add(sent.getKey());
}
}
}
Aggregations