Use of org.apache.lucene.search.similarities.ClassicSimilarity in project Anserini by castorini.
Class TFIDFFeatureExtractor, method extract.
/**
 * Computes a TF-IDF style score of a document against the query tokens held by
 * {@code context}.
 *
 * <p>For every query token the score accumulates {@code tf * idf * idf}, where {@code tf} and
 * {@code idf} come from {@link ClassicSimilarity}. The sum is then multiplied by the coordination
 * factor {@code coord(distinct query tokens found in terms, number of query tokens)}.
 *
 * @param doc     the document being scored; not read by this method
 * @param terms   the terms to match against the query tokens; when {@code null} the document is
 *                treated as containing none of the query tokens
 * @param context supplies the index searcher, the field name and the query tokens
 * @return the coord-weighted sum of {@code tf * idf^2} over all query tokens
 */
@Override
public float extract(Document doc, Terms terms, RerankerContext context) {
  Map<String, Long> countMap = new HashMap<>();
  Map<String, Integer> docFreqs = new HashMap<>();
  IndexReader reader = context.getIndexSearcher().getIndexReader();
  long numDocs = reader.numDocs();

  // Collection-level document frequency of every query token; a failed lookup counts as 0.
  for (String queryToken : context.getQueryTokens()) {
    try {
      docFreqs.put(queryToken, reader.docFreq(new Term(context.getField(), queryToken)));
    } catch (IOException e) {
      // Keep the cause in the log so a failing index read can be diagnosed.
      LOG.error("Error trying to read document frequency", e);
      docFreqs.put(queryToken, 0);
    }
  }

  // Frequencies of the query tokens that occur in the supplied terms.
  if (terms != null) {
    try {
      TermsEnum termsEnum = terms.iterator();
      while (termsEnum.next() != null) {
        String termString = termsEnum.term().utf8ToString();
        // docFreqs holds exactly one key per query token (filled in above), so this is the
        // same membership test as getQueryTokens().contains(...) without rescanning the
        // query tokens for every term.
        if (docFreqs.containsKey(termString)) {
          countMap.put(termString, termsEnum.totalTermFreq());
        }
      }
    } catch (IOException e) {
      LOG.error("Error while accessing term vector", e);
    }
  }

  TFIDFSimilarity similarity = new ClassicSimilarity();
  // coord(how many distinct query tokens were found, how many query tokens there are)
  float coord = similarity.coord(countMap.size(), context.getQueryTokens().size());

  float score = 0.0f;
  for (String token : context.getQueryTokens()) {
    long termFreq = countMap.getOrDefault(token, 0L);
    long docFreq = docFreqs.getOrDefault(token, 0);
    float tf = similarity.tf(termFreq);
    float idf = similarity.idf(docFreq, numDocs);
    score += tf * idf * idf;
  }
  return score * coord;
}
Use of org.apache.lucene.search.similarities.ClassicSimilarity in project elasticsearch by elastic.
Class MoreLikeThisQuery, method rewrite.
/**
 * Rewrites this query. If the superclass rewrite yields a different query, that query is
 * returned as is. Otherwise an {@code XMoreLikeThis} is configured from this query's settings
 * and handed to {@code createQuery}.
 *
 * @param reader the index reader passed to the superclass rewrite and to {@code XMoreLikeThis}
 * @return the superclass rewrite result when it differs from this query, else the query built
 *         by {@code createQuery}
 * @throws IOException declared by the overridden method
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
  final Query fromSuper = super.rewrite(reader);
  if (fromSuper == this) {
    // Use ClassicSimilarity when no similarity has been configured.
    final XMoreLikeThis moreLikeThis;
    if (similarity == null) {
      moreLikeThis = new XMoreLikeThis(reader, new ClassicSimilarity());
    } else {
      moreLikeThis = new XMoreLikeThis(reader, similarity);
    }

    moreLikeThis.setFieldNames(moreLikeFields);
    moreLikeThis.setAnalyzer(analyzer);
    moreLikeThis.setMinTermFreq(minTermFrequency);
    moreLikeThis.setMinDocFreq(minDocFreq);
    moreLikeThis.setMaxDocFreq(maxDocFreq);
    moreLikeThis.setMaxQueryTerms(maxQueryTerms);
    moreLikeThis.setMinWordLen(minWordLen);
    moreLikeThis.setMaxWordLen(maxWordLen);
    moreLikeThis.setStopWords(stopWords);
    moreLikeThis.setBoost(boostTerms);
    moreLikeThis.setBoostFactor(boostTermsFactor);

    final boolean hasUnlike = this.unlikeText != null || this.unlikeFields != null;
    if (hasUnlike) {
      handleUnlike(moreLikeThis, this.unlikeText, this.unlikeFields);
    }
    return createQuery(moreLikeThis);
  }
  return fromSuper;
}
Use of org.apache.lucene.search.similarities.ClassicSimilarity in project elasticsearch by elastic.
Class SimilarityTests, method testResolveSimilaritiesFromMapping_classic.
/**
 * Checks that a text field whose mapping names the custom similarity {@code my_similarity},
 * configured with type {@code classic} and {@code discount_overlaps=false}, resolves to a
 * {@code ClassicSimilarityProvider} whose {@code ClassicSimilarity} has overlap discounting
 * turned off.
 */
public void testResolveSimilaritiesFromMapping_classic() throws IOException {
  // Mapping: type -> properties -> field1 {type: text, similarity: my_similarity}
  String mappingJson = XContentFactory.jsonBuilder()
      .startObject()
        .startObject("type")
          .startObject("properties")
            .startObject("field1")
              .field("type", "text")
              .field("similarity", "my_similarity")
            .endObject()
          .endObject()
        .endObject()
      .endObject()
      .string();

  Settings similaritySettings = Settings.builder()
      .put("index.similarity.my_similarity.type", "classic")
      .put("index.similarity.my_similarity.discount_overlaps", false)
      .build();

  IndexService index = createIndex("foo", similaritySettings);
  DocumentMapper mapper = index.mapperService()
      .documentMapperParser()
      .parse("type", new CompressedXContent(mappingJson));

  assertThat(
      mapper.mappers().getMapper("field1").fieldType().similarity(),
      instanceOf(ClassicSimilarityProvider.class));

  ClassicSimilarity resolved =
      (ClassicSimilarity) mapper.mappers().getMapper("field1").fieldType().similarity().get();
  assertThat(resolved.getDiscountOverlaps(), equalTo(false));
}
Use of org.apache.lucene.search.similarities.ClassicSimilarity in project elasticsearch by elastic.
Class BlendedTermQueryTests, method setSimilarity.
/**
 * Installs a randomly chosen similarity on the given searcher: either a {@code BM25Similarity}
 * or a {@code ClassicSimilarity}, decided by a single {@code random().nextBoolean()} draw.
 *
 * @param searcher the searcher to configure
 * @return the same {@code searcher} instance that was passed in
 */
public IndexSearcher setSimilarity(IndexSearcher searcher) {
  final Similarity chosen;
  if (random().nextBoolean()) {
    chosen = new BM25Similarity();
  } else {
    chosen = new ClassicSimilarity();
  }
  searcher.setSimilarity(chosen);
  return searcher;
}
Use of org.apache.lucene.search.similarities.ClassicSimilarity in project lucene-solr by apache.
Class TestSweetSpotSimilarityFactory, method testDefaults.
/**
 * Default parameters: the similarity obtained for the "text" field must return the same
 * {@code tf} as {@code ClassicSimilarity} for every frequency from 0 to 1000, and its norms
 * for lengths 1, 4 and 16 must be 1.00, 0.50 and 0.25.
 */
public void testDefaults() throws Exception {
  final SweetSpotSimilarity sweetSpot = getSimilarity("text", SweetSpotSimilarity.class);

  // With defaults, the sweet-spot tf is expected to match the classic tf exactly.
  final ClassicSimilarity classic = new ClassicSimilarity();
  int freq = 0;
  while (freq <= 1000) {
    assertEquals("tf: i=" + freq, classic.tf(freq), sweetSpot.tf(freq), 0.0F);
    freq++;
  }

  // Sanity check of the default norm at a few lengths.
  final int[] lengths = {1, 4, 16};
  final float[] expectedNorms = {1.00F, 0.50F, 0.25F};
  for (int k = 0; k < lengths.length; k++) {
    assertEquals("norm " + lengths[k], expectedNorms[k], computeNorm(sweetSpot, lengths[k]), 0.0F);
  }
}
Aggregations