use of org.apache.lucene.search.similarities.ClassicSimilarity in project lucene-solr by apache.
the class TestPayloadSpanUtil method testPayloadSpanUtil.
/**
 * Indexes a single document through a {@code PayloadAnalyzer}, asks
 * {@code PayloadSpanUtil} for the payloads matching the term "rr", and prints
 * them when {@code VERBOSE} is set.
 *
 * <p>The directory, writer and reader are released in {@code finally} blocks so
 * that a failure part-way through the test does not leave them open.
 *
 * @throws Exception if indexing, searching or closing a resource fails
 */
public void testPayloadSpanUtil() throws Exception {
  Directory directory = newDirectory();
  try {
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(new ClassicSimilarity()));
    IndexReader reader;
    try {
      Document doc = new Document();
      doc.add(newTextField(FIELD, "xx rr yy mm pp", Field.Store.YES));
      writer.addDocument(doc);
      reader = writer.getReader();
    } finally {
      // The writer is closed as soon as the reader has been obtained (or on failure).
      writer.close();
    }
    try {
      IndexSearcher searcher = newSearcher(reader);
      PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());
      Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(FIELD, "rr")));
      if (VERBOSE) {
        System.out.println("Num payloads:" + payloads.size());
        for (final byte[] bytes : payloads) {
          System.out.println(new String(bytes, StandardCharsets.UTF_8));
        }
      }
    } finally {
      reader.close();
    }
  } finally {
    directory.close();
  }
}
use of org.apache.lucene.search.similarities.ClassicSimilarity in project lucene-solr by apache.
the class TestSpans method testSpanScorerZeroSloppyFreq.
/**
 * Checks span scoring under a similarity whose {@code sloppyFreq} is always zero.
 *
 * <p>For every leaf of the searcher, a scorer is created for an ordered span-near
 * query on "t1"/"t2" (slop 1) while the zero-sloppy-freq similarity is installed.
 * In the leaf that contains top-level document 11, the first match must be that
 * document and its score must be exactly zero; in every other leaf the scorer
 * must be {@code null} or have no documents.
 *
 * @throws Exception if creating the weight/scorer or iterating it fails
 */
public void testSpanScorerZeroSloppyFreq() throws Exception {
  IndexReaderContext topReaderContext = searcher.getTopReaderContext();
  List<LeafReaderContext> leaves = topReaderContext.leaves();
  // Index of the leaf that holds top-level doc id 11.
  int subIndex = ReaderUtil.subIndex(11, leaves);
  // The similarity is stateless, so one instance is shared by all iterations.
  final Similarity sim = new ClassicSimilarity() {
    @Override
    public float sloppyFreq(int distance) {
      return 0.0f;
    }
  };
  for (int i = 0, c = leaves.size(); i < c; i++) {
    final LeafReaderContext ctx = leaves.get(i);
    final Similarity oldSim = searcher.getSimilarity(true);
    Scorer spanScorer;
    try {
      searcher.setSimilarity(sim);
      SpanQuery snq = spanNearOrderedQuery(field, 1, "t1", "t2");
      spanScorer = searcher.createNormalizedWeight(snq, true).scorer(ctx);
    } finally {
      // Always restore the searcher's previous similarity, even if scorer creation fails.
      searcher.setSimilarity(oldSim);
    }
    if (i == subIndex) {
      assertTrue("first doc", spanScorer.iterator().nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
      // Expected value goes first so a failure message reports expected/actual correctly.
      assertEquals("first doc number", 11, spanScorer.docID() + ctx.docBase);
      float score = spanScorer.score();
      assertTrue("first doc score should be zero, " + score, score == 0.0f);
    } else {
      assertTrue("no second doc", spanScorer == null || spanScorer.iterator().nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
    }
  }
}
use of org.apache.lucene.search.similarities.ClassicSimilarity in project lucene-solr by apache.
the class TestPayloadExplanations method setUp.
/**
 * Runs the parent set-up, then installs on the searcher a {@code ClassicSimilarity}
 * whose payload score is derived from the payload's hash code.
 *
 * @throws Exception if the parent set-up fails
 */
@Override
public void setUp() throws Exception {
  super.setUp();
  final ClassicSimilarity hashedPayloadSimilarity = new ClassicSimilarity() {
    @Override
    public float scorePayload(int doc, int start, int end, BytesRef payload) {
      // Score is 1 plus the remainder of the payload hash divided by 10.
      final int remainder = payload.hashCode() % 10;
      return 1 + remainder;
    }
  };
  searcher.setSimilarity(hashedPayloadSimilarity);
}
use of org.apache.lucene.search.similarities.ClassicSimilarity in project lucene-solr by apache.
the class SchemaSimilarityFactory method getSimilarity.
/**
 * Returns the {@code SchemaSimilarity} for this factory, creating it on first use.
 *
 * <p>When no default field type is configured, the wrapped default is
 * {@code BM25Similarity} if the configured Lucene match version is on or after
 * 6.0.0 and {@code ClassicSimilarity} otherwise. When one is configured, that
 * field type's similarity is used as the default.
 *
 * @return the lazily created similarity
 * @throws IllegalStateException if the core has not been set yet
 * @throws SolrException if the configured field type does not exist or defines no similarity
 */
@Override
public Similarity getSimilarity() {
  if (core == null) {
    throw new IllegalStateException("SchemaSimilarityFactory can not be used until SolrCoreAware.inform has been called");
  }
  if (similarity != null) {
    return similarity;
  }

  // Built lazily here rather than in inform(SolrCore): looking up the default field type
  // needs core.getLatestSchema(), which would create a circular initialization.
  final Similarity fallback;
  if (defaultSimFromFieldType == null) {
    // Nothing configured: pick the implicit default from the Lucene match version.
    if (this.core.getSolrConfig().luceneMatchVersion.onOrAfter(Version.LUCENE_6_0_0)) {
      fallback = new BM25Similarity();
    } else {
      fallback = new ClassicSimilarity();
    }
  } else {
    final String messagePrefix = "SchemaSimilarityFactory configured with " + INIT_OPT + "='" + defaultSimFromFieldType + "'";
    final FieldType fieldType = core.getLatestSchema().getFieldTypeByName(defaultSimFromFieldType);
    if (fieldType == null) {
      throw new SolrException(ErrorCode.SERVER_ERROR, messagePrefix + " but that <fieldType> does not exist");
    }
    final Similarity fromFieldType = fieldType.getSimilarity();
    if (fromFieldType == null) {
      throw new SolrException(ErrorCode.SERVER_ERROR, messagePrefix + " but that <fieldType> does not define a <similarity>");
    }
    fallback = fromFieldType;
  }

  similarity = new SchemaSimilarity(fallback);
  return similarity;
}
use of org.apache.lucene.search.similarities.ClassicSimilarity in project Anserini by castorini.
the class IdfPassageScorer method getTermIdfJSON.
/**
 * Computes a {@code ClassicSimilarity} IDF value for each whitespace-separated
 * token of the given sentences and returns the accumulated term-to-IDF map as JSON.
 *
 * <p>Each token is parsed against the body field with an {@code EnglishAnalyzer}
 * built with an empty stop-word set. Results are stored in {@code termIdfMap},
 * keyed by the raw token, with the IDF rendered as a string. Any token whose
 * parsing, cast to {@code TermQuery}, or index lookup throws is skipped.
 *
 * @param sentList sentences to tokenize on whitespace
 * @return a JSON object built from {@code termIdfMap}
 */
@Override
public JSONObject getTermIdfJSON(List<String> sentList) {
  EnglishAnalyzer analyzer = new EnglishAnalyzer(CharArraySet.EMPTY_SET);
  QueryParser parser = new QueryParser(LuceneDocumentGenerator.FIELD_BODY, analyzer);
  ClassicSimilarity idfModel = new ClassicSimilarity();

  for (String sentence : sentList) {
    for (String token : sentence.trim().split("\\s+")) {
      try {
        TermQuery termQuery = (TermQuery) parser.parse(token);
        Term parsedTerm = termQuery.getTerm();
        int docFreq = reader.docFreq(parsedTerm);
        int numDocs = reader.numDocs();
        double idf = idfModel.idf(docFreq, numDocs);
        termIdfMap.put(token, String.valueOf(idf));
      } catch (Exception ignored) {
        // Best effort: a token that cannot be parsed or looked up is left out of the map.
      }
    }
  }

  return new JSONObject(termIdfMap);
}
Aggregations