use of org.apache.lucene.search.similarities.Similarity in project lucene-solr by apache.
the class TestDocValuesScoring method testSimple.
/* for comparing floats */
/**
 * Indexes two documents carrying a {@code foo_boost} float doc value (2 and 4) and compares
 * the scores produced by a searcher with the default similarity against a searcher whose
 * similarity for field {@code foo} is a {@code BoostingSimilarity} over that doc value.
 * The test expects scores on {@code foo} to be multiplied by the doc value and scores on
 * {@code bar} to be unaffected.
 */
public void testSimple() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);

  // A single Document instance is reused; only the field values change between adds.
  Document doc = new Document();
  Field fooText = newTextField("foo", "", Field.Store.NO);
  doc.add(fooText);
  Field boostValue = new FloatDocValuesField("foo_boost", 0.0F);
  doc.add(boostValue);
  Field barText = newTextField("bar", "", Field.Store.NO);
  doc.add(barText);

  // first document: boost x2
  fooText.setStringValue("quick brown fox");
  barText.setStringValue("quick brown fox");
  boostValue.setFloatValue(2f);
  iw.addDocument(doc);

  // second document: boost x4
  fooText.setStringValue("jumps over lazy brown dog");
  barText.setStringValue("jumps over lazy brown dog");
  boostValue.setFloatValue(4f);
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();

  // searcher without boosting; its similarity is the baseline
  IndexSearcher plainSearcher = newSearcher(ir, false);
  final Similarity base = plainSearcher.getSimilarity(true);

  // searcher that boosts, but only for field "foo"
  IndexSearcher boostingSearcher = newSearcher(ir, false);
  boostingSearcher.setSimilarity(new PerFieldSimilarityWrapper() {
    final Similarity fooSim = new BoostingSimilarity(base, "foo_boost");

    @Override
    public Similarity get(String field) {
      if ("foo".equals(field)) {
        return fooSim;
      }
      return base;
    }
  });

  // searching field "foo" for a term of the first document: 2x the score expected
  assertBoostedScore(new TermQuery(new Term("foo", "quick")), plainSearcher, boostingSearcher, 2f);

  // this query matches only the second document, which should have 4x the score
  assertBoostedScore(new TermQuery(new Term("foo", "jumps")), plainSearcher, boostingSearcher, 4f);

  // searching field "bar" must be unaffected, since only "foo" uses foo_boost
  // (multiplying a float by 1f leaves it unchanged)
  assertBoostedScore(new TermQuery(new Term("bar", "quick")), plainSearcher, boostingSearcher, 1f);

  ir.close();
  dir.close();
}

/**
 * Sanity-checks {@code query} against both searchers, runs it on each, and asserts that each
 * returns exactly one hit and that the boosted top score equals the plain top score times
 * {@code factor}, within {@code SCORE_EPSILON}.
 */
private void assertBoostedScore(TermQuery query, IndexSearcher plainSearcher,
                                IndexSearcher boostingSearcher, float factor) throws Exception {
  QueryUtils.check(random(), query, plainSearcher);
  QueryUtils.check(random(), query, boostingSearcher);
  TopDocs plainHits = plainSearcher.search(query, 10);
  TopDocs boostedHits = boostingSearcher.search(query, 10);
  assertEquals(1, plainHits.totalHits);
  assertEquals(1, boostedHits.totalHits);
  assertEquals(boostedHits.scoreDocs[0].score, plainHits.scoreDocs[0].score * factor, SCORE_EPSILON);
}
use of org.apache.lucene.search.similarities.Similarity in project lucene-solr by apache.
the class TestNorms method buildIndex.
// TODO: create a testNormsNotPresent ourselves by adding/deleting/merging docs
/**
 * Fills {@code dir} with at least 100 documents taken from {@code LineFileDocs}, indexed with
 * a {@code MockAnalyzer} of random maximum token length and {@code MySimProvider} as the
 * similarity. Each document temporarily gets a stored text field {@code BYTE_TEST_FIELD}
 * whose value is a random number in [1, 255] repeated that many times, separated by spaces.
 *
 * @param dir directory to write the index into
 * @throws IOException if indexing or closing fails
 */
public void buildIndex(Directory dir) throws IOException {
  Random random = random();
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  IndexWriterConfig config = newIndexWriterConfig(analyzer);
  Similarity sim = new MySimProvider();
  config.setSimilarity(sim);
  RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
  final LineFileDocs lineDocs = new LineFileDocs(random);

  final int docCount = atLeast(100);
  for (int n = 0; n < docCount; n++) {
    Document doc = lineDocs.nextDoc();

    // Build "<boost> <boost> ... <boost>" containing exactly `boost` tokens.
    final int boost = TestUtil.nextInt(random, 1, 255);
    final String token = Integer.toString(boost);
    StringBuilder text = new StringBuilder(token);
    for (int k = 1; k < boost; k++) {
      text.append(' ').append(token);
    }

    doc.add(new TextField(BYTE_TEST_FIELD, text.toString(), Field.Store.YES));
    writer.addDocument(doc);
    // Take the field off again right after indexing; presumably the Document instance
    // handed out by LineFileDocs is reused across calls -- verify against LineFileDocs.
    doc.removeField(BYTE_TEST_FIELD);

    // occasionally commit mid-stream
    if (rarely()) {
      writer.commit();
    }
  }

  writer.commit();
  writer.close();
  lineDocs.close();
}
use of org.apache.lucene.search.similarities.Similarity in project lucene-solr by apache.
the class TestDiversifiedTopDocsCollector method setUp.
/**
 * Builds the index used by the tests: one document per entry of {@code hitsOfThe60s}
 * (tab-separated values parsed into a {@code Record}), committing after every tenth entry.
 * Afterwards opens the reader and searcher, loads the sorted doc values of the
 * {@code artist} field, and wraps the searcher's similarity in a {@code DocValueSimilarity}
 * over the {@code weeksAtNumberOne} doc value.
 */
@Override
public void setUp() throws Exception {
  super.setUp();

  // populate an index with documents - artist, song and weeksAtNumberOne
  dir = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), dir);

  // One Document and one set of Field instances are reused for every record.
  Document template = new Document();
  Field year = newTextField("year", "", Field.Store.NO);
  SortedDocValuesField artistDocValue = new SortedDocValuesField("artist", new BytesRef(""));
  Field weeksDocValue = new FloatDocValuesField("weeksAtNumberOne", 0.0F);
  Field weeksStored = new StoredField("weeks", 0.0F);
  Field id = newStringField("id", "", Field.Store.YES);
  Field song = newTextField("song", "", Field.Store.NO);
  Field artistName = newTextField("artistName", "", Field.Store.NO);

  template.add(id);
  template.add(weeksDocValue);
  template.add(artistName);
  template.add(song);
  template.add(weeksStored);
  template.add(year);
  template.add(artistDocValue);

  parsedRecords.clear();
  int rowIndex = 0;
  for (String row : hitsOfThe60s) {
    String[] columns = row.split("\t");
    Record hit = new Record(String.valueOf(rowIndex), columns[0], columns[1], columns[2],
        Float.parseFloat(columns[3]));
    parsedRecords.put(hit.id, hit);

    id.setStringValue(hit.id);
    year.setStringValue(hit.year);
    artistName.setStringValue(hit.artist);
    artistDocValue.setBytesValue(new BytesRef(hit.artist));
    song.setStringValue(hit.song);
    weeksStored.setFloatValue(hit.weeks);
    weeksDocValue.setFloatValue(hit.weeks);
    indexWriter.addDocument(template);

    if (rowIndex % 10 == 0) {
      // Causes the creation of multiple segments for our test
      indexWriter.commit();
    }
    rowIndex++;
  }

  reader = indexWriter.getReader();
  indexWriter.close();
  searcher = newSearcher(reader);
  artistDocValues = MultiDocValues.getSortedValues(reader, "artist");

  // All searches sort by song popularity
  searcher.setSimilarity(new DocValueSimilarity(searcher.getSimilarity(true), "weeksAtNumberOne"));
}
use of org.apache.lucene.search.similarities.Similarity in project lucene-solr by apache.
the class TestBulkSchemaAPI method assertFieldSimilarity.
/**
 * White-box check of the Similarity used for the specified field according to
 * {@link SolrCore#getLatestSchema}.
 *
 * <p>Asserts that the schema's similarity factory is a {@code SchemaSimilarityFactory}, that
 * the schema's similarity is a {@code PerFieldSimilarityWrapper}, and that the similarity it
 * returns for {@code fieldname} is exactly of class {@code expected}. Then executes each of
 * the specified Similarity-accepting validators against that similarity.
 *
 * @param fieldname  name of the field whose similarity is inspected
 * @param expected   exact class the field's similarity must have
 * @param validators additional checks to run on the (typed) field similarity
 */
@SafeVarargs
private static <T extends Similarity> void assertFieldSimilarity(String fieldname, Class<T> expected, Consumer<T>... validators) {
  CoreContainer cc = jetty.getCoreContainer();
  try (SolrCore core = cc.getCore("collection1")) {
    SimilarityFactory simfac = core.getLatestSchema().getSimilarityFactory();
    assertNotNull(simfac);
    assertTrue("test only works with SchemaSimilarityFactory", simfac instanceof SchemaSimilarityFactory);

    Similarity mainSim = core.getLatestSchema().getSimilarity();
    assertNotNull(mainSim);

    // sanity check simfac vs sim in use - also verify inform called on simfac, otherwise exception
    assertEquals(mainSim, simfac.getSimilarity());
    assertTrue("test only works with PerFieldSimilarityWrapper, SchemaSimilarityFactory redefined?", mainSim instanceof PerFieldSimilarityWrapper);

    Similarity fieldSim = ((PerFieldSimilarityWrapper) mainSim).get(fieldname);
    // fail with a readable message instead of an NPE on getClass() below
    assertNotNull("no sim for field=" + fieldname, fieldSim);
    assertEquals("wrong sim for field=" + fieldname, expected, fieldSim.getClass());

    // The class was just verified, so Class.cast is a checked, warning-free conversion
    // (replaces the former unchecked "(T) fieldSim" cast).
    final T typedSim = expected.cast(fieldSim);
    for (Consumer<T> validator : validators) {
      validator.accept(typedSim);
    }
  }
}
use of org.apache.lucene.search.similarities.Similarity in project lucene-solr by apache.
the class ChangedSchemaMergeTest method testSanityOfSchemaSimilarityFactoryInform.
/**
 * Sanity check that {@code SchemaSimilarityFactory} throws an {@link IllegalStateException}
 * mentioning {@code SolrCoreAware.inform} when {@code getSimilarity()} is called without a
 * prior inform(SolrCoreAware); otherwise assertSimilarity is useless.
 */
public void testSanityOfSchemaSimilarityFactoryInform() {
  SchemaSimilarityFactory broken = new SchemaSimilarityFactory();
  broken.init(new ModifiableSolrParams());
  // NO INFORM
  try {
    // the return value is irrelevant; only the exception matters
    broken.getSimilarity();
    fail("SchemaSimilarityFactory should have thrown IllegalStateException b/c inform not used");
  } catch (IllegalStateException expected) {
    final String message = expected.getMessage();
    // a null message is reported as an assertion failure rather than an NPE
    assertTrue("GOT: " + message, message != null && message.contains("SolrCoreAware.inform"));
  }
}
Aggregations