use of org.apache.lucene.search.similarities.ClassicSimilarity in project lucene-solr by apache.
the class TestSweetSpotSimilarityFactory method testBaselineParameters.
/** baseline with parameters */
public void testBaselineParameters() throws Exception {
  SweetSpotSimilarity sim = getSimilarity("text_baseline", SweetSpotSimilarity.class);
  ClassicSimilarity d = new ClassicSimilarity();
  // constant up to 6
  for (int i = 1; i <= 6; i++) {
    assertEquals("tf i=" + i, 1.5F, sim.tf(i), 0.0F);
  }
  // less than the default sim above 6
  for (int i = 7; i <= 1000; i++) {
    assertTrue("tf: i=" + i + " : s=" + sim.tf(i) + " < d=" + d.tf(i), sim.tf(i) < d.tf(i));
  }
  // norms: plateau from 3-5
  assertEquals("norm 1 == 7", computeNorm(sim, 1), computeNorm(sim, 7), 0.0F);
  assertEquals("norm 2 == 6", computeNorm(sim, 2), computeNorm(sim, 6), 0.0F);
  assertEquals("norm 3", 1.00F, computeNorm(sim, 3), 0.0F);
  assertEquals("norm 4", 1.00F, computeNorm(sim, 4), 0.0F);
  assertEquals("norm 5", 1.00F, computeNorm(sim, 5), 0.0F);
  assertTrue("norm 6 too high: " + computeNorm(sim, 6), computeNorm(sim, 6) < 1.0F);
  assertTrue("norm 7 higher than norm 6", computeNorm(sim, 7) < computeNorm(sim, 6));
  assertEquals("norm 20", 0.25F, computeNorm(sim, 20), 0.0F);
}
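The assertions pin down the sweet-spot shape: a flat tf of 1.5 through frequency 6, and a length-norm plateau of 1.0 for lengths 3 through 5. A minimal sketch of a programmatically configured SweetSpotSimilarity that would satisfy them (parameter values are inferred from the assertions above, not copied from the actual "text_baseline" schema):

SweetSpotSimilarity sim = new SweetSpotSimilarity();
// flat tf of 1.5 for all frequencies up to 6, then slow growth above that
sim.setBaselineTfFactors(1.5f, 6f);
// length norm of exactly 1.0 for field lengths 3 through 5;
// steepness 0.5 gives computeNorm(sim, 20) == 0.25, matching the final assertion
sim.setLengthNormFactors(3, 5, 0.5f, true);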
use of org.apache.lucene.search.similarities.ClassicSimilarity in project lucene-solr by apache.
the class TestBoolean2 method beforeClass.
@BeforeClass
public static void beforeClass() throws Exception {
  // in some runs, test immediate adjacency of matches - in others, force a full bucket gap between docs
  NUM_FILLER_DOCS = random().nextBoolean() ? 0 : BooleanScorer.SIZE;
  PRE_FILLER_DOCS = TestUtil.nextInt(random(), 0, (NUM_FILLER_DOCS / 2));
  if (VERBOSE) {
    System.out.println("TEST: NUM_FILLER_DOCS=" + NUM_FILLER_DOCS + " PRE_FILLER_DOCS=" + PRE_FILLER_DOCS);
  }
  if (NUM_FILLER_DOCS * PRE_FILLER_DOCS > 100000) {
    directory = newFSDirectory(createTempDir());
  } else {
    directory = newDirectory();
  }
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  // randomized codecs are sometimes too costly for this test:
  iwc.setCodec(Codec.forName("Lucene70"));
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, iwc);
  // we'll make a ton of docs, disable store/norms/vectors
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setOmitNorms(true);
  Document doc = new Document();
  for (int filler = 0; filler < PRE_FILLER_DOCS; filler++) {
    writer.addDocument(doc);
  }
  for (int i = 0; i < docFields.length; i++) {
    doc.add(new Field(field, docFields[i], ft));
    writer.addDocument(doc);
    doc = new Document();
    for (int filler = 0; filler < NUM_FILLER_DOCS; filler++) {
      writer.addDocument(doc);
    }
  }
  writer.close();
  littleReader = DirectoryReader.open(directory);
  searcher = newSearcher(littleReader);
  // this is intentionally using the baseline sim, because it compares against bigSearcher (which uses a random one)
  searcher.setSimilarity(new ClassicSimilarity());
  // make a copy of our index using a single segment
  if (NUM_FILLER_DOCS * PRE_FILLER_DOCS > 100000) {
    singleSegmentDirectory = newFSDirectory(createTempDir());
  } else {
    singleSegmentDirectory = newDirectory();
  }
  // TODO: this test does not need to be doing this crazy stuff. please improve it!
  for (String fileName : directory.listAll()) {
    if (fileName.startsWith("extra")) {
      continue;
    }
    singleSegmentDirectory.copyFrom(directory, fileName, fileName, IOContext.DEFAULT);
    singleSegmentDirectory.sync(Collections.singleton(fileName));
  }
  iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  // we need docID order to be preserved:
  // randomized codecs are sometimes too costly for this test:
  iwc.setCodec(Codec.forName("Lucene70"));
  iwc.setMergePolicy(newLogMergePolicy());
  try (IndexWriter w = new IndexWriter(singleSegmentDirectory, iwc)) {
    w.forceMerge(1, true);
  }
  singleSegmentReader = DirectoryReader.open(singleSegmentDirectory);
  singleSegmentSearcher = newSearcher(singleSegmentReader);
  singleSegmentSearcher.setSimilarity(searcher.getSimilarity(true));
  // Make big index
  dir2 = copyOf(directory);
  // First multiply small test index:
  mulFactor = 1;
  int docCount = 0;
  if (VERBOSE) {
    System.out.println("\nTEST: now copy index...");
  }
  do {
    if (VERBOSE) {
      System.out.println("\nTEST: cycle...");
    }
    final Directory copy = copyOf(dir2);
    iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    // randomized codecs are sometimes too costly for this test:
    iwc.setCodec(Codec.forName("Lucene70"));
    RandomIndexWriter w = new RandomIndexWriter(random(), dir2, iwc);
    w.addIndexes(copy);
    copy.close();
    docCount = w.maxDoc();
    w.close();
    mulFactor *= 2;
  } while (docCount < 3000 * NUM_FILLER_DOCS);
  iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setMaxBufferedDocs(TestUtil.nextInt(random(), 50, 1000));
  // randomized codecs are sometimes too costly for this test:
  iwc.setCodec(Codec.forName("Lucene70"));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir2, iwc);
  doc = new Document();
  doc.add(new Field("field2", "xxx", ft));
  for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) {
    w.addDocument(doc);
  }
  doc = new Document();
  doc.add(new Field("field2", "big bad bug", ft));
  for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++) {
    w.addDocument(doc);
  }
  reader = w.getReader();
  bigSearcher = newSearcher(reader);
  w.close();
}
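A setup like this leaves three readers and three directories open for the whole suite, so it needs a matching teardown. A minimal sketch of the @AfterClass counterpart, assumed here for completeness (not part of the snippet above):

@AfterClass
public static void afterClass() throws Exception {
  // close every reader and directory opened in beforeClass
  IOUtils.close(littleReader, singleSegmentReader, reader,
      directory, singleSegmentDirectory, dir2);
  // null out the statics so the test framework doesn't flag leaked references
  littleReader = null;
  singleSegmentReader = null;
  reader = null;
  directory = null;
  singleSegmentDirectory = null;
  dir2 = null;
  searcher = null;
  singleSegmentSearcher = null;
  bigSearcher = null;
}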
use of org.apache.lucene.search.similarities.ClassicSimilarity in project lucene-solr by apache.
the class TestComplexExplanations method setUp.
@Override
public void setUp() throws Exception {
  super.setUp();
  // TODO: switch to BM25?
  searcher.setSimilarity(new ClassicSimilarity());
}
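Pinning the searcher to ClassicSimilarity keeps explain() output in the familiar tf/idf/fieldNorm decomposition that explanation tests assert against. A hedged illustration of the kind of check this enables (the query term and doc id below are made up for the example):

Query q = new TermQuery(new Term("field", "w1"));
Explanation expl = searcher.explain(q, 0);
// with ClassicSimilarity the explanation tree decomposes the score into
// tf, idf, and fieldNorm factors that can be inspected individually
assertTrue(expl.isMatch());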
use of org.apache.lucene.search.similarities.ClassicSimilarity in project lucene-solr by apache.
the class TestMinShouldMatch2 method beforeClass.
@BeforeClass
public static void beforeClass() throws Exception {
  dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  final int numDocs = atLeast(300);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    addSome(doc, alwaysTerms);
    if (random().nextInt(100) < 90) {
      addSome(doc, commonTerms);
    }
    if (random().nextInt(100) < 50) {
      addSome(doc, mediumTerms);
    }
    if (random().nextInt(100) < 10) {
      addSome(doc, rareTerms);
    }
    iw.addDocument(doc);
  }
  iw.forceMerge(1);
  iw.close();
  r = DirectoryReader.open(dir);
  reader = getOnlyLeafReader(r);
  searcher = new IndexSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity());
}
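The index built above layers always/common/medium/rare terms so that minimum-should-match constraints prune hits predictably. A minimal sketch (not from the test; field and term names are illustrative) of the query shape this suite exercises:

BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(new TermQuery(new Term("field", "common")), BooleanClause.Occur.SHOULD);
bq.add(new TermQuery(new Term("field", "medium")), BooleanClause.Occur.SHOULD);
bq.add(new TermQuery(new Term("field", "rare")), BooleanClause.Occur.SHOULD);
// a hit must satisfy at least two of the three SHOULD clauses
bq.setMinimumNumberShouldMatch(2);
TopDocs hits = searcher.search(bq.build(), 10);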
use of org.apache.lucene.search.similarities.ClassicSimilarity in project lucene-solr by apache.
the class TestValueSources method testQuery.
public void testQuery() throws Exception {
  Similarity saved = searcher.getSimilarity(true);
  try {
    searcher.setSimilarity(new ClassicSimilarity());
    ValueSource vs = new QueryValueSource(new TermQuery(new Term("string", "bar")), 42F);
    assertHits(new FunctionQuery(vs), new float[] { 42F, 1.4054651F });
    // valuesource should exist only for things matching the term query
    // sanity check via a quick & dirty wrapper around tf
    ValueSource expected = new MultiFloatFunction(new ValueSource[] { new TFValueSource("bogus", "bogus", "string", new BytesRef("bar")) }) {

      @Override
      protected String name() {
        return "tf_based_exists";
      }

      @Override
      protected float func(int doc, FunctionValues[] valsArr) throws IOException {
        return valsArr[0].floatVal(doc);
      }

      @Override
      protected boolean exists(int doc, FunctionValues[] valsArr) throws IOException {
        // if tf > 0, then it should exist
        return 0 < func(doc, valsArr);
      }
    };
    assertExists(expected, vs);
    // Query matches all docs, func exists for all docs
    vs = new QueryValueSource(new TermQuery(new Term("text", "test")), 0F);
    assertAllExist(vs);
    // Query matches no docs, func exists for no docs
    vs = new QueryValueSource(new TermQuery(new Term("bogus", "does not exist")), 0F);
    assertNoneExist(vs);
  } finally {
    searcher.setSimilarity(saved);
  }
}
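QueryValueSource yields the wrapped query's score for matching documents (here 1.4054651F, the ClassicSimilarity score) and the supplied default (42F) for everything else, with exists() true only where the query matched. A rough sketch of reading such a ValueSource by hand, assuming an IndexSearcher named searcher (helper methods like assertHits are specific to the test class above):

ValueSource vs = new QueryValueSource(new TermQuery(new Term("string", "bar")), 42f);
Map context = ValueSource.newContext(searcher); // raw Map, matching the old ValueSource API
vs.createWeight(context, searcher);
for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
  FunctionValues values = vs.getValues(context, leaf);
  for (int doc = 0; doc < leaf.reader().maxDoc(); doc++) {
    // exists() is true only where the wrapped query matched
    System.out.println((leaf.docBase + doc) + " exists=" + values.exists(doc)
        + " value=" + values.floatVal(doc));
  }
}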