Use of org.apache.lucene.search.similarities.Similarity in project lucene-solr by apache.
The class TestNorms, method buildIndex.
// TODO: create a testNormsNotPresent ourselves by adding/deleting/merging docs
public void buildIndex(Directory dir) throws IOException {
  Random random = random();
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  IndexWriterConfig config = newIndexWriterConfig(analyzer);
  Similarity provider = new MySimProvider();
  config.setSimilarity(provider);
  RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
  final LineFileDocs docs = new LineFileDocs(random);
  int num = atLeast(100);
  for (int i = 0; i < num; i++) {
    Document doc = docs.nextDoc();
    int boost = TestUtil.nextInt(random, 1, 255);
    // The value repeats Integer.toString(boost) exactly boost times, so both the
    // stored text and the field's token count record the chosen boost.
    String value = IntStream.range(0, boost).mapToObj(k -> Integer.toString(boost)).collect(Collectors.joining(" "));
    Field f = new TextField(BYTE_TEST_FIELD, value, Field.Store.YES);
    doc.add(f);
    writer.addDocument(doc);
    doc.removeField(BYTE_TEST_FIELD);
    if (rarely()) {
      writer.commit();
    }
  }
  writer.commit();
  writer.close();
  docs.close();
}
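MySimProvider is a helper defined elsewhere in TestNorms and is not shown on this page. As a rough sketch of the general pattern (not the test's actual implementation), a per-field provider can extend PerFieldSimilarityWrapper and route one field to a dedicated Similarity. The field name and the two delegate similarities below are illustrative, assuming a Lucene 6/7-style API.

import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;

// Sketch only: routes a single test field to its own Similarity and leaves
// every other field on a default. Not the MySimProvider from TestNorms.
class PerFieldSimProviderSketch extends PerFieldSimilarityWrapper {
  private final String specialField;                      // e.g. BYTE_TEST_FIELD
  private final Similarity specialSim = new ClassicSimilarity();
  private final Similarity defaultSim = new BM25Similarity();

  PerFieldSimProviderSketch(String specialField) {
    this.specialField = specialField;
  }

  @Override
  public Similarity get(String field) {
    // Called per field; the returned Similarity computes that field's norms and scores.
    return specialField.equals(field) ? specialSim : defaultSim;
  }
}

Passing such an instance to IndexWriterConfig.setSimilarity, as buildIndex does above, applies the per-field choice when norms are computed at index time.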
Use of org.apache.lucene.search.similarities.Similarity in project lucene-solr by apache.
The class TestDiversifiedTopDocsCollector, method setUp.
@Override
public void setUp() throws Exception {
  super.setUp();
  // populate an index with documents - artist, song and weeksAtNumberOne
  dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  Field yearField = newTextField("year", "", Field.Store.NO);
  SortedDocValuesField artistField = new SortedDocValuesField("artist", new BytesRef(""));
  Field weeksAtNumberOneField = new FloatDocValuesField("weeksAtNumberOne", 0.0F);
  Field weeksStoredField = new StoredField("weeks", 0.0F);
  Field idField = newStringField("id", "", Field.Store.YES);
  Field songField = newTextField("song", "", Field.Store.NO);
  Field storedArtistField = newTextField("artistName", "", Field.Store.NO);
  doc.add(idField);
  doc.add(weeksAtNumberOneField);
  doc.add(storedArtistField);
  doc.add(songField);
  doc.add(weeksStoredField);
  doc.add(yearField);
  doc.add(artistField);
  parsedRecords.clear();
  for (int i = 0; i < hitsOfThe60s.length; i++) {
    String[] cols = hitsOfThe60s[i].split("\t");
    Record record = new Record(String.valueOf(i), cols[0], cols[1], cols[2], Float.parseFloat(cols[3]));
    parsedRecords.put(record.id, record);
    idField.setStringValue(record.id);
    yearField.setStringValue(record.year);
    storedArtistField.setStringValue(record.artist);
    artistField.setBytesValue(new BytesRef(record.artist));
    songField.setStringValue(record.song);
    weeksStoredField.setFloatValue(record.weeks);
    weeksAtNumberOneField.setFloatValue(record.weeks);
    writer.addDocument(doc);
    if (i % 10 == 0) {
      // Causes the creation of multiple segments for our test
      writer.commit();
    }
  }
  reader = writer.getReader();
  writer.close();
  searcher = newSearcher(reader);
  artistDocValues = MultiDocValues.getSortedValues(reader, "artist");
  // All searches sort by song popularity
  final Similarity base = searcher.getSimilarity(true);
  searcher.setSimilarity(new DocValueSimilarity(base, "weeksAtNumberOne"));
}
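DocValueSimilarity, passed to the searcher above, is a helper defined in the same test class and is not shown on this page. If the goal is simply to rank hits by a numeric doc-values field such as weeksAtNumberOne, a query-time alternative that avoids a custom Similarity altogether is FunctionScoreQuery over a DoubleValuesSource. The sketch below assumes Lucene 7+ and an illustrative song term.

import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.FunctionScoreQuery;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;

// Sketch only (assumes Lucene 7+): replace the text-relevance score of a query
// with the per-document value of the "weeksAtNumberOne" doc-values field.
static TopDocs searchByPopularity(IndexSearcher searcher, String songTerm) throws IOException {
  Query songQuery = new TermQuery(new Term("song", songTerm)); // songTerm is illustrative
  Query byPopularity = new FunctionScoreQuery(
      songQuery, DoubleValuesSource.fromFloatField("weeksAtNumberOne"));
  return searcher.search(byPopularity, 10);
}

new FunctionScoreQuery(q, source) replaces the scores of matching documents with the source value, while FunctionScoreQuery.boostByValue(q, source) multiplies the original score instead.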
Use of org.apache.lucene.search.similarities.Similarity in project Anserini by castorini.
The class RetrieveSentences, method search.
public Map<String, Float> search(SortedMap<Integer, String> topics, int numHits) throws IOException, ParseException {
  IndexSearcher searcher = new IndexSearcher(reader);
  // Use the BM25 scoring model (k1 = 0.9, b = 0.4).
  Similarity similarity = new BM25Similarity(0.9f, 0.4f);
  searcher.setSimilarity(similarity);
  EnglishAnalyzer ea = new EnglishAnalyzer();
  QueryParser queryParser = new QueryParser(FIELD_BODY, ea);
  queryParser.setDefaultOperator(QueryParser.Operator.OR);
  Map<String, Float> scoredDocs = new LinkedHashMap<>();
  for (Map.Entry<Integer, String> entry : topics.entrySet()) {
    int qID = entry.getKey();
    String queryString = entry.getValue();
    Query query = AnalyzerUtils.buildBagOfWordsQuery(FIELD_BODY, ea, queryString);
    TopDocs rs = searcher.search(query, numHits);
    ScoreDoc[] hits = rs.scoreDocs;
    ScoredDocuments docs = ScoredDocuments.fromTopDocs(rs, searcher);
    for (int i = 0; i < docs.documents.length; i++) {
      scoredDocs.put(docs.documents[i].getField(FIELD_ID).stringValue(), docs.scores[i]);
    }
  }
  return scoredDocs;
}
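In BM25Similarity(0.9f, 0.4f), k1 = 0.9 controls how quickly the term-frequency contribution saturates and b = 0.4 controls how strongly document length is normalized. The following is a minimal standalone sketch of the same similarity setup over a plain Lucene index; the index path and the field names "contents" and "id" are assumptions to swap for your own index.

import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.store.FSDirectory;

// Sketch only: a self-contained BM25 search with the same similarity settings.
// The index path and field names ("contents", "id") are placeholders.
static void bm25Search(String indexPath, String queryString) throws IOException, ParseException {
  try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)))) {
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new BM25Similarity(0.9f, 0.4f)); // k1 = 0.9, b = 0.4
    Query query = new QueryParser("contents", new EnglishAnalyzer()).parse(queryString);
    for (ScoreDoc sd : searcher.search(query, 10).scoreDocs) {
      System.out.println(searcher.doc(sd.doc).get("id") + "\t" + sd.score);
    }
  }
}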